// --------------------------------------------------------------------------------------------------------------------
//
// Copyright (c) by respective owners including Yahoo!, Microsoft, and
// individual contributors. All rights reserved. Released under a BSD
// license as described in the file LICENSE.
//
// --------------------------------------------------------------------------------------------------------------------
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;
using System.Diagnostics.Contracts;
using System.Linq;
using VW.Labels;
using VW.Serializer.Intermediate;
namespace VW.Serializer
{
/// <summary>
/// Builds Vowpal Wabbit examples from JSON following https://github.com/JohnLangford/vowpal_wabbit/wiki/JSON
/// </summary>
public sealed class VowpalWabbitJsonBuilder : IDisposable
{
/// <summary>
/// Mapping from lower-cased JSON property names to the label type that declares them.
/// Filled once by the static constructor.
/// </summary>
// NOTE(review): generic type arguments appear to be missing on this and the List fields below - confirm against the original source.
private static readonly Dictionary labelPropertyMapping;
// native VW instance, taken from the example pool handed to the constructor
private readonly VowpalWabbit vw;
// marshals namespaces, features and labels into the marshalling contexts
private readonly VowpalWabbitDefaultMarshaller defaultMarshaller;
// used to deserialize label objects found in the JSON
private readonly JsonSerializer jsonSerializer;
// required for reference resolution ($id / $ref); both stay null when no serializer is supplied
private readonly VowpalWabbitJsonSerializer serializer;
private readonly VowpalWabbitJsonReferenceResolver referenceResolver;
// string representations of parsed namespaces; only joined into the example string when
// EnableStringExampleGeneration is set
private readonly List namespaceStrings;
// reader of the parse currently in progress (kept in a field instead of being passed around)
private JsonReader reader;
// set once the configured multi property has been seen in the current example
private bool foundMulti;
// collects properties carrying the label property prefix until the example is fully parsed
private JObject labelObject;
// label passed in by the caller; takes precedence over a label found in the JSON
private ILabel label;
// running total of marshalled features; CreateExample returns null while this is 0
private int featureCount;
// state object handed to extension handlers
private VowpalWabbitJsonParseState extensionState;
// handlers that get a chance to consume special properties not handled by the builder itself
private List extensions;
/// <summary>
/// Builds the lookup from JSON property name (lower-cased) to label type by reflecting
/// over the properties of the supported label types that carry a <see cref="JsonPropertyAttribute"/>.
/// </summary>
static VowpalWabbitJsonBuilder()
{
    var labelTypes = new[] { typeof(SimpleLabel), typeof(ContextualBanditLabel) };

    labelPropertyMapping = labelTypes
        .SelectMany(
            labelType => labelType.GetProperties(),
            (labelType, property) => new
            {
                Type = labelType,
                Property = property,
                JsonProperty = (JsonPropertyAttribute)property
                    .GetCustomAttributes(typeof(JsonPropertyAttribute), true)
                    .FirstOrDefault()
            })
        // only properties explicitly annotated for JSON take part in label detection
        .Where(entry => entry.JsonProperty != null)
        .ToDictionary(
            // the attribute's explicit name wins; otherwise fall back to the CLR property name
            entry => (entry.JsonProperty.PropertyName ?? entry.Property.Name).ToLowerInvariant(),
            entry => entry.Type);
}
/// <summary>
/// Initializes a new instance of <see cref="VowpalWabbitJsonBuilder"/> without a serializer,
/// forwarding to the full constructor with a null serializer.
/// </summary>
/// <param name="vwPool">Pool whose native VW instance is used for marshalling.</param>
/// <param name="defaultMarshaller">Marshaller used for namespaces, features and labels.</param>
/// <param name="jsonSerializer">Serializer used to deserialize label objects.</param>
/// <param name="multiIndex">Stored in the state handed to extensions; defaults to -1.</param>
public VowpalWabbitJsonBuilder(IVowpalWabbitExamplePool vwPool, VowpalWabbitDefaultMarshaller defaultMarshaller, JsonSerializer jsonSerializer, int multiIndex = -1)
    : this(
        serializer: null,
        vwPool: vwPool,
        defaultMarshaller: defaultMarshaller,
        jsonSerializer: jsonSerializer,
        multiIndex: multiIndex)
{
}
/// <summary>
/// Initializes a new instance of <see cref="VowpalWabbitJsonBuilder"/>.
/// </summary>
/// <param name="serializer">
/// Optional serializer supplying the reference resolver. May be null (the overload without a
/// serializer passes null); <c>$id</c>/<c>$ref</c> handling then returns early without resolving.
/// </param>
/// <param name="vwPool">Pool whose native VW instance is used for marshalling.</param>
/// <param name="defaultMarshaller">Marshaller used for namespaces, features and labels.</param>
/// <param name="jsonSerializer">Serializer used to deserialize label objects.</param>
/// <param name="multiIndex">Stored in the state handed to extensions; defaults to -1.</param>
public VowpalWabbitJsonBuilder(VowpalWabbitJsonSerializer serializer, IVowpalWabbitExamplePool vwPool, VowpalWabbitDefaultMarshaller defaultMarshaller, JsonSerializer jsonSerializer, int multiIndex = -1)
{
    // serializer is intentionally not part of the contract: null is a supported value.
    // The pool parameter is what must be non-null; the vw field is not assigned yet at this point.
    Contract.Requires(vwPool != null);
    Contract.Requires(defaultMarshaller != null);
    Contract.Requires(jsonSerializer != null);

    this.extensionState = new VowpalWabbitJsonParseState
    {
        JsonBuilder = this,
        VW = vwPool.Native,
        MultiIndex = multiIndex
    };

    this.namespaceStrings = new List();
    this.foundMulti = false;

    // reference resolution is only available when a serializer was supplied
    if (serializer != null)
        this.referenceResolver = serializer.ReferenceResolver;
    this.serializer = serializer;

    this.vw = vwPool.Native;
    this.defaultMarshaller = defaultMarshaller;
    this.jsonSerializer = jsonSerializer;

    this.DefaultNamespaceContext = new VowpalWabbitMarshalContext(this.vw);
}
// useful for tracking down bugs
// private string DefaultNamespaceContextStackTrace;
/// <summary>
/// The marshalling context for the default namespace. It is disposed and reset to null by
/// <see cref="CreateExample"/> and by <see cref="Dispose()"/>, so it can only be used before either is called.
/// </summary>
public VowpalWabbitMarshalContext DefaultNamespaceContext { get; private set; }
/// <summary>
/// The index the label was assigned to for multi line examples.
/// Set while parsing when the configured label index property carries a non-null value.
/// </summary>
public int LabelIndex { get; private set; }
/// <summary>
/// The label that was deserialized from prefixed label properties.
/// Only assigned when the multi property was found in the parsed example.
/// </summary>
public ILabel Label { get; private set; }
/// <summary>
/// Creates the managed example representation from everything marshalled so far.
/// The default namespace context is disposed and cleared afterwards, whether or not an example was produced.
/// </summary>
/// <returns>The managed example, or null if no features were marshalled.</returns>
public VowpalWabbitExample CreateExample()
{
    try
    {
        // nothing was marshalled, so there is no example to hand out
        if (this.featureCount == 0)
        {
            return null;
        }

        var example = this.DefaultNamespaceContext.ExampleBuilder.CreateExample();
        if (!this.vw.Settings.EnableStringExampleGeneration)
        {
            return example;
        }

        // the default namespace goes first, followed by the namespaces collected while parsing
        var defaultNamespaceString = this.DefaultNamespaceContext.ToString();
        if (defaultNamespaceString.Length > 0)
        {
            this.namespaceStrings.Insert(0, defaultNamespaceString);
        }

        example.VowpalWabbitString = string.Join(" ", this.namespaceStrings);
        return example;
    }
    finally
    {
        this.DefaultNamespaceContext.Dispose();
        this.DefaultNamespaceContext = null;
    }
}
/// <summary>
/// Entry point for extensions re-entering the builder: parses the properties at the reader's
/// current position into the given namespace and adds the marshalled features to the running total.
/// </summary>
internal void Parse(List path, VowpalWabbitMarshalContext namespaceContext, Namespace ns)
{
    // run the marshaller first: ParseProperties may itself change featureCount,
    // so the field must be read only after it has returned
    var marshalled = this.defaultMarshaller.MarshalNamespace(namespaceContext, ns, () => this.ParseProperties(path));
    this.featureCount = marshalled + this.featureCount;
}
/// <summary>
/// Parses a VW JSON object into the supplied namespace, using the example builder of
/// <paramref name="context"/>. The default namespace context is redirected for the duration
/// of the call and restored afterwards.
/// </summary>
/// <param name="reader">Reader positioned before or at the start of the object.</param>
/// <param name="context">Marshalling context whose example builder (and string example) receives the output.</param>
/// <param name="ns">The namespace the parsed features are marshalled into.</param>
/// <param name="extensions">Optional handlers for special properties.</param>
public void Parse(JsonReader reader, VowpalWabbitMarshalContext context, Namespace ns, List extensions = null)
{
    this.namespaceStrings.Clear();
    this.reader = reader;
    this.extensions = extensions;

    // a fresh reader has to be advanced to its first token; the reader may also already sit on StartObject
    if (reader.TokenType == JsonToken.None && !reader.Read())
    {
        return;
    }

    switch (reader.TokenType)
    {
        case JsonToken.Null:
            // null values are tolerated and simply produce nothing
            return;

        case JsonToken.StartObject:
            break;

        default:
            throw new VowpalWabbitJsonException(this.reader,
                $"Expected start object. Found '{reader.TokenType}' and value '{reader.Value}' for namespace {ns.Name}");
    }

    // redirect the default namespace to the builder that was passed in
    var outerDefaultContext = this.DefaultNamespaceContext;
    try
    {
        using (this.DefaultNamespaceContext = new VowpalWabbitMarshalContext(this.vw, context.ExampleBuilder))
        {
            VowpalWabbitJsonParseContext namespaceParseContext = null;
            try
            {
                // set up the namespace the properties are parsed into
                namespaceParseContext = new VowpalWabbitJsonParseContext
                {
                    Namespace = ns,
                    Context = new VowpalWabbitMarshalContext(this.vw, context.ExampleBuilder),
                    JsonProperty = ns.Name
                };

                this.defaultMarshaller.MarshalNamespace(
                    namespaceParseContext.Context,
                    ns,
                    () => this.ParseProperties(new List { namespaceParseContext }));

                // hand the generated string representation back to the caller's context
                if (this.vw.Settings.EnableStringExampleGeneration)
                {
                    context.StringExample
                        .Append(namespaceParseContext.Context.StringExample)
                        .Append(string.Join(" ", this.namespaceStrings));
                }
            }
            finally
            {
                var namespaceContext = namespaceParseContext?.Context;
                if (namespaceContext != null)
                {
                    namespaceContext.Dispose();
                    namespaceParseContext.Context = null;
                }
            }
        }
    }
    finally
    {
        this.DefaultNamespaceContext = outerDefaultContext;
    }
}
/// <summary>
/// Parses a single JSON example into the default namespace context.
/// </summary>
/// <param name="reader">The JSON reader supplying the example.</param>
/// <param name="label">
/// Optional label. When supplied it is marshalled up front and the label property found in
/// the JSON is skipped; when null, the label is taken from the JSON.
/// </param>
/// <param name="extensions">Optional handlers invoked for special properties the builder does not handle itself.</param>
public void Parse(JsonReader reader, ILabel label = null, List extensions = null)
{
    // reset per-example state
    this.featureCount = 0;
    this.labelObject = null;
    this.foundMulti = false;

    // kept in fields rather than passed down through every parse method
    this.reader = reader;
    this.label = label;
    this.extensions = extensions;

    if (label != null)
    {
        this.defaultMarshaller.MarshalLabel(this.DefaultNamespaceContext, label);
    }

    // a fresh reader has to be advanced to its first token; the reader may also already sit on StartObject
    if (reader.TokenType == JsonToken.None && !reader.Read())
    {
        return;
    }

    if (reader.TokenType != JsonToken.StartObject)
    {
        throw new VowpalWabbitJsonException(this.reader,
            $"Expected start object. Found '{reader.TokenType}' and value '{reader.Value}'");
    }

    var defaultNamespace = new Namespace(this.vw);
    var rootContext = new VowpalWabbitJsonParseContext
    {
        Namespace = defaultNamespace,
        Context = this.DefaultNamespaceContext,
        JsonProperty = string.Empty
    };
    var path = new List { rootContext };

    this.extensionState.Reader = reader;
    this.extensionState.Path = path;

    // TODO: duplicate namespace recursion to enable async
    // featureCount may be modified inside ParseProperties, so it is read only after marshalling returned
    var marshalled = this.defaultMarshaller.MarshalNamespace(this.DefaultNamespaceContext, defaultNamespace, () => this.ParseProperties(path));
    this.featureCount = marshalled + this.featureCount;

    // no prefixed label properties were collected
    if (this.labelObject == null)
    {
        return;
    }

    // the first collected property decides which label type is deserialized
    var firstPropertyName = ((JProperty)this.labelObject.First).Name;
    Type labelType;
    if (!labelPropertyMapping.TryGetValue(firstPropertyName.ToLowerInvariant(), out labelType))
    {
        throw new VowpalWabbitJsonException(this.reader, "The first property ('" + firstPropertyName + "') must match to a property of a VowpalWabbit label type.");
    }

    var parsedLabel = (ILabel)this.labelObject.ToObject(labelType);
    if (this.foundMulti)
    {
        // multi line example: expose the label instead of attaching it here
        this.Label = parsedLabel;
    }
    else
    {
        this.defaultMarshaller.MarshalLabel(this.DefaultNamespaceContext, parsedLabel);
    }
}
/// <summary>
/// Handles a property recognised as special or carrying the feature-ignore prefix: the label,
/// the text property, the label index, prefixed label properties, or anything else, which is
/// offered to the extensions and skipped if none of them consumes it.
/// </summary>
/// <param name="context">The parse context of the enclosing namespace.</param>
/// <param name="propertyName">The name of the property the reader is positioned on.</param>
private void ParseSpecialProperty(VowpalWabbitJsonParseContext context, string propertyName)
{
    const StringComparison ignoreCase = StringComparison.OrdinalIgnoreCase;
    var config = this.vw.Settings.PropertyConfiguration;

    // label: a label passed in by the caller has precedence over the one in the JSON
    if (propertyName.Equals(config.LabelProperty, ignoreCase))
    {
        if (this.label != null)
        {
            this.reader.Skip();
        }
        else
        {
            this.ParseLabel();
        }

        return;
    }

    // text: split the string value into features
    if (propertyName.Equals(config.TextProperty, ignoreCase))
    {
        this.defaultMarshaller.MarshalFeatureStringSplit(
            context.Context,
            context.Namespace,
            new Feature(propertyName),
            this.reader.ReadAsString());
        return;
    }

    // label index: remember it unless the value is null
    if (propertyName.Equals(config.LabelIndexProperty, ignoreCase))
    {
        if (!this.reader.Read())
        {
            throw new VowpalWabbitJsonException(this.reader, "Unexpected end");
        }

        if (this.reader.TokenType != JsonToken.Null)
        {
            this.LabelIndex = (int)(long)this.reader.Value;
        }

        return;
    }

    // prefixed label property: collect it (without the prefix) for later deserialization
    if (propertyName.StartsWith(config.LabelPropertyPrefix, ignoreCase))
    {
        if (!this.reader.Read())
        {
            throw new VowpalWabbitJsonException(this.reader, "Unexpected end");
        }

        if (this.reader.TokenType == JsonToken.Null)
        {
            return;
        }

        if (this.labelObject == null)
        {
            this.labelObject = new JObject();
        }

        var unprefixedName = propertyName.Substring(config.LabelPropertyPrefix.Length);
        this.labelObject.Add(unprefixedName, new JValue(this.reader.Value));
        return;
    }

    // everything else: note the multi property, then let the extensions have a go
    if (propertyName.Equals(config.MultiProperty, StringComparison.Ordinal))
    {
        this.foundMulti = true;
    }

    if (this.extensions != null)
    {
        foreach (var extension in this.extensions)
        {
            if (extension(this.extensionState, propertyName))
            {
                return;
            }
        }
    }

    // nobody handled it
    this.reader.Skip();
}
/// <summary>
/// Parses the value of the label property and marshals it into the default namespace context.
/// An object is deserialized into the label type identified by its first property; an integer,
/// float or string is handed over as a <see cref="StringLabel"/>; null is ignored.
/// </summary>
/// <exception cref="VowpalWabbitJsonException">
/// The input ends prematurely, the object has no leading property, the first property does not
/// belong to a known label type, or the value is of an unsupported token type.
/// </exception>
private void ParseLabel()
{
    // peek at the label value
    if (!this.reader.Read())
        throw new VowpalWabbitJsonException(this.reader, "Unexpected end");

    switch (this.reader.TokenType)
    {
        case JsonToken.StartObject:
            {
                // the first property name determines the label type
                if (!this.reader.Read() || this.reader.TokenType != JsonToken.PropertyName)
                    throw new VowpalWabbitJsonException(this.reader, "Expected at least a single property to determine the label object");

                var propertyName = (string)this.reader.Value;

                // StartObject and the property name were already consumed; replay them for the deserializer
                var prefixReader = new PrefixedJsonReader(this.reader,
                    Tuple.Create(JsonToken.StartObject, (object)null),
                    Tuple.Create(JsonToken.PropertyName, (object)propertyName));

                Type labelType;
                if (!labelPropertyMapping.TryGetValue(propertyName.ToLowerInvariant(), out labelType))
                    throw new VowpalWabbitJsonException(this.reader, "The first property ('" + propertyName + "') must match to a property of a VowpalWabbit label type.");

                // named parsedLabel so it does not shadow the 'label' field
                var parsedLabel = (ILabel)this.jsonSerializer.Deserialize(prefixReader, labelType);
                this.defaultMarshaller.MarshalLabel(this.DefaultNamespaceContext, parsedLabel);
            }
            break;

        case JsonToken.Integer:
        case JsonToken.Float:
        case JsonToken.String:
            {
                // pass the label to VW as text; numbers must be formatted culture-independently,
                // otherwise a float such as 0.5 is rendered as "0,5" under some current cultures
                var labelString = Convert.ToString(this.reader.Value, System.Globalization.CultureInfo.InvariantCulture);
                this.defaultMarshaller.MarshalLabel(this.DefaultNamespaceContext, new StringLabel(labelString));
            }
            break;

        case JsonToken.Null:
            // ignore
            break;

        default:
            throw new VowpalWabbitJsonException(this.reader, "Expected label object");
    }
}
/// <summary>
/// Parses the feature value the reader is positioned on. The names <c>$id</c> and <c>$ref</c>
/// are treated as reference definitions and reference uses respectively; everything else is
/// marshalled as a regular feature of the innermost namespace on <paramref name="path"/>.
/// </summary>
/// <param name="path">The stack of parse contexts leading to the current position.</param>
/// <param name="featureName">The name of the property whose value is being parsed.</param>
private void ParseFeature(List path, string featureName)
{
    if (featureName == "$id")
    {
        if (this.referenceResolver == null)
        {
            return;
        }

        var id = (string)this.reader.Value;

        var atValuesProperty =
            this.reader.Read() &&
            this.reader.TokenType == JsonToken.PropertyName &&
            (string)this.reader.Value == "$values";
        if (!atValuesProperty)
        {
            throw new VowpalWabbitJsonException(this.reader, "Expecting '$values' property");
        }

        // move onto the value of $values
        if (!this.reader.Read())
        {
            throw new VowpalWabbitJsonException(this.reader, "Unexpected end");
        }

        // build a marshalling action that can be replayed, and register it under the id
        // NOTE(review): ParseFeatureReUsable returns null for null/comment/unhandled tokens, in which
        // case the Marshal call below would fail - confirm whether such input can occur.
        var reusableAction = this.ParseFeatureReUsable();
        this.referenceResolver.AddReference(id, reusableAction);

        // the last path entry is the property being serialized; its parent is the actual namespace
        var referencedProperty = path.Last().JsonProperty;
        var owner = path[path.Count - 2];
        reusableAction.Marshal(this.defaultMarshaller, owner.Context, owner.Namespace, referencedProperty);
        return;
    }

    if (featureName == "$ref")
    {
        if (this.referenceResolver == null || this.serializer == null)
        {
            return;
        }

        var id = (string)this.reader.Value;

        // the last path entry is the property being serialized; its parent is the actual namespace
        var referencedProperty = path.Last().JsonProperty;
        var ns = path[path.Count - 2].Namespace;

        this.referenceResolver.Resolve(
            this.serializer,
            id,
            marshalAction =>
            {
                // marshal into a fresh context on the current example builder
                using (var context = new VowpalWabbitMarshalContext(this.vw, this.DefaultNamespaceContext.ExampleBuilder))
                {
                    this.featureCount += this.defaultMarshaller.MarshalNamespace(
                        context,
                        ns,
                        () => marshalAction.Marshal(this.defaultMarshaller, context, ns, referencedProperty));

                    // keep the string representation if anything was produced
                    if (this.vw.Settings.EnableStringExampleGeneration)
                    {
                        var namespaceString = context.ToString();
                        if (namespaceString.Length > 0)
                        {
                            this.namespaceStrings.Add(namespaceString);
                        }
                    }
                }
            });
        return;
    }

    var current = path.Last();
    this.ParseFeature(path, current.Context, current.Namespace, featureName);
}
/// <summary>
/// Turns the value the reader is positioned on into a marshalling action that does not depend
/// on the current parsing context, so it can be stored and replayed later.
/// </summary>
/// <returns>
/// The action for float, integer, string, boolean and array values; null for comments, nulls
/// and any other token.
/// </returns>
private IVowpalWabbitMarshalAction ParseFeatureReUsable()
{
    switch (this.reader.TokenType)
    {
        case JsonToken.Float:
            return VowpalWabbitMarshalActions.Create((double)this.reader.Value);

        case JsonToken.Integer:
            return VowpalWabbitMarshalActions.Create((long)this.reader.Value);

        case JsonToken.String:
            return VowpalWabbitMarshalActions.Create((string)this.reader.Value);

        case JsonToken.Boolean:
            return VowpalWabbitMarshalActions.Create((bool)this.reader.Value);

        case JsonToken.StartArray:
            return this.ParseFeatureArrayReUsable();

        default:
            // comments and nulls are deliberately ignored; other tokens yield no action either
            return null;
    }
}
/// <summary>
/// Reads a dense numeric array into a buffer and wraps it in a reusable marshalling action.
/// Expects: "1,2.2,3]" (the leading [ has already been consumed).
/// </summary>
/// <returns>An action over the collected values and their count.</returns>
/// <exception cref="VowpalWabbitJsonException">The array contains anything other than integers or floats.</exception>
private IVowpalWabbitMarshalAction ParseFeatureArrayReUsable()
{
    var buffer = new float[16];
    var count = 0;

    while (this.reader.Read())
    {
        var token = this.reader.TokenType;
        if (token == JsonToken.EndArray)
        {
            break;
        }

        float element;
        if (token == JsonToken.Integer)
        {
            element = (float)(long)this.reader.Value;
        }
        else if (token == JsonToken.Float)
        {
            element = (float)(double)this.reader.Value;
        }
        else
        {
            throw new VowpalWabbitJsonException(this.reader, "Unxpected token " + this.reader.TokenType + " while deserializing dense feature array");
        }

        // double the buffer once it is full
        if (count == buffer.Length)
        {
            Array.Resize(ref buffer, buffer.Length * 2);
        }

        buffer[count++] = element;
    }

    return VowpalWabbitMarshalActions.Create(buffer, count);
}
/// <summary>
/// Marshals the value the reader is positioned on as a feature of the given namespace.
/// Arrays are parsed inside a namespace of their own named after the feature.
/// </summary>
/// <param name="path">The stack of parse contexts leading to the current position.</param>
/// <param name="context">The marshalling context receiving the feature.</param>
/// <param name="ns">The namespace the feature belongs to.</param>
/// <param name="featureName">The name of the feature.</param>
/// <exception cref="VowpalWabbitJsonException">The token is not a supported feature value.</exception>
private void ParseFeature(List path, VowpalWabbitMarshalContext context, Namespace ns, string featureName)
{
    var token = this.reader.TokenType;
    switch (token)
    {
        case JsonToken.Comment:
        case JsonToken.Null:
            // nothing to marshal
            return;

        case JsonToken.Float:
            VowpalWabbitMarshalActions.Marshal(this.defaultMarshaller, context, ns, featureName, (double)this.reader.Value);
            return;

        case JsonToken.Integer:
            VowpalWabbitMarshalActions.Marshal(this.defaultMarshaller, context, ns, featureName, (long)this.reader.Value);
            return;

        case JsonToken.String:
            VowpalWabbitMarshalActions.Marshal(this.defaultMarshaller, context, ns, featureName, (string)this.reader.Value);
            return;

        case JsonToken.Boolean:
            VowpalWabbitMarshalActions.Marshal(this.defaultMarshaller, context, ns, featureName, (bool)this.reader.Value);
            return;

        case JsonToken.StartArray:
            // the array parser picks the freshly pushed context up from the path itself
            this.WrapInNamespace(path, featureName, arrayContext => this.ParseFeatureArray(path));
            return;

        default:
            throw new VowpalWabbitJsonException(this.reader, "Unexpected token " + this.reader.TokenType + " while deserializing primitive feature");
    }
}
/// <summary>
/// Parses the elements of an array into the innermost namespace on <paramref name="path"/>.
/// Numbers become features addressed by their position, objects are parsed as nested
/// properties and nulls are skipped; every element advances the position.
/// Expects: "1,2.2,3]" (the leading [ has already been consumed).
/// </summary>
/// <exception cref="VowpalWabbitJsonException">An element is of an unsupported token type.</exception>
private void ParseFeatureArray(List path)
{
    var innermost = path.Last();
    var context = innermost.Context;
    var ns = innermost.Namespace;

    for (ulong position = 0; this.reader.Read(); position++)
    {
        switch (this.reader.TokenType)
        {
            case JsonToken.EndArray:
                return;

            case JsonToken.Integer:
                MarshalFloatFeature(context, ns, position, (float)(long)this.reader.Value);
                break;

            case JsonToken.Float:
                MarshalFloatFeature(context, ns, position, (float)(double)this.reader.Value);
                break;

            case JsonToken.StartObject:
                this.ParseProperties(path);
                break;

            case JsonToken.Null:
                // nulls only take up a position
                break;

            default:
                throw new VowpalWabbitJsonException(this.reader, "Unxpected token " + this.reader.TokenType + " while deserializing dense feature array");
        }
    }
}
/// <summary>
/// Adds a single numeric feature, hashed as namespace hash plus <paramref name="index"/>, and
/// appends its textual form to the string example when one is being generated.
/// </summary>
private static void MarshalFloatFeature(VowpalWabbitMarshalContext context, Namespace ns, ulong index, float value)
{
    context.NamespaceBuilder.AddFeature(ns.NamespaceHash + index, value);

    // no string example is being built
    if (context.StringExample == null)
    {
        return;
    }

    var valueFormat = context.VW.Settings.EnableStringFloatCompact ? "{1}" : "{1:E20}";
    context.AppendStringExample(false, " {0}:" + valueFormat, index, value);
}
/// <summary>
/// Runs <paramref name="action"/> inside a freshly created namespace: a new marshalling context
/// and parse context are set up, pushed onto <paramref name="path"/> for the duration of the
/// action, and torn down again afterwards. Features added by the action are counted towards
/// the example's feature total.
/// </summary>
/// <param name="path">The stack of parse contexts; restored to its original state on exit.</param>
/// <param name="namespaceValue">The name of the namespace (also used as the JSON property name).</param>
/// <param name="action">The parsing work to run within the namespace.</param>
private void WrapInNamespace(List path, string namespaceValue, Action action)
{
    VowpalWabbitJsonParseContext parseContext = null;
    VowpalWabbitMarshalContext marshalContext = null;

    // tracks whether our context actually made it onto the path, so that cleanup never
    // pops an entry belonging to the caller when setup fails early
    var pushed = false;

    try
    {
        var ns = new Namespace(this.vw, namespaceValue);
        marshalContext = new VowpalWabbitMarshalContext(this.vw, this.DefaultNamespaceContext.ExampleBuilder);

        parseContext = new VowpalWabbitJsonParseContext
        {
            Namespace = ns,
            Context = marshalContext,
            JsonProperty = namespaceValue
        };

        // the namespace is only added on dispose, to be able to check if at least a single feature has been added
        marshalContext.NamespaceBuilder = marshalContext.ExampleBuilder.AddNamespace(ns.FeatureGroup);

        // start the string representation with the namespace header
        var stringExample = marshalContext.StringExample;
        if (stringExample != null)
            stringExample.Append(ns.NamespaceString);

        path.Add(parseContext);
        pushed = true;

        action(parseContext);

        // keep the string representation if anything was produced
        if (this.vw.Settings.EnableStringExampleGeneration)
        {
            var str = marshalContext.ToString();
            if (str.Length > 0)
                this.namespaceStrings.Add(str);
        }

        this.featureCount += (int)marshalContext.NamespaceBuilder.FeatureCount;
    }
    finally
    {
        if (pushed)
            path.RemoveAt(path.Count - 1);

        // marshalContext is still null if setup failed before it was created
        if (marshalContext != null && marshalContext.NamespaceBuilder != null)
        {
            marshalContext.NamespaceBuilder.Dispose();
            marshalContext.NamespaceBuilder = null;
        }

        if (parseContext != null)
        {
            if (parseContext.Context != null)
            {
                parseContext.Context.Dispose();
                parseContext.Context = null;
            }
        }
        else if (marshalContext != null)
        {
            // the parse context was never created, so release the marshalling context directly
            marshalContext.Dispose();
        }
    }
}
/// <summary>
/// Parses an object of the form { "feature1":1, "feature2":true, .... } as the contents of a
/// new namespace named <paramref name="namespaceValue"/>.
/// </summary>
private void ParseNamespaceAndFeatures(List path, string namespaceValue)
    => this.WrapInNamespace(path, namespaceValue, unused => this.ParseProperties(path));
/// <summary>
/// Reads properties until the end of the current object. Special or ignored properties are
/// routed to <see cref="ParseSpecialProperty"/>, object values open a nested namespace and all
/// other values are parsed as features. Tokens other than property names are skipped over.
/// </summary>
/// <param name="path">The stack of parse contexts leading to the current object.</param>
private void ParseProperties(List path)
{
    var config = this.vw.Settings.PropertyConfiguration;

    while (this.reader.Read())
    {
        var token = this.reader.TokenType;
        if (token == JsonToken.EndObject)
        {
            return;
        }

        if (token != JsonToken.PropertyName)
        {
            continue;
        }

        var propertyName = (string)this.reader.Value;

        var isSpecial =
            propertyName.StartsWith(config.FeatureIgnorePrefix, StringComparison.Ordinal) ||
            config.IsSpecialProperty(propertyName);
        if (isSpecial)
        {
            this.ParseSpecialProperty(path.Last(), propertyName);
            continue;
        }

        // move onto the property's value
        if (!this.reader.Read())
        {
            throw new VowpalWabbitJsonException(this.reader, "Unexpected end while parsing namespace");
        }

        // TODO: this.Context might have to be a stack...
        if (this.reader.TokenType == JsonToken.StartObject)
        {
            this.ParseNamespaceAndFeatures(path, propertyName);
        }
        else
        {
            this.ParseFeature(path, propertyName);
        }
    }
}
/// <summary>
/// Releases the resources held by the builder and suppresses finalization.
/// </summary>
public void Dispose()
{
    this.Dispose(disposing: true);
    GC.SuppressFinalize(this);
}
/// <summary>
/// Disposes the default namespace context, if it is still alive, when called with
/// <paramref name="disposing"/> set to true; does nothing otherwise.
/// </summary>
private void Dispose(bool disposing)
{
    if (!disposing)
    {
        return;
    }

    var defaultContext = this.DefaultNamespaceContext;
    if (defaultContext == null)
    {
        // already released
        return;
    }

    defaultContext.Dispose();
    this.DefaultNamespaceContext = null;
}
}
/// <summary>
/// A parsing context holding the current state during JSON parsing: the marshalling context,
/// the namespace and the JSON property a level of the parse path refers to.
/// </summary>
public sealed class VowpalWabbitJsonParseContext
{
/// <summary>
/// The current marshalling context.
/// </summary>
public VowpalWabbitMarshalContext Context { get; set; }
/// <summary>
/// The current namespace.
/// </summary>
public Namespace Namespace { get; set; }
/// <summary>
/// The current JSON property being processed.
/// </summary>
public string JsonProperty { get; set; }
}
}