// -------------------------------------------------------------------------------------------------------------------- // // Copyright (c) by respective owners including Yahoo!, Microsoft, and // individual contributors. All rights reserved. Released under a BSD // license as described in the file LICENSE. // // -------------------------------------------------------------------------------------------------------------------- using Newtonsoft.Json; using System; using System.Collections.Generic; using System.Diagnostics.Contracts; using System.Globalization; using System.IO; using System.Linq; using System.Threading.Tasks; using VW.Labels; using VW.Serializer.Intermediate; namespace VW.Serializer { /// /// The current JSON parse state. /// public sealed class VowpalWabbitJsonParseState { /// /// The native VW instance. /// public VowpalWabbit VW { get; set; } /// /// The JSON reader. /// public JsonReader Reader { get; set; } /// /// The VW example JSON builder. /// public VowpalWabbitJsonBuilder JsonBuilder { get; set; } /// /// The current property path within the JSON. /// public List Path { get; set; } /// /// The current _multi element index. /// public int MultiIndex { get; set; } /// /// Triggers parsing at the current state of the using the default namespace. /// public void Parse() { using (var context = new VowpalWabbitMarshalContext(this.VW, this.JsonBuilder.DefaultNamespaceContext.ExampleBuilder)) { var ns = new Namespace(this.VW); this.Parse(context, ns); } } /// /// Triggers parsing at the current state of the using the given . /// /// The namespace the JSON should be marshalled into. /// The namespace the JSON should be marshalled into. public void Parse(VowpalWabbitMarshalContext namespaceContext, Namespace ns) { this.JsonBuilder.Parse(this.Path, namespaceContext, ns); } } /// /// Delegate definition for JSON parsing extension. E.g. if one wants to extract "_timestamp" or a like. /// /// The current parsing state. /// The property encountered. /// True if the extension handled this property, false otherwise. /// Only fires for "ignore prefixed" properties. public delegate bool VowpalWabbitJsonExtension(VowpalWabbitJsonParseState state, string property); /// /// A deserializer from JSON to Vowpal Wabbit native examples. /// public sealed class VowpalWabbitJsonSerializer : IDisposable { private readonly IVowpalWabbitExamplePool vwPool; private readonly JsonSerializer jsonSerializer; private readonly VowpalWabbitJsonReferenceResolver referenceResolver; private int unresolved; private readonly object lockObject = new object(); private bool ready = false; private List marshalRequests; private List extensions; /// /// Initializes a new instance of the class. /// /// The VW native instance. /// An optional reference resolver. public VowpalWabbitJsonSerializer(IVowpalWabbitExamplePool vwPool, VowpalWabbitJsonReferenceResolver referenceResolver = null) { Contract.Requires(vwPool != null); this.extensions = new List { this.HandleMultiProperty }; this.jsonSerializer = new JsonSerializer(); this.vwPool = vwPool; this.referenceResolver = referenceResolver; this.ExampleBuilder = new VowpalWabbitJsonBuilder(this, this.vwPool, VowpalWabbitDefaultMarshaller.Instance, this.jsonSerializer); } /// /// Registers a parsing extension. /// /// The extension to be rgistered. public void RegisterExtension(VowpalWabbitJsonExtension extension) { this.extensions.Add(extension); } /// /// Userful if this deserializer is published through VowpalWabbitJsonReferenceResolver. /// public object UserContext { get; set; } /// /// Single line example or shared example. /// public VowpalWabbitJsonBuilder ExampleBuilder { get; private set; } /// /// Multi-line examples. /// public List ExampleBuilders { get; private set; } internal VowpalWabbitJsonReferenceResolver ReferenceResolver { get { return this.referenceResolver; } } internal void IncreaseUnresolved() { // only called during the initial parsing run this.unresolved++; } internal bool Resolve(Action marshal) { lock (this.lockObject) { // ready is false until the initial parsing run is complete if (this.ready) { // the object doesn't get anymore unresolved marshal requests if (this.marshalRequests != null) { foreach (var req in this.marshalRequests) req(); this.unresolved -= this.marshalRequests.Count; this.marshalRequests = null; } marshal(); this.unresolved--; if (this.unresolved < 0) throw new InvalidOperationException("Number of unresolved requested must not be negative"); return this.unresolved == 0; } else { // we need to track the requests and wait until the initial parsing is done if (this.marshalRequests == null) this.marshalRequests = new List(); this.marshalRequests.Add(marshal); return false; } } } /// /// Creates the VW example, be it single or multi-line. /// /// The marshalled VW example. public VowpalWabbitExampleCollection CreateExamples() { lock (this.lockObject) { if (this.unresolved == 0) return this.CreateExamplesInternal(); if (this.marshalRequests != null && this.unresolved == this.marshalRequests.Count) { return this.CreateExamplesInternal(); } // wait for delayed completion this.ready = true; return null; } } /// /// Creates the VW example, be it single or multi-line. /// /// The label to be applied. /// The index of the example in the multi-line example this label should be applied on. /// public VowpalWabbitExampleCollection CreateExamples(ILabel label, int index) { if (index >= this.ExampleBuilders.Count) throw new InvalidDataException($"Label index {index} is invalid. Only {this.ExampleBuilders.Count} examples available."); VowpalWabbitDefaultMarshaller.Instance.MarshalLabel( this.ExampleBuilders[index].DefaultNamespaceContext, label); return this.CreateExamples(); } /// /// Parses and creates the example. /// /// The example to parse. /// /// Optional label, taking precedence over "_label" property found in . /// If null, will be inspected and the "_label" property used as label. /// /// Optional index of example the given label should be applied for multi-line examples. /// The VowpalWabbit native example. public VowpalWabbitExampleCollection ParseAndCreate(string json, ILabel label = null, int? index = null) { this.Parse(json, label, index); return this.CreateExamples(); } /// /// Parses the example. /// /// The example to parse. /// /// Optional label, taking precedence over "_label" property found in . /// If null, will be inspected and the "_label" property used as label. /// /// Optional index of example the given label should be applied for multi-line examples. /// The VowpalWabbit native example. public VowpalWabbitExampleCollection ParseAndCreate(JsonReader reader, ILabel label = null, int? index = null) { this.Parse(reader, label, index); return this.CreateExamples(); } /// /// Parses the example. /// /// The example to parse. /// /// Optional label, taking precedence over "_label" property found in . /// If null, will be inspected and the "_label" property used as label. /// /// Optional index of example the given label should be applied for multi-line examples. public void Parse(string json, ILabel label = null, int? index = null) { using (var textReader = new JsonTextReader(new StringReader(json))) { this.Parse(textReader, label); } } /// /// Returns the number of action dependent examples found within . /// /// The JSON to be inspected. /// Returns the number of action dependent examples. public static int GetNumberOfActionDependentExamples(string json) { using (var textReader = new JsonTextReader(new StringReader(json))) { return GetNumberOfActionDependentExamples(textReader); } } /// /// Returns the number of action dependent examples found within . /// /// The JSON. /// The optional multi property name. /// Returns the number of action dependent examples. public static int GetNumberOfActionDependentExamples(JsonReader reader, string multiProperty = PropertyConfiguration.MultiPropertyDefault) { // handle the case when the reader is already positioned at JsonToken.StartObject if (reader.TokenType == JsonToken.None && !reader.Read()) throw new VowpalWabbitJsonException(reader, "Expected non-empty JSON"); if (reader.TokenType != JsonToken.StartObject) throw new VowpalWabbitJsonException(reader, "Expected start object"); while (reader.Read()) { if (!(reader.TokenType == JsonToken.PropertyName && (string)reader.Value == multiProperty)) { reader.Skip(); continue; } if (!reader.Read() || reader.TokenType != JsonToken.StartArray) throw new VowpalWabbitJsonException(reader, "Expected start arrray"); var exampleCount = 0; while (reader.Read() && reader.TokenType != JsonToken.EndArray) { exampleCount++; reader.Skip(); } return exampleCount; } return 0; } private bool HandleMultiProperty(VowpalWabbitJsonParseState state, string property) { var multiPropertyName = this.vwPool.Native.Settings.PropertyConfiguration.MultiProperty; if (!property.Equals(multiPropertyName, StringComparison.OrdinalIgnoreCase)) return false; var reader = state.Reader; if (!reader.Read() || reader.TokenType != JsonToken.StartArray) throw new VowpalWabbitJsonException(reader, "Expected start array for '" + multiPropertyName + "'"); if (this.ExampleBuilders == null) this.ExampleBuilders = new List(); state.MultiIndex = 0; while (reader.Read()) { switch (reader.TokenType) { case JsonToken.StartObject: VowpalWabbitJsonBuilder builder = null; try { builder = new VowpalWabbitJsonBuilder(this, this.vwPool, VowpalWabbitDefaultMarshaller.Instance, this.jsonSerializer, state.MultiIndex); this.ExampleBuilders.Add(builder); } catch (Exception) { builder.Dispose(); throw; } // pass the label to the selected example builder.Parse(reader, index != null && index == this.ExampleBuilders.Count - 1 ? label : null, this.extensions); state.MultiIndex++; break; case JsonToken.EndArray: return true; default: throw new VowpalWabbitJsonException(reader, "Unexpected token: " + reader.TokenType); } } throw new VowpalWabbitJsonException(reader, "Unexpected end"); } // TODO: keeping it local might be nicer... private int? index; private ILabel label; /// /// Parses the example. /// /// The example to parse. /// /// Optional label, taking precedence over "_label" property found in . /// If null, will be inspected and the "_label" property used as label. /// /// Optional index of example the given label should be applied for multi-line examples. public void Parse(JsonReader reader, ILabel label = null, int? index = null) { this.index = index; this.label = label; // only pass the label if it's not targeted at a particular index this.ExampleBuilder.Parse(reader, index == null ? label : null, this.extensions); // check if the outer example found a label if (this.ExampleBuilder.Label != null) { if (this.ExampleBuilder.LabelIndex >= this.ExampleBuilders.Count) throw new InvalidDataException($"Label index {this.ExampleBuilder.LabelIndex} is invalid. Only {this.ExampleBuilders.Count} examples available."); VowpalWabbitDefaultMarshaller.Instance.MarshalLabel( this.ExampleBuilders[this.ExampleBuilder.LabelIndex].DefaultNamespaceContext, this.ExampleBuilder.Label); } } /// /// Creates the examples ready for learning or prediction. /// public VowpalWabbitExampleCollection CreateExamplesInternal() { try { if (this.ExampleBuilders == null) { return new VowpalWabbitSingleLineExampleCollection(this.vwPool.Native, this.ExampleBuilder.CreateExample()); } else { // making sure we don't leak memory VowpalWabbitExample sharedExample = null; var examples = new VowpalWabbitExample[this.ExampleBuilders.Count]; try { // mark shared example as shared VowpalWabbitDefaultMarshaller.Instance.MarshalLabel(this.ExampleBuilder.DefaultNamespaceContext, SharedLabel.Instance); sharedExample = this.ExampleBuilder.CreateExample(); for (int i = 0; i < this.ExampleBuilders.Count; i++) examples[i] = this.ExampleBuilders[i].CreateExample(); return new VowpalWabbitMultiLineExampleCollection(this.vwPool.Native, sharedExample, examples); } catch (Exception) { if (sharedExample != null) sharedExample.Dispose(); foreach (var e in examples) if (e != null) e.Dispose(); throw; } } } finally { this.ExampleBuilder.Dispose(); this.ExampleBuilder = null; if (this.ExampleBuilders != null) { foreach (var eb in this.ExampleBuilders) eb.Dispose(); this.ExampleBuilders = null; } } } /// /// Performs application-defined tasks associated with freeing, releasing, or resetting unmanaged resources. /// public void Dispose() { this.Dispose(true); GC.SuppressFinalize(this); } private void Dispose(bool disposing) { // Remark: might be called multiple times from VowpalWabbitJsonReferenceResolver if (disposing) { // cleanup in case CreateExample() wasn't called if (this.ExampleBuilder != null) { this.ExampleBuilder.Dispose(); this.ExampleBuilder = null; } if (this.ExampleBuilders != null) { foreach (var eb in this.ExampleBuilders) eb.Dispose(); this.ExampleBuilders = null; } } } } }