// Copyright 2017 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto2";

package toco;

import "tensorflow/lite/toco/types.proto";

// Supported I/O file formats. Some formats may be input-only or output-only.
enum FileFormat {
  FILE_FORMAT_UNKNOWN = 0;

  // GraphDef, third_party/tensorflow/core/framework/graph.proto
  TENSORFLOW_GRAPHDEF = 1;

  // Tensorflow's mobile inference model.
  // third_party/tensorflow/contrib/tflite/schema.fbs
  TFLITE = 2;

  // GraphViz
  // Export-only.
  GRAPHVIZ_DOT = 3;
}

// TocoFlags encodes extra parameters that drive tooling operations, that
// are not normally encoded in model files and in general may not be thought
// of as properties of models, instead describing how models are to be
// processed in the context of the present tooling job.
//
// Next ID to use: 31.
//
// NOTE(review): field numbers 3, 9 and 21-23 are not assigned in this
// message. If they belonged to removed fields, they should be declared
// `reserved` so that they are never reused -- confirm against history.
message TocoFlags {
  // Input file format
  optional FileFormat input_format = 1;

  // Output file format
  optional FileFormat output_format = 2;

  // Similar to inference_type, but allows controlling specifically the
  // quantization of input arrays, separately from other arrays.
  //
  // If not set, then the value of inference_type is implicitly used, i.e.
  // by default input arrays are quantized like other arrays.
  //
  // Like inference_type, this only affects real-number arrays. By "real-number"
  // we mean float arrays, and quantized arrays. This excludes plain
  // integer arrays, string arrays, and every other data type.
  //
  // The typical use for this flag is for vision models taking a bitmap
  // as input, typically with uint8 channels, yet still requiring floating-point
  // inference. For such image models, the uint8 input is quantized, i.e.
  // the uint8 values are interpreted as real numbers, and the quantization
  // parameters used for such input arrays are their mean_value, std_value
  // parameters.
  optional IODataType inference_input_type = 11;

  // Sets the type of real-number arrays in the output file, that is, controls
  // the representation (quantization) of real numbers in the output file,
  // except for input arrays, which are controlled by inference_input_type.
  //
  // NOTE: this flag only impacts real-number arrays. By "real-number"
  // we mean float arrays, and quantized arrays. This excludes plain
  // integer arrays, string arrays, and every other data type.
  //
  // For real-number arrays, the impact of this flag is to allow the output
  // file to choose a different real-numbers representation (quantization)
  // from what the input file used. For any other types of arrays, changing
  // the data type would not make sense.
  //
  // Specifically:
  //    - If FLOAT, then real-numbers arrays will be of type float in
  //      the output file. If they were quantized in the input file, then
  //      they get dequantized.
  //    - If QUANTIZED_UINT8, then real-numbers arrays will be quantized
  //      as uint8 in the output file. If they were float in the input file,
  //      then they get quantized.
  //    - If not set, then all real-numbers arrays retain the same type in the
  //      output file as they have in the input file.
  optional IODataType inference_type = 4;

  // default_ranges_min and default_ranges_max are helpers to experiment
  // with quantization of models. Normally, quantization requires the input
  // model to have (min, max) range information for every activations array.
  // This is needed in order to know how to quantize arrays and still achieve
  // satisfactory accuracy. However, in some circumstances one would just like
  // to estimate the performance of quantized inference, without caring about
  // accuracy. That is what default_ranges_min and default_ranges_max are for:
  // when specified, they will be used as default (min, max) range boundaries
  // for all activation arrays that lack (min, max) range information, thus
  // allowing for quantization to proceed.
  //
  // It should be clear from the above explanation that these parameters are
  // for experimentation purposes only and should not be used in production:
  // they make it easy to quantize models, but the resulting quantized model
  // will be inaccurate.
  //
  // These values only apply to arrays quantized with the kUint8 data type.
  optional float default_ranges_min = 5;
  optional float default_ranges_max = 6;

  // Equivalent versions of default_ranges_min/_max for arrays quantized with
  // the kInt16 data type.
  optional float default_int16_ranges_min = 15;
  optional float default_int16_ranges_max = 16;

  // Ignore and discard FakeQuant nodes. For instance, that can be used to
  // generate plain float code without fake-quantization from a quantized
  // graph.
  optional bool drop_fake_quant = 7;

  // Normally, FakeQuant nodes must be strict boundaries for graph
  // transformations, in order to ensure that quantized inference has the
  // exact same arithmetic behavior as quantized training --- which is the
  // whole point of quantized training and of FakeQuant nodes in the first
  // place. However, that entails subtle requirements on where exactly
  // FakeQuant nodes must be placed in the graph. Some quantized graphs
  // have FakeQuant nodes at unexpected locations, that prevent graph
  // transformations that are necessary in order to generate inference
  // code for these graphs. Such graphs should be fixed, but as a
  // temporary work-around, setting this reorder_across_fake_quant flag
  // allows toco to perform necessary graph transformations on them,
  // at the cost of no longer faithfully matching inference and training
  // arithmetic.
  optional bool reorder_across_fake_quant = 8;

  // If true, allow TOCO to create TF Lite Custom operators for all the
  // unsupported Tensorflow ops.
  optional bool allow_custom_ops = 10;

  // Applies only to the case when the input format is TENSORFLOW_GRAPHDEF.
  // If true, then control dependencies will be immediately dropped during
  // import.
  // If not set, the default behavior is as follows:
  //    - Default to false if the output format is TENSORFLOW_GRAPHDEF.
  //    - Default to true in all other cases.
  optional bool drop_control_dependency = 12;

  // Disables transformations that fuse subgraphs such as known LSTMs (not all
  // LSTMs are identified).
  optional bool debug_disable_recurrent_cell_fusion = 13;

  // Uses the FakeQuantWithMinMaxArgs.num_bits attribute to adjust quantized
  // array data types throughout the graph. The graph must be properly annotated
  // with FakeQuant* ops on at least the edges and may contain additional ops on
  // the interior of the graph to widen/narrow as desired.
  //
  // Input and output array data types may change because of this propagation
  // and users must be sure to query the final data_type values.
  optional bool propagate_fake_quant_num_bits = 14;

  // Some fast uint8 GEMM kernels require uint8 weights to avoid the value 0.
  // This flag allows nudging them to 1 to allow proceeding, with moderate
  // inaccuracy.
  optional bool allow_nudging_weights_to_use_fast_gemm_kernel = 17;

  // Minimum size of constant arrays to deduplicate; arrays smaller will not be
  // deduplicated.
  optional int64 dedupe_array_min_size_bytes = 18 [default = 64];

  // Split the LSTM inputs from 5 tensors to 18 tensors for TFLite.
  // Ignored if the output format is not TFLite.
  optional bool split_tflite_lstm_inputs = 19 [default = true];

  // Store weights as quantized weights followed by dequantize operations.
  // Computation is still done in float, but reduces model size (at the cost of
  // accuracy and latency).
  // DEPRECATED: Please use post_training_quantize instead.
  optional bool quantize_weights = 20 [default = false, deprecated = true];

  // Full path of the folder in which to dump the graphs, as GraphViz .dot
  // files, at various stages of processing. Preferred over
  // --output_format=GRAPHVIZ_DOT in order to keep the requirements of the
  // output file.
  optional string dump_graphviz_dir = 24;

  // Boolean indicating whether to dump the graph after every graph
  // transformation.
  optional bool dump_graphviz_include_video = 25;

  // Boolean indicating whether to quantize the weights of the converted float
  // model. Model size will be reduced and there will be latency improvements
  // (at the cost of accuracy).
  optional bool post_training_quantize = 26 [default = false];

  // This flag only works when converting to TensorFlow Lite format.
  // When enabled, unsupported ops will be converted to select TensorFlow ops.
  // TODO(ycling): Consider renaming the following 2 flags so that they are
  // not called "Flex".
  // `enable_select_tf_ops` should always be used with `allow_custom_ops`.
  // WARNING: Experimental interface, subject to change
  optional bool enable_select_tf_ops = 27 [default = false];

  // This flag only works when converting to TensorFlow Lite format.
  // When enabled, all TensorFlow ops will be converted to select TensorFlow
  // ops.
  // This will force `enable_select_tf_ops` to true.
  // `force_select_tf_ops` should always be used with `enable_select_tf_ops`.
  // WARNING: Experimental interface, subject to change
  optional bool force_select_tf_ops = 28 [default = false];

  // Boolean indicating whether to convert float32 constant buffers to
  // float16. This is typically done to reduce model size. Delegates may also
  // wish to implement kernels on reduced precision floats for performance
  // gains.
  optional bool quantize_to_float16 = 29 [default = false];

  // Boolean flag indicating whether the converter should allow models with
  // dynamic Tensor shape. When set to False, the converter will generate
  // runtime memory offsets for activation Tensors (with 128 bits alignment)
  // and error out on models with undetermined Tensor shape. (Default: True)
  optional bool allow_dynamic_tensors = 30 [default = true];
}