// Copyright 2019 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

package google.cloud.automl.v1;

import "google/api/annotations.proto";
import "google/api/field_behavior.proto";

option csharp_namespace = "Google.Cloud.AutoML.V1";
option go_package = "google.golang.org/genproto/googleapis/cloud/automl/v1;automl";
option java_multiple_files = true;
option java_package = "com.google.cloud.automl.v1";
option php_namespace = "Google\\Cloud\\AutoML\\V1";
option ruby_package = "Google::Cloud::AutoML::V1";

// Input configuration for
// [AutoMl.ImportData][google.cloud.automl.v1.AutoMl.ImportData] action.
//
// The format of input depends on the dataset_metadata of the Dataset into
// which the import is happening. As input source the
// [gcs_source][google.cloud.automl.v1.InputConfig.gcs_source]
// is expected, unless specified otherwise. Additionally any input .CSV file
// by itself must be 100MB or smaller, unless specified otherwise.
// If an "example" file (that is, image, video etc.) with identical content
// (even with a different `GCS_FILE_PATH`) is mentioned multiple times, then
// its labels, bounding boxes etc. are appended. The same file should always
// be provided with the same `ML_USE` and `GCS_FILE_PATH`; if it is not, then
// these values are nondeterministically selected from the given ones.
//
// The formats are represented in EBNF with commas being literal and with
// non-terminal symbols defined near the end of this comment. The formats
// are:
//
// <h4>AutoML Vision</h4>
//
//
// <h5>Classification</h5>
//
// See [Preparing your training
// data](https://cloud.google.com/vision/automl/docs/prepare) for more
// information.
//
// CSV file(s) with each line in format:
//
//     ML_USE,GCS_FILE_PATH,LABEL,LABEL,...
//
// *  `ML_USE` - Identifies the data set that the current row (file) applies
//    to. This value can be one of the following:
//    * `TRAIN` - Rows in this file are used to train the model.
//    * `TEST` - Rows in this file are used to test the model during training.
//    * `UNASSIGNED` - Rows in this file are not categorized. They are
//      automatically divided into train and test data: 80% for training and
//      20% for testing.
//
// *  `GCS_FILE_PATH` - The Google Cloud Storage location of an image of up
//    to 30MB in size. Supported extensions: .JPEG, .GIF, .PNG, .WEBP, .BMP,
//    .TIFF, .ICO.
//
// *  `LABEL` - A label that identifies the object in the image.
//
// For the `MULTICLASS` classification type, at most one `LABEL` is allowed
// per image. If an image has not yet been labeled, then it should be
// mentioned just once with no `LABEL`.
//
// Some sample rows:
//
//     TRAIN,gs://folder/image1.jpg,daisy
//     TEST,gs://folder/image2.jpg,dandelion,tulip,rose
//     UNASSIGNED,gs://folder/image3.jpg,daisy
//     UNASSIGNED,gs://folder/image4.jpg
//
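//
// Note that, per the `MULTICLASS` rule above, the second sample row is only
// valid for a multi-label dataset; under the `MULTICLASS` classification
// type each labeled row may carry at most one label, for example (an
// illustrative variant of the row above, not an additional canonical
// sample):
//
//     TEST,gs://folder/image2.jpg,dandelion
//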
// <h5>Object Detection</h5>
//
// See [Preparing your training
// data](https://cloud.google.com/vision/automl/object-detection/docs/prepare)
// for more information.
//
// CSV file(s) with each line in format:
//
//     ML_USE,GCS_FILE_PATH,[LABEL],(BOUNDING_BOX | ,,,,,,,)
//
// *  `ML_USE` - Identifies the data set that the current row (file) applies
//    to. This value can be one of the following:
//    * `TRAIN` - Rows in this file are used to train the model.
//    * `TEST` - Rows in this file are used to test the model during training.
//    * `UNASSIGNED` - Rows in this file are not categorized. They are
//      automatically divided into train and test data: 80% for training and
//      20% for testing.
//
// *  `GCS_FILE_PATH` - The Google Cloud Storage location of an image of up
//    to 30MB in size. Supported extensions: .JPEG, .GIF, .PNG. Each image
//    is assumed to be exhaustively labeled.
//
// *  `LABEL` - A label that identifies the object in the image specified by
//    the `BOUNDING_BOX`.
//
// *  `BOUNDING_BOX` - The vertices of an object in the example image.
//    The minimum allowed `BOUNDING_BOX` edge length is 0.01, and no more
//    than 500 `BOUNDING_BOX` instances per image are allowed (one
//    `BOUNDING_BOX` per line). If an image has none of the looked-for
//    objects then it should be mentioned just once with no `LABEL` and the
//    ",,,,,,," in place of the `BOUNDING_BOX`.
//
// **Four sample rows:**
//
//     TRAIN,gs://folder/image1.png,car,0.1,0.1,,,0.3,0.3,,
//     TRAIN,gs://folder/image1.png,bike,.7,.6,,,.8,.9,,
//     UNASSIGNED,gs://folder/im2.png,car,0.1,0.1,0.2,0.1,0.2,0.3,0.1,0.3
//     TEST,gs://folder/im3.png,,,,,,,,,
//
//
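// As an illustration of the two `BOUNDING_BOX` forms defined near the end
// of this comment (an explanatory note, not an additional format): the
// two-vertex sample row above,
//
//     TRAIN,gs://folder/image1.png,car,0.1,0.1,,,0.3,0.3,,
//
// describes the same axis-aligned rectangle as the fully specified
// four-vertex row
//
//     TRAIN,gs://folder/image1.png,car,0.1,0.1,0.3,0.1,0.3,0.3,0.1,0.3
//
// since the two vertices given are treated as diagonally opposite corners.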
//
//
// <h4>AutoML Natural Language</h4>
//
//
// <h5>Entity Extraction</h5>
//
// See [Preparing your training
// data](/natural-language/automl/entity-analysis/docs/prepare) for more
// information.
//
// One or more CSV file(s) with each line in the following format:
//
//     ML_USE,GCS_FILE_PATH
//
// *  `ML_USE` - Identifies the data set that the current row (file) applies
//    to. This value can be one of the following:
//    * `TRAIN` - Rows in this file are used to train the model.
//    * `TEST` - Rows in this file are used to test the model during training.
//    * `UNASSIGNED` - Rows in this file are not categorized. They are
//      automatically divided into train and test data: 80% for training and
//      20% for testing.
//
// *  `GCS_FILE_PATH` - Identifies a JSON Lines (.JSONL) file stored in
//    Google Cloud Storage that contains in-line text as documents for model
//    training.
//
// After the training data set has been determined from the `TRAIN` and
// `UNASSIGNED` CSV files, the training data is divided into train and
// validation data sets: 70% for training and 30% for validation.
//
// For example:
//
//     TRAIN,gs://folder/file1.jsonl
//     VALIDATE,gs://folder/file2.jsonl
//     TEST,gs://folder/file3.jsonl
//
// **In-line JSONL files**
//
// In-line .JSONL files contain, per line, a JSON document that wraps a
// [`text_snippet`][google.cloud.automl.v1.TextSnippet] field followed by
// one or more [`annotations`][google.cloud.automl.v1.AnnotationPayload]
// fields, which have `display_name` and `text_extraction` fields to describe
// the entity from the text snippet. Multiple JSON documents are separated
// by line breaks (\n).
//
// The supplied text must be annotated exhaustively. For example, if you
// include the text "horse", but do not label it as "animal",
// then "horse" is assumed to not be an "animal".
//
// Any given text snippet content must have 30,000 characters or
// less, and also be UTF-8 NFC encoded. ASCII is accepted as it is
// UTF-8 NFC encoded.
//
// For example:
//
//     {
//       "text_snippet": {
//         "content": "dog car cat"
//       },
//       "annotations": [
//         {
//           "display_name": "animal",
//           "text_extraction": {
//             "text_segment": {"start_offset": 0, "end_offset": 2}
//           }
//         },
//         {
//           "display_name": "vehicle",
//           "text_extraction": {
//             "text_segment": {"start_offset": 4, "end_offset": 6}
//           }
//         },
//         {
//           "display_name": "animal",
//           "text_extraction": {
//             "text_segment": {"start_offset": 8, "end_offset": 10}
//           }
//         }
//       ]
//     }\n
//     {
//       "text_snippet": {
//         "content": "This dog is good."
//       },
//       "annotations": [
//         {
//           "display_name": "animal",
//           "text_extraction": {
//             "text_segment": {"start_offset": 5, "end_offset": 7}
//           }
//         }
//       ]
//     }
//
// **JSONL files that reference documents**
//
// .JSONL files contain, per line, a JSON document that wraps an
// `input_config` that contains the path to a source PDF document.
// Multiple JSON documents are separated by line breaks (\n).
//
// For example:
//
//     {
//       "document": {
//         "input_config": {
//           "gcs_source": { "input_uris": [ "gs://folder/document1.pdf" ] }
//         }
//       }
//     }\n
//     {
//       "document": {
//         "input_config": {
//           "gcs_source": { "input_uris": [ "gs://folder/document2.pdf" ] }
//         }
//       }
//     }
//
// **In-line JSONL files with PDF layout information**
//
// **Note:** You can only annotate PDF files using the UI. The format
// described below applies to annotated PDF files exported using the UI or
// `exportData`.
//
// In-line .JSONL files for PDF documents contain, per line, a JSON document
// that wraps a `document` field that provides the textual content of the
// PDF document and the layout information.
//
// For example:
//
//     {
//       "document": {
//         "document_text": {
//           "content": "dog car cat"
//         },
//         "layout": [
//           {
//             "text_segment": {
//               "start_offset": 0,
//               "end_offset": 11
//             },
//             "page_number": 1,
//             "bounding_poly": {
//               "normalized_vertices": [
//                 {"x": 0.1, "y": 0.1},
//                 {"x": 0.1, "y": 0.3},
//                 {"x": 0.3, "y": 0.3},
//                 {"x": 0.3, "y": 0.1}
//               ]
//             },
//             "text_segment_type": "TOKEN"
//           }
//         ],
//         "document_dimensions": {
//           "width": 8.27,
//           "height": 11.69,
//           "unit": "INCH"
//         },
//         "page_count": 3
//       },
//       "annotations": [
//         {
//           "display_name": "animal",
//           "text_extraction": {
//             "text_segment": {"start_offset": 0, "end_offset": 3}
//           }
//         },
//         {
//           "display_name": "vehicle",
//           "text_extraction": {
//             "text_segment": {"start_offset": 4, "end_offset": 7}
//           }
//         },
//         {
//           "display_name": "animal",
//           "text_extraction": {
//             "text_segment": {"start_offset": 8, "end_offset": 11}
//           }
//         }
//       ]
//     }
//
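// Reading the segment offsets in the example above with a zero-based,
// inclusive `start_offset` and an exclusive `end_offset` (the convention
// documented for TextSegment), {"start_offset": 0, "end_offset": 3} covers
// exactly "dog" in "dog car cat".
//
//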
// <h5>Classification</h5>
//
// See [Preparing your training
// data](https://cloud.google.com/natural-language/automl/docs/prepare) for
// more information.
//
// One or more CSV file(s) with each line in the following format:
//
//     ML_USE,(TEXT_SNIPPET | GCS_FILE_PATH),LABEL,LABEL,...
//
// *  `ML_USE` - Identifies the data set that the current row (file) applies
//    to. This value can be one of the following:
//    * `TRAIN` - Rows in this file are used to train the model.
//    * `TEST` - Rows in this file are used to test the model during training.
//    * `UNASSIGNED` - Rows in this file are not categorized. They are
//      automatically divided into train and test data: 80% for training and
//      20% for testing.
//
// *  `TEXT_SNIPPET` and `GCS_FILE_PATH` are distinguished by a pattern. If
//    the column content is a valid Google Cloud Storage file path, that is,
//    prefixed by "gs://", it is treated as a `GCS_FILE_PATH`. Otherwise, if
//    the content is enclosed in double quotes (""), it is treated as a
//    `TEXT_SNIPPET`. For `GCS_FILE_PATH`, the path must lead to a
//    file with a supported extension and UTF-8 encoding, for example,
//    "gs://folder/content.txt"; AutoML imports the file content
//    as a text snippet. For `TEXT_SNIPPET`, AutoML imports the column
//    content excluding quotes. In both cases, the size of the content must
//    be 10MB or less. For zip files, the size of each file inside the zip
//    must be 10MB or less.
//
// For the `MULTICLASS` classification type, at most one `LABEL` is allowed.
// The `ML_USE` and `LABEL` columns are optional.
// Supported file extensions: .TXT, .PDF, .ZIP
//
// A maximum of 100 unique labels are allowed per CSV row.
//
// Sample rows:
//
//     TRAIN,"They have bad food and very rude",RudeService,BadFood
//     gs://folder/content.txt,SlowService
//     TEST,gs://folder/document.pdf
//     VALIDATE,gs://folder/text_files.zip,BadFood
//
//
// <h5>Sentiment Analysis</h5>
//
// See [Preparing your training
// data](https://cloud.google.com/natural-language/automl/docs/prepare) for
// more information.
//
// CSV file(s) with each line in format:
//
//     ML_USE,(TEXT_SNIPPET | GCS_FILE_PATH),SENTIMENT
//
// *  `ML_USE` - Identifies the data set that the current row (file) applies
//    to. This value can be one of the following:
//    * `TRAIN` - Rows in this file are used to train the model.
//    * `TEST` - Rows in this file are used to test the model during training.
//    * `UNASSIGNED` - Rows in this file are not categorized. They are
//      automatically divided into train and test data: 80% for training and
//      20% for testing.
//
// *  `TEXT_SNIPPET` and `GCS_FILE_PATH` are distinguished by a pattern. If
//    the column content is a valid Google Cloud Storage file path, that is,
//    prefixed by "gs://", it is treated as a `GCS_FILE_PATH`. Otherwise, if
//    the content is enclosed in double quotes (""), it is treated as a
//    `TEXT_SNIPPET`. For `GCS_FILE_PATH`, the path must lead to a
//    file with a supported extension and UTF-8 encoding, for example,
//    "gs://folder/content.txt"; AutoML imports the file content
//    as a text snippet. For `TEXT_SNIPPET`, AutoML imports the column
//    content excluding quotes. In both cases, the size of the content must
//    be 128kB or less. For zip files, the size of each file inside the zip
//    must be 128kB or less.
//
// The `ML_USE` and `SENTIMENT` columns are optional.
// Supported file extensions: .TXT, .PDF, .ZIP
//
// *  `SENTIMENT` - An integer between 0 and
//    Dataset.text_sentiment_dataset_metadata.sentiment_max
//    (inclusive). Describes the ordinal of the sentiment - a higher
//    value means a more positive sentiment. All the values are
//    completely relative, i.e. neither 0 needs to mean a negative or
//    neutral sentiment nor sentiment_max needs to mean a positive one -
//    it is just required that 0 is the least positive sentiment
//    in the data, and sentiment_max is the most positive one.
//    The SENTIMENT shouldn't be confused with "score" or "magnitude"
//    from the previous Natural Language Sentiment Analysis API.
//    All SENTIMENT values between 0 and sentiment_max must be
//    represented in the imported data. On prediction the same 0 to
//    sentiment_max range will be used. The difference between
//    neighboring sentiment values need not be uniform, e.g. 1 and
//    2 may be similar whereas the difference between 2 and 3 may be
//    large.
//
// Sample rows:
//
//     TRAIN,"@freewrytin this is way too good for your product",2
//     gs://folder/content.txt,3
//     TEST,gs://folder/document.pdf
//     VALIDATE,gs://folder/text_files.zip,2
//
//
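// As an illustration of the requirement that all SENTIMENT values be
// represented (assuming sentiment_max is set to 2 in the dataset metadata;
// the snippets below are placeholders): a valid import must cover every
// value in the 0..2 range, e.g.:
//
//     TRAIN,"Terrible support, never again",0
//     TRAIN,"It does the job",1
//     TRAIN,"Absolutely love it",2
//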
//
//
// **Input field definitions:**
//
// `ML_USE`
// : ("TRAIN" | "VALIDATE" | "TEST" | "UNASSIGNED")
//   Describes how the given example (file) should be used for model
//   training. "UNASSIGNED" can be used when the user has no preference.
//
// `GCS_FILE_PATH`
// : The path to a file on Google Cloud Storage. For example,
//   "gs://folder/image1.png".
//
// `LABEL`
// : A display name of an object on an image, video etc., e.g. "dog".
//   Must be up to 32 characters long and can consist only of ASCII
//   Latin letters A-Z and a-z, underscores (_), and ASCII digits 0-9.
//   For each label an AnnotationSpec is created whose display_name
//   becomes the label; AnnotationSpecs are given back in predictions.
//
// `BOUNDING_BOX`
// : (`VERTEX,VERTEX,VERTEX,VERTEX` | `VERTEX,,,VERTEX,,`)
//   A rectangle parallel to the frame of the example (image,
//   video). If 4 vertices are given they are connected by edges
//   in the order provided; if 2 are given they are recognized
//   as diagonally opposite vertices of the rectangle.
//
// `VERTEX`
// : (`COORDINATE,COORDINATE`)
//   First coordinate is horizontal (x), the second is vertical (y).
//
// `COORDINATE`
// : A float in the 0 to 1 range, relative to the total length of the
//   image or video in the given dimension. For fractions the
//   leading non-decimal 0 can be omitted (i.e. 0.3 = .3).
//   Point 0,0 is in the top left.
//
// `TEXT_SNIPPET`
// : The content of a text snippet, UTF-8 encoded, enclosed within
//   double quotes ("").
//
// `DOCUMENT`
// : A field that provides the textual content of the document and the
//   layout information.
//
//
// **Errors:**
//
// If any of the provided CSV files can't be parsed or if more than a
// certain percentage of CSV rows cannot be processed then the operation
// fails and nothing is imported. Regardless of overall success or failure
// the per-row failures, up to a certain count cap, are listed in
// Operation.metadata.partial_failures.
//
message InputConfig {
  // The source of the input.
  oneof source {
    // The Google Cloud Storage location for the input content.
    // For [AutoMl.ImportData][google.cloud.automl.v1.AutoMl.ImportData],
    // `gcs_source` points to a CSV file with a structure described in
    // [InputConfig][google.cloud.automl.v1.InputConfig].
    GcsSource gcs_source = 1;
  }

  // Additional domain-specific parameters describing the semantics of the
  // imported data. Any string must be up to 25000 characters long.
  map<string, string> params = 2;
}

// Input configuration for BatchPredict Action.
//
// The format of input depends on the ML problem of the model used for
// prediction. As input source the
// [gcs_source][google.cloud.automl.v1.InputConfig.gcs_source]
// is expected, unless specified otherwise.
//
// The formats are represented in EBNF with commas being literal and with
// non-terminal symbols defined near the end of this comment. The formats
// are:
//
// <h4>AutoML Natural Language</h4>
//
// <h5>Classification</h5>
//
// One or more CSV files where each line is a single column:
//
//     GCS_FILE_PATH
//
// `GCS_FILE_PATH` is the Google Cloud Storage location of a text file.
// Supported file extensions: .TXT, .PDF
// Text files can be no larger than 10MB in size.
//
// Sample rows:
//
//     gs://folder/text1.txt
//     gs://folder/text2.pdf
//
// <h5>Sentiment Analysis</h5>
//
// One or more CSV files where each line is a single column:
//
//     GCS_FILE_PATH
//
// `GCS_FILE_PATH` is the Google Cloud Storage location of a text file.
// Supported file extensions: .TXT, .PDF
// Text files can be no larger than 128kB in size.
//
// Sample rows:
//
//     gs://folder/text1.txt
//     gs://folder/text2.pdf
//
// <h5>Entity Extraction</h5>
//
// One or more JSONL (JSON Lines) files that either provide inline text or
// documents. You can only use one format, either inline text or documents,
// for a single call to [AutoMl.BatchPredict].
//
// Each JSONL file contains, per line, a proto that
// wraps a temporary user-assigned TextSnippet ID (string up to 2000
// characters long) called "id", a TextSnippet proto (in
// JSON representation) and zero or more TextFeature protos. Any given
// text snippet content must have 30,000 characters or less, and also
// be UTF-8 NFC encoded (ASCII already is). The IDs provided should be
// unique.
//
// Each document JSONL file contains, per line, a proto that wraps a
// Document proto with `input_config` set. Only PDF documents are
// currently supported, and each PDF document cannot exceed 2MB in size.
//
// Each JSONL file must not exceed 100MB in size, and no more than 20
// JSONL files may be passed.
//
// Sample inline JSONL file (shown with artificial line
// breaks; actual line breaks are denoted by "\n"):
//
//     {
//       "id": "my_first_id",
//       "text_snippet": { "content": "dog car cat" },
//       "text_features": [
//         {
//           "text_segment": {"start_offset": 4, "end_offset": 6},
//           "structural_type": "PARAGRAPH",
//           "bounding_poly": {
//             "normalized_vertices": [
//               {"x": 0.1, "y": 0.1},
//               {"x": 0.1, "y": 0.3},
//               {"x": 0.3, "y": 0.3},
//               {"x": 0.3, "y": 0.1}
//             ]
//           }
//         }
//       ]
//     }\n
//     {
//       "id": "2",
//       "text_snippet": {
//         "content": "Extended sample content",
//         "mime_type": "text/plain"
//       }
//     }
//
// Sample document JSONL file (shown with artificial line
// breaks; actual line breaks are denoted by "\n"):
//
//     {
//       "document": {
//         "input_config": {
//           "gcs_source": { "input_uris": [ "gs://folder/document1.pdf" ] }
//         }
//       }
//     }\n
//     {
//       "document": {
//         "input_config": {
//           "gcs_source": { "input_uris": [ "gs://folder/document2.pdf" ] }
//         }
//       }
//     }
//
//
//
// **Input field definitions:**
//
// `GCS_FILE_PATH`
// : The path to a file on Google Cloud Storage. For example,
//   "gs://folder/video.avi".
//
// **Errors:**
//
// If any of the provided CSV files can't be parsed or if more than a
// certain percentage of CSV rows cannot be processed then the operation
// fails and prediction does not happen. Regardless of overall success or
// failure the per-row failures, up to a certain count cap, will be listed
// in Operation.metadata.partial_failures.
message BatchPredictInputConfig {
  // The source of the input.
  oneof source {
    // Required. The Google Cloud Storage location for the input content.
    GcsSource gcs_source = 1 [(google.api.field_behavior) = REQUIRED];
  }
}

// Input configuration of an [Image][google.cloud.automl.v1.Image].
message ImageInputConfig {
  // The Google Cloud Storage location of the image file. Only a single path
  // should be given.
  GcsSource gcs_source = 1;
}

// Input configuration of a [Document][google.cloud.automl.v1.Document].
message DocumentInputConfig {
  // The Google Cloud Storage location of the document file. Only a single
  // path should be given.
  //
  // Max supported size: 512MB.
  //
  // Supported extensions: .PDF.
  GcsSource gcs_source = 1;
}

// Output configuration for ExportData.
//
// As destination the
// [gcs_destination][google.cloud.automl.v1.OutputConfig.gcs_destination]
// must be set unless specified otherwise for a domain. If gcs_destination is
// set then in the given directory a new directory is created. Its name
// will be "export_data-<dataset-display-name>-<timestamp-of-export-call>",
// where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format.
// Only ground truth annotations are exported (not-approved annotations are
// not exported).
//
// The outputs correspond to how the data was imported, and may be used as
// input to import data. The output formats are represented as EBNF with
// literal commas and the same non-terminal symbol definitions as in import
// data's [InputConfig][google.cloud.automl.v1.InputConfig]:
//
// *  For Image Classification:
//        CSV file(s) `image_classification_1.csv`,
//        `image_classification_2.csv`,...,`image_classification_N.csv` with
//        each line in format:
//        ML_USE,GCS_FILE_PATH,LABEL,LABEL,...
//        where GCS_FILE_PATHs point at the original, source locations of the
//        imported images.
//        For MULTICLASS classification type, there can be at most one LABEL
//        per example.
//
// *  For Image Object Detection:
//        CSV file(s) `image_object_detection_1.csv`,
//        `image_object_detection_2.csv`,...,`image_object_detection_N.csv`
//        with each line in format:
//        ML_USE,GCS_FILE_PATH,[LABEL],(BOUNDING_BOX | ,,,,,,,)
//        where GCS_FILE_PATHs point at the original, source locations of the
//        imported images.
//
// *  For Text Classification:
//        In the created directory CSV file(s) `text_classification_1.csv`,
//        `text_classification_2.csv`, ...,`text_classification_N.csv` will
//        be created where N depends on the total number of examples
//        exported.
//        Each line in the CSV is of the format:
//        ML_USE,GCS_FILE_PATH,LABEL,LABEL,...
//        where GCS_FILE_PATHs point at the exported .txt files containing
//        the text content of the imported example. For MULTICLASS
//        classification type, there will be at most one LABEL per example.
//
// *  For Text Sentiment:
//        In the created directory CSV file(s) `text_sentiment_1.csv`,
//        `text_sentiment_2.csv`, ...,`text_sentiment_N.csv` will be
//        created where N depends on the total number of examples exported.
//        Each line in the CSV is of the format:
//        ML_USE,GCS_FILE_PATH,SENTIMENT
//        where GCS_FILE_PATHs point at the exported .txt files containing
//        the text content of the imported example.
//
// *  For Text Extraction:
//        CSV file `text_extraction.csv`, with each line in format:
//        ML_USE,GCS_FILE_PATH
//        GCS_FILE_PATH leads to a .JSONL (i.e. JSON Lines) file which
//        contains, per line, a proto that wraps a TextSnippet proto (in
//        JSON representation) followed by AnnotationPayload protos (called
//        annotations). If initially documents had been imported, the JSONL
//        will point at the original, source locations of the imported
//        documents.
//
// *  For Translation:
//        CSV file `translation.csv`, with each line in format:
//        ML_USE,GCS_FILE_PATH
//        GCS_FILE_PATH leads to a .TSV file which describes examples that
//        have given ML_USE, using the following row format per line:
//        TEXT_SNIPPET (in source language) \t TEXT_SNIPPET (in target
//        language)
message OutputConfig {
  // The destination of the output.
  oneof destination {
    // Required. The Google Cloud Storage location where the output is to be
    // written to. For Image Object Detection and Text Extraction, in the
    // given directory a new directory will be created with name:
    // export_data-<dataset-display-name>-<timestamp-of-export-call> where
    // timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. All export
    // output will be written into that directory.
    GcsDestination gcs_destination = 1
        [(google.api.field_behavior) = REQUIRED];
  }
}

// Output configuration for BatchPredict Action.
//
// As destination the
// [gcs_destination][google.cloud.automl.v1.BatchPredictOutputConfig.gcs_destination]
// must be set unless specified otherwise for a domain. If gcs_destination is
// set then in the given directory a new directory is created. Its name
// will be
// "prediction-<model-display-name>-<timestamp-of-prediction-call>",
// where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Its
// contents depend on the ML problem the predictions are made for.
//
// *  For Text Classification:
//        In the created directory files `text_classification_1.jsonl`,
//        `text_classification_2.jsonl`,...,`text_classification_N.jsonl`
//        will be created, where N may be 1, and depends on the
//        total number of inputs and annotations found.
//
//        Each .JSONL file will contain, per line, a JSON representation of
//        a proto that wraps the input text (or PDF) file in
//        the text snippet (or document) proto and a list of
//        zero or more AnnotationPayload protos (called annotations), which
//        have classification detail populated. A single text (or PDF) file
//        will be listed only once with all its annotations, and its
//        annotations will never be split across files.
//
//        If prediction for any text (or PDF) file failed (partially or
//        completely), then additional `errors_1.jsonl`,
//        `errors_2.jsonl`,..., `errors_N.jsonl` files will be created
//        (N depends on total number of failed predictions). These files
//        will have a JSON representation of a proto that wraps the input
//        text (or PDF) file followed by exactly one
//        [`google.rpc.Status`](https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
//        containing only `code` and `message`.
//
// *  For Text Sentiment:
//        In the created directory files `text_sentiment_1.jsonl`,
//        `text_sentiment_2.jsonl`,...,`text_sentiment_N.jsonl`
//        will be created, where N may be 1, and depends on the
//        total number of inputs and annotations found.
//
//        Each .JSONL file will contain, per line, a JSON representation of
//        a proto that wraps the input text (or PDF) file in
//        the text snippet (or document) proto and a list of
//        zero or more AnnotationPayload protos (called annotations), which
//        have text_sentiment detail populated. A single text (or PDF) file
//        will be listed only once with all its annotations, and its
//        annotations will never be split across files.
//
//        If prediction for any text (or PDF) file failed (partially or
//        completely), then additional `errors_1.jsonl`,
//        `errors_2.jsonl`,..., `errors_N.jsonl` files will be created
//        (N depends on total number of failed predictions). These files
//        will have a JSON representation of a proto that wraps the input
//        text (or PDF) file followed by exactly one
//        [`google.rpc.Status`](https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
//        containing only `code` and `message`.
//
// *  For Text Extraction:
//        In the created directory files `text_extraction_1.jsonl`,
//        `text_extraction_2.jsonl`,...,`text_extraction_N.jsonl`
//        will be created, where N may be 1, and depends on the
//        total number of inputs and annotations found.
//        The contents of these .JSONL file(s) depend on whether the input
//        used inline text or documents.
//        If input was inline, then each .JSONL file will contain, per line,
//        a JSON representation of a proto that wraps the text snippet's
//        "id" given in the request (if specified), followed by the input
//        text snippet, and a list of zero or more
//        AnnotationPayload protos (called annotations), which have
//        text_extraction detail populated. A single text snippet will be
//        listed only once with all its annotations, and its annotations
//        will never be split across files.
//        If input used documents, then each .JSONL file will contain, per
//        line, a JSON representation of a proto that wraps the document
//        proto given in the request, followed by its OCR-ed representation
//        in the form of a text snippet, finally followed by a list of zero
//        or more AnnotationPayload protos (called annotations), which have
//        text_extraction detail populated and refer, via their indices, to
//        the OCR-ed text snippet. A single document (and its text snippet)
//        will be listed only once with all its annotations, and its
//        annotations will never be split across files.
//        If prediction for any text snippet failed (partially or
//        completely), then additional `errors_1.jsonl`,
//        `errors_2.jsonl`,..., `errors_N.jsonl` files will be created
//        (N depends on total number of failed predictions). These files
//        will have a JSON representation of a proto that wraps either the
//        "id" : "<id_value>" (in case of inline) or the document proto (in
//        case of document), here followed by exactly one
//        [`google.rpc.Status`](https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
//        containing only `code` and `message`.
message BatchPredictOutputConfig {
  // The destination of the output.
  oneof destination {
    // Required. The Google Cloud Storage location of the directory where
    // the output is to be written to.
    GcsDestination gcs_destination = 1
        [(google.api.field_behavior) = REQUIRED];
  }
}

// Output configuration for ModelExport Action.
message ModelExportOutputConfig {
  // The destination of the output.
  oneof destination {
    // Required. The Google Cloud Storage location where the model is to be
    // written to. This location may only be set for the following model
    // formats:
    //   "tflite", "edgetpu_tflite", "tf_saved_model", "tf_js", "core_ml".
    //
    // Under the directory given as the destination a new one with name
    // "model-export-<model-display-name>-<timestamp-of-export-call>",
    // where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format,
    // will be created. Inside, the model and any of its supporting files
    // will be written.
    GcsDestination gcs_destination = 1
        [(google.api.field_behavior) = REQUIRED];
  }

  // The format in which the model must be exported. The available, and
  // default, formats depend on the problem and model type (if given problem
  // and type combination doesn't have a format listed, it means its models
  // are not exportable):
  //
  // *  For Image Classification mobile-low-latency-1, mobile-versatile-1,
  //    mobile-high-accuracy-1:
  //    "tflite" (default), "edgetpu_tflite", "tf_saved_model", "tf_js".
  //
  // *  For Image Classification mobile-core-ml-low-latency-1,
  //    mobile-core-ml-versatile-1, mobile-core-ml-high-accuracy-1:
  //    "core_ml" (default).
  //
  // *  For Image Object Detection mobile-low-latency-1, mobile-versatile-1,
  //    mobile-high-accuracy-1:
  //    "tflite", "tf_saved_model", "tf_js".
  //
  // Formats description:
  //
  // *  tflite - Used for Android mobile devices.
  // *  edgetpu_tflite - Used for [Edge TPU](https://cloud.google.com/edge-tpu/)
  //    devices.
  // *  tf_saved_model - A TensorFlow model in SavedModel format.
  // *  tf_js - A [TensorFlow.js](https://www.tensorflow.org/js) model that
  //    can be used in the browser and in Node.js using JavaScript.
  // *  core_ml - Used for iOS mobile devices.
  string model_format = 4;

  // Additional model-type and format specific parameters describing the
  // requirements for the model files to be exported. Any string must be up
  // to 25000 characters long.
  map<string, string> params = 2;
}

// The Google Cloud Storage location for the input content.
message GcsSource {
  // Required. Google Cloud Storage URIs to input files, up to 2000
  // characters long. Accepted forms:
  // * Full object path, e.g. gs://bucket/directory/object.csv
  repeated string input_uris = 1 [(google.api.field_behavior) = REQUIRED];
}

// The Google Cloud Storage location where the output is to be written to.
message GcsDestination {
  // Required. Google Cloud Storage URI to output directory, up to 2000
  // characters long.
  // Accepted forms:
  // * Prefix path: gs://bucket/directory
  // The requesting user must have write permission to the bucket.
  // The directory is created if it doesn't exist.
  string output_uri_prefix = 1 [(google.api.field_behavior) = REQUIRED];
}
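
// For orientation only: a minimal sketch of how the two messages above
// would look in proto3 JSON form (field names follow the standard proto3
// JSON mapping; the bucket paths are placeholders, not real locations):
//
//     // GcsSource
//     { "inputUris": ["gs://bucket/directory/object.csv"] }
//
//     // GcsDestination
//     { "outputUriPrefix": "gs://bucket/directory" }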