// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.translation.v3;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/timestamp.proto";

option cc_enable_arenas = true;
option csharp_namespace = "Google.Cloud.Translate.V3";
option go_package = "cloud.google.com/go/translate/apiv3/translatepb;translatepb";
option java_multiple_files = true;
option java_outer_classname = "TranslationServiceProto";
option java_package = "com.google.cloud.translate.v3";
option php_namespace = "Google\\Cloud\\Translate\\V3";
option ruby_package = "Google::Cloud::Translate::V3";

// Proto file for the Cloud Translation API (v3 GA).

// Provides natural language translation operations.
service TranslationService {
  option (google.api.default_host) = "translate.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform,"
      "https://www.googleapis.com/auth/cloud-translation";

  // Translates input text and returns translated text.
  rpc TranslateText(TranslateTextRequest) returns (TranslateTextResponse) {
    option (google.api.http) = {
      post: "/v3/{parent=projects/*/locations/*}:translateText"
      body: "*"
      additional_bindings {
        post: "/v3/{parent=projects/*}:translateText"
        body: "*"
      }
    };
    option (google.api.method_signature) =
        "parent,target_language_code,contents";
    option (google.api.method_signature) =
        "parent,model,mime_type,source_language_code,target_language_code,contents";
  }

  // Detects the language of text within a request.
  rpc DetectLanguage(DetectLanguageRequest) returns (DetectLanguageResponse) {
    option (google.api.http) = {
      post: "/v3/{parent=projects/*/locations/*}:detectLanguage"
      body: "*"
      additional_bindings {
        post: "/v3/{parent=projects/*}:detectLanguage"
        body: "*"
      }
    };
    option (google.api.method_signature) = "parent,model,mime_type,content";
  }

  // Returns a list of supported languages for translation.
  rpc GetSupportedLanguages(GetSupportedLanguagesRequest)
      returns (SupportedLanguages) {
    option (google.api.http) = {
      get: "/v3/{parent=projects/*/locations/*}/supportedLanguages"
      additional_bindings { get: "/v3/{parent=projects/*}/supportedLanguages" }
    };
    option (google.api.method_signature) = "parent,model,display_language_code";
  }

  // Translates documents in synchronous mode.
  rpc TranslateDocument(TranslateDocumentRequest)
      returns (TranslateDocumentResponse) {
    option (google.api.http) = {
      post: "/v3/{parent=projects/*/locations/*}:translateDocument"
      body: "*"
    };
  }

  // Translates a large volume of text in asynchronous batch mode.
  // This function provides real-time output as the inputs are being processed.
  // If caller cancels a request, the partial results (for an input file, it's
  // all or nothing) may still be available on the specified output location.
  //
  // This call returns immediately and you can
  // use google.longrunning.Operation.name to poll the status of the call.
  rpc BatchTranslateText(BatchTranslateTextRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v3/{parent=projects/*/locations/*}:batchTranslateText"
      body: "*"
    };
    option (google.longrunning.operation_info) = {
      response_type: "BatchTranslateResponse"
      metadata_type: "BatchTranslateMetadata"
    };
  }

  // Translates a large volume of documents in asynchronous batch mode.
  // This function provides real-time output as the inputs are being processed.
  // If caller cancels a request, the partial results (for an input file, it's
  // all or nothing) may still be available on the specified output location.
  //
  // This call returns immediately and you can use
  // google.longrunning.Operation.name to poll the status of the call.
  rpc BatchTranslateDocument(BatchTranslateDocumentRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v3/{parent=projects/*/locations/*}:batchTranslateDocument"
      body: "*"
    };
    option (google.api.method_signature) =
        "parent,source_language_code,target_language_codes,input_configs,output_config";
    option (google.longrunning.operation_info) = {
      response_type: "BatchTranslateDocumentResponse"
      metadata_type: "BatchTranslateDocumentMetadata"
    };
  }

  // Creates a glossary and returns the long-running operation. Returns
  // NOT_FOUND, if the project doesn't exist.
  rpc CreateGlossary(CreateGlossaryRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v3/{parent=projects/*/locations/*}/glossaries"
      body: "glossary"
    };
    option (google.api.method_signature) = "parent,glossary";
    option (google.longrunning.operation_info) = {
      response_type: "Glossary"
      metadata_type: "CreateGlossaryMetadata"
    };
  }

  // Lists glossaries in a project. Returns NOT_FOUND, if the project doesn't
  // exist.
  rpc ListGlossaries(ListGlossariesRequest) returns (ListGlossariesResponse) {
    option (google.api.http) = {
      get: "/v3/{parent=projects/*/locations/*}/glossaries"
    };
    option (google.api.method_signature) = "parent";
  }

  // Gets a glossary. Returns NOT_FOUND, if the glossary doesn't
  // exist.
  rpc GetGlossary(GetGlossaryRequest) returns (Glossary) {
    option (google.api.http) = {
      get: "/v3/{name=projects/*/locations/*/glossaries/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Deletes a glossary, or cancels glossary construction
  // if the glossary isn't created yet.
  // Returns NOT_FOUND, if the glossary doesn't exist.
  rpc DeleteGlossary(DeleteGlossaryRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      delete: "/v3/{name=projects/*/locations/*/glossaries/*}"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "DeleteGlossaryResponse"
      metadata_type: "DeleteGlossaryMetadata"
    };
  }
}

// Configures which glossary should be used for a specific target language,
// and defines options for applying that glossary.
message TranslateTextGlossaryConfig {
  // Required. The `glossary` to be applied for this translation.
  //
  // The format depends on the glossary:
  //
  // - User-provided custom glossary:
  //   `projects/{project-number-or-id}/locations/{location-id}/glossaries/{glossary-id}`
  string glossary = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. Indicates match is case insensitive. The default value is `false`
  // if missing.
  bool ignore_case = 2 [(google.api.field_behavior) = OPTIONAL];
}

// The request message for synchronous translation.
message TranslateTextRequest {
  // Required. The content of the input in string format.
  // We recommend the total content be less than 30,000 codepoints. The max
  // length of this field is 1024. Use BatchTranslateText for larger text.
  repeated string contents = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. The format of the source text, for example, "text/html",
  // "text/plain". If left blank, the MIME type defaults to "text/html".
  string mime_type = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The ISO-639 language code of the input text if
  // known, for example, "en-US" or "sr-Latn". Supported language codes are
  // listed in Language Support. If the source language isn't specified, the API
  // attempts to identify the source language automatically and returns the
  // source language within the response.
  string source_language_code = 4 [(google.api.field_behavior) = OPTIONAL];

  // Required. The ISO-639 language code to use for translation of the input
  // text, set to one of the language codes listed in Language Support.
  string target_language_code = 5 [(google.api.field_behavior) = REQUIRED];

  // Required. Project or location to make a call. Must refer to a caller's
  // project.
  //
  // Format: `projects/{project-number-or-id}` or
  // `projects/{project-number-or-id}/locations/{location-id}`.
  //
  // For global calls, use `projects/{project-number-or-id}/locations/global` or
  // `projects/{project-number-or-id}`.
  //
  // Non-global location is required for requests using AutoML models or
  // custom glossaries.
  //
  // Models and glossaries must be within the same region (have same
  // location-id), otherwise an INVALID_ARGUMENT (400) error is returned.
  string parent = 8 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Optional. The `model` type requested for this translation.
  //
  // The format depends on model type:
  //
  // - AutoML Translation models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/{model-id}`
  //
  // - General (built-in) models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/general/nmt`,
  //
  //
  // For global (non-regionalized) requests, use `location-id` `global`.
  // For example,
  // `projects/{project-number-or-id}/locations/global/models/general/nmt`.
  //
  // If not provided, the default Google model (NMT) will be used.
  string model = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Glossary to be applied. The glossary must be
  // within the same region (have the same location-id) as the model, otherwise
  // an INVALID_ARGUMENT (400) error is returned.
  TranslateTextGlossaryConfig glossary_config = 7
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The labels with user-defined metadata for the request.
  //
  // Label keys and values can be no longer than 63 characters
  // (Unicode codepoints), can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  // Label values are optional. Label keys must start with a letter.
  //
  // See https://cloud.google.com/translate/docs/advanced/labels for more
  // information.
  map<string, string> labels = 10 [(google.api.field_behavior) = OPTIONAL];
}

// The response message for synchronous translation.
message TranslateTextResponse {
  // Text translation responses with no glossary applied.
  // This field has the same length as
  // [`contents`][google.cloud.translation.v3.TranslateTextRequest.contents].
  repeated Translation translations = 1;

  // Text translation responses if a glossary is provided in the request.
  // This can be the same as
  // [`translations`][google.cloud.translation.v3.TranslateTextResponse.translations]
  // if no terms apply. This field has the same length as
  // [`contents`][google.cloud.translation.v3.TranslateTextRequest.contents].
  repeated Translation glossary_translations = 3;
}

// A single translation response.
message Translation {
  // Text translated into the target language.
  // If an error occurs during translation, this field might be excluded from
  // the response.
  string translated_text = 1;

  // Only present when `model` is present in the request.
  // `model` here is normalized to have project number.
  //
  // For example:
  // If the `model` requested in TranslateTextRequest is
  // `projects/{project-id}/locations/{location-id}/models/general/nmt` then
  // `model` here would be normalized to
  // `projects/{project-number}/locations/{location-id}/models/general/nmt`.
  string model = 2;

  // The ISO-639 language code of source text in the initial request, detected
  // automatically, if no source language was passed within the initial
  // request. If the source language was passed, auto-detection of the language
  // does not occur and this field is empty.
  string detected_language_code = 4;

  // The `glossary_config` used for this translation.
  TranslateTextGlossaryConfig glossary_config = 3;
}

// The request message for language detection.
message DetectLanguageRequest {
  // Required. Project or location to make a call. Must refer to a caller's
  // project.
  //
  // Format: `projects/{project-number-or-id}/locations/{location-id}` or
  // `projects/{project-number-or-id}`.
  //
  // For global calls, use `projects/{project-number-or-id}/locations/global` or
  // `projects/{project-number-or-id}`.
  //
  // Only models within the same region (has same location-id) can be used.
  // Otherwise an INVALID_ARGUMENT (400) error is returned.
  string parent = 5 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Optional. The language detection model to be used.
  //
  // Format:
  // `projects/{project-number-or-id}/locations/{location-id}/models/language-detection/{model-id}`
  //
  // Only one language detection model is currently supported:
  // `projects/{project-number-or-id}/locations/{location-id}/models/language-detection/default`.
  //
  // If not specified, the default model is used.
  string model = 4 [(google.api.field_behavior) = OPTIONAL];

  // Required. The source of the document from which to detect the language.
  oneof source {
    // The content of the input stored as a string.
    string content = 1;
  }

  // Optional. The format of the source text, for example, "text/html",
  // "text/plain". If left blank, the MIME type defaults to "text/html".
  string mime_type = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The labels with user-defined metadata for the request.
  //
  // Label keys and values can be no longer than 63 characters
  // (Unicode codepoints), can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  // Label values are optional. Label keys must start with a letter.
  //
  // See https://cloud.google.com/translate/docs/advanced/labels for more
  // information.
  map<string, string> labels = 6 [(google.api.field_behavior) = OPTIONAL];
}

// A single detected language together with the confidence of the detection.
message DetectedLanguage {
  // The ISO-639 language code of the source content in the request, detected
  // automatically.
  string language_code = 1;

  // The confidence of the detection result for this language.
  float confidence = 2;
}

// The response message for language detection.
message DetectLanguageResponse {
  // The most probable language detected by the Translation API. For each
  // request, the Translation API will always return only one result.
  repeated DetectedLanguage languages = 1;
}

// The request message for discovering supported languages.
message GetSupportedLanguagesRequest {
  // Required. Project or location to make a call. Must refer to a caller's
  // project.
  //
  // Format: `projects/{project-number-or-id}` or
  // `projects/{project-number-or-id}/locations/{location-id}`.
  //
  // For global calls, use `projects/{project-number-or-id}/locations/global` or
  // `projects/{project-number-or-id}`.
  //
  // Non-global location is required for AutoML models.
  //
  // Only models within the same region (have same location-id) can be used,
  // otherwise an INVALID_ARGUMENT (400) error is returned.
  string parent = 3 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Optional. The language to use to return localized, human readable names
  // of supported languages. If missing, then display names are not returned
  // in a response.
  string display_language_code = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Get supported languages of this model.
  //
  // The format depends on model type:
  //
  // - AutoML Translation models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/{model-id}`
  //
  // - General (built-in) models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/general/nmt`,
  //
  //
  // Returns languages supported by the specified model.
  // If missing, we get supported languages of Google general NMT model.
  string model = 2 [(google.api.field_behavior) = OPTIONAL];
}

// The response message for discovering supported languages.
message SupportedLanguages {
  // A list of supported language responses. This list contains an entry
  // for each language the Translation API supports.
  repeated SupportedLanguage languages = 1;
}

// A single supported language response corresponds to information related
// to one supported language.
message SupportedLanguage {
  // Supported language code, generally consisting of its ISO 639-1
  // identifier, for example, 'en', 'ja'. In certain cases, ISO-639 codes
  // including language and region identifiers are returned (for example,
  // 'zh-TW' and 'zh-CN').
  string language_code = 1;

  // Human-readable name of the language localized in the display language
  // specified in the request.
  string display_name = 2;

  // Can be used as a source language.
  bool support_source = 3;

  // Can be used as a target language.
  bool support_target = 4;
}

// The Google Cloud Storage location for the input content.
message GcsSource {
  // Required. Source data URI. For example, `gs://my_bucket/my_object`.
  string input_uri = 1 [(google.api.field_behavior) = REQUIRED];
}

// Input configuration for BatchTranslateText request.
message InputConfig {
  // Optional. Can be "text/plain" or "text/html".
  // For `.tsv`, "text/html" is used if mime_type is missing.
  // For `.html`, this field must be "text/html" or empty.
  // For `.txt`, this field must be "text/plain" or empty.
  string mime_type = 1 [(google.api.field_behavior) = OPTIONAL];

  // Required. Specify the input.
  oneof source {
    // Required. Google Cloud Storage location for the source input.
    // This can be a single file (for example,
    // `gs://translation-test/input.tsv`) or a wildcard (for example,
    // `gs://translation-test/*`). If a file extension is `.tsv`, it can
    // contain either one or two columns. The first column (optional) is the id
    // of the text request. If the first column is missing, we use the row
    // number (0-based) from the input file as the ID in the output file. The
    // second column is the actual text to be
    // translated. We recommend each row be <= 10K Unicode codepoints,
    // otherwise an error might be returned.
    // Note that the input tsv must be RFC 4180 compliant.
    //
    // You could use https://github.com/Clever/csvlint to check potential
    // formatting errors in your tsv file.
    // csvlint --delimiter='\t' your_input_file.tsv
    //
    // The other supported file extensions are `.txt` or `.html`, which is
    // treated as a single large chunk of text.
    GcsSource gcs_source = 2;
  }
}

// The Google Cloud Storage location for the output content.
message GcsDestination {
  // Required. The bucket used in 'output_uri_prefix' must exist and there must
  // be no files under 'output_uri_prefix'. 'output_uri_prefix' must end with
  // "/" and start with "gs://". One 'output_uri_prefix' can only be used by one
  // batch translation job at a time. Otherwise an INVALID_ARGUMENT (400) error
  // is returned.
  string output_uri_prefix = 1 [(google.api.field_behavior) = REQUIRED];
}

// Output configuration for BatchTranslateText request.
message OutputConfig {
  // Required. The destination of output.
  oneof destination {
    // Google Cloud Storage destination for output content.
    // For every single input file (for example, gs://a/b/c.[extension]), we
    // generate at most 2 * n output files. (n is the # of target_language_codes
    // in the BatchTranslateTextRequest).
    //
    // Output files (tsv) generated are compliant with RFC 4180 except that
    // record delimiters are '\n' instead of '\r\n'. We don't provide any way to
    // change record delimiters.
    //
    // While the input files are being processed, we write/update an index file
    // 'index.csv' under 'output_uri_prefix' (for example,
    // gs://translation-test/index.csv). The index file is generated/updated as
    // new files are being translated. The format is:
    //
    // input_file,target_language_code,translations_file,errors_file,
    // glossary_translations_file,glossary_errors_file
    //
    // input_file is one file we matched using gcs_source.input_uri.
    // target_language_code is provided in the request.
    // translations_file contains the translations. (details provided below)
    // errors_file contains the errors during processing of the file. (details
    // below). Both translations_file and errors_file could be empty
    // strings if we have no content to output.
    // glossary_translations_file and glossary_errors_file are always empty
    // strings if the input_file is tsv. They could also be empty if we have no
    // content to output.
    //
    // Once a row is present in index.csv, the input/output matching never
    // changes. Callers should also expect all the content in input_file are
    // processed and ready to be consumed (that is, no partial output file is
    // written).
    //
    // Because index.csv is updated continuously during processing, make sure
    // that no custom retention policy on the output bucket prevents the file
    // from being updated.
    // (https://cloud.google.com/storage/docs/bucket-lock#retention-policy)
    //
    // The format of translations_file (for target language code 'trg') is:
    // `gs://translation_test/a_b_c_'trg'_translations.[extension]`
    //
    // If the input file extension is tsv, the output has the following
    // columns:
    // Column 1: ID of the request provided in the input, if it's not
    // provided in the input, then the input row number is used (0-based).
    // Column 2: source sentence.
    // Column 3: translation without applying a glossary. Empty string if there
    // is an error.
    // Column 4 (only present if a glossary is provided in the request):
    // translation after applying the glossary. Empty string if there is an
    // error applying the glossary. Could be same string as column 3 if there is
    // no glossary applied.
    //
    // If input file extension is a txt or html, the translation is directly
    // written to the output file. If glossary is requested, a separate
    // glossary_translations_file has format of
    // `gs://translation_test/a_b_c_'trg'_glossary_translations.[extension]`
    //
    // The format of errors file (for target language code 'trg') is:
    // `gs://translation_test/a_b_c_'trg'_errors.[extension]`
    //
    // If the input file extension is tsv, errors_file contains the following:
    // Column 1: ID of the request provided in the input, if it's not
    // provided in the input, then the input row number is used (0-based).
    // Column 2: source sentence.
    // Column 3: Error detail for the translation. Could be empty.
    // Column 4 (only present if a glossary is provided in the request):
    // Error when applying the glossary.
    //
    // If the input file extension is txt or html, glossary_errors_file will be
    // generated that contains error details. glossary_errors_file has format of
    // `gs://translation_test/a_b_c_'trg'_glossary_errors.[extension]`
    GcsDestination gcs_destination = 1;
  }
}

// A document translation request input config.
message DocumentInputConfig {
  // Specifies the source for the document's content.
  // The input file size should be <= 20MB for
  // - application/vnd.openxmlformats-officedocument.wordprocessingml.document
  // - application/vnd.openxmlformats-officedocument.presentationml.presentation
  // - application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
  // The input file size should be <= 20MB and the maximum page limit is 20 for
  // - application/pdf
  oneof source {
    // Document's content represented as a stream of bytes.
    bytes content = 1;

    // Google Cloud Storage location. This must be a single file.
    // For example: gs://example_bucket/example_file.pdf
    GcsSource gcs_source = 2;
  }

  // Specifies the input document's mime_type.
  //
  // If not specified it will be determined using the file extension for
  // gcs_source provided files. For a file provided through bytes content the
  // mime_type must be provided.
  // Currently supported mime types are:
  // - application/pdf
  // - application/vnd.openxmlformats-officedocument.wordprocessingml.document
  // - application/vnd.openxmlformats-officedocument.presentationml.presentation
  // - application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
  string mime_type = 4;
}

// A document translation request output config.
message DocumentOutputConfig {
  // A URI destination for the translated document.
  // It is optional to provide a destination. If provided the results from
  // TranslateDocument will be stored in the destination.
  // Whether a destination is provided or not, the translated documents will be
  // returned within TranslateDocumentResponse.document_translation and
  // TranslateDocumentResponse.glossary_document_translation.
  oneof destination {
    // Optional. Google Cloud Storage destination for the translation output,
    // e.g., `gs://my_bucket/my_directory/`.
    //
    // The destination directory provided does not have to be empty, but the
    // bucket must exist. If a file with the same name as the output file
    // already exists in the destination an error will be returned.
    //
    // For a DocumentInputConfig.content provided document, the output file
    // will have the name "output_[trg]_translations.[ext]", where
    //  - [trg] corresponds to the translated file's language code,
    //  - [ext] corresponds to the translated file's extension according to its
    //  mime type.
    //
    //
    // For a DocumentInputConfig.gcs_source provided document, the output file
    // will have a name according to its URI. For example: an input file with
    // URI: `gs://a/b/c.[extension]` stored in a gcs_destination bucket with
    // name "my_bucket" will have an output URI:
    // `gs://my_bucket/a_b_c_[trg]_translations.[ext]`, where
    //  - [trg] corresponds to the translated file's language code,
    //  - [ext] corresponds to the translated file's extension according to its
    //  mime type.
    //
    //
    // If the document was directly provided through the request, then the
    // output document will have the format:
    // `gs://my_bucket/translated_document_[trg]_translations.[ext]`, where
    //  - [trg] corresponds to the translated file's language code,
    //  - [ext] corresponds to the translated file's extension according to its
    //  mime type.
    //
    // If a glossary was provided, then the output URI for the glossary
    // translation will be equal to the default output URI but have
    // `glossary_translations` instead of `translations`. For the previous
    // example, its glossary URI would be:
    // `gs://my_bucket/a_b_c_[trg]_glossary_translations.[ext]`.
    //
    // Thus the max number of output files will be 2 (Translated document,
    // Glossary translated document).
    //
    // Callers should expect no partial outputs. If there is any error during
    // document translation, no output will be stored in the Cloud Storage
    // bucket.
    GcsDestination gcs_destination = 1
        [(google.api.field_behavior) = OPTIONAL];
  }

  // Optional. Specifies the translated document's mime_type.
  // If not specified, the translated file's mime type will be the same as the
  // input file's mime type.
  // Currently, the output mime type must be the same as the input mime type.
  // - application/pdf
  // - application/vnd.openxmlformats-officedocument.wordprocessingml.document
  // - application/vnd.openxmlformats-officedocument.presentationml.presentation
  // - application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
  string mime_type = 3 [(google.api.field_behavior) = OPTIONAL];
}

// A document translation request.
message TranslateDocumentRequest {
  // Required. Location to make a regional call.
  //
  // Format: `projects/{project-number-or-id}/locations/{location-id}`.
  //
  // For global calls, use `projects/{project-number-or-id}/locations/global` or
  // `projects/{project-number-or-id}`.
  //
  // Non-global location is required for requests using AutoML models or custom
  // glossaries.
  //
  // Models and glossaries must be within the same region (have the same
  // location-id), otherwise an INVALID_ARGUMENT (400) error is returned.
  string parent = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. The ISO-639 language code of the input document if known, for
  // example, "en-US" or "sr-Latn". Supported language codes are listed in
  // Language Support. If the source language isn't specified, the API attempts
  // to identify the source language automatically and returns the source
  // language within the response. Source language must be specified if the
  // request contains a glossary or a custom model.
  string source_language_code = 2 [(google.api.field_behavior) = OPTIONAL];

  // Required. The ISO-639 language code to use for translation of the input
  // document, set to one of the language codes listed in Language Support.
  string target_language_code = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. Input configurations.
  DocumentInputConfig document_input_config = 4
      [(google.api.field_behavior) = REQUIRED];

  // Optional. Output configurations.
  // Defines if the output file should be stored within Cloud Storage as well
  // as the desired output format. If not provided the translated file will
  // only be returned through a byte-stream and its output mime type will be
  // the same as the input file's mime type.
  DocumentOutputConfig document_output_config = 5
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The `model` type requested for this translation.
  //
  // The format depends on model type:
  //
  // - AutoML Translation models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/{model-id}`
  //
  // - General (built-in) models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/general/nmt`,
  //
  //
  // If not provided, the default Google model (NMT) will be used for
  // translation.
  string model = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Glossary to be applied. The glossary must be within the same
  // region (have the same location-id) as the model, otherwise an
  // INVALID_ARGUMENT (400) error is returned.
  TranslateTextGlossaryConfig glossary_config = 7
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The labels with user-defined metadata for the request.
  //
  // Label keys and values can be no longer than 63 characters (Unicode
  // codepoints), can only contain lowercase letters, numeric characters,
  // underscores and dashes. International characters are allowed. Label values
  // are optional. Label keys must start with a letter.
  //
  // See https://cloud.google.com/translate/docs/advanced/labels for more
  // information.
  map<string, string> labels = 8 [(google.api.field_behavior) = OPTIONAL];

  // Optional. This flag is to support user customized attribution.
  // If not provided, the default is `Machine Translated by Google`.
  // Customized attribution should follow rules in
  // https://cloud.google.com/translate/attribution#attribution_and_logos
  string customized_attribution = 10 [(google.api.field_behavior) = OPTIONAL];

  // Optional. If true, the page limit of online native pdf translation is 300
  // and only native pdf pages will be translated.
  bool is_translate_native_pdf_only = 11
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. If true, use the text removal to remove the shadow text on
  // background image for native pdf translation.
  // Shadow removal feature can only be enabled when
  // is_translate_native_pdf_only is false.
  bool enable_shadow_removal_native_pdf = 12
      [(google.api.field_behavior) = OPTIONAL];
}

// A translated document message.
message DocumentTranslation {
  // The array of translated documents. It is expected to be size 1 for now. We
  // may produce multiple translated documents in the future for other type of
  // file formats.
  repeated bytes byte_stream_outputs = 1;

  // The translated document's mime type.
  string mime_type = 2;

  // The detected language for the input document.
  // If the user did not provide the source language for the input document,
  // this field will have the language code automatically detected. If the
  // source language was passed, auto-detection of the language does not occur
  // and this field is empty.
  string detected_language_code = 3;
}

// A translated document response message.
message TranslateDocumentResponse {
  // Translated document.
  DocumentTranslation document_translation = 1;

  // The document's translation output if a glossary is provided in the request.
  // This can be the same as [TranslateDocumentResponse.document_translation]
  // if no glossary terms apply.
  DocumentTranslation glossary_document_translation = 2;

  // Only present when 'model' is present in the request.
  // 'model' is normalized to have a project number.
  //
  // For example:
  // If the 'model' field in TranslateDocumentRequest is:
  // `projects/{project-id}/locations/{location-id}/models/general/nmt` then
  // `model` here would be normalized to
  // `projects/{project-number}/locations/{location-id}/models/general/nmt`.
  string model = 3;

  // The `glossary_config` used for this translation.
  TranslateTextGlossaryConfig glossary_config = 4;
}

// The batch translation request.
message BatchTranslateTextRequest {
  // Required. Location to make a call. Must refer to a caller's project.
  //
  // Format: `projects/{project-number-or-id}/locations/{location-id}`.
  //
  // The `global` location is not supported for batch translation.
  //
  // Only AutoML Translation models or glossaries within the same region (have
  // the same location-id) can be used, otherwise an INVALID_ARGUMENT (400)
  // error is returned.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Required. Source language code.
  string source_language_code = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. Specify up to 10 language codes here.
  repeated string target_language_codes = 3
      [(google.api.field_behavior) = REQUIRED];

  // Optional. The models to use for translation. Map's key is target language
  // code. Map's value is model name. Value can be a built-in general model,
  // or an AutoML Translation model.
  //
  // The value format depends on model type:
  //
  // - AutoML Translation models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/{model-id}`
  //
  // - General (built-in) models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/general/nmt`,
  //
  //
  // If the map is empty or a specific model is
  // not requested for a language pair, then default google model (nmt) is used.
  map<string, string> models = 4 [(google.api.field_behavior) = OPTIONAL];

  // Required. Input configurations.
  // The total number of files matched should be <= 100.
  // The total content size should be <= 100M Unicode codepoints.
  // The files must use UTF-8 encoding.
  repeated InputConfig input_configs = 5
      [(google.api.field_behavior) = REQUIRED];

  // Required. Output configuration.
  // If 2 input configs match to the same file (that is, same input path),
  // we don't generate output for duplicate inputs.
  OutputConfig output_config = 6 [(google.api.field_behavior) = REQUIRED];

  // Optional. Glossaries to be applied for translation.
  // It's keyed by target language code.
  map<string, TranslateTextGlossaryConfig> glossaries = 7
      [(google.api.field_behavior) = OPTIONAL];

  // NOTE(review): field number 8 is not assigned in this message. If it was
  // retired, consider declaring `reserved 8;` -- confirm against history.

  // Optional. The labels with user-defined metadata for the request.
  //
  // Label keys and values can be no longer than 63 characters
  // (Unicode codepoints), can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  // Label values are optional. Label keys must start with a letter.
  //
  // See https://cloud.google.com/translate/docs/advanced/labels for more
  // information.
  map<string, string> labels = 9 [(google.api.field_behavior) = OPTIONAL];
}

// State metadata for the batch translation operation.
message BatchTranslateMetadata {
  // State of the job.
  enum State {
    // Invalid.
    STATE_UNSPECIFIED = 0;

    // Request is being processed.
    RUNNING = 1;

    // The batch is processed, and at least one item was successfully
    // processed.
    SUCCEEDED = 2;

    // The batch is done and no item was successfully processed.
    FAILED = 3;

    // Request is in the process of being canceled after caller invoked
    // longrunning.Operations.CancelOperation on the request id.
    CANCELLING = 4;

    // The batch is done after the user has called the
    // longrunning.Operations.CancelOperation. Any records processed before the
    // cancel command are output as specified in the request.
    CANCELLED = 5;
  }

  // The state of the operation.
  State state = 1;

  // Number of successfully translated characters so far (Unicode codepoints).
  int64 translated_characters = 2;

  // Number of characters that have failed to process so far (Unicode
  // codepoints).
  int64 failed_characters = 3;

  // Total number of characters (Unicode codepoints).
  // This is the total number of codepoints from input files times the number of
  // target languages and appears here shortly after the call is submitted.
  int64 total_characters = 4;

  // Time when the operation was submitted.
  google.protobuf.Timestamp submit_time = 5;
}

// Stored in the
// [google.longrunning.Operation.response][google.longrunning.Operation.response]
// field returned by BatchTranslateText if at least one sentence is translated
// successfully.
message BatchTranslateResponse {
  // Total number of characters (Unicode codepoints).
  int64 total_characters = 1;

  // Number of successfully translated characters (Unicode codepoints).
  int64 translated_characters = 2;

  // Number of characters that have failed to process (Unicode codepoints).
  int64 failed_characters = 3;

  // Time when the operation was submitted.
  google.protobuf.Timestamp submit_time = 4;

  // The time when the operation is finished and
  // [google.longrunning.Operation.done][google.longrunning.Operation.done] is
  // set to true.
  google.protobuf.Timestamp end_time = 5;
}

// Input configuration for glossaries.
message GlossaryInputConfig {
  // Required. Specify the input.
  oneof source {
    // Required. Google Cloud Storage location of glossary data.
    // File format is determined based on the filename extension. API returns
    // [google.rpc.Code.INVALID_ARGUMENT] for unsupported URI-s and file
    // formats. Wildcards are not allowed. This must be a single file in one of
    // the following formats:
    //
    // For unidirectional glossaries:
    //
    // - TSV/CSV (`.tsv`/`.csv`): Two column file, tab- or comma-separated.
    //   The first column is source text. The second column is target text.
    //   No headers in this file. The first row contains data and not column
    //   names.
    //
    // - TMX (`.tmx`): TMX file with parallel data defining source/target term
    //   pairs.
    //
    // For equivalent term sets glossaries:
    //
    // - CSV (`.csv`): Multi-column CSV file defining equivalent glossary terms
    //   in multiple languages. See documentation for more information -
    //   [glossaries](https://cloud.google.com/translate/docs/advanced/glossary).
    GcsSource gcs_source = 1;
  }
}

// Represents a glossary built from user-provided data.
message Glossary {
  option (google.api.resource) = {
    type: "translate.googleapis.com/Glossary"
    pattern: "projects/{project}/locations/{location}/glossaries/{glossary}"
  };

  // Used with unidirectional glossaries.
  message LanguageCodePair {
    // Required. The ISO-639 language code of the input text, for example,
    // "en-US". Expected to be an exact match for GlossaryTerm.language_code.
    string source_language_code = 1;

    // Required. The ISO-639 language code for translation output, for example,
    // "zh-CN". Expected to be an exact match for GlossaryTerm.language_code.
    string target_language_code = 2;
  }

  // Used with equivalent term set glossaries.
  message LanguageCodesSet {
    // The ISO-639 language code(s) for terms defined in the glossary.
    // All entries are unique. The list contains at least two entries.
    // Expected to be an exact match for GlossaryTerm.language_code.
    repeated string language_codes = 1;
  }

  // Required. The resource name of the glossary. Glossary names have the form
  // `projects/{project-number-or-id}/locations/{location-id}/glossaries/{glossary-id}`.
  string name = 1 [(google.api.field_behavior) = REQUIRED];

  // NOTE(review): field number 2 is not assigned in this message. If it was
  // retired, consider declaring `reserved 2;` -- confirm against history.

  // Languages supported by the glossary.
  oneof languages {
    // Used with unidirectional glossaries.
    LanguageCodePair language_pair = 3;

    // Used with equivalent term set glossaries.
    LanguageCodesSet language_codes_set = 4;
  }

  // Required. Provides examples to build the glossary from.
  // Total glossary must not exceed 10M Unicode codepoints.
  GlossaryInputConfig input_config = 5;

  // Output only. The number of entries defined in the glossary.
  int32 entry_count = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. When CreateGlossary was called.
  google.protobuf.Timestamp submit_time = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. When the glossary creation was finished.
  google.protobuf.Timestamp end_time = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. The display name of the glossary.
  string display_name = 9 [(google.api.field_behavior) = OPTIONAL];
}

// Request message for CreateGlossary.
message CreateGlossaryRequest {
  // Required. The project name.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Required. The glossary to create.
  Glossary glossary = 2 [(google.api.field_behavior) = REQUIRED];
}

// Request message for GetGlossary.
message GetGlossaryRequest {
  // Required. The name of the glossary to retrieve.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "translate.googleapis.com/Glossary"
    }
  ];
}

// Request message for DeleteGlossary.
message DeleteGlossaryRequest {
  // Required. The name of the glossary to delete.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "translate.googleapis.com/Glossary"
    }
  ];
}

// Request message for ListGlossaries.
message ListGlossariesRequest {
  // Required. The name of the project from which to list all of the glossaries.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Optional. Requested page size. The server may return fewer glossaries than
  // requested. If unspecified, the server picks an appropriate default.
  int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A token identifying a page of results the server should return.
  // Typically, this is the value of [ListGlossariesResponse.next_page_token]
  // returned from the previous call to `ListGlossaries` method.
  // The first page is returned if `page_token` is empty or missing.
  string page_token = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Filter specifying constraints of a list operation.
  // Specify the constraint by the format of "key=value", where key must be
  // "src" or "tgt", and the value must be a valid language code.
  // For multiple restrictions, concatenate them by "AND" (uppercase only),
  // such as: "src=en-US AND tgt=zh-CN". Notice that the exact match is used
  // here, which means using 'en-US' and 'en' can lead to different results,
  // which depends on the language code you used when you create the glossary.
  // For the unidirectional glossaries, the "src" and "tgt" add restrictions
  // on the source and target language code separately.
  // For the equivalent term set glossaries, the "src" and/or "tgt" add
  // restrictions on the term set.
  // For example: "src=en-US AND tgt=zh-CN" will only pick the unidirectional
  // glossaries which exactly match the source language code as "en-US" and the
  // target language code "zh-CN", but all equivalent term set glossaries which
  // contain "en-US" and "zh-CN" in their language set will be picked.
  // If missing, no filtering is performed.
  string filter = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Response message for ListGlossaries.
message ListGlossariesResponse {
  // The list of glossaries for a project.
  repeated Glossary glossaries = 1;

  // A token to retrieve a page of results. Pass this value in the
  // [ListGlossariesRequest.page_token] field in the subsequent call to
  // `ListGlossaries` method to retrieve the next page of results.
  string next_page_token = 2;
}

// Stored in the
// [google.longrunning.Operation.metadata][google.longrunning.Operation.metadata]
// field returned by CreateGlossary.
message CreateGlossaryMetadata {
  // Enumerates the possible states that the creation request can be in.
  enum State {
    // Invalid.
    STATE_UNSPECIFIED = 0;

    // Request is being processed.
    RUNNING = 1;

    // The glossary was successfully created.
    SUCCEEDED = 2;

    // Failed to create the glossary.
    FAILED = 3;

    // Request is in the process of being canceled after caller invoked
    // longrunning.Operations.CancelOperation on the request id.
    CANCELLING = 4;

    // The glossary creation request was successfully canceled.
    CANCELLED = 5;
  }

  // The name of the glossary that is being created.
  string name = 1;

  // The current state of the glossary creation operation.
  State state = 2;

  // The time when the operation was submitted to the server.
  google.protobuf.Timestamp submit_time = 3;
}

// Stored in the
// [google.longrunning.Operation.metadata][google.longrunning.Operation.metadata]
// field returned by DeleteGlossary.
message DeleteGlossaryMetadata {
  // Enumerates the possible states that the deletion request can be in.
  enum State {
    // Invalid.
    STATE_UNSPECIFIED = 0;

    // Request is being processed.
    RUNNING = 1;

    // The glossary was successfully deleted.
    SUCCEEDED = 2;

    // Failed to delete the glossary.
    FAILED = 3;

    // Request is in the process of being canceled after caller invoked
    // longrunning.Operations.CancelOperation on the request id.
    CANCELLING = 4;

    // The glossary deletion request was successfully canceled.
    CANCELLED = 5;
  }

  // The name of the glossary that is being deleted.
  string name = 1;

  // The current state of the glossary deletion operation.
  State state = 2;

  // The time when the operation was submitted to the server.
  google.protobuf.Timestamp submit_time = 3;
}

// Stored in the
// [google.longrunning.Operation.response][google.longrunning.Operation.response]
// field returned by DeleteGlossary.
message DeleteGlossaryResponse {
  // The name of the deleted glossary.
  string name = 1;

  // The time when the operation was submitted to the server.
  google.protobuf.Timestamp submit_time = 2;

  // The time when the glossary deletion is finished and
  // [google.longrunning.Operation.done][google.longrunning.Operation.done] is
  // set to true.
  google.protobuf.Timestamp end_time = 3;
}

// The BatchTranslateDocument request.
message BatchTranslateDocumentRequest {
  // Required. Location to make a regional call.
  //
  // Format: `projects/{project-number-or-id}/locations/{location-id}`.
  //
  // The `global` location is not supported for batch translation.
  //
  // Only AutoML Translation models or glossaries within the same region (have
  // the same location-id) can be used, otherwise an INVALID_ARGUMENT (400)
  // error is returned.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Required. The ISO-639 language code of the input document if known, for
  // example, "en-US" or "sr-Latn". Supported language codes are listed in
  // [Language Support](https://cloud.google.com/translate/docs/languages).
  string source_language_code = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The ISO-639 language code to use for translation of the input
  // document. Specify up to 10 language codes here.
  repeated string target_language_codes = 3
      [(google.api.field_behavior) = REQUIRED];

  // Required. Input configurations.
  // The total number of files matched should be <= 100.
  // The total content size to translate should be <= 100M Unicode codepoints.
  // The files must use UTF-8 encoding.
  repeated BatchDocumentInputConfig input_configs = 4
      [(google.api.field_behavior) = REQUIRED];

  // Required. Output configuration.
  // If 2 input configs match to the same file (that is, same input path),
  // we don't generate output for duplicate inputs.
  BatchDocumentOutputConfig output_config = 5
      [(google.api.field_behavior) = REQUIRED];

  // Optional. The models to use for translation. Map's key is target language
  // code. Map's value is the model name. Value can be a built-in general model,
  // or an AutoML Translation model.
  //
  // The value format depends on model type:
  //
  // - AutoML Translation models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/{model-id}`
  //
  // - General (built-in) models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/general/nmt`,
  //
  //
  // If the map is empty or a specific model is
  // not requested for a language pair, then default google model (nmt) is used.
  map<string, string> models = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Glossaries to be applied. It's keyed by target language code.
  map<string, TranslateTextGlossaryConfig> glossaries = 7
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. File format conversion map to be applied to all input files.
  // Map's key is the original mime_type. Map's value is the target mime_type of
  // translated documents.
  //
  // Supported file format conversion includes:
  // - `application/pdf` to
  //   `application/vnd.openxmlformats-officedocument.wordprocessingml.document`
  //
  // If nothing specified, output files will be in the same format as the
  // original file.
  map<string, string> format_conversions = 8
      [(google.api.field_behavior) = OPTIONAL];

  // NOTE(review): field number 9 is not assigned in this message. If it was
  // retired, consider declaring `reserved 9;` -- confirm against history.

  // Optional. This flag is to support user customized attribution.
  // If not provided, the default is `Machine Translated by Google`.
  // Customized attribution should follow rules in
  // https://cloud.google.com/translate/attribution#attribution_and_logos
  string customized_attribution = 10 [(google.api.field_behavior) = OPTIONAL];
}

// Input configuration for BatchTranslateDocument request.
message BatchDocumentInputConfig {
  // Specify the input.
  oneof source {
    // Google Cloud Storage location for the source input.
    // This can be a single file (for example,
    // `gs://translation-test/input.docx`) or a wildcard (for example,
    // `gs://translation-test/*`).
    //
    // File mime type is determined based on extension. Supported mime type
    // includes:
    // - `pdf`, application/pdf
    // - `docx`,
    // application/vnd.openxmlformats-officedocument.wordprocessingml.document
    // - `pptx`,
    // application/vnd.openxmlformats-officedocument.presentationml.presentation
    // - `xlsx`,
    // application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
    //
    // The max file size to support for `.docx`, `.pptx` and `.xlsx` is 100MB.
    // The max file size to support for `.pdf` is 1GB and the max page limit is
    // 1000 pages.
    // The max file size to support for all input documents is 1GB.
    GcsSource gcs_source = 1;
  }
}

// Output configuration for BatchTranslateDocument request.
message BatchDocumentOutputConfig {
  // The destination of output. The destination directory provided must exist
  // and be empty.
  oneof destination {
    // Google Cloud Storage destination for output content.
    // For every single input document (for example, gs://a/b/c.[extension]), we
    // generate at most 2 * n output files. (n is the # of target_language_codes
    // in the BatchTranslateDocumentRequest).
    //
    // While the input documents are being processed, we write/update an index
    // file `index.csv` under `gcs_destination.output_uri_prefix` (for example,
    // gs://translation_output/index.csv) The index file is generated/updated as
    // new files are being translated. The format is:
    //
    // input_document,target_language_code,translation_output,error_output,
    // glossary_translation_output,glossary_error_output
    //
    // `input_document` is one file we matched using gcs_source.input_uri.
    // `target_language_code` is provided in the request.
    // `translation_output` contains the translations. (details provided below)
    // `error_output` contains the error message during processing of the file.
    // Both translations_file and errors_file could be empty strings if we have
    // no content to output.
    // `glossary_translation_output` and `glossary_error_output` are the
    // translated output/error when we apply glossaries. They could also be
    // empty if we have no content to output.
    //
    // Once a row is present in index.csv, the input/output matching never
    // changes. Callers should also expect all the content in input_file are
    // processed and ready to be consumed (that is, no partial output file is
    // written).
    //
    // Since index.csv will be keeping updated during the process, please make
    // sure there is no custom retention policy applied on the output bucket
    // that may avoid file updating.
    // (https://cloud.google.com/storage/docs/bucket-lock#retention-policy)
    //
    // The naming format of translation output files follows (for target
    // language code [trg]): `translation_output`:
    // `gs://translation_output/a_b_c_[trg]_translation.[extension]`
    // `glossary_translation_output`:
    // `gs://translation_test/a_b_c_[trg]_glossary_translation.[extension]`. The
    // output document will maintain the same file format as the input document.
    //
    // The naming format of error output files follows (for target language code
    // [trg]): `error_output`: `gs://translation_test/a_b_c_[trg]_errors.txt`
    // `glossary_error_output`:
    // `gs://translation_test/a_b_c_[trg]_glossary_translation.txt`. The error
    // output is a txt file containing error details.
    GcsDestination gcs_destination = 1;
  }
}

// Stored in the
// [google.longrunning.Operation.response][google.longrunning.Operation.response]
// field returned by BatchTranslateDocument if at least one document is
// translated successfully.
message BatchTranslateDocumentResponse {
  // Total number of pages to translate in all documents. Documents without
  // clear page definition (such as XLSX) are not counted.
  int64 total_pages = 1;

  // Number of successfully translated pages in all documents. Documents without
  // clear page definition (such as XLSX) are not counted.
  int64 translated_pages = 2;

  // Number of pages that failed to process in all documents. Documents without
  // clear page definition (such as XLSX) are not counted.
  int64 failed_pages = 3;

  // Number of billable pages in documents with clear page definition (such as
  // PDF, DOCX, PPTX)
  int64 total_billable_pages = 4;

  // Total number of characters (Unicode codepoints) in all documents.
  int64 total_characters = 5;

  // Number of successfully translated characters (Unicode codepoints) in all
  // documents.
  int64 translated_characters = 6;

  // Number of characters that have failed to process (Unicode codepoints) in
  // all documents.
  int64 failed_characters = 7;

  // Number of billable characters (Unicode codepoints) in documents without
  // clear page definition, such as XLSX.
  int64 total_billable_characters = 8;

  // Time when the operation was submitted.
  google.protobuf.Timestamp submit_time = 9;

  // The time when the operation is finished and
  // [google.longrunning.Operation.done][google.longrunning.Operation.done] is
  // set to true.
  google.protobuf.Timestamp end_time = 10;
}

// State metadata for the batch translation operation.
message BatchTranslateDocumentMetadata {
  // State of the job.
  enum State {
    // Invalid.
    STATE_UNSPECIFIED = 0;

    // Request is being processed.
    RUNNING = 1;

    // The batch is processed, and at least one item was successfully processed.
    SUCCEEDED = 2;

    // The batch is done and no item was successfully processed.
    FAILED = 3;

    // Request is in the process of being canceled after caller invoked
    // longrunning.Operations.CancelOperation on the request id.
    CANCELLING = 4;

    // The batch is done after the user has called the
    // longrunning.Operations.CancelOperation. Any records processed before the
    // cancel command are output as specified in the request.
    CANCELLED = 5;
  }

  // The state of the operation.
  State state = 1;

  // Total number of pages to translate in all documents so far. Documents
  // without clear page definition (such as XLSX) are not counted.
  int64 total_pages = 2;

  // Number of successfully translated pages in all documents so far. Documents
  // without clear page definition (such as XLSX) are not counted.
  int64 translated_pages = 3;

  // Number of pages that failed to process in all documents so far. Documents
  // without clear page definition (such as XLSX) are not counted.
  int64 failed_pages = 4;

  // Number of billable pages in documents with clear page definition (such as
  // PDF, DOCX, PPTX) so far.
  int64 total_billable_pages = 5;

  // Total number of characters (Unicode codepoints) in all documents so far.
  int64 total_characters = 6;

  // Number of successfully translated characters (Unicode codepoints) in all
  // documents so far.
  int64 translated_characters = 7;

  // Number of characters that have failed to process (Unicode codepoints) in
  // all documents so far.
  int64 failed_characters = 8;

  // Number of billable characters (Unicode codepoints) in documents without
  // clear page definition (such as XLSX) so far.
  int64 total_billable_characters = 9;

  // Time when the operation was submitted.
  google.protobuf.Timestamp submit_time = 10;
}