// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.language.v2;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";

option go_package = "cloud.google.com/go/language/apiv2/languagepb;languagepb";
option java_multiple_files = true;
option java_outer_classname = "LanguageServiceProto";
option java_package = "com.google.cloud.language.v2";

// Provides text analysis operations such as sentiment analysis and entity
// recognition.
service LanguageService {
  option (google.api.default_host) = "language.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-language,"
      "https://www.googleapis.com/auth/cloud-platform";

  // Analyzes the sentiment of the provided text.
  rpc AnalyzeSentiment(AnalyzeSentimentRequest)
      returns (AnalyzeSentimentResponse) {
    option (google.api.http) = {
      post: "/v2/documents:analyzeSentiment"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Finds named entities (currently proper names and common nouns) in the text
  // along with entity types, probability, mentions for each entity, and
  // other properties.
  rpc AnalyzeEntities(AnalyzeEntitiesRequest)
      returns (AnalyzeEntitiesResponse) {
    option (google.api.http) = {
      post: "/v2/documents:analyzeEntities"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Classifies a document into categories.
  rpc ClassifyText(ClassifyTextRequest) returns (ClassifyTextResponse) {
    option (google.api.http) = {
      post: "/v2/documents:classifyText"
      body: "*"
    };
    option (google.api.method_signature) = "document";
  }

  // Moderates a document for harmful and sensitive categories.
  rpc ModerateText(ModerateTextRequest) returns (ModerateTextResponse) {
    option (google.api.http) = {
      post: "/v2/documents:moderateText"
      body: "*"
    };
    option (google.api.method_signature) = "document";
  }

  // A convenience method that provides all features in one call.
  rpc AnnotateText(AnnotateTextRequest) returns (AnnotateTextResponse) {
    option (google.api.http) = {
      post: "/v2/documents:annotateText"
      body: "*"
    };
    option (google.api.method_signature) = "document,features,encoding_type";
    option (google.api.method_signature) = "document,features";
  }
}

// Represents the input to API methods.
message Document {
  // The document types enum.
  enum Type {
    // The content type is not specified.
    TYPE_UNSPECIFIED = 0;

    // Plain text
    PLAIN_TEXT = 1;

    // HTML
    HTML = 2;
  }

  // Required. If the type is not set or is `TYPE_UNSPECIFIED`,
  // returns an `INVALID_ARGUMENT` error.
  Type type = 1;

  // The source of the document: a string containing the content or a
  // Google Cloud Storage URI.
  oneof source {
    // The content of the input in string format.
    // Cloud audit logging exempt since it is based on user data.
    string content = 2;

    // The Google Cloud Storage URI where the file content is located.
    // This URI must be of the form: gs://bucket_name/object_name. For more
    // details, see https://cloud.google.com/storage/docs/reference-uris.
    // NOTE: Cloud Storage object versioning is not supported.
    string gcs_content_uri = 3;
  }

  // Optional. The language of the document (if not specified, the language is
  // automatically detected). Both ISO and BCP-47 language codes are
  // accepted.
  //
  // [Language
  // Support](https://cloud.google.com/natural-language/docs/languages) lists
  // currently supported languages for each API method. If the language (either
  // specified by the caller or automatically detected) is not supported by the
  // called API method, an `INVALID_ARGUMENT` error is returned.
  string language_code = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Represents a sentence in the input document.
message Sentence {
  // The sentence text.
  TextSpan text = 1;

  // For calls to
  // [AnalyzeSentiment][google.cloud.language.v2.LanguageService.AnalyzeSentiment]
  // or if
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v2.AnnotateTextRequest.Features.extract_document_sentiment]
  // is set to true, this field will contain the sentiment for the sentence.
  Sentiment sentiment = 2;
}

// Represents the text encoding that the caller uses to process the output.
// Providing an `EncodingType` is recommended because the API provides the
// beginning offsets for various outputs, such as tokens and mentions, and
// languages that natively use different text encodings may access offsets
// differently.
enum EncodingType {
  // If `EncodingType` is not specified, encoding-dependent information (such as
  // `begin_offset`) will be set at `-1`.
  NONE = 0;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-8 encoding of the input. C++ and Go are examples of languages
  // that use this encoding natively.
  UTF8 = 1;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-16 encoding of the input. Java and JavaScript are examples of
  // languages that use this encoding natively.
  UTF16 = 2;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-32 encoding of the input. Python is an example of a language
  // that uses this encoding natively.
  UTF32 = 3;
}

// Represents a phrase in the text that is a known entity, such as
// a person, an organization, or location. The API associates information, such
// as probability and mentions, with entities.
message Entity {
  // The type of the entity. For most entity types, the associated metadata is a
  // Wikipedia URL (`wikipedia_url`) and Knowledge Graph MID (`mid`). The table
  // below lists the associated fields for entities that have different
  // metadata.
  enum Type {
    // Unknown
    UNKNOWN = 0;

    // Person
    PERSON = 1;

    // Location
    LOCATION = 2;

    // Organization
    ORGANIZATION = 3;

    // Event
    EVENT = 4;

    // Artwork
    WORK_OF_ART = 5;

    // Consumer product
    CONSUMER_GOOD = 6;

    // Other types of entities
    OTHER = 7;

    // Phone number
    //
    // The metadata lists the phone number, formatted according to local
    // convention, plus whichever additional elements appear in the text:
    //
    // * `number` - the actual number, broken down into sections as per local
    //   convention
    // * `national_prefix` - country code, if detected
    // * `area_code` - region or area code, if detected
    // * `extension` - phone extension (to be dialed after connection), if
    //   detected
    PHONE_NUMBER = 9;

    // Address
    //
    // The metadata identifies the street number and locality plus whichever
    // additional elements appear in the text:
    //
    // * `street_number` - street number
    // * `locality` - city or town
    // * `street_name` - street/route name, if detected
    // * `postal_code` - postal code, if detected
    // * `country` - country, if detected
    // * `broad_region` - administrative area, such as the state, if detected
    // * `narrow_region` - smaller administrative area, such as county, if
    //   detected
    // * `sublocality` - used in Asian addresses to demark a district within a
    //   city, if detected
    ADDRESS = 10;

    // Date
    //
    // The metadata identifies the components of the date:
    //
    // * `year` - four digit year, if detected
    // * `month` - two digit month number, if detected
    // * `day` - two digit day number, if detected
    DATE = 11;

    // Number
    //
    // The metadata is the number itself.
    NUMBER = 12;

    // Price
    //
    // The metadata identifies the `value` and `currency`.
    PRICE = 13;
  }

  // The representative name for the entity.
  string name = 1;

  // The entity type.
  Type type = 2;

  // Metadata associated with the entity.
  //
  // For most entity types, the metadata is a Wikipedia URL (`wikipedia_url`)
  // and Knowledge Graph MID (`mid`), if they are available. For the metadata
  // associated with other entity types, see the documentation of
  // [Type][google.cloud.language.v2.Entity.Type].
  map<string, string> metadata = 3;

  // The mentions of this entity in the input document. The API currently
  // supports proper noun mentions.
  repeated EntityMention mentions = 5;

  // For calls to [AnalyzeEntitySentiment][] or if
  // [AnnotateTextRequest.Features.extract_entity_sentiment][google.cloud.language.v2.AnnotateTextRequest.Features.extract_entity_sentiment]
  // is set to true, this field will contain the aggregate sentiment expressed
  // for this entity in the provided document.
  Sentiment sentiment = 6;
}

// Represents the feeling associated with the entire text or entities in
// the text.
message Sentiment {
  // A non-negative number in the [0, +inf) range, which represents
  // the absolute magnitude of sentiment regardless of score (positive or
  // negative).
  float magnitude = 1;

  // Sentiment score between -1.0 (negative sentiment) and 1.0
  // (positive sentiment).
  float score = 2;
}

// Represents a mention for an entity in the text. Currently, proper noun
// mentions are supported.
message EntityMention {
  // The supported types of mentions.
  enum Type {
    // Unknown
    TYPE_UNKNOWN = 0;

    // Proper name
    PROPER = 1;

    // Common noun (or noun compound)
    COMMON = 2;
  }

  // The mention text.
  TextSpan text = 1;

  // The type of the entity mention.
  Type type = 2;

  // For calls to [AnalyzeEntitySentiment][] or if
  // [AnnotateTextRequest.Features.extract_entity_sentiment][google.cloud.language.v2.AnnotateTextRequest.Features.extract_entity_sentiment]
  // is set to true, this field will contain the sentiment expressed for this
  // mention of the entity in the provided document.
  Sentiment sentiment = 3;

  // Probability score associated with the entity.
  //
  // The score shows the probability of the entity mention being the entity
  // type. The score is in (0, 1] range.
  float probability = 4;
}

// Represents a text span in the input document.
message TextSpan {
  // The content of the text span, which is a substring of the document.
  string content = 1;

  // The API calculates the beginning offset of the content in the original
  // document according to the
  // [EncodingType][google.cloud.language.v2.EncodingType] specified in the API
  // request.
  int32 begin_offset = 2;
}

// Represents a category returned from the text classifier.
message ClassificationCategory {
  // The name of the category representing the document.
  string name = 1;

  // The classifier's confidence of the category. Number represents how certain
  // the classifier is that this category represents the given text.
  float confidence = 2;
}

// The sentiment analysis request message.
message AnalyzeSentimentRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate sentence offsets.
  EncodingType encoding_type = 2;
}

// The sentiment analysis response message.
message AnalyzeSentimentResponse {
  // The overall sentiment of the input document.
  Sentiment document_sentiment = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See
  // [Document.language_code][google.cloud.language.v2.Document.language_code]
  // field for more details.
  string language_code = 2;

  // The sentiment for all the sentences in the document.
  repeated Sentence sentences = 3;

  // Whether the language is officially supported. The API may still return a
  // response when the language is not supported, but it is on a best effort
  // basis.
  bool language_supported = 4;
}

// The entity analysis request message.
message AnalyzeEntitiesRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}

// The entity analysis response message.
message AnalyzeEntitiesResponse {
  // The recognized entities in the input document.
  repeated Entity entities = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See
  // [Document.language_code][google.cloud.language.v2.Document.language_code]
  // field for more details.
  string language_code = 2;

  // Whether the language is officially supported. The API may still return a
  // response when the language is not supported, but it is on a best effort
  // basis.
  bool language_supported = 3;
}

// The document classification request message.
message ClassifyTextRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];
}

// The document classification response message.
message ClassifyTextResponse {
  // Categories representing the input document.
  repeated ClassificationCategory categories = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See
  // [Document.language_code][google.cloud.language.v2.Document.language_code]
  // field for more details.
  string language_code = 2;

  // Whether the language is officially supported. The API may still return a
  // response when the language is not supported, but it is on a best effort
  // basis.
  bool language_supported = 3;
}

// The document moderation request message.
message ModerateTextRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];
}

// The document moderation response message.
message ModerateTextResponse {
  // Harmful and sensitive categories representing the input document.
  repeated ClassificationCategory moderation_categories = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See
  // [Document.language_code][google.cloud.language.v2.Document.language_code]
  // field for more details.
  string language_code = 2;

  // Whether the language is officially supported. The API may still return a
  // response when the language is not supported, but it is on a best effort
  // basis.
  bool language_supported = 3;
}

// The request message for the text annotation API, which can perform multiple
// analysis types in one call.
message AnnotateTextRequest {
  // All available features.
  // Setting each one to true will enable that specific analysis for the input.
  message Features {
    // Optional. Extract entities.
    bool extract_entities = 1 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Extract document-level sentiment.
    bool extract_document_sentiment = 2
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. Classify the full document into categories.
    bool classify_text = 4 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Moderate the document for harmful and sensitive categories.
    bool moderate_text = 5 [(google.api.field_behavior) = OPTIONAL];
  }

  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The enabled features.
  Features features = 2 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 3;
}

// The text annotations response message.
message AnnotateTextResponse {
  // Sentences in the input document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v2.AnnotateTextRequest.Features.extract_document_sentiment].
  repeated Sentence sentences = 1;

  // Entities, along with their semantic information, in the input document.
  // Populated if the user enables
  // [AnnotateTextRequest.Features.extract_entities][google.cloud.language.v2.AnnotateTextRequest.Features.extract_entities]
  // or
  // [AnnotateTextRequest.Features.extract_entity_sentiment][google.cloud.language.v2.AnnotateTextRequest.Features.extract_entity_sentiment].
  repeated Entity entities = 2;

  // The overall sentiment for the document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v2.AnnotateTextRequest.Features.extract_document_sentiment].
  Sentiment document_sentiment = 3;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See
  // [Document.language_code][google.cloud.language.v2.Document.language_code]
  // field for more details.
  string language_code = 4;

  // Categories identified in the input document.
  repeated ClassificationCategory categories = 5;

  // Harmful and sensitive categories identified in the input document.
  repeated ClassificationCategory moderation_categories = 6;

  // Whether the language is officially supported by all requested features.
  // The API may still return a response when the language is not supported, but
  // it is on a best effort basis.
  bool language_supported = 7;
}