// Copyright 2019 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

package google.cloud.language.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/language/v1;language";
option java_multiple_files = true;
option java_outer_classname = "LanguageServiceProto";
option java_package = "com.google.cloud.language.v1";

// Provides text analysis operations such as sentiment analysis and entity
// recognition.
service LanguageService {
  option (google.api.default_host) = "language.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-language,"
      "https://www.googleapis.com/auth/cloud-platform";

  // Analyzes the sentiment of the provided text.
  rpc AnalyzeSentiment(AnalyzeSentimentRequest) returns (AnalyzeSentimentResponse) {
    option (google.api.http) = {
      post: "/v1/documents:analyzeSentiment"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Finds named entities (currently proper names and common nouns) in the text
  // along with entity types, salience, mentions for each entity, and
  // other properties.
  rpc AnalyzeEntities(AnalyzeEntitiesRequest) returns (AnalyzeEntitiesResponse) {
    option (google.api.http) = {
      post: "/v1/documents:analyzeEntities"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Finds entities, similar to [AnalyzeEntities][google.cloud.language.v1.LanguageService.AnalyzeEntities] in the text and analyzes
  // sentiment associated with each entity and its mentions.
  rpc AnalyzeEntitySentiment(AnalyzeEntitySentimentRequest) returns (AnalyzeEntitySentimentResponse) {
    option (google.api.http) = {
      post: "/v1/documents:analyzeEntitySentiment"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Analyzes the syntax of the text and provides sentence boundaries and
  // tokenization along with part of speech tags, dependency trees, and other
  // properties.
  rpc AnalyzeSyntax(AnalyzeSyntaxRequest) returns (AnalyzeSyntaxResponse) {
    option (google.api.http) = {
      post: "/v1/documents:analyzeSyntax"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Classifies a document into categories.
  rpc ClassifyText(ClassifyTextRequest) returns (ClassifyTextResponse) {
    option (google.api.http) = {
      post: "/v1/documents:classifyText"
      body: "*"
    };
    option (google.api.method_signature) = "document";
  }

  // A convenience method that provides all the features that analyzeSentiment,
  // analyzeEntities, and analyzeSyntax provide in one call.
  rpc AnnotateText(AnnotateTextRequest) returns (AnnotateTextResponse) {
    option (google.api.http) = {
      post: "/v1/documents:annotateText"
      body: "*"
    };
    option (google.api.method_signature) = "document,features,encoding_type";
    option (google.api.method_signature) = "document,features";
  }
}

// Represents the input to API methods.
message Document {
  // The document types enum.
  enum Type {
    // The content type is not specified.
    TYPE_UNSPECIFIED = 0;

    // Plain text
    PLAIN_TEXT = 1;

    // HTML
    HTML = 2;
  }

  // Required. If the type is not set or is `TYPE_UNSPECIFIED`,
  // returns an `INVALID_ARGUMENT` error.
  Type type = 1;

  // The source of the document: a string containing the content or a
  // Google Cloud Storage URI.
  oneof source {
    // The content of the input in string format.
    // Cloud audit logging exempt since it is based on user data.
    string content = 2;

    // The Google Cloud Storage URI where the file content is located.
    // This URI must be of the form: gs://bucket_name/object_name. For more
    // details, see https://cloud.google.com/storage/docs/reference-uris.
    // NOTE: Cloud Storage object versioning is not supported.
    string gcs_content_uri = 3;
  }

  // The language of the document (if not specified, the language is
  // automatically detected). Both ISO and BCP-47 language codes are
  // accepted.
  // [Language
  // Support](https://cloud.google.com/natural-language/docs/languages) lists
  // currently supported languages for each API method. If the language (either
  // specified by the caller or automatically detected) is not supported by the
  // called API method, an `INVALID_ARGUMENT` error is returned.
  string language = 4;
}

// Represents a sentence in the input document.
message Sentence {
  // The sentence text.
  TextSpan text = 1;

  // For calls to [AnalyzeSentiment][] or if
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1.AnnotateTextRequest.Features.extract_document_sentiment] is set to
  // true, this field will contain the sentiment for the sentence.
  Sentiment sentiment = 2;
}

// Represents a phrase in the text that is a known entity, such as
// a person, an organization, or location. The API associates information, such
// as salience and mentions, with entities.
message Entity {
  // The type of the entity. For most entity types, the associated metadata is a
  // Wikipedia URL (`wikipedia_url`) and Knowledge Graph MID (`mid`). The table
  // below lists the associated fields for entities that have different
  // metadata.
  enum Type {
    // Unknown
    UNKNOWN = 0;

    // Person
    PERSON = 1;

    // Location
    LOCATION = 2;

    // Organization
    ORGANIZATION = 3;

    // Event
    EVENT = 4;

    // Artwork
    WORK_OF_ART = 5;

    // Consumer product
    CONSUMER_GOOD = 6;

    // Other types of entities
    OTHER = 7;

    // Phone number
    //
    // The metadata lists the phone number, formatted according to local
    // convention, plus whichever additional elements appear in the text.
    PHONE_NUMBER = 9;

    // Address
    //
    // The metadata identifies the street number and locality plus whichever
    // additional elements appear in the text.
    ADDRESS = 10;

    // Date
    //
    // The metadata identifies the components of the date.
    DATE = 11;

    // Number
    //
    // The metadata is the number itself.
    NUMBER = 12;

    // Price
    //
    // The metadata identifies the value and currency.
    PRICE = 13;
  }

  // The representative name for the entity.
  string name = 1;

  // The entity type.
  Type type = 2;

  // Metadata associated with the entity.
  //
  // For most entity types, the metadata is a Wikipedia URL (`wikipedia_url`)
  // and Knowledge Graph MID (`mid`), if they are available. For the metadata
  // associated with other entity types, see the Type table below.
  map<string, string> metadata = 3;

  // The salience score associated with the entity in the [0, 1.0] range.
  //
  // The salience score for an entity provides information about the
  // importance or centrality of that entity to the entire document text.
  // Scores closer to 0 are less salient, while scores closer to 1.0 are highly
  // salient.
  float salience = 4;

  // The mentions of this entity in the input document. The API currently
  // supports proper noun mentions.
  repeated EntityMention mentions = 5;

  // For calls to [AnalyzeEntitySentiment][] or if
  // [AnnotateTextRequest.Features.extract_entity_sentiment][google.cloud.language.v1.AnnotateTextRequest.Features.extract_entity_sentiment] is set to
  // true, this field will contain the aggregate sentiment expressed for this
  // entity in the provided document.
  Sentiment sentiment = 6;
}

// Represents the text encoding that the caller uses to process the output.
// Providing an `EncodingType` is recommended because the API provides the
// beginning offsets for various outputs, such as tokens and mentions, and
// languages that natively use different text encodings may access offsets
// differently.
enum EncodingType {
  // If `EncodingType` is not specified, encoding-dependent information (such as
  // `begin_offset`) will be set at `-1`.
  NONE = 0;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-8 encoding of the input. C++ and Go are examples of languages
  // that use this encoding natively.
  UTF8 = 1;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-16 encoding of the input. Java and JavaScript are examples of
  // languages that use this encoding natively.
  UTF16 = 2;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-32 encoding of the input. Python is an example of a language
  // that uses this encoding natively.
  UTF32 = 3;
}

// Represents the smallest syntactic building block of the text.
message Token {
  // The token text.
  TextSpan text = 1;

  // Parts of speech tag for this token.
  PartOfSpeech part_of_speech = 2;

  // Dependency tree parse for this token.
  DependencyEdge dependency_edge = 3;

  // [Lemma](https://en.wikipedia.org/wiki/Lemma_%28morphology%29) of the token.
  string lemma = 4;
}

// Represents the feeling associated with the entire text or entities in
// the text.
message Sentiment {
  // A non-negative number in the [0, +inf) range, which represents
  // the absolute magnitude of sentiment regardless of score (positive or
  // negative).
  float magnitude = 2;

  // Sentiment score between -1.0 (negative sentiment) and 1.0
  // (positive sentiment).
  float score = 3;
}

// Represents part of speech information for a token. Parts of speech
// are as defined in
// http://www.lrec-conf.org/proceedings/lrec2012/pdf/274_Paper.pdf
message PartOfSpeech {
  // The part of speech tags enum.
  enum Tag {
    // Unknown
    UNKNOWN = 0;

    // Adjective
    ADJ = 1;

    // Adposition (preposition and postposition)
    ADP = 2;

    // Adverb
    ADV = 3;

    // Conjunction
    CONJ = 4;

    // Determiner
    DET = 5;

    // Noun (common and proper)
    NOUN = 6;

    // Cardinal number
    NUM = 7;

    // Pronoun
    PRON = 8;

    // Particle or other function word
    PRT = 9;

    // Punctuation
    PUNCT = 10;

    // Verb (all tenses and modes)
    VERB = 11;

    // Other: foreign words, typos, abbreviations
    X = 12;

    // Affix
    AFFIX = 13;
  }

  // The characteristic of a verb that expresses time flow during an event.
  enum Aspect {
    // Aspect is not applicable in the analyzed language or is not predicted.
    ASPECT_UNKNOWN = 0;

    // Perfective
    PERFECTIVE = 1;

    // Imperfective
    IMPERFECTIVE = 2;

    // Progressive
    PROGRESSIVE = 3;
  }

  // The grammatical function performed by a noun or pronoun in a phrase,
  // clause, or sentence. In some languages, other parts of speech, such as
  // adjective and determiner, take case inflection in agreement with the noun.
  enum Case {
    // Case is not applicable in the analyzed language or is not predicted.
    CASE_UNKNOWN = 0;

    // Accusative
    ACCUSATIVE = 1;

    // Adverbial
    ADVERBIAL = 2;

    // Complementive
    COMPLEMENTIVE = 3;

    // Dative
    DATIVE = 4;

    // Genitive
    GENITIVE = 5;

    // Instrumental
    INSTRUMENTAL = 6;

    // Locative
    LOCATIVE = 7;

    // Nominative
    NOMINATIVE = 8;

    // Oblique
    OBLIQUE = 9;

    // Partitive
    PARTITIVE = 10;

    // Prepositional
    PREPOSITIONAL = 11;

    // Reflexive
    REFLEXIVE_CASE = 12;

    // Relative
    RELATIVE_CASE = 13;

    // Vocative
    VOCATIVE = 14;
  }

  // Depending on the language, Form can be categorizing different forms of
  // verbs, adjectives, adverbs, etc. For example, categorizing inflected
  // endings of verbs and adjectives or distinguishing between short and long
  // forms of adjectives and participles
  enum Form {
    // Form is not applicable in the analyzed language or is not predicted.
    FORM_UNKNOWN = 0;

    // Adnominal
    ADNOMIAL = 1;

    // Auxiliary
    AUXILIARY = 2;

    // Complementizer
    COMPLEMENTIZER = 3;

    // Final ending
    FINAL_ENDING = 4;

    // Gerund
    GERUND = 5;

    // Realis
    REALIS = 6;

    // Irrealis
    IRREALIS = 7;

    // Short form
    SHORT = 8;

    // Long form
    LONG = 9;

    // Order form
    ORDER = 10;

    // Specific form
    SPECIFIC = 11;
  }

  // Gender classes of nouns reflected in the behaviour of associated words.
  enum Gender {
    // Gender is not applicable in the analyzed language or is not predicted.
    GENDER_UNKNOWN = 0;

    // Feminine
    FEMININE = 1;

    // Masculine
    MASCULINE = 2;

    // Neuter
    NEUTER = 3;
  }

  // The grammatical feature of verbs, used for showing modality and attitude.
  enum Mood {
    // Mood is not applicable in the analyzed language or is not predicted.
    MOOD_UNKNOWN = 0;

    // Conditional
    CONDITIONAL_MOOD = 1;

    // Imperative
    IMPERATIVE = 2;

    // Indicative
    INDICATIVE = 3;

    // Interrogative
    INTERROGATIVE = 4;

    // Jussive
    JUSSIVE = 5;

    // Subjunctive
    SUBJUNCTIVE = 6;
  }

  // Count distinctions.
  enum Number {
    // Number is not applicable in the analyzed language or is not predicted.
    NUMBER_UNKNOWN = 0;

    // Singular
    SINGULAR = 1;

    // Plural
    PLURAL = 2;

    // Dual
    DUAL = 3;
  }

  // The distinction between the speaker, second person, third person, etc.
  enum Person {
    // Person is not applicable in the analyzed language or is not predicted.
    PERSON_UNKNOWN = 0;

    // First
    FIRST = 1;

    // Second
    SECOND = 2;

    // Third
    THIRD = 3;

    // Reflexive
    REFLEXIVE_PERSON = 4;
  }

  // This category shows if the token is part of a proper name.
  enum Proper {
    // Proper is not applicable in the analyzed language or is not predicted.
    PROPER_UNKNOWN = 0;

    // Proper
    PROPER = 1;

    // Not proper
    NOT_PROPER = 2;
  }

  // Reciprocal features of a pronoun.
  enum Reciprocity {
    // Reciprocity is not applicable in the analyzed language or is not
    // predicted.
    RECIPROCITY_UNKNOWN = 0;

    // Reciprocal
    RECIPROCAL = 1;

    // Non-reciprocal
    NON_RECIPROCAL = 2;
  }

  // Time reference.
  enum Tense {
    // Tense is not applicable in the analyzed language or is not predicted.
    TENSE_UNKNOWN = 0;

    // Conditional
    CONDITIONAL_TENSE = 1;

    // Future
    FUTURE = 2;

    // Past
    PAST = 3;

    // Present
    PRESENT = 4;

    // Imperfect
    IMPERFECT = 5;

    // Pluperfect
    PLUPERFECT = 6;
  }

  // The relationship between the action that a verb expresses and the
  // participants identified by its arguments.
  enum Voice {
    // Voice is not applicable in the analyzed language or is not predicted.
    VOICE_UNKNOWN = 0;

    // Active
    ACTIVE = 1;

    // Causative
    CAUSATIVE = 2;

    // Passive
    PASSIVE = 3;
  }

  // The part of speech tag.
  Tag tag = 1;

  // The grammatical aspect.
  Aspect aspect = 2;

  // The grammatical case.
  Case case = 3;

  // The grammatical form.
  Form form = 4;

  // The grammatical gender.
  Gender gender = 5;

  // The grammatical mood.
  Mood mood = 6;

  // The grammatical number.
  Number number = 7;

  // The grammatical person.
  Person person = 8;

  // The grammatical properness.
  Proper proper = 9;

  // The grammatical reciprocity.
  Reciprocity reciprocity = 10;

  // The grammatical tense.
  Tense tense = 11;

  // The grammatical voice.
  Voice voice = 12;
}

// Represents dependency parse tree information for a token. (For more
// information on dependency labels, see
// http://www.aclweb.org/anthology/P13-2017)
message DependencyEdge {
  // The parse label enum for the token.
  enum Label {
    // Unknown
    UNKNOWN = 0;

    // Abbreviation modifier
    ABBREV = 1;

    // Adjectival complement
    ACOMP = 2;

    // Adverbial clause modifier
    ADVCL = 3;

    // Adverbial modifier
    ADVMOD = 4;

    // Adjectival modifier of an NP
    AMOD = 5;

    // Appositional modifier of an NP
    APPOS = 6;

    // Attribute dependent of a copular verb
    ATTR = 7;

    // Auxiliary (non-main) verb
    AUX = 8;

    // Passive auxiliary
    AUXPASS = 9;

    // Coordinating conjunction
    CC = 10;

    // Clausal complement of a verb or adjective
    CCOMP = 11;

    // Conjunct
    CONJ = 12;

    // Clausal subject
    CSUBJ = 13;

    // Clausal passive subject
    CSUBJPASS = 14;

    // Dependency (unable to determine)
    DEP = 15;

    // Determiner
    DET = 16;

    // Discourse
    DISCOURSE = 17;

    // Direct object
    DOBJ = 18;

    // Expletive
    EXPL = 19;

    // Goes with (part of a word in a text not well edited)
    GOESWITH = 20;

    // Indirect object
    IOBJ = 21;

    // Marker (word introducing a subordinate clause)
    MARK = 22;

    // Multi-word expression
    MWE = 23;

    // Multi-word verbal expression
    MWV = 24;

    // Negation modifier
    NEG = 25;

    // Noun compound modifier
    NN = 26;

    // Noun phrase used as an adverbial modifier
    NPADVMOD = 27;

    // Nominal subject
    NSUBJ = 28;

    // Passive nominal subject
    NSUBJPASS = 29;

    // Numeric modifier of a noun
    NUM = 30;

    // Element of compound number
    NUMBER = 31;

    // Punctuation mark
    P = 32;

    // Parataxis relation
    PARATAXIS = 33;

    // Participial modifier
    PARTMOD = 34;

    // The complement of a preposition is a clause
    PCOMP = 35;

    // Object of a preposition
    POBJ = 36;

    // Possession modifier
    POSS = 37;

    // Postverbal negative particle
    POSTNEG = 38;

    // Predicate complement
    PRECOMP = 39;

    // Preconjunct
    PRECONJ = 40;

    // Predeterminer
    PREDET = 41;

    // Prefix
    PREF = 42;

    // Prepositional modifier
    PREP = 43;

    // The relationship between a verb and verbal morpheme
    PRONL = 44;

    // Particle
    PRT = 45;

    // Associative or possessive marker
    PS = 46;

    // Quantifier phrase modifier
    QUANTMOD = 47;

    // Relative clause modifier
    RCMOD = 48;

    // Complementizer in relative clause
    RCMODREL = 49;

    // Ellipsis without a preceding predicate
    RDROP = 50;

    // Referent
    REF = 51;

    // Remnant
    REMNANT = 52;

    // Reparandum
    REPARANDUM = 53;

    // Root
    ROOT = 54;

    // Suffix specifying a unit of number
    SNUM = 55;

    // Suffix
    SUFF = 56;

    // Temporal modifier
    TMOD = 57;

    // Topic marker
    TOPIC = 58;

    // Clause headed by an infinite form of the verb that modifies a noun
    VMOD = 59;

    // Vocative
    VOCATIVE = 60;

    // Open clausal complement
    XCOMP = 61;

    // Name suffix
    SUFFIX = 62;

    // Name title
    TITLE = 63;

    // Adverbial phrase modifier
    ADVPHMOD = 64;

    // Causative auxiliary
    AUXCAUS = 65;

    // Helper auxiliary
    AUXVV = 66;

    // Rentaishi (Prenominal modifier)
    DTMOD = 67;

    // Foreign words
    FOREIGN = 68;

    // Keyword
    KW = 69;

    // List for chains of comparable items
    LIST = 70;

    // Nominalized clause
    NOMC = 71;

    // Nominalized clausal subject
    NOMCSUBJ = 72;

    // Nominalized clausal passive
    NOMCSUBJPASS = 73;

    // Compound of numeric modifier
    NUMC = 74;

    // Copula
    COP = 75;

    // Dislocated relation (for fronted/topicalized elements)
    DISLOCATED = 76;

    // Aspect marker
    ASP = 77;

    // Genitive modifier
    GMOD = 78;

    // Genitive object
    GOBJ = 79;

    // Infinitival modifier
    INFMOD = 80;

    // Measure
    MES = 81;

    // Nominal complement of a noun
    NCOMP = 82;
  }

  // Represents the head of this token in the dependency tree.
  // This is the index of the token which has an arc going to this token.
  // The index is the position of the token in the array of tokens returned
  // by the API method. If this token is a root token, then the
  // `head_token_index` is its own index.
  int32 head_token_index = 1;

  // The parse label for the token.
  Label label = 2;
}

// Represents a mention for an entity in the text. Currently, proper noun
// mentions are supported.
message EntityMention {
  // The supported types of mentions.
  enum Type {
    // Unknown
    TYPE_UNKNOWN = 0;

    // Proper name
    PROPER = 1;

    // Common noun (or noun compound)
    COMMON = 2;
  }

  // The mention text.
  TextSpan text = 1;

  // The type of the entity mention.
  Type type = 2;

  // For calls to [AnalyzeEntitySentiment][] or if
  // [AnnotateTextRequest.Features.extract_entity_sentiment][google.cloud.language.v1.AnnotateTextRequest.Features.extract_entity_sentiment] is set to
  // true, this field will contain the sentiment expressed for this mention of
  // the entity in the provided document.
  Sentiment sentiment = 3;
}

// Represents an output piece of text.
message TextSpan {
  // The content of the output text.
  string content = 1;

  // The API calculates the beginning offset of the content in the original
  // document according to the [EncodingType][google.cloud.language.v1.EncodingType] specified in the API request.
  int32 begin_offset = 2;
}

// Represents a category returned from the text classifier.
message ClassificationCategory {
  // The name of the category representing the document, from the [predefined
  // taxonomy](https://cloud.google.com/natural-language/docs/categories).
  string name = 1;

  // The classifier's confidence of the category. Number represents how certain
  // the classifier is that this category represents the given text.
  float confidence = 2;
}

// The sentiment analysis request message.
message AnalyzeSentimentRequest {
  // Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate sentence offsets.
  EncodingType encoding_type = 2;
}

// The sentiment analysis response message.
message AnalyzeSentimentResponse {
  // The overall sentiment of the input document.
  Sentiment document_sentiment = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1.Document.language] field for more details.
  string language = 2;

  // The sentiment for all the sentences in the document.
  repeated Sentence sentences = 3;
}

// The entity-level sentiment analysis request message.
message AnalyzeEntitySentimentRequest {
  // Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}

// The entity-level sentiment analysis response message.
message AnalyzeEntitySentimentResponse {
  // The recognized entities in the input document with associated sentiments.
  repeated Entity entities = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1.Document.language] field for more details.
  string language = 2;
}

// The entity analysis request message.
message AnalyzeEntitiesRequest {
  // Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}

// The entity analysis response message.
message AnalyzeEntitiesResponse {
  // The recognized entities in the input document.
  repeated Entity entities = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1.Document.language] field for more details.
  string language = 2;
}

// The syntax analysis request message.
message AnalyzeSyntaxRequest {
  // Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}

// The syntax analysis response message.
message AnalyzeSyntaxResponse {
  // Sentences in the input document.
  repeated Sentence sentences = 1;

  // Tokens, along with their syntactic information, in the input document.
  repeated Token tokens = 2;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1.Document.language] field for more details.
  string language = 3;
}

// The document classification request message.
message ClassifyTextRequest {
  // Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];
}

// The document classification response message.
message ClassifyTextResponse {
  // Categories representing the input document.
  repeated ClassificationCategory categories = 1;
}

// The request message for the text annotation API, which can perform multiple
// analysis types (sentiment, entities, and syntax) in one call.
message AnnotateTextRequest {
  // All available features for sentiment, syntax, and semantic analysis.
  // Setting each one to true will enable that specific analysis for the input.
  message Features {
    // Extract syntax information.
    bool extract_syntax = 1;

    // Extract entities.
    bool extract_entities = 2;

    // Extract document-level sentiment.
    bool extract_document_sentiment = 3;

    // Extract entities and their associated sentiment.
    bool extract_entity_sentiment = 4;

    // Classify the full document into categories.
    bool classify_text = 6;
  }

  // Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The enabled features.
  Features features = 2 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 3;
}

// The text annotations response message.
message AnnotateTextResponse {
  // Sentences in the input document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1.AnnotateTextRequest.Features.extract_syntax].
  repeated Sentence sentences = 1;

  // Tokens, along with their syntactic information, in the input document.
  // Populated if the user enables
  // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1.AnnotateTextRequest.Features.extract_syntax].
  repeated Token tokens = 2;

  // Entities, along with their semantic information, in the input document.
  // Populated if the user enables
  // [AnnotateTextRequest.Features.extract_entities][google.cloud.language.v1.AnnotateTextRequest.Features.extract_entities].
  repeated Entity entities = 3;

  // The overall sentiment for the document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1.AnnotateTextRequest.Features.extract_document_sentiment].
  Sentiment document_sentiment = 4;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See [Document.language][google.cloud.language.v1.Document.language] field for more details.
  string language = 5;

  // Categories identified in the input document.
  repeated ClassificationCategory categories = 6;
}