// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.language.v1beta2;
import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
option go_package = "google.golang.org/genproto/googleapis/cloud/language/v1beta2;language";
option java_multiple_files = true;
option java_outer_classname = "LanguageServiceProto";
option java_package = "com.google.cloud.language.v1beta2";
// Provides text analysis operations such as sentiment analysis and entity
// recognition.
service LanguageService {
option (google.api.default_host) = "language.googleapis.com";
option (google.api.oauth_scopes) =
"https://www.googleapis.com/auth/cloud-language,"
"https://www.googleapis.com/auth/cloud-platform";
// Analyzes the sentiment of the provided text.
rpc AnalyzeSentiment(AnalyzeSentimentRequest) returns (AnalyzeSentimentResponse) {
option (google.api.http) = {
post: "/v1beta2/documents:analyzeSentiment"
body: "*"
};
option (google.api.method_signature) = "document,encoding_type";
option (google.api.method_signature) = "document";
}
// Finds named entities (currently proper names and common nouns) in the text
// along with entity types, salience, mentions for each entity, and
// other properties.
rpc AnalyzeEntities(AnalyzeEntitiesRequest) returns (AnalyzeEntitiesResponse) {
option (google.api.http) = {
post: "/v1beta2/documents:analyzeEntities"
body: "*"
};
option (google.api.method_signature) = "document,encoding_type";
option (google.api.method_signature) = "document";
}
// Finds entities, similar to [AnalyzeEntities][google.cloud.language.v1beta2.LanguageService.AnalyzeEntities] in the text and analyzes
// sentiment associated with each entity and its mentions.
rpc AnalyzeEntitySentiment(AnalyzeEntitySentimentRequest) returns (AnalyzeEntitySentimentResponse) {
option (google.api.http) = {
post: "/v1beta2/documents:analyzeEntitySentiment"
body: "*"
};
option (google.api.method_signature) = "document,encoding_type";
option (google.api.method_signature) = "document";
}
// Analyzes the syntax of the text and provides sentence boundaries and
// tokenization along with part of speech tags, dependency trees, and other
// properties.
rpc AnalyzeSyntax(AnalyzeSyntaxRequest) returns (AnalyzeSyntaxResponse) {
option (google.api.http) = {
post: "/v1beta2/documents:analyzeSyntax"
body: "*"
};
option (google.api.method_signature) = "document,encoding_type";
option (google.api.method_signature) = "document";
}
// Classifies a document into categories.
rpc ClassifyText(ClassifyTextRequest) returns (ClassifyTextResponse) {
option (google.api.http) = {
post: "/v1beta2/documents:classifyText"
body: "*"
};
option (google.api.method_signature) = "document";
}
// A convenience method that provides all syntax, sentiment, entity, and
// classification features in one call.
rpc AnnotateText(AnnotateTextRequest) returns (AnnotateTextResponse) {
option (google.api.http) = {
post: "/v1beta2/documents:annotateText"
body: "*"
};
option (google.api.method_signature) = "document,features,encoding_type";
option (google.api.method_signature) = "document,features";
}
}
// Represents the input to API methods.
message Document {
// The document types enum.
enum Type {
// The content type is not specified.
TYPE_UNSPECIFIED = 0;
// Plain text
PLAIN_TEXT = 1;
// HTML
HTML = 2;
}
// Ways of handling boilerplate detected in the document
enum BoilerplateHandling {
// The boilerplate handling is not specified.
BOILERPLATE_HANDLING_UNSPECIFIED = 0;
// Do not analyze detected boilerplate. Reference web URI is required for
// detecting boilerplate.
SKIP_BOILERPLATE = 1;
// Treat boilerplate the same as content.
KEEP_BOILERPLATE = 2;
}
// Required. If the type is not set or is `TYPE_UNSPECIFIED`,
// returns an `INVALID_ARGUMENT` error.
Type type = 1;
// The source of the document: a string containing the content or a
// Google Cloud Storage URI.
oneof source {
// The content of the input in string format.
// Cloud audit logging exempt since it is based on user data.
string content = 2;
// The Google Cloud Storage URI where the file content is located.
// This URI must be of the form: gs://bucket_name/object_name. For more
// details, see https://cloud.google.com/storage/docs/reference-uris.
// NOTE: Cloud Storage object versioning is not supported.
string gcs_content_uri = 3;
}
// The language of the document (if not specified, the language is
// automatically detected). Both ISO and BCP-47 language codes are
// accepted.
// [Language
// Support](https://cloud.google.com/natural-language/docs/languages) lists
// currently supported languages for each API method. If the language (either
// specified by the caller or automatically detected) is not supported by the
// called API method, an `INVALID_ARGUMENT` error is returned.
string language = 4;
// The web URI where the document comes from. This URI is not used for
// fetching the content, but as a hint for analyzing the document.
string reference_web_uri = 5;
// Indicates how detected boilerplate(e.g. advertisements, copyright
// declarations, banners) should be handled for this document. If not
// specified, boilerplate will be treated the same as content.
BoilerplateHandling boilerplate_handling = 6;
}
// Represents a sentence in the input document.
message Sentence {
// The sentence text.
TextSpan text = 1;
// For calls to [AnalyzeSentiment][] or if
// [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_document_sentiment] is set to
// true, this field will contain the sentiment for the sentence.
Sentiment sentiment = 2;
}
// Represents the text encoding that the caller uses to process the output.
// Providing an `EncodingType` is recommended because the API provides the
// beginning offsets for various outputs, such as tokens and mentions, and
// languages that natively use different text encodings may access offsets
// differently.
enum EncodingType {
// If `EncodingType` is not specified, encoding-dependent information (such as
// `begin_offset`) will be set at `-1`.
NONE = 0;
// Encoding-dependent information (such as `begin_offset`) is calculated based
// on the UTF-8 encoding of the input. C++ and Go are examples of languages
// that use this encoding natively.
UTF8 = 1;
// Encoding-dependent information (such as `begin_offset`) is calculated based
// on the UTF-16 encoding of the input. Java and JavaScript are examples of
// languages that use this encoding natively.
UTF16 = 2;
// Encoding-dependent information (such as `begin_offset`) is calculated based
// on the UTF-32 encoding of the input. Python is an example of a language
// that uses this encoding natively.
UTF32 = 3;
}
// Represents a phrase in the text that is a known entity, such as
// a person, an organization, or location. The API associates information, such
// as salience and mentions, with entities.
message Entity {
// The type of the entity. For most entity types, the associated metadata is a
// Wikipedia URL (`wikipedia_url`) and Knowledge Graph MID (`mid`). The table
// below lists the associated fields for entities that have different
// metadata.
enum Type {
// Unknown
UNKNOWN = 0;
// Person
PERSON = 1;
// Location
LOCATION = 2;
// Organization
ORGANIZATION = 3;
// Event
EVENT = 4;
// Artwork
WORK_OF_ART = 5;
// Consumer product
CONSUMER_GOOD = 6;
// Other types of entities
OTHER = 7;
// Phone number
//
// The metadata lists the phone number, formatted according to local
// convention, plus whichever additional elements appear in the text:
//
// * `number` - the actual number, broken down into sections as per local
// convention
// * `national_prefix` - country code, if detected
// * `area_code` - region or area code, if detected
// * `extension` - phone extension (to be dialed after connection), if
// detected
PHONE_NUMBER = 9;
// Address
//
// The metadata identifies the street number and locality plus whichever
// additional elements appear in the text:
//
// * `street_number` - street number
// * `locality` - city or town
// * `street_name` - street/route name, if detected
// * `postal_code` - postal code, if detected
// * `country` - country, if detected<
// * `broad_region` - administrative area, such as the state, if detected
// * `narrow_region` - smaller administrative area, such as county, if
// detected
// * `sublocality` - used in Asian addresses to demark a district within a
// city, if detected
ADDRESS = 10;
// Date
//
// The metadata identifies the components of the date:
//
// * `year` - four digit year, if detected
// * `month` - two digit month number, if detected
// * `day` - two digit day number, if detected
DATE = 11;
// Number
//
// The metadata is the number itself.
NUMBER = 12;
// Price
//
// The metadata identifies the `value` and `currency`.
PRICE = 13;
}
// The representative name for the entity.
string name = 1;
// The entity type.
Type type = 2;
// Metadata associated with the entity.
//
// For most entity types, the metadata is a Wikipedia URL (`wikipedia_url`)
// and Knowledge Graph MID (`mid`), if they are available. For the metadata
// associated with other entity types, see the Type table below.
map metadata = 3;
// The salience score associated with the entity in the [0, 1.0] range.
//
// The salience score for an entity provides information about the
// importance or centrality of that entity to the entire document text.
// Scores closer to 0 are less salient, while scores closer to 1.0 are highly
// salient.
float salience = 4;
// The mentions of this entity in the input document. The API currently
// supports proper noun mentions.
repeated EntityMention mentions = 5;
// For calls to [AnalyzeEntitySentiment][] or if
// [AnnotateTextRequest.Features.extract_entity_sentiment][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_entity_sentiment] is set to
// true, this field will contain the aggregate sentiment expressed for this
// entity in the provided document.
Sentiment sentiment = 6;
}
// Represents the smallest syntactic building block of the text.
message Token {
// The token text.
TextSpan text = 1;
// Parts of speech tag for this token.
PartOfSpeech part_of_speech = 2;
// Dependency tree parse for this token.
DependencyEdge dependency_edge = 3;
// [Lemma](https://en.wikipedia.org/wiki/Lemma_%28morphology%29) of the token.
string lemma = 4;
}
// Represents the feeling associated with the entire text or entities in
// the text.
// Next ID: 6
message Sentiment {
// A non-negative number in the [0, +inf) range, which represents
// the absolute magnitude of sentiment regardless of score (positive or
// negative).
float magnitude = 2;
// Sentiment score between -1.0 (negative sentiment) and 1.0
// (positive sentiment).
float score = 3;
}
// Represents part of speech information for a token.
message PartOfSpeech {
// The part of speech tags enum.
enum Tag {
// Unknown
UNKNOWN = 0;
// Adjective
ADJ = 1;
// Adposition (preposition and postposition)
ADP = 2;
// Adverb
ADV = 3;
// Conjunction
CONJ = 4;
// Determiner
DET = 5;
// Noun (common and proper)
NOUN = 6;
// Cardinal number
NUM = 7;
// Pronoun
PRON = 8;
// Particle or other function word
PRT = 9;
// Punctuation
PUNCT = 10;
// Verb (all tenses and modes)
VERB = 11;
// Other: foreign words, typos, abbreviations
X = 12;
// Affix
AFFIX = 13;
}
// The characteristic of a verb that expresses time flow during an event.
enum Aspect {
// Aspect is not applicable in the analyzed language or is not predicted.
ASPECT_UNKNOWN = 0;
// Perfective
PERFECTIVE = 1;
// Imperfective
IMPERFECTIVE = 2;
// Progressive
PROGRESSIVE = 3;
}
// The grammatical function performed by a noun or pronoun in a phrase,
// clause, or sentence. In some languages, other parts of speech, such as
// adjective and determiner, take case inflection in agreement with the noun.
enum Case {
// Case is not applicable in the analyzed language or is not predicted.
CASE_UNKNOWN = 0;
// Accusative
ACCUSATIVE = 1;
// Adverbial
ADVERBIAL = 2;
// Complementive
COMPLEMENTIVE = 3;
// Dative
DATIVE = 4;
// Genitive
GENITIVE = 5;
// Instrumental
INSTRUMENTAL = 6;
// Locative
LOCATIVE = 7;
// Nominative
NOMINATIVE = 8;
// Oblique
OBLIQUE = 9;
// Partitive
PARTITIVE = 10;
// Prepositional
PREPOSITIONAL = 11;
// Reflexive
REFLEXIVE_CASE = 12;
// Relative
RELATIVE_CASE = 13;
// Vocative
VOCATIVE = 14;
}
// Depending on the language, Form can be categorizing different forms of
// verbs, adjectives, adverbs, etc. For example, categorizing inflected
// endings of verbs and adjectives or distinguishing between short and long
// forms of adjectives and participles
enum Form {
// Form is not applicable in the analyzed language or is not predicted.
FORM_UNKNOWN = 0;
// Adnomial
ADNOMIAL = 1;
// Auxiliary
AUXILIARY = 2;
// Complementizer
COMPLEMENTIZER = 3;
// Final ending
FINAL_ENDING = 4;
// Gerund
GERUND = 5;
// Realis
REALIS = 6;
// Irrealis
IRREALIS = 7;
// Short form
SHORT = 8;
// Long form
LONG = 9;
// Order form
ORDER = 10;
// Specific form
SPECIFIC = 11;
}
// Gender classes of nouns reflected in the behaviour of associated words.
enum Gender {
// Gender is not applicable in the analyzed language or is not predicted.
GENDER_UNKNOWN = 0;
// Feminine
FEMININE = 1;
// Masculine
MASCULINE = 2;
// Neuter
NEUTER = 3;
}
// The grammatical feature of verbs, used for showing modality and attitude.
enum Mood {
// Mood is not applicable in the analyzed language or is not predicted.
MOOD_UNKNOWN = 0;
// Conditional
CONDITIONAL_MOOD = 1;
// Imperative
IMPERATIVE = 2;
// Indicative
INDICATIVE = 3;
// Interrogative
INTERROGATIVE = 4;
// Jussive
JUSSIVE = 5;
// Subjunctive
SUBJUNCTIVE = 6;
}
// Count distinctions.
enum Number {
// Number is not applicable in the analyzed language or is not predicted.
NUMBER_UNKNOWN = 0;
// Singular
SINGULAR = 1;
// Plural
PLURAL = 2;
// Dual
DUAL = 3;
}
// The distinction between the speaker, second person, third person, etc.
enum Person {
// Person is not applicable in the analyzed language or is not predicted.
PERSON_UNKNOWN = 0;
// First
FIRST = 1;
// Second
SECOND = 2;
// Third
THIRD = 3;
// Reflexive
REFLEXIVE_PERSON = 4;
}
// This category shows if the token is part of a proper name.
enum Proper {
// Proper is not applicable in the analyzed language or is not predicted.
PROPER_UNKNOWN = 0;
// Proper
PROPER = 1;
// Not proper
NOT_PROPER = 2;
}
// Reciprocal features of a pronoun.
enum Reciprocity {
// Reciprocity is not applicable in the analyzed language or is not
// predicted.
RECIPROCITY_UNKNOWN = 0;
// Reciprocal
RECIPROCAL = 1;
// Non-reciprocal
NON_RECIPROCAL = 2;
}
// Time reference.
enum Tense {
// Tense is not applicable in the analyzed language or is not predicted.
TENSE_UNKNOWN = 0;
// Conditional
CONDITIONAL_TENSE = 1;
// Future
FUTURE = 2;
// Past
PAST = 3;
// Present
PRESENT = 4;
// Imperfect
IMPERFECT = 5;
// Pluperfect
PLUPERFECT = 6;
}
// The relationship between the action that a verb expresses and the
// participants identified by its arguments.
enum Voice {
// Voice is not applicable in the analyzed language or is not predicted.
VOICE_UNKNOWN = 0;
// Active
ACTIVE = 1;
// Causative
CAUSATIVE = 2;
// Passive
PASSIVE = 3;
}
// The part of speech tag.
Tag tag = 1;
// The grammatical aspect.
Aspect aspect = 2;
// The grammatical case.
Case case = 3;
// The grammatical form.
Form form = 4;
// The grammatical gender.
Gender gender = 5;
// The grammatical mood.
Mood mood = 6;
// The grammatical number.
Number number = 7;
// The grammatical person.
Person person = 8;
// The grammatical properness.
Proper proper = 9;
// The grammatical reciprocity.
Reciprocity reciprocity = 10;
// The grammatical tense.
Tense tense = 11;
// The grammatical voice.
Voice voice = 12;
}
// Represents dependency parse tree information for a token.
message DependencyEdge {
// The parse label enum for the token.
enum Label {
// Unknown
UNKNOWN = 0;
// Abbreviation modifier
ABBREV = 1;
// Adjectival complement
ACOMP = 2;
// Adverbial clause modifier
ADVCL = 3;
// Adverbial modifier
ADVMOD = 4;
// Adjectival modifier of an NP
AMOD = 5;
// Appositional modifier of an NP
APPOS = 6;
// Attribute dependent of a copular verb
ATTR = 7;
// Auxiliary (non-main) verb
AUX = 8;
// Passive auxiliary
AUXPASS = 9;
// Coordinating conjunction
CC = 10;
// Clausal complement of a verb or adjective
CCOMP = 11;
// Conjunct
CONJ = 12;
// Clausal subject
CSUBJ = 13;
// Clausal passive subject
CSUBJPASS = 14;
// Dependency (unable to determine)
DEP = 15;
// Determiner
DET = 16;
// Discourse
DISCOURSE = 17;
// Direct object
DOBJ = 18;
// Expletive
EXPL = 19;
// Goes with (part of a word in a text not well edited)
GOESWITH = 20;
// Indirect object
IOBJ = 21;
// Marker (word introducing a subordinate clause)
MARK = 22;
// Multi-word expression
MWE = 23;
// Multi-word verbal expression
MWV = 24;
// Negation modifier
NEG = 25;
// Noun compound modifier
NN = 26;
// Noun phrase used as an adverbial modifier
NPADVMOD = 27;
// Nominal subject
NSUBJ = 28;
// Passive nominal subject
NSUBJPASS = 29;
// Numeric modifier of a noun
NUM = 30;
// Element of compound number
NUMBER = 31;
// Punctuation mark
P = 32;
// Parataxis relation
PARATAXIS = 33;
// Participial modifier
PARTMOD = 34;
// The complement of a preposition is a clause
PCOMP = 35;
// Object of a preposition
POBJ = 36;
// Possession modifier
POSS = 37;
// Postverbal negative particle
POSTNEG = 38;
// Predicate complement
PRECOMP = 39;
// Preconjunt
PRECONJ = 40;
// Predeterminer
PREDET = 41;
// Prefix
PREF = 42;
// Prepositional modifier
PREP = 43;
// The relationship between a verb and verbal morpheme
PRONL = 44;
// Particle
PRT = 45;
// Associative or possessive marker
PS = 46;
// Quantifier phrase modifier
QUANTMOD = 47;
// Relative clause modifier
RCMOD = 48;
// Complementizer in relative clause
RCMODREL = 49;
// Ellipsis without a preceding predicate
RDROP = 50;
// Referent
REF = 51;
// Remnant
REMNANT = 52;
// Reparandum
REPARANDUM = 53;
// Root
ROOT = 54;
// Suffix specifying a unit of number
SNUM = 55;
// Suffix
SUFF = 56;
// Temporal modifier
TMOD = 57;
// Topic marker
TOPIC = 58;
// Clause headed by an infinite form of the verb that modifies a noun
VMOD = 59;
// Vocative
VOCATIVE = 60;
// Open clausal complement
XCOMP = 61;
// Name suffix
SUFFIX = 62;
// Name title
TITLE = 63;
// Adverbial phrase modifier
ADVPHMOD = 64;
// Causative auxiliary
AUXCAUS = 65;
// Helper auxiliary
AUXVV = 66;
// Rentaishi (Prenominal modifier)
DTMOD = 67;
// Foreign words
FOREIGN = 68;
// Keyword
KW = 69;
// List for chains of comparable items
LIST = 70;
// Nominalized clause
NOMC = 71;
// Nominalized clausal subject
NOMCSUBJ = 72;
// Nominalized clausal passive
NOMCSUBJPASS = 73;
// Compound of numeric modifier
NUMC = 74;
// Copula
COP = 75;
// Dislocated relation (for fronted/topicalized elements)
DISLOCATED = 76;
// Aspect marker
ASP = 77;
// Genitive modifier
GMOD = 78;
// Genitive object
GOBJ = 79;
// Infinitival modifier
INFMOD = 80;
// Measure
MES = 81;
// Nominal complement of a noun
NCOMP = 82;
}
// Represents the head of this token in the dependency tree.
// This is the index of the token which has an arc going to this token.
// The index is the position of the token in the array of tokens returned
// by the API method. If this token is a root token, then the
// `head_token_index` is its own index.
int32 head_token_index = 1;
// The parse label for the token.
Label label = 2;
}
// Represents a mention for an entity in the text. Currently, proper noun
// mentions are supported.
message EntityMention {
// The supported types of mentions.
enum Type {
// Unknown
TYPE_UNKNOWN = 0;
// Proper name
PROPER = 1;
// Common noun (or noun compound)
COMMON = 2;
}
// The mention text.
TextSpan text = 1;
// The type of the entity mention.
Type type = 2;
// For calls to [AnalyzeEntitySentiment][] or if
// [AnnotateTextRequest.Features.extract_entity_sentiment][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_entity_sentiment] is set to
// true, this field will contain the sentiment expressed for this mention of
// the entity in the provided document.
Sentiment sentiment = 3;
}
// Represents an output piece of text.
message TextSpan {
// The content of the output text.
string content = 1;
// The API calculates the beginning offset of the content in the original
// document according to the [EncodingType][google.cloud.language.v1beta2.EncodingType] specified in the API request.
int32 begin_offset = 2;
}
// Represents a category returned from the text classifier.
message ClassificationCategory {
// The name of the category representing the document, from the [predefined
// taxonomy](https://cloud.google.com/natural-language/docs/categories).
string name = 1;
// The classifier's confidence of the category. Number represents how certain
// the classifier is that this category represents the given text.
float confidence = 2;
}
// Model options available for classification requests.
message ClassificationModelOptions {
// Options for the V1 model.
message V1Model {
}
// Options for the V2 model.
message V2Model {
// The content categories used for classification.
enum ContentCategoriesVersion {
// If `ContentCategoriesVersion` is not specified, this option will
// default to `V1`.
CONTENT_CATEGORIES_VERSION_UNSPECIFIED = 0;
// Legacy content categories of our initial launch in 2017.
V1 = 1;
// Updated content categories in 2022.
V2 = 2;
}
// The content categories used for classification.
ContentCategoriesVersion content_categories_version = 1;
}
// If this field is not set, then the `v1_model` will be used by default.
oneof model_type {
// Setting this field will use the V1 model and V1 content categories
// version. The V1 model is a legacy model; support for this will be
// discontinued in the future.
V1Model v1_model = 1;
// Setting this field will use the V2 model with the appropriate content
// categories version. The V2 model is a better performing model.
V2Model v2_model = 2;
}
}
// The sentiment analysis request message.
message AnalyzeSentimentRequest {
// Required. Input document.
Document document = 1 [(google.api.field_behavior) = REQUIRED];
// The encoding type used by the API to calculate sentence offsets for the
// sentence sentiment.
EncodingType encoding_type = 2;
}
// The sentiment analysis response message.
message AnalyzeSentimentResponse {
// The overall sentiment of the input document.
Sentiment document_sentiment = 1;
// The language of the text, which will be the same as the language specified
// in the request or, if not specified, the automatically-detected language.
// See [Document.language][google.cloud.language.v1beta2.Document.language] field for more details.
string language = 2;
// The sentiment for all the sentences in the document.
repeated Sentence sentences = 3;
}
// The entity-level sentiment analysis request message.
message AnalyzeEntitySentimentRequest {
// Required. Input document.
Document document = 1 [(google.api.field_behavior) = REQUIRED];
// The encoding type used by the API to calculate offsets.
EncodingType encoding_type = 2;
}
// The entity-level sentiment analysis response message.
message AnalyzeEntitySentimentResponse {
// The recognized entities in the input document with associated sentiments.
repeated Entity entities = 1;
// The language of the text, which will be the same as the language specified
// in the request or, if not specified, the automatically-detected language.
// See [Document.language][google.cloud.language.v1beta2.Document.language] field for more details.
string language = 2;
}
// The entity analysis request message.
message AnalyzeEntitiesRequest {
// Required. Input document.
Document document = 1 [(google.api.field_behavior) = REQUIRED];
// The encoding type used by the API to calculate offsets.
EncodingType encoding_type = 2;
}
// The entity analysis response message.
message AnalyzeEntitiesResponse {
// The recognized entities in the input document.
repeated Entity entities = 1;
// The language of the text, which will be the same as the language specified
// in the request or, if not specified, the automatically-detected language.
// See [Document.language][google.cloud.language.v1beta2.Document.language] field for more details.
string language = 2;
}
// The syntax analysis request message.
message AnalyzeSyntaxRequest {
// Required. Input document.
Document document = 1 [(google.api.field_behavior) = REQUIRED];
// The encoding type used by the API to calculate offsets.
EncodingType encoding_type = 2;
}
// The syntax analysis response message.
message AnalyzeSyntaxResponse {
// Sentences in the input document.
repeated Sentence sentences = 1;
// Tokens, along with their syntactic information, in the input document.
repeated Token tokens = 2;
// The language of the text, which will be the same as the language specified
// in the request or, if not specified, the automatically-detected language.
// See [Document.language][google.cloud.language.v1beta2.Document.language] field for more details.
string language = 3;
}
// The document classification request message.
message ClassifyTextRequest {
// Required. Input document.
Document document = 1 [(google.api.field_behavior) = REQUIRED];
// Model options to use for classification. Defaults to v1 options if not
// specified.
ClassificationModelOptions classification_model_options = 3;
}
// The document classification response message.
message ClassifyTextResponse {
// Categories representing the input document.
repeated ClassificationCategory categories = 1;
}
// The request message for the text annotation API, which can perform multiple
// analysis types (sentiment, entities, and syntax) in one call.
message AnnotateTextRequest {
// All available features for sentiment, syntax, and semantic analysis.
// Setting each one to true will enable that specific analysis for the input.
// Next ID: 11
message Features {
// Extract syntax information.
bool extract_syntax = 1;
// Extract entities.
bool extract_entities = 2;
// Extract document-level sentiment.
bool extract_document_sentiment = 3;
// Extract entities and their associated sentiment.
bool extract_entity_sentiment = 4;
// Classify the full document into categories. If this is true,
// the API will use the default model which classifies into a
// [predefined
// taxonomy](https://cloud.google.com/natural-language/docs/categories).
bool classify_text = 6;
// The model options to use for classification. Defaults to v1 options
// if not specified. Only used if `classify_text` is set to true.
ClassificationModelOptions classification_model_options = 10;
}
// Required. Input document.
Document document = 1 [(google.api.field_behavior) = REQUIRED];
// Required. The enabled features.
Features features = 2 [(google.api.field_behavior) = REQUIRED];
// The encoding type used by the API to calculate offsets.
EncodingType encoding_type = 3;
}
// The text annotations response message.
message AnnotateTextResponse {
// Sentences in the input document. Populated if the user enables
// [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_syntax].
repeated Sentence sentences = 1;
// Tokens, along with their syntactic information, in the input document.
// Populated if the user enables
// [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_syntax].
repeated Token tokens = 2;
// Entities, along with their semantic information, in the input document.
// Populated if the user enables
// [AnnotateTextRequest.Features.extract_entities][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_entities].
repeated Entity entities = 3;
// The overall sentiment for the document. Populated if the user enables
// [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1beta2.AnnotateTextRequest.Features.extract_document_sentiment].
Sentiment document_sentiment = 4;
// The language of the text, which will be the same as the language specified
// in the request or, if not specified, the automatically-detected language.
// See [Document.language][google.cloud.language.v1beta2.Document.language] field for more details.
string language = 5;
// Categories identified in the input document.
repeated ClassificationCategory categories = 6;
}