// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package google.cloud.language.v2;
import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
option go_package = "cloud.google.com/go/language/apiv2/languagepb;languagepb";
option java_multiple_files = true;
option java_outer_classname = "LanguageServiceProto";
option java_package = "com.google.cloud.language.v2";
// Provides text analysis operations such as sentiment analysis and entity
// recognition.
service LanguageService {
option (google.api.default_host) = "language.googleapis.com";
option (google.api.oauth_scopes) =
"https://www.googleapis.com/auth/cloud-language,"
"https://www.googleapis.com/auth/cloud-platform";
// Analyzes the sentiment of the provided text.
rpc AnalyzeSentiment(AnalyzeSentimentRequest)
returns (AnalyzeSentimentResponse) {
option (google.api.http) = {
post: "/v2/documents:analyzeSentiment"
body: "*"
};
option (google.api.method_signature) = "document,encoding_type";
option (google.api.method_signature) = "document";
}
// Finds named entities (currently proper names and common nouns) in the text
// along with entity types, probability, mentions for each entity, and
// other properties.
rpc AnalyzeEntities(AnalyzeEntitiesRequest)
returns (AnalyzeEntitiesResponse) {
option (google.api.http) = {
post: "/v2/documents:analyzeEntities"
body: "*"
};
option (google.api.method_signature) = "document,encoding_type";
option (google.api.method_signature) = "document";
}
// Classifies a document into categories.
rpc ClassifyText(ClassifyTextRequest) returns (ClassifyTextResponse) {
option (google.api.http) = {
post: "/v2/documents:classifyText"
body: "*"
};
option (google.api.method_signature) = "document";
}
// Moderates a document for harmful and sensitive categories.
rpc ModerateText(ModerateTextRequest) returns (ModerateTextResponse) {
option (google.api.http) = {
post: "/v2/documents:moderateText"
body: "*"
};
option (google.api.method_signature) = "document";
}
// A convenience method that provides all features in one call.
rpc AnnotateText(AnnotateTextRequest) returns (AnnotateTextResponse) {
option (google.api.http) = {
post: "/v2/documents:annotateText"
body: "*"
};
option (google.api.method_signature) = "document,features,encoding_type";
option (google.api.method_signature) = "document,features";
}
}
// Represents the input to API methods.
message Document {
// The document types enum.
enum Type {
// The content type is not specified.
TYPE_UNSPECIFIED = 0;
// Plain text
PLAIN_TEXT = 1;
// HTML
HTML = 2;
}
// Required. If the type is not set or is `TYPE_UNSPECIFIED`,
// returns an `INVALID_ARGUMENT` error.
Type type = 1;
// The source of the document: a string containing the content or a
// Google Cloud Storage URI.
oneof source {
// The content of the input in string format.
// Cloud audit logging exempt since it is based on user data.
string content = 2;
// The Google Cloud Storage URI where the file content is located.
// This URI must be of the form: gs://bucket_name/object_name. For more
// details, see https://cloud.google.com/storage/docs/reference-uris.
// NOTE: Cloud Storage object versioning is not supported.
string gcs_content_uri = 3;
}
// Optional. The language of the document (if not specified, the language is
// automatically detected). Both ISO and BCP-47 language codes are
// accepted.
// [Language
// Support](https://cloud.google.com/natural-language/docs/languages) lists
// currently supported languages for each API method. If the language (either
// specified by the caller or automatically detected) is not supported by the
// called API method, an `INVALID_ARGUMENT` error is returned.
string language_code = 4 [(google.api.field_behavior) = OPTIONAL];
}
// Represents a sentence in the input document.
message Sentence {
// The sentence text.
TextSpan text = 1;
// For calls to [AnalyzeSentiment][] or if
// [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v2.AnnotateTextRequest.Features.extract_document_sentiment]
// is set to true, this field will contain the sentiment for the sentence.
Sentiment sentiment = 2;
}
// Represents the text encoding that the caller uses to process the output.
// Providing an `EncodingType` is recommended because the API provides the
// beginning offsets for various outputs, such as tokens and mentions, and
// languages that natively use different text encodings may access offsets
// differently.
enum EncodingType {
// If `EncodingType` is not specified, encoding-dependent information (such as
// `begin_offset`) will be set at `-1`.
NONE = 0;
// Encoding-dependent information (such as `begin_offset`) is calculated based
// on the UTF-8 encoding of the input. C++ and Go are examples of languages
// that use this encoding natively.
UTF8 = 1;
// Encoding-dependent information (such as `begin_offset`) is calculated based
// on the UTF-16 encoding of the input. Java and JavaScript are examples of
// languages that use this encoding natively.
UTF16 = 2;
// Encoding-dependent information (such as `begin_offset`) is calculated based
// on the UTF-32 encoding of the input. Python is an example of a language
// that uses this encoding natively.
UTF32 = 3;
}
// Represents a phrase in the text that is a known entity, such as
// a person, an organization, or location. The API associates information, such
// as probability and mentions, with entities.
message Entity {
// The type of the entity. For most entity types, the associated metadata is a
// Wikipedia URL (`wikipedia_url`) and Knowledge Graph MID (`mid`). The table
// below lists the associated fields for entities that have different
// metadata.
enum Type {
// Unknown
UNKNOWN = 0;
// Person
PERSON = 1;
// Location
LOCATION = 2;
// Organization
ORGANIZATION = 3;
// Event
EVENT = 4;
// Artwork
WORK_OF_ART = 5;
// Consumer product
CONSUMER_GOOD = 6;
// Other types of entities
OTHER = 7;
// Phone number
//
// The metadata lists the phone number, formatted according to local
// convention, plus whichever additional elements appear in the text:
//
// * `number` - the actual number, broken down into sections as per local
// convention
// * `national_prefix` - country code, if detected
// * `area_code` - region or area code, if detected
// * `extension` - phone extension (to be dialed after connection), if
// detected
PHONE_NUMBER = 9;
// Address
//
// The metadata identifies the street number and locality plus whichever
// additional elements appear in the text:
//
// * `street_number` - street number
// * `locality` - city or town
// * `street_name` - street/route name, if detected
// * `postal_code` - postal code, if detected
// * `country` - country, if detected<
// * `broad_region` - administrative area, such as the state, if detected
// * `narrow_region` - smaller administrative area, such as county, if
// detected
// * `sublocality` - used in Asian addresses to demark a district within a
// city, if detected
ADDRESS = 10;
// Date
//
// The metadata identifies the components of the date:
//
// * `year` - four digit year, if detected
// * `month` - two digit month number, if detected
// * `day` - two digit day number, if detected
DATE = 11;
// Number
//
// The metadata is the number itself.
NUMBER = 12;
// Price
//
// The metadata identifies the `value` and `currency`.
PRICE = 13;
}
// The representative name for the entity.
string name = 1;
// The entity type.
Type type = 2;
// Metadata associated with the entity.
//
// For most entity types, the metadata is a Wikipedia URL (`wikipedia_url`)
// and Knowledge Graph MID (`mid`), if they are available. For the metadata
// associated with other entity types, see the Type table below.
map metadata = 3;
// The mentions of this entity in the input document. The API currently
// supports proper noun mentions.
repeated EntityMention mentions = 5;
// For calls to [AnalyzeEntitySentiment][] or if
// [AnnotateTextRequest.Features.extract_entity_sentiment][google.cloud.language.v2.AnnotateTextRequest.Features.extract_entity_sentiment]
// is set to true, this field will contain the aggregate sentiment expressed
// for this entity in the provided document.
Sentiment sentiment = 6;
}
// Represents the feeling associated with the entire text or entities in
// the text.
message Sentiment {
// A non-negative number in the [0, +inf) range, which represents
// the absolute magnitude of sentiment regardless of score (positive or
// negative).
float magnitude = 1;
// Sentiment score between -1.0 (negative sentiment) and 1.0
// (positive sentiment).
float score = 2;
}
// Represents a mention for an entity in the text. Currently, proper noun
// mentions are supported.
message EntityMention {
// The supported types of mentions.
enum Type {
// Unknown
TYPE_UNKNOWN = 0;
// Proper name
PROPER = 1;
// Common noun (or noun compound)
COMMON = 2;
}
// The mention text.
TextSpan text = 1;
// The type of the entity mention.
Type type = 2;
// For calls to [AnalyzeEntitySentiment][] or if
// [AnnotateTextRequest.Features.extract_entity_sentiment][google.cloud.language.v2.AnnotateTextRequest.Features.extract_entity_sentiment]
// is set to true, this field will contain the sentiment expressed for this
// mention of the entity in the provided document.
Sentiment sentiment = 3;
// Probability score associated with the entity.
//
// The score shows the probability of the entity mention being the entity
// type. The score is in (0, 1] range.
float probability = 4;
}
// Represents a text span in the input document.
message TextSpan {
// The content of the text span, which is a substring of the document.
string content = 1;
// The API calculates the beginning offset of the content in the original
// document according to the
// [EncodingType][google.cloud.language.v2.EncodingType] specified in the API
// request.
int32 begin_offset = 2;
}
// Represents a category returned from the text classifier.
message ClassificationCategory {
// The name of the category representing the document.
string name = 1;
// The classifier's confidence of the category. Number represents how certain
// the classifier is that this category represents the given text.
float confidence = 2;
}
// The sentiment analysis request message.
message AnalyzeSentimentRequest {
// Required. Input document.
Document document = 1 [(google.api.field_behavior) = REQUIRED];
// The encoding type used by the API to calculate sentence offsets.
EncodingType encoding_type = 2;
}
// The sentiment analysis response message.
message AnalyzeSentimentResponse {
// The overall sentiment of the input document.
Sentiment document_sentiment = 1;
// The language of the text, which will be the same as the language specified
// in the request or, if not specified, the automatically-detected language.
// See [Document.language][] field for more details.
string language_code = 2;
// The sentiment for all the sentences in the document.
repeated Sentence sentences = 3;
// Whether the language is officially supported. The API may still return a
// response when the language is not supported, but it is on a best effort
// basis.
bool language_supported = 4;
}
// The entity analysis request message.
message AnalyzeEntitiesRequest {
// Required. Input document.
Document document = 1 [(google.api.field_behavior) = REQUIRED];
// The encoding type used by the API to calculate offsets.
EncodingType encoding_type = 2;
}
// The entity analysis response message.
message AnalyzeEntitiesResponse {
// The recognized entities in the input document.
repeated Entity entities = 1;
// The language of the text, which will be the same as the language specified
// in the request or, if not specified, the automatically-detected language.
// See [Document.language][] field for more details.
string language_code = 2;
// Whether the language is officially supported. The API may still return a
// response when the language is not supported, but it is on a best effort
// basis.
bool language_supported = 3;
}
// The document classification request message.
message ClassifyTextRequest {
// Required. Input document.
Document document = 1 [(google.api.field_behavior) = REQUIRED];
}
// The document classification response message.
message ClassifyTextResponse {
// Categories representing the input document.
repeated ClassificationCategory categories = 1;
// The language of the text, which will be the same as the language specified
// in the request or, if not specified, the automatically-detected language.
// See [Document.language][] field for more details.
string language_code = 2;
// Whether the language is officially supported. The API may still return a
// response when the language is not supported, but it is on a best effort
// basis.
bool language_supported = 3;
}
// The document moderation request message.
message ModerateTextRequest {
// Required. Input document.
Document document = 1 [(google.api.field_behavior) = REQUIRED];
}
// The document moderation response message.
message ModerateTextResponse {
// Harmful and sensitive categories representing the input document.
repeated ClassificationCategory moderation_categories = 1;
// The language of the text, which will be the same as the language specified
// in the request or, if not specified, the automatically-detected language.
// See [Document.language][] field for more details.
string language_code = 2;
// Whether the language is officially supported. The API may still return a
// response when the language is not supported, but it is on a best effort
// basis.
bool language_supported = 3;
}
// The request message for the text annotation API, which can perform multiple
// analysis types in one call.
message AnnotateTextRequest {
// All available features.
// Setting each one to true will enable that specific analysis for the input.
message Features {
// Optional. Extract entities.
bool extract_entities = 1 [(google.api.field_behavior) = OPTIONAL];
// Optional. Extract document-level sentiment.
bool extract_document_sentiment = 2
[(google.api.field_behavior) = OPTIONAL];
// Optional. Classify the full document into categories.
bool classify_text = 4 [(google.api.field_behavior) = OPTIONAL];
// Optional. Moderate the document for harmful and sensitive categories.
bool moderate_text = 5 [(google.api.field_behavior) = OPTIONAL];
}
// Required. Input document.
Document document = 1 [(google.api.field_behavior) = REQUIRED];
// Required. The enabled features.
Features features = 2 [(google.api.field_behavior) = REQUIRED];
// The encoding type used by the API to calculate offsets.
EncodingType encoding_type = 3;
}
// The text annotations response message.
message AnnotateTextResponse {
// Sentences in the input document. Populated if the user enables
// [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v2.AnnotateTextRequest.Features.extract_document_sentiment].
repeated Sentence sentences = 1;
// Entities, along with their semantic information, in the input document.
// Populated if the user enables
// [AnnotateTextRequest.Features.extract_entities][google.cloud.language.v2.AnnotateTextRequest.Features.extract_entities]
// or
// [AnnotateTextRequest.Features.extract_entity_sentiment][google.cloud.language.v2.AnnotateTextRequest.Features.extract_entity_sentiment].
repeated Entity entities = 2;
// The overall sentiment for the document. Populated if the user enables
// [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v2.AnnotateTextRequest.Features.extract_document_sentiment].
Sentiment document_sentiment = 3;
// The language of the text, which will be the same as the language specified
// in the request or, if not specified, the automatically-detected language.
// See [Document.language][] field for more details.
string language_code = 4;
// Categories identified in the input document.
repeated ClassificationCategory categories = 5;
// Harmful and sensitive categories identified in the input document.
repeated ClassificationCategory moderation_categories = 6;
// Whether the language is officially supported by all requested features.
// The API may still return a response when the language is not supported, but
// it is on a best effort basis.
bool language_supported = 7;
}