// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.documentai.v1beta3;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/documentai/v1beta3/dataset.proto";
import "google/cloud/documentai/v1beta3/document.proto";
import "google/cloud/documentai/v1beta3/document_io.proto";
import "google/cloud/documentai/v1beta3/operation_metadata.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/field_mask.proto";
import "google/rpc/status.proto";

option csharp_namespace = "Google.Cloud.DocumentAI.V1Beta3";
option go_package = "cloud.google.com/go/documentai/apiv1beta3/documentaipb;documentaipb";
option java_multiple_files = true;
option java_outer_classname = "DocumentAiDocumentService";
option java_package = "com.google.cloud.documentai.v1beta3";
option php_namespace = "Google\\Cloud\\DocumentAI\\V1beta3";
option ruby_package = "Google::Cloud::DocumentAI::V1beta3";

// Service to call Cloud Document AI to manage document collections (datasets).
service DocumentService {
  option (google.api.default_host) = "documentai.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Updates metadata associated with a dataset.
  rpc UpdateDataset(UpdateDatasetRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      patch: "/v1beta3/{dataset.name=projects/*/locations/*/processors/*/dataset}"
      body: "dataset"
    };
    option (google.api.method_signature) = "dataset,update_mask";
    option (google.longrunning.operation_info) = {
      response_type: "Dataset"
      metadata_type: "UpdateDatasetOperationMetadata"
    };
  }

  // Imports documents into a dataset.
  rpc ImportDocuments(ImportDocumentsRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1beta3/{dataset=projects/*/locations/*/processors/*/dataset}:importDocuments"
      body: "*"
    };
    option (google.api.method_signature) = "dataset";
    option (google.longrunning.operation_info) = {
      response_type: "ImportDocumentsResponse"
      metadata_type: "ImportDocumentsMetadata"
    };
  }

  // Returns relevant fields present in the requested document.
  rpc GetDocument(GetDocumentRequest) returns (GetDocumentResponse) {
    option (google.api.http) = {
      get: "/v1beta3/{dataset=projects/*/locations/*/processors/*/dataset}:getDocument"
    };
    option (google.api.method_signature) = "dataset";
  }

  // Deletes a set of documents.
  rpc BatchDeleteDocuments(BatchDeleteDocumentsRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1beta3/{dataset=projects/*/locations/*/processors/*/dataset}:batchDeleteDocuments"
      body: "*"
    };
    option (google.api.method_signature) = "dataset";
    option (google.longrunning.operation_info) = {
      response_type: "BatchDeleteDocumentsResponse"
      metadata_type: "BatchDeleteDocumentsMetadata"
    };
  }

  // Gets the `DatasetSchema` of a `Dataset`.
  rpc GetDatasetSchema(GetDatasetSchemaRequest) returns (DatasetSchema) {
    option (google.api.http) = {
      get: "/v1beta3/{name=projects/*/locations/*/processors/*/dataset/datasetSchema}"
    };
    option (google.api.method_signature) = "name";
  }

  // Updates a `DatasetSchema`.
  rpc UpdateDatasetSchema(UpdateDatasetSchemaRequest) returns (DatasetSchema) {
    option (google.api.http) = {
      patch: "/v1beta3/{dataset_schema.name=projects/*/locations/*/processors/*/dataset/datasetSchema}"
      body: "dataset_schema"
    };
    option (google.api.method_signature) = "dataset_schema,update_mask";
  }
}
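
// Illustrative example (not part of the original file): a possible
// `UpdateDatasetRequest` for the `UpdateDataset` RPC above, in protobuf text
// format. The project and processor names are hypothetical, and the `Dataset`
// payload fields are declared in dataset.proto, so only the resource name is
// shown here.
//
//   dataset {
//     name: "projects/my-project/locations/us/processors/my-processor/dataset"
//     # Remaining Dataset fields are declared in dataset.proto.
//   }
//   update_mask {
//     # Paths refer to Dataset fields from dataset.proto.
//   }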

// Documents belonging to a dataset will be split into different groups
// referred to as splits: train, test.
enum DatasetSplitType {
  // Default value if the enum is not set.
  // go/protodosdonts#do-include-an-unspecified-value-in-an-enum
  DATASET_SPLIT_TYPE_UNSPECIFIED = 0;

  // Identifies the train documents.
  DATASET_SPLIT_TRAIN = 1;

  // Identifies the test documents.
  DATASET_SPLIT_TEST = 2;

  // Identifies the unassigned documents.
  DATASET_SPLIT_UNASSIGNED = 3;
}

message UpdateDatasetRequest {
  // Required. The `name` field of the `Dataset` is used to identify the
  // resource to be updated.
  Dataset dataset = 1 [(google.api.field_behavior) = REQUIRED];

  // The update mask applies to the resource.
  google.protobuf.FieldMask update_mask = 2;
}

message UpdateDatasetOperationMetadata {
  // The basic metadata of the long-running operation.
  CommonOperationMetadata common_metadata = 1;
}

message ImportDocumentsRequest {
  // Config for importing documents.
  // Each batch can have its own dataset split type.
  message BatchDocumentsImportConfig {
    // The config for auto-split.
    message AutoSplitConfig {
      // Ratio of the training dataset split.
      float training_split_ratio = 1;
    }

    oneof split_type_config {
      // Target dataset split where the documents must be stored.
      DatasetSplitType dataset_split = 2;

      // If set, documents will be automatically split into training and test
      // split categories with the specified ratio.
      AutoSplitConfig auto_split_config = 3;
    }

    // The common config to specify a set of documents used as input.
    BatchDocumentsInputConfig batch_input_config = 1;
  }

  // Required. The dataset resource name.
  // Format:
  // projects/{project}/locations/{location}/processors/{processor}/dataset
  string dataset = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "documentai.googleapis.com/Dataset"
    }
  ];

  // Required. The import configs specifying the Cloud Storage URIs of the raw
  // documents that must be imported.
  repeated BatchDocumentsImportConfig batch_documents_import_configs = 4
      [(google.api.field_behavior) = REQUIRED];
}
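
// Illustrative example (not part of the original file): a possible
// `ImportDocumentsRequest` in protobuf text format, assuming a hypothetical
// project and processor. The `batch_input_config` fields come from
// `BatchDocumentsInputConfig` in document_io.proto and are omitted here.
//
//   dataset: "projects/my-project/locations/us/processors/my-processor/dataset"
//   batch_documents_import_configs {
//     dataset_split: DATASET_SPLIT_TRAIN
//     batch_input_config {
//       # Source fields are declared in document_io.proto.
//     }
//   }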

// Response of the import document operation.
message ImportDocumentsResponse {}

// Metadata of the import document operation.
message ImportDocumentsMetadata {
  // The status of each individual document in the import process.
  message IndividualImportStatus {
    // The source Cloud Storage URI of the document.
    string input_gcs_source = 1;

    // The status of the importing of the document.
    google.rpc.Status status = 2;

    // The document id of the imported document if it was successful,
    // otherwise empty.
    DocumentId output_document_id = 4;
  }

  // The validation status of each import config. The status is set to an
  // error if there are no documents to import in the `import_config`, or `OK`
  // if the operation can proceed with at least one document.
  message ImportConfigValidationResult {
    // The source Cloud Storage URI specified in the import config.
    string input_gcs_source = 1;

    // The validation status of the import config.
    google.rpc.Status status = 2;
  }

  // The basic metadata of the long-running operation.
  CommonOperationMetadata common_metadata = 1;

  // The list of response details of each document.
  repeated IndividualImportStatus individual_import_statuses = 2;

  // Validation statuses of the batch documents import config.
  repeated ImportConfigValidationResult import_config_validation_results = 4;

  // Total number of documents that are qualified for importing.
  int32 total_document_count = 3;
}

message GetDocumentRequest {
  // Required. The resource name of the dataset that the document belongs to.
  // Format:
  // projects/{project}/locations/{location}/processors/{processor}/dataset
  string dataset = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "documentai.googleapis.com/Dataset"
    }
  ];

  // Required. Document identifier.
  DocumentId document_id = 2 [(google.api.field_behavior) = REQUIRED];

  // If set, only the fields listed here will be returned. Otherwise, all
  // fields will be returned by default.
  google.protobuf.FieldMask read_mask = 3;

  // List of pages for which the fields specified in the `read_mask` must
  // be served.
  DocumentPageRange page_range = 4;
}

message GetDocumentResponse {
  Document document = 1;
}

message BatchDeleteDocumentsRequest {
  // Required. The dataset resource name.
  // Format:
  // projects/{project}/locations/{location}/processors/{processor}/dataset
  string dataset = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. Dataset documents input. If given a `filter`, all documents
  // satisfying the filter will be deleted. If given `document_ids`, a maximum
  // of 50 documents can be deleted in a batch. The request will be rejected if
  // more than 50 document_ids are provided.
  BatchDatasetDocuments dataset_documents = 3
      [(google.api.field_behavior) = REQUIRED];
}

// Response of the delete documents operation.
message BatchDeleteDocumentsResponse {}

message BatchDeleteDocumentsMetadata {
  // The status of each individual document in the batch delete process.
  message IndividualBatchDeleteStatus {
    // The document id of the document.
    DocumentId document_id = 1;

    // The status of deleting the document in storage.
    google.rpc.Status status = 2;
  }

  // The basic metadata of the long-running operation.
  CommonOperationMetadata common_metadata = 1;

  // The list of response details of each document.
  repeated IndividualBatchDeleteStatus individual_batch_delete_statuses = 2;

  // Total number of documents being deleted from the dataset.
  int32 total_document_count = 3;

  // Total number of documents that failed to be deleted in storage.
  int32 error_document_count = 4;
}

// Request for `GetDatasetSchema`.
message GetDatasetSchemaRequest {
  // Required. The dataset schema resource name.
  // Format:
  // projects/{project}/locations/{location}/processors/{processor}/dataset/datasetSchema
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "documentai.googleapis.com/DatasetSchema"
    }
  ];

  // If set, only returns the visible fields of the schema.
  bool visible_fields_only = 2;
}

// Request for `UpdateDatasetSchema`.
message UpdateDatasetSchemaRequest {
  // Required. The `name` field of the `DatasetSchema` is used to identify the
  // resource to be updated.
  DatasetSchema dataset_schema = 1 [(google.api.field_behavior) = REQUIRED];

  // The update mask applies to the resource.
  google.protobuf.FieldMask update_mask = 2;
}
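
// Illustrative example (not part of the original file): a possible
// `GetDatasetSchemaRequest` for the `GetDatasetSchema` RPC, in protobuf text
// format, using a hypothetical project and processor.
//
//   name: "projects/my-project/locations/us/processors/my-processor/dataset/datasetSchema"
//   visible_fields_only: true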

// Range of pages present in a document.
message DocumentPageRange {
  // First page number (one-based index) to be returned.
  int32 start = 1;

  // Last page number (one-based index) to be returned.
  int32 end = 2;
}
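
// Illustrative example (not part of the original file): a possible
// `GetDocumentRequest` in protobuf text format that uses the
// `DocumentPageRange` above to return only the first two pages. Resource
// names are hypothetical, and the `DocumentId` fields are declared in
// dataset.proto, so they are omitted here.
//
//   dataset: "projects/my-project/locations/us/processors/my-processor/dataset"
//   document_id {
//     # Identifier fields are declared in dataset.proto.
//   }
//   page_range {
//     start: 1
//     end: 2
//   }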