// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.webrisk.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/timestamp.proto";

// Language-specific options that set the namespace, package, and class names
// used by generated code.
option csharp_namespace = "Google.Cloud.WebRisk.V1";
option go_package = "cloud.google.com/go/webrisk/apiv1/webriskpb;webriskpb";
option java_multiple_files = true;
option java_outer_classname = "WebRiskProto";
option java_package = "com.google.webrisk.v1";
option objc_class_prefix = "GCWR";
option php_namespace = "Google\\Cloud\\WebRisk\\V1";
option ruby_package = "Google::Cloud::WebRisk::V1";

// Web Risk API defines an interface to detect malicious URLs on your
// website and in client applications.
service WebRiskService {
  option (google.api.default_host) = "webrisk.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Gets the most recent threat list diffs. These diffs should be applied to
  // a local database of hashes to keep it up-to-date. If the local database is
  // empty or excessively out-of-date, a complete snapshot of the database will
  // be returned. This method only updates a single ThreatList at a time. To
  // update multiple ThreatList databases, this method needs to be called once
  // for each list.
  rpc ComputeThreatListDiff(ComputeThreatListDiffRequest)
      returns (ComputeThreatListDiffResponse) {
    option (google.api.http) = {
      get: "/v1/threatLists:computeDiff"
    };
    option (google.api.method_signature) =
        "threat_type,version_token,constraints";
  }

  // Checks whether a URI is on a given ThreatList. Multiple ThreatLists may be
  // searched in a single query. The response lists all requested ThreatLists
  // that the URI was found to match. If the URI is not found on any of the
  // requested ThreatLists, an empty response is returned.
  rpc SearchUris(SearchUrisRequest) returns (SearchUrisResponse) {
    option (google.api.http) = {
      get: "/v1/uris:search"
    };
    option (google.api.method_signature) = "uri,threat_types";
  }

  // Gets the full hashes that match the requested hash prefix.
  // This is used after a hash prefix is looked up in a ThreatList
  // and there is a match. The client-side ThreatList only holds partial
  // hashes, so the client must query this method to determine whether there is
  // a full hash match of a threat.
  rpc SearchHashes(SearchHashesRequest) returns (SearchHashesResponse) {
    option (google.api.http) = {
      get: "/v1/hashes:search"
    };
    option (google.api.method_signature) = "hash_prefix,threat_types";
  }

  // Creates a Submission of a URI suspected of containing phishing content to
  // be reviewed. If the result verifies the existence of malicious phishing
  // content, the site will be added to [Google's Social Engineering
  // lists](https://support.google.com/webmasters/answer/6350487/) in order to
  // protect users that could get exposed to this threat in the future. Only
  // allowlisted projects can use this method during Early Access. Please reach
  // out to Sales or your customer engineer to obtain access.
  rpc CreateSubmission(CreateSubmissionRequest) returns (Submission) {
    option (google.api.http) = {
      post: "/v1/{parent=projects/*}/submissions"
      body: "submission"
    };
    option (google.api.method_signature) = "parent,submission";
  }

  // Submits a URI suspected of containing malicious content to be reviewed.
  // Returns a google.longrunning.Operation which, once the review is complete,
  // is updated with its result. The operation's response type is Submission
  // and its metadata type is SubmitUriMetadata. You can use the
  // [Pub/Sub API](https://cloud.google.com/pubsub) to receive notifications
  // for the returned Operation. If the result verifies the existence of
  // malicious content, the site will be added to [Google's Social Engineering
  // lists](https://support.google.com/webmasters/answer/6350487/) in order to
  // protect users that could get exposed to this threat in the future. Only
  // allowlisted projects can use this method during Early Access. Please reach
  // out to Sales or your customer engineer to obtain access.
  rpc SubmitUri(SubmitUriRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/{parent=projects/*}/uris:submit"
      body: "*"
    };
    option (google.longrunning.operation_info) = {
      response_type: "Submission"
      metadata_type: "SubmitUriMetadata"
    };
  }
}

// Describes an API diff request. This is the request message for
// ComputeThreatListDiff.
message ComputeThreatListDiffRequest {
  // The constraints for this diff.
  message Constraints {
    // The maximum size in number of entries. The diff will not contain more
    // entries than this value. This should be a power of 2 between 2**10 and
    // 2**20. If zero, no diff size limit is set.
    int32 max_diff_entries = 1;

    // Sets the maximum number of entries that the client is willing to have
    // in the local database. This should be a power of 2 between 2**10 and
    // 2**20. If zero, no database size limit is set.
    int32 max_database_entries = 2;

    // The compression types supported by the client.
    repeated CompressionType supported_compressions = 3;
  }

  // Required. The threat list to update. Only a single ThreatType should be
  // specified per request. If you want to handle multiple ThreatTypes, you must
  // make one request per ThreatType.
  ThreatType threat_type = 1 [(google.api.field_behavior) = REQUIRED];

  // The current version token of the client for the requested list (the
  // client version that was received from the last successful diff).
  // If the client does not have a version token (this is the first time calling
  // ComputeThreatListDiff), this may be left empty and a full database
  // snapshot will be returned.
  bytes version_token = 2;

  // Required. The constraints associated with this request.
  Constraints constraints = 3 [(google.api.field_behavior) = REQUIRED];
}

// Response message for ComputeThreatListDiff. Carries the additions and
// removals to apply to the client's local database for one threat list,
// together with the new version token and the expected checksum.
message ComputeThreatListDiffResponse {
  // The type of response sent to the client.
  enum ResponseType {
    // Unknown.
    RESPONSE_TYPE_UNSPECIFIED = 0;

    // Partial updates are applied to the client's existing local database.
    DIFF = 1;

    // Full updates reset the client's entire local database. This means
    // that either the client had no state, was seriously out-of-date,
    // or the client is believed to be corrupt.
    RESET = 2;
  }

  // The expected state of a client's local database.
  message Checksum {
    // The SHA256 hash of the client state; that is, of the sorted list of all
    // hashes present in the database.
    bytes sha256 = 1;
  }

  // The type of response. This may indicate that an action must be taken by the
  // client when the response is received.
  ResponseType response_type = 4;

  // A set of entries to add to a local threat type's list.
  ThreatEntryAdditions additions = 5;

  // A set of entries to remove from a local threat type's list.
  // This field may be empty.
  ThreatEntryRemovals removals = 6;

  // The new opaque client version token. This should be retained by the client
  // and passed into the next call of ComputeThreatListDiff as 'version_token'.
  // A separate version token should be stored and used for each threatList.
  bytes new_version_token = 7;

  // The expected SHA256 hash of the client state; that is, of the sorted list
  // of all hashes present in the database after applying the provided diff.
  // If the client state doesn't match the expected state, the client must
  // discard this diff and retry later.
  Checksum checksum = 8;

  // The earliest time at which the client should issue its next diff
  // request. Querying sooner is unlikely to produce a meaningful diff.
  // Waiting longer is acceptable considering the use case.
  // If this field is not set, clients may update as soon as they want.
  google.protobuf.Timestamp recommended_next_diff = 2;
}

// Request to check a URI against one or more ThreatLists. This is the request
// message for SearchUris.
message SearchUrisRequest {
  // Required. The URI to be checked for matches.
  string uri = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The ThreatLists to search in. Multiple ThreatLists may be
  // specified.
  repeated ThreatType threat_types = 2 [(google.api.field_behavior) = REQUIRED];
}

// Response message for SearchUris. Reports the ThreatLists, if any, that the
// requested URI was found on.
message SearchUrisResponse {
  // Contains threat information on a matching URI.
  message ThreatUri {
    // The ThreatLists this threat belongs to.
    repeated ThreatType threat_types = 1;

    // The cache lifetime for the returned match. Clients must not cache this
    // response past this timestamp to avoid false positives.
    google.protobuf.Timestamp expire_time = 2;
  }

  // The threat list matches. This might be empty if the URI is on no list.
  ThreatUri threat = 1;
}

// Request to return full hashes matched by the provided hash prefix. This is
// the request message for SearchHashes.
message SearchHashesRequest {
  // A hash prefix, consisting of the most significant 4-32 bytes of a SHA256
  // hash. For JSON requests, this field is base64-encoded.
  // Note that if this parameter is provided by a URI, it must be encoded using
  // the web safe base64 variant (RFC 4648).
  bytes hash_prefix = 1;

  // Required. The ThreatLists to search in. Multiple ThreatLists may be
  // specified.
  repeated ThreatType threat_types = 2 [(google.api.field_behavior) = REQUIRED];
}

// Response message for SearchHashes. Lists the full hashes that matched the
// requested hash prefix, along with cache lifetimes for matches and
// non-matches.
message SearchHashesResponse {
  // Contains threat information on a matching hash.
  message ThreatHash {
    // The ThreatLists this threat belongs to.
    // This must contain at least one entry.
    repeated ThreatType threat_types = 1;

    // A 32 byte SHA256 hash. This field is in binary format. For JSON
    // requests, hashes are base64-encoded.
    bytes hash = 2;

    // The cache lifetime for the returned match. Clients must not cache this
    // response past this timestamp to avoid false positives.
    google.protobuf.Timestamp expire_time = 3;
  }

  // The full hashes that matched the requested prefix.
  // The hash will be populated in the key.
  repeated ThreatHash threats = 1;

  // For requested entities that did not match the threat list, how long to
  // cache the response until.
  google.protobuf.Timestamp negative_expire_time = 2;
}

// The type of threat. Each value maps directly to the threat list that a
// threat may belong to.
enum ThreatType {
  // No entries should match this threat type. This threat type is unused.
  THREAT_TYPE_UNSPECIFIED = 0;

  // Malware targeting any platform.
  MALWARE = 1;

  // Social engineering targeting any platform.
  SOCIAL_ENGINEERING = 2;

  // Unwanted software targeting any platform.
  UNWANTED_SOFTWARE = 3;

  // A list of extended coverage social engineering URIs targeting any
  // platform.
  SOCIAL_ENGINEERING_EXTENDED_COVERAGE = 4;
}

// The ways in which threat entry sets can be compressed.
enum CompressionType {
  // Unknown.
  COMPRESSION_TYPE_UNSPECIFIED = 0;

  // Raw, uncompressed data.
  RAW = 1;

  // Golomb-Rice encoded data.
  RICE = 2;
}

// Contains the set of entries to add to a local database.
// May contain a combination of compressed and raw data in a single response.
message ThreatEntryAdditions {
  // The raw SHA256-formatted entries.
  // Repeated to allow returning sets of hashes with different prefix sizes.
  repeated RawHashes raw_hashes = 1;

  // The encoded 4-byte prefixes of SHA256-formatted entries, using a
  // Golomb-Rice encoding. The hashes are converted to uint32, sorted in
  // ascending order, then delta encoded and stored as `encoded_data`.
  RiceDeltaEncoding rice_hashes = 2;
}

// Contains the set of entries to remove from a local database.
message ThreatEntryRemovals {
  // The raw removal indices for a local list.
  RawIndices raw_indices = 1;

  // The encoded local, lexicographically-sorted list indices, using a
  // Golomb-Rice encoding. Used for sending compressed removal indices. The
  // removal indices (uint32) are sorted in ascending order, then delta encoded
  // and stored as `encoded_data`.
  RiceDeltaEncoding rice_indices = 2;
}

// A set of raw (uncompressed) indices to remove from a local list.
message RawIndices {
  // The indices to remove from a lexicographically-sorted local list.
  repeated int32 indices = 1;
}

// The uncompressed threat entries in hash format.
// Hashes can be anywhere from 4 to 32 bytes in size. A large majority are 4
// bytes, but some hashes are lengthened if they collide with the hash of a
// popular URI.
//
// Used for sending ThreatEntryAdditions to clients that do not support
// compression, or when sending non-4-byte hashes to clients that do support
// compression.
message RawHashes {
  // The number of bytes for each prefix encoded below. This field can be
  // anywhere from 4 (shortest prefix) to 32 (full SHA256 hash).
  // In practice this is almost always 4, except in exceptional circumstances.
  int32 prefix_size = 1;

  // The hashes, in binary format, concatenated into one long string. Hashes are
  // sorted in lexicographic order. For JSON API users, hashes are
  // base64-encoded.
  bytes raw_hashes = 2;
}

// The Golomb-Rice encoded data. Used for sending compressed 4-byte hashes or
// compressed removal indices.
message RiceDeltaEncoding {
  // The offset of the first entry in the encoded data, or, if only a single
  // integer was encoded, that single integer's value. If the field is empty or
  // missing, assume zero.
  int64 first_value = 1;

  // The Golomb-Rice parameter, which is a number between 2 and 28. This field
  // is missing (that is, zero) if `entry_count` is zero.
  int32 rice_parameter = 2;

  // The number of entries that are delta encoded in the encoded data. If only a
  // single integer was encoded, this will be zero and the single value will be
  // stored in `first_value`.
  int32 entry_count = 3;

  // The encoded deltas that are encoded using the Golomb-Rice coder.
  bytes encoded_data = 4;
}

// Wraps a URI that might be displaying malicious content.
message Submission {
  // Required. The URI that is being reported for malicious content to be
  // analyzed.
  string uri = 1 [(google.api.field_behavior) = REQUIRED];

  // Output only. The ThreatTypes found to be associated with the submitted URI
  // after reviewing it. This might be empty if the URI was not added to any
  // list.
  repeated ThreatType threat_types = 2
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Context about the submission including the type of abuse found on the URI and
// supporting details.
// option (google.api.message_visibility).restriction = "TRUSTED_TESTER";
message ThreatInfo {
  // The abuse type found on the URI.
  enum AbuseType {
    // Default.
    ABUSE_TYPE_UNSPECIFIED = 0;

    // The URI contains malware.
    MALWARE = 1;

    // The URI contains social engineering.
    SOCIAL_ENGINEERING = 2;

    // The URI contains unwanted software.
    UNWANTED_SOFTWARE = 3;
  }

  // Confidence that a URI is unsafe.
  message Confidence {
    // Enum representation of confidence.
    enum ConfidenceLevel {
      // Default.
      CONFIDENCE_LEVEL_UNSPECIFIED = 0;

      // Less than 60% confidence that the URI is unsafe.
      LOW = 1;

      // Between 60% and 80% confidence that the URI is unsafe.
      MEDIUM = 2;

      // Greater than 80% confidence that the URI is unsafe.
      HIGH = 3;
    }

    // The confidence value, expressed either as a numeric score or as a
    // discrete level. At most one of the two can be set.
    oneof value {
      // A decimal representation of confidence in the range of 0
      // to 1 where 0 indicates no confidence and 1 indicates
      // complete confidence.
      float score = 1;

      // Enum representation of confidence.
      ConfidenceLevel level = 2;
    }
  }

  // Context about why the URI is unsafe.
  message ThreatJustification {
    // Labels that explain how the URI was classified.
    enum JustificationLabel {
      // Default.
      JUSTIFICATION_LABEL_UNSPECIFIED = 0;

      // The submitter manually verified that the submission is unsafe.
      MANUAL_VERIFICATION = 1;

      // The submitter received the submission from an end user.
      USER_REPORT = 2;

      // The submitter received the submission from an automated system.
      AUTOMATED_REPORT = 3;
    }

    // Labels associated with this URI that explain how it was classified.
    repeated JustificationLabel labels = 1;

    // Free-form context on why this URI is unsafe.
    repeated string comments = 2;
  }

  // The type of abuse.
  AbuseType abuse_type = 1;

  // Confidence that the URI is unsafe.
  Confidence threat_confidence = 2;

  // Context about why the URI is unsafe.
  ThreatJustification threat_justification = 3;
}

// Details about how the threat was discovered.
message ThreatDiscovery {
  // Platform types.
  enum Platform {
    // Default.
    PLATFORM_UNSPECIFIED = 0;

    // General Android platform.
    ANDROID = 1;

    // General iOS platform.
    IOS = 2;

    // General macOS platform.
    MACOS = 3;

    // General Windows platform.
    WINDOWS = 4;
  }

  // Platform on which the threat was discovered.
  Platform platform = 1;

  // CLDR region codes of the countries/regions in which the URI poses a
  // threat, ordered from most impact to least impact. Example: "US" for
  // United States.
  repeated string region_codes = 2;
}

// Request to send a URI suspected of phishing to WebRisk. This is the request
// message for CreateSubmission.
message CreateSubmissionRequest {
  // Required. The name of the project that is making the submission. This
  // string is in the format "projects/{project_number}".
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "cloudresourcemanager.googleapis.com/Project"
    }
  ];

  // Required. The submission that contains the content of the phishing report.
  Submission submission = 2 [(google.api.field_behavior) = REQUIRED];
}

// Request to send a potentially malicious URI to WebRisk. This is the request
// message for SubmitUri.
message SubmitUriRequest {
  // Required. The name of the project that is making the submission. This
  // string is in the format "projects/{project_number}".
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "cloudresourcemanager.googleapis.com/Project"
    }
  ];

  // Required. The submission that contains the URI to be scanned.
  Submission submission = 2 [(google.api.field_behavior) = REQUIRED];

  // Provides additional information about the submission.
  ThreatInfo threat_info = 3;

  // Provides additional information about how the submission was discovered.
  ThreatDiscovery threat_discovery = 4;
}

// Metadata for the SubmitUri long-running operation.
// option (google.api.message_visibility).restriction = "TRUSTED_TESTER";
message SubmitUriMetadata {
  // Enum that represents the state of the long-running operation.
  enum State {
    // Default unspecified state.
    STATE_UNSPECIFIED = 0;

    // The operation is currently running.
    RUNNING = 1;

    // The operation finished with a success status.
    SUCCEEDED = 2;

    // The operation was cancelled.
    CANCELLED = 3;

    // The operation finished with a failure status.
    FAILED = 4;

    // The operation was closed with no action taken.
    CLOSED = 5;
  }

  // The state of the operation.
  State state = 1;

  // Creation time of the operation.
  google.protobuf.Timestamp create_time = 2;

  // Latest update time of the operation.
  google.protobuf.Timestamp update_time = 3;
}