// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.webrisk.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/protobuf/timestamp.proto";

option csharp_namespace = "Google.Cloud.WebRisk.V1";
option go_package = "google.golang.org/genproto/googleapis/cloud/webrisk/v1;webrisk";
option java_multiple_files = true;
option java_outer_classname = "WebRiskProto";
option java_package = "com.google.webrisk.v1";
option objc_class_prefix = "GCWR";
option php_namespace = "Google\\Cloud\\WebRisk\\V1";
option ruby_package = "Google::Cloud::WebRisk::V1";

// Web Risk API defines an interface to detect malicious URLs on your
// website and in client applications.
service WebRiskService {
  option (google.api.default_host) = "webrisk.googleapis.com";
  option (google.api.oauth_scopes) = "https://www.googleapis.com/auth/cloud-platform";

  // Gets the most recent threat list diffs. These diffs should be applied to
  // a local database of hashes to keep it up-to-date. If the local database is
  // empty or excessively out-of-date, a complete snapshot of the database will
  // be returned. This Method only updates a single ThreatList at a time. To
  // update multiple ThreatList databases, this method needs to be called once
  // for each list.
  rpc ComputeThreatListDiff(ComputeThreatListDiffRequest) returns (ComputeThreatListDiffResponse) {
    option (google.api.http) = {
      get: "/v1/threatLists:computeDiff"
    };
    option (google.api.method_signature) = "threat_type,version_token,constraints";
  }

  // This method is used to check whether a URI is on a given threatList.
  // Multiple threatLists may be searched in a single query.
  // The response will list all requested threatLists the URI was found to
  // match. If the URI is not found on any of the requested ThreatList an
  // empty response will be returned.
  rpc SearchUris(SearchUrisRequest) returns (SearchUrisResponse) {
    option (google.api.http) = {
      get: "/v1/uris:search"
    };
    option (google.api.method_signature) = "uri,threat_types";
  }

  // Gets the full hashes that match the requested hash prefix.
  // This is used after a hash prefix is looked up in a threatList
  // and there is a match. The client side threatList only holds partial hashes
  // so the client must query this method to determine if there is a full
  // hash match of a threat.
  rpc SearchHashes(SearchHashesRequest) returns (SearchHashesResponse) {
    option (google.api.http) = {
      get: "/v1/hashes:search"
    };
    option (google.api.method_signature) = "hash_prefix,threat_types";
  }

  // Creates a Submission of a URI suspected of containing phishing content to
  // be reviewed. If the result verifies the existence of malicious phishing
  // content, the site will be added to the [Google's Social Engineering
  // lists](https://support.google.com/webmasters/answer/6350487/) in order to
  // protect users that could get exposed to this threat in the future. Only
  // allowlisted projects can use this method during Early Access. Please reach
  // out to Sales or your customer engineer to obtain access.
  rpc CreateSubmission(CreateSubmissionRequest) returns (Submission) {
    option (google.api.http) = {
      post: "/v1/{parent=projects/*}/submissions"
      body: "submission"
    };
    option (google.api.method_signature) = "parent,submission";
  }
}

// Describes an API diff request.
message ComputeThreatListDiffRequest {
  // The constraints for this diff.
  message Constraints {
    // The maximum size in number of entries. The diff will not contain more
    // entries than this value.  This should be a power of 2 between 2**10 and
    // 2**20.  If zero, no diff size limit is set.
    int32 max_diff_entries = 1;

    // Sets the maximum number of entries that the client is willing to have
    // in the local database. This should be a power of 2 between 2**10 and
    // 2**20. If zero, no database size limit is set.
    int32 max_database_entries = 2;

    // The compression types supported by the client.
    repeated CompressionType supported_compressions = 3;
  }

  // Required. The threat list to update. Only a single ThreatType should be specified
  // per request. If you want to handle multiple ThreatTypes, you must make one
  // request per ThreatType.
  ThreatType threat_type = 1 [(google.api.field_behavior) = REQUIRED];

  // The current version token of the client for the requested list (the
  // client version that was received from the last successful diff).
  // If the client does not have a version token (this is the first time calling
  // ComputeThreatListDiff), this may be left empty and a full database
  // snapshot will be returned.
  bytes version_token = 2;

  // Required. The constraints associated with this request.
  Constraints constraints = 3 [(google.api.field_behavior) = REQUIRED];
}

message ComputeThreatListDiffResponse {
  // The type of response sent to the client.
  enum ResponseType {
    // Unknown.
    RESPONSE_TYPE_UNSPECIFIED = 0;

    // Partial updates are applied to the client's existing local database.
    DIFF = 1;

    // Full updates resets the client's entire local database. This means
    // that either the client had no state, was seriously out-of-date,
    // or the client is believed to be corrupt.
    RESET = 2;
  }

  // The expected state of a client's local database.
  message Checksum {
    // The SHA256 hash of the client state; that is, of the sorted list of all
    // hashes present in the database.
    bytes sha256 = 1;
  }

  // The type of response. This may indicate that an action must be taken by the
  // client when the response is received.
  ResponseType response_type = 4;

  // A set of entries to add to a local threat type's list.
  ThreatEntryAdditions additions = 5;

  // A set of entries to remove from a local threat type's list.
  // This field may be empty.
  ThreatEntryRemovals removals = 6;

  // The new opaque client version token. This should be retained by the client
  // and passed into the next call of ComputeThreatListDiff as 'version_token'.
  // A separate version token should be stored and used for each threatList.
  bytes new_version_token = 7;

  // The expected SHA256 hash of the client state; that is, of the sorted list
  // of all hashes present in the database after applying the provided diff.
  // If the client state doesn't match the expected state, the client must
  // discard this diff and retry later.
  Checksum checksum = 8;

  // The soonest the client should wait before issuing any diff
  // request. Querying sooner is unlikely to produce a meaningful diff.
  // Waiting longer is acceptable considering the use case.
  // If this field is not set clients may update as soon as they want.
  google.protobuf.Timestamp recommended_next_diff = 2;
}

// Request to check URI entries against threatLists.
message SearchUrisRequest {
  // Required. The URI to be checked for matches.
  string uri = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The ThreatLists to search in. Multiple ThreatLists may be specified.
  repeated ThreatType threat_types = 2 [(google.api.field_behavior) = REQUIRED];
}

message SearchUrisResponse {
  // Contains threat information on a matching uri.
  message ThreatUri {
    // The ThreatList this threat belongs to.
    repeated ThreatType threat_types = 1;

    // The cache lifetime for the returned match. Clients must not cache this
    // response past this timestamp to avoid false positives.
    google.protobuf.Timestamp expire_time = 2;
  }

  // The threat list matches. This might be empty if the URI is on no list.
  ThreatUri threat = 1;
}

// Request to return full hashes matched by the provided hash prefixes.
message SearchHashesRequest {
  // A hash prefix, consisting of the most significant 4-32 bytes of a SHA256
  // hash. For JSON requests, this field is base64-encoded.
  // Note that if this parameter is provided by a URI, it must be encoded using
  // the web safe base64 variant (RFC 4648).
  bytes hash_prefix = 1;

  // Required. The ThreatLists to search in. Multiple ThreatLists may be specified.
  repeated ThreatType threat_types = 2 [(google.api.field_behavior) = REQUIRED];
}

message SearchHashesResponse {
  // Contains threat information on a matching hash.
  message ThreatHash {
    // The ThreatList this threat belongs to.
    // This must contain at least one entry.
    repeated ThreatType threat_types = 1;

    // A 32 byte SHA256 hash. This field is in binary format. For JSON
    // requests, hashes are base64-encoded.
    bytes hash = 2;

    // The cache lifetime for the returned match. Clients must not cache this
    // response past this timestamp to avoid false positives.
    google.protobuf.Timestamp expire_time = 3;
  }

  // The full hashes that matched the requested prefixes.
  // The hash will be populated in the key.
  repeated ThreatHash threats = 1;

  // For requested entities that did not match the threat list, how long to
  // cache the response until.
  google.protobuf.Timestamp negative_expire_time = 2;
}

// The type of threat. This maps directly to the threat list a threat may
// belong to.
enum ThreatType {
  // No entries should match this threat type. This threat type is unused.
  THREAT_TYPE_UNSPECIFIED = 0;

  // Malware targeting any platform.
  MALWARE = 1;

  // Social engineering targeting any platform.
  SOCIAL_ENGINEERING = 2;

  // Unwanted software targeting any platform.
  UNWANTED_SOFTWARE = 3;

  // A list of extended coverage social engineering URIs targeting any
  // platform.
  SOCIAL_ENGINEERING_EXTENDED_COVERAGE = 4;
}

// The ways in which threat entry sets can be compressed.
enum CompressionType {
  // Unknown.
  COMPRESSION_TYPE_UNSPECIFIED = 0;

  // Raw, uncompressed data.
  RAW = 1;

  // Rice-Golomb encoded data.
  RICE = 2;
}

// Contains the set of entries to add to a local database.
// May contain a combination of compressed and raw data in a single response.
message ThreatEntryAdditions {
  // The raw SHA256-formatted entries.
  // Repeated to allow returning sets of hashes with different prefix sizes.
  repeated RawHashes raw_hashes = 1;

  // The encoded 4-byte prefixes of SHA256-formatted entries, using a
  // Golomb-Rice encoding. The hashes are converted to uint32, sorted in
  // ascending order, then delta encoded and stored as encoded_data.
  RiceDeltaEncoding rice_hashes = 2;
}

// Contains the set of entries to remove from a local database.
message ThreatEntryRemovals {
  // The raw removal indices for a local list.
  RawIndices raw_indices = 1;

  // The encoded local, lexicographically-sorted list indices, using a
  // Golomb-Rice encoding. Used for sending compressed removal indices. The
  // removal indices (uint32) are sorted in ascending order, then delta encoded
  // and stored as encoded_data.
  RiceDeltaEncoding rice_indices = 2;
}

// A set of raw indices to remove from a local list.
message RawIndices {
  // The indices to remove from a lexicographically-sorted local list.
  repeated int32 indices = 1;
}

// The uncompressed threat entries in hash format.
// Hashes can be anywhere from 4 to 32 bytes in size. A large majority are 4
// bytes, but some hashes are lengthened if they collide with the hash of a
// popular URI.
//
// Used for sending ThreatEntryAdditons to clients that do not support
// compression, or when sending non-4-byte hashes to clients that do support
// compression.
message RawHashes {
  // The number of bytes for each prefix encoded below.  This field can be
  // anywhere from 4 (shortest prefix) to 32 (full SHA256 hash).
  // In practice this is almost always 4, except in exceptional circumstances.
  int32 prefix_size = 1;

  // The hashes, in binary format, concatenated into one long string. Hashes are
  // sorted in lexicographic order. For JSON API users, hashes are
  // base64-encoded.
  bytes raw_hashes = 2;
}

// The Rice-Golomb encoded data. Used for sending compressed 4-byte hashes or
// compressed removal indices.
message RiceDeltaEncoding {
  // The offset of the first entry in the encoded data, or, if only a single
  // integer was encoded, that single integer's value. If the field is empty or
  // missing, assume zero.
  int64 first_value = 1;

  // The Golomb-Rice parameter, which is a number between 2 and 28. This field
  // is missing (that is, zero) if `num_entries` is zero.
  int32 rice_parameter = 2;

  // The number of entries that are delta encoded in the encoded data. If only a
  // single integer was encoded, this will be zero and the single value will be
  // stored in `first_value`.
  int32 entry_count = 3;

  // The encoded deltas that are encoded using the Golomb-Rice coder.
  bytes encoded_data = 4;
}

// Wraps a URI that might be displaying malicious content.
message Submission {
  // Required. The URI that is being reported for malicious content to be analyzed.
  string uri = 1 [(google.api.field_behavior) = REQUIRED];
}

// Request to send a potentially phishy URI to WebRisk.
message CreateSubmissionRequest {
  // Required. The name of the project that is making the submission. This string is in
  // the format "projects/{project_number}".
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "cloudresourcemanager.googleapis.com/Project"
    }
  ];

  // Required. The submission that contains the content of the phishing report.
  Submission submission = 2 [(google.api.field_behavior) = REQUIRED];
}