// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.dataflow.v1beta3;

option csharp_namespace = "Google.Cloud.Dataflow.V1Beta3";
option go_package = "cloud.google.com/go/dataflow/apiv1beta3/dataflowpb;dataflowpb";
option java_multiple_files = true;
option java_outer_classname = "StreamingProto";
option java_package = "com.google.dataflow.v1beta3";
option php_namespace = "Google\\Cloud\\Dataflow\\V1beta3";
option ruby_package = "Google::Cloud::Dataflow::V1beta3";

// Global topology of the streaming Dataflow job, including all
// computations and their sharded locations.
message TopologyConfig {
  // The computations associated with a streaming Dataflow job.
  repeated ComputationTopology computations = 1;

  // The disks assigned to a streaming Dataflow job.
  repeated DataDiskAssignment data_disk_assignments = 2;

  // Maps user stage names to stable computation names.
  // Keys are user stage names; values are the corresponding computation
  // names.
  map<string, string> user_stage_to_computation_name_map = 3;

  // The size (in bits) of keys that will be assigned to source messages.
  int32 forwarding_key_bits = 4;

  // Version number for persistent state.
  int32 persistent_state_version = 5;
}

// Identifies a pubsub location to use for transferring data into or
// out of a streaming Dataflow job.
message PubsubLocation {
  // A pubsub topic, in the form of
  // "pubsub.googleapis.com/topics/<project-id>/<topic-name>"
  string topic = 1;

  // A pubsub subscription, in the form of
  // "pubsub.googleapis.com/subscriptions/<project-id>/<subscription-name>"
  string subscription = 2;

  // If set, contains a pubsub label from which to extract record timestamps.
  // If left empty, record timestamps will be generated upon arrival.
  string timestamp_label = 3;

  // If set, contains a pubsub label from which to extract record ids.
  // If left empty, record deduplication will be strictly best effort.
  string id_label = 4;

  // Indicates whether the pipeline allows late-arriving data.
  // NOTE(review): the field name suggests late data is dropped when this is
  // true, while the sentence above speaks of allowing it -- confirm the
  // polarity with the service owners.
  bool drop_late_data = 5;

  // If set, specifies the pubsub subscription that will be used for tracking
  // custom time timestamps for watermark estimation.
  string tracking_subscription = 6;

  // If true, then the client has requested to get pubsub attributes.
  bool with_attributes = 7;
}

// Identifies the location of a streaming computation stage, for
// stage-to-stage communication.
message StreamingStageLocation {
  // Identifies the particular stream within the streaming Dataflow
  // job.
  string stream_id = 1;
}

// Identifies the location of a streaming side input.
message StreamingSideInputLocation {
  // Identifies the particular side input within the streaming Dataflow job.
  string tag = 1;

  // Identifies the state family where this side input is stored.
  string state_family = 2;
}

// Identifies the location of a custom source.
message CustomSourceLocation {
  // Whether this source is stateful.
  bool stateful = 1;
}

// Describes a stream of data, either as input to be processed or as
// output of a streaming Dataflow job.
message StreamLocation {
  // A specification of a stream's location.
  // At most one of the members below is set at a time.
  oneof location {
    // The stream is part of another computation within the current
    // streaming Dataflow job.
    StreamingStageLocation streaming_stage_location = 1;

    // The stream is a pubsub stream.
    PubsubLocation pubsub_location = 2;

    // The stream is a streaming side input.
    StreamingSideInputLocation side_input_location = 3;

    // The stream is a custom source.
    CustomSourceLocation custom_source_location = 4;
  }
}

// State family configuration.
message StateFamilyConfig {
  // The state family value.
  string state_family = 1;

  // If true, this family corresponds to a read operation.
  bool is_read = 2;
}

// All configuration data for a particular Computation.
message ComputationTopology {
  // The system stage name.
  string system_stage_name = 1;

  // The ID of the computation.
  string computation_id = 5;

  // The key ranges processed by the computation.
  repeated KeyRangeLocation key_ranges = 2;

  // The inputs to the computation.
  repeated StreamLocation inputs = 3;

  // The outputs from the computation.
  repeated StreamLocation outputs = 4;

  // The state family values.
  repeated StateFamilyConfig state_families = 7;
}

// Location information for a specific key-range of a sharded computation.
// Currently we only support UTF-8 character splits to simplify encoding into
// JSON.
message KeyRangeLocation {
  // The start (inclusive) of the key range.
  string start = 1;

  // The end (exclusive) of the key range.
  string end = 2;

  // The physical location of this range assignment to be used for
  // streaming computation cross-worker message delivery.
  string delivery_endpoint = 3;

  // The name of the data disk where data for this range is stored.
  // This name is local to the Google Cloud Platform project and uniquely
  // identifies the disk within that project, for example
  // "myproject-1014-104817-4c2-harness-0-disk-1".
  string data_disk = 5;

  // DEPRECATED. The location of the persistent state for this range, as a
  // persistent directory in the worker local filesystem.
  string deprecated_persistent_directory = 4 [deprecated = true];
}

// Describes mounted data disk.
message MountedDataDisk {
  // The name of the data disk.
  // This name is local to the Google Cloud Platform project and uniquely
  // identifies the disk within that project, for example
  // "myproject-1014-104817-4c2-harness-0-disk-1".
  string data_disk = 1;
}

// Data disk assignment for a given VM instance.
message DataDiskAssignment {
  // VM instance name the data disks mounted to, for example
  // "myproject-1014-104817-4c2-harness-0".
  string vm_instance = 1;

  // Mounted data disks. The order is important: a data disk's 0-based index
  // in this list defines which persistent directory the disk is mounted to,
  // for example the list of
  // { "myproject-1014-104817-4c2-harness-0-disk-0" },
  // { "myproject-1014-104817-4c2-harness-0-disk-1" }.
  repeated string data_disks = 2;
}

// Data disk assignment information for a specific key-range of a sharded
// computation.
// Currently we only support UTF-8 character splits to simplify encoding into
// JSON.
message KeyRangeDataDiskAssignment {
  // The start (inclusive) of the key range.
  string start = 1;

  // The end (exclusive) of the key range.
  string end = 2;

  // The name of the data disk where data for this range is stored.
  // This name is local to the Google Cloud Platform project and uniquely
  // identifies the disk within that project, for example
  // "myproject-1014-104817-4c2-harness-0-disk-1".
  string data_disk = 3;
}

// Describes full or partial data disk assignment information of the
// computation ranges.
message StreamingComputationRanges {
  // The ID of the computation.
  string computation_id = 1;

  // Data disk assignments for ranges from this computation.
  repeated KeyRangeDataDiskAssignment range_assignments = 2;
}

// Streaming appliance snapshot configuration.
message StreamingApplianceSnapshotConfig {
  // If set, indicates the snapshot id for the snapshot being performed.
  string snapshot_id = 1;

  // Indicates which endpoint is used to import appliance state.
  string import_state_endpoint = 2;
}