/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

syntax = "proto3";

import "google/protobuf/any.proto";
import "spark/connect/types.proto";

package spark.connect;

option java_multiple_files = true;
option java_package = "org.apache.spark.connect.proto";
option go_package = "internal/generated";

// Expression used to refer to fields, functions and similar. This can be used
// everywhere expressions in SQL appear.
message Expression {
  // The concrete kind of expression. At most one variant is set.
  oneof expr_type {
    Literal literal = 1;
    UnresolvedAttribute unresolved_attribute = 2;
    UnresolvedFunction unresolved_function = 3;
    ExpressionString expression_string = 4;
    UnresolvedStar unresolved_star = 5;
    Alias alias = 6;
    Cast cast = 7;
    UnresolvedRegex unresolved_regex = 8;
    SortOrder sort_order = 9;
    LambdaFunction lambda_function = 10;
    Window window = 11;
    UnresolvedExtractValue unresolved_extract_value = 12;
    UpdateFields update_fields = 13;
    UnresolvedNamedLambdaVariable unresolved_named_lambda_variable = 14;
    CommonInlineUserDefinedFunction common_inline_user_defined_function = 15;
    CallFunction call_function = 16;

    // This field is used to mark extensions to the protocol. When plugins
    // generate arbitrary expressions they can add them here. During planning
    // the correct resolution is done.
    google.protobuf.Any extension = 999;
  }

  // Expression for the OVER clause or WINDOW clause.
  message Window {
    // (Required) The window function.
    Expression window_function = 1;

    // (Optional) The way that input rows are partitioned.
    repeated Expression partition_spec = 2;

    // (Optional) Ordering of rows in a partition.
    repeated SortOrder order_spec = 3;

    // (Optional) Window frame in a partition.
    //
    // If not set, it will be treated as 'UnspecifiedFrame'.
    WindowFrame frame_spec = 4;

    // The window frame.
    message WindowFrame {
      // (Required) The type of the frame.
      FrameType frame_type = 1;

      // (Required) The lower bound of the frame.
      FrameBoundary lower = 2;

      // (Required) The upper bound of the frame.
      FrameBoundary upper = 3;

      // How the rows of a partition are grouped into a frame.
      enum FrameType {
        // Zero value, i.e. what a reader sees when frame_type is not set.
        FRAME_TYPE_UNDEFINED = 0;

        // RowFrame treats rows in a partition individually.
        FRAME_TYPE_ROW = 1;

        // RangeFrame treats rows in a partition as groups of peers.
        // All rows having the same 'ORDER BY' ordering are considered as peers.
        FRAME_TYPE_RANGE = 2;
      }

      // One end (lower or upper) of a window frame.
      message FrameBoundary {
        oneof boundary {
          // CURRENT ROW boundary.
          bool current_row = 1;

          // UNBOUNDED boundary.
          // For lower bound, it will be converted to 'UnboundedPreceding'.
          // For upper bound, it will be converted to 'UnboundedFollowing'.
          bool unbounded = 2;

          // This is an expression for future proofing. We are expecting
          // literals on the server side.
          Expression value = 3;
        }
      }
    }
  }

  // SortOrder is used to specify the data ordering; it is normally used in
  // Sort and Window. It is an unevaluable expression, so it cannot be used in
  // Projection.
  message SortOrder {
    // (Required) The expression to be sorted.
    Expression child = 1;

    // (Required) The sort direction, should be ASCENDING or DESCENDING.
    SortDirection direction = 2;

    // (Required) How to deal with NULLs, should be NULLS_FIRST or NULLS_LAST.
    NullOrdering null_ordering = 3;

    // Direction in which values are ordered.
    enum SortDirection {
      SORT_DIRECTION_UNSPECIFIED = 0;
      SORT_DIRECTION_ASCENDING = 1;
      SORT_DIRECTION_DESCENDING = 2;
    }

    // Placement of NULL values relative to non-NULL values.
    enum NullOrdering {
      SORT_NULLS_UNSPECIFIED = 0;
      SORT_NULLS_FIRST = 1;
      SORT_NULLS_LAST = 2;
    }
  }

  // Casts an expression to a target data type, given either as a DataType or
  // as a type string.
  message Cast {
    // (Required) The expression to be cast.
    Expression expr = 1;

    // (Required) The data type that the expression is cast to.
    oneof cast_to_type {
      DataType type = 2;

      // If this is set, Server will use Catalyst parser to parse this string
      // to DataType.
      string type_str = 3;
    }
  }

  // A literal value. At most one variant of literal_type is set.
  message Literal {
    oneof literal_type {
      DataType null = 1;
      bytes binary = 2;
      bool boolean = 3;

      int32 byte = 4;
      int32 short = 5;
      int32 integer = 6;
      int64 long = 7;
      float float = 10;
      double double = 11;
      Decimal decimal = 12;

      string string = 13;

      // Date in units of days since the UNIX epoch.
      int32 date = 16;

      // Timestamp in units of microseconds since the UNIX epoch.
      int64 timestamp = 17;

      // Timestamp in units of microseconds since the UNIX epoch (without
      // timezone information).
      int64 timestamp_ntz = 18;

      CalendarInterval calendar_interval = 19;
      int32 year_month_interval = 20;
      int64 day_time_interval = 21;
      Array array = 22;
      Map map = 23;
      Struct struct = 24;
    }

    // A decimal literal carried as a string, with optional precision/scale.
    message Decimal {
      // The string representation.
      string value = 1;

      // The maximum number of digits allowed in the value.
      // The maximum precision is 38.
      optional int32 precision = 2;

      // Declared scale of the decimal literal.
      optional int32 scale = 3;
    }

    // An interval literal split into months, days and microseconds.
    message CalendarInterval {
      int32 months = 1;
      int32 days = 2;
      int64 microseconds = 3;
    }

    // An array literal: the element type plus the element values.
    message Array {
      DataType element_type = 1;
      repeated Literal elements = 2;
    }

    // A map literal: key/value types plus the key and value lists.
    // NOTE(review): presumably keys[i] pairs with values[i] -- confirm against
    // the server-side conversion.
    message Map {
      DataType key_type = 1;
      DataType value_type = 2;
      repeated Literal keys = 3;
      repeated Literal values = 4;
    }

    // A struct literal: the struct type plus one value per element.
    message Struct {
      DataType struct_type = 1;
      repeated Literal elements = 2;
    }
  }

  // An unresolved attribute that is not explicitly bound to a specific column,
  // but the column is resolved during analysis by name.
  message UnresolvedAttribute {
    // (Required) An identifier that will be parsed by Catalyst parser. This
    // should follow the Spark SQL identifier syntax.
    string unparsed_identifier = 1;

    // (Optional) The id of corresponding connect plan.
    optional int64 plan_id = 2;
  }

  // An unresolved function is not explicitly bound to one explicit function,
  // but the function is resolved during analysis following Spark's name
  // resolution rules.
  message UnresolvedFunction {
    // (Required) Name (or unparsed name for user defined function) for the
    // unresolved function.
    string function_name = 1;

    // (Optional) Function arguments. Empty arguments are allowed.
    repeated Expression arguments = 2;

    // (Required) Indicate if this function should be applied on distinct
    // values.
    bool is_distinct = 3;

    // (Required) Indicate if this is a user defined function.
    //
    // When it is not a user defined function, Connect will use the function
    // name directly. When it is a user defined function, Connect will parse
    // the function name first.
    bool is_user_defined_function = 4;
  }

  // Expression as string.
  message ExpressionString {
    // (Required) A SQL expression that will be parsed by Catalyst parser.
    string expression = 1;
  }

  // UnresolvedStar is used to expand all the fields of a relation or struct.
  message UnresolvedStar {
    // (Optional) The target of the expansion.
    //
    // If set, it should end with '.*' and will be parsed by
    // 'parseAttributeName' on the server side.
    optional string unparsed_target = 1;
  }

  // Represents all of the input attributes to a given relational operator, for
  // example in "SELECT `(id)?+.+` FROM ...".
  message UnresolvedRegex {
    // (Required) The column name used to extract column with regex.
    string col_name = 1;

    // (Optional) The id of corresponding connect plan.
    optional int64 plan_id = 2;
  }

  // Extracts a value or values from an Expression.
  message UnresolvedExtractValue {
    // (Required) The expression to extract value from, can be
    // Map, Array, Struct or array of Structs.
    Expression child = 1;

    // (Required) The expression to describe the extraction, can be
    // key of Map, index of Array, field name of Struct.
    Expression extraction = 2;
  }

  // Add, replace or drop a field of `StructType` expression by name.
  message UpdateFields {
    // (Required) The struct expression.
    Expression struct_expression = 1;

    // (Required) The field name.
    string field_name = 2;

    // (Optional) The expression to add or replace.
    //
    // When not set, it means this field will be dropped.
    Expression value_expression = 3;
  }

  // Gives an expression a name, with optional metadata.
  message Alias {
    // (Required) The expression that alias will be added on.
    Expression expr = 1;

    // (Required) A list of name parts for the alias.
    //
    // Scalar columns only have one name.
    repeated string name = 2;

    // (Optional) Alias metadata expressed as a JSON map.
    optional string metadata = 3;
  }

  // A lambda function: a body expression plus its named variables.
  message LambdaFunction {
    // (Required) The lambda function.
    //
    // The function body should use 'UnresolvedAttribute' as arguments, the
    // server side will replace 'UnresolvedAttribute' with
    // 'UnresolvedNamedLambdaVariable'.
    Expression function = 1;

    // (Required) Function variables. Must contain 1 to 3 variables.
    repeated Expression.UnresolvedNamedLambdaVariable arguments = 2;
  }

  // A lambda variable referred to by its name parts.
  message UnresolvedNamedLambdaVariable {
    // (Required) A list of name parts for the variable. Must not be empty.
    repeated string name_parts = 1;
  }
}

// A user-defined function supplied inline with the plan, as a Python, Scala
// or Java UDF.
message CommonInlineUserDefinedFunction {
  // (Required) Name of the user-defined function.
  string function_name = 1;

  // (Optional) Indicate if the user-defined function is deterministic.
  bool deterministic = 2;

  // (Optional) Function arguments. Empty arguments are allowed.
  repeated Expression arguments = 3;

  // (Required) Indicate the function type of the user-defined function.
  oneof function {
    PythonUDF python_udf = 4;
    ScalarScalaUDF scalar_scala_udf = 5;
    JavaUDF java_udf = 6;
  }
}

// Definition of a Python user-defined function.
message PythonUDF {
  // (Required) Output type of the Python UDF.
  DataType output_type = 1;

  // (Required) EvalType of the Python UDF.
  int32 eval_type = 2;

  // (Required) The encoded commands of the Python UDF.
  bytes command = 3;

  // (Required) Python version being used in the client.
  string python_ver = 4;
}

// Definition of a scalar Scala user-defined function.
//
// NOTE(review): `inputTypes` and `outputType` are camelCase, unlike the
// snake_case field names used elsewhere in this file. They are left unchanged
// here because renaming a field alters its text-format name and the generated
// code in some languages.
message ScalarScalaUDF {
  // (Required) Serialized JVM object containing UDF definition, input encoders
  // and output encoder.
  bytes payload = 1;

  // (Optional) Input type(s) of the UDF.
  repeated DataType inputTypes = 2;

  // (Required) Output type of the UDF.
  DataType outputType = 3;

  // (Required) True if the UDF can return null value.
  bool nullable = 4;
}

// Definition of a Java user-defined function, identified by class name.
message JavaUDF {
  // (Required) Fully qualified name of Java class.
  string class_name = 1;

  // (Optional) Output type of the Java UDF.
  optional DataType output_type = 2;

  // (Required) Indicate if the Java user-defined function is an aggregate
  // function.
  bool aggregate = 3;
}

// A call to a SQL function identified by its unparsed name.
message CallFunction {
  // (Required) Unparsed name of the SQL function.
  string function_name = 1;

  // (Optional) Function arguments. Empty arguments are allowed.
  repeated Expression arguments = 2;
}