// Copyright 2023 Ant Group Co., Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

package secretflowapis.v1.sdc.teeapps.params;

option java_package = "com.alipay.secretflow.secretflowapis.v1.sdc.teeapps.params";
option java_outer_classname = "TableStatisticsProto";

// Payload carried in TaskReport.body for OPERATOR_TABLE_STATISTICS.
//
// Holds one Bin per analysed column, split into two lists according to
// whether the column is numeric or character typed.
message TableStatisticsReport {
  // Record counts for a single column.
  message SummaryReport {
    // Total number of records.
    int32 total_count = 1;

    // Number of non-null records.
    int32 valid_count = 2;

    // Number of null records.
    int32 null_count = 3;
  }

  // Descriptive statistics for a numeric column.
  message ContinuousReport {
    // Mean.
    double mean = 1;

    // Maximum value.
    double max = 2;

    // Minimum value.
    double min = 3;

    // First quartile (1/4 quantile).
    double q1 = 4;

    // Median.
    double q2 = 5;

    // Third quartile (3/4 quantile).
    double q3 = 6;

    // Variance.
    double variance = 7;

    // Standard deviation.
    double standard_deviation = 8;

    // Standard error.
    double standard_error = 9;

    // Histogram of the value distribution.
    repeated double hist = 10;
  }

  // Descriptive statistics for a character-typed column.
  message DiscreteReport {
    // Number of distinct values.
    int32 distinct = 1;
  }

  // Statistics reported for one column (feature).
  message Bin {
    // Name of the feature being summarized.
    string field_name = 1;

    // Type of the feature being summarized.
    string field_type = 2;

    // Whether statistics are returned for this column.
    //
    // If the minimum valid sample count is < 100, or the column's
    // (standard deviation / (max_0.975 - min_0.025)) is < 0.05, the original
    // data might be inferred from the statistics; in that case is_success is
    // false and the statistics are withheld.
    // NOTE(review): max_0.975 / min_0.025 presumably denote the 97.5th and
    // 2.5th percentiles - confirm against the producer.
    bool is_success = 3;

    // Reason why is_success is false.
    string message = 4;

    // Count statistics.
    SummaryReport summary = 5;

    // Detailed statistics for numeric-typed fields.
    ContinuousReport continuous_report = 6;

    // Detailed statistics for character-typed fields.
    DiscreteReport discrete_report = 7;
  }

  // Numeric-typed fields.
  repeated Bin continuous_bins = 1;

  // Character-typed fields.
  repeated Bin discrete_bins = 2;
}