// Copyright 2017 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.ml.v1;

import "google/api/annotations.proto";
import "google/api/auth.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/ml/v1;ml";
option java_multiple_files = true;
option java_outer_classname = "JobServiceProto";
option java_package = "com.google.cloud.ml.api.v1";

// Copyright 2017 Google Inc. All Rights Reserved.
//
// Proto file for the Google Cloud Machine Learning Engine.
// Describes the 'job service' to manage training and prediction jobs.

// Service for creating and managing training jobs and batch prediction jobs.
service JobService {
  // Creates a new job, which is either a training job or a batch prediction
  // job. The job to create is carried in the `job` field of the request.
  rpc CreateJob(CreateJobRequest) returns (Job) {
    option (google.api.http) = {
      post: "/v1/{parent=projects/*}/jobs"
      body: "job"
    };
  }

  // Lists the jobs that belong to the given project.
  rpc ListJobs(ListJobsRequest) returns (ListJobsResponse) {
    option (google.api.http) = {
      get: "/v1/{parent=projects/*}/jobs"
    };
  }

  // Returns the description of a single job.
  rpc GetJob(GetJobRequest) returns (Job) {
    option (google.api.http) = {
      get: "/v1/{name=projects/*/jobs/*}"
    };
  }

  // Cancels a job that is currently running.
  rpc CancelJob(CancelJobRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      post: "/v1/{name=projects/*/jobs/*}:cancel"
      body: "*"
    };
  }
}

// Represents input parameters for a training job.
message TrainingInput {
  // A scale tier is an abstract representation of the resources Cloud ML
  // will allocate to a training job. When selecting a scale tier for your
  // training job, you should consider the size of your training dataset and
  // the complexity of your model. As the tiers increase, virtual machines are
  // added to handle your job, and the individual machines in the cluster
  // generally have more memory and greater processing power than they do at
  // lower tiers. The number of training units charged per hour of processing
  // increases as tiers get more advanced. Refer to the
  // [pricing guide](/ml/pricing) for more details. Note that in addition to
  // incurring costs, your use of training resources is constrained by the
  // [quota policy](/ml/quota).
  enum ScaleTier {
    // A single worker instance. This tier is suitable for learning how to use
    // Cloud ML, and for experimenting with new models using small datasets.
    BASIC = 0;

    // Many workers and a few parameter servers.
    STANDARD_1 = 1;

    // A large number of workers with many parameter servers.
    PREMIUM_1 = 3;

    // A single worker instance [with a GPU](ml/docs/how-tos/using-gpus).
    BASIC_GPU = 6;

    // The CUSTOM tier is not a set tier, but rather enables you to use your
    // own cluster specification. When you use this tier, set values to
    // configure your processing cluster according to these guidelines:
    //
    // *   You _must_ set `TrainingInput.master_type` to specify the type
    //     of machine to use for your master node. This is the only required
    //     setting.
    //
    // *   You _may_ set `TrainingInput.worker_count` to specify the number of
    //     workers to use. If you specify one or more workers, you _must_ also
    //     set `TrainingInput.worker_type` to specify the type of machine to
    //     use for your worker nodes.
    //
    // *   You _may_ set `TrainingInput.parameter_server_count` to specify the
    //     number of parameter servers to use. If you specify one or more
    //     parameter servers, you _must_ also set
    //     `TrainingInput.parameter_server_type` to specify the type of machine
    //     to use for your parameter servers.
    //
    // Note that all of your workers must use the same machine type, which can
    // be different from your parameter server type and master type. Your
    // parameter servers must likewise use the same machine type, which can be
    // different from your worker type and master type.
    CUSTOM = 5;
  }

  // Required. Specifies the machine types, the number of replicas for workers
  // and parameter servers.
  ScaleTier scale_tier = 1;

  // Optional. Specifies the type of virtual machine to use for your training
  // job's master worker.
  //
  // The following types are supported:
  //
  // *   `standard`: A basic machine configuration suitable for training simple
  //     models with small to moderate datasets.
  //
  // *   `large_model`: A machine with a lot of memory, specially suited for
  //     parameter servers when your model is large (having many hidden layers
  //     or layers with very large numbers of nodes).
  //
  // *   `complex_model_s`: A machine suitable for the master and workers of
  //     the cluster when your model requires more computation than the
  //     standard machine can handle satisfactorily.
  //
  // *   `complex_model_m`: A machine with roughly twice the number of cores
  //     and roughly double the memory of `complex_model_s`.
  //
  // *   `complex_model_l`: A machine with roughly twice the number of cores
  //     and roughly double the memory of `complex_model_m`.
  //
  // *   `standard_gpu`: A machine equivalent to `standard` that also includes
  //     a GPU that you can use in your trainer.
  //
  // *   `complex_model_m_gpu`: A machine equivalent to `complex_model_m` that
  //     also includes four GPUs.
  //
  // You must set this value when `scale_tier` is set to `CUSTOM`.
  string master_type = 2;

  // Optional. Specifies the type of virtual machine to use for your training
  // job's worker nodes.
  //
  // The supported values are the same as those described in the entry for
  // `master_type`.
  //
  // This value must be present when `scale_tier` is set to `CUSTOM` and
  // `worker_count` is greater than zero.
  string worker_type = 3;

  // Optional. Specifies the type of virtual machine to use for your training
  // job's parameter server.
  //
  // The supported values are the same as those described in the entry for
  // `master_type`.
  //
  // This value must be present when `scale_tier` is set to `CUSTOM` and
  // `parameter_server_count` is greater than zero.
  string parameter_server_type = 4;

  // Optional. The number of worker replicas to use for the training job. Each
  // replica in the cluster will be of the type specified in `worker_type`.
  //
  // This value can only be used when `scale_tier` is set to `CUSTOM`. If you
  // set this value, you must also set `worker_type`.
  int64 worker_count = 5;

  // Optional. The number of parameter server replicas to use for the training
  // job. Each replica in the cluster will be of the type specified in
  // `parameter_server_type`.
  //
  // This value can only be used when `scale_tier` is set to `CUSTOM`. If you
  // set this value, you must also set `parameter_server_type`.
  int64 parameter_server_count = 6;

  // Required. The Google Cloud Storage location of the packages with
  // the training program and any additional dependencies.
  repeated string package_uris = 7;

  // Required. The Python module name to run after installing the packages.
  string python_module = 8;

  // Optional. Command line arguments to pass to the program.
  repeated string args = 10;

  // Optional. The set of Hyperparameters to tune.
  HyperparameterSpec hyperparameters = 12;

  // Required. The Google Compute Engine region to run the training job in.
  string region = 14;

  // Optional. A Google Cloud Storage path in which to store training outputs
  // and other data needed for training. This path is passed to your TensorFlow
  // program as the 'job_dir' command-line argument. The benefit of specifying
  // this field is that Cloud ML validates the path for use in training.
  string job_dir = 16;

  // Optional. The Google Cloud ML runtime version to use for training. If not
  // set, Google Cloud ML will choose the latest stable version.
  string runtime_version = 15;
}

// Represents a set of hyperparameters to optimize.
message HyperparameterSpec {
  // The available types of optimization goals.
  enum GoalType {
    // Goal Type will default to maximize.
    GOAL_TYPE_UNSPECIFIED = 0;

    // Maximize the goal metric.
    MAXIMIZE = 1;

    // Minimize the goal metric.
    MINIMIZE = 2;
  }

  // Required. The type of goal to use for tuning. Available types are
  // `MAXIMIZE` and `MINIMIZE`.
  //
  // Defaults to `MAXIMIZE`.
  GoalType goal = 1;

  // Required. The set of parameters to tune.
  repeated ParameterSpec params = 2;

  // Optional. How many training trials should be attempted to optimize
  // the specified hyperparameters.
  //
  // Defaults to one.
  int32 max_trials = 3;

  // Optional. The number of training trials to run concurrently.
  // You can reduce the time it takes to perform hyperparameter tuning by adding
  // trials in parallel. However, each trial only benefits from the information
  // gained in completed trials. That means that a trial does not get access to
  // the results of trials running at the same time, which could reduce the
  // quality of the overall optimization.
  //
  // Each trial will use the same scale tier and machine types.
  //
  // Defaults to one.
  int32 max_parallel_trials = 4;

  // Optional. The TensorFlow summary tag name to use for optimizing trials. For
  // current versions of TensorFlow, this tag name should exactly match what is
  // shown in TensorBoard, including all scopes. For versions of TensorFlow
  // prior to 0.12, this should be only the tag passed to tf.Summary.
  // By default, "training/hptuning/metric" will be used.
  string hyperparameter_metric_tag = 5;
}

// Represents a single hyperparameter to optimize.
message ParameterSpec {
  // The type of the parameter.
  enum ParameterType {
    // You must specify a valid type. Using this unspecified type will result in
    // an error.
    PARAMETER_TYPE_UNSPECIFIED = 0;

    // Type for real-valued parameters.
    DOUBLE = 1;

    // Type for integral parameters.
    INTEGER = 2;

    // The parameter is categorical, with a value chosen from the categories
    // field.
    CATEGORICAL = 3;

    // The parameter is real valued, with a fixed set of feasible points. If
    // `type==DISCRETE`, feasible_points must be provided, and
    // {`min_value`, `max_value`} will be ignored.
    DISCRETE = 4;
  }

  // The type of scaling that should be applied to this parameter.
  enum ScaleType {
    // By default, no scaling is applied.
    NONE = 0;

    // Scales the feasible space to (0, 1) linearly.
    UNIT_LINEAR_SCALE = 1;

    // Scales the feasible space logarithmically to (0, 1). The entire feasible
    // space must be strictly positive.
    UNIT_LOG_SCALE = 2;

    // Scales the feasible space "reverse" logarithmically to (0, 1). The result
    // is that values close to the top of the feasible space are spread out more
    // than points near the bottom. The entire feasible space must be strictly
    // positive.
    UNIT_REVERSE_LOG_SCALE = 3;
  }

  // Required. The parameter name must be unique amongst all `ParameterSpec`
  // messages in a HyperparameterSpec message. E.g., "learning_rate".
  string parameter_name = 1;

  // Required. The type of the parameter.
  ParameterType type = 4;

  // Required if type is `DOUBLE` or `INTEGER`. This field
  // should be unset if type is `CATEGORICAL`. This value should be integers if
  // type is `INTEGER`.
  double min_value = 2;

  // Required if type is `DOUBLE` or `INTEGER`. This field
  // should be unset if type is `CATEGORICAL`. This value should be integers if
  // type is `INTEGER`.
  double max_value = 3;

  // Required if type is `CATEGORICAL`. The list of possible categories.
  repeated string categorical_values = 5;

  // Required if type is `DISCRETE`.
  // A list of feasible points.
  // The list should be in strictly increasing order. For instance, this
  // parameter might have possible settings of 1.5, 2.5, and 4.0. This list
  // should not contain more than 1,000 values.
  repeated double discrete_values = 6;

  // Optional. How the parameter should be scaled to the hypercube.
  // Leave unset for categorical parameters.
  // Some kind of scaling is strongly recommended for real or integral
  // parameters (e.g., `UNIT_LINEAR_SCALE`).
  ScaleType scale_type = 7;
}

// Represents the result of a single hyperparameter tuning trial from a
// training job. The TrainingOutput object that is returned on successful
// completion of a training job with hyperparameter tuning includes a list
// of HyperparameterOutput objects, one for each successful trial.
message HyperparameterOutput {
  // An observed value of a metric.
  message HyperparameterMetric {
    // The global training step for this metric.
    int64 training_step = 1;

    // The objective value at this training step.
    double objective_value = 2;
  }

  // The trial id for these results.
  string trial_id = 1;

  // The hyperparameters given to this trial.
  map<string, string> hyperparameters = 2;

  // The final objective metric seen for this trial.
  HyperparameterMetric final_metric = 3;

  // All recorded objective metrics for this trial.
  repeated HyperparameterMetric all_metrics = 4;
}

// Represents results of a training job. Output only.
message TrainingOutput {
  // The number of hyperparameter tuning trials that completed successfully.
  // Only set for hyperparameter tuning jobs.
  int64 completed_trial_count = 1;

  // Results for individual Hyperparameter trials.
  // Only set for hyperparameter tuning jobs.
  repeated HyperparameterOutput trials = 2;

  // The amount of ML units consumed by the job.
  double consumed_ml_units = 3;

  // Whether this job is a hyperparameter tuning job.
  bool is_hyperparameter_tuning_job = 4;
}

// Represents input parameters for a prediction job.
message PredictionInput {
  // The format used to separate data instances in the source files.
  enum DataFormat {
    // Unspecified format.
    DATA_FORMAT_UNSPECIFIED = 0;

    // The source file is a text file with instances separated by the
    // new-line character.
    TEXT = 1;

    // The source file is a TFRecord file.
    TF_RECORD = 2;

    // The source file is a GZIP-compressed TFRecord file.
    TF_RECORD_GZIP = 3;
  }

  // Required. The model or the version to use for prediction.
  oneof model_version {
    // Use this field if you want to use the default version for the specified
    // model. The string must use the following format:
    //
    // `"projects/[YOUR_PROJECT]/models/[YOUR_MODEL]"`
    string model_name = 1;

    // Use this field if you want to specify a version of the model to use. The
    // string is formatted the same way as `model_name`, with the addition
    // of the version information:
    //
    // `"projects/[YOUR_PROJECT]/models/[YOUR_MODEL]/versions/[YOUR_VERSION]"`
    string version_name = 2;

    // Use this field if you want to specify a Google Cloud Storage path for
    // the model to use.
    string uri = 9;
  }

  // Required. The format of the input data files.
  DataFormat data_format = 3;

  // Required. The Google Cloud Storage location of the input data files.
  // May contain wildcards.
  repeated string input_paths = 4;

  // Required. The output Google Cloud Storage location.
  string output_path = 5;

  // Optional. The maximum number of workers to be used for parallel processing.
  // Defaults to 10 if not specified.
  int64 max_worker_count = 6;

  // Required. The Google Compute Engine region to run the prediction job in.
  string region = 7;

  // Optional. The Google Cloud ML runtime version to use for this batch
  // prediction. If not set, Google Cloud ML will pick the runtime version used
  // during the CreateVersion request for this model version, or choose the
  // latest stable version when model version information is not available
  // such as when the model is specified by uri.
  string runtime_version = 8;
}

// Represents results of a prediction job.
message PredictionOutput {
  // The output Google Cloud Storage location provided at the job creation time.
  string output_path = 1;

  // The number of generated predictions.
  int64 prediction_count = 2;

  // The number of data instances which resulted in errors.
  int64 error_count = 3;

  // Node hours used by the batch prediction job.
  double node_hours = 4;
}

// Represents a training or prediction job.
message Job {
  // Describes the job state.
  enum State {
    // The job state is unspecified.
    STATE_UNSPECIFIED = 0;

    // The job has been just created and processing has not yet begun.
    QUEUED = 1;

    // The service is preparing to run the job.
    PREPARING = 2;

    // The job is in progress.
    RUNNING = 3;

    // The job completed successfully.
    SUCCEEDED = 4;

    // The job failed.
    // `error_message` should contain the details of the failure.
    FAILED = 5;

    // The job is being cancelled.
    // `error_message` should describe the reason for the cancellation.
    CANCELLING = 6;

    // The job has been cancelled.
    // `error_message` should describe the reason for the cancellation.
    CANCELLED = 7;
  }

  // Required. The user-specified id of the job.
  string job_id = 1;

  // Required. Parameters to create a job.
  oneof input {
    // Input parameters to create a training job.
    TrainingInput training_input = 2;

    // Input parameters to create a prediction job.
    PredictionInput prediction_input = 3;
  }

  // Output only. When the job was created.
  google.protobuf.Timestamp create_time = 4;

  // Output only. When the job processing was started.
  google.protobuf.Timestamp start_time = 5;

  // Output only. When the job processing was completed.
  google.protobuf.Timestamp end_time = 6;

  // Output only. The detailed state of a job.
  State state = 7;

  // Output only. The details of a failure or a cancellation.
  string error_message = 8;

  // Output only. The current result of the job.
  oneof output {
    // The current training job result.
    TrainingOutput training_output = 9;

    // The current prediction job result.
    PredictionOutput prediction_output = 10;
  }
}

// Request message for the CreateJob method.
message CreateJobRequest {
  // Required. The project name.
  //
  // Authorization: requires `Editor` role on the specified project.
  string parent = 1;

  // Required. The job to create.
  Job job = 2;
}

// Request message for the ListJobs method.
message ListJobsRequest {
  // Required. The name of the project for which to list jobs.
  //
  // Authorization: requires `Viewer` role on the specified project.
  string parent = 1;

  // Optional. Specifies the subset of jobs to retrieve.
  string filter = 2;

  // Optional. A page token to request the next page of results.
  //
  // You get the token from the `next_page_token` field of the response from
  // the previous call.
  string page_token = 4;

  // Optional. The number of jobs to retrieve per "page" of results. If there
  // are more remaining results than this number, the response message will
  // contain a valid value in the `next_page_token` field.
  //
  // The default value is 20, and the maximum page size is 100.
  int32 page_size = 5;
}

// Response message for the ListJobs method.
message ListJobsResponse {
  // The list of jobs.
  repeated Job jobs = 1;

  // Optional. Pass this token as the `page_token` field of the request for a
  // subsequent call.
  string next_page_token = 2;
}

// Request message for the GetJob method.
message GetJobRequest {
  // Required. The name of the job to get the description of.
  //
  // Authorization: requires `Viewer` role on the parent project.
  string name = 1;
}

// Request message for the CancelJob method.
message CancelJobRequest {
  // Required. The name of the job to cancel.
  //
  // Authorization: requires `Editor` role on the parent project.
  string name = 1;
}