// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.genomics.v1;

import "google/api/annotations.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/struct.proto";
import "google/protobuf/wrappers.proto";
import "google/rpc/status.proto";

option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
option java_multiple_files = true;
option java_outer_classname = "AnnotationsProto";
option java_package = "com.google.genomics.v1";

// This service provides storage and positional retrieval of genomic
// reference annotations, including variant annotations.
service AnnotationServiceV1 {
  // Creates a new annotation set. Caller must have WRITE permission for the
  // associated dataset.
  //
  // The following fields are required:
  //
  //   * [datasetId][google.genomics.v1.AnnotationSet.dataset_id]
  //   * [referenceSetId][google.genomics.v1.AnnotationSet.reference_set_id]
  //
  // All other fields may be optionally specified, unless documented as being
  // server-generated (for example, the `id` field).
  rpc CreateAnnotationSet(CreateAnnotationSetRequest) returns (AnnotationSet) {
    option (google.api.http) = {
      post: "/v1/annotationsets"
      body: "annotation_set"
    };
  }

  // Gets an annotation set. Caller must have READ permission for
  // the associated dataset.
  rpc GetAnnotationSet(GetAnnotationSetRequest) returns (AnnotationSet) {
    option (google.api.http) = {
      get: "/v1/annotationsets/{annotation_set_id}"
    };
  }

  // Updates an annotation set. The update must respect all mutability
  // restrictions and other invariants described on the annotation set resource.
  // Caller must have WRITE permission for the associated dataset.
  rpc UpdateAnnotationSet(UpdateAnnotationSetRequest) returns (AnnotationSet) {
    option (google.api.http) = {
      put: "/v1/annotationsets/{annotation_set_id}"
      body: "annotation_set"
    };
  }

  // Deletes an annotation set. Caller must have WRITE permission
  // for the associated annotation set.
  rpc DeleteAnnotationSet(DeleteAnnotationSetRequest)
      returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/annotationsets/{annotation_set_id}"
    };
  }

  // Searches for annotation sets that match the given criteria. Annotation sets
  // are returned in an unspecified order. This order is consistent, such that
  // two queries for the same content (regardless of page size) yield annotation
  // sets in the same order across their respective streams of paginated
  // responses. Caller must have READ permission for the queried datasets.
  rpc SearchAnnotationSets(SearchAnnotationSetsRequest)
      returns (SearchAnnotationSetsResponse) {
    option (google.api.http) = {
      post: "/v1/annotationsets/search"
      body: "*"
    };
  }

  // Creates a new annotation. Caller must have WRITE permission
  // for the associated annotation set.
  //
  // The following fields are required:
  //
  // * [annotationSetId][google.genomics.v1.Annotation.annotation_set_id]
  // * [referenceName][google.genomics.v1.Annotation.reference_name] or
  //   [referenceId][google.genomics.v1.Annotation.reference_id]
  //
  // ### Transcripts
  //
  // For annotations of type TRANSCRIPT, the following fields of
  // [transcript][google.genomics.v1.Annotation.transcript] must be provided:
  //
  // * [exons.start][google.genomics.v1.Transcript.Exon.start]
  // * [exons.end][google.genomics.v1.Transcript.Exon.end]
  //
  // All other fields may be optionally specified, unless documented as being
  // server-generated (for example, the `id` field). The annotated
  // range must be no longer than 100Mbp (mega base pairs). See the
  // [Annotation resource][google.genomics.v1.Annotation]
  // for additional restrictions on each field.
  rpc CreateAnnotation(CreateAnnotationRequest) returns (Annotation) {
    option (google.api.http) = {
      post: "/v1/annotations"
      body: "annotation"
    };
  }

  // Creates one or more new annotations atomically. All annotations must
  // belong to the same annotation set. Caller must have WRITE
  // permission for this annotation set. For optimal performance, batch
  // positionally adjacent annotations together.
  //
  // If the request has a systemic issue, such as an attempt to write to
  // an inaccessible annotation set, the entire RPC will fail accordingly. For
  // lesser data issues, when possible an error will be isolated to the
  // corresponding batch entry in the response; the remaining well formed
  // annotations will be created normally.
  //
  // For details on the requirements for each individual annotation resource,
  // see
  // [CreateAnnotation][google.genomics.v1.AnnotationServiceV1.CreateAnnotation].
  rpc BatchCreateAnnotations(BatchCreateAnnotationsRequest)
      returns (BatchCreateAnnotationsResponse) {
    option (google.api.http) = {
      post: "/v1/annotations:batchCreate"
      body: "*"
    };
  }

  // Gets an annotation. Caller must have READ permission
  // for the associated annotation set.
  rpc GetAnnotation(GetAnnotationRequest) returns (Annotation) {
    option (google.api.http) = {
      get: "/v1/annotations/{annotation_id}"
    };
  }

  // Updates an annotation. Caller must have
  // WRITE permission for the associated dataset.
  rpc UpdateAnnotation(UpdateAnnotationRequest) returns (Annotation) {
    option (google.api.http) = {
      put: "/v1/annotations/{annotation_id}"
      body: "annotation"
    };
  }

  // Deletes an annotation. Caller must have WRITE permission for
  // the associated annotation set.
  rpc DeleteAnnotation(DeleteAnnotationRequest)
      returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/annotations/{annotation_id}"
    };
  }

  // Searches for annotations that match the given criteria. Results are
  // ordered by genomic coordinate (by reference sequence, then position).
  // Annotations with equivalent genomic coordinates are returned in an
  // unspecified order. This order is consistent, such that two queries for the
  // same content (regardless of page size) yield annotations in the same order
  // across their respective streams of paginated responses. Caller must have
  // READ permission for the queried annotation sets.
  rpc SearchAnnotations(SearchAnnotationsRequest)
      returns (SearchAnnotationsResponse) {
    option (google.api.http) = {
      post: "/v1/annotations/search"
      body: "*"
    };
  }
}

// An annotation set is a logical grouping of annotations that share consistent
// type information and provenance. Examples of annotation sets include 'all
// genes from refseq', and 'all variant annotations from ClinVar'.
message AnnotationSet {
  // The server-generated annotation set ID, unique across all annotation sets.
  string id = 1;

  // The dataset to which this annotation set belongs.
  string dataset_id = 2;

  // The ID of the reference set that defines the coordinate space for this
  // set's annotations.
  string reference_set_id = 3;

  // The display name for this annotation set.
  string name = 4;

  // The source URI describing the file from which this annotation set was
  // generated, if any.
  string source_uri = 5;

  // The type of annotations contained within this set.
  AnnotationType type = 6;

  // A map of additional annotation set information. This must be of the form
  // map<string, string[]> (string key mapping to a list of string values).
  map<string, google.protobuf.ListValue> info = 17;
}

// An annotation describes a region of reference genome. The value of an
// annotation may be one of several canonical types, supplemented by arbitrary
// info tags. An annotation is not inherently associated with a specific
// sample or individual (though a client could choose to use annotations in
// this way). Example canonical annotation types are `GENE` and
// `VARIANT`.
message Annotation {
  // The server-generated annotation ID, unique across all annotations.
  string id = 1;

  // The annotation set to which this annotation belongs.
  string annotation_set_id = 2;

  // The display name of this annotation.
  string name = 3;

  // The ID of the Google Genomics reference associated with this range.
  string reference_id = 4;

  // The display name corresponding to the reference specified by
  // `referenceId`, for example `chr1`, `1`, or `chrX`.
  string reference_name = 5;

  // The start position of the range on the reference, 0-based inclusive.
  int64 start = 6;

  // The end position of the range on the reference, 0-based exclusive.
  int64 end = 7;

  // Whether this range refers to the reverse strand, as opposed to the forward
  // strand. Note that regardless of this field, the start/end position of the
  // range always refer to the forward strand.
  bool reverse_strand = 8;

  // The data type for this annotation. Must match the containing annotation
  // set's type.
  AnnotationType type = 9;

  // The type-specific payload of this annotation; at most one member is set.
  oneof value {
    // A variant annotation, which describes the effect of a variant on the
    // genome, the coding sequence, and/or higher level consequences at the
    // organism level e.g. pathogenicity. This field is only set for annotations
    // of type `VARIANT`.
    VariantAnnotation variant = 10;

    // A transcript value represents the assertion that a particular region of
    // the reference genome may be transcribed as RNA. An alternative splicing
    // pattern would be represented as a separate transcript object. This field
    // is only set for annotations of type `TRANSCRIPT`.
    Transcript transcript = 11;
  }

  // A map of additional annotation information. This must be of the form
  // map<string, string[]> (string key mapping to a list of string values).
  map<string, google.protobuf.ListValue> info = 12;
}

// A variant annotation describes the effect of a variant on the genome, the
// coding sequence, and/or higher level consequences at the organism level
// e.g. pathogenicity.
message VariantAnnotation {
  // A condition describes the way a variant influences human health.
  message ClinicalCondition {
    // A set of names for the condition.
    repeated string names = 1;

    // The set of external IDs for this condition.
    repeated ExternalId external_ids = 2;

    // The MedGen concept id associated with this condition.
    // Search for these IDs at http://www.ncbi.nlm.nih.gov/medgen/
    string concept_id = 3;

    // The OMIM id for this condition.
    // Search for these IDs at http://omim.org/
    string omim_id = 4;
  }

  enum Type {
    TYPE_UNSPECIFIED = 0;

    // `TYPE_OTHER` should be used when no other Type will suffice.
    // Further explanation of the variant type may be included in the
    // [info][google.genomics.v1.Annotation.info] field.
    TYPE_OTHER = 1;

    // `INSERTION` indicates an insertion.
    INSERTION = 2;

    // `DELETION` indicates a deletion.
    DELETION = 3;

    // `SUBSTITUTION` indicates a block substitution of
    // two or more nucleotides.
    SUBSTITUTION = 4;

    // `SNP` indicates a single nucleotide polymorphism.
    SNP = 5;

    // `STRUCTURAL` indicates a large structural variant,
    // including chromosomal fusions, inversions, etc.
    STRUCTURAL = 6;

    // `CNV` indicates a variation in copy number.
    CNV = 7;
  }

  enum Effect {
    EFFECT_UNSPECIFIED = 0;

    // `EFFECT_OTHER` should be used when no other Effect
    // will suffice.
    EFFECT_OTHER = 1;

    // `FRAMESHIFT` indicates a mutation in which the insertion or
    // deletion of nucleotides resulted in a frameshift change.
    FRAMESHIFT = 2;

    // `FRAME_PRESERVING_INDEL` indicates a mutation in which a
    // multiple of three nucleotides has been inserted or deleted, resulting
    // in no change to the reading frame of the coding sequence.
    FRAME_PRESERVING_INDEL = 3;

    // `SYNONYMOUS_SNP` indicates a single nucleotide polymorphism
    // mutation that results in no amino acid change.
    SYNONYMOUS_SNP = 4;

    // `NONSYNONYMOUS_SNP` indicates a single nucleotide
    // polymorphism mutation that results in an amino acid change.
    NONSYNONYMOUS_SNP = 5;

    // `STOP_GAIN` indicates a mutation that leads to the creation
    // of a stop codon at the variant site. Frameshift mutations creating
    // downstream stop codons do not count as `STOP_GAIN`.
    STOP_GAIN = 6;

    // `STOP_LOSS` indicates a mutation that eliminates a
    // stop codon at the variant site.
    STOP_LOSS = 7;

    // `SPLICE_SITE_DISRUPTION` indicates that this variant is
    // found in a splice site for the associated transcript, and alters the
    // normal splicing pattern.
    SPLICE_SITE_DISRUPTION = 8;
  }

  enum ClinicalSignificance {
    CLINICAL_SIGNIFICANCE_UNSPECIFIED = 0;

    // `CLINICAL_SIGNIFICANCE_OTHER` should be used when no other clinical
    // significance value will suffice.
    CLINICAL_SIGNIFICANCE_OTHER = 1;

    UNCERTAIN = 2;

    BENIGN = 3;

    LIKELY_BENIGN = 4;

    LIKELY_PATHOGENIC = 5;

    PATHOGENIC = 6;

    DRUG_RESPONSE = 7;

    HISTOCOMPATIBILITY = 8;

    CONFERS_SENSITIVITY = 9;

    RISK_FACTOR = 10;

    ASSOCIATION = 11;

    PROTECTIVE = 12;

    // `MULTIPLE_REPORTED` should be used when multiple clinical
    // significances are reported for a variant. The original clinical
    // significance values may be provided in the `info` field.
    MULTIPLE_REPORTED = 13;
  }

  // Type has been adapted from ClinVar's list of variant types.
  Type type = 1;

  // Effect of the variant on the coding sequence.
  Effect effect = 2;

  // The alternate allele for this variant. If multiple alternate alleles
  // exist at this location, create a separate variant for each one, as they
  // may represent distinct conditions.
  string alternate_bases = 3;

  // Google annotation ID of the gene affected by this variant. This should
  // be provided when the variant is created.
  string gene_id = 4;

  // Google annotation IDs of the transcripts affected by this variant. These
  // should be provided when the variant is created.
  repeated string transcript_ids = 5;

  // The set of conditions associated with this variant.
  // A condition describes the way a variant influences human health.
  repeated ClinicalCondition conditions = 6;

  // Describes the clinical significance of a variant.
  // It is adapted from the ClinVar controlled vocabulary for clinical
  // significance described at:
  // http://www.ncbi.nlm.nih.gov/clinvar/docs/clinsig/
  ClinicalSignificance clinical_significance = 7;
}

// A transcript represents the assertion that a particular region of the
// reference genome may be transcribed as RNA.
message Transcript {
  // One exon of a transcript, expressed as a range on the annotation's
  // reference sequence.
  message Exon {
    // The start position of the exon on this annotation's reference sequence,
    // 0-based inclusive. Note that this is relative to the reference start, and
    // *not* the containing annotation start.
    int64 start = 1;

    // The end position of the exon on this annotation's reference sequence,
    // 0-based exclusive. Note that this is relative to the reference start, and
    // *not* the containing annotation start.
    int64 end = 2;

    // The frame of this exon. Contains a value of 0, 1, or 2, which indicates
    // the offset of the first coding base of the exon within the reading frame
    // of the coding DNA sequence, if any. This field is dependent on the
    // strandedness of this annotation (see
    // [Annotation.reverse_strand][google.genomics.v1.Annotation.reverse_strand]).
    // For forward stranded annotations, this offset is relative to the
    // [exon.start][google.genomics.v1.Transcript.Exon.start]. For reverse
    // strand annotations, this offset is relative to the
    // [exon.end][google.genomics.v1.Transcript.Exon.end] `- 1`.
    //
    // Unset if this exon does not intersect the coding sequence. Upon creation
    // of a transcript, the frame must be populated for all or none of the
    // coding exons.
    google.protobuf.Int32Value frame = 3;
  }

  // The range of the coding sequence of a transcript, expressed on the
  // annotation's reference sequence.
  message CodingSequence {
    // The start of the coding sequence on this annotation's reference sequence,
    // 0-based inclusive. Note that this position is relative to the reference
    // start, and *not* the containing annotation start.
    int64 start = 1;

    // The end of the coding sequence on this annotation's reference sequence,
    // 0-based exclusive. Note that this position is relative to the reference
    // start, and *not* the containing annotation start.
    int64 end = 2;
  }

  // The annotation ID of the gene from which this transcript is transcribed.
  string gene_id = 1;

  // The exons that compose
  // this transcript. This field should be unset for genomes where transcript
  // splicing does not occur, for example prokaryotes.
  //
  // Introns are regions of the transcript that are not included in the
  // spliced RNA product. Though not explicitly modeled here, intron ranges can
  // be deduced; all regions of this transcript that are not exons are introns.
  //
  // Exonic sequences do not necessarily code for a translational product
  // (amino acids). Only the regions of exons bounded by the
  // [codingSequence][google.genomics.v1.Transcript.coding_sequence] correspond
  // to coding DNA sequence.
  //
  // Exons are ordered by start position and may not overlap.
  repeated Exon exons = 2;

  // The range of the coding sequence for this transcript, if any. To determine
  // the exact ranges of coding sequence, intersect this range with those of the
  // [exons][google.genomics.v1.Transcript.exons], if any. If there are any
  // [exons][google.genomics.v1.Transcript.exons], the
  // [codingSequence][google.genomics.v1.Transcript.coding_sequence] must start
  // and end within them.
  //
  // Note that in some cases, the reference genome will not exactly match the
  // observed mRNA transcript e.g. due to variance in the source genome from
  // reference. In these cases,
  // [exon.frame][google.genomics.v1.Transcript.Exon.frame] will not necessarily
  // match the expected reference reading frame and coding exon reference bases
  // cannot necessarily be concatenated to produce the original transcript mRNA.
  CodingSequence coding_sequence = 3;
}

// An identifier assigned by an external data source.
message ExternalId {
  // The name of the source of this data.
  string source_name = 1;

  // The id used by the source of this data.
  string id = 2;
}

// Request message for the CreateAnnotationSet RPC.
message CreateAnnotationSetRequest {
  // The annotation set to create.
  AnnotationSet annotation_set = 1;
}

// Request message for the GetAnnotationSet RPC.
message GetAnnotationSetRequest {
  // The ID of the annotation set to be retrieved.
  string annotation_set_id = 1;
}

// Request message for the UpdateAnnotationSet RPC.
message UpdateAnnotationSetRequest {
  // The ID of the annotation set to be updated.
  string annotation_set_id = 1;

  // The new annotation set.
  AnnotationSet annotation_set = 2;

  // An optional mask specifying which fields to update. Mutable fields are
  // [name][google.genomics.v1.AnnotationSet.name],
  // [source_uri][google.genomics.v1.AnnotationSet.source_uri], and
  // [info][google.genomics.v1.AnnotationSet.info]. If unspecified, all
  // mutable fields will be updated.
  google.protobuf.FieldMask update_mask = 3;
}

// Request message for the DeleteAnnotationSet RPC.
message DeleteAnnotationSetRequest {
  // The ID of the annotation set to be deleted.
  string annotation_set_id = 1;
}

// Request message for the SearchAnnotationSets RPC.
message SearchAnnotationSetsRequest {
  // Required. The dataset IDs to search within. Caller must have `READ` access
  // to these datasets.
  repeated string dataset_ids = 1;

  // If specified, only annotation sets associated with the given reference set
  // are returned.
  string reference_set_id = 2;

  // Only return annotation sets for which a substring of the name matches this
  // string (case insensitive).
  string name = 3;

  // If specified, only annotation sets that have any of these types are
  // returned.
  repeated AnnotationType types = 4;

  // The continuation token, which is used to page through large result sets.
  // To get the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 5;

  // The maximum number of results to return in a single page. If unspecified,
  // defaults to 128. The maximum value is 1024.
  int32 page_size = 6;
}

// Response message for the SearchAnnotationSets RPC.
message SearchAnnotationSetsResponse {
  // The matching annotation sets.
  repeated AnnotationSet annotation_sets = 1;

  // The continuation token, which is used to page through large result sets.
  // Provide this value in a subsequent request to return the next page of
  // results. This field will be empty if there aren't any additional results.
  string next_page_token = 2;
}

// Request message for the CreateAnnotation RPC.
message CreateAnnotationRequest {
  // The annotation to be created.
  Annotation annotation = 1;
}

// Request message for the BatchCreateAnnotations RPC.
message BatchCreateAnnotationsRequest {
  // The annotations to be created. At most 4096 can be specified in a single
  // request.
  repeated Annotation annotations = 1;

  // A unique request ID which enables the server to detect duplicated requests.
  // If provided, duplicated requests will result in the same response; if not
  // provided, duplicated requests may result in duplicated data. For a given
  // annotation set, callers should not reuse `request_id`s when writing
  // different batches of annotations - behavior in this case is undefined.
  // A common approach is to use a UUID. For batch jobs where worker crashes are
  // a possibility, consider using some unique variant of a worker or run ID.
  string request_id = 2;
}

// Response message for the BatchCreateAnnotations RPC.
message BatchCreateAnnotationsResponse {
  // The outcome of creating a single annotation from the batch.
  message Entry {
    // The creation status.
    google.rpc.Status status = 1;

    // The created annotation, if creation was successful.
    Annotation annotation = 2;
  }

  // The resulting per-annotation entries, ordered consistently with the
  // original request.
  repeated Entry entries = 1;
}

// Request message for the GetAnnotation RPC.
message GetAnnotationRequest {
  // The ID of the annotation to be retrieved.
  string annotation_id = 1;
}

// Request message for the UpdateAnnotation RPC.
message UpdateAnnotationRequest {
  // The ID of the annotation to be updated.
  string annotation_id = 1;

  // The new annotation.
  Annotation annotation = 2;

  // An optional mask specifying which fields to update. Mutable fields are
  // [name][google.genomics.v1.Annotation.name],
  // [variant][google.genomics.v1.Annotation.variant],
  // [transcript][google.genomics.v1.Annotation.transcript], and
  // [info][google.genomics.v1.Annotation.info]. If unspecified, all mutable
  // fields will be updated.
  google.protobuf.FieldMask update_mask = 3;
}

// Request message for the DeleteAnnotation RPC.
message DeleteAnnotationRequest {
  // The ID of the annotation to be deleted.
  string annotation_id = 1;
}

// Request message for the SearchAnnotations RPC.
message SearchAnnotationsRequest {
  // Required. The annotation sets to search within. The caller must have
  // `READ` access to these annotation sets.
  // All queried annotation sets must have the same type.
  repeated string annotation_set_ids = 1;

  // Required. `reference_id` or `reference_name` must be set.
  oneof reference {
    // The ID of the reference to query.
    string reference_id = 2;

    // The name of the reference to query, within the reference set associated
    // with this query.
    string reference_name = 3;
  }

  // The start position of the range on the reference, 0-based inclusive. If
  // specified,
  // [referenceId][google.genomics.v1.SearchAnnotationsRequest.reference_id] or
  // [referenceName][google.genomics.v1.SearchAnnotationsRequest.reference_name]
  // must be specified. Defaults to 0.
  int64 start = 4;

  // The end position of the range on the reference, 0-based exclusive. If
  // specified,
  // [referenceId][google.genomics.v1.SearchAnnotationsRequest.reference_id] or
  // [referenceName][google.genomics.v1.SearchAnnotationsRequest.reference_name]
  // must be specified. Defaults to the length of the reference.
  int64 end = 5;

  // The continuation token, which is used to page through large result sets.
  // To get the next page of results, set this parameter to the value of
  // `nextPageToken` from the previous response.
  string page_token = 6;

  // The maximum number of results to return in a single page. If unspecified,
  // defaults to 256. The maximum value is 2048.
  int32 page_size = 7;
}

// Response message for the SearchAnnotations RPC.
message SearchAnnotationsResponse {
  // The matching annotations.
  repeated Annotation annotations = 1;

  // The continuation token, which is used to page through large result sets.
  // Provide this value in a subsequent request to return the next page of
  // results. This field will be empty if there aren't any additional results.
  string next_page_token = 2;
}

// When an [Annotation][google.genomics.v1.Annotation] or
// [AnnotationSet][google.genomics.v1.AnnotationSet] is created, if `type` is
// not specified it will be set to `GENERIC`.
enum AnnotationType {
  ANNOTATION_TYPE_UNSPECIFIED = 0;

  // A `GENERIC` annotation type should be used when no other annotation
  // type will suffice. This represents an untyped annotation of the reference
  // genome.
  GENERIC = 1;

  // A `VARIANT` annotation type.
  VARIANT = 2;

  // A `GENE` annotation type represents the existence of a gene at the
  // associated reference coordinates. The start coordinate is typically the
  // gene's transcription start site and the end is typically the end of the
  // gene's last exon.
  GENE = 3;

  // A `TRANSCRIPT` annotation type represents the assertion that a
  // particular region of the reference genome may be transcribed as RNA.
  TRANSCRIPT = 4;
}