// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.genomics.v1;

import "google/api/annotations.proto";

option cc_enable_arenas = true;
option go_package = "google.golang.org/genproto/googleapis/genomics/v1;genomics";
option java_multiple_files = true;
option java_outer_classname = "CigarProto";
option java_package = "com.google.genomics.v1";

// A single CIGAR operation.
message CigarUnit {
  // Describes the different types of CIGAR alignment operations that exist.
  // Used wherever CIGAR alignments are used.
  enum Operation {
    // No operation was specified. This is the zero value, so it is what a
    // reader sees when the `operation` field has not been set.
    OPERATION_UNSPECIFIED = 0;

    // An alignment match indicates that a sequence can be aligned to the
    // reference without evidence of an INDEL. Unlike the
    // `SEQUENCE_MATCH` and `SEQUENCE_MISMATCH` operators,
    // the `ALIGNMENT_MATCH` operator does not indicate whether the
    // reference and read sequences are an exact match. This operator is
    // equivalent to SAM's `M`.
    ALIGNMENT_MATCH = 1;

    // The insert operator indicates that the read contains evidence of bases
    // being inserted into the reference. This operator is equivalent to SAM's
    // `I`.
    INSERT = 2;

    // The delete operator indicates that the read contains evidence of bases
    // being deleted from the reference. This operator is equivalent to SAM's
    // `D`.
    DELETE = 3;

    // The skip operator indicates that this read skips a long segment of the
    // reference, but the bases have not been deleted. This operator is commonly
    // used when working with RNA-seq data, where reads may skip long segments
    // of the reference between exons. This operator is equivalent to SAM's
    // `N`.
    SKIP = 4;

    // The soft clip operator indicates that bases at the start/end of a read
    // have not been considered during alignment. This may occur if the majority
    // of a read maps, except for low quality bases at the start/end of a read.
    // This operator is equivalent to SAM's `S`. Bases that are soft
    // clipped will still be stored in the read.
    CLIP_SOFT = 5;

    // The hard clip operator indicates that bases at the start/end of a read
    // have been omitted from this alignment. This may occur if this linear
    // alignment is part of a chimeric alignment, or if the read has been
    // trimmed (for example, during error correction or to trim poly-A tails for
    // RNA-seq). This operator is equivalent to SAM's `H`.
    CLIP_HARD = 6;

    // The pad operator indicates that there is padding in an alignment. This
    // operator is equivalent to SAM's `P`.
    PAD = 7;

    // This operator indicates that this portion of the aligned sequence exactly
    // matches the reference. This operator is equivalent to SAM's `=`.
    SEQUENCE_MATCH = 8;

    // This operator indicates that this portion of the aligned sequence is an
    // alignment match to the reference, but a sequence mismatch. This can
    // indicate a SNP or a read error. This operator is equivalent to SAM's
    // `X`.
    SEQUENCE_MISMATCH = 9;
  }

  // The type of alignment operation that this unit represents.
  Operation operation = 1;

  // The number of genomic bases that the operation runs for. Required.
  int64 operation_length = 2;

  // `referenceSequence` is only used at mismatches
  // (`SEQUENCE_MISMATCH`) and deletions (`DELETE`).
  // Filling this field replaces SAM's MD tag. If the relevant information is
  // not available, this field is unset.
  string reference_sequence = 3;
}