syntax = "proto3"; package annonars.genes.base; // Information from ACMG secondary findings list. message AcmgSecondaryFindingRecord { // The HGNC ID. string hgnc_id = 1; // The Ensembl gene ID. string ensembl_gene_id = 2; // The NCBI gene ID. string ncbi_gene_id = 3; // The HGNC gene symbol. string gene_symbol = 4; // The MIM gene ID. string mim_gene_id = 5; // The disease phenotype. string disease_phenotype = 6; // The disease MIM id. string disorder_mim = 7; // The phenotype category. string phenotype_category = 8; // The mode of inheritance. string inheritance = 9; // The version of the ACMG SF list of first appearance. string sf_list_version = 10; // The variants to report according to ACMG SF. string variants_to_report = 11; } /// Enumeration for Haploinsufficiency / Triplosensitivity scores. enum ClingenDosageScore { // unknown CLINGEN_DOSAGE_SCORE_UNKNOWN = 0; // Sufficient evidence for dosage pathogenicity CLINGEN_DOSAGE_SCORE_SUFFICIENT_EVIDENCE_AVAILABLE = 1; // Some evidence for dosage pathogenicity CLINGEN_DOSAGE_SCORE_SOME_EVIDENCE_AVAILABLE = 2; // Little evidence for dosage pathogenicity CLINGEN_DOSAGE_SCORE_LITTLE_EVIDENCE = 3; // No evidence available CLINGEN_DOSAGE_SCORE_NO_EVIDENCE_AVAILABLE = 4; // Gene associated with autosomal recessive phenotype CLINGEN_DOSAGE_SCORE_RECESSIVE = 5; // Dosage sensitivity unlikely CLINGEN_DOSAGE_SCORE_UNLIKELY = 6; } /// `ClinGen` gene dosage sensitivity record. message ClingenDosageRecord { // Gene symbol. string gene_symbol = 1; // NCBI gene ID. string ncbi_gene_id = 2; // Genomic location GRCh37. string genomic_location_37 = 3; // Genomic location GRCh38. string genomic_location_38 = 4; // Haploinsufficiency score. ClingenDosageScore haploinsufficiency_score = 5; // Triplosensitivity score. ClingenDosageScore triplosensitivity_score = 6; // Haploinsufficiency Disease ID. optional string haploinsufficiency_disease_id = 7; // Haploinsufficiency Disease ID. 
optional string triplosensitivity_disease_id = 8; } // Decipher HI Predictions message DecipherHiRecord { // HGNC identifier. string hgnc_id = 1; // Official HGNC gene symbol. string hgnc_symbol = 2; // P(HI) prediction from DECIPHER HI. double p_hi = 3; // Percent HI index. double hi_index = 4; } // Information from DOMINO. message DominoRecord { // Gene symbol. string gene_symbol = 1; // The score. double score = 2; } // Code for data from the dbNSFP database. message DbnsfpRecord { // Gene symbol from HGNC. string gene_name = 1; // Ensembl gene id (from HGNC). optional string ensembl_gene = 2; // Chromosome number (from HGNC). optional string chr = 3; // Old gene symbol (from HGNC). repeated string gene_old_names = 4; // Other gene names (from HGNC). repeated string gene_other_names = 5; // Uniprot acc (from HGNC). optional string uniprot_acc = 6; // Uniprot id (from HGNC). optional string uniprot_id = 7; // Uniprot id (from HGNC). optional string entrez_gene_id = 8; // CCDS id (from HGNC). repeated string ccds_id = 9; // Refseq gene id (from HGNC). repeated string refseq_id = 10; // UCSC gene id (from HGNC). optional string ucsc_id = 11; // MIM gene id (from OMIM). repeated string mim_id = 12; // MIM gene id from OMIM. repeated string omim_id = 13; // Gene full name (from HGNC). optional string gene_full_name = 14; // Pathway description from Uniprot. optional string pathway_uniprot = 15; // Short name of the Pathway(s) the gene belongs to (from BioCarta). repeated string pathway_biocarta_short = 16; // Full name(s) of the Pathway(s) the gene belongs to (from BioCarta). repeated string pathway_biocarta_full = 17; // Pathway(s) the gene belongs to (from ConsensusPathDB). repeated string pathway_consensus_path_db = 18; // ID(s) of the Pathway(s) the gene belongs to (from KEGG). repeated string pathway_kegg_id = 19; // Full name(s) of the Pathway(s) the gene belongs to (from KEGG). 
repeated string pathway_kegg_full = 20; // Function description of the gene (from Uniprot). repeated string function_description = 21; // Disease(s) the gene caused or associated with (from Uniprot). repeated string disease_description = 22; // MIM id(s) of the phenotype the gene caused or associated with (from Uniprot). repeated string mim_phenotype_id = 23; // MIM disease name(s) with MIM id(s) in [] (from Uniprot). repeated string mim_disease = 24; // Orphanet Number of the disorder the gene caused or associated with. repeated string orphanet_disorder_id = 25; // Disorder name from Orphanet. repeated string orphanet_disorder = 26; // The type of association beteen the gene and the disorder in Orphanet. repeated string orphanet_association_type = 27; // Trait(s) the gene associated with (from GWAS catalog). repeated string trait_association_gwas = 28; // ID of the mapped Human Phenotype Ontology. repeated string hpo_id = 29; // Name of the mapped Human Phenotype Ontology. repeated string hpo_name = 30; // GO terms for biological process. repeated string go_biological_process = 31; // GO terms for cellular component. repeated string go_cellular_component = 32; // GO terms for molecular function. repeated string go_molecular_function = 33; // Tissue specificity description from Uniprot. repeated string tissue_specificity_uniprot = 34; // Tissues/organs the gene expressed in (egenetics data from BioMart). repeated string expression_egenetics = 35; // Tissues/organs the gene expressed in (GNF/Atlas data from BioMart). repeated string expression_gnf_atlas = 36; // The interacting genes from IntAct. repeated string interactions_intact = 37; // The interacting genes from BioGRID. repeated string interactions_biogrid = 38; // The interacting genes from ConsensusPathDB. repeated string interactions_consensus_path_db = 39; // Estimated probability of haploinsufficiency of the gene (from // doi:10.1371/journal.pgen.1001154). 
optional double haploinsufficiency = 40; // Estimated probability of haploinsufficiency of the gene (from // doi:10.1093/bioinformatics/btx028). optional double hipred_score = 41; // HIPred prediction of haploinsufficiency of the gene. Y(es) or N(o). (from // doi:10.1093/bioinformatics/btx028). optional string hipred = 42; // A score predicting the gene haploinsufficiency. The higher the score the more likely the // gene is haploinsufficient (from doi: 10.1093/nar/gkv474). optional double ghis = 43; // Estimated probability that gene is a recessive disease gene (from // DOI:10.1126/science.1215040). optional double prec = 44; // Known recessive status of the gene (from DOI:10.1126/science.1215040) "lof-tolerant = // seen in homozygous state in at least one 1000G individual" "recessive = known OMIM // recessive disease" (original annotations from DOI:10.1126/science.1215040). optional string known_rec_info = 45; // Residual Variation Intolerance Score, a measure of intolerance of mutational burden, the // higher the score the more tolerant to mutational burden the gene is. Based on EVS // (ESP6500) data. from doi:10.1371/journal.pgen.1003709. optional double rvis_evs = 46; // The percentile rank of the gene based on RVIS, the higher the percentile the more // tolerant to mutational burden the gene is. Based on EVS (ESP6500) data. optional double rvis_percentile_evs = 47; // "A gene's corresponding FDR p-value for preferential LoF depletion among the ExAC // population. Lower FDR corresponds with genes that are increasingly depleted of LoF // variants." cited from RVIS document. optional double lof_fdr_exac = 48; // "ExAC-based RVIS; setting 'common' MAF filter at 0.05% in at least one of the six // individual ethnic strata from ExAC." cited from RVIS document. optional double rvis_exac = 49; // "Genome-Wide percentile for the new ExAC-based RVIS; setting 'common' MAF filter at 0.05% // in at least one of the six individual ethnic strata from ExAC." 
cited from RVIS document. optional double rvis_percentile_exac = 50; // "the probability of being loss-of-function intolerant (intolerant of both heterozygous // and homozygous lof variants)" based on ExAC r0.3 data. optional double exac_pli = 51; // "the probability of being intolerant of homozygous, but not heterozygous lof variants" // based on ExAC r0.3 data. optional double exac_prec = 52; // "the probability of being tolerant of both heterozygous and homozygous lof variants" // based on ExAC r0.3 data. optional double exac_pnull = 53; // "the probability of being loss-of-function intolerant (intolerant of both heterozygous // and homozygous lof variants)" based on ExAC r0.3 nonTCGA subset. optional double exac_nontcga_pli = 54; // "the probability of being intolerant of homozygous, but not heterozygous lof variants" // based on ExAC r0.3 nonTCGA subset. optional double exac_nontcga_prec = 55; // "the probability of being tolerant of both heterozygous and homozygous lof variants" // based on ExAC r0.3 nonTCGA subset. optional double exac_nontcga_pnull = 56; // "the probability of being loss-of-function intolerant (intolerant of both heterozygous // and homozygous lof variants)" based on ExAC r0.3 nonpsych subset. optional double exac_nonpsych_pli = 57; // "the probability of being intolerant of homozygous, but not heterozygous lof variants" // based on ExAC r0.3 nonpsych subset. optional double exac_nonpsych_prec = 58; // "the probability of being tolerant of both heterozygous and homozygous lof variants" // based on ExAC r0.3 nonpsych subset/ optional double exac_nonpsych_pnull = 59; // "the probability of being loss-of-function intolerant (intolerant of both heterozygous // and homozygous lof variants)" based on gnomAD 2.1 data. optional double gnomad_pli = 60; // "the probability of being intolerant of homozygous, but not heterozygous lof variants" // based on gnomAD 2.1 data. 
optional double gnomad_prec = 61; // "the probability of being tolerant of both heterozygous and homozygous lof variants" // based on gnomAD 2.1 data. optional double gnomad_pnull = 62; // "Winsorised deletion intolerance z-score" based on ExAC r0.3.1 CNV data. optional double exac_del_score = 63; // "Winsorised duplication intolerance z-score" based on ExAC r0.3.1 CNV data. optional double exac_dup_score = 64; // "Winsorised cnv intolerance z-score" based on ExAC r0.3.1 CNV data. optional double exac_cnv_score = 65; // "Gene is in a known region of recurrent CNVs mediated by tandem segmental duplications // and intolerance scores are more likely to be biased or noisy." from ExAC r0.3.1 CNV // release. optional string exac_cnv_flag = 66; // gene damage index score, "a genome-wide, gene-level metric of the mutational damage that // has accumulated in the general population" from doi: 10.1073/pnas.1518646112. The higher // the score the less likely the gene is to be responsible for monogenic diseases. optional double gdi = 67; // Phred-scaled GDI scores. optional double gdi_phred = 68; // gene damage prediction (low/medium/high) by GDI for all diseases., optional string gdp_all_disease_causing = 69; // gene damage prediction (low/medium/high) by GDI for all Mendelian diseases. optional string gdp_all_mendelian = 70; // gene damage prediction (low/medium/high) by GDI for Mendelian autosomal dominant // diseases. optional string gdp_all_mendelian_ad = 71; // gene damage prediction (low/medium/high) by GDI for Mendelian autosomal recessive // diseases. optional string gdp_mendelian_ar = 72; // gene damage prediction (low/medium/high) by GDI for all primary immunodeficiency // diseases. optional string gdp_pid = 73; // gene damage prediction (low/medium/high) by GDI for primary immunodeficiency autosomal // dominant diseases. optional string gdp_pid_ad = 74; // gene damage prediction (low/medium/high) by GDI for primary immunodeficiency autosomal // recessive diseases. 
optional string gdp_pid_ar = 75; // gene damage prediction (low/medium/high) by GDI for all cancer disease. optional string gdp_cancer = 76; // gene damage prediction (low/medium/high) by GDI for cancer recessive disease. optional string gdb_cancer_rec = 77; // gene damage prediction (low/medium/high) by GDI for cancer dominant disease. optional string gdp_cancer_dom = 78; // A percentile score for gene intolerance to functional change. The lower the score the // higher gene intolerance to functional change. For details see doi: // 10.1093/bioinformatics/btv602. optional double loftool_score = 79; // The fraction of individuals in the 1000 Genomes Project data (N=2504) who are either // Heterozygote or Homozygote of LOF SNVs whose MAF<0.005. This fraction is from a method // for ranking genes based on mutational burden called SORVA (Significance Of Rare // VAriants). Please see doi: 10.1101/103218 for details. optional double sorva_lof_maf_5_het_or_hom = 80; // The fraction of individuals in the 1000 Genomes Project data (N=2504) who are either // Compound Heterozygote or Homozygote of LOF SNVs whose MAF<0.005. This fraction is from a // method for ranking genes based on mutational burden called SORVA (Significance Of Rare // VAriants). Please see doi: 10.1101/103218 for details. optional double sorva_lof_maf_5_hom_or_comphet = 81; // The fraction of individuals in the 1000 Genomes Project data (N=2504) who are either // Heterozygote or Homozygote of LOF SNVs whose MAF<0.001. This fraction is from a method // for ranking genes based on mutational burden called SORVA (Significance Of Rare // VAriants). Please see doi: 10.1101/103218 for details. optional double sorva_lof_maf_1_het_or_hom = 82; // The fraction of individuals in the 1000 Genomes Project data (N=2504) who are either // Compound Heterozygote or Homozygote of LOF SNVs whose MAF<0.001. 
This fraction is from a // method for ranking genes based on mutational burden called SORVA (Significance Of Rare // VAriants). Please see doi: 10.1101/103218 for details. optional double sorva_lof_maf_1_hom_or_comphet = 83; // The fraction of individuals in the 1000 Genomes Project data (N=2504) who are either // Heterozygote or Homozygote of LOF or missense SNVs whose MAF<0.005. This fraction is from // a method for ranking genes based on mutational burden called SORVA (Significance Of Rare // VAriants). Please see doi: 10.1101/103218 for details. optional double sorva_lof_or_mis_maf_5_het_or_hom = 84; // The fraction of individuals in the 1000 Genomes Project data (N=2504) who are either // Compound Heterozygote or Homozygote of LOF or missense SNVs whose MAF<0.005. This // fraction is from a method for ranking genes based on mutational burden called SORVA // (Significance Of Rare VAriants). Please see doi: 10.1101/103218 for details. optional double sorva_lof_or_mis_maf_5_hom_or_comphet = 85; // The fraction of individuals in the 1000 Genomes Project data (N=2504) who are either // Heterozygote or Homozygote of LOF or missense SNVs whose MAF<0.001. This fraction is from // a method for ranking genes based on mutational burden called SORVA (Significance Of Rare // VAriants). Please see doi: 10.1101/103218 for details. optional double sorva_lof_or_mis_maf_1_het_or_hom = 86; // The fraction of individuals in the 1000 Genomes Project data (N=2504) who are either // Compound Heterozygote or Homozygote of LOF or missense SNVs whose MAF<0.001. This // fraction is from a method for ranking genes based on mutational burden called SORVA // (Significance Of Rare VAriants). Please see doi: 10.1101/103218 for details. optional double sorva_lof_or_mis_maf_1_hom_or_comphet = 87; // Essential ("E") or Non-essential phenotype-changing ("N") based on Mouse Genome // Informatics database. from doi:10.1371/journal.pgen.1003484. 
optional string essential_gene = 88; // Essential ("E") or Non-essential phenotype-changing ("N") based on large scale CRISPR // experiments. from doi: 10.1126/science.aac7041. optional string essential_gene_crispr = 89; // Essential ("E"), context-Specific essential ("S"), or Non-essential phenotype-changing // ("N") based on large scale CRISPR experiments. from // http://dx.doi.org/10.1016/j.cell.2015.11.015. optional string essential_gene_crispr2 = 90; // Essential ("E"), HAP1-Specific essential ("H"), KBM7-Specific essential ("K"), or // Non-essential phenotype-changing ("N"), based on large scale mutagenesis experiments. // from doi: 10.1126/science.aac7557. optional string essential_gene_gene_trap = 91; // A probability prediction of the gene being essential. From // doi:10.1371/journal.pcbi.1002886. optional double gene_indispensability_score = 92; // Essential ("E") or loss-of-function tolerant ("N") based on Gene_indispensability_score. optional string gene_indispensability_pred = 93; // Homolog mouse gene name from MGI. optional string mgi_mouse_gene = 94; // Phenotype description for the homolog mouse gene from MGI. optional string mgi_mouse_phenotype = 95; // Homolog zebrafish gene name from ZFIN. optional string zfin_zebrafish_gene = 96; // Affected structure of the homolog zebrafish gene from ZFIN. optional string zfin_zebrafish_structure = 97; // Phenotype description for the homolog zebrafish gene from ZFIN. optional string zfin_zebrafish_phenotype_quality = 98; // Phenotype tag for the homolog zebrafish gene from ZFIN" optional string zfin_zebrafish_phenotype_tag = 99; } // Code for data from the gnomAD constraints. message GnomadConstraintsRecord { /// The Ensembl gene ID. string ensembl_gene_id = 1; /// The NCBI gene ID. string entrez_id = 2; /// The HGNC gene symbol. string gene_symbol = 3; /// The expected number of loss-of-function variants. optional double exp_lof = 4; /// The expected number of missense variants. 
optional double exp_mis = 5; /// The expected number of synonymous variants. optional double exp_syn = 6; /// The missense-related Z-score. optional double mis_z = 7; /// The observed number of loss-of-function variants. optional uint32 obs_lof = 8; /// The observed number of missense variants. optional uint32 obs_mis = 9; /// The observed number of synonymous variants. optional uint32 obs_syn = 10; /// The loss-of-function observed/expected ratio. optional double oe_lof = 11; /// The lower bound of the loss-of-function observed/expected ratio. optional double oe_lof_lower = 12; /// The upper bound of the loss-of-function observed/expected ratio. optional double oe_lof_upper = 13; /// The missense observed/expected ratio. optional double oe_mis = 14; /// The lower bound of the missense observed/expected ratio. optional double oe_mis_lower = 15; /// The upper bound of the missense observed/expected ratio. optional double oe_mis_upper = 16; /// The synonymous observed/expected ratio. optional double oe_syn = 17; /// The lower bound of the synonymous observed/expected ratio. optional double oe_syn_lower = 18; /// The upper bound of the synonymous observed/expected ratio. optional double oe_syn_upper = 19; /// The probability of loss-of-function intolerance (pLI score). optional double pli = 20; /// The synonymous-related Z-score. optional double syn_z = 21; /// The probability of loss-of-function intolerance (pLI score) from ExAC. optional double exac_pli = 22; /// The observed number of loss-of-function variants from ExAC. optional double exac_obs_lof = 23; /// The expected number of loss-of-function variants from ExAC. optional double exac_exp_lof = 24; /// The loss-of-function observed/expected ratio from ExAC. optional double exac_oe_lof = 25; } // Status of the symbol report, which can be either "Approved" or "Entry Withdrawn". enum HgncStatus { // unknown HGNC_STATUS_UNKNOWN = 0; // Approved by HGNC. HGNC_STATUS_APPROVED = 1; // Withdrawn by HGNC. 
HGNC_STATUS_WITHDRAWN = 2; } // Information from the locus-specific dabase. message HgncLsdb { // The name of the Locus Specific Mutation Database. string name = 1; // The URL for the gene. string url = 2; } // A record from the HGNC database. message HgncRecord { // HGNC ID. A unique ID created by the HGNC for every approved symbol. string hgnc_id = 1; // The HGNC approved gene symbol. string symbol = 2; // HGNC approved name for the gene. string name = 3; // A group name for a set of related locus types as defined by the HGNC // (e.g. non-coding RNA). optional string locus_group = 4; // The locus type as defined by the HGNC (e.g. RNA, transfer). optional string locus_type = 5; // Status of the symbol report. HgncStatus status = 6; // Cytogenetic location of the gene (e.g. 2q34). optional string location = 7; // Sortable cytogenic location of the gene (e.g. 02q34). optional string location_sortable = 8; // Other symbols used to refer to this gene. repeated string alias_symbol = 9; // Other names used to refer to this gene. repeated string alias_name = 10; // Prevous symbols used to refer to this gene. repeated string prev_symbol = 11; // Previous names used to refer to this gene. repeated string prev_name = 12; // Name given to a gene group. repeated string gene_group = 13; // ID used to designate a gene group. repeated uint32 gene_group_id = 14; // The date the entry was first approved. optional string date_approved_reserved = 15; // The date the gene symbol was last changed. optional string date_symbol_changed = 16; // The date the gene name was last changed. optional string date_name_changed = 17; // Date the entry was last modified. optional string date_modified = 18; // Entrez gene id. optional string entrez_id = 19; // Ensembl gene id. optional string ensembl_gene_id = 20; // Vega gene id. optional string vega_id = 21; // UCSC gene id. optional string ucsc_id = 22; // ENA accession number(s). repeated string ena = 23; // RefSeq nucleotide accession(s). 
repeated string refseq_accession = 24; // Consensus CDS ID(ds). repeated string ccds_id = 25; // Uniprot IDs. repeated string uniprot_ids = 26; // Pubmed IDs. repeated uint32 pubmed_id = 27; // Mouse genome informatics database ID(s). repeated string mgd_id = 28; // Rat genome database gene ID(s). repeated string rgd_id = 29; // The name of the Locus Specific Mutation Database and URL for the gene. repeated HgncLsdb lsdb = 30; // Symbol used within COSMIC. optional string cosmic = 31; // OMIM ID(s). repeated string omim_id = 32; // miRBase ID. optional string mirbase = 33; // Homeobox Database ID. optional uint32 homeodb = 34; // snoRNABase ID. optional string snornabase = 35; // Symbol used to link to the SLC tables database at bioparadigms.org // for the gene. optional string bioparadigms_slc = 36; // Orphanet ID. optional uint32 orphanet = 37; // Pseudogene.org. optional string pseudogene_org = 38; // Symbol used within HORDE for the gene. optional string horde_id = 39; // ID used to link to the MEROPS peptidase database. optional string merops = 40; // Symbol used within international ImMunoGeneTics information system. optional string imgt = 41; // The objectId used to link to the IUPHAR/BPS Guide to PHARMACOLOGY // database. optional string iuphar = 42; // Symbol used within the Human Cell Differentiation Molecule database. optional string cd = 43; // ID to link to the Mamit-tRNA database optional uint32 mamit_trnadb = 44; // lncRNA Database ID. optional string lncrnadb = 45; // ENZYME EC accession number. repeated string enzyme_id = 46; // ID used to link to the Human Intermediate Filament Database. optional string intermediate_filament_db = 47; // The HGNC ID that the Alliance of Genome Resources (AGR) use. optional string agr = 48; // NCBI and Ensembl transcript IDs/acessions including the version // number. repeated string mane_select = 49; } // Reference into function record. message RifEntry { // The RIF text. string text = 1; // PubMed IDs. 
repeated uint32 pmids = 2; } // A record from the NCBI gene database. message NcbiRecord { // NCBI Gene ID. string gene_id = 1; // Gene summary. optional string summary = 2; // "Reference Into Function" entry. repeated RifEntry rif_entries = 3; } // Description of an OMIM record. message OmimTerm { // The OMIM ID. string omim_id = 1; // The OMIM label. string label = 2; } // A record from the OMIM gene association. message OmimRecord { // The HGNC gene ID. string hgnc_id = 1; // The associated OMIM records. repeated OmimTerm omim_diseases = 2; } // Description of an ORDO record. message OrphaTerm { // The ORPHA ID. string orpha_id = 1; // The disease name. string label = 2; } // A record from the ORDO gene association. message OrphaRecord { // The HGNC gene ID. string hgnc_id = 1; // The associated ORPHA diseases. repeated OrphaTerm orpha_diseases = 2; } // Entry in the rCNV dosage sensitivity scores (Collins et al., 2022). message RcnvRecord { // The HGNC ID. string hgnc_id = 1; // The pHaplo value. double p_haplo = 2; // The pTriplo value. double p_triplo = 3; } // Entry with sHet information (Weghorn et al., 2019). message ShetRecord { // The HGNC ID. string hgnc_id = 1; // The sHet value. 
double s_het = 2; } // Enumeration for GTEx V8 tissue enum GtexTissue { // unknown GTEX_TISSUE_UNKNOWN = 0; // Adipose Tissue GTEX_TISSUE_ADIPOSE_TISSUE = 1; // Adrenal Gland GTEX_TISSUE_ADRENAL_GLAND = 2; // Bladder GTEX_TISSUE_BLADDER = 3; // Blood GTEX_TISSUE_BLOOD = 4; // Blood Vessel GTEX_TISSUE_BLOOD_VESSEL = 5; // Bone Marrow GTEX_TISSUE_BONE_MARROW = 6; // Brain GTEX_TISSUE_BRAIN = 7; // Breast GTEX_TISSUE_BREAST = 8; // Cervix Uteri GTEX_TISSUE_CERVIX_UTERI = 9; // Colon GTEX_TISSUE_COLON = 10; // Esophagus GTEX_TISSUE_ESOPHAGUS = 11; // Fallopian Tube GTEX_TISSUE_FALLOPIAN_TUBE = 12; // Heart GTEX_TISSUE_HEART = 13; // Kidney GTEX_TISSUE_KIDNEY = 14; // Liver GTEX_TISSUE_LIVER = 15; // Lung GTEX_TISSUE_LUNG = 16; // Muscle GTEX_TISSUE_MUSCLE = 17; // Nerve GTEX_TISSUE_NERVE = 18; // Ovary GTEX_TISSUE_OVARY = 19; // Pancreas GTEX_TISSUE_PANCREAS = 20; // Pituitary GTEX_TISSUE_PITUITARY = 21; // Prostate GTEX_TISSUE_PROSTATE = 22; // Salivary Gland GTEX_TISSUE_SALIVARY_GLAND = 23; // Skin GTEX_TISSUE_SKIN = 24; // Small Intestine GTEX_TISSUE_SMALL_INTESTINE = 25; // Spleen GTEX_TISSUE_SPLEEN = 26; // Stomach GTEX_TISSUE_STOMACH = 27; // Testis GTEX_TISSUE_TESTIS = 28; // Thyroid GTEX_TISSUE_THYROID = 29; // Uterus GTEX_TISSUE_UTERUS = 30; // Vagina GTEX_TISSUE_VAGINA = 31; } // Enumeration for GTEx V8 tissue details enum GtexTissueDetailed { // unknown GTEX_TISSUE_DETAILED_UNKNOWN = 0; // Adipose - Subcutaneous GTEX_TISSUE_DETAILED_ADIPOSE_SUBCUTANEOUS = 1; // Adipose - Visceral (Omentum) GTEX_TISSUE_DETAILED_ADIPOSE_VISCERAL_OMENTUM = 2; // Adrenal Gland GTEX_TISSUE_DETAILED_ADRENAL_GLAND = 3; // Artery - Aorta GTEX_TISSUE_DETAILED_ARTERY_AORTA = 4; // Artery - Coronary GTEX_TISSUE_DETAILED_ARTERY_CORONARY = 5; // Artery - Tibial GTEX_TISSUE_DETAILED_ARTERY_TIBIAL = 6; // Bladder GTEX_TISSUE_DETAILED_BLADDER = 7; // Brain - Amygdala GTEX_TISSUE_DETAILED_BRAIN_AMYGDALA = 8; // Brain - Anterior cingulate cortex (BA24) 
GTEX_TISSUE_DETAILED_BRAIN_ANTERIOR_CINGULATE_CORTEX = 9; // Brain - Caudate (basal ganglia) GTEX_TISSUE_DETAILED_BRAIN_CAUDATE_BASAL_GANGLIA = 10; // Brain - Cerebellar Hemisphere GTEX_TISSUE_DETAILED_BRAIN_CEREBELLAR_HEMISPHERE = 11; // Brain - Cerebellum GTEX_TISSUE_DETAILED_BRAIN_CEREBELLUM = 12; // Brain - Cortex GTEX_TISSUE_DETAILED_BRAIN_CORTEX = 13; // Brain - Frontal Cortex (BA9) GTEX_TISSUE_DETAILED_BRAIN_FRONTAL_CORTEX = 14; // Brain - Hippocampus GTEX_TISSUE_DETAILED_BRAIN_HIPPOCAMPUS = 15; // Brain - Hypothalamus GTEX_TISSUE_DETAILED_BRAIN_HYPOTHALAMUS = 16; // Brain - Nucleus accumbens (basal ganglia) GTEX_TISSUE_DETAILED_BRAIN_NUCLEUS_ACCUMBENS = 17; // Brain - Putamen (basal ganglia) GTEX_TISSUE_DETAILED_BRAIN_PUTAMEN_BASAL_GANGLIA = 18; // Brain - Spinal cord (cervical c-1) GTEX_TISSUE_DETAILED_BRAIN_SPINAL_CORD = 19; // Brain - Substantia nigra GTEX_TISSUE_DETAILED_BRAIN_SUBSTANTIA_NIGRA = 20; // Breast - Mammary Tissue GTEX_TISSUE_DETAILED_BREAST_MAMMARY_TISSUE = 21; // Cells - Cultured fibroblasts GTEX_TISSUE_DETAILED_CELLS_CULTURED_FIBROBLASTS = 22; // Cells - EBV-transformed lymphocytes GTEX_TISSUE_DETAILED_CELLS_EBV_TRANSFORMED_LYMPHOCYTES = 23; // Cells - Leukemia cell line (CML) GTEX_TISSUE_DETAILED_CELLS_LEUKEMIA_CELL_LINE = 24; // Cervix - Ectocervix GTEX_TISSUE_DETAILED_CERVIX_ECTOCERVIX = 25; // Cervix - Endocervix GTEX_TISSUE_DETAILED_CERVIX_ENDOCERVIX = 26; // Colon - Sigmoid GTEX_TISSUE_DETAILED_COLON_SIGMOID = 27; // Colon - Transverse GTEX_TISSUE_DETAILED_COLON_TRANSVERSE = 28; // Esophagus - Gastroesophageal Junction GTEX_TISSUE_DETAILED_ESOPHAGUS_GASTROESOPHAGEAL_JUNCTION = 29; // Esophagus - Mucosa GTEX_TISSUE_DETAILED_ESOPHAGUS_MUCOSA = 30; // Esophagus - Muscularis GTEX_TISSUE_DETAILED_ESOPHAGUS_MUSCULARIS = 31; // Fallopian Tube GTEX_TISSUE_DETAILED_FALLOPIAN_TUBE = 32; // Heart - Atrial Appendage GTEX_TISSUE_DETAILED_HEART_ATRIAL_APPENDAGE = 33; // Heart - Left Ventricle GTEX_TISSUE_DETAILED_HEART_LEFT_VENTRICLE = 34; // 
Kidney - Cortex GTEX_TISSUE_DETAILED_KIDNEY_CORTEX = 35; // Kidney - Medulla GTEX_TISSUE_DETAILED_KIDNEY_MEDULLA = 36; // Liver GTEX_TISSUE_DETAILED_LIVER = 37; // Lung GTEX_TISSUE_DETAILED_LUNG = 38; // Minor Salivary Gland GTEX_TISSUE_DETAILED_MINOR_SALIVARY_GLAND = 39; // Muscle - Skeletal GTEX_TISSUE_DETAILED_MUSCLE_SKELETAL = 40; // Nerve - Tibial GTEX_TISSUE_DETAILED_NERVE_TIBIAL = 41; // Ovary GTEX_TISSUE_DETAILED_OVARY = 42; // Pancreas GTEX_TISSUE_DETAILED_PANCREAS = 43; // Pituitary GTEX_TISSUE_DETAILED_PITUITARY = 44; // Prostate GTEX_TISSUE_DETAILED_PROSTATE = 45; // Salivary Gland GTEX_TISSUE_DETAILED_SALIVARY_GLAND = 46; // Skin - Not Sun Exposed (Suprapubic) GTEX_TISSUE_DETAILED_SKIN_NOT_SUN_EXPOSED_SUPRAPUBIC = 47; // Skin - Sun Exposed (Lower leg) GTEX_TISSUE_DETAILED_SKIN_SUN_EXPOSED_LOWER_LEG = 48; // Small Intestine - Terminal Ileum GTEX_TISSUE_DETAILED_SMALL_INTESTINE_TERMINAL_ILEUM = 49; // Spleen GTEX_TISSUE_DETAILED_SPLEEN = 50; // Stomach GTEX_TISSUE_DETAILED_STOMACH = 51; // Testis GTEX_TISSUE_DETAILED_TESTIS = 52; // Thyroid GTEX_TISSUE_DETAILED_THYROID = 53; // Uterus GTEX_TISSUE_DETAILED_UTERUS = 54; // Vagina GTEX_TISSUE_DETAILED_VAGINA = 55; // Whole Blood GTEX_TISSUE_DETAILED_WHOLE_BLOOD = 56; } // Entry with the tissue-specific information for a gene. message GtexTissueRecord { // The tissue type GtexTissue tissue = 1; // The detailed tissue type GtexTissueDetailed tissue_detailed = 2; // TPM counts repeated float tpms = 3; } // Entry with the GTEx information. message GtexRecord { // The HGNC ID. string hgnc_id = 1; // ENSEMBL gene ID. string ensembl_gene_id = 2; // ENSEMBL gene version. string ensembl_gene_version = 3; // Counts per tissue repeated GtexTissueRecord records = 4; } // Entry in PanelApp. message PanelAppRecord { /// Gene identity information. message GeneData { // HGNC ID. optional string hgnc_id = 1; // HGNC gene symbol. optional string hgnc_symbol = 2; // Gene symbol. 
optional string gene_symbol = 3; } // Enumeration for entity types. enum EntityType { // Unknown ENTITY_TYPE_UNKNOWN = 0; // Gene ENTITY_TYPE_GENE = 1; // Short Tandem Repeat ENTITY_TYPE_STR = 2; // Region ENTITY_TYPE_REGION = 3; } // Enumeration for confidence levels. enum ConfidenceLevel { // Unknown CONFIDENCE_LEVEL_UNKNOWN = 0; // None CONFIDENCE_LEVEL_NONE = 1; // Red CONFIDENCE_LEVEL_RED = 2; // Amber CONFIDENCE_LEVEL_AMBER = 3; // Green CONFIDENCE_LEVEL_GREEN = 4; } // Enumeration for penetrance. enum Penetrance { // Unknown PENETRANCE_UNKNOWN = 0; // Complete PENETRANCE_COMPLETE = 1; // Incomplete PENETRANCE_INCOMPLETE = 2; } // Message for panel statistics. message PanelStats { // Number of genes. uint32 number_of_genes = 1; // Number of STRs. uint32 number_of_strs = 2; // Number of regions. uint32 number_of_regions = 3; } // Message for panel types. message PanelType { // Type name. string name = 1; // Slug. string slug = 2; // Description. string description = 3; } // Message for panel information. message Panel { // Panel ID. uint32 id = 1; // Panel hash ID. optional string hash_id = 2; // Panel name. string name = 3; // Disease group. string disease_group = 4; // Disease subgroup. string disease_sub_group = 5; // Version string version = 6; // Creation date of version. string version_created = 7; // Relevant disorders. repeated string relevant_disorders = 8; // Stats. PanelStats stats = 9; // Panel types. repeated PanelType types = 10; } // Gene identity information. GeneData gene_data = 1; // Entity type. EntityType entity_type = 2; // Entity name. string entity_name = 3; // Confidence level. ConfidenceLevel confidence_level = 4; // Penetrance. Penetrance penetrance = 5; // Publications. repeated string publications = 6; // Evidence. repeated string evidence = 7; // Phenotypes. repeated string phenotypes = 8; // Mode of inheritance. string mode_of_inheritance = 9; // Panel. Panel panel = 10; } // Record from the integrated conditions computation. 
message ConditionsRecord {
  // A gene-disease association entry.
  //
  // Within `ConditionsRecord` this message is not used as a field type; only
  // its nested enums are referenced (by `GeneDiseaseAssociation`).
  message GeneDiseaseAssociationEntry {
    // Enumeration for sources.
    enum GeneDiseaseAssociationSource {
      // Unknown source; the zero value, i.e. what an unset field reads as.
      GENE_DISEASE_ASSOCIATION_SOURCE_UNKNOWN = 0;
      // OMIM
      GENE_DISEASE_ASSOCIATION_SOURCE_OMIM = 1;
      // Orphanet
      GENE_DISEASE_ASSOCIATION_SOURCE_ORPHANET = 2;
      // PanelApp
      GENE_DISEASE_ASSOCIATION_SOURCE_PANELAPP = 3;
    }

    // Enumeration for confidence levels.
    enum ConfidenceLevel {
      // Unknown confidence; the zero value, i.e. what an unset field reads as.
      CONFIDENCE_LEVEL_UNKNOWN = 0;
      // High confidence.
      CONFIDENCE_LEVEL_HIGH = 1;
      // Medium confidence.
      CONFIDENCE_LEVEL_MEDIUM = 2;
      // Low confidence.
      CONFIDENCE_LEVEL_LOW = 3;
    }

    // The gene-disease association source.
    GeneDiseaseAssociationSource source = 1;
    // The gene-disease association confidence level.
    ConfidenceLevel confidence = 2;
  }

  // A labeled disorder.
  message LabeledDisorder {
    // The disorder ID.
    string term_id = 1;
    // The disorder name (explicit presence; may be absent).
    optional string title = 2;
  }

  // A gene-disease association.
  message GeneDiseaseAssociation {
    // The HGNC ID.
    string hgnc_id = 1;
    // The labeled disorders (ID plus optional name) of this association.
    repeated LabeledDisorder labeled_disorders = 2;
    // Overall disease name.
    optional string disease_name = 3;
    // Disease definition.
    optional string disease_definition = 4;
    // The gene-disease association sources.
    repeated GeneDiseaseAssociationEntry.GeneDiseaseAssociationSource sources = 5;
    // Overall disease-gene association confidence level.
    GeneDiseaseAssociationEntry.ConfidenceLevel confidence = 6;
  }

  // A panel from PanelApp.
  message PanelappPanel {
    // PanelApp panel ID.
    // Note: declared `int32` here, whereas `PanelAppRecord.Panel.id` is
    // declared `uint32`.
    int32 id = 1;
    // PanelApp panel name.
    string name = 2;
    // PanelApp panel version.
    string version = 3;
  }

  // An association of a gene by HGNC with a panel from PanelApp.
  message PanelappAssociation {
    // Enumeration for PanelApp confidence level.
    //
    // Caution: the numbering differs from `PanelAppRecord.ConfidenceLevel`
    // (there NONE = 1, RED = 2, AMBER = 3, GREEN = 4); convert between the
    // two by name, not by number.
    enum PanelappConfidence {
      // Unknown confidence; the zero value, i.e. what an unset field reads as.
      PANELAPP_CONFIDENCE_UNKNOWN = 0;
      // PanelApp green confidence.
      PANELAPP_CONFIDENCE_GREEN = 1;
      // PanelApp amber confidence.
      PANELAPP_CONFIDENCE_AMBER = 2;
      // PanelApp red confidence.
      PANELAPP_CONFIDENCE_RED = 3;
      // PanelApp none confidence (when removed after expert review).
      PANELAPP_CONFIDENCE_NONE = 4;
    }

    // Enumeration for entity type.
    //
    // Caution: the numbering differs from `PanelAppRecord.EntityType` (there
    // STR = 2 and REGION = 3); convert between the two by name, not by number.
    enum PanelappEntityType {
      // Unknown entity type; the zero value, i.e. what an unset field reads as.
      PANELAPP_ENTITY_TYPE_UNKNOWN = 0;
      // PanelApp gene entity type.
      PANELAPP_ENTITY_TYPE_GENE = 1;
      // PanelApp region entity type.
      PANELAPP_ENTITY_TYPE_REGION = 2;
      // PanelApp short tandem repeat entity type.
      PANELAPP_ENTITY_TYPE_STR = 3;
    }

    // The HGNC ID.
    string hgnc_id = 1;
    // The PanelApp confidence level.
    PanelappConfidence confidence_level = 2;
    // The PanelApp entity type.
    PanelappEntityType entity_type = 3;
    // The PanelApp mode of inheritance (explicit presence; may be absent).
    optional string mode_of_inheritance = 4;
    // The PanelApp phenotypes.
    repeated string phenotypes = 5;
    // The PanelApp panel.
    PanelappPanel panel = 6;
  }

  // The HGNC ID.
  string hgnc_id = 1;
  // The gene-disease associations.
  repeated GeneDiseaseAssociation disease_associations = 2;
  // The PanelApp associations.
  repeated PanelappAssociation panelapp_associations = 3;
}

// Entry in the genes RocksDB database.
//
// Bundles the per-source gene records. Every singular field is message-typed
// and therefore has explicit presence: a source without data is simply left
// unset. `panelapp` is repeated and is empty when there are no entries.
message Record {
  // Information from the ACMG secondary finding list.
  AcmgSecondaryFindingRecord acmg_sf = 1;
  // Information from ClinGen dosage curation.
  ClingenDosageRecord clingen = 2;
  // Information from dbNSFP.
  DbnsfpRecord dbnsfp = 3;
  // Information from the gnomAD constraints database.
  GnomadConstraintsRecord gnomad_constraints = 4;
  // Information from the HGNC database.
  HgncRecord hgnc = 5;
  // Information from the NCBI gene database (aka "Entrez").
  NcbiRecord ncbi = 6;
  // Information about gene to OMIM term annotation, composed from ClinGen and
  // HPO.
  OmimRecord omim = 7;
  // Information about gene to Orphanet annotation, derived from Orphapacket.
  OrphaRecord orpha = 8;
  // Information from the rCNV dosage sensitivity scores (Collins et al., 2022).
  RcnvRecord rcnv = 9;
  // Information from the sHet score (Weghorn et al., 2019).
  ShetRecord shet = 10;
  // Information from GTEx data.
  GtexRecord gtex = 11;
  // Information from DOMINO.
  DominoRecord domino = 12;
  // DECIPHER HI score.
  DecipherHiRecord decipher_hi = 13;
  // GenomicsEngland PanelApp gene information.
  repeated PanelAppRecord panelapp = 14;
  // Conditions record.
  ConditionsRecord conditions = 15;
}