Crates.io | bioutils |
lib.rs | bioutils |
version | 0.2.4 |
source | src |
created_at | 2021-02-23 08:30:09.762737 |
updated_at | 2021-09-21 21:08:44.180343 |
description | Utilities and alphabets for biological sequence data. |
homepage | |
repository | |
max_upload_size | |
id | 359382 |
size | 288,792 |
Open for feature requests and edits!
For a related bioinformatics resource: Bioinformatics in Rust
// Download the latest grch38 reference fasta gz and gtf gz to current directory
use std::path::Path;
let path = Path::new("./");
download_grch38_primary_assembly_genome_fa_gz(&path);
download_gencode_vxx_primary_assembly_annotation_gtf_gz(&path);
// Download a fastq from the ENA
use std::path::Path;
use bioutils::files::http::bioutils_curl;
let fastq_ftp = "ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR170/009/SRR1700869/";
let fastq_gz = "SRR1700869.fastq.gz";
let out_directory = Path::new("./");
bioutils_curl(fastq_ftp, fastq_gz, &out_directory);
// Checks if all elements in the slice are contained in a character set. Boolean version shown, but also available in result and option.
use bioutils::utils::check::AllAsRefSlice;
use bioutils::charsets::bioutils::*;
let dna_reference = &[67u8,67u8,67u8,67u8];
println!("{:?}", dna_reference.result_is_all_charset(BioUtilsCharSet::Dna));
// Also possible to use a custom &[u8] to check if all elements are contained in the character set.
let dna_reference_2 = &[67u8,68u8,67u8,68u8];
println!("{:?}", dna_reference_2.result_is_all_charset_with(&[67u8, 68u8]));
let dna_reference_3 = &[67u8,68u8,67u8,67u8];
println!("{:?}", dna_reference_3.result_is_all_charset_with(&[67u8,68u8]).unwrap().is_all_charset_with(&[5u8,6u8,7u8]));
// Create a new random vector of specific length with a Bioutils charset or choosing from u8s in a given slice
use bioutils::utils::new::RandomBioVec;
use bioutils::charsets::bioutils::*;
let new_vec = Vec::<u8>::random_vec(&12, BioUtilsCharSet::Dna);
let new_vec_2 = Vec::<u8>::random_vec_with(&12, &[1u8,2u8,3u8]);
// Takes a BioUtilsCharSet and a ThreadRng and replaces any character not in the charset with a random character from the characterset.
// Also possible with custom &[u8]
// create a random number generator
let mut rng1 = rand::thread_rng();
let mut dna_correction = b"ACTGQQQ";
dna_correction.mut_clean(BioUtilsCharSet::Dna, rng1);
// dna_correction.mut_clean_with(&[1u8,2u8,3u8], rng1);
// Recode can be used if we want to convert between PHRED, etc. scores and encodings or find the complement. Recode checks if contains non-encoding values and will return none if there are any.
// Get the reverse complement of the sequence. Other options for finding the complement are available (mix case, Nn included).
let mut reverse_complement = b"ACTG".to_owned();
reverse_complement.mut_rev_recode(BioUtilsRecodeSet::DnaComplement);
println!("{:?}", reverse_complement);
let mut phred33_score = 12u8;
phred33_score.recode_u8(BioUtilsRecodeSet::Phred33Encode);
println!("{:?}", phred33_score);
let mut phred33_encoding = 34u8;
&phred33_encoding.mut_recode_u8(BioUtilsRecodeSet::Phred33Decode);
println!("{:?}", phred33_encoding);
let mut phred33_score_2 = b"00000".to_owned();
phred33_score_2.mut_recode(BioUtilsRecodeSet::Phred33Encode);
println!("{:?}", phred33_score_2);
// Wrapper around memchr that returns an iterator over cg sites
use memchr::memmem;
use memchr::memmem::FindIter;
use bioutils::utils::find::*;
let seqcg = b"ACGA";
// Returns an iterator over the cg sites
let mut iter = seqcg.iter_cg();
// Returns all cpg sites in a vector
let cgpos = b"ACGA".all_positions_cg();