use bgzip::tabix::Tabix; use clap::Parser; use std::fs::File; use std::io::{stdout, Write}; #[derive(Debug, Parser)] struct Args { #[command()] file: String, #[arg(short, long)] output: Option, } fn main() -> anyhow::Result<()> { let parser = Args::parse(); let file = Tabix::from_reader(File::open(&parser.file)?)?; let out: Box = if let Some(out) = parser.output { Box::new(File::create(out)?) } else { Box::new(stdout().lock()) }; let mut csv_out = csv::WriterBuilder::new().flexible(true).from_writer(out); csv_out.write_record(&[ "# of sequences", "format", "coordinate rule", "column for the sequence name", "column for the start of a region", "column for the end fo a region", "meta", "skip", "Length of concatenated sequence names", ])?; csv_out.write_record(&[ format!("{}", file.number_of_references), match file.format & 0xffff { 0 => "Generic".to_string(), 1 => "SAM".to_string(), 2 => "VCF".to_string(), _ => format!("Unknown: {}", file.format), }, match file.format & 0x10000 { 0 => "GFF Rule".to_string(), _ => "BED Rule".to_string(), }, format!("{}", file.column_for_sequence), format!("{}", file.column_for_begin), format!("{}", file.column_for_end), format!("{}", String::from_utf8_lossy(&file.meta)), format!("{}", file.skip), format!("{}", file.length_of_concatenated_sequence_names), ])?; csv_out.write_record(&[""])?; csv_out.write_record(&[ "sequence index", "sequence name", "bin index", "bin", "chunk index", "begin", "end", ])?; for (i, (ref_name, sequence)) in file.names.iter().zip(file.sequences.iter()).enumerate() { let mut bins: Vec<_> = sequence.bins.values().collect(); bins.sort_by_key(|x| x.bin); for (j, bin) in bins.iter().enumerate() { for (k, x) in bin.chunks.iter().enumerate() { csv_out.write_record(&[ format!("{}", i), String::from_utf8_lossy(ref_name).to_string(), format!("{}", j), format!("0x{:x}", bin.bin), format!("{}", k), format!("0x{:x}", x.begin), format!("0x{:x}", x.end), ])?; } } } Ok(()) }