// Command line tests for `gfatk` // run with `cargo test --release` use assert_cmd::prelude::*; use predicates::prelude::*; use std::process::Command; // test `gfatk linear` // H VN:Z:1.0 // S 11 ACCTT ll:f:30.0 // S 12 TCAAGG ll:f:60.0 // S 13 CTTGATT ll:f:30.0 // L 11 + 12 - 4M ec:i:1 // L 12 - 13 + 5M ec:i:1 // L 11 + 13 + 3M ec:i:1 // L 12 + 11 - 4M ec:i:1 // L 13 - 12 + 5M ec:i:1 // L 13 - 11 - 3M ec:i:1 // Expected output from gfatk linear // There are two possible paths that could // be output with the same probability. // PATH: 11+ -> 12- -> 13+ // 11 ACCTT // 12 CCTTGA // 13 CTTGATT // P ACCTTGATT // PATH: 13- -> 12+ -> 11- // 13 AATCAAG // 12 TCAAGG // 11 AAGGT // P AATCAAGGT // TODO: test `-i` flag #[test] fn test_gfa_linear_stdout() -> Result<(), Box> { let mut cmd = Command::cargo_bin("gfatk")?; cmd.arg("linear").arg("./tests/test_linear.gfa"); cmd.assert() .stdout(predicate::str::contains("ACCTTGATT").or(predicate::str::contains("AATCAAGGT"))); Ok(()) } // test gfatk linear on a single segmented GFA // this would previously error out. 
#[test]
fn test_gfa_linear_single_segment() -> Result<(), Box<dyn std::error::Error>> {
    let mut cmd = Command::cargo_bin("gfatk")?;
    cmd.args(["linear", "./tests/test_single_segment.gfa"]);

    cmd.assert().stdout(predicate::str::contains("AGCGTA"));

    Ok(())
}

// test `gfatk overlap`
// same test GFA as `gfatk linear`
//
// H VN:Z:1.0
// S 11 ACCTT ll:f:30.0
// S 12 TCAAGG ll:f:60.0
// S 13 CTTGATT ll:f:30.0
// L 11 + 12 - 4M ec:i:1
// L 12 - 13 + 5M ec:i:1
// L 11 + 13 + 3M ec:i:1
// L 12 + 11 - 4M ec:i:1
// L 13 - 12 + 5M ec:i:1
// L 13 - 11 - 3M ec:i:1
//
// walking through the links above
// the overlaps with one base pair either side should be
// 11 + -> 12 - : Overlap = CCTT;  -1 = A; +1 = G --> ACCTTG
// 12 - -> 13 + : Overlap = CTTGA; -1 = C; +1 = T --> CCTTGAT
// 11 + -> 13 + : Overlap = CTT;   -1 = C; +1 = G --> CCTTG
// 12 + -> 11 - : Overlap = AAGG;  -1 = C; +1 = T --> CAAGGT
// 13 - -> 12 + : Overlap = TCAAG; -1 = A; +1 = G --> ATCAAGG
// 13 - -> 11 - : Overlap = AAG;   -1 = C; +1 = G --> CAAGG

// Runs `gfatk overlap -s 1` and checks that every overlap listed above
// (extended by one base on either side) appears somewhere in stdout.
#[test]
fn test_gfa_overlap_stdout() -> Result<(), Box<dyn std::error::Error>> {
    let mut cmd = Command::cargo_bin("gfatk")?;
    // just one base either side
    cmd.args(["overlap", "./tests/test_linear.gfa", "-s", "1"]);

    // test all the overlaps are present.
    // NOTE: "CCTTG" and "CAAGG" are substrings of "ACCTTG" and "CAAGGT", so
    // those two checks are implied by the others; they are kept so that each
    // link in the table above has a matching assertion.
    let all_overlaps = predicate::str::contains("ACCTTG")
        .and(predicate::str::contains("CCTTGAT"))
        .and(predicate::str::contains("CCTTG"))
        .and(predicate::str::contains("CAAGGT"))
        .and(predicate::str::contains("ATCAAGG"))
        .and(predicate::str::contains("CAAGG"));
    cmd.assert().stdout(all_overlaps);

    Ok(())
}

// test `gfatk extract`
//
// # Duplicate 11,12,13
// # To 14,15,16
//
// H VN:Z:1.0
// S 11 ACCTT ll:f:30.0
// S 12 TCAAGG ll:f:60.0
// S 13 CTTGATT ll:f:30.0
// L 11 + 12 - 4M ec:i:1
// L 12 - 13 + 5M ec:i:1
// L 11 + 13 + 3M ec:i:1
// L 12 + 11 - 4M ec:i:1
// L 13 - 12 + 5M ec:i:1
// L 13 - 11 - 3M ec:i:1
// # duplication here
// S 14 ACCTT ll:f:30.0
// S 15 TCAAGG ll:f:60.0
// S 16 CTTGATT ll:f:30.0
// L 14 + 15 - 4M ec:i:1
// L 15 - 16 + 5M ec:i:1
// L 14 + 16 + 3M ec:i:1
// L 15 + 14 - 4M ec:i:1
// L 16 - 15 + 5M ec:i:1
// L 16 - 14 - 3M ec:i:1
//
// two subgraphs present above, segment ID 11
// will extract a graph equivalent to
// ./test_linear.gfa

// Extracts the subgraph containing segment 11 and compares it with the
// records of ./tests/test_linear.gfa.
#[test]
fn test_subgraph_extraction() -> Result<(), Box<dyn std::error::Error>> {
    let mut cmd = Command::cargo_bin("gfatk")?;
    // extract the subgraph that segment 11 belongs to
    cmd.args(["extract", "./tests/test_subgraphs.gfa", "-s", "11"]);

    // should be the same output as ./tests/test_linear.gfa
    // GFA records are tab-delimited, one per line; the whitespace is written
    // with explicit escapes so it cannot be silently altered by an editor.
    let expected = "H\tVN:Z:1.0\n\
                    S\t11\tACCTT\tll:f:30\n\
                    S\t12\tTCAAGG\tll:f:60\n\
                    S\t13\tCTTGATT\tll:f:30\n\
                    L\t11\t+\t12\t-\t4M\tec:i:1\n\
                    L\t12\t-\t13\t+\t5M\tec:i:1\n\
                    L\t11\t+\t13\t+\t3M\tec:i:1\n\
                    L\t12\t+\t11\t-\t4M\tec:i:1\n\
                    L\t13\t-\t12\t+\t5M\tec:i:1\n\
                    L\t13\t-\t11\t-\t3M\tec:i:1\n";
    cmd.assert().stdout(predicate::str::contains(expected));

    Ok(())
}

// now we will test multiple values placed as args on the command line
// so the output here should be equivalent to the input GFA (plus a little rearrangement)
#[test]
fn test_subgraph_extraction_multiple() -> Result<(), Box<dyn std::error::Error>> {
    let mut cmd = Command::cargo_bin("gfatk")?;
    // one segment ID from each of the two subgraphs
    cmd.args(["extract", "./tests/test_subgraphs.gfa", "-s", "11,14"]);

    // All segments first, then all links (the "little rearrangement").
    let expected = "H\tVN:Z:1.0\n\
                    S\t11\tACCTT\tll:f:30\n\
                    S\t12\tTCAAGG\tll:f:60\n\
                    S\t13\tCTTGATT\tll:f:30\n\
                    S\t14\tACCTT\tll:f:30\n\
                    S\t15\tTCAAGG\tll:f:60\n\
                    S\t16\tCTTGATT\tll:f:30\n\
                    L\t11\t+\t12\t-\t4M\tec:i:1\n\
                    L\t12\t-\t13\t+\t5M\tec:i:1\n\
                    L\t11\t+\t13\t+\t3M\tec:i:1\n\
                    L\t12\t+\t11\t-\t4M\tec:i:1\n\
                    L\t13\t-\t12\t+\t5M\tec:i:1\n\
                    L\t13\t-\t11\t-\t3M\tec:i:1\n\
                    L\t14\t+\t15\t-\t4M\tec:i:1\n\
                    L\t15\t-\t16\t+\t5M\tec:i:1\n\
                    L\t14\t+\t16\t+\t3M\tec:i:1\n\
                    L\t15\t+\t14\t-\t4M\tec:i:1\n\
                    L\t16\t-\t15\t+\t5M\tec:i:1\n\
                    L\t16\t-\t14\t-\t3M\tec:i:1\n";
    cmd.assert().stdout(predicate::str::starts_with(expected));

    Ok(())
}

// test `gfatk trim`
//
// H VN:Z:1.0
// S 11 ACCTT ll:f:30.0
// S 12 TCAAGG ll:f:60.0
// S 13 CTTGATT ll:f:30.0
// S 14 TTGGGG ll:f:30.0
// L 11 + 12 - 4M ec:i:1
// L 12 - 13 + 5M ec:i:1
// L 11 + 13 + 3M ec:i:1
// L 12 + 11 - 4M ec:i:1
// L 13 - 12 + 5M ec:i:1
// L 13 - 11 - 3M ec:i:1
// L 14 + 11 + 2M ec:i:1
// L 11 - 14 - 2M ec:i:1
//
// added segment 14, with only two links
// therefore this should be removed
#[test]
fn test_gfa_trim_sterr() -> Result<(), Box<dyn std::error::Error>> {
    let mut cmd = Command::cargo_bin("gfatk")?;
    cmd.args(["trim", "./tests/test_trim.gfa"]);

    // The `[+]` marker and the newline-terminated message are checked
    // separately so the assertion does not depend on the exact whitespace
    // character that separates them.
    let removal_reported = predicate::str::contains("[+]")
        .and(predicate::str::contains("Removed segment 14 from GFA.\n"));
    cmd.assert().stderr(removal_reported);

    Ok(())
}

// test `gfatk fasta`
//
// H VN:Z:1.0
// S 11 ACCTT ll:f:30.0
// S 12 TCAAGG ll:f:60.0
// S 13 CTTGATT ll:f:30.0
// L 11 + 12 - 4M ec:i:1
// L 12 - 13 + 5M ec:i:1
// L 11 + 13 + 3M ec:i:1
// L 12 + 11 - 4M ec:i:1
// L 13 - 12 + 5M ec:i:1
// L 13 - 11 - 3M ec:i:1
//
// should output three fasta records in the order
// in which they appear in the GFA
#[test]
fn test_gfa_fasta_stdout() -> Result<(), Box<dyn std::error::Error>> {
    let mut cmd = Command::cargo_bin("gfatk")?;
    cmd.args(["fasta", "./tests/test_linear.gfa"]);

    // header line then sequence line for each segment
    let expected = ">11\nACCTT\n>12\nTCAAGG\n>13\nCTTGATT\n";
    cmd.assert().stdout(predicate::str::contains(expected));

    Ok(())
}

// test for no edge coverage tags
// if user wants to use:
// `gfatk linear`, `gfatk dot`, `gfatk trim` or `gfatk stats`
// Edge coverage must be present.
// `gfatk linear` must exit unsuccessfully on a GFA without edge coverage.
#[test]
fn test_gfa_edge_coverage_failure_linear() -> Result<(), Box<dyn std::error::Error>> {
    let mut cmd = Command::cargo_bin("gfatk")?;
    cmd.args(["linear", "./tests/test_no_ec.gfa"]);

    cmd.assert().failure();

    Ok(())
}

// `gfatk dot` must exit unsuccessfully on a GFA without edge coverage.
#[test]
fn test_gfa_edge_coverage_failure_dot() -> Result<(), Box<dyn std::error::Error>> {
    let mut cmd = Command::cargo_bin("gfatk")?;
    cmd.args(["dot", "./tests/test_no_ec.gfa"]);

    cmd.assert().failure();

    Ok(())
}

// `gfatk trim` must exit unsuccessfully on a GFA without edge coverage.
#[test]
fn test_gfa_edge_coverage_failure_trim() -> Result<(), Box<dyn std::error::Error>> {
    let mut cmd = Command::cargo_bin("gfatk")?;
    cmd.args(["trim", "./tests/test_no_ec.gfa"]);

    cmd.assert().failure();

    Ok(())
}

// `gfatk stats` must exit unsuccessfully on a GFA without edge coverage.
#[test]
fn test_gfa_edge_coverage_failure_stats() -> Result<(), Box<dyn std::error::Error>> {
    let mut cmd = Command::cargo_bin("gfatk")?;
    cmd.args(["stats", "./tests/test_no_ec.gfa"]);

    cmd.assert().failure();

    Ok(())
}

// test segment coverage tag presence
// only relevant for:
// `gfatk linear -i`
#[test]
fn test_gfa_node_coverage_failure_linear() -> Result<(), Box<dyn std::error::Error>> {
    let mut cmd = Command::cargo_bin("gfatk")?;
    cmd.args(["linear", "./tests/test_no_ll.gfa", "-i"]);

    cmd.assert().failure();

    Ok(())
}

// see `fn test_gfa_linear_stdout()`
// for expected output explanation
#[test]
fn test_gfa_path_linear() -> Result<(), Box<dyn std::error::Error>> {
    let mut cmd = Command::cargo_bin("gfatk")?;
    let path = "13-,12+,11-";
    cmd.args(["path", "./tests/test_linear.gfa", path]);

    // one fasta record: the path as the header, then the merged sequence
    let expected = ">13-,12+,11-\nAATCAAGGT\n";
    cmd.assert().stdout(predicate::str::contains(expected));

    Ok(())
}

// test `gfatk path`
//
// As this GFA is circular, a legal path could loop forever.
// But we will stop at 5 segments.
//
// H VN:Z:1.0
// S 1 AGCGTA ll:f:30.0
// S 2 TAACAG ll:f:30.0
// L 1 + 2 + 2M ec:i:1
// L 2 + 1 + 2M ec:i:1
//
//
// PATH: 1+ -> 2+ -> 1+ -> 2+ -> 1+
// 1 AGCGTA
// 2     TAACAG
// 1         AGCGTA
// 2             TAACAG
// 1                 AGCGTA
// P AGCGTAACAGCGTAACAGCGTA
//
#[test]
fn test_gfa_path_circular() -> Result<(), Box<dyn std::error::Error>> {
    let mut cmd = Command::cargo_bin("gfatk")?;
    // the path is passed with spaces after the commas, while the expected
    // header below has none
    let path = "1+, 2+, 1+, 2+, 1+";
    cmd.args(["path", "./tests/test_circular.gfa", path]);

    let expected = ">1+,2+,1+,2+,1+\nAGCGTAACAGCGTAACAGCGTA\n";
    cmd.assert().stdout(predicate::str::contains(expected));

    Ok(())
}

// add a quick test for this, as this was broken before.
// a single segment (given it's in the GFA), will print the
// segment in full.
#[test]
fn test_gfa_path_single_segment() -> Result<(), Box<dyn std::error::Error>> {
    let mut cmd = Command::cargo_bin("gfatk")?;
    let path = "11+";
    cmd.args(["path", "./tests/test_linear.gfa", path]);

    let expected = ">11+\nACCTT\n";
    cmd.assert().stdout(predicate::str::contains(expected));

    Ok(())
}

// Regarding issue #14 - obtain path sequences as specified by P lines
// in a GFA.
// Path 1: 11+,12-,13+
// Path 2: 13-,12+,11- (i.e. reverse comp of path 1)
#[test]
fn test_gfa_path_all() -> Result<(), Box<dyn std::error::Error>> {
    let mut cmd = Command::cargo_bin("gfatk")?;
    let flag = "--all";
    cmd.args(["path", "./tests/test_path_all.gfa", flag]);

    // one fasta record per path, in order
    let expected = ">14\nACCTTGATT\n>15\nAATCAAGGT\n";
    cmd.assert().stdout(predicate::str::contains(expected));

    Ok(())
}