{%- include "header" -%}
{# Keep a blank line #}
#----------------------------#
# Usage
#----------------------------#
# Maintains n50.tsv (columns: name, N50, S, C) for the entries listed in
# url.tsv, then writes the rows passing the thresholds below to n50.pass.tsv.
# All three positional arguments are optional non-negative integers.
USAGE="
Usage: $0 [LEN_N50] [N_CONTIG] [LEN_SUM]

Default values:
    LEN_N50     longer than 100000
    N_CONTIG    less than   1000
    LEN_SUM     longer than 1000000

$ bash n50.sh 100000 100

"

# Validate every threshold that was supplied, not just the first one: all
# three are interpolated into tsv-filter expressions at the end of the script,
# so a malformed value would otherwise only surface after the whole N50
# computation has run. Empty or missing arguments fall back to the defaults.
for n50_arg in "${1:-}" "${2:-}" "${3:-}"; do
    if [[ -n "${n50_arg}" && ! "${n50_arg}" =~ ^[0-9]+$ ]]; then
        echo >&2 "$USAGE"
        exit 1
    fi
done
unset n50_arg

LEN_N50=${1:-100000}
N_CONTIG=${2:-1000}
LEN_SUM=${3:-1000000}

#----------------------------#
# Run
#----------------------------#
log_warn n50.sh

# Make sure the results table exists so the first run behaves like a rerun.
touch n50.tsv

log_info Keep only the results in the list
# Prepend a header row; tsv-uniq then drops the duplicate header (and any
# duplicate rows) when n50.tsv already had one.
{ printf 'name\tN50\tS\tC\n' && cat n50.tsv; } |
    tsv-uniq | # keep the first header line
    tsv-filter -H --gt "N50:0" | # unfinished downloads
    keep-header -- tsv-join -f url.tsv -k 1 \
    > tmp.tsv
mv tmp.tsv n50.tsv

log_info Calculate N50 not in the list
# `tsv-join -e` keeps only url.tsv rows whose first field is absent from
# n50.tsv, i.e. entries without a stored result yet. Inside the parallel
# command, {1} and {3} are the first and third tab-separated fields of each
# row; "{3}/{1}" is used as the directory holding the downloaded files.
tsv-join -f n50.tsv -k 1 -e < url.tsv |
    parallel --colsep '\t' --no-run-if-empty --linebuffer -k -j {{ parallel }} '
        if [[ ! -e "{3}/{1}" ]]; then
            exit
        fi
        log_debug "{3}\t{1}"

        find "{3}/{1}" -type f -name "*_genomic.fna.gz" |
            grep -v "_from_" | # exclude CDS and rna
            xargs cat |
            faops n50 -H -S -C stdin | # do not display header
            (echo -e "{1}" && cat) |
            datamash transpose
    ' \
    > tmp1.tsv

# Combine new results with the old ones
cat n50.tsv tmp1.tsv |
    tsv-uniq |
    keep-header -- sort \
    > tmp2.tsv
mv tmp2.tsv n50.tsv
# NOTE: the glob removes every tmp*.tsv in the current directory, not only
# the tmp1.tsv created above.
rm tmp*.tsv

# Filter results with custom criteria
# The comparisons are inclusive: N50 >= LEN_N50, C <= N_CONTIG, S >= LEN_SUM.
tsv-filter -H --ge "N50:${LEN_N50}" < n50.tsv |
    tsv-filter -H --le "C:${N_CONTIG}" |
    tsv-filter -H --ge "S:${LEN_SUM}" \
    > n50.pass.tsv

log_info Done.

exit 0