{%- include "header" -%}
{# Keep a blank line #}
#----------------------------#
# Run
#----------------------------#
# log_warn / log_info are not defined in this file; presumably they come from
# the included "header" template -- confirm there.
log_warn Protein/count.sh

#----------------------------#
# filtered species.tsv
#----------------------------#
# Build species-f.tsv from species.tsv by chaining one tsv-join stage per
# list file supplied to the template:
#   * each entry of `ins` keeps only the rows whose field 1 is present in
#     that file (looked up one directory above the working directory);
#   * each entry of `not_ins` adds -e, dropping the rows whose field 1 is
#     present in that file.
# Either list may render to zero stages. Every rendered stage ends with a
# pipe, so the closing plain `cat` is what terminates the pipeline and
# carries the output redirection regardless of how many stages were emitted.
log_info "Protein/species-f.tsv"
cat species.tsv |
{% for i in ins -%}
    tsv-join -f ../{{ i }} -k 1 |
{% endfor -%}
{% for i in not_ins -%}
    tsv-join -e -f ../{{ i }} -k 1 |
{% endfor -%}
    cat \
    > species-f.tsv

#----------------------------#
# Each species
#----------------------------#
log_info "Count each species"
# For every distinct value in column 2 of species-f.tsv (used as a directory
# name), summarise that directory's tables into ${SPECIES}/counts.tsv:
#   strain - number of lines in strains.tsv
#   total  - number of data rows in info.tsv (header excluded)
#   dedup  - number of distinct values in the `id` column of info.tsv
#   rep    - number of distinct values in the `rep` column of info.tsv
cat species-f.tsv |
    tsv-select -f 2 |
    tsv-uniq |
while read -r SPECIES; do
    # The only reason to skip a species is that its input table is missing.
    # Species that do have info.tsv must fall through to the counting below;
    # skipping them as well would leave every counts.tsv unwritten.
    if [[ ! -f "${SPECIES}"/info.tsv ]]; then
        continue
    fi

    N_STRAIN=$(cat "${SPECIES}"/strains.tsv | wc -l)

    # tsv-summarize -H prints a header line above the value; `sed 1d` drops
    # it so that only the number is captured.
    N_TOTAL=$(
        cat "${SPECIES}"/info.tsv |
            tsv-summarize -H --count |
            sed '1d'
        )
    N_DEDUP=$(
        cat "${SPECIES}"/info.tsv |
            tsv-summarize -H --unique-count id |
            sed '1d'
        )
    N_REP=$(
        cat "${SPECIES}"/info.tsv |
            tsv-summarize -H --unique-count rep |
            sed '1d'
        )

    # Emit the whole two-column table through a single redirection so the
    # file is truncated and filled in one step.
    {
        printf "#item\tcount\n"
        printf "strain\t%s\n" "${N_STRAIN}"
        printf "total\t%s\n" "${N_TOTAL}"
        printf "dedup\t%s\n" "${N_DEDUP}"
        printf "rep\t%s\n" "${N_REP}"
    } > "${SPECIES}"/counts.tsv

done

#----------------------------#
# Total
#----------------------------#
log_info "Count total"
# Merge the per-species counts.tsv files into one wide table, counts.tsv:
# each two-column file is transposed into a header row plus a value row, and
# the value row's leading "count" label is replaced by the species name.
tsv-select -f 2 species-f.tsv |
    tsv-uniq |
while read SPECIES; do
    # Species without a counts.tsv contribute nothing.
    [[ -f "${SPECIES}"/counts.tsv ]] || continue

    datamash transpose <"${SPECIES}"/counts.tsv |
        sed "s/^count/${SPECIES}/"
done |
    # Every species repeats the same header row; keep the first copy only.
    tsv-uniq \
    > counts.tsv

log_info Done.

exit 0