#!/bin/sh
set -e

# Private scratch directory for this run.
tmp="$(mktemp -d)"

# Remove the scratch directory whenever the script exits. The trap code is
# single-quoted so "$tmp" is expanded when the trap runs instead of being
# pasted into the command text, which broke for paths containing a quote.
trap 'rm -rf -- "$tmp"' EXIT

# A trapped signal does not end the script by itself: exit explicitly, which
# in turn runs the EXIT trap above. Exit statuses follow the 128+signal
# convention.
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM
# Pick the default configuration directory. A non-empty XDG_CONFIG_HOME wins;
# without it, fall back to the first match of: a Library/Preferences folder in
# $HOME, a .config folder in $HOME, or a plain dot-directory in $HOME.
if [ -n "$XDG_CONFIG_HOME" ]; then
  config_default="$XDG_CONFIG_HOME/unipept"
elif [ -d "$HOME/Library/Preferences/" ]; then
  config_default="$HOME/Library/Preferences/Unipept"
elif [ -d "$HOME/.config" ]; then
  # ~/.config exists although the XDG variable itself was not set.
  config_default="$HOME/.config/unipept"
else
  config_default="$HOME/.unipept"
fi
# Help text printed (through crash) when the command line is invalid. It is a
# double-quoted string, so $0 and $config_default are expanded once, at the
# time of this assignment.
USAGE="
Visualizing data with the UMGAP.
Usage: $0 -t [-r rank] ...
$0 -w
$0 -u ...
Where:
A (optionally GZIP-compressed) FASTA file of taxa.
Options:
-t Output a CSV frequency table on species rank.
-w Output an HTML webpage of an interactive visualization.
-u Print a shareable URL to a online interactive visualisation.
-c dir The configuration directory. Defaults to '$config_default'.
-r rank Set the rank for the CSV frequency table (default: species).
"
# =========================================================================== #
# Some functions.
# =========================================================================== #
# Logging
# Write the arguments, joined by spaces and prefixed with 'log: ', to stderr.
# Nothing is written unless the VERBOSE variable is non-empty.
log() {
  if [ -n "$VERBOSE" ]; then
    printf "log: %s\n" "$*" >&2
  fi
}
# Write the arguments, joined by spaces and prefixed with 'debug: ', to
# stderr when either the VERBOSE or the DEBUG variable is non-empty; stay
# silent (and succeed) otherwise.
debug() {
  # Two separate tests instead of '[ ... -a ... ]': the '-a' operator is
  # obsolescent in POSIX and is misparsed when a value looks like an operator
  # (e.g. VERBOSE='=').
  [ -z "$VERBOSE" ] && [ -z "$DEBUG" ] && return
  printf "debug: %s\n" "$*" >&2
}
# Print the arguments (joined by spaces) to stderr and exit with status 1.
# When called inside a command substitution, pipeline or background job, only
# that subshell is terminated.
crash() {
  debug "encountered error"
  # printf instead of echo: depending on the shell, echo swallows a message
  # such as '-n' or interprets backslashes inside it.
  printf '%s\n' "$*" >&2
  exit 1
}
# Explicitly requested configuration directory; empty when none was requested.
configdir=""

# Print the configuration directory to use on stdout. Candidates, in order:
#   1. $configdir, when non-empty (printed as is, its existence is not checked)
#   2. $config_default, when it is an existing directory
#   3. /etc/umgap, when it is an existing directory
# Crashes when none applies. Because this function is normally called as
# "$(getconfigdir)", such a crash only ends the command substitution; callers
# that must stop have to check its exit status.
getconfigdir() {
  # printf instead of echo, so unusual paths (leading '-', backslashes) are
  # printed verbatim.
  if [ -n "$configdir" ]; then
    printf '%s\n' "$configdir"
  elif [ -d "$config_default" ]; then
    printf '%s\n' "$config_default"
  elif [ -d /etc/umgap ]; then
    printf '%s\n' /etc/umgap
  else
    crash "No configuration directory found. Please run umgap-setup or use the '-c' argument."
  fi
}
# =========================================================================== #
# Argument parsing.
# =========================================================================== #
debug "parsing the arguments"

# Defaults. 'type' is cleared explicitly so that a variable of the same name
# inherited from the environment cannot select an output mode.
rank="species"
type=""

# -c dir   configuration directory      -r rank   rank for the CSV table
# -w / -t / -u   select the html / csv / url output; the last one given wins
while getopts c:r:wtu f; do
  case "$f" in
    c) configdir="$OPTARG" ;;
    r) rank="$OPTARG" ;;
    w) type="html" ;;
    t) type="csv" ;;
    u) type="url" ;;
    \?) crash "$USAGE" '' ;;
  esac
done
shift "$(( OPTIND - 1 ))"

# At least one input file and exactly one output mode are required; the HTML
# output accepts a single input file only.
[ "$#" -lt 1 ] && crash "$USAGE"
[ -z "$type" ] && crash "$USAGE"
# Two tests instead of the obsolescent, ambiguous '-a' operator of test(1).
[ "$type" = "html" ] && [ "$#" -gt 1 ] && crash "$USAGE"
# =========================================================================== #
# Environmental checks.
# =========================================================================== #
debug "checking if umgap is installed"
if ! umgap -V > /dev/null; then
  crash 'Cannot find the umgap executable. Please ensure it is installed and located in your $PATH.'
fi

debug "checking if we have a taxons file for the frequency table"
if [ "$type" = "csv" ]; then
  # Resolve the directory once, in the main shell: a crash inside a nested
  # "$(getconfigdir)" would only end that command substitution and let the
  # script carry on with an empty path.
  confdir="$(getconfigdir)" || exit 1

  # Start empty so a 'version' variable inherited from the environment is
  # never mistaken for a detected version.
  version=""

  # Names of the entries directly inside the configuration directory, in
  # 'sort -n' order. The leading path is stripped with sed instead of the
  # GNU-only '-printf' primary of find.
  versions="$(find -H "$confdir" -mindepth 1 -maxdepth 1 \
    | sed 's_^.*/__' | sort -n)"

  # Keep the last (highest sorting) entry which holds a taxons.tsv symlink.
  for candidate in $versions; do
    [ ! -h "$confdir/$candidate/taxons.tsv" ] && continue
    version="$candidate"
  done
  [ -n "$version" ] || crash "No taxon table found for frequency counting. Please run umgap-setup."
  debug "using version '$version'"
fi
# =========================================================================== #
# The actual visualization code
# =========================================================================== #
# Filter writing the decompressed form of the gzip data on stdin to stdout.
# 'gzip -dc' is used instead of zcat, which on some systems only accepts
# files with a '.Z' suffix.
decompress() {
  gzip -dc
}

# Decide how the input file "$1" has to be read: sets $filter to 'decompress'
# when file(1) reports a MIME type ending in 'gzip', and to 'cat' otherwise.
# Call this in the main shell (not in a pipeline or background job), so that
# a crash ends the whole script.
choosefilter() {
  # -L: classify the target of a symbolic link rather than the link itself.
  filetype="$(file -L --mime-type "$1")" || crash "Could not determine filetype of '$1'."
  if [ "$filetype" != "${filetype%gzip}" ]; then
    log "Inputfile '$1' is compressed"
    filter=decompress
  else
    filter=cat
  fi
}

case "$type" in
  url)
    # One line per input file: the file name followed by the umgap output.
    for file in "$@"; do
      choosefilter "$file"
      printf "%s: " "$file"
      "$filter" < "$file" | umgap taxa2tree --url
    done
    ;;
  html)
    # Compressed input is accepted here as well, like in the other modes.
    choosefilter "$1"
    # The redirection sits on the whole group, so an unreadable input file
    # makes the command fail before umgap is started.
    { "$filter" | umgap taxa2tree; } < "$1"
    ;;
  csv)
    # Every input file is streamed (decompressed if needed) through a FIFO
    # named after the sanitized input path. The positional parameters are
    # rewritten from input files to FIFOs along the way, which keeps each
    # path a separate word without relying on word splitting.
    index=0
    for file in "$@"; do
      choosefilter "$file"
      filename="$(printf '%s' "$file" | tr -c '[:alnum:].-' '_')"
      # A directory per input keeps FIFOs apart when two inputs sanitize to
      # the same name; the sed below strips directories from the header, so
      # the column names are unaffected.
      index="$(( index + 1 ))"
      mkdir "$tmp/$index"
      fifo="$tmp/$index/$filename"
      mkfifo "$fifo"
      # Open the FIFO before the input: if the input cannot be opened, the
      # reader still sees an (empty) stream instead of blocking forever.
      "$filter" > "$fifo" < "$file" &
      # Drop the current input file from the front, add its FIFO at the back.
      shift
      set -- "$@" "$fifo"
    done
    # The sed expression removes the directory part of the paths in the CSV
    # header line.
    umgap taxa2freq -r "$rank" "$(getconfigdir)/$version/taxons.tsv" "$@" \
      | sed '1s_,[^,]*/_,_g'
    ;;
esac