#! /usr/bin/perl $top_builddir = shift; $top_srcdir = shift; $wrkdir = shift; $tblfile = shift; $msafile = shift; $fafile = shift; $outfile = shift; $phmmer = "$top_builddir/src/phmmer"; $searchopts = "-E 200 --cpu 1"; if (! -d $top_builddir) { die "didn't find build directory $top_builddir"; } if (! -d $top_srcdir) { die "didn't find source directory $top_srcdir"; } if (! -x $phmmer) { die "didn't find executable $jackhmmer"; } if (! -e $wrkdir) { die "$wrkdir doesn't exist"; } open(OUTFILE,">$outfile") || die "failed to open $outfile"; open(TABLE, "$tblfile") || die "failed to open $tblfile"; MSA: while () { ($msaname) = split; $cmd = "esl-afetch -o $wrkdir/$msaname.sto $msafile $msaname"; $output = `$cmd`; if ($?) { print "FAILED: $cmd\n"; next MSA; } # Fetch the query MSA from the benchmark; tmp .sto file here $cmd = "esl-seqstat --amino -a $wrkdir/$msaname.sto | grep '^=' | awk '{print \$2}'"; $output = `$cmd`; if ($?) { print "FAILED: $cmd\n", next MSA; } # Extract list of indiv seq names. --amino for robustness, some msa's v. small @qnames = split(/^/,$output); chop (@qnames); $qname = $qnames[0]; $cmd = "esl-sfetch -o $wrkdir/$msaname.query $wrkdir/$msaname.sto $qname > /dev/null"; `$cmd`; if ($?) { print "FAILED: $cmd\n"; next MSA; } # Pick a single seq (first one) to tmp file; tmp .query file here $cmd = "$phmmer $searchopts --tblout $wrkdir/$msaname.tmp $wrkdir/$msaname.query $fafile > /dev/null"; `$cmd`; if ($?) { print "FAILED: $cmd\n"; next MSA; } # phmmer against benchmark db; tmp .tmp output file here if (! open(OUTPUT, "$wrkdir/$msaname.tmp")) { print "FAILED: to open $wrkdir/$msaname.tmp"; next MSA; } while () { if (/^\#/) { next; } @fields = split(' ', $_, 7); $target = $fields[0]; $pval = $fields[4]; $bitscore = $fields[5]; printf OUTFILE "%g %.1f %s %s\n", $pval, $bitscore, $target, $msaname; } close OUTPUT; unlink "$wrkdir/$msaname.tmp"; unlink "$wrkdir/$msaname.query"; unlink "$wrkdir/$msaname.sto"; } close TABLE; close OUTFILE;