#! /usr/bin/perl -w # Do a piece of a profmark benchmark, for phmmer searches by FPS # (family-pairwise-search; best E-value of all individual queries). # # This script is normally called by pmark_master.pl; its command line # syntax is tied to pmark_master.pl. # # Usage: x-phmmer-fps # Example: ./x-phmmer-fps ~/releases/hmmer-3.0/build-icc-mpi ~/src/hmmer testdir test.tbl pmark.msa test.fa test.out # # SRE, Sun Feb 7 08:54:28 2010 [Casa de Gatos] # SVN $Id$ BEGIN { $top_builddir = shift; $top_srcdir = shift; $wrkdir = shift; $tblfile = shift; $msafile = shift; $fafile = shift; $outfile = shift; } $phmmer = "$top_builddir/src/phmmer"; $opts = ""; if (! -d $top_builddir) { die "didn't find build directory $top_builddir"; } if (! -d $top_srcdir) { die "didn't find source directory $top_srcdir"; } if (! -x $phmmer) { die "didn't find executable $phmmer"; } if (! -e $wrkdir) { die "$wrkdir doesn't exist"; } open(OUTFILE,">$outfile") || die "failed to open $outfile"; open(TABLE, "$tblfile") || die "failed to open $tblfile"; MSA: while () { ($msaname) = split; %seen = (); %best_pval = (); %best_bitscore = (); `esl-afetch -o $wrkdir/$msaname.sto $msafile $msaname`; if ($?) { print "FAILED: esl-afetch -o $wrkdir/$msaname.sto $msafile $msaname\n"; next MSA; } # Extract a list of individual sequence names from the multiple alignment. $output = `esl-seqstat -a $wrkdir/$msaname.sto | grep "^=" | awk '{print \$2}'`; if ($?) { print "FAILED: esl-seqstat\n"; next MSA; } @qnames = split(/^/,$output); chop (@qnames); # Loop over each query; phmmer; accumulate best pval for each target foreach $qname (@qnames) { $output = `esl-sfetch -o $wrkdir/$msaname.query.fa $wrkdir/$msaname.sto $qname`; if ($?) { print "FAILED: esl-sfetch -o $wrkdir/$msaname.query.fa $wrkdir/$msaname.sto $qname\n"; next MSA; } `$phmmer --cpu 1 --tblout $wrkdir/$msaname.tmp $opts $wrkdir/$msaname.query.fa $fafile > /dev/null`; if ($?) { print "FAILED: $phmmer --tblout $msaname.tmp $opts $wrkdir/$msaname.query.fa $fafile\n"; next MSA; } open(OUTPUT, "$wrkdir/$msaname.tmp") || die "FAILED: to open $wrkdir/$msaname.tmp tabular output file"; while () { if (/^\#/) { next; } @fields = split(' ', $_, 7); $target = $fields[0]; $pval = $fields[4]; $bitscore = $fields[5]; if (! $seen{$target} || $pval < $best_pval{$target}) { $seen{$target} = 1; $best_pval{$target} = $pval; $best_bitscore{$target} = $bitscore; } } close OUTPUT; } # Append to the outfile. foreach $target (keys(%seen)) { printf OUTFILE "%g %.1f %s %s\n", $best_pval{$target}, $best_bitscore{$target}, $target, $msaname; } unlink "$wrkdir/$msaname.tmp"; unlink "$wrkdir/$msaname.query.fa"; unlink "$wrkdir/$msaname.sto"; } close TABLE; close OUTFILE;