#! /usr/bin/perl -w # Do a piece of a profmark benchmark, for FASTA searches by FPS # (family-pairwise-search; best E-value of all individual queries). # # This script is normally called by pmark_master.pl; its command line # syntax is tied to pmark_master.pl. # # Usage: x-fasta-fps # Example: ./x-fasta-fps /usr/local/fasta-36x2 ~/src/hmmer testdir test.tbl pmark.msa test.fa test.out # # SRE, Tue Apr 20 10:14:39 2010 [Janelia] # SVN $Id$ BEGIN { $top_builddir = shift; $top_srcdir = shift; $wrkdir = shift; $tblfile = shift; $msafile = shift; $fafile = shift; $outfile = shift; } use lib "${top_srcdir}/easel/demotic"; use demotic_fasta; $fasta = "${top_builddir}/bin/fasta36"; $opts = "-q"; if (! -d $top_builddir) { die "didn't find BLAST build directory $top_builddir"; } if (! -d $top_srcdir) { die "didn't find H3 source directory $top_srcdir"; } if (! -x $fasta) { die "didn't find executable $fasta"; } if (! -e $wrkdir) { die "$wrkdir doesn't exist"; } open(OUTFILE,">$outfile") || die "failed to open $outfile"; open(TABLE, "$tblfile") || die "failed to open $tblfile"; MSA: while () { ($msaname) = split; %seen = (); %best_pval = (); %best_bitscore = (); `esl-afetch -o $wrkdir/$msaname.sto $msafile $msaname`; if ($?) { print "FAILED: esl-afetch -o $wrkdir/$msaname.sto $msafile $msaname\n"; next MSA; } # Extract a list of individual sequence names from the multiple alignment. $output = `esl-seqstat -a $wrkdir/$msaname.sto | grep "^=" | awk '{print \$2}'`; if ($?) { print "FAILED: esl-seqstat -a $wrkdir/$msaname.sto\n"; next MSA; } @qnames = split(/^/,$output); chop (@qnames); # Loop over each query; blast; accumulate best pval for each target foreach $qname (@qnames) { $output = `esl-sfetch -o $wrkdir/$msaname.query.fa $wrkdir/$msaname.sto $qname`; if ($?) { print "FAILED: esl-sfetch -o $wrkdir/$msaname.query.fa $wrkdir/$msaname.sto $qname\n"; next MSA; } if (! open(FASTA, "$fasta $opts $wrkdir/$msaname.query.fa $fafile |")) { print "FAILED: $fasta $opts $wrkdir/$msaname.query.fa $fafile |\n"; next MSA; } if (! demotic_fasta::parse(\*FASTA)) { print "FAILED: demotic parser for fasta output\n"; next MSA; } for ($i = 0; $i < $demotic_fasta::nhits; $i++) { $target = $demotic_fasta::hit_target[$i]; $pval = $demotic_fasta::hit_Eval[$i]; $bitscore = $demotic_fasta::hit_bitscore[$i]; if (! $seen{$target} || $pval < $best_pval{$target}) { $seen{$target} = 1; $best_pval{$target} = $pval; $best_bitscore{$target} = $bitscore; } } close FASTA; } # Append to the outfile. foreach $target (keys(%seen)) { printf OUTFILE "%g %.1f %s %s\n", $best_pval{$target}, $best_bitscore{$target}, $target, $msaname; } unlink "$wrkdir/$msaname.sto"; unlink "$wrkdir/$msaname.query.fa"; } close TABLE; close OUTFILE;