/* Routines for manipulating sequence alignment score matrices, * such as the BLOSUM and PAM matrices. * * Contents: * 1. The ESL_SCOREMATRIX object. * 2. Some classic score matrices. * 3. Deriving a score matrix probabilistically. * 4. Reading/writing matrices from/to files. * 5. Implicit probabilistic basis, I: given bg. * 6. Implicit probabilistic basis, II: bg unknown. [Yu/Altschul03,05] * 7. Experiment driver. * 8 Utility programs. * 9. Unit tests. * 10. Test driver. * 11. Example program. */ #include "esl_config.h" #include #include #include "easel.h" #include "esl_alphabet.h" #include "esl_composition.h" #include "esl_dmatrix.h" #include "esl_fileparser.h" #include "esl_rootfinder.h" #include "esl_ratematrix.h" #include "esl_vectorops.h" #include "esl_scorematrix.h" /***************************************************************** *# 1. The ESL_SCOREMATRIX object *****************************************************************/ /* Function: esl_scorematrix_Create() * Synopsis: Allocate and initialize an object. * * Purpose: Allocates a score matrix for alphabet , initializes * all scores to zero. * * Args: abc - pointer to digital alphabet * * Returns: a pointer to the new object. * * Throws: on allocation failure. */ ESL_SCOREMATRIX * esl_scorematrix_Create(const ESL_ALPHABET *abc) { ESL_SCOREMATRIX *S = NULL; int status; int i; ESL_ALLOC(S, sizeof(ESL_SCOREMATRIX)); S->s = NULL; S->K = abc->K; S->Kp = abc->Kp; S->isval = NULL; S->abc_r = abc; S->nc = 0; S->outorder = NULL; S->name = NULL; S->path = NULL; ESL_ALLOC(S->s, sizeof(int *) * abc->Kp); S->s[0] = NULL; ESL_ALLOC(S->isval, sizeof(char) * abc->Kp); for (i = 0; i < abc->Kp; i++) S->isval[i] = FALSE; ESL_ALLOC(S->outorder, sizeof(char) * (abc->Kp+1)); S->outorder[0] = '\0'; /* init to empty string. */ ESL_ALLOC(S->s[0], sizeof(int) * abc->Kp * abc->Kp); for (i = 1; i < abc->Kp; i++) S->s[i] = S->s[0] + abc->Kp * i; for (i = 0; i < abc->Kp*abc->Kp; i++) S->s[0][i] = 0; return S; ERROR: esl_scorematrix_Destroy(S); return NULL; } /* Function: esl_scorematrix_Copy() * Synopsis: Copy matrix to . * * Purpose: Copy score matrix into . Caller * has allocated for the same alphabet as * . * * Returns: on success. * * Throws: if isn't allocated for * the same alphabet as . * on allocation error. */ int esl_scorematrix_Copy(const ESL_SCOREMATRIX *src, ESL_SCOREMATRIX *dest) { int i,j; int status; if (src->abc_r->type != dest->abc_r->type || src->K != dest->K || src->Kp != dest->Kp) ESL_EXCEPTION(eslEINCOMPAT, "source and dest score matrix types don't match"); for (i = 0; i < src->Kp; i++) for (j = 0; j < src->Kp; j++) dest->s[i][j] = src->s[i][j]; for (i = 0; i < src->Kp; i++) dest->isval[i] = src->isval[i]; dest->nc = src->nc; for (i = 0; i < src->nc; i++) dest->outorder[i] = src->outorder[i]; dest->outorder[dest->nc] = '\0'; if ((status = esl_strdup(src->name, -1, &(dest->name))) != eslOK) return status; if ((status = esl_strdup(src->path, -1, &(dest->path))) != eslOK) return status; return eslOK; } /* Function: esl_scorematrix_Clone() * Synopsis: Allocate a duplicate of a matrix. * * Purpose: Allocates a new matrix and makes it a duplicate * of . Return a pointer to the new matrix. * * Throws: on allocation failure. */ ESL_SCOREMATRIX * esl_scorematrix_Clone(const ESL_SCOREMATRIX *S) { ESL_SCOREMATRIX *dup = NULL; if ((dup = esl_scorematrix_Create(S->abc_r)) == NULL) return NULL; if (esl_scorematrix_Copy(S, dup) != eslOK) { esl_scorematrix_Destroy(dup); return NULL; } return dup; } /* Function: esl_scorematrix_Compare() * Synopsis: Compare two matrices for equality. * * Purpose: Compares two score matrices. Returns if they * are identical, if they differ. Every aspect * of the two matrices is compared. * * The annotation (name, filename path) are not * compared; we may want to compare an internally * generated scorematrix to one read from a file. */ int esl_scorematrix_Compare(const ESL_SCOREMATRIX *S1, const ESL_SCOREMATRIX *S2) { int a,b; if (strcmp(S1->outorder, S2->outorder) != 0) return eslFAIL; if (S1->nc != S2->nc) return eslFAIL; for (a = 0; a < S1->nc; a++) if (S1->isval[a] != S2->isval[a]) return eslFAIL; for (a = 0; a < S1->Kp; a++) for (b = 0; b < S1->Kp; b++) if (S1->s[a][b] != S2->s[a][b]) return eslFAIL; return eslOK; } /* Function: esl_scorematrix_CompareCanon() * Synopsis: Compares scores of canonical residues for equality. * * Purpose: Compares the scores of canonical residues in * two score matrices and for equality. * Returns if they are identical, * if they differ. Peripheral aspects of the scoring matrices * having to do with noncanonical residues, output * order, and suchlike are ignored. */ int esl_scorematrix_CompareCanon(const ESL_SCOREMATRIX *S1, const ESL_SCOREMATRIX *S2) { int a,b; for (a = 0; a < S1->K; a++) for (b = 0; b < S1->K; b++) if (S1->s[a][b] != S2->s[a][b]) return eslFAIL; return eslOK; } /* Function: esl_scorematrix_Max() * Synopsis: Returns maximum value in score matrix. * * Purpose: Returns the maximum value in score matrix . */ int esl_scorematrix_Max(const ESL_SCOREMATRIX *S) { int i,j; int max = S->s[0][0]; for (i = 0; i < S->K; i++) for (j = 0; j < S->K; j++) if (S->s[i][j] > max) max = S->s[i][j]; return max; } /* Function: esl_scorematrix_Min() * Synopsis: Returns minimum value in score matrix. * * Purpose: Returns the minimum value in score matrix . */ int esl_scorematrix_Min(const ESL_SCOREMATRIX *S) { int i,j; int min = S->s[0][0]; for (i = 0; i < S->K; i++) for (j = 0; j < S->K; j++) if (S->s[i][j] < min) min = S->s[i][j]; return min; } /* Function: esl_scorematrix_IsSymmetric() * Synopsis: Returns for symmetric matrix. * * Purpose: Returns if matrix is symmetric, * or if it's not. */ int esl_scorematrix_IsSymmetric(const ESL_SCOREMATRIX *S) { int i,j; for (i = 0; i < S->K; i++) for (j = i; j < S->K; j++) if (S->s[i][j] != S->s[j][i]) return FALSE; return TRUE; } /* Function: esl_scorematrix_ExpectedScore() * Synopsis: Calculates the expected score of a matrix. * * Purpose: Calculates the expected score of a matrix , * given background frequencies and ; * return it in <*ret_E>. * * The expected score is defined as * $\sum_{ab} f_a f_b \sigma_{ab}$. * * The expected score is in whatever units the score matrix * is in. If you know $\lambda$, you can convert it to * units of bits ($\log 2$) by multiplying it by $\lambda / * \log 2$. * * Args: S - score matrix * fi - background frequencies $f_i$ (0..K-1) * fj - background frequencies $f_j$ (0..K-1) * ret_E - RETURN: expected score * * Returns: on success. */ int esl_scorematrix_ExpectedScore(ESL_SCOREMATRIX *S, double *fi, double *fj, double *ret_E) { double E = 0.; int a,b; for (a = 0; a < S->K; a++) for (b = 0; b < S->K; b++) E += fi[a] * fj[b] * (double) S->s[a][b]; *ret_E = E; return eslOK; } /* Function: esl_scorematrix_RelEntropy() * Synopsis: Calculates relative entropy of a matrix. * * Purpose: Calculates the relative entropy of score matrix in * bits, given its background distributions and and * its scale . * * The relative entropy is defined as $\sum_{ab} p_{ab} * \log_2 \frac{p_{ab}} {f_a f_b}$, the average score (in * bits) of homologous aligned sequences. In general it is * $\geq 0$ (and certainly so in the case when background * frequencies $f_a$ and $f_b$ are the marginals of the * $p_{ab}$ joint ptobabilities). * * Args: S - score matrix * fi - background freqs for sequence i * fj - background freqs for sequence j * lambda - scale factor $\lambda$ for * ret_D - RETURN: relative entropy. * * Returns: on success, and contains the relative * entropy. * * Throws: on allocation error. * if the implied $p_{ij}$'s don't sum to one, * probably indicating that was not the correct * for , , and . * In either exception, is returned as 0.0. */ int esl_scorematrix_RelEntropy(const ESL_SCOREMATRIX *S, const double *fi, const double *fj, double lambda, double *ret_D) { int status; double pij; double sum = 0.; int i,j; double D = 0; for (i = 0; i < S->K; i++) for (j = 0; j < S->K; j++) { pij = fi[i] * fj[j] * exp(lambda * (double) S->s[i][j]); sum += pij; if (pij > 0.) D += pij * log(pij / (fi[i] * fj[j])); } if (esl_DCompare(sum, 1.0, 1e-3) != eslOK) ESL_XEXCEPTION(eslEINVAL, "pij's don't sum to one (%.4f): bad lambda or bad bg?", sum); D /= eslCONST_LOG2; *ret_D = D; return eslOK; ERROR: *ret_D = 0.; return status; } /* Function: esl_scorematrix_JointToConditionalOnQuery() * Synopsis: Convert a joint probability matrix to conditional probs P(b|a) * * Purpose: Given a joint probability matrix

that has been calculated * by or * (or one that obeys the same conditions; see below), * convert the joint probabilities to conditional * probabilities , where is a residue in the target, * and is a residue in the query. * * $P(b \mid a) = P(ab) / P(a)$, where $P(a) = \sum_b P(ab)$. * * The value stored in mx[a][b]> is $P(b \mid a)$. * * All values in

involving the codes for gap, * nonresidue, and missing data (codes ,, and * ) are 0.0, not probabilities. Only rows/columns * are valid probability vectors. * * Returns: on success. * * Throws: (no abnormal error conditions) * * Xref: J9/87. */ int esl_scorematrix_JointToConditionalOnQuery(const ESL_ALPHABET *abc, ESL_DMATRIX *P) { int a,b; /* P(b|a) = P(ab) / P(a) * and P(a) = P(a,X), the value at [a][Kp-3] */ for (a = 0; a < abc->Kp-2; a++) for (b = 0; b < abc->Kp-2; b++) P->mx[a][b] = (P->mx[a][abc->Kp-3] == 0.0 ? 0.0 : P->mx[a][b] / P->mx[a][abc->Kp-3]); return eslOK; } /* Function: esl_scorematrix_Destroy() * Synopsis: Frees a matrix. * * Purpose: Frees a score matrix. */ void esl_scorematrix_Destroy(ESL_SCOREMATRIX *S) { if (S == NULL) return; if (S->s != NULL) { if (S->s[0] != NULL) free(S->s[0]); free(S->s); } if (S->isval != NULL) free(S->isval); if (S->outorder != NULL) free(S->outorder); if (S->name != NULL) free(S->name); if (S->path != NULL) free(S->path); free(S); return; } /*------------------- end, scorematrix object -------------------*/ /***************************************************************** *# 2. Some classic score matrices. *****************************************************************/ /* PAM30, PAM70, PAM120, PAM240, BLOSUM45, BLOSUM50, BLOSUM62, BLOSUM80, BLOSUM90 */ /* Standard matrices are reformatted to Easel static data by the UTILITY1 program; see below */ /* TODO: Instead of storing the classical low-precision versions of * these, we should recalculate each one from its original * probabilistic basis, and store it at higher integer precision, * allowing the Yu/Altschul procedure to work. If we do that, we might also store * lambda and background probabilities. */ #define eslAADIM 29 struct esl_scorematrix_aa_preload_s { char *name; int matrix[eslAADIM][eslAADIM]; }; static const struct esl_scorematrix_aa_preload_s ESL_SCOREMATRIX_AA_PRELOADS[] = { { "PAM30", { /* A C D E F G H I K L M N P Q R S T V W Y - B J Z O U X * ~ */ { 6, -6, -3, -2, -8, -2, -7, -5, -7, -6, -5, -4, -2, -4, -7, 0, -1, -2, -13, -8, 0, -3, 0, -3, 0, 0, -3, -17, 0, }, /* A */ { -6, 10, -14, -14, -13, -9, -7, -6, -14, -15, -13, -11, -8, -14, -8, -3, -8, -6, -15, -4, 0, -12, 0, -14, 0, 0, -9, -17, 0, }, /* C */ { -3, -14, 8, 2, -15, -3, -4, -7, -4, -12, -11, 2, -8, -2, -10, -4, -5, -8, -15, -11, 0, 6, 0, 1, 0, 0, -5, -17, 0, }, /* D */ { -2, -14, 2, 8, -14, -4, -5, -5, -4, -9, -7, -2, -5, 1, -9, -4, -6, -6, -17, -8, 0, 1, 0, 6, 0, 0, -5, -17, 0, }, /* E */ { -8, -13, -15, -14, 9, -9, -6, -2, -14, -3, -4, -9, -10, -13, -9, -6, -9, -8, -4, 2, 0, -10, 0, -13, 0, 0, -8, -17, 0, }, /* F */ { -2, -9, -3, -4, -9, 6, -9, -11, -7, -10, -8, -3, -6, -7, -9, -2, -6, -5, -15, -14, 0, -3, 0, -5, 0, 0, -5, -17, 0, }, /* G */ { -7, -7, -4, -5, -6, -9, 9, -9, -6, -6, -10, 0, -4, 1, -2, -6, -7, -6, -7, -3, 0, -1, 0, -1, 0, 0, -5, -17, 0, }, /* H */ { -5, -6, -7, -5, -2, -11, -9, 8, -6, -1, -1, -5, -8, -8, -5, -7, -2, 2, -14, -6, 0, -6, 0, -6, 0, 0, -5, -17, 0, }, /* I */ { -7, -14, -4, -4, -14, -7, -6, -6, 7, -8, -2, -1, -6, -3, 0, -4, -3, -9, -12, -9, 0, -2, 0, -4, 0, 0, -5, -17, 0, }, /* K */ { -6, -15, -12, -9, -3, -10, -6, -1, -8, 7, 1, -7, -7, -5, -8, -8, -7, -2, -6, -7, 0, -9, 0, -7, 0, 0, -6, -17, 0, }, /* L */ { -5, -13, -11, -7, -4, -8, -10, -1, -2, 1, 11, -9, -8, -4, -4, -5, -4, -1, -13, -11, 0, -10, 0, -5, 0, 0, -5, -17, 0, }, /* M */ { -4, -11, 2, -2, -9, -3, 0, -5, -1, -7, -9, 8, -6, -3, -6, 0, -2, -8, -8, -4, 0, 6, 0, -3, 0, 0, -3, -17, 0, }, /* N */ { -2, -8, -8, -5, -10, -6, -4, -8, -6, -7, -8, -6, 8, -3, -4, -2, -4, -6, -14, -13, 0, -7, 0, -4, 0, 0, -5, -17, 0, }, /* P */ { -4, -14, -2, 1, -13, -7, 1, -8, -3, -5, -4, -3, -3, 8, -2, -5, -5, -7, -13, -12, 0, -3, 0, 6, 0, 0, -5, -17, 0, }, /* Q */ { -7, -8, -10, -9, -9, -9, -2, -5, 0, -8, -4, -6, -4, -2, 8, -3, -6, -8, -2, -10, 0, -7, 0, -4, 0, 0, -6, -17, 0, }, /* R */ { 0, -3, -4, -4, -6, -2, -6, -7, -4, -8, -5, 0, -2, -5, -3, 6, 0, -6, -5, -7, 0, -1, 0, -5, 0, 0, -3, -17, 0, }, /* S */ { -1, -8, -5, -6, -9, -6, -7, -2, -3, -7, -4, -2, -4, -5, -6, 0, 7, -3, -13, -6, 0, -3, 0, -6, 0, 0, -4, -17, 0, }, /* T */ { -2, -6, -8, -6, -8, -5, -6, 2, -9, -2, -1, -8, -6, -7, -8, -6, -3, 7, -15, -7, 0, -8, 0, -6, 0, 0, -5, -17, 0, }, /* V */ { -13, -15, -15, -17, -4, -15, -7, -14, -12, -6, -13, -8, -14, -13, -2, -5, -13, -15, 13, -5, 0, -10, 0, -14, 0, 0, -11, -17, 0, }, /* W */ { -8, -4, -11, -8, 2, -14, -3, -6, -9, -7, -11, -4, -13, -12, -10, -7, -6, -7, -5, 10, 0, -6, 0, -9, 0, 0, -7, -17, 0, }, /* Y */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* - */ { -3, -12, 6, 1, -10, -3, -1, -6, -2, -9, -10, 6, -7, -3, -7, -1, -3, -8, -10, -6, 0, 6, 0, 0, 0, 0, -5, -17, 0, }, /* B */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* J */ { -3, -14, 1, 6, -13, -5, -1, -6, -4, -7, -5, -3, -4, 6, -4, -5, -6, -6, -14, -9, 0, 0, 0, 6, 0, 0, -5, -17, 0, }, /* Z */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* O */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* U */ { -3, -9, -5, -5, -8, -5, -5, -5, -5, -6, -5, -3, -5, -5, -6, -3, -4, -5, -11, -7, 0, -5, 0, -5, 0, 0, -5, -17, 0, }, /* X */ { -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, -17, 0, -17, 0, -17, 0, 0, -17, 1, 0, }, /* * */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* ~ */ }}, { "PAM70", { /* A C D E F G H I K L M N P Q R S T V W Y - B J Z O U X * ~ */ { 5, -4, -1, -1, -6, 0, -4, -2, -4, -4, -3, -2, 0, -2, -4, 1, 1, -1, -9, -5, 0, -1, 0, -1, 0, 0, -2, -11, 0, }, /* A */ { -4, 9, -9, -9, -8, -6, -5, -4, -9, -10, -9, -7, -5, -9, -5, -1, -5, -4, -11, -2, 0, -8, 0, -9, 0, 0, -6, -11, 0, }, /* C */ { -1, -9, 6, 3, -10, -1, -1, -5, -2, -8, -7, 3, -4, 0, -6, -1, -2, -5, -10, -7, 0, 5, 0, 2, 0, 0, -3, -11, 0, }, /* D */ { -1, -9, 3, 6, -9, -2, -2, -4, -2, -6, -4, 0, -3, 2, -5, -2, -3, -4, -11, -6, 0, 2, 0, 5, 0, 0, -3, -11, 0, }, /* E */ { -6, -8, -10, -9, 8, -7, -4, 0, -9, -1, -2, -6, -7, -9, -7, -4, -6, -5, -2, 4, 0, -7, 0, -9, 0, 0, -5, -11, 0, }, /* F */ { 0, -6, -1, -2, -7, 6, -6, -6, -5, -7, -6, -1, -3, -4, -6, 0, -3, -3, -10, -9, 0, -1, 0, -3, 0, 0, -3, -11, 0, }, /* G */ { -4, -5, -1, -2, -4, -6, 8, -6, -3, -4, -6, 1, -2, 2, 0, -3, -4, -4, -5, -1, 0, 0, 0, 1, 0, 0, -3, -11, 0, }, /* H */ { -2, -4, -5, -4, 0, -6, -6, 7, -4, 1, 1, -3, -5, -5, -3, -4, -1, 3, -9, -4, 0, -4, 0, -4, 0, 0, -3, -11, 0, }, /* I */ { -4, -9, -2, -2, -9, -5, -3, -4, 6, -5, 0, 0, -4, -1, 2, -2, -1, -6, -7, -7, 0, -1, 0, -2, 0, 0, -3, -11, 0, }, /* K */ { -4, -10, -8, -6, -1, -7, -4, 1, -5, 6, 2, -5, -5, -3, -6, -6, -4, 0, -4, -4, 0, -6, 0, -4, 0, 0, -4, -11, 0, }, /* L */ { -3, -9, -7, -4, -2, -6, -6, 1, 0, 2, 10, -5, -5, -2, -2, -3, -2, 0, -8, -7, 0, -6, 0, -3, 0, 0, -3, -11, 0, }, /* M */ { -2, -7, 3, 0, -6, -1, 1, -3, 0, -5, -5, 6, -3, -1, -3, 1, 0, -5, -6, -3, 0, 5, 0, -1, 0, 0, -2, -11, 0, }, /* N */ { 0, -5, -4, -3, -7, -3, -2, -5, -4, -5, -5, -3, 7, -1, -2, 0, -2, -3, -9, -9, 0, -4, 0, -2, 0, 0, -3, -11, 0, }, /* P */ { -2, -9, 0, 2, -9, -4, 2, -5, -1, -3, -2, -1, -1, 7, 0, -3, -3, -4, -8, -8, 0, -1, 0, 5, 0, 0, -2, -11, 0, }, /* Q */ { -4, -5, -6, -5, -7, -6, 0, -3, 2, -6, -2, -3, -2, 0, 8, -1, -4, -5, 0, -7, 0, -4, 0, -2, 0, 0, -3, -11, 0, }, /* R */ { 1, -1, -1, -2, -4, 0, -3, -4, -2, -6, -3, 1, 0, -3, -1, 5, 2, -3, -3, -5, 0, 0, 0, -2, 0, 0, -1, -11, 0, }, /* S */ { 1, -5, -2, -3, -6, -3, -4, -1, -1, -4, -2, 0, -2, -3, -4, 2, 6, -1, -8, -4, 0, -1, 0, -3, 0, 0, -2, -11, 0, }, /* T */ { -1, -4, -5, -4, -5, -3, -4, 3, -6, 0, 0, -5, -3, -4, -5, -3, -1, 6, -10, -5, 0, -5, 0, -4, 0, 0, -2, -11, 0, }, /* V */ { -9, -11, -10, -11, -2, -10, -5, -9, -7, -4, -8, -6, -9, -8, 0, -3, -8, -10, 13, -3, 0, -7, 0, -10, 0, 0, -7, -11, 0, }, /* W */ { -5, -2, -7, -6, 4, -9, -1, -4, -7, -4, -7, -3, -9, -8, -7, -5, -4, -5, -3, 9, 0, -4, 0, -7, 0, 0, -5, -11, 0, }, /* Y */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* - */ { -1, -8, 5, 2, -7, -1, 0, -4, -1, -6, -6, 5, -4, -1, -4, 0, -1, -5, -7, -4, 0, 5, 0, 1, 0, 0, -2, -11, 0, }, /* B */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* J */ { -1, -9, 2, 5, -9, -3, 1, -4, -2, -4, -3, -1, -2, 5, -2, -2, -3, -4, -10, -7, 0, 1, 0, 5, 0, 0, -3, -11, 0, }, /* Z */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* O */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* U */ { -2, -6, -3, -3, -5, -3, -3, -3, -3, -4, -3, -2, -3, -2, -3, -1, -2, -2, -7, -5, 0, -2, 0, -3, 0, 0, -3, -11, 0, }, /* X */ { -11, -11, -11, -11, -11, -11, -11, -11, -11, -11, -11, -11, -11, -11, -11, -11, -11, -11, -11, -11, 0, -11, 0, -11, 0, 0, -11, 1, 0, }, /* * */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* ~ */ }}, { "PAM120", { /* A C D E F G H I K L M N P Q R S T V W Y - B J Z O U X * ~ */ { 3, -3, 0, 0, -4, 1, -3, -1, -2, -3, -2, -1, 1, -1, -3, 1, 1, 0, -7, -4, 0, 0, 0, -1, 0, 0, -1, -8, 0, }, /* A */ { -3, 9, -7, -7, -6, -4, -4, -3, -7, -7, -6, -5, -4, -7, -4, 0, -3, -3, -8, -1, 0, -6, 0, -7, 0, 0, -4, -8, 0, }, /* C */ { 0, -7, 5, 3, -7, 0, 0, -3, -1, -5, -4, 2, -3, 1, -3, 0, -1, -3, -8, -5, 0, 4, 0, 3, 0, 0, -2, -8, 0, }, /* D */ { 0, -7, 3, 5, -7, -1, -1, -3, -1, -4, -3, 1, -2, 2, -3, -1, -2, -3, -8, -5, 0, 3, 0, 4, 0, 0, -1, -8, 0, }, /* E */ { -4, -6, -7, -7, 8, -5, -3, 0, -7, 0, -1, -4, -5, -6, -5, -3, -4, -3, -1, 4, 0, -5, 0, -6, 0, 0, -3, -8, 0, }, /* F */ { 1, -4, 0, -1, -5, 5, -4, -4, -3, -5, -4, 0, -2, -3, -4, 1, -1, -2, -8, -6, 0, 0, 0, -2, 0, 0, -2, -8, 0, }, /* G */ { -3, -4, 0, -1, -3, -4, 7, -4, -2, -3, -4, 2, -1, 3, 1, -2, -3, -3, -3, -1, 0, 1, 0, 1, 0, 0, -2, -8, 0, }, /* H */ { -1, -3, -3, -3, 0, -4, -4, 6, -3, 1, 1, -2, -3, -3, -2, -2, 0, 3, -6, -2, 0, -3, 0, -3, 0, 0, -1, -8, 0, }, /* I */ { -2, -7, -1, -1, -7, -3, -2, -3, 5, -4, 0, 1, -2, 0, 2, -1, -1, -4, -5, -5, 0, 0, 0, -1, 0, 0, -2, -8, 0, }, /* K */ { -3, -7, -5, -4, 0, -5, -3, 1, -4, 5, 3, -4, -3, -2, -4, -4, -3, 1, -3, -2, 0, -4, 0, -3, 0, 0, -2, -8, 0, }, /* L */ { -2, -6, -4, -3, -1, -4, -4, 1, 0, 3, 8, -3, -3, -1, -1, -2, -1, 1, -6, -4, 0, -4, 0, -2, 0, 0, -2, -8, 0, }, /* M */ { -1, -5, 2, 1, -4, 0, 2, -2, 1, -4, -3, 4, -2, 0, -1, 1, 0, -3, -4, -2, 0, 3, 0, 0, 0, 0, -1, -8, 0, }, /* N */ { 1, -4, -3, -2, -5, -2, -1, -3, -2, -3, -3, -2, 6, 0, -1, 1, -1, -2, -7, -6, 0, -2, 0, -1, 0, 0, -2, -8, 0, }, /* P */ { -1, -7, 1, 2, -6, -3, 3, -3, 0, -2, -1, 0, 0, 6, 1, -2, -2, -3, -6, -5, 0, 0, 0, 4, 0, 0, -1, -8, 0, }, /* Q */ { -3, -4, -3, -3, -5, -4, 1, -2, 2, -4, -1, -1, -1, 1, 6, -1, -2, -3, 1, -5, 0, -2, 0, -1, 0, 0, -2, -8, 0, }, /* R */ { 1, 0, 0, -1, -3, 1, -2, -2, -1, -4, -2, 1, 1, -2, -1, 3, 2, -2, -2, -3, 0, 0, 0, -1, 0, 0, -1, -8, 0, }, /* S */ { 1, -3, -1, -2, -4, -1, -3, 0, -1, -3, -1, 0, -1, -2, -2, 2, 4, 0, -6, -3, 0, 0, 0, -2, 0, 0, -1, -8, 0, }, /* T */ { 0, -3, -3, -3, -3, -2, -3, 3, -4, 1, 1, -3, -2, -3, -3, -2, 0, 5, -8, -3, 0, -3, 0, -3, 0, 0, -1, -8, 0, }, /* V */ { -7, -8, -8, -8, -1, -8, -3, -6, -5, -3, -6, -4, -7, -6, 1, -2, -6, -8, 12, -2, 0, -6, 0, -7, 0, 0, -5, -8, 0, }, /* W */ { -4, -1, -5, -5, 4, -6, -1, -2, -5, -2, -4, -2, -6, -5, -5, -3, -3, -3, -2, 8, 0, -3, 0, -5, 0, 0, -3, -8, 0, }, /* Y */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* - */ { 0, -6, 4, 3, -5, 0, 1, -3, 0, -4, -4, 3, -2, 0, -2, 0, 0, -3, -6, -3, 0, 4, 0, 2, 0, 0, -1, -8, 0, }, /* B */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* J */ { -1, -7, 3, 4, -6, -2, 1, -3, -1, -3, -2, 0, -1, 4, -1, -1, -2, -3, -7, -5, 0, 2, 0, 4, 0, 0, -1, -8, 0, }, /* Z */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* O */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* U */ { -1, -4, -2, -1, -3, -2, -2, -1, -2, -2, -2, -1, -2, -1, -2, -1, -1, -1, -5, -3, 0, -1, 0, -1, 0, 0, -2, -8, 0, }, /* X */ { -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, 0, -8, 0, -8, 0, 0, -8, 1, 0, }, /* * */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* ~ */ }}, { "PAM240", { /* A C D E F G H I K L M N P Q R S T V W Y - B J Z O U X * ~ */ { 2, -2, 0, 0, -4, 1, -1, -1, -1, -2, -1, 0, 1, 0, -2, 1, 1, 0, -6, -4, 0, 0, 0, 0, 0, 0, 0, -8, 0, }, /* A */ { -2, 12, -5, -6, -5, -4, -4, -2, -6, -6, -5, -4, -3, -6, -4, 0, -2, -2, -8, 0, 0, -5, 0, -6, 0, 0, -3, -8, 0, }, /* C */ { 0, -5, 4, 4, -6, 1, 1, -2, 0, -4, -3, 2, -1, 2, -1, 0, 0, -2, -7, -4, 0, 3, 0, 3, 0, 0, -1, -8, 0, }, /* D */ { 0, -6, 4, 4, -6, 0, 1, -2, 0, -3, -2, 1, -1, 3, -1, 0, 0, -2, -7, -4, 0, 3, 0, 3, 0, 0, -1, -8, 0, }, /* E */ { -4, -5, -6, -6, 9, -5, -2, 1, -5, 2, 0, -4, -5, -5, -5, -3, -3, -1, 0, 7, 0, -5, 0, -5, 0, 0, -2, -8, 0, }, /* F */ { 1, -4, 1, 0, -5, 5, -2, -3, -2, -4, -3, 0, -1, -1, -3, 1, 0, -1, -7, -5, 0, 0, 0, 0, 0, 0, -1, -8, 0, }, /* G */ { -1, -4, 1, 1, -2, -2, 7, -3, 0, -2, -2, 2, 0, 3, 2, -1, -1, -2, -3, 0, 0, 1, 0, 2, 0, 0, -1, -8, 0, }, /* H */ { -1, -2, -2, -2, 1, -3, -3, 5, -2, 2, 2, -2, -2, -2, -2, -1, 0, 4, -5, -1, 0, -2, 0, -2, 0, 0, -1, -8, 0, }, /* I */ { -1, -6, 0, 0, -5, -2, 0, -2, 5, -3, 0, 1, -1, 1, 3, 0, 0, -3, -4, -5, 0, 1, 0, 0, 0, 0, -1, -8, 0, }, /* K */ { -2, -6, -4, -3, 2, -4, -2, 2, -3, 6, 4, -3, -3, -2, -3, -3, -2, 2, -2, -1, 0, -4, 0, -3, 0, 0, -1, -8, 0, }, /* L */ { -1, -5, -3, -2, 0, -3, -2, 2, 0, 4, 7, -2, -2, -1, 0, -2, -1, 2, -4, -3, 0, -2, 0, -2, 0, 0, -1, -8, 0, }, /* M */ { 0, -4, 2, 1, -4, 0, 2, -2, 1, -3, -2, 2, -1, 1, 0, 1, 0, -2, -4, -2, 0, 2, 0, 1, 0, 0, 0, -8, 0, }, /* N */ { 1, -3, -1, -1, -5, -1, 0, -2, -1, -3, -2, -1, 6, 0, 0, 1, 0, -1, -6, -5, 0, -1, 0, 0, 0, 0, -1, -8, 0, }, /* P */ { 0, -6, 2, 3, -5, -1, 3, -2, 1, -2, -1, 1, 0, 4, 1, -1, -1, -2, -5, -4, 0, 1, 0, 3, 0, 0, -1, -8, 0, }, /* Q */ { -2, -4, -1, -1, -5, -3, 2, -2, 3, -3, 0, 0, 0, 1, 6, 0, -1, -3, 2, -4, 0, -1, 0, 0, 0, 0, -1, -8, 0, }, /* R */ { 1, 0, 0, 0, -3, 1, -1, -1, 0, -3, -2, 1, 1, -1, 0, 2, 1, -1, -3, -3, 0, 0, 0, 0, 0, 0, 0, -8, 0, }, /* S */ { 1, -2, 0, 0, -3, 0, -1, 0, 0, -2, -1, 0, 0, -1, -1, 1, 3, 0, -5, -3, 0, 0, 0, -1, 0, 0, 0, -8, 0, }, /* T */ { 0, -2, -2, -2, -1, -1, -2, 4, -3, 2, 2, -2, -1, -2, -3, -1, 0, 4, -6, -3, 0, -2, 0, -2, 0, 0, -1, -8, 0, }, /* V */ { -6, -8, -7, -7, 0, -7, -3, -5, -4, -2, -4, -4, -6, -5, 2, -3, -5, -6, 17, 0, 0, -5, 0, -6, 0, 0, -4, -8, 0, }, /* W */ { -4, 0, -4, -4, 7, -5, 0, -1, -5, -1, -3, -2, -5, -4, -4, -3, -3, -3, 0, 10, 0, -3, 0, -4, 0, 0, -2, -8, 0, }, /* Y */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* - */ { 0, -5, 3, 3, -5, 0, 1, -2, 1, -4, -2, 2, -1, 1, -1, 0, 0, -2, -5, -3, 0, 3, 0, 2, 0, 0, -1, -8, 0, }, /* B */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* J */ { 0, -6, 3, 3, -5, 0, 2, -2, 0, -3, -2, 1, 0, 3, 0, 0, -1, -2, -6, -4, 0, 2, 0, 3, 0, 0, -1, -8, 0, }, /* Z */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* O */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* U */ { 0, -3, -1, -1, -2, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, 0, 0, -1, -4, -2, 0, -1, 0, -1, 0, 0, -1, -8, 0, }, /* X */ { -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, 0, -8, 0, -8, 0, 0, -8, 1, 0, }, /* * */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* ~ */ }}, { "BLOSUM45", { /* A C D E F G H I K L M N P Q R S T V W Y - B J Z O U X * ~ */ { 5, -1, -2, -1, -2, 0, -2, -1, -1, -1, -1, -1, -1, -1, -2, 1, 0, 0, -2, -2, 0, -1, 0, -1, 0, 0, 0, -5, 0, }, /* A */ { -1, 12, -3, -3, -2, -3, -3, -3, -3, -2, -2, -2, -4, -3, -3, -1, -1, -1, -5, -3, 0, -2, 0, -3, 0, 0, -2, -5, 0, }, /* C */ { -2, -3, 7, 2, -4, -1, 0, -4, 0, -3, -3, 2, -1, 0, -1, 0, -1, -3, -4, -2, 0, 5, 0, 1, 0, 0, -1, -5, 0, }, /* D */ { -1, -3, 2, 6, -3, -2, 0, -3, 1, -2, -2, 0, 0, 2, 0, 0, -1, -3, -3, -2, 0, 1, 0, 4, 0, 0, -1, -5, 0, }, /* E */ { -2, -2, -4, -3, 8, -3, -2, 0, -3, 1, 0, -2, -3, -4, -2, -2, -1, 0, 1, 3, 0, -3, 0, -3, 0, 0, -1, -5, 0, }, /* F */ { 0, -3, -1, -2, -3, 7, -2, -4, -2, -3, -2, 0, -2, -2, -2, 0, -2, -3, -2, -3, 0, -1, 0, -2, 0, 0, -1, -5, 0, }, /* G */ { -2, -3, 0, 0, -2, -2, 10, -3, -1, -2, 0, 1, -2, 1, 0, -1, -2, -3, -3, 2, 0, 0, 0, 0, 0, 0, -1, -5, 0, }, /* H */ { -1, -3, -4, -3, 0, -4, -3, 5, -3, 2, 2, -2, -2, -2, -3, -2, -1, 3, -2, 0, 0, -3, 0, -3, 0, 0, -1, -5, 0, }, /* I */ { -1, -3, 0, 1, -3, -2, -1, -3, 5, -3, -1, 0, -1, 1, 3, -1, -1, -2, -2, -1, 0, 0, 0, 1, 0, 0, -1, -5, 0, }, /* K */ { -1, -2, -3, -2, 1, -3, -2, 2, -3, 5, 2, -3, -3, -2, -2, -3, -1, 1, -2, 0, 0, -3, 0, -2, 0, 0, -1, -5, 0, }, /* L */ { -1, -2, -3, -2, 0, -2, 0, 2, -1, 2, 6, -2, -2, 0, -1, -2, -1, 1, -2, 0, 0, -2, 0, -1, 0, 0, -1, -5, 0, }, /* M */ { -1, -2, 2, 0, -2, 0, 1, -2, 0, -3, -2, 6, -2, 0, 0, 1, 0, -3, -4, -2, 0, 4, 0, 0, 0, 0, -1, -5, 0, }, /* N */ { -1, -4, -1, 0, -3, -2, -2, -2, -1, -3, -2, -2, 9, -1, -2, -1, -1, -3, -3, -3, 0, -2, 0, -1, 0, 0, -1, -5, 0, }, /* P */ { -1, -3, 0, 2, -4, -2, 1, -2, 1, -2, 0, 0, -1, 6, 1, 0, -1, -3, -2, -1, 0, 0, 0, 4, 0, 0, -1, -5, 0, }, /* Q */ { -2, -3, -1, 0, -2, -2, 0, -3, 3, -2, -1, 0, -2, 1, 7, -1, -1, -2, -2, -1, 0, -1, 0, 0, 0, 0, -1, -5, 0, }, /* R */ { 1, -1, 0, 0, -2, 0, -1, -2, -1, -3, -2, 1, -1, 0, -1, 4, 2, -1, -4, -2, 0, 0, 0, 0, 0, 0, 0, -5, 0, }, /* S */ { 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, 0, -1, -1, -1, 2, 5, 0, -3, -1, 0, 0, 0, -1, 0, 0, 0, -5, 0, }, /* T */ { 0, -1, -3, -3, 0, -3, -3, 3, -2, 1, 1, -3, -3, -3, -2, -1, 0, 5, -3, -1, 0, -3, 0, -3, 0, 0, -1, -5, 0, }, /* V */ { -2, -5, -4, -3, 1, -2, -3, -2, -2, -2, -2, -4, -3, -2, -2, -4, -3, -3, 15, 3, 0, -4, 0, -2, 0, 0, -2, -5, 0, }, /* W */ { -2, -3, -2, -2, 3, -3, 2, 0, -1, 0, 0, -2, -3, -1, -1, -2, -1, -1, 3, 8, 0, -2, 0, -2, 0, 0, -1, -5, 0, }, /* Y */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* - */ { -1, -2, 5, 1, -3, -1, 0, -3, 0, -3, -2, 4, -2, 0, -1, 0, 0, -3, -4, -2, 0, 4, 0, 2, 0, 0, -1, -5, 0, }, /* B */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* J */ { -1, -3, 1, 4, -3, -2, 0, -3, 1, -2, -1, 0, -1, 4, 0, 0, -1, -3, -2, -2, 0, 2, 0, 4, 0, 0, -1, -5, 0, }, /* Z */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* O */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* U */ { 0, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, -1, -2, -1, 0, -1, 0, -1, 0, 0, -1, -5, 0, }, /* X */ { -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, 0, -5, 0, -5, 0, 0, -5, 1, 0, }, /* * */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* ~ */ }}, { "BLOSUM50", { /* A C D E F G H I K L M N P Q R S T V W Y - B J Z O U X * ~ */ { 5, -1, -2, -1, -3, 0, -2, -1, -1, -2, -1, -1, -1, -1, -2, 1, 0, 0, -3, -2, 0, -2, 0, -1, 0, 0, -1, -5, 0, }, /* A */ { -1, 13, -4, -3, -2, -3, -3, -2, -3, -2, -2, -2, -4, -3, -4, -1, -1, -1, -5, -3, 0, -3, 0, -3, 0, 0, -2, -5, 0, }, /* C */ { -2, -4, 8, 2, -5, -1, -1, -4, -1, -4, -4, 2, -1, 0, -2, 0, -1, -4, -5, -3, 0, 5, 0, 1, 0, 0, -1, -5, 0, }, /* D */ { -1, -3, 2, 6, -3, -3, 0, -4, 1, -3, -2, 0, -1, 2, 0, -1, -1, -3, -3, -2, 0, 1, 0, 5, 0, 0, -1, -5, 0, }, /* E */ { -3, -2, -5, -3, 8, -4, -1, 0, -4, 1, 0, -4, -4, -4, -3, -3, -2, -1, 1, 4, 0, -4, 0, -4, 0, 0, -2, -5, 0, }, /* F */ { 0, -3, -1, -3, -4, 8, -2, -4, -2, -4, -3, 0, -2, -2, -3, 0, -2, -4, -3, -3, 0, -1, 0, -2, 0, 0, -2, -5, 0, }, /* G */ { -2, -3, -1, 0, -1, -2, 10, -4, 0, -3, -1, 1, -2, 1, 0, -1, -2, -4, -3, 2, 0, 0, 0, 0, 0, 0, -1, -5, 0, }, /* H */ { -1, -2, -4, -4, 0, -4, -4, 5, -3, 2, 2, -3, -3, -3, -4, -3, -1, 4, -3, -1, 0, -4, 0, -3, 0, 0, -1, -5, 0, }, /* I */ { -1, -3, -1, 1, -4, -2, 0, -3, 6, -3, -2, 0, -1, 2, 3, 0, -1, -3, -3, -2, 0, 0, 0, 1, 0, 0, -1, -5, 0, }, /* K */ { -2, -2, -4, -3, 1, -4, -3, 2, -3, 5, 3, -4, -4, -2, -3, -3, -1, 1, -2, -1, 0, -4, 0, -3, 0, 0, -1, -5, 0, }, /* L */ { -1, -2, -4, -2, 0, -3, -1, 2, -2, 3, 7, -2, -3, 0, -2, -2, -1, 1, -1, 0, 0, -3, 0, -1, 0, 0, -1, -5, 0, }, /* M */ { -1, -2, 2, 0, -4, 0, 1, -3, 0, -4, -2, 7, -2, 0, -1, 1, 0, -3, -4, -2, 0, 4, 0, 0, 0, 0, -1, -5, 0, }, /* N */ { -1, -4, -1, -1, -4, -2, -2, -3, -1, -4, -3, -2, 10, -1, -3, -1, -1, -3, -4, -3, 0, -2, 0, -1, 0, 0, -2, -5, 0, }, /* P */ { -1, -3, 0, 2, -4, -2, 1, -3, 2, -2, 0, 0, -1, 7, 1, 0, -1, -3, -1, -1, 0, 0, 0, 4, 0, 0, -1, -5, 0, }, /* Q */ { -2, -4, -2, 0, -3, -3, 0, -4, 3, -3, -2, -1, -3, 1, 7, -1, -1, -3, -3, -1, 0, -1, 0, 0, 0, 0, -1, -5, 0, }, /* R */ { 1, -1, 0, -1, -3, 0, -1, -3, 0, -3, -2, 1, -1, 0, -1, 5, 2, -2, -4, -2, 0, 0, 0, 0, 0, 0, -1, -5, 0, }, /* S */ { 0, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1, 0, -1, -1, -1, 2, 5, 0, -3, -2, 0, 0, 0, -1, 0, 0, 0, -5, 0, }, /* T */ { 0, -1, -4, -3, -1, -4, -4, 4, -3, 1, 1, -3, -3, -3, -3, -2, 0, 5, -3, -1, 0, -4, 0, -3, 0, 0, -1, -5, 0, }, /* V */ { -3, -5, -5, -3, 1, -3, -3, -3, -3, -2, -1, -4, -4, -1, -3, -4, -3, -3, 15, 2, 0, -5, 0, -2, 0, 0, -3, -5, 0, }, /* W */ { -2, -3, -3, -2, 4, -3, 2, -1, -2, -1, 0, -2, -3, -1, -1, -2, -2, -1, 2, 8, 0, -3, 0, -2, 0, 0, -1, -5, 0, }, /* Y */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* - */ { -2, -3, 5, 1, -4, -1, 0, -4, 0, -4, -3, 4, -2, 0, -1, 0, 0, -4, -5, -3, 0, 5, 0, 2, 0, 0, -1, -5, 0, }, /* B */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* J */ { -1, -3, 1, 5, -4, -2, 0, -3, 1, -3, -1, 0, -1, 4, 0, 0, -1, -3, -2, -2, 0, 2, 0, 5, 0, 0, -1, -5, 0, }, /* Z */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* O */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* U */ { -1, -2, -1, -1, -2, -2, -1, -1, -1, -1, -1, -1, -2, -1, -1, -1, 0, -1, -3, -1, 0, -1, 0, -1, 0, 0, -1, -5, 0, }, /* X */ { -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, 0, -5, 0, -5, 0, 0, -5, 1, 0, }, /* * */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* ~ */ }}, { "BLOSUM62", { /* A C D E F G H I K L M N P Q R S T V W Y - B J Z O U X * ~ */ { 4, 0, -2, -1, -2, 0, -2, -1, -1, -1, -1, -2, -1, -1, -1, 1, 0, 0, -3, -2, 0, -2, 0, -1, 0, 0, 0, -4, 0, }, /* A */ { 0, 9, -3, -4, -2, -3, -3, -1, -3, -1, -1, -3, -3, -3, -3, -1, -1, -1, -2, -2, 0, -3, 0, -3, 0, 0, -2, -4, 0, }, /* C */ { -2, -3, 6, 2, -3, -1, -1, -3, -1, -4, -3, 1, -1, 0, -2, 0, -1, -3, -4, -3, 0, 4, 0, 1, 0, 0, -1, -4, 0, }, /* D */ { -1, -4, 2, 5, -3, -2, 0, -3, 1, -3, -2, 0, -1, 2, 0, 0, -1, -2, -3, -2, 0, 1, 0, 4, 0, 0, -1, -4, 0, }, /* E */ { -2, -2, -3, -3, 6, -3, -1, 0, -3, 0, 0, -3, -4, -3, -3, -2, -2, -1, 1, 3, 0, -3, 0, -3, 0, 0, -1, -4, 0, }, /* F */ { 0, -3, -1, -2, -3, 6, -2, -4, -2, -4, -3, 0, -2, -2, -2, 0, -2, -3, -2, -3, 0, -1, 0, -2, 0, 0, -1, -4, 0, }, /* G */ { -2, -3, -1, 0, -1, -2, 8, -3, -1, -3, -2, 1, -2, 0, 0, -1, -2, -3, -2, 2, 0, 0, 0, 0, 0, 0, -1, -4, 0, }, /* H */ { -1, -1, -3, -3, 0, -4, -3, 4, -3, 2, 1, -3, -3, -3, -3, -2, -1, 3, -3, -1, 0, -3, 0, -3, 0, 0, -1, -4, 0, }, /* I */ { -1, -3, -1, 1, -3, -2, -1, -3, 5, -2, -1, 0, -1, 1, 2, 0, -1, -2, -3, -2, 0, 0, 0, 1, 0, 0, -1, -4, 0, }, /* K */ { -1, -1, -4, -3, 0, -4, -3, 2, -2, 4, 2, -3, -3, -2, -2, -2, -1, 1, -2, -1, 0, -4, 0, -3, 0, 0, -1, -4, 0, }, /* L */ { -1, -1, -3, -2, 0, -3, -2, 1, -1, 2, 5, -2, -2, 0, -1, -1, -1, 1, -1, -1, 0, -3, 0, -1, 0, 0, -1, -4, 0, }, /* M */ { -2, -3, 1, 0, -3, 0, 1, -3, 0, -3, -2, 6, -2, 0, 0, 1, 0, -3, -4, -2, 0, 3, 0, 0, 0, 0, -1, -4, 0, }, /* N */ { -1, -3, -1, -1, -4, -2, -2, -3, -1, -3, -2, -2, 7, -1, -2, -1, -1, -2, -4, -3, 0, -2, 0, -1, 0, 0, -2, -4, 0, }, /* P */ { -1, -3, 0, 2, -3, -2, 0, -3, 1, -2, 0, 0, -1, 5, 1, 0, -1, -2, -2, -1, 0, 0, 0, 3, 0, 0, -1, -4, 0, }, /* Q */ { -1, -3, -2, 0, -3, -2, 0, -3, 2, -2, -1, 0, -2, 1, 5, -1, -1, -3, -3, -2, 0, -1, 0, 0, 0, 0, -1, -4, 0, }, /* R */ { 1, -1, 0, 0, -2, 0, -1, -2, 0, -2, -1, 1, -1, 0, -1, 4, 1, -2, -3, -2, 0, 0, 0, 0, 0, 0, 0, -4, 0, }, /* S */ { 0, -1, -1, -1, -2, -2, -2, -1, -1, -1, -1, 0, -1, -1, -1, 1, 5, 0, -2, -2, 0, -1, 0, -1, 0, 0, 0, -4, 0, }, /* T */ { 0, -1, -3, -2, -1, -3, -3, 3, -2, 1, 1, -3, -2, -2, -3, -2, 0, 4, -3, -1, 0, -3, 0, -2, 0, 0, -1, -4, 0, }, /* V */ { -3, -2, -4, -3, 1, -2, -2, -3, -3, -2, -1, -4, -4, -2, -3, -3, -2, -3, 11, 2, 0, -4, 0, -3, 0, 0, -2, -4, 0, }, /* W */ { -2, -2, -3, -2, 3, -3, 2, -1, -2, -1, -1, -2, -3, -1, -2, -2, -2, -1, 2, 7, 0, -3, 0, -2, 0, 0, -1, -4, 0, }, /* Y */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* - */ { -2, -3, 4, 1, -3, -1, 0, -3, 0, -4, -3, 3, -2, 0, -1, 0, -1, -3, -4, -3, 0, 4, 0, 1, 0, 0, -1, -4, 0, }, /* B */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* J */ { -1, -3, 1, 4, -3, -2, 0, -3, 1, -3, -1, 0, -1, 3, 0, 0, -1, -2, -3, -2, 0, 1, 0, 4, 0, 0, -1, -4, 0, }, /* Z */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* O */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* U */ { 0, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -1, -1, 0, 0, -1, -2, -1, 0, -1, 0, -1, 0, 0, -1, -4, 0, }, /* X */ { -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, 0, -4, 0, -4, 0, 0, -4, 1, 0, }, /* * */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* ~ */ }}, { "BLOSUM80", { /* A C D E F G H I K L M N P Q R S T V W Y - B J Z O U X * ~ */ { 7, -1, -3, -2, -4, 0, -3, -3, -1, -3, -2, -3, -1, -2, -3, 2, 0, -1, -5, -4, 0, -3, 0, -2, 0, 0, -1, -8, 0, }, /* A */ { -1, 13, -7, -7, -4, -6, -7, -2, -6, -3, -3, -5, -6, -5, -6, -2, -2, -2, -5, -5, 0, -6, 0, -7, 0, 0, -4, -8, 0, }, /* C */ { -3, -7, 10, 2, -6, -3, -2, -7, -2, -7, -6, 2, -3, -1, -3, -1, -2, -6, -8, -6, 0, 6, 0, 1, 0, 0, -3, -8, 0, }, /* D */ { -2, -7, 2, 8, -6, -4, 0, -6, 1, -6, -4, -1, -2, 3, -1, -1, -2, -4, -6, -5, 0, 1, 0, 6, 0, 0, -2, -8, 0, }, /* E */ { -4, -4, -6, -6, 10, -6, -2, -1, -5, 0, 0, -6, -6, -5, -5, -4, -4, -2, 0, 4, 0, -6, 0, -6, 0, 0, -3, -8, 0, }, /* F */ { 0, -6, -3, -4, -6, 9, -4, -7, -3, -7, -5, -1, -5, -4, -4, -1, -3, -6, -6, -6, 0, -2, 0, -4, 0, 0, -3, -8, 0, }, /* G */ { -3, -7, -2, 0, -2, -4, 12, -6, -1, -5, -4, 1, -4, 1, 0, -2, -3, -5, -4, 3, 0, -1, 0, 0, 0, 0, -2, -8, 0, }, /* H */ { -3, -2, -7, -6, -1, -7, -6, 7, -5, 2, 2, -6, -5, -5, -5, -4, -2, 4, -5, -3, 0, -6, 0, -6, 0, 0, -2, -8, 0, }, /* I */ { -1, -6, -2, 1, -5, -3, -1, -5, 8, -4, -3, 0, -2, 2, 3, -1, -1, -4, -6, -4, 0, -1, 0, 1, 0, 0, -2, -8, 0, }, /* K */ { -3, -3, -7, -6, 0, -7, -5, 2, -4, 6, 3, -6, -5, -4, -4, -4, -3, 1, -4, -2, 0, -7, 0, -5, 0, 0, -2, -8, 0, }, /* L */ { -2, -3, -6, -4, 0, -5, -4, 2, -3, 3, 9, -4, -4, -1, -3, -3, -1, 1, -3, -3, 0, -5, 0, -3, 0, 0, -2, -8, 0, }, /* M */ { -3, -5, 2, -1, -6, -1, 1, -6, 0, -6, -4, 9, -4, 0, -1, 1, 0, -5, -7, -4, 0, 5, 0, -1, 0, 0, -2, -8, 0, }, /* N */ { -1, -6, -3, -2, -6, -5, -4, -5, -2, -5, -4, -4, 12, -3, -3, -2, -3, -4, -7, -6, 0, -4, 0, -2, 0, 0, -3, -8, 0, }, /* P */ { -2, -5, -1, 3, -5, -4, 1, -5, 2, -4, -1, 0, -3, 9, 1, -1, -1, -4, -4, -3, 0, -1, 0, 5, 0, 0, -2, -8, 0, }, /* Q */ { -3, -6, -3, -1, -5, -4, 0, -5, 3, -4, -3, -1, -3, 1, 9, -2, -2, -4, -5, -4, 0, -2, 0, 0, 0, 0, -2, -8, 0, }, /* R */ { 2, -2, -1, -1, -4, -1, -2, -4, -1, -4, -3, 1, -2, -1, -2, 7, 2, -3, -6, -3, 0, 0, 0, -1, 0, 0, -1, -8, 0, }, /* S */ { 0, -2, -2, -2, -4, -3, -3, -2, -1, -3, -1, 0, -3, -1, -2, 2, 8, 0, -5, -3, 0, -1, 0, -2, 0, 0, -1, -8, 0, }, /* T */ { -1, -2, -6, -4, -2, -6, -5, 4, -4, 1, 1, -5, -4, -4, -4, -3, 0, 7, -5, -3, 0, -6, 0, -4, 0, 0, -2, -8, 0, }, /* V */ { -5, -5, -8, -6, 0, -6, -4, -5, -6, -4, -3, -7, -7, -4, -5, -6, -5, -5, 16, 3, 0, -8, 0, -5, 0, 0, -5, -8, 0, }, /* W */ { -4, -5, -6, -5, 4, -6, 3, -3, -4, -2, -3, -4, -6, -3, -4, -3, -3, -3, 3, 11, 0, -5, 0, -4, 0, 0, -3, -8, 0, }, /* Y */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* - */ { -3, -6, 6, 1, -6, -2, -1, -6, -1, -7, -5, 5, -4, -1, -2, 0, -1, -6, -8, -5, 0, 6, 0, 0, 0, 0, -3, -8, 0, }, /* B */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* J */ { -2, -7, 1, 6, -6, -4, 0, -6, 1, -5, -3, -1, -2, 5, 0, -1, -2, -4, -5, -4, 0, 0, 0, 6, 0, 0, -1, -8, 0, }, /* Z */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* O */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* U */ { -1, -4, -3, -2, -3, -3, -2, -2, -2, -2, -2, -2, -3, -2, -2, -1, -1, -2, -5, -3, 0, -3, 0, -1, 0, 0, -2, -8, 0, }, /* X */ { -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, -8, 0, -8, 0, -8, 0, 0, -8, 1, 0, }, /* * */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* ~ */ }}, { "BLOSUM90", { /* A C D E F G H I K L M N P Q R S T V W Y - B J Z O U X * ~ */ { 5, -1, -3, -1, -3, 0, -2, -2, -1, -2, -2, -2, -1, -1, -2, 1, 0, -1, -4, -3, 0, -2, 0, -1, 0, 0, -1, -6, 0, }, /* A */ { -1, 9, -5, -6, -3, -4, -5, -2, -4, -2, -2, -4, -4, -4, -5, -2, -2, -2, -4, -4, 0, -4, 0, -5, 0, 0, -3, -6, 0, }, /* C */ { -3, -5, 7, 1, -5, -2, -2, -5, -1, -5, -4, 1, -3, -1, -3, -1, -2, -5, -6, -4, 0, 4, 0, 0, 0, 0, -2, -6, 0, }, /* D */ { -1, -6, 1, 6, -5, -3, -1, -4, 0, -4, -3, -1, -2, 2, -1, -1, -1, -3, -5, -4, 0, 0, 0, 4, 0, 0, -2, -6, 0, }, /* E */ { -3, -3, -5, -5, 7, -5, -2, -1, -4, 0, -1, -4, -4, -4, -4, -3, -3, -2, 0, 3, 0, -4, 0, -4, 0, 0, -2, -6, 0, }, /* F */ { 0, -4, -2, -3, -5, 6, -3, -5, -2, -5, -4, -1, -3, -3, -3, -1, -3, -5, -4, -5, 0, -2, 0, -3, 0, 0, -2, -6, 0, }, /* G */ { -2, -5, -2, -1, -2, -3, 8, -4, -1, -4, -3, 0, -3, 1, 0, -2, -2, -4, -3, 1, 0, -1, 0, 0, 0, 0, -2, -6, 0, }, /* H */ { -2, -2, -5, -4, -1, -5, -4, 5, -4, 1, 1, -4, -4, -4, -4, -3, -1, 3, -4, -2, 0, -5, 0, -4, 0, 0, -2, -6, 0, }, /* I */ { -1, -4, -1, 0, -4, -2, -1, -4, 6, -3, -2, 0, -2, 1, 2, -1, -1, -3, -5, -3, 0, -1, 0, 1, 0, 0, -1, -6, 0, }, /* K */ { -2, -2, -5, -4, 0, -5, -4, 1, -3, 5, 2, -4, -4, -3, -3, -3, -2, 0, -3, -2, 0, -5, 0, -4, 0, 0, -2, -6, 0, }, /* L */ { -2, -2, -4, -3, -1, -4, -3, 1, -2, 2, 7, -3, -3, 0, -2, -2, -1, 0, -2, -2, 0, -4, 0, -2, 0, 0, -1, -6, 0, }, /* M */ { -2, -4, 1, -1, -4, -1, 0, -4, 0, -4, -3, 7, -3, 0, -1, 0, 0, -4, -5, -3, 0, 4, 0, -1, 0, 0, -2, -6, 0, }, /* N */ { -1, -4, -3, -2, -4, -3, -3, -4, -2, -4, -3, -3, 8, -2, -3, -2, -2, -3, -5, -4, 0, -3, 0, -2, 0, 0, -2, -6, 0, }, /* P */ { -1, -4, -1, 2, -4, -3, 1, -4, 1, -3, 0, 0, -2, 7, 1, -1, -1, -3, -3, -3, 0, -1, 0, 4, 0, 0, -1, -6, 0, }, /* Q */ { -2, -5, -3, -1, -4, -3, 0, -4, 2, -3, -2, -1, -3, 1, 6, -1, -2, -3, -4, -3, 0, -2, 0, 0, 0, 0, -2, -6, 0, }, /* R */ { 1, -2, -1, -1, -3, -1, -2, -3, -1, -3, -2, 0, -2, -1, -1, 5, 1, -2, -4, -3, 0, 0, 0, -1, 0, 0, -1, -6, 0, }, /* S */ { 0, -2, -2, -1, -3, -3, -2, -1, -1, -2, -1, 0, -2, -1, -2, 1, 6, -1, -4, -2, 0, -1, 0, -1, 0, 0, -1, -6, 0, }, /* T */ { -1, -2, -5, -3, -2, -5, -4, 3, -3, 0, 0, -4, -3, -3, -3, -2, -1, 5, -3, -3, 0, -4, 0, -3, 0, 0, -2, -6, 0, }, /* V */ { -4, -4, -6, -5, 0, -4, -3, -4, -5, -3, -2, -5, -5, -3, -4, -4, -4, -3, 11, 2, 0, -6, 0, -4, 0, 0, -3, -6, 0, }, /* W */ { -3, -4, -4, -4, 3, -5, 1, -2, -3, -2, -2, -3, -4, -3, -3, -3, -2, -3, 2, 8, 0, -4, 0, -3, 0, 0, -2, -6, 0, }, /* Y */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* - */ { -2, -4, 4, 0, -4, -2, -1, -5, -1, -5, -4, 4, -3, -1, -2, 0, -1, -4, -6, -4, 0, 4, 0, 0, 0, 0, -2, -6, 0, }, /* B */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* J */ { -1, -5, 0, 4, -4, -3, 0, -4, 1, -4, -2, -1, -2, 4, 0, -1, -1, -3, -4, -3, 0, 0, 0, 4, 0, 0, -1, -6, 0, }, /* Z */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* O */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* U */ { -1, -3, -2, -2, -2, -2, -2, -2, -1, -2, -1, -2, -2, -1, -2, -1, -1, -2, -3, -2, 0, -2, 0, -1, 0, 0, -2, -6, 0, }, /* X */ { -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, 0, -6, 0, -6, 0, 0, -6, 1, 0, }, /* * */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /* ~ */ }}, }; #define eslNTDIM 18 struct esl_scorematrix_nt_preload_s { char *name; int matrix[eslNTDIM][eslNTDIM]; }; /* "DNA1" matrix * * Travis Wheeler created the "DNA1" custom matrix for nhmmer. It's * derived from the DNA prior (see ), by * computing mean posterior joint probabilities p_ij for a single * observed count of each residue, assuming uniform background, and * symmetricizing the result by taking means; then calling * with lambda = 0.02. * * The p_ij matrix was: * A C G T * 0.143 0.033 0.037 0.037 A * 0.033 0.136 0.029 0.044 C * 0.037 0.029 0.157 0.034 G * 0.037 0.044 0.034 0.136 T * * Travis estimated the DNA prior from a subset of Rfam 10.0 seed * alignments, based on a procedure from Eric Nawrocki: remove * columns with >50% gaps, collect weighted counts, and estimate * a four-component Dirichlet mixture. * * [xref email from Travis 8/21/2017] * */ static const struct esl_scorematrix_nt_preload_s ESL_SCOREMATRIX_NT_PRELOADS[] = { { "DNA1", { /* A C G T - R Y M K S W H B V D N * ~ */ { 41, -32, -26, -26, 0, 18, -29, 17, -26, -29, 18, 6, -28, 6, 7, 0, -38, 0, }, /*A*/ { -32, 39, -38, -17, 0, -35, 18, 15, -26, 14, -24, 6, 6, 3, -28, -1, -38, 0, }, /*C*/ { -26, -38, 46, -31, 0, 22, -34, -32, 21, 20, -29, -32, 8, 9, 10, 1, -38, 0, }, /*G*/ { -26, -17, -31, 39, 0, -28, 18, -21, 15, -23, 16, 7, 7, -24, 5, 0, -38, 0, }, /*T*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /*-*/ { 18, -35, 22, -28, 0, 20, -32, -2, 3, 1, 0, -9, -7, 7, 8, 1, -38, 0, }, /*R*/ { -29, 18, -34, 18, 0, -32, 18, 0, -1, -1, 0, 7, 6, -9, -9, -1, -38, 0, }, /*Y*/ { 17, 15, -32, -21, 0, -2, 0, 16, -26, -3, 1, 6, -8, 4, -7, -1, -38, 0, }, /*M*/ { -26, -26, 21, 15, 0, 3, -1, -26, 18, 3, -1, -8, 7, -5, 7, 1, -38, 0, }, /*K*/ { -29, 14, 20, -23, 0, 1, -1, -3, 3, 17, -26, -9, 7, 6, -6, 0, -38, 0, }, /*S*/ { 18, -24, -29, 16, 0, 0, 0, 1, -1, -26, 17, 7, -8, -7, 6, 0, -38, 0, }, /*W*/ { 6, 6, -32, 7, 0, -9, 7, 6, -8, -9, 7, 7, -3, -3, -3, 0, -38, 0, }, /*H*/ { -28, 6, 8, 7, 0, -7, 6, -8, 7, 7, -8, -3, 7, -2, -2, 0, -38, 0, }, /*B*/ { 6, 3, 9, -24, 0, 7, -9, 4, -5, 6, -7, -3, -2, 6, -1, 0, -38, 0, }, /*V*/ { 7, -28, 10, 5, 0, 8, -9, -7, 7, -6, 6, -3, -2, -1, 7, 0, -38, 0, }, /*D*/ { 0, -1, 1, 0, 0, 1, -1, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /*N*/ { -38, -38, -38, -38, 0, -38, -38, -38, -38, -38, -38, -38, -38, -38, -38, 0, -38, 0, }, /***/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }, /*~*/ }}, }; /* Function: esl_scorematrix_Set() * Synopsis: Set one of several standard matrices. * * Purpose: Set the allocated score matrix to standard score * matrix , where is the name of one of * several matrices built-in to Easel. For example, * . * * The alphabet for (abc_r>) must be set already. * * Built-in amino acid score matrices in Easel include * BLOSUM45, BLOSUM50, BLOSUM62, BLOSUM80, BLOSUM90, PAM30, * PAM70, PAM120, and PAM240. * * Returns: on success, and the scores in are set. * * if is not available as a built-in matrix * for the alphabet that's set in . * * Throws: on allocation error. */ int esl_scorematrix_Set(const char *name, ESL_SCOREMATRIX *S) { int which; int x, y; if (S->abc_r->type == eslAMINO) { int nmat = sizeof(ESL_SCOREMATRIX_AA_PRELOADS) / sizeof(struct esl_scorematrix_aa_preload_s); for (which = 0; which < nmat; which++) if (strcmp(ESL_SCOREMATRIX_AA_PRELOADS[which].name, name) == 0) break; if (which >= nmat) return eslENOTFOUND; ESL_DASSERT1(( S->Kp >= 24 )); // strcpy below is safe. The assertion tries to convince static analyzer of that. strcpy(S->outorder, "ARNDCQEGHILKMFPSTWYVBZX*"); /* All standard PAM, BLOSUM matrices have same list of valid * residues. If that ever changes, make a data elem in the * structures above. */ /* Transfer scores from static built-in storage */ for (x = 0; x < S->Kp; x++) for (y = 0; y < S->Kp; y++) S->s[x][y] = ESL_SCOREMATRIX_AA_PRELOADS[which].matrix[x][y]; } else if (S->abc_r->type == eslDNA || S->abc_r->type == eslRNA) { int nmat = sizeof(ESL_SCOREMATRIX_NT_PRELOADS) / sizeof(struct esl_scorematrix_nt_preload_s); for (which = 0; which < nmat; which++) if (strcmp(ESL_SCOREMATRIX_NT_PRELOADS[which].name, name) == 0) break; if (which >= nmat) return eslENOTFOUND; ESL_DASSERT1(( S->Kp >= 15 )); // strcpy below is safe. The assertion tries to convince static analyzer of that. strcpy(S->outorder, "ACGTRYMKSWHBVDN"); /* Transfer scores from static built-in storage */ for (x = 0; x < S->Kp; x++) for (y = 0; y < S->Kp; y++) S->s[x][y] = ESL_SCOREMATRIX_NT_PRELOADS[which].matrix[x][y]; } else return eslENOTFOUND; /* no DNA matrices are built in yet! */ /* Use list to set */ S->nc = strlen(S->outorder); for (y = 0; y < S->nc; y++) { x = esl_abc_DigitizeSymbol(S->abc_r, S->outorder[y]); S->isval[x] = TRUE; } /* Copy the name */ if (esl_strdup(name, -1, &(S->name)) != eslOK) return eslEMEM; return eslOK; } /* Function: esl_scorematrix_SetIdentity() * Synopsis: Set matrix to +1 match, 0 mismatch. * * Purpose: Sets score matrix to be +1 for a match, * 0 for a mismatch. may be for any alphabet. * * Rarely useful in real use, but may be useful to create * simple examples (including debugging). * * Returns: on success, and the scores in are set. */ int esl_scorematrix_SetIdentity(ESL_SCOREMATRIX *S) { int a; int x; for (a = 0; a < S->abc_r->Kp*S->abc_r->Kp; a++) S->s[0][a] = 0; for (a = 0; a < S->K; a++) S->s[a][a] = 1; for (x = 0; x < S->K; x++) S->isval[x] = TRUE; for (x = S->abc_r->K; x < S->Kp; x++) S->isval[x] = FALSE; strncpy(S->outorder, S->abc_r->sym, S->K); S->outorder[S->K] = '\0'; S->nc = S->K; return eslOK; } /*---------------- end, some classic score matrices --------*/ /***************************************************************** *# 3. Deriving a score matrix probabilistically. *****************************************************************/ /* Function: esl_scorematrix_SetFromProbs() * Synopsis: Set matrix from target and background probabilities. * * Purpose: Sets the scores in a new score matrix from target joint * probabilities in

, query background probabilities , and * target background probabilities , with scale factor : * $s_{ij} = \frac{1}{\lambda} \frac{p_{ij}}{f_i f_j}$. * * Size of everything must match the canonical alphabet * size in . That is, abc->K> is the canonical * alphabet size of ;

must contain $K times K$ * probabilities $P_{ij}$, and , must be vectors of * K probabilities. All probabilities must be nonzero. * * Args: S - score matrix to set scores in * lambda - scale factor * P - matrix of joint probabilities P_ij (KxK) * fi - query background probabilities (0..K-1) * fj - target background probabilities * * Returns: on success, and contains the calculated score matrix. */ int esl_scorematrix_SetFromProbs(ESL_SCOREMATRIX *S, double lambda, const ESL_DMATRIX *P, const double *fi, const double *fj) { int i,j; double sc; for (i = 0; i < S->abc_r->K; i++) for (j = 0; j < S->abc_r->K; j++) { sc = log(P->mx[i][j] / (fi[i] * fj[j])) / lambda; S->s[i][j] = (int) (sc + (sc>0 ? 0.5 : -0.5)); /* that's rounding to the nearest integer */ } for (i = 0; i < S->abc_r->K; i++) S->isval[i] = TRUE; S->nc = S->abc_r->K; strncpy(S->outorder, S->abc_r->sym, S->abc_r->K); S->outorder[S->nc] = '\0'; return eslOK; } /* Function: esl_scorematrix_SetWAG() * Synopsis: Set matrix using the WAG evolutionary model. * * Purpose: Parameterize an amino acid score matrix using the WAG * rate matrix \citep{WhelanGoldman01} as the underlying * evolutionary model, at a distance of * substitutions/site, with scale factor . * * Args: S - score matrix to set parameters in. Must be created for * an amino acid alphabet. * lambda - scale factor for scores * t - distance to exponentiate WAG to, in substitutions/site * * Returns: on success, and the 20x20 residue scores in are set. * * Throws: if isn't an allocated amino acid score matrix. * on allocation failure. */ int esl_scorematrix_SetWAG(ESL_SCOREMATRIX *S, double lambda, double t) { ESL_DMATRIX *Q = NULL; ESL_DMATRIX *P = NULL; static double wagpi[20]; int i,j; int status; if (S->K != 20) ESL_EXCEPTION(eslEINVAL, "Must be using an amino acid alphabet (K=20) to make WAG-based matrices"); if (( Q = esl_dmatrix_Create(20, 20)) == NULL) { status = eslEMEM; goto ERROR; } if (( P = esl_dmatrix_Create(20, 20)) == NULL) { status = eslEMEM; goto ERROR; } if ((status = esl_composition_WAG(wagpi)) != eslOK) goto ERROR; if ((status = esl_rmx_SetWAG(Q, wagpi)) != eslOK) goto ERROR; if ((status = esl_dmx_Exp(Q, t, P)) != eslOK) goto ERROR; for (i = 0; i < 20; i++) for (j = 0; j < 20; j++) P->mx[i][j] *= wagpi[i]; /* P_ij = P(j|i) pi_i */ esl_scorematrix_SetFromProbs(S, lambda, P, wagpi, wagpi); if ((status = esl_strdup("WAG", -1, &(S->name))) != eslOK) goto ERROR; esl_dmatrix_Destroy(Q); esl_dmatrix_Destroy(P); return eslOK; ERROR: if (Q != NULL) esl_dmatrix_Destroy(Q); if (Q != NULL) esl_dmatrix_Destroy(P); return status; } /*--------------- end, deriving score matrices ------------------*/ /***************************************************************** *# 4. Reading/writing matrices from/to files *****************************************************************/ /* Function: esl_scorematrix_Read() * Synopsis: Read a standard matrix input file. * * Purpose: Given a pointer to an open file parser for a file * containing a score matrix (such as a PAM or BLOSUM * matrix), parse the file and create a new score matrix * object. The scores are expected to be for the alphabet * . * * The score matrix file is in the format that BLAST or * FASTA use. The first line is a header contains N * single-letter codes for the residues. Each of N * subsequent rows optionally contains a residue row label * (in the same order as the columns), followed by N * residue scores. (Older matrix files do not contain the * leading row label; newer ones do.) The residues may * appear in any order. They must minimally include the * canonical K residues (K=4 for DNA, K=20 for protein), * and may also contain none, some, or all degeneracy * codes. Any other residue code that is not in the Easel * digital alphabet (including, in particular, the '*' code * for a stop codon) is ignored by the parser. * * Returns: on success, and points to a newly allocated * score matrix. * * Returns on parsing error; in which case, is * returned , and errbuf> contains an informative * error message. * * Throws: on allocation error. */ int esl_scorematrix_Read(ESL_FILEPARSER *efp, const ESL_ALPHABET *abc, ESL_SCOREMATRIX **ret_S) { int status; ESL_SCOREMATRIX *S = NULL; int *map = NULL; /* maps col/row index to digital alphabet x */ char *tok; int toklen; int c, x; int row,col; /* Allocate the matrix */ if ((S = esl_scorematrix_Create(abc)) == NULL) { status = eslEMEM; goto ERROR; } /* Make sure we've got the comment character set properly in the fileparser. * Score matrices use #. */ esl_fileparser_SetCommentChar(efp, '#'); /* Look for the first non-blank, non-comment line in the file. That line * gives us the single-letter codes in the order that the file's using. */ if ((status = esl_fileparser_NextLine(efp)) != eslOK) ESL_XFAIL(eslEFORMAT, efp->errbuf, "file appears to be empty"); /* Read the characters: count them and store them in order in label[0..nc-1]. * nc cannot exceed Kp+1 in our expected alphabet (+1, for the stop character *) */ S->nc = 0; while ((status = esl_fileparser_GetTokenOnLine(efp, &tok, &toklen)) == eslOK) { if (S->nc >= abc->Kp) ESL_XFAIL(eslEFORMAT, efp->errbuf, "Header contains more residues than expected for alphabet"); if (toklen != 1) ESL_XFAIL(eslEFORMAT, efp->errbuf, "Header can only contain single-char labels; %s is invalid", tok); S->outorder[S->nc++] = *tok; } if (status != eslEOL) ESL_XFAIL(status, efp->errbuf, "Unexpected failure of esl_fileparser_GetTokenOnLine()"); S->outorder[S->nc] = '\0'; /* NUL terminate */ /* Verify that these labels for the score matrix seem plausible, given our alphabet. * This sets S->isval array: which residues we have scores for. * It also sets the map[] array, which maps coord in label[] to x in alphabet. */ ESL_ALLOC(map, sizeof(int) * S->nc); for (c = 0; c < S->nc; c++) { if (esl_abc_CIsValid(abc, S->outorder[c])) { x = esl_abc_DigitizeSymbol(abc, S->outorder[c]); map[c] = x; S->isval[x] = TRUE; } else ESL_XFAIL(eslEFORMAT, efp->errbuf, "Don't know how to deal with residue %c in matrix file", S->outorder[c]); } for (x = 0; x < abc->K; x++) if (! S->isval[x]) ESL_XFAIL(eslEFORMAT, efp->errbuf, "Expected to see a column for residue %c", abc->sym[x]); /* Read nc rows, one at a time; * on each row, read nc+1 or nc tokens, of which nc are scores (may lead with a label or not) */ for (row = 0; row < S->nc; row++) { if ((status = esl_fileparser_NextLine(efp)) != eslOK) ESL_XFAIL(eslEFORMAT, efp->errbuf, "Unexpectedly ran out of lines in file"); for (col = 0; col < S->nc; col++) { if ((status = esl_fileparser_GetTokenOnLine(efp, &tok, &toklen)) != eslOK) ESL_XFAIL(eslEFORMAT, efp->errbuf, "Unexpectedly ran out of fields on line"); if (col == 0 && *tok == S->outorder[row]) { col--; continue; } /* skip leading label */ S->s[map[row]][map[col]] = atoi(tok); } if ((status = esl_fileparser_GetTokenOnLine(efp, &tok, &toklen)) != eslEOL) ESL_XFAIL(eslEFORMAT, efp->errbuf, "Too many fields on line"); } if ((status = esl_fileparser_NextLine(efp)) != eslEOF) ESL_XFAIL(eslEFORMAT, efp->errbuf, "Too many lines in file. (Make sure it's square & symmetric. E.g. use NUC.4.4 not NUC.4.2)"); /* Annotate the score matrix */ if ((status = esl_strdup (efp->filename, -1, &(S->path))) != eslOK) goto ERROR; if ((status = esl_FileTail(efp->filename, FALSE, &(S->name))) != eslOK) goto ERROR; free(map); *ret_S = S; return eslOK; ERROR: esl_scorematrix_Destroy(S); if (map != NULL) free(map); *ret_S = NULL; return status; } /* Function: esl_scorematrix_Write() * Synopsis: Write a BLAST-compatible score matrix file. * * Purpose: Writes a score matrix to an open stream , in * format compatible with BLAST, FASTA, and other common * sequence alignment software. * * Returns: on success. * * Throws: on any system write error, such as filled disk. */ int esl_scorematrix_Write(FILE *fp, const ESL_SCOREMATRIX *S) { int a,b; int x,y; int nc = S->nc; /* The header line, with column labels for residues */ if (fprintf(fp, " ") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "score matrix write failed"); for (a = 0; a < nc; a++) { if (fprintf(fp, " %c ", S->outorder[a]) < 0) ESL_EXCEPTION_SYS(eslEWRITE, "score matrix write failed"); } if (fprintf(fp, "\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "score matrix write failed"); /* The data */ for (a = 0; a < nc; a++) { if (fprintf(fp, "%c ", S->outorder[a]) < 0) ESL_EXCEPTION_SYS(eslEWRITE, "score matrix write failed"); for (b = 0; b < nc; b++) { x = esl_abc_DigitizeSymbol(S->abc_r, S->outorder[a]); y = esl_abc_DigitizeSymbol(S->abc_r, S->outorder[b]); if (fprintf(fp, "%3d ", S->s[x][y]) < 0) ESL_EXCEPTION_SYS(eslEWRITE, "score matrix write failed"); } if (fprintf(fp, "\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "score matrix write failed"); } return eslOK; } /*-------------- end, reading/writing matrices ------------------*/ /***************************************************************** *# 5. Implicit probabilistic basis, I: given bg. *****************************************************************/ static int set_degenerate_probs(const ESL_ALPHABET *abc, ESL_DMATRIX *P, double *fi, double *fj); struct lambda_params { const double *fi; const double *fj; const ESL_SCOREMATRIX *S; }; static int lambda_fdf(double lambda, void *params, double *ret_fx, double *ret_dfx) { struct lambda_params *p = (struct lambda_params *) params; int i,j; double tmp; *ret_fx = 0.; *ret_dfx = 0.; for (i = 0; i < p->S->K; i++) for (j = 0; j < p->S->K; j++) { tmp = p->fi[i] * p->fj[j] * exp(lambda * (double) p->S->s[i][j]); *ret_fx += tmp; *ret_dfx += tmp * (double) p->S->s[i][j]; } *ret_fx -= 1.0; return eslOK; } /* Function: esl_scorematrix_ProbifyGivenBG() * Synopsis: Obtain $P_{ij}$ for matrix with known $\lambda$ and background. * * Purpose: Given a score matrix and known query and target * background frequencies and respectively, calculate scale * and implicit target probabilities \citep{Altschul01}. * Optionally returns either (or both) in and . * * The implicit target probabilities are returned in a * newly allocated $Kp \times Kp$ , over both * the canonical (typically K=4 or K=20) residues in the * residue alphabet, and the degenerate residue codes. * Values involving degenerate residue codes are marginal * probabilities (i.e. summed over the degeneracy). * Only actual residue degeneracy can have nonzero values * for ; by convention, all values involving the * special codes for gap, nonresidue, and missing data * (, , ) are 0. * * If the caller wishes to convert this joint probability * matrix to conditionals, it can take advantage of the * fact that the degenerate probability is our * marginalized , and is . * i.e., . * Those X values are mx[i][esl_abc_GetUnknown(abc)]>, * mx[esl_abc_GetUnknown(abc)][j]>; equivalently, just use * code for X. * * By convention, i is always the query sequence, and j is * always the target. We do not assume symmetry in the * scoring system, though that is usually the case. * * Args: S - score matrix * fi - background frequencies for query sequence i * fj - background frequencies for target sequence j * opt_lambda - optRETURN: calculated $\lambda$ parameter * opt_P - optRETURN: implicit target probabilities $p_{ij}$; a KxK DMATRIX. * * Returns: on success, <*ret_lambda> contains the * calculated $\lambda$ parameter, and <*ret_P> points to * the target probability matrix (which is allocated here, * and must be free'd by caller with . * * Throws: on allocation error; * if matrix is invalid and has no solution for $\lambda$; * if the solver fails to find $\lambda$. * In these cases, <*ret_lambda> is 0.0, and <*ret_P> is . */ int esl_scorematrix_ProbifyGivenBG(const ESL_SCOREMATRIX *S, const double *fi, const double *fj, double *opt_lambda, ESL_DMATRIX **opt_P) { ESL_ROOTFINDER *R = NULL; ESL_DMATRIX *P = NULL; struct lambda_params p; double lambda_guess; double lambda; int i,j; double fx, dfx; int status; /* First, solve for lambda by rootfinding. */ /* Set up the data passed to the lambda_fdf function. */ p.fi = fi; p.fj = fj; p.S = S; /* Bracket the root. * It's important that we come at the root from the far side, where * f(lambda) is positive; else we may identify the root we don't want * at lambda=0. */ fx = -1.0; lambda_guess = 1. / (double) esl_scorematrix_Max(S); for (; lambda_guess < 50.; lambda_guess *= 2.0) { lambda_fdf(lambda_guess, &p, &fx, &dfx); if (fx > 0) break; } if (fx <= 0) ESL_XEXCEPTION(eslEINVAL, "Failed to bracket root for solving lambda"); /* Create a solver and find lambda by Newton/Raphson */ if (( R = esl_rootfinder_CreateFDF(lambda_fdf, &p) ) == NULL) { status = eslEMEM; goto ERROR; } if (( status = esl_root_NewtonRaphson(R, lambda_guess, &lambda)) != eslOK) goto ERROR; /* Now, given solution for lambda, calculate P */ if (opt_P != NULL) { if ((P = esl_dmatrix_Create(S->Kp, S->Kp)) == NULL) { status = eslEMEM; goto ERROR; } for (i = 0; i < S->K; i++) for (j = 0; j < S->K; j++) P->mx[i][j] = fi[i] * fj[j] * exp(lambda * (double) S->s[i][j]); set_degenerate_probs(S->abc_r, P, NULL, NULL); } esl_rootfinder_Destroy(R); if (opt_lambda) *opt_lambda = lambda; if (opt_P) *opt_P = P; return eslOK; ERROR: if (R) esl_rootfinder_Destroy(R); if (opt_lambda) *opt_lambda = 0.; if (opt_P) *opt_P = NULL; return status; } /* set_degenerate_probs() * * Used by both esl_scorematrix_Probify() and * esl_scorematrix_ProbifyGivenBG() to set degenerate residue * probabilities once probs for canonical residues are known. * * Input: P->mx[i][j] are joint probabilities p_ij for the canonical * alphabet 0..abc->K-1, but P matrix is allocated for Kp X Kp. * * Calculate marginal sums for all i,j pairs involving degeneracy * codes. Fill in [i][j'=K..Kp-1], [i'=K..Kp-1][j], and * [i'=K..Kp-1][j'=K..Kp-1] for degeneracies i',j'. Any p_ij involving * a gap (K), nonresidue (Kp-2), or missing data (Kp-1) character is * set to 0.0 by convention. * * Don't assume symmetry. * * If or background probability vectors are non-, set * them too. (Corresponding to the assumption of background = * marginal probs, rather than background being given.) This takes * advantage of the fact that P(X,i) is already the marginalized p_i, * and P(j,X) is p_j. */ static int set_degenerate_probs(const ESL_ALPHABET *abc, ESL_DMATRIX *P, double *fi, double *fj) { int i,j; /* indices into canonical codes */ int ip,jp; /* indices into degenerate codes */ /* sum to get [i=0..K] canonicals to [jp=K+1..Kp-3] degeneracies; * and [jp=K,Kp-2,Kp-1] set to 0.0 */ for (i = 0; i < abc->K; i++) { P->mx[i][abc->K] = 0.0; for (jp = abc->K+1; jp < abc->Kp-2; jp++) { P->mx[i][jp] = 0.0; for (j = 0; j < abc->K; j++) if (abc->degen[jp][j]) P->mx[i][jp] += P->mx[i][j]; } P->mx[i][abc->Kp-2] = 0.0; P->mx[i][abc->Kp-1] = 0.0; } esl_vec_DSet(P->mx[abc->K], abc->Kp, 0.0); /* gap row: all 0.0 by convention */ /* [ip][all] */ for (ip = abc->K+1; ip < abc->Kp-2; ip++) { /* [ip][j]: degenerate i, canonical j */ for (j = 0; j < abc->K; j++) { P->mx[ip][j] = 0.0; for (i = 0; i < abc->K; i++) if (abc->degen[ip][i]) P->mx[ip][j] += P->mx[i][j]; } P->mx[ip][abc->K] = 0.0; /* [ip][jp]: both positions degenerate */ for (jp = abc->K+1; jp < abc->Kp-2; jp++) { P->mx[ip][jp] = 0.0; for (j = 0; j < abc->K; j++) if (abc->degen[jp][j]) P->mx[ip][jp] += P->mx[ip][j]; } P->mx[ip][abc->Kp-2] = 0.0; P->mx[ip][abc->Kp-1] = 0.0; } esl_vec_DSet(P->mx[abc->Kp-2], abc->Kp, 0.0); /* nonresidue data * row, all 0.0 */ esl_vec_DSet(P->mx[abc->Kp-1], abc->Kp, 0.0); /* missing data ~ row, all 0.0 */ if (fi != NULL) { /* fi[i'] = p(i',X) */ fi[abc->K] = 0.0; for (ip = abc->K+1; ip < abc->Kp-2; ip++) fi[ip] = P->mx[ip][abc->Kp-3]; fi[abc->Kp-2] = 0.0; fi[abc->Kp-1] = 0.0; } if (fj != NULL) { /* fj[j'] = p(X,j')*/ fj[abc->K] = 0.0; for (jp = abc->K+1; jp < abc->Kp-2; jp++) fj[jp] = P->mx[abc->Kp-3][jp]; fj[abc->Kp-2] = 0.0; fj[abc->Kp-1] = 0.0; } return eslOK; } /*------------- end, implicit prob basis, bg known --------------*/ /***************************************************************** *# 6. Implicit probabilistic basis, II: bg unknown *****************************************************************/ /* This section implements one of the key ideas in Yu and Altschul, * PNAS 100:15688, 2003 [YuAltschul03], and Yu and Altschul, * Bioinformatics 21:902-911, 2005 [YuAltschul05]: * * Given a valid score matrix, calculate its probabilistic * basis (P_ij, f_i, f_j, and lambda), on the assumption that * the background probabilities are the marginals of P_ij. * * However, this procedure appears to be unreliable. * There are often numerous invalid solutions with negative * probabilities, and the Yu/Altschul Y function (that we've solving * for its root) is often discontinuous. Although Yu and Altschul say * they can just keep searching for solutions until a valid one is * found, and "this procedure presents no difficulties in practice", I * don't see how. * * For example, run the procedure on PAM190 and PAM200. For PAM190 * you will obtain a valid solution with lambda = 0.2301. For PAM200 * you will obtain an *invalid* solution with lambda = 0.2321, and * negative probabilities f_{ENT} (and all p_ij involving ENT and * the other 17 aa). There is a discontinuity in the function, but * it's not near these lambdas, it's at about lambda=0.040, so it's * not that we fell into a discontinuity: the bisection procedure on * lambda is working smoothly. And if you calculate a score matrix again * from the invalid PAM200 solution, you get PAM200 back, so it's not * that there's an obvious bug -- we do obtain a "solution" to PAM200, * just not one with positive probabilities. It's not obvious how * we could find a different solution to PAM200 than the invalid one! * * What we're going to do [xref J7/126, Apr 2011] is to deprecate * the Yu/Altschul procedure altogether. */ struct yualtschul_params { ESL_DMATRIX *S; /* pointer to the KxK score matrix w/ values cast to doubles */ ESL_DMATRIX *M; /* not a param per se: alloc'ed storage for M matrix provided to the objective function */ ESL_DMATRIX *Y; /* likewise, alloc'ed storage for Y (M^-1) matrix provided to obj function */ }; /* yualtschul_scorematrix_validate * See start of section 3, p. 903, YuAltschul05 * (Implementation could be more efficient here; don't really have * to sweep the entire matrix twice to do this.) */ static int yualtschul_scorematrix_validate(const ESL_SCOREMATRIX *S) { int i, j; int has_neg, has_pos; /* each row must have at least one positive and one negative score */ for (i = 0; i < S->K; i++) { has_neg = has_pos = FALSE; for (j = 0; j < S->K; j++) { if (S->s[i][j] > 0) has_pos = TRUE; if (S->s[i][j] < 0) has_neg = TRUE; } if (! has_pos || ! has_neg) return eslFAIL; } /* ditto for columns */ for (j = 0; j < S->K; j++) { has_neg = has_pos = FALSE; for (i = 0; i < S->K; i++) { if (S->s[i][j] > 0) has_pos = TRUE; if (S->s[i][j] < 0) has_neg = TRUE; } if (! has_pos || ! has_neg) return eslFAIL; } return eslOK; } /* upper bound bracketing lambda solution: eqn (12) in [YuAltschul05] */ static double yualtschul_upper_bound(const ESL_DMATRIX *Sd) { int i; double minimax; double maxlambda; /* minimax = c in YuAltschul05 p.903 = smallest of the max scores in each row/col */ minimax = esl_vec_DMax(Sd->mx[0], Sd->n); for (i = 1; i < Sd->n; i++) minimax = ESL_MIN(minimax, esl_vec_DMax(Sd->mx[i], Sd->n)); maxlambda = log((double) Sd->n) / minimax; /* eqn (12), YuAltschul05 */ return maxlambda; } static int yualtschul_solution_validate(const ESL_DMATRIX *P, const double *fi, const double *fj) { if ( esl_dmx_Min(P) < 0.0) return eslFAIL; if ( esl_vec_DMin(fi, P->n) < 0.0) return eslFAIL; if ( esl_vec_DMin(fj, P->n) < 0.0) return eslFAIL; return eslOK; } /* yualtschul_func() * * This is the objective function we try to find a root of. * Its prototype is dictated by the esl_rootfinder API. */ static int yualtschul_func(double lambda, void *params, double *ret_fx) { int status; struct yualtschul_params *p = (struct yualtschul_params *) params; ESL_DMATRIX *S = p->S; ESL_DMATRIX *M = p->M; ESL_DMATRIX *Y = p->Y; int i,j; /* the M matrix has entries M_ij = e^{lambda * s_ij} */ for (i = 0; i < S->n; i++) for (j = 0; j < S->n; j++) M->mx[i][j] = exp(lambda * S->mx[i][j]); /* the Y matrix is the inverse of M */ if ((status = esl_dmx_Invert(M, Y)) != eslOK) goto ERROR; /* We're trying to find the root of \sum_ij Y_ij - 1 = 0 */ *ret_fx = esl_dmx_Sum(Y) - 1.; return eslOK; ERROR: *ret_fx = 0.; return status; } /* yualtschul_engine() * * This function backcalculates the probabilistic basis for a score * matrix S, when S is a double-precision matrix. Providing this * as a separate "engine" and writing esl_scorematrix_Probify() * as a wrapper around it allows us to separately test inaccuracy * due to numerical performance of our linear algebra, versus * inaccuracy due to integer roundoff in integer scoring matrices. * * It is not uncommon for this to fail when S is derived from * integer scores. Because the scores may have been provided by the * user, and this may be our first chance to detect the "user error" * of an invalid matrix, this engine returns as a normal error * if it can't reach a valid solution. */ static int yualtschul_engine(ESL_DMATRIX *S, ESL_DMATRIX *P, double *fi, double *fj, double *ret_lambda) { int status; ESL_ROOTFINDER *R = NULL; struct yualtschul_params p; double lambda; double xl, xr; double fx = -1.0; int i,j; /* Set up a bisection method to find lambda */ p.S = S; p.M = p.Y = NULL; if ((p.M = esl_dmatrix_Create(S->n, S->n)) == NULL) { status = eslEMEM; goto ERROR; } if ((p.Y = esl_dmatrix_Create(S->n, S->n)) == NULL) { status = eslEMEM; goto ERROR; } if ((R = esl_rootfinder_Create(yualtschul_func, &p)) == NULL) { status = eslEMEM; goto ERROR; } /* Identify suitable brackets on lambda. */ xr = yualtschul_upper_bound(S); for (xl = xr; xl > 1e-10; xl /= 1.6) { if ((status = yualtschul_func(xl, &p, &fx)) != eslOK) goto ERROR; if (fx > 0.) break; } if (fx <= 0.) { status = eslEINVAL; goto ERROR; } for (; xr < 100.; xr *= 1.6) { if ((status = yualtschul_func(xr, &p, &fx)) != eslOK) goto ERROR; if (fx < 0.) break; } if (fx >= 0.) { status = eslEINVAL; goto ERROR; } /* Find lambda by bisection */ if (( status = esl_root_Bisection(R, xl, xr, &lambda)) != eslOK) goto ERROR; /* Find fi, fj from Y: fi are column sums, fj are row sums */ for (i = 0; i < S->n; i++) { fi[i] = 0.; for (j = 0; j < S->n; j++) fi[i] += p.Y->mx[j][i]; } for (j = 0; j < S->n; j++) { fj[j] = 0.; for (i = 0; i < S->n; i++) fj[j] += p.Y->mx[j][i]; } /* Find p_ij */ for (i = 0; i < S->n; i++) for (j = 0; j < S->n; j++) P->mx[i][j] = fi[i] * fj[j] * p.M->mx[i][j]; *ret_lambda = lambda; esl_dmatrix_Destroy(p.M); esl_dmatrix_Destroy(p.Y); esl_rootfinder_Destroy(R); return eslOK; ERROR: if (p.M) esl_dmatrix_Destroy(p.M); if (p.Y) esl_dmatrix_Destroy(p.Y); if (R) esl_rootfinder_Destroy(R); return status; } /* Function: esl_scorematrix_Probify() * Synopsis: Calculate the probabilistic basis of a score matrix. * * Purpose: Reverse engineering of a score matrix: given a "valid" * substitution matrix , obtain implied joint * probabilities $p_{ij}$, query composition $f_i$, target * composition $f_j$, and scale $\lambda$, by assuming that * $f_i$ and $f_j$ are the appropriate marginals of $p_{ij}$. * Optionally return any or all of these solutions in * <*opt_P>, <*opt_fi>, <*opt_fj>, and <*opt_lambda>. * * The calculation is run only on canonical residue scores * $0..K-1$ in S, to calculate joint probabilities for all * canonical residues. Joint and background probabilities * involving degenerate residues are then calculated by * appropriate marginalizations. See notes on * about how probabilities * involving degeneracy codes are calculated. * * This implements an algorithm described in * \citep{YuAltschul03} and \citep{YuAltschul05}. * * Although this procedure may succeed in many cases, * it is unreliable and should be used with great caution. * Yu and Altschul note that it can find invalid solutions * (negative probabilities), and although they say that one * can keep searching until a valid solution is found, * one can produce examples where this does not seem to be * the case. The caller MUST check return status, and * MUST expect . * * Args: S - score matrix * opt_P - optRETURN: Kp X Kp matrix of implied target probs $p_{ij}$ * opt_fi - optRETURN: vector of Kp $f_i$ background probs, 0..Kp-1 * opt_fj - optRETURN: vector of Kp $f_j$ background probs, 0..Kp-1 * opt_lambda - optRETURN: calculated $\lambda$ parameter * * Returns: on success, and , , , and * point to the results (for any of these that were passed non-). * * , , and , if requested, are new * allocations, and must be freed by the caller. * * Returns if the algorithm fails to determine a valid solution, * but the solution is still returned (and caller needs to free). * * Returns if input score matrix isn't valid (sensu YuAltschul05); * now , , are returned NULL and is returned * as 0. * * Throws: on allocation failure. * * Xref: SRE:J1/35; SRE:J7/126. */ int esl_scorematrix_Probify(const ESL_SCOREMATRIX *S, ESL_DMATRIX **opt_P, double **opt_fi, double **opt_fj, double *opt_lambda) { int status; ESL_DMATRIX *Sd = NULL; ESL_DMATRIX *P = NULL; double *fi = NULL; double *fj = NULL; double lambda; int i,j; /* Check the input matrix for validity */ if ( yualtschul_scorematrix_validate(S) != eslOK) { status = eslEINVAL; goto ERROR; } if (( Sd = esl_dmatrix_Create(S->K, S->K)) == NULL) {status = eslEMEM; goto ERROR; } if (( P = esl_dmatrix_Create(S->Kp, S->Kp)) == NULL) {status = eslEMEM; goto ERROR; } ESL_ALLOC(fi, sizeof(double) * S->Kp); ESL_ALLOC(fj, sizeof(double) * S->Kp); /* Construct a double-precision dmatrix from S. * I've tried integrating over the rounding uncertainty by * averaging over trials with values jittered by +/- 0.5, * but it doesn't appear to help. */ for (i = 0; i < S->K; i++) for (j = 0; j < S->K; j++) Sd->mx[i][j] = (double) S->s[i][j]; /* Reverse engineer the doubles */ if ((status = yualtschul_engine(Sd, P, fi, fj, &lambda)) != eslOK) goto ERROR; set_degenerate_probs(S->abc_r, P, fi, fj); /* Done. */ if (yualtschul_solution_validate(P, fi, fj) != eslOK) status = eslENORESULT; else status = eslOK; esl_dmatrix_Destroy(Sd); if (opt_P != NULL) *opt_P = P; else esl_dmatrix_Destroy(P); if (opt_fi != NULL) *opt_fi = fi; else free(fi); if (opt_fj != NULL) *opt_fj = fj; else free(fj); if (opt_lambda != NULL) *opt_lambda = lambda; return status; ERROR: if (Sd != NULL) esl_dmatrix_Destroy(Sd); if (P != NULL) esl_dmatrix_Destroy(P); if (fi != NULL) free(fi); if (fj != NULL) free(fj); if (opt_P != NULL) *opt_P = NULL; if (opt_fi != NULL) *opt_fi = NULL; if (opt_fj != NULL) *opt_fj = NULL; if (opt_lambda != NULL) *opt_lambda = 0.; return status; } /*---------- end, implicit prob basis, bg unknown ---------------*/ /***************************************************************** * 7. Experiment driver *****************************************************************/ #ifdef eslSCOREMATRIX_EXPERIMENT #include #include #include "easel.h" #include "esl_alphabet.h" #include "esl_dmatrix.h" #include "esl_getopts.h" #include "esl_scorematrix.h" #include "esl_vectorops.h" static ESL_OPTIONS options[] = { /* name type default env range togs reqs incomp help docgrp */ {"-h", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "show help and usage", 0}, {"-l", eslARG_REAL, "0.3466", NULL, NULL, NULL, NULL, NULL, "set base lambda (units of score mx) to ", 0}, {"-s", eslARG_REAL, "1.0", NULL, NULL, NULL, NULL, NULL, "additional scale factor applied to lambda", 0}, {"-t", eslARG_REAL, "1.37", NULL, NULL, NULL, NULL, NULL, "set WAG time (branch length) to ", 0}, {"--yfile", eslARG_OUTFILE, NULL, NULL, NULL, NULL, NULL, NULL, "save xy file of Yu/Altschul root eqn to ", 0}, {"--mfile", eslARG_OUTFILE, NULL, NULL, NULL, NULL, NULL, NULL, "save WAG score matrix to ", 0}, { 0,0,0,0,0,0,0,0,0,0}, }; static char usage[] = "[-options]"; static char banner[] = "Yu/Altschul experiment driver for scorematrix module"; /* yualtschul_graph_dump() * Dump an XY plot of (\sum Y -1) vs. lambda for a score matrix. * X-axis of graph starts at , ends at , stepping by . */ static int yualtschul_graph_dump(FILE *ofp, ESL_SCOREMATRIX *S, double scale, double lambda0, double lambda1, double stepsize) { struct yualtschul_params p; int a,b; double fx; double lambda; /* Set up a bisection method to find lambda */ p.S = esl_dmatrix_Create(S->K, S->K); p.M = esl_dmatrix_Create(S->K, S->K); p.Y = esl_dmatrix_Create(S->K, S->K); for (a = 0; a < S->K; a++) for (b = 0; b < S->K; b++) p.S->mx[a][b] = (double) S->s[a][b]; for (lambda = lambda0; lambda <= lambda1; lambda += stepsize) { yualtschul_func(lambda/scale, &p, &fx); fprintf(ofp, "%f %f\n", lambda, fx); } fprintf(ofp, "&\n"); fprintf(ofp, "%f 0.0\n", lambda0); fprintf(ofp, "%f 0.0\n", lambda1); fprintf(ofp, "&\n"); esl_dmatrix_Destroy(p.S); esl_dmatrix_Destroy(p.M); esl_dmatrix_Destroy(p.Y); return 0; } int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_ALPHABET *abc = esl_alphabet_Create(eslAMINO); /* protein matrices 20x20 */ ESL_DMATRIX *Q = esl_dmatrix_Create(abc->K, abc->K); /* WAG rate matrix */ ESL_DMATRIX *P0 = esl_dmatrix_Create(abc->K, abc->K); /* p_ij joint probabilities calculated from WAG */ double *wagpi = malloc(sizeof(double) * abc->K); ESL_SCOREMATRIX *S0 = esl_scorematrix_Create(abc); /* score matrix calculated from WAG p_ij's */ double lambda0 = esl_opt_GetReal(go, "-l"); double t = esl_opt_GetReal(go, "-t"); double scale = esl_opt_GetReal(go, "-s"); char *yfile = esl_opt_GetString(go, "--yfile"); char *mfile = esl_opt_GetString(go, "--mfile"); ESL_DMATRIX *P = NULL; /* p_ij's from Yu/Altschul reverse eng of S0 */ double *fi = NULL; double *fj = NULL; double lambda; double D; int status; /* Calculate an integer score matrix from a probabilistic rate matrix (WAG) */ esl_scorematrix_SetWAG(S0, lambda0/scale, t); esl_composition_WAG(wagpi); printf("WAG matrix calculated at t=%.3f, lambda=%.4f (/%.1f)\n", t, lambda0, scale); /* Save the matrix, if asked */ if (mfile) { FILE *ofp = NULL; if ( (ofp = fopen(mfile, "w")) == NULL) esl_fatal("failed to open %s for writing scorematrix", mfile); ESL_DASSERT1(( S0->Kp >= 20 )); // the strcpy below is fine. The assertion tries to convince static analyzers of that. strcpy(S0->outorder, "ARNDCQEGHILKMFPSTWYV"); esl_scorematrix_Write(ofp, S0); fclose(ofp); } /* Because of integer roundoff, the actual probability basis is a little different */ esl_scorematrix_ProbifyGivenBG(S0, wagpi, wagpi, &lambda, NULL); printf("Integer roundoff shifts implicit lambda (given wagpi's) to %.4f (/%.1f)\n", lambda*scale, scale); printf("Scores in matrix range from %d to %d\n", esl_scorematrix_Min(S0), esl_scorematrix_Max(S0)); esl_scorematrix_RelEntropy(S0, wagpi, wagpi, lambda, &D); printf("Relative entropy: %.3f bits\n", D); if (yfile) { FILE *ofp = NULL; if ( (ofp = fopen(yfile, "w")) == NULL) esl_fatal("failed to open XY file %s for writing\n", yfile); yualtschul_graph_dump(ofp, S0, scale, 0.01, 1.0, 0.0001); fclose(ofp); printf("XY plot of Yu/Altschul rootfinding saved to : %s\n", yfile); } status = esl_scorematrix_Probify(S0, &P, &fi, &fj, &lambda); printf("Yu/Altschul reverse engineering gives lambda = %.4f (/%.1f)\n", lambda*scale, scale); //printf("fi's are: \n"); esl_vec_DDump(stdout, fi, S0->K, abc->sym); if (status != eslOK) printf("however, the solution is INVALID!\n"); else printf("and the joint and marginals are a valid probabilistic basis.\n"); free(fj); free(fi); esl_scorematrix_Destroy(S0); esl_dmatrix_Destroy(P); esl_dmatrix_Destroy(P0); esl_dmatrix_Destroy(Q); esl_alphabet_Destroy(abc); esl_getopts_Destroy(go); return 0; } #endif /* eslSCOREMATRIX_EXPERIMENT */ /*------------------ end, experiment driver ---------------------*/ /***************************************************************** * 8. Utility programs *****************************************************************/ /* Reformat a score matrix file into Easel internal digital alphabet order, suitable for making * one of the static data structures in our section of preloaded matrices. */ #ifdef eslSCOREMATRIX_UTILITY1 /* gcc -g -Wall -o utility -I. -L. -DeslSCOREMATRIX_UTILITY1 esl_scorematrix.c -leasel -lm ./utility BLOSUM62 */ #include "easel.h" #include "esl_alphabet.h" #include "esl_scorematrix.h" #include "esl_fileparser.h" int main(int argc, char **argv) { char *infile = argv[1]; ESL_ALPHABET *abc; ESL_FILEPARSER *efp; ESL_SCOREMATRIX *S; int x,y; abc = esl_alphabet_Create(eslAMINO); if (esl_fileparser_Open(infile, NULL, &efp) != eslOK) esl_fatal("Failed to open %s\n", infile); if (esl_scorematrix_Read(efp, abc, &S) != eslOK) esl_fatal("parse failed: %s", efp->errbuf); printf(" /*"); for (y = 0; y < abc->Kp; y++) printf(" %c ", abc->sym[y]); printf(" */\n"); for (x = 0; x < abc->Kp; x++) { printf(" { "); for (y = 0; y < abc->Kp; y++) printf("%3d, ", S->s[x][y]); printf(" }, /* %c */\n", abc->sym[x]); } esl_scorematrix_Destroy(S); esl_fileparser_Close(efp); esl_alphabet_Destroy(abc); return eslOK; } #endif /*eslSCOREMATRIX_UTILITY1*/ /* Utility 2: joint or conditional probabilities from BLOSUM62 (depending on how compiled) */ #ifdef eslSCOREMATRIX_UTILITY2 /* gcc -g -Wall -o utility2 -I. -L. -DeslSCOREMATRIX_UTILITY2 esl_scorematrix.c -leasel -lm ./utility2 */ #include "easel.h" #include "esl_alphabet.h" #include "esl_dmatrix.h" #include "esl_scorematrix.h" int main(int argc, char **argv) { ESL_ALPHABET *abc = esl_alphabet_Create(eslAMINO); ESL_SCOREMATRIX *S = esl_scorematrix_Create(abc); ESL_DMATRIX *Q = NULL; double *fa = NULL; double *fb = NULL; double slambda; int a,b; esl_scorematrix_Set("BLOSUM62", S); esl_scorematrix_Probify(S, &Q, &fa, &fb, &slambda); #if 0 esl_scorematrix_JointToConditionalOnQuery(abc, Q); /* Q->mx[a][b] is now P(b | a) */ #endif esl_dmatrix_Dump(stdout, Q, abc->sym, abc->sym); esl_dmatrix_Destroy(Q); esl_scorematrix_Destroy(S); esl_alphabet_Destroy(abc); return eslOK; } #endif /*eslSCOREMATRIX_UTILITY2*/ /***************************************************************** * 9. Unit tests. *****************************************************************/ #ifdef eslSCOREMATRIX_TESTDRIVE #include "esl_dirichlet.h" static void utest_ReadWrite(ESL_ALPHABET *abc, ESL_SCOREMATRIX *S) { char tmpfile[16] = "esltmpXXXXXX"; FILE *fp = NULL; ESL_SCOREMATRIX *S2 = NULL; ESL_FILEPARSER *efp = NULL; if (esl_tmpfile_named(tmpfile, &fp) != eslOK) esl_fatal("failed to open tmp file"); if (esl_scorematrix_Write(fp, S) != eslOK) esl_fatal("failed to write test matrix"); fclose(fp); if (esl_fileparser_Open(tmpfile, NULL, &efp) != eslOK) esl_fatal("failed to open tmpfile containing BLOSUM62 matrix"); if (esl_scorematrix_Read(efp, abc, &S2) != eslOK) esl_fatal("failed to read tmpfile containing BLOSUM62 matrix"); if (esl_scorematrix_Compare(S, S2) != eslOK) esl_fatal("the two test matrices aren't identical"); remove(tmpfile); esl_fileparser_Close(efp); esl_scorematrix_Destroy(S2); return; } static void utest_ProbifyGivenBG(ESL_SCOREMATRIX *S0, ESL_DMATRIX *P0, double *wagpi, double lambda0) { char *msg = "ProbifyGivenBG() unit test failed"; ESL_DMATRIX *P = NULL; double sum = 0.0; double lambda; int a,b; if (esl_scorematrix_ProbifyGivenBG(S0, wagpi, wagpi, &lambda, &P) != eslOK) esl_fatal(msg); if (esl_DCompare(lambda0, lambda, 1e-3) != eslOK) esl_fatal("lambda is wrong"); for (a = 0; a < 20; a++) /* you can't just call esl_dmx_Sum(P), because P includes */ for (b = 0; b < 20; b++) /* marginalized degeneracies */ sum += P->mx[a][b]; if (esl_DCompare(sum, 1.0, 1e-9) != eslOK) esl_fatal("P doesn't sum to 1"); for (a = 0; a < 20; a++) /* for the same reason, you can't dmatrix_Compare P and P0 */ for (b = 0; b < 20; b++) if (esl_DCompare(P0->mx[a][b], P->mx[a][b], 1e-2) != eslOK) esl_fatal("P is wrong"); esl_dmatrix_Destroy(P); return; } /* The scores->pij reverse engineering engine works with scores in doubles, * so we can separate effects of rounding to integers in standard * score matrices. */ static void utest_yualtschul(ESL_DMATRIX *P0, double *wagpi) { char *msg = "reverse engineering engine test failed"; ESL_DMATRIX *S = NULL; /* original score matrix, in double form, not rounded to ints (calculated from P, fi, fj) */ ESL_DMATRIX *P = NULL; /* backcalculated P_ij joint probabilities */ double *fi = NULL; /* backcalculated f_i query composition */ double *fj = NULL; /* backcalculated f'_j target composition */ double lambda0; /* true lambda */ double lambda; /* backcalculated lambda */ double sum = 0.0; int i,j; /* Allocations */ if (( S = esl_dmatrix_Create(20, 20)) == NULL) esl_fatal(msg); if (( P = esl_dmatrix_Create(20, 20)) == NULL) esl_fatal(msg); if ((fi = malloc(sizeof(double) * 20)) == NULL) esl_fatal(msg); if ((fj = malloc(sizeof(double) * 20)) == NULL) esl_fatal(msg); /* Make a WAG-based score matrix in double-precision, without rounding to integers */ lambda0 = 0.3; for (i = 0; i < 20; i++) for (j = 0; j < 20; j++) S->mx[i][j] = log(P0->mx[i][j] / (wagpi[i] * wagpi[j])) / lambda0; /* Reverse engineer it in double precision */ if ( yualtschul_engine(S, P, fi, fj, &lambda) != eslOK) esl_fatal("reverse engineering engine failed"); /* Validate the solution (expect more accuracy from this than from integer scores) */ if (esl_DCompare(lambda0, lambda, 1e-4) != eslOK) esl_fatal("failed to get right lambda"); for (i = 0; i < 20; i++) /* you can't just call esl_dmx_Sum(P), because P includes */ for (j = 0; j < 20; j++) /* marginalized degeneracies */ sum += P->mx[i][j]; if (esl_DCompare(sum, 1.0, 1e-6) != eslOK) esl_fatal("reconstructed P doesn't sum to 1"); for (i = 0; i < 20; i++) /* for the same reason, you can't dmatrix_Compare P and P0 */ for (j = 0; j < 20; j++) if (esl_DCompare(P0->mx[i][j], P->mx[i][j], 1e-2) != eslOK) esl_fatal("failed to recover correct P_ij"); for (i = 0; i < 20; i++) { if (esl_DCompare(fi[i], fj[i], 1e-6) != eslOK) esl_fatal("background fi, fj not the same"); if (esl_DCompare(wagpi[i], fi[i], 1e-3) != eslOK) esl_fatal("failed to reconstruct WAG backgrounds"); } free(fj); free(fi); esl_dmatrix_Destroy(S); esl_dmatrix_Destroy(P); return; } /* utest_Probify() * This tests Probify on a matrix that was calculated from probabilities in the first * place. It verifies that the reconstructed Pij matrix matches the original Pij's * that the score matrix was built from. */ static void utest_Probify(ESL_SCOREMATRIX *S0, ESL_DMATRIX *P0, double *wagpi, double lambda0) { ESL_DMATRIX *P = NULL; double *fi = NULL; double *fj = NULL; double lambda; /* reconstructed lambda */ double sum = 0.0; int i,j; if (esl_scorematrix_Probify(S0, &P, &fi, &fj, &lambda) != eslOK) esl_fatal("reverse engineering failed"); /* Validate the solution, gingerly (we expect significant error due to integer roundoff) */ if (esl_DCompare(lambda0, lambda, 0.01) != eslOK) esl_fatal("failed to get right lambda"); for (i = 0; i < 20; i++) /* you can't just call esl_dmx_Sum(P), because P includes */ for (j = 0; j < 20; j++) /* marginalized degeneracies */ sum += P->mx[i][j]; if (esl_DCompare(sum, 1.0, 1e-6) != eslOK) esl_fatal("reconstructed P doesn't sum to 1"); for (i = 0; i < 20; i++) /* for the same reason, you can't dmatrix_Compare P and P0 */ for (j = 0; j < 20; j++) if (esl_DCompare(P0->mx[i][j], P->mx[i][j], 0.1) != eslOK) esl_fatal("failed to recover correct P_ij"); free(fj); free(fi); esl_dmatrix_Destroy(P); return; } /* utest_ProbifyBLOSUM() * This tests Probify on a score matrix where the original Pij's are treated as * unknown. It verifies that if you create a new score matrix from the reconstructed * Pij's, you get the original score matrix back. BLOSUM62 makes a good example, * hence the name. */ static void utest_ProbifyBLOSUM(ESL_SCOREMATRIX *BL62) { char *msg = "failure in ProbifyBLOSUM() unit test"; ESL_DMATRIX *P = NULL; double *fi = NULL; double *fj = NULL; double lambda; ESL_SCOREMATRIX *S2 = NULL; if (( S2 = esl_scorematrix_Clone(BL62)) == NULL) esl_fatal(msg); if (esl_scorematrix_Probify(BL62, &P, &fi, &fj, &lambda) != eslOK) esl_fatal(msg); if (esl_scorematrix_SetFromProbs(S2, lambda, P, fi, fj) != eslOK) esl_fatal(msg); if (esl_scorematrix_CompareCanon(BL62, S2) != eslOK) esl_fatal(msg); free(fj); free(fi); esl_scorematrix_Destroy(S2); esl_dmatrix_Destroy(P); return; } #endif /*eslSCOREMATRIX_TESTDRIVE*/ /***************************************************************** * 10. Test driver. *****************************************************************/ /* gcc -g -Wall -I. -L. -o test -DeslSCOREMATRIX_TESTDRIVE esl_scorematrix.c -leasel -lm ./test */ #ifdef eslSCOREMATRIX_TESTDRIVE #include "easel.h" #include "esl_scorematrix.h" int main(int argc, char **argv) { ESL_ALPHABET *abc = NULL; /* amino acid alphabet */ ESL_SCOREMATRIX *BL62= NULL; /* BLOSUM62 matrix */ ESL_SCOREMATRIX *S0 = NULL; /* original score matrix (calculated from P, fi, fj) */ ESL_DMATRIX *P0 = NULL; /* original P_ij joint probabilities */ ESL_DMATRIX *Q = NULL; /* WAG rate matrix */ double lambda0; /* true lambda used to construct S */ double t; int i,j; static double wagpi[20]; /* Allocations */ if ((abc = esl_alphabet_Create(eslAMINO)) == NULL) esl_fatal("allocation of alphabet failed"); if ((BL62= esl_scorematrix_Create(abc)) == NULL) esl_fatal("allocation of BLOSUM62 failed"); if ((S0 = esl_scorematrix_Create(abc)) == NULL) esl_fatal("allocation of scorematrix failed"); if ((P0 = esl_dmatrix_Create(abc->K, abc->K)) == NULL) esl_fatal("P allocation failed"); if ((Q = esl_dmatrix_Create(abc->K, abc->K)) == NULL) esl_fatal("Q allocation failed"); /* Make a BLOSUM matrix */ if ( esl_scorematrix_Set("BLOSUM62", BL62) != eslOK) esl_fatal("failed to set a BLOSUM matrix"); /* Make a WAG-based score matrix with small lambda. */ lambda0 = 0.00635; t = 2.0; esl_scorematrix_SetWAG(S0, lambda0, t); esl_composition_WAG(wagpi); /* Redo some calculations to get the known probabilistic basis of that S */ if ( esl_rmx_SetWAG(Q, wagpi) != eslOK) esl_fatal("failed to set WAG"); if ( esl_dmx_Exp(Q, t, P0) != eslOK) esl_fatal("failed to exponentiate WAG"); for (i = 0; i < 20; i++) for (j = 0; j < 20; j++) P0->mx[i][j] *= wagpi[i]; /* P_ij = P(j|i) pi_i */ /* The unit test battery */ utest_ReadWrite(abc, BL62); utest_ReadWrite(abc, S0); utest_ProbifyGivenBG(S0, P0, wagpi, lambda0); utest_yualtschul(P0, wagpi); utest_Probify(S0, P0, wagpi, lambda0); utest_ProbifyBLOSUM(BL62); esl_dmatrix_Destroy(Q); esl_dmatrix_Destroy(P0); esl_scorematrix_Destroy(BL62); esl_scorematrix_Destroy(S0); esl_alphabet_Destroy(abc); return 0; } #endif /*eslSCOREMATRIX_TESTDRIVE*/ /***************************************************************** * 11. Example program *****************************************************************/ #ifdef eslSCOREMATRIX_EXAMPLE /*::cexcerpt::scorematrix_example::begin::*/ #include "easel.h" #include "esl_alphabet.h" #include "esl_fileparser.h" #include "esl_getopts.h" #include "esl_dmatrix.h" #include "esl_vectorops.h" #include "esl_scorematrix.h" static ESL_OPTIONS options[] = { /* name type default env range toggles reqs incomp help docgroup*/ { "-h", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "show brief help on version and usage", 0 }, { "--dna", eslARG_NONE, FALSE, NULL, NULL, "--dna,--amino", NULL, NULL, "use DNA alphabet", 0 }, { "--amino", eslARG_NONE, "TRUE", NULL, NULL, "--dna,--amino", NULL, NULL, "use protein alphabet", 0 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, }; static char usage[] = "[-options] "; static char banner[] = "example of using easel scorematrix routines"; int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage); char *scorefile = esl_opt_GetArg(go, 1); ESL_ALPHABET *abc = NULL; ESL_FILEPARSER *efp = NULL; ESL_SCOREMATRIX *S = NULL; ESL_DMATRIX *P1 = NULL; /* implicit probability basis, bg unknown */ ESL_DMATRIX *P2 = NULL; /* implicit probability basis, bg known */ double *fi = NULL; double *fj = NULL; double lambda, D, E; int vstatus; if (esl_opt_GetBoolean(go, "--dna")) abc = esl_alphabet_Create(eslDNA); else if (esl_opt_GetBoolean(go, "--amino")) abc = esl_alphabet_Create(eslAMINO); /* Input a score matrix from a file. */ if ( esl_fileparser_Open(scorefile, NULL, &efp) != eslOK) esl_fatal("failed to open score file %s", scorefile); if ( esl_scorematrix_Read(efp, abc, &S) != eslOK) esl_fatal("failed to read matrix from %s:\n %s", scorefile, efp->errbuf); esl_fileparser_Close(efp); /* Try to reverse engineer it to get implicit probabilistic model. This may fail! */ vstatus = esl_scorematrix_Probify(S, &P1, &fi, &fj, &lambda); if (vstatus == eslOK) { /* Print some info, and the joint probabilities. */ esl_scorematrix_RelEntropy (S, fi, fj, lambda, &D); esl_scorematrix_ExpectedScore(S, fi, fj, &E); printf("By Yu/Altschul (2003,2005) procedure:\n"); printf("Lambda = %.4f\n", lambda); printf("Relative entropy = %.4f bits\n", D); printf("Expected score = %.4f bits\n", E * lambda * eslCONST_LOG2R); printf("p_ij's are:\n"); esl_dmatrix_Dump(stdout, P1, abc->sym, abc->sym); printf("fi's are:\n"); esl_vec_DDump(stdout, fi, S->K, abc->sym); printf("fj's are:\n"); esl_vec_DDump(stdout, fj, S->K, abc->sym); printf("============================================================\n\n"); } else { printf("Yu/Altschul procedure FAILS to find a valid implicit probability basis!\n"); printf("Lambda = %.4f\n", lambda); printf("p_ij's are:\n"); esl_dmatrix_Dump(stdout, P1, abc->sym, abc->sym); printf("fi's are:\n"); esl_vec_DDump(stdout, fi, S->K, abc->sym); printf("fj's are:\n"); esl_vec_DDump(stdout, fj, S->K, abc->sym); printf("============================================================\n\n"); esl_composition_BL62(fi); esl_composition_BL62(fj); } /* Now reverse engineer it again, this time using "known" background probs */ esl_scorematrix_ProbifyGivenBG(S, fi, fj, &lambda, &P2); esl_scorematrix_RelEntropy (S, fi, fj, lambda, &D); esl_scorematrix_ExpectedScore(S, fi, fj, &E); printf("By solving for lambda from given background frequencies:\n"); printf("Lambda = %.4f\n", lambda); printf("Relative entropy = %.4f bits\n", D); printf("Expected score = %.4f bits\n", E * lambda * eslCONST_LOG2R); printf("p_ij's are:\n"); esl_dmatrix_Dump(stdout, P2, abc->sym, abc->sym); printf("fi's are:\n"); esl_vec_DDump(stdout, fi, S->K, abc->sym); printf("fj's are:\n"); esl_vec_DDump(stdout, fj, S->K, abc->sym); printf("============================================================\n\n"); /* Now recalculate a score matrix from the probabilistic basis */ printf("Before:\n"); esl_scorematrix_Write(stdout, S); printf("After:\n"); esl_scorematrix_SetFromProbs(S, lambda, P2, fi, fj); esl_scorematrix_Write(stdout, S); free(fi); free(fj); esl_dmatrix_Destroy(P1); esl_dmatrix_Destroy(P2); esl_scorematrix_Destroy(S); esl_alphabet_Destroy(abc); esl_getopts_Destroy(go); return 0; } /*::cexcerpt::scorematrix_example::end::*/ #endif /*eslSCOREMATRIX_EXAMPLE*/