/* Phylogenetic trees. * * SRE, Tue May 2 13:54:30 2006 [St. Louis] */ #ifndef eslTREE_INCLUDED #define eslTREE_INCLUDED #include #include "esl_dmatrix.h" #include "esl_random.h" /* Object: ESL_TREE * * All trees are represented as rooted trees, starting from * node 0. For N taxa, there are N-1 internal nodes, numbered * 0..N-2. Taxa on leaves are numbered 0..N-1, and represented * in and as negative numbers. * */ typedef struct { int N; /* number of taxa */ /* (Mandatory) information for the internal nodes of a rooted tree. * There are N-1 nodes, numbered 0..N-2, with the root at 0, * so each array below is indexed [0..N-2]. * When an internal node has a left or right branch to a taxon, * / are <= 0, negative ; if they're to * be used as array indices, flip their sign. * There is no ambiguity between taxon 0/root node 0, because * a taxon can't be a parent, and the root node can't be a child. * For an unrooted tree, by convention, taxon 0 is the outgroup: T->left[0] = 0, * and T->rd[0] = 0.0. */ int *parent; /* index of parent of node: values are 0..N-2; parent of root 0 = 0 */ int *left; /* index of left child: values are -(N-1)..0=taxa; 1..N-2=nodes */ int *right; /* index of right child: values are -(N-1)..0=taxa; 1..N-2=nodes */ double *ld; /* left branch length under node: values are >= 0 */ double *rd; /* right branch length under node: values are >= 0 */ /* in linkage trees, ld[x]=rd[x]= "height" (linkage value) of node, not branch lengths */ /* Derived (optional) information, that we can reconstruct if * we need to from the mandatory info above. */ int *taxaparent; /* for taxa [0..N-1]: index of its parent node, 0..N-2. [esl_tree_SetTaxaParents()] */ int *cladesize; /* for nodes [0..N-2]: # taxa in this clade, 1..N [esl_tree_SetCladesizes()] */ /* Optional information */ char **taxonlabel; /* labels for taxa: [0..N-1] array of char strings */ char **nodelabel; /* labels for nodes: [0..N-2] array of char strings */ /* Tree mode options. */ int is_linkage_tree; /* TRUE if this is a linkage tree; if FALSE, it's an additive tree */ /* Tree output options. */ int show_unrooted; /* TRUE to output 'root' as a trifurcation (a la PHYLIP) */ int show_node_labels; /* TRUE to output labels for interior nodes */ int show_root_branchlength; /* TRUE to show 0.0 branch length to root node (a la TreeAlign) */ int show_branchlengths; /* TRUE to output branch lengths */ int show_quoted_labels; /* TRUE to output ALL labels as quoted labels */ int show_numeric_taxonlabels;/* TRUE to output taxa labels as their 0..N-1 indices if no other taxonlabel is present */ /* Memory allocation information, when growing a tree (on input, for example) */ int nalloc; /* current allocated # of taxa */ } ESL_TREE; /* UPGMA, average-link, minimum-link, and maximum-link clustering are * all implemented by one algorithm, cluster_engine(), in esl_tree.c. * We define some flags (within the scope of the tree module) to * control the behavior, as we call the algorithm engine from four * different API functions. */ #define eslUPGMA 0 #define eslWPGMA 1 #define eslSINGLE_LINKAGE 2 #define eslCOMPLETE_LINKAGE 3 /* 1. The ESL_TREE object. */ extern ESL_TREE *esl_tree_Create(int ntaxa); extern ESL_TREE *esl_tree_CreateGrowable(int nalloc); extern ESL_TREE *esl_tree_CreateFromString(char *s); extern int esl_tree_Grow(ESL_TREE *T); extern int esl_tree_SetTaxaParents(ESL_TREE *T); extern int esl_tree_SetCladesizes(ESL_TREE *T); extern int esl_tree_SetTaxonlabels(ESL_TREE *T, char **names); extern int esl_tree_RenumberNodes(ESL_TREE *T); extern int esl_tree_VerifyUltrametric(ESL_TREE *T); extern int esl_tree_Validate(ESL_TREE *T, char *errbuf); extern void esl_tree_Destroy(ESL_TREE *T); /* 2. Newick format i/o */ extern int esl_tree_WriteNewick(FILE *fp, ESL_TREE *T); extern int esl_tree_ReadNewick(FILE *fp, char *errbuf, ESL_TREE **ret_T); /* 3. Tree comparison algorithms. */ extern int esl_tree_Compare(ESL_TREE *T1, ESL_TREE *T2); /* 4. Clustering algorithms for distance-based tree construction. */ extern int esl_tree_UPGMA(ESL_DMATRIX *D, ESL_TREE **ret_T); extern int esl_tree_WPGMA(ESL_DMATRIX *D, ESL_TREE **ret_T); extern int esl_tree_SingleLinkage(ESL_DMATRIX *D, ESL_TREE **ret_T); extern int esl_tree_CompleteLinkage(ESL_DMATRIX *D, ESL_TREE **ret_T); /* 5. Generating simulated trees. */ extern int esl_tree_Simulate(ESL_RANDOMNESS *r, int N, ESL_TREE **ret_T); extern int esl_tree_ToDistanceMatrix(ESL_TREE *T, ESL_DMATRIX **ret_D); #endif /*eslTREE_INCLUDED*/