/* * * Sequence handler library by Huzefa Rangwala * Date : 03.01.2007 * * * */ #include /*********************************************************/ /* ! \brief Initializes the gk_seq_t variable \param A pointer to gk_seq_t itself \returns null */ /***********************************************************************/ void gk_seq_init(gk_seq_t *seq) { seq->len = 0; seq->sequence = NULL; seq->pssm = NULL; seq->psfm = NULL; seq->name = NULL; } /***********************************************************************/ /*! \brief This function creates the localizations for the various sequences \param string i.e amino acids, nucleotides, sequences \returns gk_i2cc2i_t variable */ /*********************************************************************/ gk_i2cc2i_t *gk_i2cc2i_create_common(char *alphabet) { int nsymbols; gk_idx_t i; gk_i2cc2i_t *t; nsymbols = strlen(alphabet); t = gk_malloc(sizeof(gk_i2cc2i_t),"gk_i2c_create_common"); t->n = nsymbols; t->i2c = gk_cmalloc(256, "gk_i2c_create_common"); t->c2i = gk_imalloc(256, "gk_i2c_create_common"); gk_cset(256, -1, t->i2c); gk_iset(256, -1, t->c2i); for(i=0;ii2c[i] = alphabet[i]; t->c2i[(int)alphabet[i]] = i; } return t; } /*********************************************************************/ /*! \brief This function reads a pssm in the format of gkmod pssm \param file_name is the name of the pssm file \returns gk_seq_t */ /********************************************************************/ gk_seq_t *gk_seq_ReadGKMODPSSM(char *filename) { gk_seq_t *seq; gk_idx_t i, j, ii; size_t ntokens, nbytes, len; FILE *fpin; gk_Tokens_t tokens; static char *AAORDER = "ARNDCQEGHILKMFPSTWYVBZX*"; static int PSSMWIDTH = 20; char *header, line[MAXLINELEN]; gk_i2cc2i_t *converter; header = gk_cmalloc(PSSMWIDTH, "gk_seq_ReadGKMODPSSM: header"); converter = gk_i2cc2i_create_common(AAORDER); gk_getfilestats(filename, &len, &ntokens, NULL, &nbytes); len --; seq = gk_malloc(sizeof(gk_seq_t),"gk_seq_ReadGKMODPSSM"); gk_seq_init(seq); seq->len = len; seq->sequence = gk_imalloc(len, "gk_seq_ReadGKMODPSSM"); seq->pssm = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM"); seq->psfm = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM"); seq->nsymbols = PSSMWIDTH; seq->name = gk_getbasename(filename); fpin = gk_fopen(filename,"r","gk_seq_ReadGKMODPSSM"); /* Read the header line */ if (fgets(line, MAXLINELEN-1, fpin) == NULL) errexit("Unexpected end of file: %s\n", filename); gk_strtoupper(line); gk_strtokenize(line, " \t\n", &tokens); for (i=0; isequence[i] = converter->c2i[(int)tokens.list[1][0]]; for (j=0; jpssm[i][converter->c2i[(int)header[j]]] = atoi(tokens.list[2+j]); seq->psfm[i][converter->c2i[(int)header[j]]] = atoi(tokens.list[2+PSSMWIDTH+j]); } gk_freetokenslist(&tokens); i++; } seq->len = i; /* Reset the length if certain characters were skipped */ gk_free((void **)&header, LTERM); gk_fclose(fpin); return seq; } /**************************************************************************/ /*! \brief This function frees the memory allocated to the seq structure. \param gk_seq_t \returns nothing */ /**************************************************************************/ void gk_seq_free(gk_seq_t *seq) { gk_iFreeMatrix(&seq->pssm, seq->len, seq->nsymbols); gk_iFreeMatrix(&seq->psfm, seq->len, seq->nsymbols); gk_free((void **)&seq->name, &seq->sequence, LTERM); //gk_free((void **)&seq, LTERM); gk_free((void **) &seq, LTERM); }