/*
*
* Sequence handler library by Huzefa Rangwala
* Date : 03.01.2007
*
*
*
*/
#include "GKlib.h"
/***********************************************************************/
/*! \brief Resets every field of a gk_seq_t to its empty state.

    No memory is allocated or released here; whatever pointers were
    stored in the structure beforehand are simply overwritten.

\param seq is the sequence structure to reset; it must not be NULL.
\returns nothing
*/
/***********************************************************************/
void gk_seq_init(gk_seq_t *seq)
{
  seq->len      = 0;
  seq->nsymbols = 0;     /* gk_seq_free() reads this, so it must never be left indeterminate */
  seq->sequence = NULL;
  seq->pssm     = NULL;
  seq->psfm     = NULL;
  seq->name     = NULL;
}
/***********************************************************************/
/*! \brief Builds the index<->character lookup tables for an alphabet.

    Two 256-entry tables are allocated and pre-filled with -1:
      - i2c[k] receives the k-th character of \c alphabet;
      - c2i[c] receives the position of character \c c in \c alphabet.
    Entries that do not correspond to an alphabet symbol stay at -1.

\param alphabet is a NUL-terminated string listing the symbols (e.g. amino
       acids or nucleotides). Since the tables hold 256 entries, it must
       not be longer than 256 characters.
\returns a newly allocated gk_i2cc2i_t whose \c n field is the alphabet
         length.
*/
/*********************************************************************/
gk_i2cc2i_t *gk_i2cc2i_create_common(char *alphabet)
{
  int nsymbols;
  gk_idx_t i;
  gk_i2cc2i_t *t;

  nsymbols = strlen(alphabet);

  t      = gk_malloc(sizeof(gk_i2cc2i_t), "gk_i2c_create_common");
  t->n   = nsymbols;
  t->i2c = gk_cmalloc(256, "gk_i2c_create_common");
  t->c2i = gk_imalloc(256, "gk_i2c_create_common");

  /* -1 marks "no such symbol" in both directions */
  gk_cset(256, -1, t->i2c);
  gk_iset(256, -1, t->c2i);

  for (i=0; i<nsymbols; i++) {
    t->i2c[i] = alphabet[i];
    /* Index through unsigned char: where plain char is signed, a byte
       >= 0x80 would otherwise become a negative subscript. */
    t->c2i[(unsigned char)alphabet[i]] = i;
  }

  return t;
}
/*********************************************************************/
/*! \brief This function reads a pssm in the format of gkmod pssm
\param file_name is the name of the pssm file
\returns gk_seq_t
*/
/********************************************************************/
gk_seq_t *gk_seq_ReadGKMODPSSM(char *filename)
{
gk_seq_t *seq;
gk_idx_t i, j, ii;
size_t ntokens, nbytes, len;
FILE *fpin;
gk_Tokens_t tokens;
static char *AAORDER = "ARNDCQEGHILKMFPSTWYVBZX*";
static int PSSMWIDTH = 20;
char *header, line[MAXLINELEN];
gk_i2cc2i_t *converter;
header = gk_cmalloc(PSSMWIDTH, "gk_seq_ReadGKMODPSSM: header");
converter = gk_i2cc2i_create_common(AAORDER);
gk_getfilestats(filename, &len, &ntokens, NULL, &nbytes);
len --;
seq = gk_malloc(sizeof(gk_seq_t),"gk_seq_ReadGKMODPSSM");
gk_seq_init(seq);
seq->len = len;
seq->sequence = gk_imalloc(len, "gk_seq_ReadGKMODPSSM");
seq->pssm = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM");
seq->psfm = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM");
seq->nsymbols = PSSMWIDTH;
seq->name = gk_getbasename(filename);
fpin = gk_fopen(filename,"r","gk_seq_ReadGKMODPSSM");
/* Read the header line */
if (fgets(line, MAXLINELEN-1, fpin) == NULL)
errexit("Unexpected end of file: %s\n", filename);
gk_strtoupper(line);
gk_strtokenize(line, " \t\n", &tokens);
for (i=0; isequence[i] = converter->c2i[(int)tokens.list[1][0]];
for (j=0; jpssm[i][converter->c2i[(int)header[j]]] = atoi(tokens.list[2+j]);
seq->psfm[i][converter->c2i[(int)header[j]]] = atoi(tokens.list[2+PSSMWIDTH+j]);
}
gk_freetokenslist(&tokens);
i++;
}
seq->len = i; /* Reset the length if certain characters were skipped */
gk_free((void **)&header, LTERM);
gk_fclose(fpin);
return seq;
}
/**************************************************************************/
/*! \brief Releases a gk_seq_t together with everything it owns.

    Frees the pssm and psfm matrices (using the stored len and nsymbols
    as their dimensions), the name and sequence arrays, and finally the
    structure itself. Because \c seq is passed by value, the caller's own
    pointer is not modified and must not be used afterwards.

\param seq is the sequence to release; a NULL pointer is ignored.
\returns nothing
*/
/**************************************************************************/
void gk_seq_free(gk_seq_t *seq)
{
  if (seq == NULL)
    return;

  gk_iFreeMatrix(&seq->pssm, seq->len, seq->nsymbols);
  gk_iFreeMatrix(&seq->psfm, seq->len, seq->nsymbols);
  gk_free((void **)&seq->name, &seq->sequence, LTERM);

  /* the members are gone; now drop the container */
  gk_free((void **)&seq, LTERM);
}