#include "config.h" #include #include #include #include #include #include #include #include #include #include "kseq.h" KSEQ_INIT(int, read) #include "parasail.h" #include "parasail/cpuid.h" #include "parasail/memory.h" #include "parasail/matrix_lookup.h" #include "func_verify_rowcols.h" static int verbose = 0; typedef struct gap_score { int open; int extend; } gap_score_t; gap_score_t gap_scores[] = { {10,1}, {10,2}, {14,2}, {40,2}, {INT_MIN,INT_MIN} }; static inline void parse_sequences( const char *filename, char ***strings_, unsigned long **sizes_, unsigned long *count_) { FILE* fp; kseq_t *seq = NULL; int l = 0; char **strings = NULL; unsigned long *sizes = NULL; unsigned long count = 0; unsigned long memory = 1000; fp = fopen(filename, "r"); if(fp == NULL) { perror("fopen"); exit(1); } strings = malloc(sizeof(char*) * memory); sizes = malloc(sizeof(unsigned long) * memory); seq = kseq_init(fileno(fp)); while ((l = kseq_read(seq)) >= 0) { strings[count] = strdup(seq->seq.s); if (NULL == strings[count]) { perror("strdup"); exit(1); } sizes[count] = seq->seq.l; ++count; if (count >= memory) { char **new_strings = NULL; unsigned long *new_sizes = NULL; memory *= 2; new_strings = realloc(strings, sizeof(char*) * memory); if (NULL == new_strings) { perror("realloc"); exit(1); } strings = new_strings; new_sizes = realloc(sizes, sizeof(unsigned long) * memory); if (NULL == new_sizes) { perror("realloc"); exit(1); } sizes = new_sizes; } } kseq_destroy(seq); fclose(fp); *strings_ = strings; *sizes_ = sizes; *count_ = count; } static inline unsigned long binomial_coefficient( unsigned long n, unsigned long k) { /* from http://blog.plover.com/math/choose.html */ unsigned long r = 1; unsigned long d; if (k > n) { return 0; } for (d = 1; d <= k; d++) { r *= n--; r /= d; } return r; } static inline void k_combination2( unsigned long pos, unsigned long *a, unsigned long *b) { double s; double i = floor(sqrt(2.0 * pos)) - 1.0; if (i <= 1.0) { i = 1.0; } s = i * (i - 1.0) / 2.0; while (pos - s >= i) { s += i; i += 1; } *a = (unsigned long)(pos - s); *b = (unsigned long)(i); } static inline int diff_array( unsigned long size, int *a, int *b) { unsigned long i = 0; for (i=0; iname; if (verbose) printf("\t%s\n", matrixname); for (gap_index=0; INT_MIN!=gap_scores[gap_index].open; ++gap_index) { int open = gap_scores[gap_index].open; int extend = gap_scores[gap_index].extend; if (gap.open != INT_MIN && gap.extend != INT_MIN) { open = gap.open; extend = gap.extend; } if (verbose) printf("\t\topen=%d extend=%d\n", open, extend); reference_function = functions[0].pointer; for (function_index=1; NULL!=functions[function_index].pointer; ++function_index) { if (verbose) printf("\t\t\t%s\n", functions[function_index].name); unsigned long saturated = 0; #pragma omp parallel for for (pair_index=0; pair_indexsaturated) { /* no point in comparing a result that saturated */ parasail_result_free(reference_result); parasail_result_free(result); #pragma omp atomic saturated += 1; continue; } if (reference_result->score != result->score) { #pragma omp critical(printer) { printf("%s(%lu,%lu,%d,%d,%s) wrong score (%d!=%d)\n", functions[function_index].name, a, b, open, extend, matrixname, reference_result->score, result->score); } } if (diff_array( sizes[b], reference_result->score_row, result->score_row)) { #pragma omp critical(printer) { printf("%s(%lu,%lu,%d,%d,%s) bad score row\n", functions[function_index].name, a, b, open, extend, matrixname); } } if (diff_array( sizes[a], reference_result->score_col, result->score_col)) { #pragma omp critical(printer) { printf("%s(%lu,%lu,%d,%d,%s) bad score col\n", functions[function_index].name, a, b, open, extend, matrixname); } } if (reference_result->matches_row && diff_array( sizes[b], reference_result->matches_row, result->matches_row)) { #pragma omp critical(printer) { printf("%s(%lu,%lu,%d,%d,%s) bad matches row\n", functions[function_index].name, a, b, open, extend, matrixname); } } if (reference_result->matches_col && diff_array( sizes[a], reference_result->matches_col, result->matches_col)) { #pragma omp critical(printer) { printf("%s(%lu,%lu,%d,%d,%s) bad matches col\n", functions[function_index].name, a, b, open, extend, matrixname); } } if (reference_result->similar_row && diff_array( sizes[b], reference_result->similar_row, result->similar_row)) { #pragma omp critical(printer) { printf("%s(%lu,%lu,%d,%d,%s) bad similar row\n", functions[function_index].name, a, b, open, extend, matrixname); } } if (reference_result->similar_col && diff_array( sizes[a], reference_result->similar_col, result->similar_col)) { #pragma omp critical(printer) { printf("%s(%lu,%lu,%d,%d,%s) bad similar col\n", functions[function_index].name, a, b, open, extend, matrixname); } } if (reference_result->length_row && diff_array( sizes[b], reference_result->length_row, result->length_row)) { #pragma omp critical(printer) { printf("%s(%lu,%lu,%d,%d,%s) bad length row\n", functions[function_index].name, a, b, open, extend, matrixname); } } if (reference_result->length_col && diff_array( sizes[a], reference_result->length_col, result->length_col)) { #pragma omp critical(printer) { printf("%s(%lu,%lu,%d,%d,%s) bad length col\n", functions[function_index].name, a, b, open, extend, matrixname); } } parasail_result_free(reference_result); parasail_result_free(result); } if (verbose && saturated) { printf("%s %d %d %s saturated %lu times\n", functions[function_index].name, open, extend, matrixname, saturated); } } if (gap.open != INT_MIN && gap.extend != INT_MIN) { /* user-specified gap, don't loop */ break; } } } } int main(int argc, char **argv) { unsigned long i = 0; unsigned long seq_count = 0; unsigned long limit = 0; char **sequences = NULL; unsigned long *sizes = NULL; char *endptr = NULL; char *filename = NULL; int c = 0; int test_scores = 1; int test_stats = 0; char *matrixname = NULL; const parasail_matrix_t *matrix = NULL; gap_score_t gap = {INT_MIN,INT_MIN}; while ((c = getopt(argc, argv, "f:m:n:o:e:vsS")) != -1) { switch (c) { case 'f': filename = optarg; break; case 'm': matrixname = optarg; break; case 'n': errno = 0; seq_count = strtol(optarg, &endptr, 10); if (errno) { perror("strtol"); exit(1); } break; case 'o': errno = 0; gap.open = strtol(optarg, &endptr, 10); if (errno) { perror("strtol gap.open"); exit(1); } break; case 'e': errno = 0; gap.extend = strtol(optarg, &endptr, 10); if (errno) { perror("strtol gap.extend"); exit(1); } break; case 'v': verbose = 1; break; case 's': test_stats = 1; break; case 'S': test_scores = 0; break; case '?': if (optopt == 'f' || optopt == 'n') { fprintf(stderr, "Option -%c requires an argument.\n", optopt); } else if (isprint(optopt)) { fprintf(stderr, "Unknown option `-%c'.\n", optopt); } else { fprintf(stderr, "Unknown option character `\\x%x'.\n", optopt); } exit(1); default: fprintf(stderr, "default case in getopt\n"); exit(1); } } if (filename) { parse_sequences(filename, &sequences, &sizes, &seq_count); } else { fprintf(stderr, "no filename specified\n"); exit(1); } /* select the matrix */ if (matrixname) { matrix = parasail_matrix_lookup(matrixname); if (NULL == matrix) { fprintf(stderr, "Specified substitution matrix not found.\n"); exit(1); } } limit = binomial_coefficient(seq_count, 2); printf("%lu choose 2 is %lu\n", seq_count, limit); #if HAVE_SSE2 if (parasail_can_use_sse2()) { if (test_scores) { check_functions(parasail_nw_rowcol_sse2, sequences, sizes, limit, matrix, gap); check_functions(parasail_sg_rowcol_sse2, sequences, sizes, limit, matrix, gap); check_functions(parasail_sw_rowcol_sse2, sequences, sizes, limit, matrix, gap); } if (test_stats) { check_functions(parasail_nw_stats_rowcol_sse2, sequences, sizes, limit, matrix, gap); check_functions(parasail_sg_stats_rowcol_sse2, sequences, sizes, limit, matrix, gap); check_functions(parasail_sw_stats_rowcol_sse2, sequences, sizes, limit, matrix, gap); } } #endif #if HAVE_SSE41 if (parasail_can_use_sse41()) { if (test_scores) { check_functions(parasail_nw_rowcol_sse41, sequences, sizes, limit, matrix, gap); check_functions(parasail_sg_rowcol_sse41, sequences, sizes, limit, matrix, gap); check_functions(parasail_sw_rowcol_sse41, sequences, sizes, limit, matrix, gap); } if (test_stats) { check_functions(parasail_nw_stats_rowcol_sse41, sequences, sizes, limit, matrix, gap); check_functions(parasail_sg_stats_rowcol_sse41, sequences, sizes, limit, matrix, gap); check_functions(parasail_sw_stats_rowcol_sse41, sequences, sizes, limit, matrix, gap); } } #endif #if HAVE_AVX2 if (parasail_can_use_avx2()) { if (test_scores) { check_functions(parasail_nw_rowcol_avx2, sequences, sizes, limit, matrix, gap); check_functions(parasail_sg_rowcol_avx2, sequences, sizes, limit, matrix, gap); check_functions(parasail_sw_rowcol_avx2, sequences, sizes, limit, matrix, gap); } if (test_stats) { check_functions(parasail_nw_stats_rowcol_avx2, sequences, sizes, limit, matrix, gap); check_functions(parasail_sg_stats_rowcol_avx2, sequences, sizes, limit, matrix, gap); check_functions(parasail_sw_stats_rowcol_avx2, sequences, sizes, limit, matrix, gap); } } #endif #if HAVE_KNC { if (test_scores) { check_functions(parasail_nw_rowcol_knc, sequences, sizes, limit, matrix, gap); check_functions(parasail_sg_rowcol_knc, sequences, sizes, limit, matrix, gap); check_functions(parasail_sw_rowcol_knc, sequences, sizes, limit, matrix, gap); } if (test_stats) { check_functions(parasail_nw_stats_rowcol_knc, sequences, sizes, limit, matrix, gap); check_functions(parasail_sg_stats_rowcol_knc, sequences, sizes, limit, matrix, gap); check_functions(parasail_sw_stats_rowcol_knc, sequences, sizes, limit, matrix, gap); } } #endif if (test_scores) { check_functions(parasail_nw_rowcol_disp, sequences, sizes, limit, matrix, gap); check_functions(parasail_sg_rowcol_disp, sequences, sizes, limit, matrix, gap); check_functions(parasail_sw_rowcol_disp, sequences, sizes, limit, matrix, gap); } if (test_stats) { check_functions(parasail_nw_stats_rowcol_disp, sequences, sizes, limit, matrix, gap); check_functions(parasail_sg_stats_rowcol_disp, sequences, sizes, limit, matrix, gap); check_functions(parasail_sw_stats_rowcol_disp, sequences, sizes, limit, matrix, gap); } for (i=0; i