/* Easel's foundation.
 * 
 * Contents:
 *    1. Exception and fatal error handling.
 *    2. Memory allocation/deallocation conventions.
 *    3. Standard banner for Easel miniapplications.
 *    4. Improved replacements for some C library functions.
 *    5. Portable drop-in replacements for nonstandard C functions.
 *    6. Additional string functions, esl_str*()
 *    7. File path/name manipulation, including tmpfiles.
 *    8. Typed comparison functions.
 *    9. Unit tests.
 *   10. Test driver.
 *   11. Examples. 
 */
#include "esl_config.h"

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#include <stdarg.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef _POSIX_VERSION
#include <sys/stat.h>
#include <sys/types.h>
#endif
#ifdef HAVE_MPI
#include <mpi.h>		/* MPI_Abort() may be used in esl_fatal() or other program killers */
#endif

#include "easel.h"
#include <syslog.h>


/*****************************************************************
 * 1. Exception and fatal error handling.
 *****************************************************************/
static esl_exception_handler_f esl_exception_handler = NULL;

/* Function:  esl_fail()
 * Synopsis:  Handle a normal failure code/message before returning to caller.
 *
 * Purpose:   A "failure" is a normal error that we want to handle
 *            without terminating the program; we're going to return
 *            control to the caller with a nonzero error code and
 *            (optionally) an informative error message formatted
 *            in <errbuf>.
 * 
 *            <esl_fail()> is called internally by the <ESL_FAIL()>
 *            and <ESL_XFAIL()> macros (see easel.h). The reason to
 *            have the failure macros call such a simple little
 *            function is to give us a convenient debugging
 *            breakpoint. For example, in a <_Validate()> routine that
 *            needs to do a normal return to a caller, you can set a
 *            breakpoint in <esl_fail()> to see exactly where the
 *            validation failed.
 *
 *            If the parent process id is 1 (taken here to mean we are
 *            running as a daemon), the message is sent to syslog at
 *            <LOG_ERR> priority instead, and <errbuf> is left untouched.
 *
 * Args:      errbuf - optional buffer of at least <eslERRBUFSIZE> bytes
 *                     to receive the formatted message; may be NULL.
 *            format - <printf()>-style format string, followed by its
 *                     arguments; if NULL, the function does nothing.
 *
 * Returns:   void.
 */
void
esl_fail(char *errbuf, const char *format, ...)
{
  va_list ap;

  if (format == NULL) return;

  /* Check whether we are running as a daemon so we can do the
   * right thing about logging instead of printing errors
   */
  if (getppid() != 1)
    { // we aren't running as a daemon, so format the message for the caller
      va_start(ap, format);
      if (errbuf) vsnprintf(errbuf, eslERRBUFSIZE, format, ap);
      va_end(ap);
    }
  else
    { // daemon: log it. vsyslog() needs an initialized va_list, so
      // it gets its own va_start()/va_end() pair.
      // NOTE(review): <errbuf> is not filled in on this path; confirm
      // that callers never read it when daemonized.
      va_start(ap, format);
      vsyslog(LOG_ERR, format, ap);
      va_end(ap);
    }
}


/* Function:  esl_exception()
 * Synopsis:  Throw an exception.
 *
 * Purpose:   Throw an exception. An "exception" is defined by Easel
 *            as an internal error that shouldn't happen and/or is 
 *            outside the user's control; as opposed to "failures", that       
 *            are to be expected, and within user control, and
 *            therefore normal. By default, exceptions are fatal.
 *            A program that wishes to be more robust can register
 *            a non-fatal exception handler.
 *            
 *            Easel programs normally call one of the exception-handling
 *            wrappers <ESL_EXCEPTION()> or <ESL_XEXCEPTION()>, which
 *            handle the overhead of passing in <use_errno>, <sourcefile>,
 *            and <sourceline>. <esl_exception> is rarely called directly.
 *            
 *            If no custom exception handler has been registered, the
 *            default behavior is to print a brief message to <stderr>
 *            then <abort()>, resulting in a nonzero exit code from the
 *            program.  Depending on what <errcode>, <sourcefile>,
 *            <sourceline>, and the <sprintf()>-formatted <format>
 *            are, this output looks like:
 *            
 *            Fatal exception (source file foo.c, line 42):
 *            Something wicked this way came.
 *
 *            Additionally, in an MPI parallel program, the default fatal 
 *            handler aborts all processes (with <MPI_Abort()>), not just
 *            the one that called <esl_exception()>. 
 *            
 * Args:      errcode     - Easel error code, such as eslEINVAL. See easel.h.
 *            use_errno   - if TRUE, also use perror() to report POSIX errno message.
 *            sourcefile  - Name of offending source file; normally __FILE__.
 *            sourceline  - Name of offending source line; normally __LINE__.
 *            format      - <sprintf()> formatted exception message, followed
 *                          by any additional necessary arguments for that 
 *                          message.
 *                          
 * Returns:   void. 
 *
 * Throws:    No abnormal error conditions. 
(Who watches the watchers?) */ void esl_exception(int errcode, int use_errno, char *sourcefile, int sourceline, char *format, ...) { va_list argp; #ifdef HAVE_MPI int mpiflag; #endif if (esl_exception_handler != NULL) { // If the custom exception handler tries to print to stderr/stdout, the error may get eaten if we're running as a daemon // Not sure how to prevent that, since we can't control what custom handlers get written. va_start(argp, format); (*esl_exception_handler)(errcode, use_errno, sourcefile, sourceline, format, argp); va_end(argp); return; } else { /* Check whether we are running as a daemon so we can do the right thing about logging instead of printing errors */ if (getppid() != 1) { // we're not running as a daemon, so print the error normally fprintf(stderr, "Fatal exception (source file %s, line %d):\n", sourcefile, sourceline); va_start(argp, format); vfprintf(stderr, format, argp); va_end(argp); fprintf(stderr, "\n"); if (use_errno && errno) perror("system error"); fflush(stderr); } else vsyslog(LOG_ERR, format, argp); #ifdef HAVE_MPI MPI_Initialized(&mpiflag); /* we're assuming we can do this, even in a corrupted, dying process...? */ if (mpiflag) MPI_Abort(MPI_COMM_WORLD, 1); #endif abort(); } } /* Function: esl_exception_SetHandler() * Synopsis: Register a different exception handling function. * * Purpose: Register a different exception handling function, * . When an exception occurs, the handler * receives at least four arguments: , , * , and . * * is an Easel error code, such as * . See for a list of all codes. * * is TRUE for POSIX system call failures. The * handler may then use POSIX to format/print an * additional message, using or . * * is the name of the Easel source code file * in which the exception occurred, and is * the line number. * * is a -formatted string, followed by * a containing any additional arguments that * formatted message needs. Your custom exception handler * will probably use or to format * its error message. 
* * Args: handler - ptr to your custom exception handler. * * Returns: void. * * Throws: (no abnormal error conditions) */ void esl_exception_SetHandler(void (*handler)(int errcode, int use_errno, char *sourcefile, int sourceline, char *format, va_list argp)) { esl_exception_handler = handler; } /* Function: esl_exception_ResetDefaultHandler() * Synopsis: Restore default exception handling. * * Purpose: Restore default exception handling, which is to print * a simple error message to then (see * . * * An example where this might be useful is in a program * that only temporarily wants to catch one or more types * of normally fatal exceptions. * * If the default handler is already in effect, this * call has no effect (is a no-op). * * Args: (void) * * Returns: (void) * * Throws: (no abnormal error conditions) */ void esl_exception_ResetDefaultHandler(void) { esl_exception_handler = NULL; } /* Function: esl_nonfatal_handler() * Synopsis: A trivial example of a nonfatal exception handler. * * Purpose: This serves two purposes. First, it is the simplest * example of a nondefault exception handler. Second, this * is used in test harnesses, when they have * turned on to test that thrown errors * are handled properly when a nonfatal error handler is * registered by the application. * * Args: errcode - Easel error code, such as eslEINVAL. See easel.h. * use_errno - TRUE on POSIX system call failures; use * sourcefile - Name of offending source file; normally __FILE__. * sourceline - Name of offending source line; normally __LINE__. * format - formatted exception message. * argp - containing any additional necessary arguments for * the message. * * Returns: void. * * Throws: (no abnormal error conditions) */ void esl_nonfatal_handler(int errcode, int use_errno, char *sourcefile, int sourceline, char *format, va_list argp) { return; } /* Function: esl_fatal() * Synopsis: Kill a program immediately, for a "violation". * * Purpose: Kill a program for a "violation". 
In general this should only be used * in development or testing code, not in production * code. The main use of is in unit tests. * Another use is in assertions used in dev code. * * The only other case (and the only case that should be allowed in * production code) is in a true "function" (a function that returns * its answer, rather than an Easel error code), where Easel error * conventions can't be used (because it can't return an error code), * AND the error is guaranteed to be a coding error. For an example, * see , which triggers a violation if the code * checks for an option that isn't in the code. * * In an MPI-parallel program, the entire job is * terminated; all processes are aborted (, * not just the one that called . * * If caller is feeling lazy and just wants to terminate * without any informative message, use . * * Args: format - formatted exception message, followed * by any additional necessary arguments for that * message. * * Returns: (void) * * Throws: (no abnormal error conditions) */ void esl_fatal(const char *format, ...) { va_list argp; #ifdef HAVE_MPI int mpiflag; #endif /* Check whether we are running as a daemon so we can do the right thing about logging instead of printing errors */ if (getppid() != 1) { // we're not running as a daemon, so print the error normally va_start(argp, format); vfprintf(stderr, format, argp); va_end(argp); fprintf(stderr, "\n"); fflush(stderr); } else vsyslog(LOG_ERR, format, argp); #ifdef HAVE_MPI MPI_Initialized(&mpiflag); if (mpiflag) MPI_Abort(MPI_COMM_WORLD, 1); #endif exit(1); } /*---------------- end, error handling conventions --------------*/ /***************************************************************** * 2. Memory allocation/deallocation conventions. *****************************************************************/ /* Function: esl_free() * Synopsis: free(), while allowing ptr to be NULL. 
* Incept: SRE, Fri Nov 3 17:12:01 2017 * * Purpose: Easel uses a convention of initializing ptrs to be NULL * before allocating them. When cleaning up after errors, a * routine can check for non-NULL ptrs to know what to * free(). Easel code is slightly cleaner if we have a * free() that no-ops on NULL ptrs. */ void esl_free(void *p) { if (p) free(p); } /* Function: esl_Free2D() * * Purpose: Free a 2D pointer array

, where first dimension is * . (That is, the array is .) * Tolerates any of the pointers being NULL, to allow * sparse arrays. * * Returns: void. * * DEPRECATED. Replace with esl_arr2_Destroy() */ void esl_Free2D(void **p, int dim1) { int i; if (p != NULL) { for (i = 0; i < dim1; i++) if (p[i] != NULL) free(p[i]); free(p); } return; } /* Function: esl_Free3D() * * Purpose: Free a 3D pointer array

, where first and second * dimensions are ,. (That is, the array is * .) Tolerates any of the * pointers being NULL, to allow sparse arrays. * * Returns: void. * * DEPRECATED. Replace with esl_arr3_Destroy() */ void esl_Free3D(void ***p, int dim1, int dim2) { int i, j; if (p != NULL) { for (i = 0; i < dim1; i++) if (p[i] != NULL) { for (j = 0; j < dim2; j++) if (p[i][j] != NULL) free(p[i][j]); free(p[i]); } free(p); } } /*------------- end, memory allocation conventions --------------*/ /***************************************************************** * 3. Standard banner for Easel miniapplications. *****************************************************************/ /* Function: esl_banner() * Synopsis: print standard Easel application output header * * Purpose: Print the standard Easel command line application banner * to , constructing it from (the name of the * program) and a short one-line description . * For example, * * might result in: * * \begin{cchunk} * # compstruct :: compare RNA structures * # Easel 0.1 (February 2005) * # Copyright (C) 2004-2007 HHMI Janelia Farm Research Campus * # Freely licensed under the Janelia Software License. * # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - * \end{cchunk} * * would typically be an application's * , rather than a fixed string. This allows the * program to be renamed, or called under different names * via symlinks. Any path in the is discarded; * for instance, if is "/usr/local/bin/esl-compstruct", * "esl-compstruct" is used as the program name. * * Note: * Needs to pick up preprocessor #define's from easel.h, * as set by ./configure: * * symbol example * ------ ---------------- * EASEL_VERSION "0.1" * EASEL_DATE "May 2007" * EASEL_COPYRIGHT "Copyright (C) 2004-2007 HHMI Janelia Farm Research Campus" * EASEL_LICENSE "Freely licensed under the Janelia Software License." * * Returns: on success. * * Throws: on allocation error. * on write error. 
*/ int esl_banner(FILE *fp, const char *progname, char *banner) { char *appname = NULL; int status; if ((status = esl_FileTail(progname, FALSE, &appname)) != eslOK) return status; if (fprintf(fp, "# %s :: %s\n", appname, banner) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); if (fprintf(fp, "# Easel %s (%s)\n", EASEL_VERSION, EASEL_DATE) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); if (fprintf(fp, "# %s\n", EASEL_COPYRIGHT) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); if (fprintf(fp, "# %s\n", EASEL_LICENSE) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); if (fprintf(fp, "# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n") < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); if (appname) free(appname); return eslOK; ERROR: if (appname) free(appname); return status; } /* Function: esl_usage() * Synopsis: print standard Easel application usage help line * * Purpose: Given a usage string and the name of the program * , output a standardized usage/help * message. is minimally a one line synopsis like * "[options] ", but it may extend to multiple * lines to explain the command line arguments in more * detail. It should not describe the options; that's the * job of the getopts module, and its * function. * * This is used by the Easel miniapps, and may be useful in * other applications as well. * * As in , the is typically passed * as , and any path prefix is ignored. * * For example, if is , * then * * \begin{cchunk} * esl_usage(stdout, argv[0], "[options] "> * \end{cchunk} * * produces * * \begin{cchunk} * Usage: esl-compstruct [options] * \end{cchunk} * * Returns: on success. * * Throws: on allocation failure. * on write failure. 
*/ int esl_usage(FILE *fp, const char *progname, char *usage) { char *appname = NULL; int status; if ( (status = esl_FileTail(progname, FALSE, &appname)) != eslOK) return status; if (fprintf(fp, "Usage: %s %s\n", appname, usage) < 0) ESL_XEXCEPTION_SYS(eslEWRITE, "write failed"); if (appname) free(appname); return eslOK; ERROR: if (appname) free(appname); return status; } /* Function: esl_dataheader() * Synopsis: Standard #-prefixed header lines for output data table * * Purpose: Print column headers for a space-delimited, fixed-column-width * data table to . * * Takes a variable number of argument pairs. Each pair is * . The absolute value of is the max * width of the column.

and a filename * , and returns the new full pathname through * . If does not already end in the * appropriate delimiter (e.g. / for UNIX), one is added. * * If is NULL, then is just the same as * . Similarly, if already appears to be a * full path (because its first character is a /), then * is ignored and is the same as * . It wouldn't normally make sense for a caller to * call this function with such arguments. * * may be a relative path. For example, * if is "/usr/local" and is "lib/myapp/data", * will be "/usr/local/lib/myapp/data". * * Returns: on success, and puts the path * in ; this string is allocated here, * and must be free'd by caller with . * * Throws: on allocation failure. * on bad argument. * In either case, is returned NULL. * * Xref: squid's FileConcat(). */ int esl_FileConcat(const char *dir, const char *file, char **ret_path) { char *path = NULL; int nd, nf; int status; if (ret_path != NULL) *ret_path = NULL; if (file == NULL) ESL_EXCEPTION(eslEINVAL, "null file"); nd = (dir != NULL)? strlen(dir) : 0; nf = strlen(file); ESL_ALLOC(path, sizeof(char) * (nd+nf+2)); if (dir == NULL) /* 1. silly caller didn't give a path */ strcpy(path, file); else if (*file == eslDIRSLASH) /* 2. is already a path? */ strcpy(path, file); else if (dir[nd-1] == eslDIRSLASH) /* 3. (dir is / terminated) */ sprintf(path, "%s%s", dir, file); else /* 4. / (usual case) */ sprintf(path, "%s%c%s", dir, eslDIRSLASH, file); *ret_path = path; return eslOK; ERROR: if (path != NULL) free(path); if (ret_path != NULL) *ret_path = NULL; return status; } /* Function: esl_FileNewSuffix() * * Purpose: Add a file suffix to ; or if * already has a suffix, replace it with . A suffix is * usually 2-4 letters following a '.' character. Returns * an allocated string containing the result in . * * For example, if is "foo" and is "ssi", * returns "foo.ssi". If is "foo.db" and * is "idx", returns "foo.idx". You can remove a suffix * too; if is "foo.db", and is "", the * result is "foo". 
* * Caller can either ask for <*ret_newpath> to be a new * allocation by passing <*ret_newpath = NULL>, or can * provide a ptr to a preallocated space. * * Returns: on success, and is set * string ".". Caller is * responsible for free'ing this string, whether it * provided it as preallocated space or asked for a new * allocation. * * Throws: on allocation failure. * * Xref: squid's FileAddSuffix(). */ int esl_FileNewSuffix(const char *filename, const char *sfx, char **ret_newpath) { char *new = *ret_newpath; // caller either provides memory, or asks for allocation w/ char *lastdot; int nf; int status; lastdot = strrchr(filename, '.'); /* check for suffix to replace */ if (lastdot != NULL && strchr(lastdot, eslDIRSLASH) != NULL) lastdot = NULL; /*foo.1/filename case - don't be fooled.*/ nf = (lastdot == NULL)? strlen(filename) : lastdot-filename; if (! new) ESL_ALLOC(new, sizeof(char) * (nf+strlen(sfx)+2)); /* '.' too */ strncpy(new, filename, nf); *(new+nf) = '.'; strcpy(new+nf+1, sfx); *ret_newpath = new; return eslOK; ERROR: if (!(*ret_newpath) && new) free(new); return status; } /* Function: esl_FileEnvOpen() * * Purpose: Looks for a file in a colon-separated list of * directories that is configured in an environment variable * . The first occurrence of file in this directory * list is opened read-only. The open file ptr is returned * through , and the full path name to the file * that was opened is returned through . * Caller can pass NULL in place of or * if it is not interested in one or both of these. * * Does not look in the current directory unless "." is * explicitly in the directory list provided by . * * Note: One reason to pass back to the caller is that * sometimes we're opening the first in a group of files * (for example, a database and its SSI index), and we want * to make sure that after we find the main file, the * caller can look for the auxiliary file(s) in exactly the * same directory. 
* * Examples: % setenv BLASTDB /nfs/databases/blast-db:/nfs/databases/nr/ * * FILE *fp; * char *path; * int status; * status = esl_FileEnvOpen("swiss42", "BLASTDB", &fp, &path); * * Returns: on success, and provides and ; * is opened here, and must be 'd by caller; * is allocated here, and must be 'd by caller. * * Returns if the file not found in any directory, * or if does not contain any directories to look in. * * Throws: on allocation error. * * Xref: squid's EnvFileOpen(). */ int esl_FileEnvOpen(const char *fname, const char *env, FILE **opt_fp, char **opt_path) { FILE *fp; char *dirlist; /* :-separated list of directories */ char *s, *s2; /* ptrs into elems in env list */ char *path = NULL; int np; int status; fp = NULL; if (opt_fp != NULL) *opt_fp = NULL; if (opt_path != NULL) *opt_path = NULL; if (env == NULL) return eslENOTFOUND; if ((s = getenv(env)) == NULL) return eslENOTFOUND; if (esl_strdup(s, -1, &dirlist) != eslOK) return eslEMEM; np = strlen(fname) + strlen(s) + 2; /* upper bound on full path len */ ESL_ALLOC(path, sizeof(char) * np); s = dirlist; while (s != NULL) { if ((s2 = strchr(s, ':')) != NULL) { *s2 = '\0'; s2++;} /* ~=strtok() */ sprintf(path, "%s%c%s", s, eslDIRSLASH, fname); /* // won't hurt */ if ((fp = fopen(path, "r")) != NULL) break; s = s2; } if (fp == NULL) { free(path); free(dirlist); return eslENOTFOUND; } if (opt_path != NULL) { *opt_path = path; } else free(path); if (opt_fp != NULL) { *opt_fp = fp; } else fclose(fp); free(dirlist); return eslOK; ERROR: if (path != NULL) free(path); if (fp != NULL) fclose(fp); if (dirlist != NULL) free(dirlist); if (opt_path != NULL) *opt_path = NULL; if (opt_fp != NULL) *opt_fp = NULL; return status; } /* Function: esl_tmpfile() * * Purpose: Open a secure temporary handle and return it in * . The file is opened in read-write mode () * with permissions 0600, as an atomic operation using the * POSIX function. 
 * 
 *            The <basename6X> argument is a modifiable string that must
 *            end in "XXXXXX" (for example, "esltmpXXXXXX"). The
 *            <basename6X> is used to construct a unique tmpfile name.
 *            
 *            Note that this string must be modifiable; do not declare
 *            it <char *tmpfile = "esltmpXXXXXX";> nor <char tmpfile[]
 *            = "esltmpXXXXXX";> because these will not work on some
 *            compilers. Something like <char tmpfile[32] =
 *            "esltmpXXXXXX";> that explicitly allocates storage will
 *            suffice.
 *            
 *            The file is opened in a standard temporary file
 *            directory. The path is obtained from the environment
 *            variable <TMPDIR>; failing that, from the environment
 *            variable <TMP>; and failing that, </tmp> is used. If the
 *            process is running <setuid> or <setgid>, then the
 *            environment variables are ignored, and the temp file is
 *            always created in </tmp>.
 *            
 *            The created tmpfile is not persistent and is not visible
 *            to a directory listing. The caller may <rewind()> the
 *            <ret_fp> and do cycles of reading and/or writing, but
 *            once the <ret_fp> is closed, the file disappears.  The
 *            caller does not need to <remove()> or <unlink()> it (and
 *            in fact, cannot do so, because it does not know the
 *            tmpfile's name).
 *            
 *            This function is a secure replacement for ANSI C
 *            <tmpfile()>, which is said to be insecurely implemented on
 *            some platforms.
 *            
 * Returns:   <eslOK> on success, and <ret_fp> now points to a new <FILE *>
 *            stream for the opened tempfile. 
 *
 * Throws:    <eslESYS> if a system call (including the <mkstemp()> call)
 *            fails, and <ret_fp> is returned NULL. One possible
 *            problem is if the temporary directory doesn't exist or
 *            is not writable. This is considered to be a system
 *            error, not a user error, so Easel handles it as an exception.
 *            
 * Xref:      STL11/85. Substantially copied from David Wheeler, 
 *            "Secure Programming for Linux and Unix HOWTO", 
 *            http://www.dwheeler.com/secure-programs/Secure-Programs-HOWTO/introduction.html.
 *            Copyright (C) 1999-2001 David A. Wheeler.
 *            Licensed under the MIT license; see Appendix C of the HOWTO.
 *            Thanks, David, for the clearest explanation of the issues 
 *            that I've seen.
 *            
 *            I also referred to H. Chen, D. Dean, and D.
Wagner, * "Model checking one million lines of C code", * In: Network and Distributed System Security Symposium, pp 171-185, * San Diego, CA, February 2004; * http://www.cs.ucdavis.edu/~hchen/paper/ndss04.pdf. * Wheeler's implementation obeys Chen et al's "Property 5", * governing secure use of tempfiles. */ int esl_tmpfile(char *basename6X, FILE **ret_fp) { char *tmpdir = NULL; char *path = NULL; FILE *fp = NULL; int fd; int status; mode_t old_mode; /* Determine what tmp directory to use, and construct the * file name. */ if (getuid() == geteuid() && getgid() == getegid()) { tmpdir = getenv("TMPDIR"); if (tmpdir == NULL) tmpdir = getenv("TMP"); } if (tmpdir == NULL) tmpdir = "/tmp"; if ((status = esl_FileConcat(tmpdir, basename6X, &path)) != eslOK) goto ERROR; old_mode = umask(077); if ((fd = mkstemp(path)) < 0) ESL_XEXCEPTION(eslESYS, "mkstemp() failed."); umask(old_mode); if ((fp = fdopen(fd, "w+b")) == NULL) ESL_XEXCEPTION(eslESYS, "fdopen() failed."); if (unlink(path) < 0) ESL_XEXCEPTION(eslESYS, "unlink() failed."); *ret_fp = fp; free(path); return eslOK; ERROR: if (path != NULL) free(path); if (fp != NULL) fclose(fp); *ret_fp = NULL; return status; } /* Function: esl_tmpfile_named() * * Purpose: Open a persistent temporary file relative to the current * working directory. The file name is constructed from the * argument, which must be a modifiable string * ending in the six characters "XXXXXX". These are * replaced by a unique character string by a call to POSIX * . For example, might be * on input, and on return; or, to * put the tmp file in a subdirectory under the current * working directory, something like * on input resulting in something like * on return. The tmpfile is opened * for reading and writing (in mode with permissions * 0600) and the opened handle is returned through * . 
* * The created tmpfile is persistent: it will be visible in * a directory listing, and will remain after program * termination unless the caller explicitly removes it by a * or call. * * To use this function securely, if you reopen the * tmpfile, you must only reopen it for reading, not * writing, and you must not trust the contents. * * Because the will be modified, it cannot be * a string constant (especially on a picky compiler like * gcc). You have to declare it with something like * * not * * because a compiler is allowed to make the <*tmpfile> version * a constant. * * Returns: on success, contains the name of the * tmpfile, and contains a new stream for the * opened file. * * on failure, and is returned NULL and * the contents of are undefined. The most * common reason for a failure will be that the caller does * not have write permission for the directory that * is in. Easel handles this as a normal (user) * failure, not an exception, because these permissions are * most likely in the user's control (in contrast to * , which always uses a system * that should always be user-writable on a properly * configured POSIX system). * * Xref: STL11/85. */ int esl_tmpfile_named(char *basename6X, FILE **ret_fp) { FILE *fp; mode_t old_mode; int fd; *ret_fp = NULL; old_mode = umask(077); if ((fd = mkstemp(basename6X)) < 0) return eslFAIL; umask(old_mode); if ((fp = fdopen(fd, "w+b")) == NULL) return eslFAIL; *ret_fp = fp; return eslOK; } /* Function: esl_getcwd() * Synopsis: Gets the path for the current working directory. * * Purpose: Returns the path for the current working directory * in <*ret_cwd>, as reported by POSIX . * <*ret_cmd> is allocated here and must be freed by * the caller. * * Returns: on success, and <*ret_cwd> points to * the pathname of the current working directory. * * If is unavailable on this system, * returns and <*ret_cwd> is . * * If the pathname length exceeds a set limit (16384 char), * returns and <*ret_cwd> is . 
* * Throws: on allocation failure; <*ret_cwd> is . * on getcwd() failure; <*ret_cwd> is . * * Xref: J7/54. */ int esl_getcwd(char **ret_cwd) { char *cwd = NULL; int status = eslOK; #ifdef _POSIX_VERSION int nalloc = 256; int maxalloc = 16384; do { ESL_ALLOC(cwd, sizeof(char) * nalloc); if (getcwd(cwd, nalloc) == NULL) { if (errno != ERANGE) ESL_XEXCEPTION(eslESYS, "unexpected getcwd() error"); if (nalloc * 2 > maxalloc) { status = eslERANGE; goto ERROR; } free(cwd); cwd = NULL; nalloc *= 2; } } while (cwd == NULL); *ret_cwd = cwd; return status; ERROR: if (cwd) free(cwd); *ret_cwd = NULL; return status; #else *ret_cwd = NULL; return eslEUNIMPLEMENTED; #endif } /*----------------- end of file path/name functions ------------------------*/ /***************************************************************** * 8. Typed comparison routines. *****************************************************************/ /* Function: esl_{DF}Compare() * OBSOLETE. Use esl_{DF}CompareNew() instead. * * Purpose: Compare two floating point scalars and for approximate equality. * Return if equal, if not. * * Equality is defined by being within a relative * epsilon , as <2*fabs(a-b)/(a+b)> $\leq$ . * Additionally, we catch the special cases where * and/or are 0 or -0. If both are, return ; if * one is, check that the absolute value of the other is * $\leq$ . * * and work on and * scalars, respectively. */ int esl_DCompare(double a, double b, double tol) { if (isinf(a) && isinf(b)) return eslOK; if (isnan(a) && isnan(b)) return eslOK; if (!isfinite(a) || !isfinite(b)) return eslFAIL; if (a == b) return eslOK; if (fabs(a) == 0. && fabs(b) <= tol) return eslOK; if (fabs(b) == 0. 
&& fabs(a) <= tol) return eslOK; if (2.*fabs(a-b) / fabs(a+b) <= tol) return eslOK; return eslFAIL; } int esl_FCompare(float a, float b, float tol) { if (isinf(a) && isinf(b)) return eslOK; if (isnan(a) && isnan(b)) return eslOK; if (!isfinite(a) || !isfinite(b)) return eslFAIL; if (a == b) return eslOK; if (fabs(a) == 0. && fabs(b) <= tol) return eslOK; if (fabs(b) == 0. && fabs(a) <= tol) return eslOK; if (2.*fabs(a-b) / fabs(a+b) <= tol) return eslOK; return eslFAIL; } /* Function: esl_DCompareAbs() * OBSOLETE. Use esl_{DF}CompareNew() instead. * * Purpose: Compare two floating point scalars and for * approximate equality, by absolute difference. Return * if equal, if not. * * Equality is defined as for finite * ; or , when either value is not * finite. * * Generally it is preferable to compare floating point * numbers for equality using relative difference: see * , and also Knuth's Seminumerical * Algorithms. However, cases arise where absolute * difference comparison is preferred. One such case is in * comparing the log probability values of DP matrices, * where numerical error tends to accumulate on an absolute * scale, dependent more on the number of terms than on * their magnitudes. DP cells with values that happen to be * very close to zero can have high relative differences. */ int esl_DCompareAbs(double a, double b, double tol) { if (isinf(a) && isinf(b)) return eslOK; if (isnan(a) && isnan(b)) return eslOK; if (!isfinite(a) || !isfinite(b)) return eslFAIL; if (fabs(a-b) <= tol) return eslOK; return eslFAIL; } int esl_FCompareAbs(float a, float b, float tol) { if (isinf(a) && isinf(b)) return eslOK; if (isnan(a) && isnan(b)) return eslOK; if (!isfinite(a) || !isfinite(b)) return eslFAIL; if (fabs(a-b) <= tol) return eslOK; return eslFAIL; } /* Function: esl_{DF}CompareNew() * Synopsis: Compare floating point values for approximate equality, better version. 
 * Incept:    SRE, Thu 19 Jul 2018 [Benasque]
 *
 * Purpose:   Return <eslOK> if <x0> and <x> are approximately equal within
 *            relative tolerance <r_tol> and absolute
 *            tolerance <a_tol>; <eslFAIL> if not.
 *
 *            Equality is defined as $|x0-x| \leq |x0|*r_tol + a_tol$.
 *            
 *            <x0> is the reference value: the true value or the
 *            better estimate. For example, in an iterative
 *            optimization, if you are comparing a new (better)
 *            estimate $x_i$ to a previous (worse) estimate $x_{i-1}$,
 *            <x0> is the new, <x> is the old.
 *
 *            Tolerances <r_tol> and <a_tol> must be $\geq 0$. For a
 *            strictly relative tolerance test, use <a_tol=0>; for
 *            strict absolute tolerance, use <r_tol=0>.
 *
 *            "Approximate equality" in floating point math: here be
 *            dragons. Usually you want to compare floating point
 *            values by their relative difference <r_tol>. An <r_tol>
 *            of $1e-5$ essentially means they agree up to their first
 *            five digits, regardless of absolute magnitude. However,
 *            relative difference fails for <x0 = 0>. Using both
 *            <r_tol> and <a_tol>, there is a switch at <|x0| = a_tol
 *            / r_tol>: above this |x0|, <r_tol> dominates, and below
 *            it, <a_tol> does. You typically want <a_tol << r_tol>,
 *            so the switchover only happens close to zero.
 *            
 *            In floating point math, the smallest possible |x0-x| is
 *            on the order of |x0| times machine epsilon, where
 *            <DBL_EPSILON> is 2.2e-16 and <FLT_EPSILON> is 1.2e-7, so
 *            it does not make sense to set <r_tol> smaller than this.
 *            (If you do, the function will require exact equality.)
 *            
 *            Special values follow IEEE754 floating point exact
 *            comparison rules; <r_tol> and <a_tol> have no
 *            effect. Any comparison involving <NaN> is
 *            <eslFAIL>. Equal-signed infinities are <eslOK>:
 *            <inf==inf>, <-inf==-inf>.
 *
 *            Note that <eslOK> has value 0, not TRUE, so you don't want to
 *            write code like <if (esl_DCompareNew(x0, x, 1e-5, 1e-5))>; you want to
 *            test explicitly against <eslOK>.
 *
 * Args:      x0     - reference value to compare against (either true, or better estimate)
 *            x      - test value
 *            r_tol  - relative tolerance
 *            a_tol  - absolute tolerance
 *
 * Returns:   <eslOK> if <x0> and <x> are approximately equal.
 *            <eslFAIL> if not.
* * Xref: H5/116 */ int esl_DCompareNew(double x0, double x, double r_tol, double a_tol) { if (isfinite(x0)) { if (fabs(x0 - x) <= r_tol * fabs(x0) + a_tol) return eslOK; } else { if (x0 == x) return eslOK; } // inf=inf, -inf=-inf; -inf!=inf, NaN!=(inf,-inf,NaN) return eslFAIL; } int esl_FCompareNew(float x0, float x, float r_tol, float a_tol) { if (isfinite(x0)) { if (fabs(x0 - x) <= r_tol * fabs(x0) + a_tol) return eslOK; } else { if (x0 == x) return eslOK; } return eslFAIL; } /* Function: esl_CCompare() * Synopsis: Compare two optional strings for equality. * * Purpose: Compare two optional strings and * for equality. * * If they're non- and identical up to their * -terminator, return . * * If they're both (unset), return . * * Otherwise, they're not identical; return . */ int esl_CCompare(char *s1, char *s2) { if (s1 == NULL && s2 == NULL) return eslOK; if (s1 == NULL || s2 == NULL) return eslFAIL; if (strcmp(s1, s2) != 0) return eslFAIL; return eslOK; } /*-------------- end, typed comparison routines --------------------*/ /***************************************************************** * 9. Unit tests. *****************************************************************/ #ifdef eslEASEL_TESTDRIVE static void utest_IsInteger(void) { char *goodones[] = { " 99 " }; char *badones[] = { "", " 99 foo " }; int ngood = sizeof(goodones) / sizeof(char *); int nbad = sizeof(badones) / sizeof(char *); int i; for (i = 0; i < ngood; i++) if (! 
esl_str_IsInteger(goodones[i])) esl_fatal("esl_str_IsInteger() should have recognized %s", goodones[i]); for (i = 0; i < nbad; i++) if ( esl_str_IsInteger(badones[i])) esl_fatal("esl_str_IsInteger() should not have recognized %s", badones[i]); } static void utest_IsReal(void) { char *goodones[] = { "99", " \t 99", "-99.00", "+99.00e-12", "+0xabc.defp-12", " +INFINITY", "-nan" }; char *badones[] = { "", "FIBB_BOVIN/67-212", /* testing for a fixed bug, 17 Dec 2012, reported by ER */ }; int ngood = sizeof(goodones) / sizeof(char *); int nbad = sizeof(badones) / sizeof(char *); int i; for (i = 0; i < ngood; i++) if (! esl_str_IsReal(goodones[i])) esl_fatal("esl_str_IsReal() should have recognized %s", goodones[i]); for (i = 0; i < nbad; i++) if ( esl_str_IsReal(badones[i])) esl_fatal("esl_str_IsReal() should not have recognized %s", badones[i]); } static void utest_strmapcat(void) { char *msg = "esl_strmapcat() unit test failed"; ESL_DSQ inmap[128]; char *pfx = "testing testing"; char *append = "one two three"; char *bad = "1 2 three"; char *dest; int64_t L1; esl_pos_t L2; int x; /* a simple input map, for testing */ for (x = 0; x < 128; x++) inmap[x] = eslDSQ_ILLEGAL; for (x = 'a'; x < 'z'; x++) inmap[x] = x; for (x = 'A'; x < 'Z'; x++) inmap[x] = x; inmap[' '] = eslDSQ_IGNORED; inmap[0] = '?'; L1 = strlen(pfx); L2 = strlen(append); if ( ( esl_strdup (pfx, L1, &dest)) != eslOK) esl_fatal(msg); if ( ( esl_strmapcat(inmap, &dest, &L1, append, L2)) != eslOK) esl_fatal(msg); if ( strcmp(dest, "testing testingonetwothree") != 0) esl_fatal(msg); free(dest); L1 = -1; L2 = -1; if ( ( esl_strdup (pfx, L1, &dest)) != eslOK) esl_fatal(msg); if ( ( esl_strmapcat(inmap, &dest, &L1, append, L2)) != eslOK) esl_fatal(msg); if ( strcmp(dest, "testing testingonetwothree") != 0) esl_fatal(msg); free(dest); L1 = 0; dest = NULL; if ( ( esl_strmapcat(inmap, &dest, &L1, pfx, -1)) != eslOK) esl_fatal(msg); if ( ( esl_strmapcat(inmap, &dest, &L1, append, -1)) != eslOK) esl_fatal(msg); if ( 
strcmp(dest, "testingtestingonetwothree") != 0) esl_fatal(msg); free(dest); if ( ( esl_strdup(pfx, -1, &dest)) != eslOK) esl_fatal(msg); L1 = 8; if ( ( esl_strmapcat(inmap, &dest, &L1, bad, -1)) != eslEINVAL) esl_fatal(msg); if ( strcmp(dest, "testing ??three") != 0) esl_fatal(msg); free(dest); } static void utest_strtok(void) { char msg[] = "esl_strtok() unit test failed"; char *teststring; char *s; char *tok; int toklen; char endc; if (esl_strdup("This is\t a sentence.", -1, &teststring) != eslOK) esl_fatal(msg); s = teststring; if (esl_strtok(&s, " ", &tok) != eslOK) esl_fatal(msg); if (strcmp(tok, "This") != 0) esl_fatal(msg); if (*s != 'i') esl_fatal(msg); if (esl_strtok_adv(&s, " \t", &tok, &toklen, &endc) != eslOK) esl_fatal(msg); if (strcmp(tok, "is") != 0) esl_fatal(msg); if (*s != ' ') esl_fatal(msg); if (toklen != 2) esl_fatal(msg); if (endc != '\t') esl_fatal(msg); if (esl_strtok_adv(&s, "\n", &tok, NULL, NULL) != eslOK) esl_fatal(msg); if (strcmp(tok, " a sentence.") != 0) esl_fatal(msg); if (*s != '\0') esl_fatal(msg); free(teststring); } static void utest_sprintf(void) { char msg[] = "unit tests for esl_[v]sprintf() failed"; int num = 99; char *what = "beer"; char *s = NULL; if (esl_sprintf(&s, "%d bottles of %s", num, what) != eslOK) esl_fatal(msg); if (strcmp(s, "99 bottles of beer") != 0) esl_fatal(msg); free(s); if (esl_sprintf(&s, NULL) != eslOK) esl_fatal(msg); if (s != NULL) esl_fatal(msg); } static void utest_FileExists(void) { char msg[] = "FileExists unit test failed"; char tmpfile[32] = "esltmpXXXXXX"; FILE *fp = NULL; #ifdef _POSIX_VERSION struct stat st; mode_t mode; #endif /* create a tmpfile */ if (esl_tmpfile_named(tmpfile, &fp) != eslOK) esl_fatal(msg); fprintf(fp, "Unit test.\n"); fclose(fp); if (! 
esl_FileExists(tmpfile)) esl_fatal(msg); #ifdef _POSIX_VERSION /* The FileExists doesn't just test existence; it also checks read permission */ if (stat(tmpfile, &st) != 0) esl_fatal(msg); mode = st.st_mode & ~S_IRUSR; if (chmod(tmpfile, mode) != 0) esl_fatal(msg); if (esl_FileExists(tmpfile)) esl_fatal(msg); #endif remove(tmpfile); if (esl_FileExists(tmpfile)) esl_fatal(msg); } static void utest_tmpfile_named(void) { char msg[] = "tmpfile_named unit test failed"; char tmpfile[32] = "esltmpXXXXXX"; FILE *fp = NULL; char buf[256]; if (esl_tmpfile_named(tmpfile, &fp) != eslOK) esl_fatal(msg); fprintf(fp, "Unit test.\n"); fclose(fp); if ((fp = fopen(tmpfile, "r")) == NULL) esl_fatal(msg); if (fgets(buf, 256, fp) == NULL) esl_fatal(msg); if (strcmp(buf, "Unit test.\n") != 0) esl_fatal(msg); fclose(fp); remove(tmpfile); } static void utest_compares(void) { char msg[] = "easel utest_compares failed"; // if (esl_DCompare(-eslINFINITY, eslINFINITY, 1e-5) != eslFAIL) esl_fatal(msg); /* -inf != inf */ // if (esl_DCompare(eslNaN, eslNaN, 1e-5) != eslFAIL) esl_fatal(msg); /* NaN fails in any comparison */ if (esl_DCompare(0., eslNaN, 1e-12) != eslFAIL) esl_fatal(msg); if (esl_DCompare(eslNaN, 0., 1e-12) != eslFAIL) esl_fatal(msg); // if (esl_DCompare(eslINFINITY, eslINFINITY, 1e-12) != eslFAIL) esl_fatal(msg); if (esl_DCompareNew(-eslINFINITY, eslINFINITY, 1e-12, 1e-16) != eslFAIL) esl_fatal(msg); // -inf != inf if (esl_DCompareNew(eslINFINITY, eslINFINITY, 1e-12, 1e-16) != eslOK) esl_fatal(msg); // inf = inf, even though rel and abs diff = inf! 
if (esl_DCompareNew(-eslINFINITY,-eslINFINITY, 1e-12, 1e-16) != eslOK) esl_fatal(msg); if (esl_DCompareNew(eslNaN, eslNaN, 1e-12, 1e-16) != eslFAIL) esl_fatal(msg); // NaN fails in any comparison if (esl_DCompareNew(0., eslNaN, 1e-12, 1e-16) != eslFAIL) esl_fatal(msg); if (esl_DCompareNew(eslNaN, 0., 1e-12, 1e-16) != eslFAIL) esl_fatal(msg); if (esl_DCompareNew(0., 1e-17, 1e-12, 1e-16) != eslOK) esl_fatal(msg); /* exact comparisons with zero tolerance: eslOK unless a NaN is involved */ if (esl_DCompareNew(0., 0.0, 0.0, 0.0) != eslOK) esl_fatal(msg); if (esl_DCompareNew(eslINFINITY, eslINFINITY, 0.0, 0.0) != eslOK) esl_fatal(msg); if (esl_DCompareNew(-eslINFINITY, -eslINFINITY, 0.0, 0.0) != eslOK) esl_fatal(msg); if (esl_DCompareNew(eslNaN, eslNaN, 0.0, 0.0) != eslFAIL) esl_fatal(msg); /* float versions */ if (esl_FCompareNew(-eslINFINITY, eslINFINITY, 1e-6, 1e-10) != eslFAIL) esl_fatal(msg); if (esl_FCompareNew(eslINFINITY, eslINFINITY, 1e-6, 1e-10) != eslOK) esl_fatal(msg); if (esl_FCompareNew(-eslINFINITY,-eslINFINITY, 1e-6, 1e-10) != eslOK) esl_fatal(msg); if (esl_FCompareNew(eslNaN, eslNaN, 1e-6, 1e-10) != eslFAIL) esl_fatal(msg); if (esl_FCompareNew(0., eslNaN, 1e-6, 1e-10) != eslFAIL) esl_fatal(msg); if (esl_FCompareNew(eslNaN, 0., 1e-6, 1e-10) != eslFAIL) esl_fatal(msg); if (esl_FCompareNew(0., 1e-11, 1e-6, 1e-10) != eslOK) esl_fatal(msg); if (esl_FCompareNew(0., 0.0, 0.0, 0.0) != eslOK) esl_fatal(msg); if (esl_FCompareNew(eslINFINITY, eslINFINITY, 0.0, 0.0) != eslOK) esl_fatal(msg); if (esl_FCompareNew(-eslINFINITY, -eslINFINITY, 0.0, 0.0) != eslOK) esl_fatal(msg); if (esl_FCompareNew(eslNaN, eslNaN, 0.0, 0.0) != eslFAIL) esl_fatal(msg); } #endif /*eslEASEL_TESTDRIVE*/ /***************************************************************** * 10. Test driver. *****************************************************************/ #ifdef eslEASEL_TESTDRIVE /* gcc -g -Wall -o easel_utest -I. -L. 
-DeslEASEL_TESTDRIVE easel.c -leasel -lm * ./easel_utest */ #include "easel.h" int main(void) { #ifdef eslTEST_THROWING esl_exception_SetHandler(&esl_nonfatal_handler); #endif utest_IsInteger(); utest_IsReal(); utest_strmapcat(); utest_strtok(); utest_sprintf(); utest_FileExists(); utest_tmpfile_named(); utest_compares(); return eslOK; } #endif /*eslEASEL_TESTDRIVE*/ /***************************************************************** * 11. Examples. *****************************************************************/ #ifdef eslEASEL_EXAMPLE /*::cexcerpt::easel_example_tmpfiles::begin::*/ /* gcc -g -Wall -o example -I. -L. -DeslEASEL_EXAMPLE_TMPFILES easel.c -leasel -lm * ./example */ #include "easel.h" int main(void) { char tmpfile1[32] = "esltmpXXXXXX"; /* a transient, secure tmpfile: 6 X's are important */ char tmpfile2[32] = "esltmpXXXXXX"; /* a named tmpfile */ FILE *fp = NULL; char buf[256]; /* Example of using a secure, unnamed tmpfile. * Note, the new tmpfile is automatically deleted, so to cleanup, just fclose() the FILE */ esl_tmpfile(tmpfile1, &fp); fprintf(fp, "Hello world!\n"); rewind(fp); if (fgets(buf, 256, fp) == NULL) esl_fatal("bad fread()"); printf("first temp file says: %s\n", buf); fclose(fp); /* Example of reasonably securely using a named tmpfile. * To cleanup, must both fclose() the FILE and remove() the file by name */ esl_tmpfile_named(tmpfile2, &fp); fprintf(fp, "Hello insecure world!\n"); fclose(fp); /* tmpfile2 now exists on disk and can be closed/reopened */ fp = fopen(tmpfile2, "r"); if (fgets(buf, 256, fp) == NULL) esl_fatal("bad fread()"); printf("second temp file says: %s\n", buf); fclose(fp); remove(tmpfile2); /* disk file cleanup necessary with this version. */ return eslOK; } /*::cexcerpt::easel_example_tmpfiles::end::*/ #endif /*eslEASEL_EXAMPLE*/