/* Statistical routines for gamma distributions. * * Contents: * 1. Routines for evaluating densities and distributions * 2. Generic API routines: for general interface w/ histogram module * 3. Dumping plots for files * 4. Sampling * 5. ML fitting to complete data * 6. Unit tests * 7. Test driver * 8. Example * * Xref: STL10/65 * * To do: * - Fit*() functions should return eslEINVAL on n=0, eslENORESULT * on failure due to small n. Compare esl_gumbel. xref J12/93. * SRE, Wed Nov 27 11:18:19 2013 */ #include #include #include #include #include "easel.h" #include "esl_histogram.h" #include "esl_random.h" #include "esl_stats.h" #include "esl_gamma.h" static int tau_by_moments_binned(ESL_HISTOGRAM *g, double mu, double *ret_tau, double *ret_mean, double *ret_logsum); static double tau_function(double tau, double mean, double logsum); /**************************************************************************** * 1. Routines for evaluating densities and distributions ****************************************************************************/ /* Function: esl_gam_pdf() * * Purpose: Calculates the gamma PDF $P(X=x)$ given value , * location parameter , scale parameter , and shape * parameter . */ double esl_gam_pdf(double x, double mu, double lambda, double tau) { double y = lambda * (x - mu); double gamtau; double val; if (y < 0.) return 0.; esl_stats_LogGamma(tau, &gamtau); val = ((tau*log(lambda) + (tau-1.)*log(x-mu)) - gamtau) - y; return exp(val); } /* Function: esl_gam_logpdf() * * Purpose: Calculates log of the probability density function * for the gamma, $\log P(X=x)$, given value , * location parameter , scale parameter , and * shape parameter . */ double esl_gam_logpdf(double x, double mu, double lambda, double tau) { double y = lambda * (x - mu); double gamtau; double val; if (x < 0.) return -eslINFINITY; esl_stats_LogGamma(tau, &gamtau); val = ((tau*log(lambda) + (tau-1.)*log(x-mu)) - gamtau) - y; return val; } /* Function: esl_gam_cdf() * * Purpose: Calculates the cumulative distribution function * for the gamma, $P(X \leq x)$, given value , * location parameter , scale parameter , and * shape parameter . * * (For $\mu=0$, $\lambda = 1$, this is the * incomplete Gamma function $P(\tau,x)$.) */ double esl_gam_cdf(double x, double mu, double lambda, double tau) { double y = lambda * (x - mu); double val; if (y <= 0.) return 0.; esl_stats_IncompleteGamma(tau, y, &val, NULL); return val; } /* Function: esl_gam_logcdf() * * Purpose: Calculates the log of the cumulative distribution function * for the gamma, $\log P(X \leq x)$, given value , location * parameter , scale parameter , and shape * parameter . */ double esl_gam_logcdf(double x, double mu, double lambda, double tau) { double y = lambda * (x - mu); double val; if (y <= 0.) return -eslINFINITY; esl_stats_IncompleteGamma(tau, y, &val, NULL); return log(val); } /* Function: esl_gam_surv() * * Purpose: Calculates the survival function for the gamma, $P(X > x)$, * given value , location parameter , scale parameter * , and shape parameter . */ double esl_gam_surv(double x, double mu, double lambda, double tau) { double y = lambda * (x - mu); double val; if (y <= 0.) return 1.0; esl_stats_IncompleteGamma(tau, y, NULL, &val); return val; } /* Function: esl_gam_logsurv() * * Purpose: Calculates the log of the survival function for the gamma, * $\log P(X > x)$, given value , location parameter , * scale parameter , and shape parameter . * * Relies on , which has limited * dynamic range. Any result of < -700 or so will be -infinity. * To fix this, we need a log version of . */ double esl_gam_logsurv(double x, double mu, double lambda, double tau) { double y = lambda * (x - mu); double val; if (y <= 0.) return 0.; esl_stats_IncompleteGamma(tau, y, NULL, &val); return log(val); } /* Function: esl_gam_invcdf() * * Purpose: Calculates the inverse CDF for a gamma with location * parameter , scale parameter and shape * parameter , returning the value at which the * CDF is

. * * This inverse CDF is solved by a computationally expensive, * brute force bisection search on the CDF of . */ double esl_gam_invcdf(double p, double mu, double lambda, double tau) { double x1, x2, xm; /* low, high guesses at x */ double f2, fm; double tol = 1e-6; x1 = 0.; x2 = tau / lambda; do { /* bracket */ x2 = x2*2.; f2 = esl_gam_cdf(x2, mu, lambda, tau); } while (f2 < p); do { /* bisection */ xm = (x1+x2)/ 2.; fm = esl_gam_cdf(xm, mu, lambda, tau); if (fm > p) x2 = xm; else if (fm < p) x1 = xm; else return xm; /* unlikely exact fm==p */ } while ( (x2-x1)/(x1+x2) > tol); xm = (x1+x2)/2.; return xm; } /*-------------------- end densities & distributions ------------------------*/ /**************************************************************************** * 2. Generic API routines: for general interface w/ histogram module ****************************************************************************/ /* Function: esl_gam_generic_pdf() * * Purpose: Generic-API wrapper around , taking * a void ptr to a double array containing $\mu$, $\lambda$, * $\tau$ parameters. */ double esl_gam_generic_pdf(double x, void *params) { double *p = (double *) params; return esl_gam_pdf(x, p[0], p[1], p[2]); } /* Function: esl_gam_generic_cdf() * * Purpose: Generic-API wrapper around , taking * a void ptr to a double array containing $\mu$, $\lambda$, * $\tau$ parameters. */ double esl_gam_generic_cdf(double x, void *params) { double *p = (double *) params; return esl_gam_cdf(x, p[0], p[1], p[2]); } /* Function: esl_gam_generic_surv() * * Purpose: Generic-API wrapper around , taking * a void ptr to a double array containing $\mu$, $\lambda$, * $\tau$ parameters. */ double esl_gam_generic_surv(double x, void *params) { double *p = (double *) params; return esl_gam_surv(x, p[0], p[1], p[2]); } /* Function: esl_gam_generic_invcdf() * * Purpose: Generic-API wrapper around , taking * a void ptr to a double array containing $\mu$, $\lambda$, * $\tau$ parameters. */ double esl_gam_generic_invcdf(double x, void *params) { double *p = (double *) params; return esl_gam_invcdf(x, p[0], p[1], p[2]); } /*------------------------ end generic API ---------------------------------*/ /**************************************************************************** * 3. Dumping plots for files ****************************************************************************/ /* Function: esl_gam_Plot() * * Purpose: Plot some gamma distribution function (for instance, * ) for parameters , , and , for * a range of values x from to in steps of ; * output to an open stream in xmgrace XY input format. * * Returns: on success. * * Throws: on any system write error, such as a filled disk. */ int esl_gam_Plot(FILE *fp, double mu, double lambda, double tau, double (*func)(double x, double mu, double lambda, double tau), double xmin, double xmax, double xstep) { double x; for (x = xmin; x <= xmax; x += xstep) if (fprintf(fp, "%f\t%g\n", x, (*func)(x, mu, lambda, tau)) < 0) ESL_EXCEPTION_SYS(eslEWRITE, "gamma plot write failed"); if (fprintf(fp, "&\n") < 0) ESL_EXCEPTION_SYS(eslEWRITE, "gamma plot write failed"); return eslOK; } /*-------------------- end plot dumping routines ---------------------------*/ /**************************************************************************** * 4. Sampling ****************************************************************************/ /* Function: esl_gam_Sample() * * Purpose: Sample a gamma-distributed random variate. */ double esl_gam_Sample(ESL_RANDOMNESS *r, double mu, double lambda, double tau) { double x; do { x = esl_rnd_Gamma(r, tau); x = mu + x / lambda; // a roundoff error of 1 + \epsilon = 1 is possible here } while (x == mu); // ... so avoid it. return x; } /*--------------------------- end sampling ---------------------------------*/ /**************************************************************************** * 5. ML fitting to complete data ****************************************************************************/ /* gam_sufficient_stats() * * The mean \frac{\sum_i x_i}{n} and the mean of the logs \frac{\sum_i * log(x_i)}{n} are all we need for calculating the NLL for the gamma, * and its first and second derivatives. * * Throws if any of the x[i] are < \mu. */ static int gam_sufficient_stats(double *x, int n, double mu, double *ret_xbar, double *ret_logxbar) { double xbar = 0.; double logxbar = 0.; int i; int status; for (i = 0; i < n; i++) // this is naive summation, which is numerically unstable for large sums; { // ... double precision should be enough; see esl_vec_DSum() for Kahan compensated summation if you need more if (x[i] - mu < 0) ESL_XEXCEPTION(eslEINVAL, "x[i] < mu in esl_gamma::gamma_sufficient_stats()"); xbar += x[i] - mu; logxbar += (x[i] - mu == 0.0) ? -36. : log(x[i] - mu); // -36. is ~ log(DBL_EPSILON); safeguard against roundoff to 0 } *ret_xbar = xbar / n; *ret_logxbar = logxbar / n; return eslOK; ERROR: *ret_xbar = -eslINFINITY; *ret_logxbar = -eslINFINITY; return status; } /* gam_nll() * * This is the one-parameter (tau only) form of the average NLL per * sample; the objective function for the generalized Newton, obtained * by substituting \lambda = \tau / xbar. See esl_gamma.md notes. * * The NLL can be negative! The gamma is a continuous probability * distribution. * * Throws if tau < 0. */ static int gam_nll(double xbar, double logxbar, double tau, double *ret_nll) { double logp = 0.; double loggamtau; int status; if ((status = esl_stats_LogGamma(tau, &loggamtau)) != eslOK) goto ERROR; logp = tau * log(tau) - tau * log(xbar) - loggamtau + (tau - 1.0) * logxbar - tau; *ret_nll = -logp; return eslOK; ERROR: *ret_nll = eslINFINITY; return status; } /* gam_fitting_engine() * * Implements generalized Newton optimization algorithm * [Minka00,Minka02]; given precalculated means and * and known , obtain ML estimates of the lambda and tau * parameters and return them in <*ret_lambda>, <*ret_tau>. * * Throws if it fails to converge; <*ret_lambda> * and <*ret_tau> are 0.0. * Throws if we try to evaluate a bad value, which * I think can only happen on some sort of internal coding error. */ static int gam_fitting_engine(double xbar, double logxbar, double mu, double *ret_lambda, double *ret_tau) { int max_iterations = 100; int iter = 0; double fx = eslINFINITY; double psi, trigamma; double tau, old_tau; double old_fx; int status; tau = 0.5 / (log(xbar) - logxbar); // initial rough estimate for tau, combining Stirling approx for log Gamma(x) with f'(nll)=0 do { old_fx = fx; old_tau = tau; /* Update tau using generalized Newton, based on fitting g(\tau) = a + b \tau + c \log \tau around current \tau */ if (( status = esl_stats_Psi(tau, &psi)) != eslOK) goto ERROR; if (( status = esl_stats_Trigamma(tau, &trigamma)) != eslOK) goto ERROR; tau = 1.0 / (1.0/tau + (logxbar - log(xbar) + log(tau) - psi) / ( tau - tau * tau * trigamma)); if (( status = gam_nll(xbar, logxbar, tau, &fx)) != eslOK) goto ERROR; iter++; } while ( iter < max_iterations && (esl_DCompare(old_tau, tau, 1e-6, 1e-6) != eslOK || esl_DCompare(old_fx, fx, 1e-6, 1e-6) != eslOK) ); if (iter == max_iterations) ESL_XEXCEPTION(eslENOHALT, "generalized Newton failed to converge in ML Gamma fitting"); *ret_lambda = tau / xbar; *ret_tau = tau; return eslOK; ERROR: *ret_lambda = -eslINFINITY; *ret_tau = -eslINFINITY; return status; } /* Function: esl_gam_FitComplete() * * Purpose: Given complete data consisting of samples , * and a known location parameter , determine and return * maximum likelihood parameters and . * * Args: x - complete gamma-distributed data [0..n-1] * n - number of samples in * mu - known location parameter * ret_lambda - RETURN: ML estimate of lambda * ret_tau - RETURN: ML estimate of tau * * Returns: on success. * * Throws: if convergence fails; * if data cannot be gamma distributed (some ). * ( is also code-possible, but theory-impossible.) */ int esl_gam_FitComplete(double *x, int n, double mu, double *ret_lambda, double *ret_tau) { double xbar; double logxbar; double lambda, tau; int status; if ((status = gam_sufficient_stats(x, n, mu, &xbar, &logxbar)) != eslOK) goto ERROR; if ((status = gam_fitting_engine(xbar, logxbar, mu, &lambda, &tau)) != eslOK) goto ERROR; *ret_lambda = lambda; *ret_tau = tau; return eslOK; ERROR: *ret_lambda = -eslINFINITY; *ret_tau = -eslINFINITY; return status; } /* Function: esl_gam_FitCountHistogram() * Synopsis: Fit a gamma distribution to $c_i$ counts * Incept: SRE, Mon 13 Jun 2022 * * Purpose: Given counts for , with , * fit a gamma approximation to this histogram. * * Counts are doubles, not integers; this allows us to use * weighted counts, and it also allows the total number of * counts to range up to ~9e15 (the maximum exact integer * representation in a double, 2^53). * * Args: c - observed counts c[1..n]; c[0]=0; other c[i>0] >= 0 * n - maximum i in c[i] * ret_lamnda - RETURN: estimated mu * ret_tau - RETURN: estimated sigma * * Returns: on success * * Throws: for problems like c[0] != 0, c[i] < 0, or zero total counts. * if generalized Newton optimization fails to converge (very unlikely) * is code-possible, theory-impossible */ int esl_gam_FitCountHistogram(double *ct, int n, double mu, double *ret_lambda, double *ret_tau) { double xbar = 0.; // \bar{x}; mean x value double logxbar = 0.; // \overline{\log x}; mean of log x double ntot = 0.; int mu_i = lround(mu); double v; int i; int status; if (ceil(mu) != mu) ESL_XEXCEPTION(eslEINVAL, "esl_gam_FitCountHistogram assumes mu is an exact integer representation (e.g. 0.0)"); for (i = 0; i <= mu_i; i++) if (ct[i] != 0.) ESL_XEXCEPTION(eslEINVAL, "esl_gam_FitCountHistogram assumes all ct[i] for i <= mu are 0.0"); for (i = mu_i+1; i <= n; i++) if (ct[i] > 0.) { v = (double) i - mu; xbar += ct[i] * v; logxbar += ct[i] * log(v); ntot += ct[i]; } else if (ct[i] < 0.) ESL_XEXCEPTION(eslEINVAL, "count ct[%d] < 0", i); if (ntot <= 0.) ESL_XEXCEPTION(eslEINVAL, "count histogram ct[] has no counts"); xbar /= ntot; logxbar /= ntot; return gam_fitting_engine(xbar, logxbar, mu, ret_lambda, ret_tau); ERROR: *ret_lambda = -eslINFINITY; *ret_tau = -eslINFINITY; return status; } /* Function: esl_gam_FitCompleteBinned() * * Purpose: Fit a complete gamma distribution to the observed * binned data in a histogram , where each * bin i holds some number of observed samples x with values from * lower bound l to upper bound u (that is, $l < x \leq u$); * determine and return maximum likelihood estimates for the * parameters $\mu, \lambda, \tau$ and * return them in <*ret_mu>, <*ret_lambda>, <*ret_tau>. * * Unlike the case where the * ML fit optimizes $\sum_i n_i \log P(a_i \leq x < b_i)$ * where $a_i \leq b_i$ are the bounds of bin i with * occupancy $n_i$, here we take the approximation that * $c_i = a_i + 0.5*(b_i-a_i)$ and optimize $\log P(a_i * \leq x < b_i) \simeq \log(w) + \log P(x=c_i)$. * * Since $b_i-a_i = w$ is fixed, optimizing the above * becomes equivalent to optimizing $\sum_i n_i * log P(x=c_i)$. * * The optimization is then equivalent to the non-binned case, * but subsituting in averages such as $\sum_i x(i)$ by * $\sum_i n_i*c_i i$, and so forth. * * If the binned data in were set to focus on * a tail by virtual censoring, the "complete" gamma is * fitted to this tail. The caller then also needs to * remember what fraction of the probability mass was in this * tail. * * Args: g - histogram * ret_mu - RETURN: given by the histogram * ret_lambda - RETURN: ML estimate of lambda * ret_tau - RETURN: ML estimate of tau * * Returns: on success. * * Throws: if bracketing or bisection fails; * if data cannot be gamma distributed (some , * or zero variance in x). * * Returns: on success. * * Throws: if dataset is true-censored. * * Note: This is an older implementation than FitComplete(). Here we * use rootfinding to find tau. FitComplete() uses * generalized Newton optimization [Minka00,Minka02], which * is much more efficient (and way cooler). */ int esl_gam_FitCompleteBinned(ESL_HISTOGRAM *g, double *ret_mu, double *ret_lambda, double *ret_tau) { double mu = 0.; double mean, logsum; int i; double c, fc; double a, fa; double b, fb; double tol = 1e-6; int maxit = 100; int status; if (g->dataset_is == COMPLETE) { if (g->is_rounded) mu = esl_histogram_Bin2LBound(g, g->imin); else mu = g->xmin; } else if (g->dataset_is == VIRTUAL_CENSORED) /* i.e., we'll fit to tail */ mu = g->phi; else if (g->dataset_is == TRUE_CENSORED) ESL_EXCEPTION(eslEINVAL, "can't fit true censored dataset"); if ((status = tau_by_moments_binned(g, mu, &c, &mean, &logsum) != eslOK)) goto ERROR; a = b = c; if (c == 1.0) { *ret_mu = mu; *ret_lambda = c / mean; *ret_tau = c; return eslOK; } fc = tau_function(c, mean, logsum); /* Rootfinding, 1.: bracketing the root with points a,b. */ if (fc > 0.) /* fx>0 means tau is too small, search right */ { for (i = 0; i < maxit; i++) /* max iterations */ { b = a * 2.; fb = tau_function(b, mean, logsum); if (fb < 0.) break; /* a,b now bracket */ a = b; /* else fb>0, so b is a better left bracket than a */ } if (i == maxit) ESL_XEXCEPTION(eslENOHALT, "failed to bracket"); } else if (fc < 0.) /* fx<0 means tau is too large, search left */ { for (i = 0; i < maxit; i++) { a = b/2.; fa = tau_function(a, mean, logsum); if (fa > 0.) break; /* a,b now bracket */ b = a; /* else fa<0, so a is a better right bracket than b */ } if (i == maxit) ESL_XEXCEPTION(eslENOHALT, "failed to bracket"); } /* Rootfinding, 2.: Bisection search. * We have the root in interval (a,b). */ for (i = 0; i < maxit; i++) { c = (a+b)/2.; /* bisection */ fc = tau_function(c, mean, logsum); if (fc > 0.) a = c; else if (fc < 0.) b = c; else break; /* unlikely event that we nail it */ if ((b-a) <= tol) { c = (a+b)/2.; break; } } if (i == maxit) ESL_XEXCEPTION(eslENOHALT, "bisection search failed"); *ret_mu = mu; *ret_lambda = (mean > 0.)? c / mean : 0.0; *ret_tau = c; return eslOK; ERROR: *ret_mu = 0.; *ret_lambda = 0.; *ret_tau = 0.; return status; } /* tau_function() * * This is the rootfinding equation for tau... * \ref{eqn:gamma_tau_root} in the documentation. * mean is 1/N \sum (x_i - \mu) * logsum is 1/N \sum \log (x_i - \mu) * These are both independent of tau, and dependent * on all data points, so we require the caller to * precalculate them for us. * * This is a decreasing function of tau: * the return value is > 0 when tau is too small, * and < 0 when tau is too large. */ static double tau_function(double tau, double mean, double logsum) { double psitau; esl_stats_Psi(tau, &psitau); return ( ((log(tau) - psitau) - log(mean)) + logsum ); } /* tau_by_moments_binned() * * Obtain an initial estimate for tau by matching moments. Also * returns mean and logsum, which we need for ML fitting. To obtain a * lambda estimate, use lambda = tau / mean. * * note: the whole method relies on the property log(sum) >= logsum; * which works if all points are valid, that is positive; * log(0) = -inf is not a valid point, * and the inequality (Jensen's inequality) does not hold. */ static int tau_by_moments_binned(ESL_HISTOGRAM *g, double mu, double *ret_tau, double *ret_mean, double *ret_logsum) { int i; double ai, bi, ci; double sum, mean, var, logsum; double tol = 1e-6; sum = mean = var = logsum = 0.; for (i = g->cmin+1; i <= g->imax; i++) /* for each occupied bin */ { if (g->obs[i] == 0) continue; ai = esl_histogram_Bin2LBound(g,i); bi = esl_histogram_Bin2UBound(g,i); ci = ai + 0.5 * (bi-ai); if (ci < mu) ESL_EXCEPTION(eslEINVAL, "No point can be < mu in gamma data"); sum += (double)g->obs[i]; mean += (double)g->obs[i] * (ci-mu); /* mean is temporarily just the sum */ logsum += (ci>mu)? (double)g->obs[i] * log(ci-mu):0.0; var += (double)g->obs[i] * (ci-mu) * (ci-mu); /* var is temporarily the sum of squares */ } var = (sum > 1.)? (var - mean*mean/sum) / (sum-1.) : 0.0; /* now var is the variance */ mean /= (sum > 0.)? sum : 1.; /* and now mean is the mean */ logsum /= (sum > 0.)? sum : 1.; if (ret_tau != NULL) *ret_tau = (var < tol || mean == 0.)? 1. : mean * mean / var; if (ret_mean != NULL) *ret_mean = mean; if (ret_logsum != NULL) *ret_logsum = logsum; return eslOK; } /**************************************************************************** * 6. Unit tests ****************************************************************************/ #ifdef eslGAMMA_TESTDRIVE /* utest_FitComplete() * * This test fails stochastically, because it generates a random data * sample from random choices of lambda and tau. So we run it with a * fixed RNG seed in production code ( is normally * FALSE). * * The 0.5 lower limit is set for the random params because if you let * \tau get small, you get very peaky distributions at the origin, and * numerical error in fitting them [SRE:H13/23]. */ static void utest_FitComplete(ESL_RANDOMNESS *rng, int allow_badluck) { char msg[] = "esl_gamma::FitComplete unit test failed"; double *x = NULL; int n = 1000000; double mu = 10.0; // don't use 0., in case I've idiotically assumed mu=0 somewhere double lambda, tau; // don't randomly initialize these until *after* the allow_badluck RNG reinit double lambda_e, tau_e; // new fit double xbar, logxbar; double nll, nll_e; int i; int status; if (! allow_badluck) esl_randomness_Init(rng, 42); lambda = 0.5 + esl_rnd_UniformPositive(rng) * 9.5; // (0.5,10.0) tau = 0.5 + esl_rnd_UniformPositive(rng) * 9.5; ESL_ALLOC(x, sizeof(double) * n); for (i = 0; i < n; i++) x[i] = esl_gam_Sample(rng, mu, lambda, tau); if ((status = gam_sufficient_stats(x, n, mu, &xbar, &logxbar)) != eslOK) esl_fatal(msg); if ((status = esl_gam_FitComplete(x, n, mu, &lambda_e, &tau_e)) != eslOK) esl_fatal(msg); if ((status = gam_nll(xbar, logxbar, tau, &nll)) != eslOK) esl_fatal(msg); if ((status = gam_nll(xbar, logxbar, tau_e, &nll_e)) != eslOK) esl_fatal(msg); #if 0 printf("true mu = %f\n", mu); printf("true lambda = %f\n", lambda); printf("true tau = %f\n", tau); printf("sample mean = %f\n", xbar); printf("nll at true = %f\n", nll); printf("fit lambda = %f\n", lambda_e); printf("fit tau = %f\n", tau_e); printf("nll at fit = %f\n", nll_e); printf("lambda error = %f rel %f abs\n", (lambda_e-lambda)/lambda, lambda_e-lambda); printf("tau error = %f rel %f abs\n", (tau_e-tau)/tau, tau_e-tau); printf("nll diff = %f\n", nll - nll_e); // this difference should be nonnegative; nll_e <= nll. #endif if ( esl_DCompare(lambda, lambda_e, 1e-2, 1e-2) != eslOK) esl_fatal(msg); if ( esl_DCompare(tau, tau_e, 1e-2, 1e-2) != eslOK) esl_fatal(msg); if ( nll < nll_e) esl_fatal(msg); // it's a convex optimization. free(x); return; ERROR: esl_fatal(msg); } /* utest_FitCountHistogram() * * (This test has no stochastic failures; should always succeed.) */ static void utest_FitCountHistogram(void) { char msg[] = "esl_gamma::FitCountHistogram unit test failed"; double lambda = 0.0053; // an arbitrary lambda double tau = 1.9; // ... and an arbitrary tau double mu = 10.; int mu_i = lround(mu); int maxL = 100000; int totn = 10000000; double *ct = NULL; double lambda_e, tau_e; int i; int status; ESL_ALLOC(ct, sizeof(double) * (maxL+1)); for (i = 0; i <= mu_i; i++) ct[i] = 0.; for (i = mu_i+1; i <= maxL; i++) ct[i] = totn * esl_gam_pdf((double) i, mu, lambda, tau); if (( status = esl_gam_FitCountHistogram(ct, maxL, mu, &lambda_e, &tau_e)) != eslOK) esl_fatal(msg); if ( esl_DCompare(lambda, lambda_e, 1e-3, 1e-3) != eslOK) esl_fatal(msg); if ( esl_DCompare(tau, tau_e, 1e-3, 1e-3) != eslOK) esl_fatal(msg); free(ct); return; ERROR: esl_fatal(msg); } #endif //eslGAMMA_TESTDRIVE /**************************************************************************** * 6. Test driver ****************************************************************************/ #ifdef eslGAMMA_TESTDRIVE #include "esl_getopts.h" static ESL_OPTIONS options[] = { /* name type default env range toggles reqs incomp help docgroup*/ { "-h", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "show brief help on version and usage", 0 }, { "-s", eslARG_INT, "0", NULL, NULL, NULL, NULL, NULL, "set random number seed to ", 0 }, { "-x", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "allow bad luck (stochastic failures)", 0 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, }; static char usage[] = "[-options]"; static char banner[] = "test driver for lognormal module"; int main(int argc, char **argv) { ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 0, argc, argv, banner, usage); ESL_RANDOMNESS *rng = esl_randomness_Create(esl_opt_GetInteger(go, "-s")); int allow_badluck = esl_opt_GetBoolean(go, "-x"); // if a utest can fail just by chance, let it, instead of suppressing fprintf(stderr, "## %s\n", argv[0]); fprintf(stderr, "# rng seed = %" PRIu32 "\n", esl_randomness_GetSeed(rng)); utest_FitCountHistogram(); /* tests that can stochastically fail go last, because they default to reinit'ing RNG w/ fixed seed */ utest_FitComplete(rng, allow_badluck); fprintf(stderr, "# status = ok\n"); esl_randomness_Destroy(rng); esl_getopts_Destroy(go); return eslOK; } #endif //eslGAMMA_TESTDRIVE /**************************************************************************** * Example main() ****************************************************************************/ #ifdef eslGAMMA_EXAMPLE /*::cexcerpt::gam_example::begin::*/ #include #include "easel.h" #include "esl_random.h" #include "esl_histogram.h" #include "esl_gamma.h" int main(int argc, char **argv) { double mu = -5.0; double lambda = 2.0; double tau = 0.7; ESL_HISTOGRAM *h = esl_histogram_CreateFull(mu, 100., 0.1); ESL_RANDOMNESS *r = esl_randomness_Create(0); int n = 10000; double elam, etau; int i; double x; double *data; int ndata; /* Take gamma-distributed random samples. */ for (i = 0; i < n; i++) { x = esl_gam_Sample(r, mu, lambda, tau); esl_histogram_Add(h, x); } esl_histogram_GetData(h, &data, &ndata); /* Plot the empirical (sampled) and expected survivals */ esl_histogram_PlotSurvival(stdout, h); esl_gam_Plot(stdout, mu, lambda, tau, &esl_gam_surv, h->xmin, h->xmax, 0.1); /* ML fit to complete data, and plot fitted survival curve */ esl_gam_FitComplete(data, ndata, mu, &elam, &etau); esl_gam_Plot(stdout, mu, elam, etau, &esl_gam_surv, h->xmin, h->xmax, 0.1); esl_randomness_Destroy(r); esl_histogram_Destroy(h); return 0; } /*::cexcerpt::gam_example::end::*/ #endif /*eslGAMMA_EXAMPLE*/