New classes for error fucntion (and related functions) calculations and for calculating the bivariate probability distribution. Error function is robust and does not have some of the problems seen in the Apache error function.

+/* + * Class BivariateDistribution + */ +package org.lcsim.math.probability; + +/** + * Calculate the probability integral for a set of bins in the x-y plane + * of a bivariate normal distribution (i.e., a 2D Gaussian probability). + * + * The evaluation of the probability integrals is described in: + * + * Alan Genz, "Numerical Computation of Rectangular Bivariate and Trivariate + * Normal and t Probabilities" in Statistics and Computing 14, 151 (2004). + * + * The integration code is adapted from the FORTRAN source at: + * + * http://www.math.wsu.edu/faculty/genz/homepage + * + * @author Richard Partridge + */ +public class BivariateDistribution { + + private int _nx; + private int _ny; + private double _xmin; + private double _ymin; + private double _dx; + private double _dy; + private double[] _h; + private double[] _k; + + // Weights and coordinates for 6 point Gauss-Legendre integration + private double[] _w6 = {0.1713244923791705, 0.3607615730481384, 0.4679139345726904}; + private double[] _x6 = {0.9324695142031522, 0.6612093864662647, 0.2386191860831970}; + + // Weights and coordinates for 12 point Gauss-Legendre integration + private double[] _w12 = {.04717533638651177, 0.1069393259953183, 0.1600783285433464, + 0.2031674267230659, 0.2334925365383547, 0.2491470458134029}; + private double[] _x12 = {0.9815606342467191, 0.9041172563704750, 0.7699026741943050, + 0.5873179542866171, 0.3678314989981802, 0.1252334085114692}; + + // Weights and coordinates for 20 point Gauss-Legendre integration + private double[] _w20 = {.01761400713915212, .04060142980038694, .06267204833410906, + .08327674157670475, 0.1019301198172404, 0.1181945319615184, + 0.1316886384491766, 0.1420961093183821, 0.1491729864726037, + 0.1527533871307259}; + private double[] _x20 = {0.9931285991850949, 0.9639719272779138, 0.9122344282513259, + 0.8391169718222188, 0.7463319064601508, 0.6360536807265150, + 0.5108670019508271, 0.3737060887154196, 0.2277858511416451, + 0.07652652113349733}; + + /** + * Set the locations of the x-coordinate bins + * + * @param nx number of x coordinate bins + * @param xmin minimum x coordinate + * @param dx width of x coordinate bins + */ + public void xBins(int nx, double xmin, double dx) { + _nx = nx; + _xmin = xmin; + _dx = dx; + _h = new double[_nx + 1]; + } + + /** + * Set the locations of the y-coordinate bins + * + * @param ny number of y coordinate bins + * @param ymin minimum y coordinate + * @param dy width of y coordinate bins + */ + public void yBins(int ny, double ymin, double dy) { + _ny = ny; + _ymin = ymin; + _dy = dy; + _k = new double[_ny + 1]; + } + + /** + * Integrate the Gaussian probability distribution over each x-y bins, + * which must be defined before calling this method. + * + * The output is a double array that gives the binned probability + * distribution. The first array index is used to indicate the bin in x + * and the second array index is used to indicate the bin in y. + * + * @param x0 mean x coordinate of Gaussian distribution + * @param y0 mean y coordinate of Gaussian distribution + * @param sigx x coordinate standard deviation + * @param sigy y coordinate standard deviation + * @param rho x-y correlation coefficient + * @return probability distribution + */ + public double[][] Calculate(double x0, double y0, double sigx, double sigy, + double rho) { + + // Calculate the scaled x coordinate for each bin edge + for (int i = 0; i < _nx + 1; i++) { + _h[i] = (_xmin + i * _dx - x0) / sigx; + } + + // Calculate the scaled y coordinate for each bin edge + for (int j = 0; j < _ny + 1; j++) { + _k[j] = (_ymin + j * _dy - y0) / sigy; + } + + // Create the array that will hold the binned probabilities + double[][] bi = new double[_nx][_ny]; + + // Loop over the bin vertices + for (int i = 0; i < _nx + 1; i++) { + for (int j = 0; j < _ny + 1; j++) { + + // Calculate the probability for x>h and y>k for this vertex + double prob = GenzCalc(_h[i], _k[j], rho); + + // Add or subtract this probability from the affected bins. + // The bin probability for bin (0,0) is the sum of the Genz + // probabilities for the (0,0) and (1,1) vertices MINUS the + // sum of the probabilities for the (0,1) and (1,0) vertices + if (i > 0 && j > 0) { + bi[i - 1][j - 1] += prob; + } + if (i > 0 && j < _ny) { + bi[i - 1][j] -= prob; + } + if (i < _nx && j > 0) { + bi[i][j - 1] -= prob; + } + if (i < _nx && j < _ny) { + bi[i][j] += prob; + } + } + } + + return bi; + } + + private double GenzCalc(double dh, double dk, double rho) { + + double twopi = 2. * Math.PI; + + // Declare the Gauss-Legendre constants + int ng; + double[] w; + double[] x; + + if (Math.abs(rho) < 0.3) { + // for rho < 0.3 use 6 point Gauss-Legendre integration + ng = 3; + w = _w6; + x = _x6; + } else if (Math.abs(rho) < 0.75) { + // for 0.3 < rho < 0.75 use 12 point Gauss-Legendre integration + ng = 6; + w = _w12; + x = _x12; + } else { + // for rho > 0.75 use 20 point Gauss-Legendre integration + ng = 10; + w = _w20; + x = _x20; + } + + // Initialize the probability and some local variables + double bvn = 0.; + double h = dh; + double k = dk; + double hk = h * k; + + // For rho < 0.925, integrate equation 3 in the Genz paper + if (Math.abs(rho) < 0.925) { + + // More or less direct port of Genz code follows + // It is fairly easy to match this calculation against equation 3 of + // Genz's paper if you take into account that you need to change + // variables so the integration argument spans the range -1 to 1 + double hs = (h * h + k * k) / 2.; + double asr = Math.asin(rho); + double sn; + for (int i = 0; i < ng; i++) { + sn = Math.sin(asr * (1 - x[i]) / 2.); + bvn += w[i] * Math.exp((sn * hk - hs) / (1 - sn * sn)); + sn = Math.sin(asr * (1 + x[i]) / 2.); + bvn += w[i] * Math.exp((sn * hk - hs) / (1 - sn * sn)); + } + // The factor of asr/2 comes from changing variables so the + // integration is over the range -1 to 1 instead of 0 - asin(rho) + bvn = bvn * asr / (2. * twopi) + Erf.phi(-h) * Erf.phi(-k); + + } else { + // rho > 0.925 - integrate equation 6 in Genz paper with the + // extra term in the Taylor expansion given in equation 7. + // The rest of this code is pretty dense and is a pretty direct + // port of Genz's code. + + if (rho < 0.) { + k = -k; + hk = -hk; + } + + if (Math.abs(rho) < 1.) { + + double as = (1 - rho) * (1 + rho); + double a = Math.sqrt(as); + double bs = (h - k) * (h - k); + double c = (4. - hk) / 8.; + double d = (12. - hk) / 16.; + double asr = -(bs / as + hk) / 2.; + + if (asr > -100.) { + bvn = a * Math.exp(asr) * + (1. - c * (bs - as) * (1. - d * bs / 5.) / 3. + + c * d * as * as / 5.); + } + + if (-hk < 100.) { + double b = Math.sqrt(bs); + bvn -= Math.exp(-hk / 2.) * Math.sqrt(twopi) * Erf.phi(-b / a) * + b * (1 - c * bs * (1 - d * bs / 5.) / 3.); + } + + a = a / 2.; + for (int i = 0; i < ng; i++) { + for (int j = 0; j < 2; j++) { + int is = -1; + if (j > 0) { + is = 1; + } + double xs = Math.pow(a * (is * x[i] + 1), 2); + double rs = Math.sqrt(1 - xs); + asr = -(bs / xs + hk) / 2; + + if (asr > -100) { + double sp = (1 + c * xs * (1 + d * xs)); + double ep = Math.exp(-hk * (1 - rs) / (2 * (1 + rs))) / rs; + bvn += a * w[i] * Math.exp(asr) * (ep - sp); + } + } + } + + bvn = -bvn / twopi; + } + + if (rho > 0) { + bvn = bvn + Erf.phi(-Math.max(h, k)); + } else { + bvn = -bvn; + if (k > h) { + bvn += Erf.phi(k) - Erf.phi(h); + } + } + } + + return Math.max(0, Math.min(1, bvn)); + } +}

lcsim/src/org/lcsim/math/probability

Erf.java added at 1.1

diff -N Erf.java
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ Erf.java	31 Mar 2009 17:10:38 -0000	1.1
@@ -0,0 +1,283 @@

+/*
+ *  Class Erf
+ * 
+ */
+package org.lcsim.math.probability;
+
+/**
+ *
+ * Calculates the following probability integrals:
+ *<p>
+ *   erf(x) <br>
+ *   erfc(x) = 1 - erf(x) <br>
+ *   phi(x) = 0.5 * erfc(-x/sqrt(2)) <br>
+ *   phic(x) = 0.5 * erfc(x/sqrt(2))
+ *<p>
+ * Note that phi(x) gives the probability for an observation smaller than x for
+ * a Gaussian probability distribution with zero mean and unit standard
+ * deviation, while phic(x) gives the probability for an observation larger
+ * than x.
+ *<p>
+ * The algorithms for erf(x) and erfc(x) are based on Schonfelder's work.
+ * See J.L. Schonfelder, "Chebyshev Expansions for the Error and Related
+ * Functions", Math. Comp. 32, 1232 (1978).  The calculations of phi(x)
+ * and phic(x) are trivially calculated using the erfc(x) algorithm.
+ *<p>
+ * Schonfelder's algorithms are advertised to provide "30 digit" accurracy.
+ * Since this level of accuracy exceeds the machine precision for doubles,
+ * summation terms whose relative weight is below machine precision are
+ * dropped.
+ *<p>
+ * In this algorithm, we calculate
+ *<p>
+ *   erf(x) = x* y(t) for |x| < 2 <br>
+ *   erf(x) = 1 - exp(-x*x) * y(t) / x for x >= 2 <br>
+ *   erfc(|x|) = exp(-x*x)*y(|x|) <br>
+ *<p>
+ * The functions y(x) are expanded in terms of Chebyshev polynomials, where
+ * there is a different set of coefficients a[r] for each of the above 3 cases.
+ *<p>
+ *   y(x) = Sum'( a[r] * T(r, t) )
+ *<p>
+ * The notation Sum' indicates that the r = 0 term is divided by 2.
+ *<p>
+ * The variable t is defined as
+ *<p>
+ *   t = ( x*x - 2 ) / 2   for erf(x) with x < 2 <br>
+ *   t = ( 21 - 2*x*x ) / (5 + 2*x*x)   for erf(x) with x >= 2 <br>
+ *   t = ( 4*|x| - 15 ) / ( 4*|x| + 15 )   for erfc(x)
+ *<p>
+ * The code and implementation are based on Alan Genz's FORTRAN source code
+ * that can be found at http://www.math.wsu.edu/faculty/genz/homepage.
+ *<p>
+ * Genz's code was a bit tricky to "reverse engineer", so we go through the
+ * way these calculations are performed in some detail.  Rather than calculate
+ * y(x) directly,  he calculates
+ *<p>
+ *   bm = Sum( a[r] * U(r, t) )    r = 0 : N <br>
+ *   bp = Sum( a[r] * U(r-2, t) )  r = 2 : N
+ *<p>
+ * where U(r, t) are Chebyshev polynomials of the second kind.  The coefficients
+ * a[r] decrease with r, and the value of N is chosen where a[N] / a[0] is
+ * ~10^-16, reflecting the machine precision for doubles.
+ *<p>
+ * The Chebyshev polynomials of the second kind U(r, t) are calculated using the
+ * recursion relation:
+ *<p>
+ *   U(r, t) = 2 * t * U(r-1, t) - U(r-2, t)
+ *<p>
+ * Genz uses the identity
+ *<p>
+ *   T(r, t) = ( U(r, t) - U(r-2, t) ) / 2
+ *<p>
+ * to calculate y(x)
+ *<p>
+ *   y(x) = ( bm - bp ) / 2.
+ *<p>
+ * Note that we get the correct contributions for the r = 0 and r = 1 terms by
+ * ignoring these terms in the bp sum, including getting the desired factor
+ * of 1/2 in the contribution from the r = 0 term.
+ *
+ * @author Richard Partridge
+ */
+public class Erf {
+
+    private static double rtwo = 1.414213562373095048801688724209e0;
+
+    //  Coefficients for the erf(x) calculation with |x| < 2
+    private static double[] a1 = {
+        1.483110564084803581889448079057e0,
+        -3.01071073386594942470731046311e-1,
+        6.8994830689831566246603180718e-2,
+        -1.3916271264722187682546525687e-2,
+        2.420799522433463662891678239e-3,
+        -3.65863968584808644649382577e-4,
+        4.8620984432319048282887568e-5,
+        -5.749256558035684835054215e-6,
+        6.11324357843476469706758e-7,
+        -5.8991015312958434390846e-8,
+        5.207009092068648240455e-9,
+        -4.23297587996554326810e-10,
+        3.1881135066491749748e-11,
+        -2.236155018832684273e-12,
+        1.46732984799108492e-13,
+        -9.044001985381747e-15,
+        5.25481371547092e-16};
+
+    //  Coefficients for the err(x) calculation with x > 2
+    private static double[] a2 = {
+      1.077977852072383151168335910348e0,
+      -2.6559890409148673372146500904e-2,
+      -1.487073146698099509605046333e-3,
+      -1.38040145414143859607708920e-4,
+      -1.1280303332287491498507366e-5,
+      -1.172869842743725224053739e-6,
+      -1.03476150393304615537382e-7,
+      -1.1899114085892438254447e-8,
+      -1.016222544989498640476e-9,
+      -1.37895716146965692169e-10,
+      -9.369613033737303335e-12,
+      -1.918809583959525349e-12,
+      -3.7573017201993707e-14,
+      -3.7053726026983357e-14,
+      2.627565423490371e-15,
+      -1.121322876437933e-15,
+      1.84136028922538e-16};
+
+    //  Coefficients for the erfc(x) calculation
+    private static double[] a3 = {
+        6.10143081923200417926465815756e-1,
+        -4.34841272712577471828182820888e-1,
+        1.76351193643605501125840298123e-1,
+        -6.0710795609249414860051215825e-2,
+        1.7712068995694114486147141191e-2,
+        -4.321119385567293818599864968e-3,
+        8.54216676887098678819832055e-4,
+        -1.27155090609162742628893940e-4,
+        1.1248167243671189468847072e-5,
+        3.13063885421820972630152e-7,
+        -2.70988068537762022009086e-7,
+        3.0737622701407688440959e-8,
+        2.515620384817622937314e-9,
+        -1.028929921320319127590e-9,
+        2.9944052119949939363e-11,
+        2.6051789687266936290e-11,
+        -2.634839924171969386e-12,
+        -6.43404509890636443e-13,
+        1.12457401801663447e-13,
+        1.7281533389986098e-14,
+        -4.264101694942375e-15,
+        -5.45371977880191e-16,
+        1.58697607761671e-16,
+        2.0899837844334e-17,
+        -5.900526869409e-18};
+
+    /**
+     * Calculate the error function
+     * 
+     * @param x argument
+     * @return error function
+     */
+    public static double erf(double x) {
+
+        //  Initialize
+        double xa = Math.abs(x);
+        double erf;
+
+        //  Case 1: |x| < 2
+        if (xa < 2.) {
+
+            //  First calculate 2*t
+            double tt = x*x - 2.;
+
+            //  Initialize the recursion variables.
+            double bm = 0.;
+            double b = 0.;
+            double bp = 0.;
+
+            //  Calculate bm and bp as defined above
+            for (int i = 16; i >= 0; i--) {
+                bp = b;
+                b = bm;
+                bm = tt * b - bp + a1[i];
+            }
+
+            //  Finally, calculate erfc using the Chebyshev polynomial identity
+            erf = x * (bm - bp) / 2.;
+
+        } else {
+
+            //  Case 2: |x| >= 2
+
+            //  First calculate 2*t
+            double tt = (42. - 4 * xa*xa) / (5. + 2 * xa*xa);
+
+            //  Initialize the recursion variables.
+            double bm = 0.;
+            double b = 0.;
+            double bp = 0.;
+
+            //  Calculate bm and bp as defined above
+            for (int i = 16; i >= 0; i--) {
+                bp = b;
+                b = bm;
+                bm = tt * b - bp + a2[i];
+            }
+
+            //  Finally, calculate erfc using the Chebyshev polynomial identity
+            erf = 1. - Math.exp(-x * x) * (bm - bp) / (2. * xa);
+
+            //  Take care of negative argument for case 2
+            if (x < 0.) erf = -erf;
+        }
+
+        //  Finished both cases!
+        return erf;
+    }
+
+    /**
+     * Calculate the error function complement
+     * @param x argument
+     * @return error function complement
+     */
+    public static double erfc(double x) {
+
+        //  Initialize
+        double xa = Math.abs(x);
+        double erfc;
+
+        //  Set phi to 0 when the argument is too big
+        if (xa > 100.) {
+            erfc = 0.;
+        } else {
+
+            //  First calculate 2*t
+            double tt = (8. * xa - 30.) / (4. * xa + 15.);
+
+            //  Initialize the recursion variables.
+            double bm = 0.;
+            double b = 0.;
+            double bp = 0.;
+
+            //  Calculate bm and bp as defined above
+            for (int i = 24; i >= 0; i--) {
+                bp = b;
+                b = bm;
+                bm = tt * b - bp + a3[i];
+            }
+
+            //  Finally, calculate erfc using the Chebyshev polynomial identity
+            erfc = Math.exp(-x * x) * (bm - bp) / 2.;
+        }
+
+        //  Cacluate erfc for negative arguments
+        if (x < 0.) erfc = 2. - erfc;
+
+        return erfc;
+    }
+
+    /**
+     * Calcualate the probability for an observation smaller than x for a
+     * Gaussian probability distribution with zero mean and unit standard
+     * deviation
+     * 
+     * @param x argument
+     * @return probability integral
+     */
+    public static double phi(double x) {
+        return 0.5 * erfc( -x / rtwo);
+    }
+
+    /**
+     * Calculate the probability for an observation larger than x for a
+     * Gaussian probability distribution with zero mean and unit standard
+     * deviation
+     *
+     * @param x argument
+     * @return probability integral
+     */
+    public static double phic(double x) {
+        return 0.5 * erfc(x / rtwo);
+    }
+}
+

Commit in `lcsim/src/org/lcsim/math/probability` on MAIN
`BivariateDistribution.java`	+258	added 1.1
`Erf.java`	+283	added 1.1
	+541