1 /* 2 * Copyright (C) 2018 Alberto Irurueta Carro (alberto@irurueta.com) 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 package com.irurueta.navigation.geodesic; 17 18 /** 19 * An accumulator for sums. 20 * This allows many double precision numbers to be added together with twice the normal 21 * precision. Thus, the effective precision of the sum is 106 bits or about 32 decimal places. 22 * The implementation follows J. R. Shewchuk, 23 * <a href="https://doi.org/10.1007/PL00009321">Adaptive Precision Floating-Point Arithmetic and Fast Robust Geometric 24 * Predicates</a>, Discrete & Computational Geometry 18(3) 305–363 (1997). 25 * In the documentation of the member functions, <i>sum</i> stands for the value currently held in the 26 * accumulator. 27 */ 28 public class Accumulator { 29 30 /** 31 * s + t accumulators for the sum. 32 */ 33 private double s; 34 35 /** 36 * s + t accumulators for the sum. 37 */ 38 private double t; 39 40 /** 41 * Constructor from a double. 42 * 43 * @param y set <i>sum</i> = <i>y</i>. 44 */ 45 public Accumulator(final double y) { 46 s = y; 47 t = 0; 48 } 49 50 /** 51 * Constructor from another Accumulator. 52 * 53 * @param a set <i>sum</i> = <i>a</i>. 54 */ 55 public Accumulator(final Accumulator a) { 56 s = a.s; 57 t = a.t; 58 } 59 60 /** 61 * Sets the value to a double. 62 * 63 * @param y set <i>sum</i> = <i>y</i>. 64 */ 65 public void set(final double y) { 66 s = y; 67 t = 0; 68 } 69 70 /** 71 * Returns the value held in the accumulator. 72 * 73 * @return <i>sum</i>. 74 */ 75 public double getSum() { 76 return s; 77 } 78 79 /** 80 * Returns the result of adding a number to <i>sum</i> (but don't change <i>sum</i>). 81 * 82 * @param y the number to be added to the sum. 83 * @return <i>sum</i> + <i>y</i>. 84 */ 85 public double sum(final double y) { 86 final var a = new Accumulator(this); 87 a.add(y); 88 return a.s; 89 } 90 91 /** 92 * Add a number to the accumulator. 93 * 94 * @param y set <i>sum</i> += <i>y</i>. 95 */ 96 public void add(double y) { 97 // Here's Shewchuk's solution... 98 99 // hold exact sum as [s, t, u] 100 double u; 101 102 // accumulate starting at least significant end 103 var r = GeoMath.sum(y, t); 104 y = r.getFirst(); 105 u = r.getSecond(); 106 107 r = GeoMath.sum(y, s); 108 s = r.getFirst(); 109 t = r.getSecond(); 110 111 // Start is mS, mT decreasing and non-adjacent. Sum is now (s + t + u) exactly with s, t, u 112 // non-adjacent and in decreasing order (except for possible zeros). The following code tries 113 // to normalize the result. 114 // Ideally, we want mS = round(s + t + u) and mU = round(s + t + u - mS). The following does an 115 // approximate job (and maintains the decreasing non-adjacent property). Here are two "failures" 116 // using 3-bit floats: 117 118 // Case 1: mS is not equal to round(s + t + u) -- off by 1 ulp 119 // [12, -1] - 8 -> [4, 0, -1] -> [4, -1] = 3 should be [3, 0] = 3 120 121 // Case 2: mS + mT is not as close to s + t + u as it should be 122 // [64, 5] + 4 -> [64, 8, 1] -> [64, 8] = 72 (off by 1) 123 // should be [80, -7] = 73 (exact) 124 125 // "Fixing" these problems is probably not worth the expense. The representation inevitably 126 // leads to small errors in the accumulated values. 127 // The additional errors illustrated here amount to 1 ulp of the less significant word during 128 // each addition to the Accumulator and an additional possible error of 1 ulp in the reported sum. 129 130 // Incidentally, the "ideal" representation described above is not canonical, because 131 // mS = round(mS + mT) may not be true. For example with 3-bit floats: 132 133 // [128, 16] + 1 -> [160, -16] -- 160 = round(145). 134 // But [160, 0] - 16 -> [128, 16] -- 128 = round(144). 135 136 if (s == 0) { 137 // this implies t == 0, so result is u. 138 s = u; 139 } else { 140 // otherwise just accumulate u to t. 141 t += u; 142 } 143 } 144 145 /** 146 * Negate an accumulator. 147 * Set <i>sum</i> = −<i>sum</i>. 148 */ 149 public void negate() { 150 s = -s; 151 t = -t; 152 } 153 }