1 /* 2 * Copyright (C) 2012 Alberto Irurueta Carro (alberto@irurueta.com) 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 package com.irurueta.numerical; 17 18 import com.irurueta.algebra.ArrayUtils; 19 20 import java.util.Arrays; 21 22 /** 23 * Class to estimate the most likely value from a series of samples assumed to 24 * be normally distributed. 25 * This implementation will compute a histogram of the probability distribution 26 * function of all the provided samples. 27 * The probability distribution function is computed by aggregating all the 28 * samples as a series of Gaussian functions with a small sigma and centered at 29 * the exact value of the sample. 30 */ 31 public class HistogramMaximumLikelihoodEstimator extends MaximumLikelihoodEstimator { 32 33 /** 34 * Default number of bins to be used on the histogram. 35 */ 36 public static final int DEFAULT_NUMBER_OF_BINS = 100; 37 38 /** 39 * Minimum number of bins allowed on the histogram. 40 */ 41 public static final int MIN_NUMBER_OF_BINS = 2; 42 43 /** 44 * Value to be considered as the machine precision. 45 */ 46 public static final double EPS = 1e-9; 47 48 /** 49 * Number of bins to be used on the histogram. The larger the value the 50 * more precise results will be but more data will be needed to obtain 51 * statistically meaningful data so that enough samples are present on each 52 * bin. 53 */ 54 private int numberOfBins; 55 56 /** 57 * Number of samples contained on each bin. The number of samples is not 58 * an integer because the Gaussians of each sample will only have a value 59 * equal to one on their respective centers, but at other locations, only 60 * the fractional part is added to each bin. Using Gaussians on each sample 61 * behave as an interpolation of the location of each sample around a 62 * certain value. 63 */ 64 private double[] bins; 65 66 /** 67 * Array containing the Gaussian values corresponding to each bin for a 68 * single sample. The values of each array are updated for each sample and 69 * aggregated to the values of the bins array. 70 */ 71 private double[] gaussian; 72 73 /** 74 * Empty constructor. 75 */ 76 public HistogramMaximumLikelihoodEstimator() { 77 super(); 78 numberOfBins = DEFAULT_NUMBER_OF_BINS; 79 bins = null; 80 gaussian = null; 81 } 82 83 /** 84 * Constructor. 85 * 86 * @param gaussianSigma Gaussian sigma to be used on each sample. 87 * @param numberOfBins Number of bins to be used on the histogram. 88 * @throws IllegalArgumentException Raised if provided Gaussian sigma is 89 * negative or zero, or if provided number of bins is smaller than the 90 * minimum allowed number of bins. 91 */ 92 public HistogramMaximumLikelihoodEstimator(final double gaussianSigma, final int numberOfBins) { 93 super(gaussianSigma); 94 internalSetNumberOfBins(numberOfBins); 95 bins = null; 96 gaussian = null; 97 } 98 99 /** 100 * Constructor. 101 * 102 * @param inputData Array containing input data where most likely value must 103 * be estimated from. 104 * @param gaussianSigma Gaussian sigma to be used on each sample. 105 * @param numberOfBins Number of bins to be used on the histogram. 106 * @throws IllegalArgumentException Raised if provided Gaussian sigma is 107 * negative or zero, or if provided number of bins is smaller than the 108 * minimum allowed number of bins. 109 */ 110 public HistogramMaximumLikelihoodEstimator(final double[] inputData, final double gaussianSigma, 111 final int numberOfBins) { 112 super(inputData, gaussianSigma); 113 internalSetNumberOfBins(numberOfBins); 114 bins = null; 115 gaussian = null; 116 } 117 118 /** 119 * Constructor. 120 * 121 * @param minValue Minimum value assumed to be contained within input data 122 * array. 123 * @param maxValue Maximum value assumed to be contained within input data 124 * array. 125 * @param inputData Array containing input data where most likely value must 126 * be estimated from. 127 * @param gaussianSigma Gaussian sigma to be used on each sample. 128 * @param numberOfBins Number of bins to be used on the histogram. 129 * @throws IllegalArgumentException Raised if provided Gaussian sigma is 130 * negative or zero, or if provided number of bins is smaller than the 131 * minimum allowed number of bins or if minValue < maxValue. 132 */ 133 public HistogramMaximumLikelihoodEstimator( 134 final double minValue, final double maxValue, final double[] inputData, final double gaussianSigma, 135 final int numberOfBins) { 136 super(minValue, maxValue, inputData, gaussianSigma); 137 internalSetNumberOfBins(numberOfBins); 138 bins = null; 139 gaussian = null; 140 } 141 142 143 /** 144 * Returns method to be used for maximum likelihood estimation, which for 145 * this class is MaximumLikelihoodEstimatorMethod. 146 * HISTOGRAM_MAXIMUM_LIKELIHOOD_ESTIMATOR. 147 * 148 * @return Method for maximum likelihood estimation. 149 */ 150 @Override 151 public MaximumLikelihoodEstimatorMethod getMethod() { 152 return MaximumLikelihoodEstimatorMethod.HISTOGRAM_MAXIMUM_LIKELIHOOD_ESTIMATOR; 153 } 154 155 /** 156 * Returns number of bins to be used on the histogram. The larger the value 157 * the more precise results will be but more data will be needed to obtain 158 * statistically meaningful data so that enough samples are present on each 159 * bin. 160 * 161 * @return Number of bins to be used on the histogram. 162 */ 163 public int getNumberOfBins() { 164 return numberOfBins; 165 } 166 167 /** 168 * Sets number of bins to be used on the histogram. The larger the provided 169 * value being set the more precise results will be but more data will be 170 * needed to obtain statistically meaningful data so that enough samples are 171 * present on each bin. 172 * 173 * @param numberOfBins Number of bins to be used on the histogram. 174 * @throws LockedException Exception raised if this instance is locked. 175 * This method can only be executed when computations finish and this 176 * instance becomes unlocked. 177 * @throws IllegalArgumentException Raised if provided value is lower than 178 * the allowed minimum. 179 */ 180 public void setNumberOfBins(final int numberOfBins) throws LockedException { 181 if (isLocked()) { 182 throw new LockedException(); 183 } 184 internalSetNumberOfBins(numberOfBins); 185 } 186 187 /** 188 * Starts the estimation of the most likely value contained within provided 189 * input data array. 190 * 191 * @return The most likely value. 192 * @throws LockedException Exception raised if this instance is locked. 193 * This method can only be executed when computations finish and this 194 * instance becomes unlocked. 195 * @throws NotReadyException Exception raised if this instance is not yet 196 * ready. 197 * @see #isReady() 198 */ 199 @Override 200 public double estimate() throws LockedException, NotReadyException { 201 if (isLocked()) { 202 throw new LockedException(); 203 } 204 if (!isReady()) { 205 throw new NotReadyException(); 206 } 207 208 locked = true; 209 210 if (!areMinMaxAvailable) { 211 computeMinMaxValues(); 212 } 213 214 if ((maxValue - minValue) < EPS) { 215 // min-max limits are almost equal, so we return it as the solution 216 locked = false; 217 return (minValue + maxValue) * 0.5; 218 } 219 220 // Create histogram (initialized to 0.0) 221 if (bins == null || bins.length != numberOfBins) { 222 bins = new double[numberOfBins]; 223 } 224 Arrays.fill(bins, 0.0); 225 226 227 // Vector containing gaussians being added to histogram 228 // Data is added to histogram as if it was convoluted with a gaussian 229 // to add smoothness to results 230 if (gaussian == null || gaussian.length != numberOfBins) { 231 gaussian = new double[numberOfBins]; 232 } 233 234 // set of data values between bins 235 final var delta = (maxValue - minValue) / (numberOfBins - 1); 236 237 // iterate over input data to add data to histogram 238 double gaussianCenterPos; 239 for (var data : inputData) { 240 gaussianCenterPos = (data - minValue) / delta; 241 computeGaussian(gaussian, gaussianCenterPos); 242 243 // add gaussian centered at input data value to histogram bins 244 ArrayUtils.sum(bins, gaussian, bins); 245 } 246 247 // find location of maximum in histogram 248 var maxBin = 0.0; 249 final double maxFuncValue; 250 int maxPos = 0; 251 for (var i = 0; i < numberOfBins; i++) { 252 if (bins[i] > maxBin) { 253 maxBin = bins[i]; 254 maxPos = i; 255 } 256 } 257 258 maxFuncValue = minValue + maxPos * delta; 259 260 locked = false; 261 return maxFuncValue; 262 } 263 264 /** 265 * Internal method to compute values of Gaussian vector assumed to be 266 * centered at provided value. Gaussian values are stored in provided array 267 * so that array can be reused. 268 * 269 * @param gaussian Array containing Gaussian values 270 * @param centerPos Value where Gaussian is centered 271 */ 272 protected void computeGaussian(final double[] gaussian, final double centerPos) { 273 final var length = gaussian.length; 274 275 double x; 276 for (var i = 0; i < length; i++) { 277 x = i - centerPos; 278 gaussian[i] = Math.exp(-x * x / (2.0 * gaussianSigma * gaussianSigma)) 279 / (Math.sqrt(2.0 * Math.PI) * gaussianSigma); 280 } 281 } 282 283 /** 284 * Internal method to set number of bins to be used on the histogram. The 285 * larger the value being set the more precise results will be but more data 286 * will be needed to obtain statistically meaningful data so that enough 287 * samples are present on each bin. 288 * 289 * @param numberOfBins Number of bins to be used on the histogram. 290 * @throws IllegalArgumentException Raised if provided value is lower than 291 * the allowed minimum. 292 */ 293 private void internalSetNumberOfBins(final int numberOfBins) { 294 if (numberOfBins < MIN_NUMBER_OF_BINS) { 295 throw new IllegalArgumentException(); 296 } 297 298 this.numberOfBins = numberOfBins; 299 } 300 }