1 /*
2 * Copyright (C) 2012 Alberto Irurueta Carro (alberto@irurueta.com)
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 package com.irurueta.numerical;
17
18 import com.irurueta.algebra.ArrayUtils;
19
20 import java.util.Arrays;
21
22 /**
23 * Class to estimate the most likely value from a series of samples assumed to
24 * be normally distributed.
25 * This implementation will compute a histogram of the probability distribution
26 * function of all the provided samples.
27 * The probability distribution function is computed by aggregating all the
28 * samples as a series of Gaussian functions with a small sigma and centered at
29 * the exact value of the sample.
30 */
31 public class HistogramMaximumLikelihoodEstimator extends MaximumLikelihoodEstimator {
32
33 /**
34 * Default number of bins to be used on the histogram.
35 */
36 public static final int DEFAULT_NUMBER_OF_BINS = 100;
37
38 /**
39 * Minimum number of bins allowed on the histogram.
40 */
41 public static final int MIN_NUMBER_OF_BINS = 2;
42
43 /**
44 * Value to be considered as the machine precision.
45 */
46 public static final double EPS = 1e-9;
47
48 /**
49 * Number of bins to be used on the histogram. The larger the value the
50 * more precise results will be but more data will be needed to obtain
51 * statistically meaningful data so that enough samples are present on each
52 * bin.
53 */
54 private int numberOfBins;
55
56 /**
57 * Number of samples contained on each bin. The number of samples is not
58 * an integer because the Gaussians of each sample will only have a value
59 * equal to one on their respective centers, but at other locations, only
60 * the fractional part is added to each bin. Using Gaussians on each sample
61 * behave as an interpolation of the location of each sample around a
62 * certain value.
63 */
64 private double[] bins;
65
66 /**
67 * Array containing the Gaussian values corresponding to each bin for a
68 * single sample. The values of each array are updated for each sample and
69 * aggregated to the values of the bins array.
70 */
71 private double[] gaussian;
72
73 /**
74 * Empty constructor.
75 */
76 public HistogramMaximumLikelihoodEstimator() {
77 super();
78 numberOfBins = DEFAULT_NUMBER_OF_BINS;
79 bins = null;
80 gaussian = null;
81 }
82
83 /**
84 * Constructor.
85 *
86 * @param gaussianSigma Gaussian sigma to be used on each sample.
87 * @param numberOfBins Number of bins to be used on the histogram.
88 * @throws IllegalArgumentException Raised if provided Gaussian sigma is
89 * negative or zero, or if provided number of bins is smaller than the
90 * minimum allowed number of bins.
91 */
92 public HistogramMaximumLikelihoodEstimator(final double gaussianSigma, final int numberOfBins) {
93 super(gaussianSigma);
94 internalSetNumberOfBins(numberOfBins);
95 bins = null;
96 gaussian = null;
97 }
98
99 /**
100 * Constructor.
101 *
102 * @param inputData Array containing input data where most likely value must
103 * be estimated from.
104 * @param gaussianSigma Gaussian sigma to be used on each sample.
105 * @param numberOfBins Number of bins to be used on the histogram.
106 * @throws IllegalArgumentException Raised if provided Gaussian sigma is
107 * negative or zero, or if provided number of bins is smaller than the
108 * minimum allowed number of bins.
109 */
110 public HistogramMaximumLikelihoodEstimator(final double[] inputData, final double gaussianSigma,
111 final int numberOfBins) {
112 super(inputData, gaussianSigma);
113 internalSetNumberOfBins(numberOfBins);
114 bins = null;
115 gaussian = null;
116 }
117
118 /**
119 * Constructor.
120 *
121 * @param minValue Minimum value assumed to be contained within input data
122 * array.
123 * @param maxValue Maximum value assumed to be contained within input data
124 * array.
125 * @param inputData Array containing input data where most likely value must
126 * be estimated from.
127 * @param gaussianSigma Gaussian sigma to be used on each sample.
128 * @param numberOfBins Number of bins to be used on the histogram.
129 * @throws IllegalArgumentException Raised if provided Gaussian sigma is
130 * negative or zero, or if provided number of bins is smaller than the
131 * minimum allowed number of bins or if minValue < maxValue.
132 */
133 public HistogramMaximumLikelihoodEstimator(
134 final double minValue, final double maxValue, final double[] inputData, final double gaussianSigma,
135 final int numberOfBins) {
136 super(minValue, maxValue, inputData, gaussianSigma);
137 internalSetNumberOfBins(numberOfBins);
138 bins = null;
139 gaussian = null;
140 }
141
142
143 /**
144 * Returns method to be used for maximum likelihood estimation, which for
145 * this class is MaximumLikelihoodEstimatorMethod.
146 * HISTOGRAM_MAXIMUM_LIKELIHOOD_ESTIMATOR.
147 *
148 * @return Method for maximum likelihood estimation.
149 */
150 @Override
151 public MaximumLikelihoodEstimatorMethod getMethod() {
152 return MaximumLikelihoodEstimatorMethod.HISTOGRAM_MAXIMUM_LIKELIHOOD_ESTIMATOR;
153 }
154
155 /**
156 * Returns number of bins to be used on the histogram. The larger the value
157 * the more precise results will be but more data will be needed to obtain
158 * statistically meaningful data so that enough samples are present on each
159 * bin.
160 *
161 * @return Number of bins to be used on the histogram.
162 */
163 public int getNumberOfBins() {
164 return numberOfBins;
165 }
166
167 /**
168 * Sets number of bins to be used on the histogram. The larger the provided
169 * value being set the more precise results will be but more data will be
170 * needed to obtain statistically meaningful data so that enough samples are
171 * present on each bin.
172 *
173 * @param numberOfBins Number of bins to be used on the histogram.
174 * @throws LockedException Exception raised if this instance is locked.
175 * This method can only be executed when computations finish and this
176 * instance becomes unlocked.
177 * @throws IllegalArgumentException Raised if provided value is lower than
178 * the allowed minimum.
179 */
180 public void setNumberOfBins(final int numberOfBins) throws LockedException {
181 if (isLocked()) {
182 throw new LockedException();
183 }
184 internalSetNumberOfBins(numberOfBins);
185 }
186
187 /**
188 * Starts the estimation of the most likely value contained within provided
189 * input data array.
190 *
191 * @return The most likely value.
192 * @throws LockedException Exception raised if this instance is locked.
193 * This method can only be executed when computations finish and this
194 * instance becomes unlocked.
195 * @throws NotReadyException Exception raised if this instance is not yet
196 * ready.
197 * @see #isReady()
198 */
199 @Override
200 public double estimate() throws LockedException, NotReadyException {
201 if (isLocked()) {
202 throw new LockedException();
203 }
204 if (!isReady()) {
205 throw new NotReadyException();
206 }
207
208 locked = true;
209
210 if (!areMinMaxAvailable) {
211 computeMinMaxValues();
212 }
213
214 if ((maxValue - minValue) < EPS) {
215 // min-max limits are almost equal, so we return it as the solution
216 locked = false;
217 return (minValue + maxValue) * 0.5;
218 }
219
220 // Create histogram (initialized to 0.0)
221 if (bins == null || bins.length != numberOfBins) {
222 bins = new double[numberOfBins];
223 }
224 Arrays.fill(bins, 0.0);
225
226
227 // Vector containing gaussians being added to histogram
228 // Data is added to histogram as if it was convoluted with a gaussian
229 // to add smoothness to results
230 if (gaussian == null || gaussian.length != numberOfBins) {
231 gaussian = new double[numberOfBins];
232 }
233
234 // set of data values between bins
235 final var delta = (maxValue - minValue) / (numberOfBins - 1);
236
237 // iterate over input data to add data to histogram
238 double gaussianCenterPos;
239 for (var data : inputData) {
240 gaussianCenterPos = (data - minValue) / delta;
241 computeGaussian(gaussian, gaussianCenterPos);
242
243 // add gaussian centered at input data value to histogram bins
244 ArrayUtils.sum(bins, gaussian, bins);
245 }
246
247 // find location of maximum in histogram
248 var maxBin = 0.0;
249 final double maxFuncValue;
250 int maxPos = 0;
251 for (var i = 0; i < numberOfBins; i++) {
252 if (bins[i] > maxBin) {
253 maxBin = bins[i];
254 maxPos = i;
255 }
256 }
257
258 maxFuncValue = minValue + maxPos * delta;
259
260 locked = false;
261 return maxFuncValue;
262 }
263
264 /**
265 * Internal method to compute values of Gaussian vector assumed to be
266 * centered at provided value. Gaussian values are stored in provided array
267 * so that array can be reused.
268 *
269 * @param gaussian Array containing Gaussian values
270 * @param centerPos Value where Gaussian is centered
271 */
272 protected void computeGaussian(final double[] gaussian, final double centerPos) {
273 final var length = gaussian.length;
274
275 double x;
276 for (var i = 0; i < length; i++) {
277 x = i - centerPos;
278 gaussian[i] = Math.exp(-x * x / (2.0 * gaussianSigma * gaussianSigma))
279 / (Math.sqrt(2.0 * Math.PI) * gaussianSigma);
280 }
281 }
282
283 /**
284 * Internal method to set number of bins to be used on the histogram. The
285 * larger the value being set the more precise results will be but more data
286 * will be needed to obtain statistically meaningful data so that enough
287 * samples are present on each bin.
288 *
289 * @param numberOfBins Number of bins to be used on the histogram.
290 * @throws IllegalArgumentException Raised if provided value is lower than
291 * the allowed minimum.
292 */
293 private void internalSetNumberOfBins(final int numberOfBins) {
294 if (numberOfBins < MIN_NUMBER_OF_BINS) {
295 throw new IllegalArgumentException();
296 }
297
298 this.numberOfBins = numberOfBins;
299 }
300 }