View Javadoc
1   /*
2    * Copyright (C) 2012 Alberto Irurueta Carro (alberto@irurueta.com)
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *         http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   */
16  package com.irurueta.numerical;
17  
18  import com.irurueta.algebra.ArrayUtils;
19  
20  import java.util.Arrays;
21  
22  /**
23   * Class to estimate the most likely value from a series of samples assumed to
24   * be normally distributed.
25   * This implementation will compute a histogram of the probability distribution
26   * function of all the provided samples.
27   * The probability distribution function is computed by aggregating all the
28   * samples as a series of Gaussian functions with a small sigma and centered at
29   * the exact value of the sample.
30   */
31  public class HistogramMaximumLikelihoodEstimator extends MaximumLikelihoodEstimator {
32  
33      /**
34       * Default number of bins to be used on the histogram.
35       */
36      public static final int DEFAULT_NUMBER_OF_BINS = 100;
37  
38      /**
39       * Minimum number of bins allowed on the histogram.
40       */
41      public static final int MIN_NUMBER_OF_BINS = 2;
42  
43      /**
44       * Value to be considered as the machine precision.
45       */
46      public static final double EPS = 1e-9;
47  
48      /**
49       * Number of bins to be used on the histogram. The larger the value the
50       * more precise results will be but more data will be needed to obtain
51       * statistically meaningful data so that enough samples are present on each
52       * bin.
53       */
54      private int numberOfBins;
55  
56      /**
57       * Number of samples contained on each bin. The number of samples is not
58       * an integer because the Gaussians of each sample will only have a value
59       * equal to one on their respective centers, but at other locations, only
60       * the fractional part is added to each bin. Using Gaussians on each sample
61       * behave as an interpolation of the location of each sample around a
62       * certain value.
63       */
64      private double[] bins;
65  
66      /**
67       * Array containing the Gaussian values corresponding to each bin for a
68       * single sample. The values of each array are updated for each sample and
69       * aggregated to the values of the bins array.
70       */
71      private double[] gaussian;
72  
73      /**
74       * Empty constructor.
75       */
76      public HistogramMaximumLikelihoodEstimator() {
77          super();
78          numberOfBins = DEFAULT_NUMBER_OF_BINS;
79          bins = null;
80          gaussian = null;
81      }
82  
83      /**
84       * Constructor.
85       *
86       * @param gaussianSigma Gaussian sigma to be used on each sample.
87       * @param numberOfBins  Number of bins to be used on the histogram.
88       * @throws IllegalArgumentException Raised if provided Gaussian sigma is
89       *                                  negative or zero, or if provided number of bins is smaller than the
90       *                                  minimum allowed number of bins.
91       */
92      public HistogramMaximumLikelihoodEstimator(final double gaussianSigma, final int numberOfBins) {
93          super(gaussianSigma);
94          internalSetNumberOfBins(numberOfBins);
95          bins = null;
96          gaussian = null;
97      }
98  
99      /**
100      * Constructor.
101      *
102      * @param inputData     Array containing input data where most likely value must
103      *                      be estimated from.
104      * @param gaussianSigma Gaussian sigma to be used on each sample.
105      * @param numberOfBins  Number of bins to be used on the histogram.
106      * @throws IllegalArgumentException Raised if provided Gaussian sigma is
107      *                                  negative or zero, or if provided number of bins is smaller than the
108      *                                  minimum allowed number of bins.
109      */
110     public HistogramMaximumLikelihoodEstimator(final double[] inputData, final double gaussianSigma,
111                                                final int numberOfBins) {
112         super(inputData, gaussianSigma);
113         internalSetNumberOfBins(numberOfBins);
114         bins = null;
115         gaussian = null;
116     }
117 
118     /**
119      * Constructor.
120      *
121      * @param minValue      Minimum value assumed to be contained within input data
122      *                      array.
123      * @param maxValue      Maximum value assumed to be contained within input data
124      *                      array.
125      * @param inputData     Array containing input data where most likely value must
126      *                      be estimated from.
127      * @param gaussianSigma Gaussian sigma to be used on each sample.
128      * @param numberOfBins  Number of bins to be used on the histogram.
129      * @throws IllegalArgumentException Raised if provided Gaussian sigma is
130      *                                  negative or zero, or if provided number of bins is smaller than the
131      *                                  minimum allowed number of bins or if minValue < maxValue.
132      */
133     public HistogramMaximumLikelihoodEstimator(
134             final double minValue, final double maxValue, final double[] inputData, final double gaussianSigma,
135             final int numberOfBins) {
136         super(minValue, maxValue, inputData, gaussianSigma);
137         internalSetNumberOfBins(numberOfBins);
138         bins = null;
139         gaussian = null;
140     }
141 
142 
143     /**
144      * Returns method to be used for maximum likelihood estimation, which for
145      * this class is MaximumLikelihoodEstimatorMethod.
146      * HISTOGRAM_MAXIMUM_LIKELIHOOD_ESTIMATOR.
147      *
148      * @return Method for maximum likelihood estimation.
149      */
150     @Override
151     public MaximumLikelihoodEstimatorMethod getMethod() {
152         return MaximumLikelihoodEstimatorMethod.HISTOGRAM_MAXIMUM_LIKELIHOOD_ESTIMATOR;
153     }
154 
155     /**
156      * Returns number of bins to be used on the histogram. The larger the value
157      * the more precise results will be but more data will be needed to obtain
158      * statistically meaningful data so that enough samples are present on each
159      * bin.
160      *
161      * @return Number of bins to be used on the histogram.
162      */
163     public int getNumberOfBins() {
164         return numberOfBins;
165     }
166 
167     /**
168      * Sets number of bins to be used on the histogram. The larger the provided
169      * value being set the more precise results will be but more data will be
170      * needed to obtain statistically meaningful data so that enough samples are
171      * present on each bin.
172      *
173      * @param numberOfBins Number of bins to be used on the histogram.
174      * @throws LockedException          Exception raised if this instance is locked.
175      *                                  This method can only be executed when computations finish and this
176      *                                  instance becomes unlocked.
177      * @throws IllegalArgumentException Raised if provided value is lower than
178      *                                  the allowed minimum.
179      */
180     public void setNumberOfBins(final int numberOfBins) throws LockedException {
181         if (isLocked()) {
182             throw new LockedException();
183         }
184         internalSetNumberOfBins(numberOfBins);
185     }
186 
187     /**
188      * Starts the estimation of the most likely value contained within provided
189      * input data array.
190      *
191      * @return The most likely value.
192      * @throws LockedException   Exception raised if this instance is locked.
193      *                           This method can only be executed when computations finish and this
194      *                           instance becomes unlocked.
195      * @throws NotReadyException Exception raised if this instance is not yet
196      *                           ready.
197      * @see #isReady()
198      */
199     @Override
200     public double estimate() throws LockedException, NotReadyException {
201         if (isLocked()) {
202             throw new LockedException();
203         }
204         if (!isReady()) {
205             throw new NotReadyException();
206         }
207 
208         locked = true;
209 
210         if (!areMinMaxAvailable) {
211             computeMinMaxValues();
212         }
213 
214         if ((maxValue - minValue) < EPS) {
215             // min-max limits are almost equal, so we return it as the solution
216             locked = false;
217             return (minValue + maxValue) * 0.5;
218         }
219 
220         // Create histogram (initialized to 0.0)
221         if (bins == null || bins.length != numberOfBins) {
222             bins = new double[numberOfBins];
223         }
224         Arrays.fill(bins, 0.0);
225 
226 
227         // Vector containing gaussians being added to histogram
228         // Data is added to histogram as if it was convoluted with a gaussian
229         // to add smoothness to results
230         if (gaussian == null || gaussian.length != numberOfBins) {
231             gaussian = new double[numberOfBins];
232         }
233 
234         // set of data values between bins
235         final var delta = (maxValue - minValue) / (numberOfBins - 1);
236 
237         // iterate over input data to add data to histogram
238         double gaussianCenterPos;
239         for (var data : inputData) {
240             gaussianCenterPos = (data - minValue) / delta;
241             computeGaussian(gaussian, gaussianCenterPos);
242 
243             // add gaussian centered at input data value to histogram bins
244             ArrayUtils.sum(bins, gaussian, bins);
245         }
246 
247         // find location of maximum in histogram
248         var maxBin = 0.0;
249         final double maxFuncValue;
250         int maxPos = 0;
251         for (var i = 0; i < numberOfBins; i++) {
252             if (bins[i] > maxBin) {
253                 maxBin = bins[i];
254                 maxPos = i;
255             }
256         }
257 
258         maxFuncValue = minValue + maxPos * delta;
259 
260         locked = false;
261         return maxFuncValue;
262     }
263 
264     /**
265      * Internal method to compute values of Gaussian vector assumed to be
266      * centered at provided value. Gaussian values are stored in provided array
267      * so that array can be reused.
268      *
269      * @param gaussian  Array containing Gaussian values
270      * @param centerPos Value where Gaussian is centered
271      */
272     protected void computeGaussian(final double[] gaussian, final double centerPos) {
273         final var length = gaussian.length;
274 
275         double x;
276         for (var i = 0; i < length; i++) {
277             x = i - centerPos;
278             gaussian[i] = Math.exp(-x * x / (2.0 * gaussianSigma * gaussianSigma))
279                     / (Math.sqrt(2.0 * Math.PI) * gaussianSigma);
280         }
281     }
282 
283     /**
284      * Internal method to set number of bins to be used on the histogram. The
285      * larger the value being set the more precise results will be but more data
286      * will be needed to obtain statistically meaningful data so that enough
287      * samples are present on each bin.
288      *
289      * @param numberOfBins Number of bins to be used on the histogram.
290      * @throws IllegalArgumentException Raised if provided value is lower than
291      *                                  the allowed minimum.
292      */
293     private void internalSetNumberOfBins(final int numberOfBins) {
294         if (numberOfBins < MIN_NUMBER_OF_BINS) {
295             throw new IllegalArgumentException();
296         }
297 
298         this.numberOfBins = numberOfBins;
299     }
300 }