1 /*
2 * Copyright (C) 2013 Alberto Irurueta Carro (alberto@irurueta.com)
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 package com.irurueta.numerical.robust;
17
18 import com.irurueta.statistics.UniformRandomizer;
19
20 import java.util.HashSet;
21 import java.util.Set;
22
23 /**
24 * This class computes indices of subsets of samples using a uniform randomizer
25 * to pick random samples as fast as possible.
26 * This class ensures that samples are not repeated within a single subset.
27 */
28 public class FastRandomSubsetSelector extends SubsetSelector {
29
30 /**
31 * Constant defining whether randomizer needs to be initialized with system
32 * timer. By disabling this option it is ensured that the same result is
33 * obtained on each execution.
34 */
35 public static final boolean DEFAULT_SEED_RANDOMIZER_WITH_TIME = false;
36
37 /**
38 * Randomizer to pick random indexes.
39 */
40 private UniformRandomizer randomizer;
41
42 /**
43 * Set containing selected indices on a given run.
44 * This is kept around between executions to avoid excessive calls to
45 * garbage collector, as random subset generation is likely to be called
46 * a large number of times during robust estimations. Because of that,
47 * if a robust estimator is capable of spanning multiple executions at once
48 * on different threads, then each thread needs to have a different subset
49 * selector instance.
50 */
51 private final Set<Integer> selectedIndices;
52
53 /**
54 * Constructor.
55 *
56 * @param numSamples number of samples to select subsets from
57 * @throws IllegalArgumentException if provided number of samples is zero
58 * or negative
59 */
60 public FastRandomSubsetSelector(final int numSamples) {
61 this(numSamples, DEFAULT_SEED_RANDOMIZER_WITH_TIME);
62 }
63
64 /**
65 * Constructor.
66 *
67 * @param numSamples number of samples to select subsets from.
68 * @param seedRandomizerWithTime true if randomizer seed must be initialized
69 * to system timer to obtain more random results.
70 * @throws IllegalArgumentException if provided number of samples is zero
71 * or negative.
72 */
73 public FastRandomSubsetSelector(final int numSamples, final boolean seedRandomizerWithTime) {
74 super(numSamples);
75 createRandomizer(seedRandomizerWithTime);
76 selectedIndices = new HashSet<>();
77 }
78
79 /**
80 * Returns type of this subset selector.
81 *
82 * @return type of this subset selector.
83 */
84 @Override
85 public SubsetSelectorType getType() {
86 return SubsetSelectorType.FAST_RANDOM_SUBSET_SELECTOR;
87 }
88
89 /**
90 * Computes a random subset of indices within range of number of samples to
91 * be used on robust estimators.
92 *
93 * @param subsetSize subset size to be computed. This value must be smaller
94 * than total number of samples.
95 * @param result array containing indices to be picked. Provided array must
96 * be at least of length subsetSize. The former subsetSize entries of the
97 * array will be modified by this method.
98 * @throws NotEnoughSamplesException if subset size is greater than the
99 * total number of samples.
100 * @throws InvalidSubsetSizeException if subset size is zero or if result
101 * array does not have at least a length of subsetSize.
102 */
103 @Override
104 public void computeRandomSubsets(final int subsetSize, final int[] result) throws NotEnoughSamplesException,
105 InvalidSubsetSizeException {
106 if (subsetSize == 0) {
107 throw new InvalidSubsetSizeException();
108 }
109 if (result.length < subsetSize) {
110 throw new InvalidSubsetSizeException();
111 }
112 if (numSamples < subsetSize) {
113 throw new NotEnoughSamplesException();
114 }
115
116 // On start set of selected indices is empty
117 selectedIndices.clear();
118
119 var counter = 0;
120 int index;
121 do {
122 index = randomizer.nextInt(0, numSamples);
123
124 // check whether this index has already been selected
125 if (selectedIndices.contains(index)) {
126 continue;
127 }
128
129 // if not selected, pick it now
130 selectedIndices.add(index);
131 result[counter] = index;
132 counter++;
133 } while (counter < subsetSize);
134
135 selectedIndices.clear();
136 }
137
138 /**
139 * Computes a random subset of indices within provided range of positions to
140 * be used on robust estimators.
141 *
142 * @param minPos minimum position to be picked. This value must be greater
143 * or equal than zero and smaller than the total number of samples and less
144 * than maxPos.
145 * @param maxPos maximum position to be picked. This value must be greater
146 * or equal than zero and smaller than the total number of samples and
147 * greater than minPos.
148 * @param subsetSize subset size to be computed. This value must be smaller
149 * than total number of samples.
150 * @param pickLast true indicates that last sample in range must always be
151 * picked within subset. This is done to obtain faster execution times and
152 * greater stability on some algorithms.
153 * @param result array containing indices to be picked. Provided array must
154 * be at least of length subsetSize. The former subsetSize entries of the
155 * array will be modified by this method.
156 * @throws NotEnoughSamplesException if subset size is greater than the
157 * total number of samples or if maxPos is greater than the total number of
158 * samples.
159 * @throws InvalidSubsetSizeException if subset size is zero or if result
160 * array does not have at least a length of subsetSize, or ig subset size
161 * is greater than the allowed range of positions to be picked.
162 * @throws InvalidSubsetRangeException if maximum position is smaller than
163 * minimum position or maximum or minimum position are negative.
164 */
165 @Override
166 public void computeRandomSubsetsInRange(
167 final int minPos, final int maxPos, final int subsetSize, final boolean pickLast, final int[] result)
168 throws NotEnoughSamplesException, InvalidSubsetSizeException, InvalidSubsetRangeException {
169 if (subsetSize == 0) {
170 throw new InvalidSubsetSizeException();
171 }
172 if (result.length < subsetSize) {
173 throw new InvalidSubsetSizeException();
174 }
175 if (minPos >= maxPos || maxPos < 0 || minPos < 0) {
176 throw new InvalidSubsetRangeException();
177 }
178 if ((maxPos - minPos) < subsetSize) {
179 throw new InvalidSubsetSizeException();
180 }
181 if (numSamples < subsetSize) {
182 throw new NotEnoughSamplesException();
183 }
184 if (maxPos > numSamples) {
185 throw new NotEnoughSamplesException();
186 }
187
188 // On start set of selected indices is empty
189 selectedIndices.clear();
190
191 var counter = 0;
192 int index;
193
194 if (pickLast) {
195 // this is done to accelerate computations and obtain more
196 // stable results in some cases
197 // pick last element in range
198 index = maxPos - 1;
199 selectedIndices.add(index);
200 result[counter] = index;
201 counter++;
202 }
203
204 if (!pickLast || counter < result.length) {
205 // keep selecting only if not all elements have already been selected
206 do {
207 index = randomizer.nextInt(minPos, maxPos);
208
209 // check whether this index has already been selected
210 if (selectedIndices.contains(index)) {
211 continue;
212 }
213
214 // if not selected, pick it now
215 selectedIndices.add(index);
216 result[counter] = index;
217 counter++;
218 } while (counter < subsetSize);
219 }
220
221 selectedIndices.clear();
222 }
223
224 /**
225 * Returns internal randomizer to generate uniformly distributed random
226 * values.
227 *
228 * @return internal randomizer.
229 */
230 protected UniformRandomizer getRandomizer() {
231 return randomizer;
232 }
233
234 /**
235 * Initializes randomizer for an instance of this class.
236 *
237 * @param seedWithTime if true randomizer will be initialized using current
238 * time as seed. If false, randomizer will always generate the same
239 * pseudo-random sequence on each JVM execution.
240 */
241 private void createRandomizer(final boolean seedWithTime) {
242 randomizer = new UniformRandomizer();
243 if (seedWithTime) {
244 randomizer.setSeed(System.currentTimeMillis());
245 }
246 }
247 }