1 /* 2 * Copyright (C) 2013 Alberto Irurueta Carro (alberto@irurueta.com) 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 package com.irurueta.numerical.robust; 17 18 import com.irurueta.statistics.UniformRandomizer; 19 20 import java.util.HashSet; 21 import java.util.Set; 22 23 /** 24 * This class computes indices of subsets of samples using a uniform randomizer 25 * to pick random samples as fast as possible. 26 * This class ensures that samples are not repeated within a single subset. 27 */ 28 public class FastRandomSubsetSelector extends SubsetSelector { 29 30 /** 31 * Constant defining whether randomizer needs to be initialized with system 32 * timer. By disabling this option it is ensured that the same result is 33 * obtained on each execution. 34 */ 35 public static final boolean DEFAULT_SEED_RANDOMIZER_WITH_TIME = false; 36 37 /** 38 * Randomizer to pick random indexes. 39 */ 40 private UniformRandomizer randomizer; 41 42 /** 43 * Set containing selected indices on a given run. 44 * This is kept around between executions to avoid excessive calls to 45 * garbage collector, as random subset generation is likely to be called 46 * a large number of times during robust estimations. Because of that, 47 * if a robust estimator is capable of spanning multiple executions at once 48 * on different threads, then each thread needs to have a different subset 49 * selector instance. 50 */ 51 private final Set<Integer> selectedIndices; 52 53 /** 54 * Constructor. 55 * 56 * @param numSamples number of samples to select subsets from 57 * @throws IllegalArgumentException if provided number of samples is zero 58 * or negative 59 */ 60 public FastRandomSubsetSelector(final int numSamples) { 61 this(numSamples, DEFAULT_SEED_RANDOMIZER_WITH_TIME); 62 } 63 64 /** 65 * Constructor. 66 * 67 * @param numSamples number of samples to select subsets from. 68 * @param seedRandomizerWithTime true if randomizer seed must be initialized 69 * to system timer to obtain more random results. 70 * @throws IllegalArgumentException if provided number of samples is zero 71 * or negative. 72 */ 73 public FastRandomSubsetSelector(final int numSamples, final boolean seedRandomizerWithTime) { 74 super(numSamples); 75 createRandomizer(seedRandomizerWithTime); 76 selectedIndices = new HashSet<>(); 77 } 78 79 /** 80 * Returns type of this subset selector. 81 * 82 * @return type of this subset selector. 83 */ 84 @Override 85 public SubsetSelectorType getType() { 86 return SubsetSelectorType.FAST_RANDOM_SUBSET_SELECTOR; 87 } 88 89 /** 90 * Computes a random subset of indices within range of number of samples to 91 * be used on robust estimators. 92 * 93 * @param subsetSize subset size to be computed. This value must be smaller 94 * than total number of samples. 95 * @param result array containing indices to be picked. Provided array must 96 * be at least of length subsetSize. The former subsetSize entries of the 97 * array will be modified by this method. 98 * @throws NotEnoughSamplesException if subset size is greater than the 99 * total number of samples. 100 * @throws InvalidSubsetSizeException if subset size is zero or if result 101 * array does not have at least a length of subsetSize. 102 */ 103 @Override 104 public void computeRandomSubsets(final int subsetSize, final int[] result) throws NotEnoughSamplesException, 105 InvalidSubsetSizeException { 106 if (subsetSize == 0) { 107 throw new InvalidSubsetSizeException(); 108 } 109 if (result.length < subsetSize) { 110 throw new InvalidSubsetSizeException(); 111 } 112 if (numSamples < subsetSize) { 113 throw new NotEnoughSamplesException(); 114 } 115 116 // On start set of selected indices is empty 117 selectedIndices.clear(); 118 119 var counter = 0; 120 int index; 121 do { 122 index = randomizer.nextInt(0, numSamples); 123 124 // check whether this index has already been selected 125 if (selectedIndices.contains(index)) { 126 continue; 127 } 128 129 // if not selected, pick it now 130 selectedIndices.add(index); 131 result[counter] = index; 132 counter++; 133 } while (counter < subsetSize); 134 135 selectedIndices.clear(); 136 } 137 138 /** 139 * Computes a random subset of indices within provided range of positions to 140 * be used on robust estimators. 141 * 142 * @param minPos minimum position to be picked. This value must be greater 143 * or equal than zero and smaller than the total number of samples and less 144 * than maxPos. 145 * @param maxPos maximum position to be picked. This value must be greater 146 * or equal than zero and smaller than the total number of samples and 147 * greater than minPos. 148 * @param subsetSize subset size to be computed. This value must be smaller 149 * than total number of samples. 150 * @param pickLast true indicates that last sample in range must always be 151 * picked within subset. This is done to obtain faster execution times and 152 * greater stability on some algorithms. 153 * @param result array containing indices to be picked. Provided array must 154 * be at least of length subsetSize. The former subsetSize entries of the 155 * array will be modified by this method. 156 * @throws NotEnoughSamplesException if subset size is greater than the 157 * total number of samples or if maxPos is greater than the total number of 158 * samples. 159 * @throws InvalidSubsetSizeException if subset size is zero or if result 160 * array does not have at least a length of subsetSize, or ig subset size 161 * is greater than the allowed range of positions to be picked. 162 * @throws InvalidSubsetRangeException if maximum position is smaller than 163 * minimum position or maximum or minimum position are negative. 164 */ 165 @Override 166 public void computeRandomSubsetsInRange( 167 final int minPos, final int maxPos, final int subsetSize, final boolean pickLast, final int[] result) 168 throws NotEnoughSamplesException, InvalidSubsetSizeException, InvalidSubsetRangeException { 169 if (subsetSize == 0) { 170 throw new InvalidSubsetSizeException(); 171 } 172 if (result.length < subsetSize) { 173 throw new InvalidSubsetSizeException(); 174 } 175 if (minPos >= maxPos || maxPos < 0 || minPos < 0) { 176 throw new InvalidSubsetRangeException(); 177 } 178 if ((maxPos - minPos) < subsetSize) { 179 throw new InvalidSubsetSizeException(); 180 } 181 if (numSamples < subsetSize) { 182 throw new NotEnoughSamplesException(); 183 } 184 if (maxPos > numSamples) { 185 throw new NotEnoughSamplesException(); 186 } 187 188 // On start set of selected indices is empty 189 selectedIndices.clear(); 190 191 var counter = 0; 192 int index; 193 194 if (pickLast) { 195 // this is done to accelerate computations and obtain more 196 // stable results in some cases 197 // pick last element in range 198 index = maxPos - 1; 199 selectedIndices.add(index); 200 result[counter] = index; 201 counter++; 202 } 203 204 if (!pickLast || counter < result.length) { 205 // keep selecting only if not all elements have already been selected 206 do { 207 index = randomizer.nextInt(minPos, maxPos); 208 209 // check whether this index has already been selected 210 if (selectedIndices.contains(index)) { 211 continue; 212 } 213 214 // if not selected, pick it now 215 selectedIndices.add(index); 216 result[counter] = index; 217 counter++; 218 } while (counter < subsetSize); 219 } 220 221 selectedIndices.clear(); 222 } 223 224 /** 225 * Returns internal randomizer to generate uniformly distributed random 226 * values. 227 * 228 * @return internal randomizer. 229 */ 230 protected UniformRandomizer getRandomizer() { 231 return randomizer; 232 } 233 234 /** 235 * Initializes randomizer for an instance of this class. 236 * 237 * @param seedWithTime if true randomizer will be initialized using current 238 * time as seed. If false, randomizer will always generate the same 239 * pseudo-random sequence on each JVM execution. 240 */ 241 private void createRandomizer(final boolean seedWithTime) { 242 randomizer = new UniformRandomizer(); 243 if (seedWithTime) { 244 randomizer.setSeed(System.currentTimeMillis()); 245 } 246 } 247 }