Usage of com.amazon.randomcutforest.parkservices.state.ThresholdedRandomCutForestMapper in project random-cut-forest-by-aws (by aws):
class ConsistencyTest, method MixedShinglingTest.
@Test
public void MixedShinglingTest() {
    int sampleSize = 256;
    int baseDimensions = 1;
    int shingleSize = 4;
    int dimensions = baseDimensions * shingleSize;
    long seed = new Random().nextLong();
    System.out.println(seed);
    // the assertions demand exact equality, so one trial is enough
    int numTrials = 1;
    // a smaller forest keeps the runtime manageable
    int numberOfTrees = 30;
    int length = 2000 * sampleSize;
    int testLength = length;
    for (int i = 0; i < numTrials; i++) {
        // forest that performs shingling internally from raw points
        ThresholdedRandomCutForest internalForest = new ThresholdedRandomCutForest.Builder<>().compact(true)
                .dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed)
                .numberOfTrees(numberOfTrees).internalShinglingEnabled(true).shingleSize(shingleSize)
                .anomalyRate(0.01).build();
        // forest fed pre-shingled input, still aware of the true shingle size
        ThresholdedRandomCutForest externalForest = new ThresholdedRandomCutForest.Builder<>().compact(true)
                .dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed)
                .numberOfTrees(numberOfTrees).internalShinglingEnabled(false).shingleSize(shingleSize)
                .anomalyRate(0.01).build();
        // forest fed pre-shingled input but configured with shingle size 1
        ThresholdedRandomCutForest obliviousForest = new ThresholdedRandomCutForest.Builder<>().compact(true)
                .dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed)
                .numberOfTrees(numberOfTrees).internalShinglingEnabled(false).shingleSize(1)
                .anomalyRate(0.01).build();
        MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(length + testLength, 50,
                100, 5, seed + i, baseDimensions);
        double[][] shingledData = generateShingledData(dataWithKeys.data, shingleSize, baseDimensions, false);
        assertEquals(shingledData.length, dataWithKeys.data.length - shingleSize + 1);
        int pointIndex = shingleSize - 1;
        // warm up the internally shingled forest with the first partial shingle
        for (int j = 0; j < shingleSize - 1; j++) {
            internalForest.process(dataWithKeys.data[j], 0L);
        }
        for (int j = 0; j < length; j++) {
            // the raw point must coincide with the newest slot of the shingle
            for (int y = 0; y < baseDimensions; y++) {
                assertEquals(dataWithKeys.data[pointIndex][y],
                        shingledData[j][(shingleSize - 1) * baseDimensions + y], 1e-10);
            }
            AnomalyDescriptor internalResult = internalForest.process(dataWithKeys.data[pointIndex], 0L);
            ++pointIndex;
            AnomalyDescriptor externalResult = externalForest.process(shingledData[j], 0L);
            AnomalyDescriptor obliviousResult = obliviousForest.process(shingledData[j], 0L);
            assertEquals(internalResult.getRCFScore(), externalResult.getRCFScore(), 1e-10);
            assertEquals(internalResult.getAnomalyGrade(), externalResult.getAnomalyGrade(), 1e-10);
            assertEquals(internalResult.getRCFScore(), obliviousResult.getRCFScore(), 1e-10);
            // grades are not compared against the shingle-oblivious forest: its
            // thresholder carries the wrong shingle size
        }
        // serialize/deserialize the externally shingled forest and keep testing on the copy
        ThresholdedRandomCutForestMapper mapper = new ThresholdedRandomCutForestMapper();
        ThresholdedRandomCutForest restoredForest = mapper.toModel(mapper.toState(externalForest));
        for (int j = length; j < shingledData.length; j++) {
            // the raw point must coincide with the newest slot of the shingle
            for (int y = 0; y < baseDimensions; y++) {
                assertEquals(dataWithKeys.data[pointIndex][y],
                        shingledData[j][(shingleSize - 1) * baseDimensions + y], 1e-10);
            }
            AnomalyDescriptor internalResult = internalForest.process(dataWithKeys.data[pointIndex], 0L);
            ++pointIndex;
            AnomalyDescriptor externalResult = externalForest.process(shingledData[j], 0L);
            AnomalyDescriptor obliviousResult = obliviousForest.process(shingledData[j], 0L);
            AnomalyDescriptor restoredResult = restoredForest.process(shingledData[j], 0L);
            assertEquals(internalResult.getRCFScore(), externalResult.getRCFScore(), 1e-10);
            assertEquals(internalResult.getAnomalyGrade(), externalResult.getAnomalyGrade(), 1e-10);
            assertEquals(internalResult.getRCFScore(), obliviousResult.getRCFScore(), 1e-10);
            // grades of the shingle-oblivious forest are skipped (wrong shingle size
            // in its thresholder); the round-tripped forest must match exactly
            assertEquals(internalResult.getRCFScore(), restoredResult.getRCFScore(), 1e-10);
            assertEquals(internalResult.getAnomalyGrade(), restoredResult.getAnomalyGrade(), 1e-10);
        }
    }
}
Usage of com.amazon.randomcutforest.parkservices.state.ThresholdedRandomCutForestMapper in project random-cut-forest-by-aws (by aws):
class ConsistencyTest, method TimeAugmentedTest.
@ParameterizedTest
@EnumSource(TransformMethod.class)
public void TimeAugmentedTest(TransformMethod transformMethod) {
    int sampleSize = 256;
    int baseDimensions = 1;
    int shingleSize = 4;
    int dimensions = baseDimensions * shingleSize;
    // the assertions demand exact equality, so one trial is enough
    int numTrials = 1;
    // a smaller forest keeps the runtime manageable
    int numberOfTrees = 30;
    int length = 10 * sampleSize;
    int dataSize = 2 * length;
    for (int i = 0; i < numTrials; i++) {
        Precision precision = Precision.FLOAT_32;
        long seed = new Random().nextLong();
        System.out.println("seed = " + seed);
        // TransformMethod transformMethod = TransformMethod.NONE;
        // STANDARD baseline; TIME_AUGMENTED twin must agree as long as the
        // configurations match and the time weight stays 0
        ThresholdedRandomCutForest standard = ThresholdedRandomCutForest.builder().compact(true)
                .dimensions(dimensions).randomSeed(0).numberOfTrees(numberOfTrees).shingleSize(shingleSize)
                .sampleSize(sampleSize).internalShinglingEnabled(true).precision(precision).anomalyRate(0.01)
                .forestMode(ForestMode.STANDARD).weightTime(0).transformMethod(transformMethod)
                .normalizeTime(true).outputAfter(32).initialAcceptFraction(0.125).build();
        ThresholdedRandomCutForest augmented = ThresholdedRandomCutForest.builder().compact(true)
                .dimensions(dimensions).randomSeed(0).numberOfTrees(numberOfTrees).shingleSize(shingleSize)
                .sampleSize(sampleSize).internalShinglingEnabled(true).precision(precision).anomalyRate(0.01)
                .forestMode(ForestMode.TIME_AUGMENTED).weightTime(0).transformMethod(transformMethod)
                .normalizeTime(true).outputAfter(32).initialAcceptFraction(0.125).build();
        // both forests must share thresholds/horizon, otherwise grades diverge
        standard.setLowerThreshold(1.1);
        augmented.setLowerThreshold(1.1);
        standard.setHorizon(0.75);
        augmented.setHorizon(0.75);
        Random noise = new Random(0);
        // change the last argument seed for a different run
        MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(
                dataSize + shingleSize - 1, 50, 100, 5, seed, baseDimensions);
        int count = 0;
        for (int j = 0; j < length; j++) {
            long timestamp = 100 * count + noise.nextInt(10) - 5;
            AnomalyDescriptor standardResult = standard.process(dataWithKeys.data[j], timestamp);
            AnomalyDescriptor augmentedResult = augmented.process(dataWithKeys.data[j], timestamp);
            checkArgument(Math.abs(standardResult.getRCFScore() - augmentedResult.getRCFScore()) < 1e-10,
                    " error");
            checkArgument(Math.abs(standardResult.getAnomalyGrade() - augmentedResult.getAnomalyGrade()) < 1e-10,
                    " error");
            ++count;
        }
        // round-trip the time-augmented forest through its state mapper
        ThresholdedRandomCutForestMapper mapper = new ThresholdedRandomCutForestMapper();
        ThresholdedRandomCutForest restored = mapper.toModel(mapper.toState(augmented));
        for (int j = length; j < 2 * length; j++) {
            // can be a different gap
            long timestamp = 150 * count + noise.nextInt(10) - 5;
            AnomalyDescriptor standardResult = standard.process(dataWithKeys.data[count], timestamp);
            AnomalyDescriptor augmentedResult = augmented.process(dataWithKeys.data[count], timestamp);
            AnomalyDescriptor restoredResult = restored.process(dataWithKeys.data[count], timestamp);
            assertEquals(standardResult.getRCFScore(), augmentedResult.getRCFScore(), 1e-10);
            assertEquals(standardResult.getAnomalyGrade(), augmentedResult.getAnomalyGrade(), 1e-10);
            assertEquals(standardResult.getRCFScore(), restoredResult.getRCFScore(), 1e-10);
            assertEquals(standardResult.getAnomalyGrade(), restoredResult.getAnomalyGrade(), 1e-10);
        }
    }
}
Usage of com.amazon.randomcutforest.parkservices.state.ThresholdedRandomCutForestMapper in project random-cut-forest-by-aws (by aws):
class ConsistencyTest, method ImputeTest.
@ParameterizedTest
@EnumSource(TransformMethod.class)
public void ImputeTest(TransformMethod transformMethod) {
    int sampleSize = 256;
    int baseDimensions = 1;
    int shingleSize = 4;
    int dimensions = baseDimensions * shingleSize;
    // the assertions demand exact equality, so one trial is enough
    int numTrials = 1;
    // a smaller forest keeps the runtime manageable
    int numberOfTrees = 30;
    int length = 10 * sampleSize;
    int dataSize = 2 * length;
    for (int i = 0; i < numTrials; i++) {
        Precision precision = Precision.FLOAT_32;
        long seed = new Random().nextLong();
        System.out.println("seed = " + seed);
        double[] weights = new double[] { 1.7, 4.2 };
        // STANDARD baseline; STREAMING_IMPUTE twin must agree while no values
        // are actually missing and the time weight stays 0
        ThresholdedRandomCutForest standard = ThresholdedRandomCutForest.builder().compact(true)
                .dimensions(dimensions).randomSeed(0).numberOfTrees(numberOfTrees).shingleSize(shingleSize)
                .sampleSize(sampleSize).internalShinglingEnabled(true).precision(precision).anomalyRate(0.01)
                .forestMode(ForestMode.STANDARD).weightTime(0).transformMethod(transformMethod)
                .normalizeTime(true).outputAfter(32).initialAcceptFraction(0.125).weights(weights).build();
        ThresholdedRandomCutForest imputing = ThresholdedRandomCutForest.builder().compact(true)
                .dimensions(dimensions).randomSeed(0).numberOfTrees(numberOfTrees).shingleSize(shingleSize)
                .sampleSize(sampleSize).internalShinglingEnabled(true).precision(precision).anomalyRate(0.01)
                .forestMode(ForestMode.STREAMING_IMPUTE).weightTime(0).transformMethod(transformMethod)
                .normalizeTime(true).outputAfter(32).initialAcceptFraction(0.125).weights(weights).build();
        // both forests must share thresholds/horizon, otherwise grades diverge
        standard.setLowerThreshold(1.1);
        imputing.setLowerThreshold(1.1);
        standard.setHorizon(0.75);
        imputing.setHorizon(0.75);
        Random noise = new Random(0);
        // change the last argument seed for a different run
        MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(
                dataSize + shingleSize - 1, 50, 100, 5, seed, baseDimensions);
        for (int j = 0; j < length; j++) {
            // the gap between timestamps has to be asymptotically the same
            long timestamp = 100 * j + noise.nextInt(10) - 5;
            AnomalyDescriptor standardResult = standard.process(dataWithKeys.data[j], 0L);
            AnomalyDescriptor imputingResult = imputing.process(dataWithKeys.data[j], timestamp);
            assertEquals(standardResult.getRCFScore(), imputingResult.getRCFScore(), 1e-6);
            assertEquals(standardResult.getAnomalyGrade(), imputingResult.getAnomalyGrade(), 1e-6);
        }
        // round-trip the imputing forest through its state mapper
        ThresholdedRandomCutForestMapper mapper = new ThresholdedRandomCutForestMapper();
        ThresholdedRandomCutForest restored = mapper.toModel(mapper.toState(imputing));
        for (int j = length; j < 2 * length; j++) {
            // the gap must remain the same as in the first phase
            long timestamp = 100 * j + noise.nextInt(10) - 5;
            AnomalyDescriptor standardResult = standard.process(dataWithKeys.data[j], 0L);
            AnomalyDescriptor restoredResult = restored.process(dataWithKeys.data[j], timestamp);
            assertEquals(standardResult.getRCFScore(), restoredResult.getRCFScore(), 1e-6);
            assertEquals(standardResult.getAnomalyGrade(), restoredResult.getAnomalyGrade(), 1e-6);
        }
    }
}
Aggregations