Use of com.amazon.randomcutforest.testutils.MultiDimDataWithKey in project random-cut-forest-by-aws by aws.
From class ConsistencyTest, method InternalShinglingTest.
@Test
public void InternalShinglingTest() {
    int sampleSize = 256;
    int baseDimensions = 2;
    int shingleSize = 4;
    int dimensions = baseDimensions * shingleSize;
    long seed = new Random().nextLong();
    // just once since testing exact equality
    int numTrials = 1;
    int length = 400 * sampleSize;
    for (int i = 0; i < numTrials; i++) {
        RandomCutForest forest = RandomCutForest.builder().compact(true).dimensions(dimensions)
                .precision(Precision.FLOAT_32).internalShinglingEnabled(true).shingleSize(shingleSize)
                .randomSeed(seed).build();
        ThresholdedRandomCutForest first = new ThresholdedRandomCutForest.Builder<>().compact(true)
                .dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed)
                .internalShinglingEnabled(true).shingleSize(shingleSize).anomalyRate(0.01).build();
        MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(length, 50, 100, 5,
                seed + i, baseDimensions);
        for (double[] point : dataWithKeys.data) {
            AnomalyDescriptor firstResult = first.process(point, 0L);
            assertEquals(firstResult.getRCFScore(), forest.getAnomalyScore(point), 1e-10);
            forest.update(point);
        }
    }
}
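The equivalence checked above can be made concrete. With internalShinglingEnabled(true), the forest keeps a rolling buffer of the last shingleSize base points and scores their concatenation. A minimal sketch of that buffering, assuming a plain shift-and-append policy (updateShingle is an illustrative name, not a library method):

// Illustrative sketch of internal shingling: shift out the oldest base
// point and append the newest one. The shingle length is
// shingleSize * baseDimensions, matching `dimensions` in the test above.
static double[] updateShingle(double[] shingle, double[] point, int baseDimensions) {
    System.arraycopy(shingle, baseDimensions, shingle, 0, shingle.length - baseDimensions);
    System.arraycopy(point, 0, shingle, shingle.length - baseDimensions, baseDimensions);
    return shingle;
}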
Use of com.amazon.randomcutforest.testutils.MultiDimDataWithKey in project random-cut-forest-by-aws by aws.
From class ConsistencyTest, method MixedShinglingTest.
@Test
public void MixedShinglingTest() {
    int sampleSize = 256;
    int baseDimensions = 1;
    int shingleSize = 4;
    int dimensions = baseDimensions * shingleSize;
    long seed = new Random().nextLong();
    System.out.println(seed);
    // the test checks exact equality, so reduce the number of trials
    int numTrials = 1;
    // and use fewer trees to speed up the test
    int numberOfTrees = 30;
    int length = 2000 * sampleSize;
    int testLength = length;
    for (int i = 0; i < numTrials; i++) {
        ThresholdedRandomCutForest first = new ThresholdedRandomCutForest.Builder<>().compact(true)
                .dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed)
                .numberOfTrees(numberOfTrees).internalShinglingEnabled(true).shingleSize(shingleSize)
                .anomalyRate(0.01).build();
        ThresholdedRandomCutForest second = new ThresholdedRandomCutForest.Builder<>().compact(true)
                .dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed)
                .numberOfTrees(numberOfTrees).internalShinglingEnabled(false).shingleSize(shingleSize)
                .anomalyRate(0.01).build();
        ThresholdedRandomCutForest third = new ThresholdedRandomCutForest.Builder<>().compact(true)
                .dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed)
                .numberOfTrees(numberOfTrees).internalShinglingEnabled(false).shingleSize(1)
                .anomalyRate(0.01).build();
        MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(length + testLength, 50,
                100, 5, seed + i, baseDimensions);
        double[][] shingledData = generateShingledData(dataWithKeys.data, shingleSize, baseDimensions, false);
        assertEquals(shingledData.length, dataWithKeys.data.length - shingleSize + 1);
        int count = shingleSize - 1;
        // insert the initial points
        for (int j = 0; j < shingleSize - 1; j++) {
            first.process(dataWithKeys.data[j], 0L);
        }
        for (int j = 0; j < length; j++) {
            // validate equality of the points
            for (int y = 0; y < baseDimensions; y++) {
                assertEquals(dataWithKeys.data[count][y],
                        shingledData[j][(shingleSize - 1) * baseDimensions + y], 1e-10);
            }
            AnomalyDescriptor firstResult = first.process(dataWithKeys.data[count], 0L);
            ++count;
            AnomalyDescriptor secondResult = second.process(shingledData[j], 0L);
            AnomalyDescriptor thirdResult = third.process(shingledData[j], 0L);
            assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
            assertEquals(firstResult.getAnomalyGrade(), secondResult.getAnomalyGrade(), 1e-10);
            assertEquals(firstResult.getRCFScore(), thirdResult.getRCFScore(), 1e-10);
            // grades will not match between first and third because the
            // thresholder has the wrong information about the shingle size
        }
        ThresholdedRandomCutForestMapper mapper = new ThresholdedRandomCutForestMapper();
        ThresholdedRandomCutForest fourth = mapper.toModel(mapper.toState(second));
        for (int j = length; j < shingledData.length; j++) {
            // validate equality of the points
            for (int y = 0; y < baseDimensions; y++) {
                assertEquals(dataWithKeys.data[count][y],
                        shingledData[j][(shingleSize - 1) * baseDimensions + y], 1e-10);
            }
            AnomalyDescriptor firstResult = first.process(dataWithKeys.data[count], 0L);
            ++count;
            AnomalyDescriptor secondResult = second.process(shingledData[j], 0L);
            AnomalyDescriptor thirdResult = third.process(shingledData[j], 0L);
            AnomalyDescriptor fourthResult = fourth.process(shingledData[j], 0L);
            assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
            assertEquals(firstResult.getAnomalyGrade(), secondResult.getAnomalyGrade(), 1e-10);
            assertEquals(firstResult.getRCFScore(), thirdResult.getRCFScore(), 1e-10);
            // grades will not match between first and third because the
            // thresholder has the wrong information about the shingle size
            assertEquals(firstResult.getRCFScore(), fourthResult.getRCFScore(), 1e-10);
            assertEquals(firstResult.getAnomalyGrade(), fourthResult.getAnomalyGrade(), 1e-10);
        }
    }
}
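The helper generateShingledData is not shown in this listing. Consistent with the length assertion above (shingledData.length == data.length - shingleSize + 1), a plausible reconstruction slides a window of shingleSize consecutive base points and concatenates them, newest last; the real helper in the test class may differ in detail:

// Hypothetical reconstruction of generateShingledData; the `rotate`
// flag is passed as false in the test and is ignored here.
static double[][] generateShingledData(double[][] data, int shingleSize, int baseDimensions, boolean rotate) {
    double[][] shingled = new double[data.length - shingleSize + 1][shingleSize * baseDimensions];
    for (int i = 0; i < shingled.length; i++) {
        for (int k = 0; k < shingleSize; k++) {
            // row i holds base points i .. i + shingleSize - 1, newest last
            System.arraycopy(data[i + k], 0, shingled[i], k * baseDimensions, baseDimensions);
        }
    }
    return shingled;
}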
Use of com.amazon.randomcutforest.testutils.MultiDimDataWithKey in project random-cut-forest-by-aws by aws.
From class ConsistencyTest, method TimeAugmentedTest.
@ParameterizedTest
@EnumSource(TransformMethod.class)
public void TimeAugmentedTest(TransformMethod transformMethod) {
    int sampleSize = 256;
    int baseDimensions = 1;
    int shingleSize = 4;
    int dimensions = baseDimensions * shingleSize;
    // the test checks exact equality, so reduce the number of trials
    int numTrials = 1;
    // and use fewer trees to speed up the test
    int numberOfTrees = 30;
    int length = 10 * sampleSize;
    int dataSize = 2 * length;
    for (int i = 0; i < numTrials; i++) {
        Precision precision = Precision.FLOAT_32;
        long seed = new Random().nextLong();
        System.out.println("seed = " + seed);
        // TransformMethod transformMethod = TransformMethod.NONE;
        ThresholdedRandomCutForest first = ThresholdedRandomCutForest.builder().compact(true)
                .dimensions(dimensions).randomSeed(0).numberOfTrees(numberOfTrees).shingleSize(shingleSize)
                .sampleSize(sampleSize).internalShinglingEnabled(true).precision(precision).anomalyRate(0.01)
                .forestMode(ForestMode.STANDARD).weightTime(0).transformMethod(transformMethod)
                .normalizeTime(true).outputAfter(32).initialAcceptFraction(0.125).build();
        ThresholdedRandomCutForest second = ThresholdedRandomCutForest.builder().compact(true)
                .dimensions(dimensions).randomSeed(0).numberOfTrees(numberOfTrees).shingleSize(shingleSize)
                .sampleSize(sampleSize).internalShinglingEnabled(true).precision(precision).anomalyRate(0.01)
                .forestMode(ForestMode.TIME_AUGMENTED).weightTime(0).transformMethod(transformMethod)
                .normalizeTime(true).outputAfter(32).initialAcceptFraction(0.125).build();
        // ensure that the parameters are the same; otherwise the grades/scores
        // cannot be the same. weightTime has to be 0 in the above.
        first.setLowerThreshold(1.1);
        second.setLowerThreshold(1.1);
        first.setHorizon(0.75);
        second.setHorizon(0.75);
        Random noise = new Random(0);
        // change the seed argument for a different run
        MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(
                dataSize + shingleSize - 1, 50, 100, 5, seed, baseDimensions);
        int count = 0;
        for (int j = 0; j < length; j++) {
            long timestamp = 100 * count + noise.nextInt(10) - 5;
            AnomalyDescriptor result = first.process(dataWithKeys.data[j], timestamp);
            AnomalyDescriptor test = second.process(dataWithKeys.data[j], timestamp);
            checkArgument(Math.abs(result.getRCFScore() - test.getRCFScore()) < 1e-10, " error");
            checkArgument(Math.abs(result.getAnomalyGrade() - test.getAnomalyGrade()) < 1e-10, " error");
            ++count;
        }
        ThresholdedRandomCutForestMapper mapper = new ThresholdedRandomCutForestMapper();
        ThresholdedRandomCutForest third = mapper.toModel(mapper.toState(second));
        for (int j = length; j < 2 * length; j++) {
            // the timestamp gap can differ here
            long timestamp = 150 * count + noise.nextInt(10) - 5;
            AnomalyDescriptor firstResult = first.process(dataWithKeys.data[count], timestamp);
            AnomalyDescriptor secondResult = second.process(dataWithKeys.data[count], timestamp);
            AnomalyDescriptor thirdResult = third.process(dataWithKeys.data[count], timestamp);
            assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
            assertEquals(firstResult.getAnomalyGrade(), secondResult.getAnomalyGrade(), 1e-10);
            assertEquals(firstResult.getRCFScore(), thirdResult.getRCFScore(), 1e-10);
            assertEquals(firstResult.getAnomalyGrade(), thirdResult.getAnomalyGrade(), 1e-10);
        }
    }
}
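In ForestMode.TIME_AUGMENTED the (normalized) timestamp is appended to each input as an extra dimension, and weightTime scales that dimension; with weightTime(0) the extra coordinate carries no information, which is why the STANDARD and TIME_AUGMENTED results above agree. A hedged sketch of the augmentation step (augment is an illustrative name; the library's actual time normalization is more involved):

// Illustrative time augmentation: append a weighted, normalized
// timestamp as one extra coordinate. With timeWeight == 0 the
// augmented point is the original point plus a constant 0.
static double[] augment(double[] point, double normalizedTime, double timeWeight) {
    double[] augmented = java.util.Arrays.copyOf(point, point.length + 1);
    augmented[point.length] = timeWeight * normalizedTime;
    return augmented;
}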
Use of com.amazon.randomcutforest.testutils.MultiDimDataWithKey in project random-cut-forest-by-aws by aws.
From class ThresholdedRandomCutForestMapperTest, method testRoundTripTimeAugmentedNormalize.
@ParameterizedTest
@EnumSource(value = TransformMethod.class)
public void testRoundTripTimeAugmentedNormalize(TransformMethod method) {
    int sampleSize = 256;
    int baseDimensions = 2;
    int shingleSize = 4;
    int dimensions = baseDimensions * shingleSize;
    long seed = new Random().nextLong();
    ThresholdedRandomCutForest first = ThresholdedRandomCutForest.builder().compact(true)
            .dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed)
            .forestMode(ForestMode.TIME_AUGMENTED).normalizeTime(true).transformMethod(method)
            .internalShinglingEnabled(true).shingleSize(shingleSize).anomalyRate(0.01)
            .weights(new double[] { 1.0, 2.0 }).build();
    ThresholdedRandomCutForest second = new ThresholdedRandomCutForest.Builder<>().compact(true)
            .dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed)
            .forestMode(ForestMode.TIME_AUGMENTED).normalizeTime(true).internalShinglingEnabled(true)
            .transformMethod(method).shingleSize(shingleSize).anomalyRate(0.01)
            .weights(new double[] { 1.0, 2.0 }).build();
    Random r = new Random();
    long count = 0;
    MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(10 * sampleSize, 50, 100, 5,
            seed, baseDimensions);
    for (double[] point : dataWithKeys.data) {
        long stamp = 1000 * count + r.nextInt(10) - 5;
        AnomalyDescriptor firstResult = first.process(point, stamp);
        AnomalyDescriptor secondResult = second.process(point, stamp);
        ++count;
        assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
    }
    // serialize + deserialize
    ThresholdedRandomCutForestMapper mapper = new ThresholdedRandomCutForestMapper();
    ThresholdedRandomCutForest third = mapper.toModel(mapper.toState(second));
    MultiDimDataWithKey testData = ShingledMultiDimDataWithKeys.getMultiDimData(100, 50, 100, 5, seed,
            baseDimensions);
    // update the re-instantiated forest
    for (double[] point : testData.data) {
        long stamp = 100 * count + r.nextInt(10) - 5;
        AnomalyDescriptor firstResult = first.process(point, stamp);
        AnomalyDescriptor secondResult = second.process(point, stamp);
        AnomalyDescriptor thirdResult = third.process(point, stamp);
        assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
        assertEquals(firstResult.getRCFScore(), thirdResult.getRCFScore(), 1e-10);
        ++count;
    }
}
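The toState/toModel round trip is identical in every mapper test here; a small helper (illustrative, not part of the library) keeps the intent obvious at each call site:

// Convenience wrapper for the serialize + deserialize round trip
// exercised above; it uses only the mapper calls already shown.
static ThresholdedRandomCutForest roundTrip(ThresholdedRandomCutForest forest) {
    ThresholdedRandomCutForestMapper mapper = new ThresholdedRandomCutForestMapper();
    return mapper.toModel(mapper.toState(forest));
}

With this, the round-trip line above becomes ThresholdedRandomCutForest third = roundTrip(second);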
Use of com.amazon.randomcutforest.testutils.MultiDimDataWithKey in project random-cut-forest-by-aws by aws.
From class ThresholdedRandomCutForestMapperTest, method testRoundTripImputeInitial.
@ParameterizedTest
@MethodSource("args")
public void testRoundTripImputeInitial(TransformMethod transformMethod, ImputationMethod imputationMethod) {
    int sampleSize = 256;
    int baseDimensions = 2;
    int shingleSize = 4;
    int dimensions = baseDimensions * shingleSize;
    long seed = new Random().nextLong();
    System.out.println(seed);
    ThresholdedRandomCutForest first = new ThresholdedRandomCutForest.Builder<>().compact(true)
            .dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed)
            .forestMode(ForestMode.STREAMING_IMPUTE).internalShinglingEnabled(true).shingleSize(shingleSize)
            .transformMethod(transformMethod).imputationMethod(imputationMethod)
            .fillValues(new double[] { 1.0, 2.0 }).anomalyRate(0.01).build();
    ThresholdedRandomCutForest second = new ThresholdedRandomCutForest.Builder<>().compact(true)
            .dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed)
            .forestMode(ForestMode.STREAMING_IMPUTE).internalShinglingEnabled(true).shingleSize(shingleSize)
            .transformMethod(transformMethod).imputationMethod(imputationMethod)
            .fillValues(new double[] { 1.0, 2.0 }).anomalyRate(0.01).build();
    Random r = new Random(0);
    long count = 0;
    MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(sampleSize, 50, 100, 5, seed,
            baseDimensions);
    for (double[] point : dataWithKeys.data) {
        if (r.nextDouble() > 0.1) {
            long stamp = 1000 * count + r.nextInt(10) - 5;
            AnomalyDescriptor firstResult = first.process(point, stamp);
            AnomalyDescriptor secondResult = second.process(point, stamp);
            assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
        }
        ++count;
        // serialize + deserialize on every step
        ThresholdedRandomCutForestMapper mapper = new ThresholdedRandomCutForestMapper();
        second = mapper.toModel(mapper.toState(second));
    }
}
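In ForestMode.STREAMING_IMPUTE, gaps detected from the timestamps are filled by the configured ImputationMethod before shingling, and fillValues supplies one substitute value per base dimension for a fixed-values method. A much-simplified sketch of that substitution (illustrative only; the library also decides when, and for how many points, to impute based on the timestamp gaps, which is elided here):

// Greatly simplified fixed-value substitution for a missing reading;
// fillValues corresponds to new double[] { 1.0, 2.0 } in the builders above.
static double[] substituteMissing(double[] fillValues, int baseDimensions) {
    return java.util.Arrays.copyOf(fillValues, baseDimensions);
}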