Use of com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest in project random-cut-forest-by-aws by aws.
Class ThresholdedRandomCutForestMapperTest, method testRoundTripTimeAugmentedNormalize.
@ParameterizedTest
@EnumSource(value = TransformMethod.class)
public void testRoundTripTimeAugmentedNormalize(TransformMethod method) {
    int sampleSize = 256;
    int baseDimensions = 2;
    int shingleSize = 4;
    int dimensions = baseDimensions * shingleSize;
    long seed = new Random().nextLong();
    ThresholdedRandomCutForest first = ThresholdedRandomCutForest.builder().compact(true)
            .dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed)
            .forestMode(ForestMode.TIME_AUGMENTED).normalizeTime(true).transformMethod(method)
            .internalShinglingEnabled(true).shingleSize(shingleSize).anomalyRate(0.01)
            .weights(new double[] { 1.0, 2.0 }).build();
    ThresholdedRandomCutForest second = new ThresholdedRandomCutForest.Builder<>().compact(true)
            .dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed)
            .forestMode(ForestMode.TIME_AUGMENTED).normalizeTime(true).internalShinglingEnabled(true)
            .transformMethod(method).shingleSize(shingleSize).anomalyRate(0.01)
            .weights(new double[] { 1.0, 2.0 }).build();
    Random r = new Random();
    long count = 0;
    MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(10 * sampleSize, 50, 100,
            5, seed, baseDimensions);
    for (double[] point : dataWithKeys.data) {
        long stamp = 1000 * count + r.nextInt(10) - 5;
        AnomalyDescriptor firstResult = first.process(point, stamp);
        AnomalyDescriptor secondResult = second.process(point, stamp);
        ++count;
        assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
    }
    // serialize + deserialize
    ThresholdedRandomCutForestMapper mapper = new ThresholdedRandomCutForestMapper();
    ThresholdedRandomCutForest third = mapper.toModel(mapper.toState(second));
    MultiDimDataWithKey testData = ShingledMultiDimDataWithKeys.getMultiDimData(100, 50, 100, 5, seed,
            baseDimensions);
    // update re-instantiated forest; keep the timestamp scale of the first loop
    // so that time remains monotonically increasing
    for (double[] point : testData.data) {
        long stamp = 1000 * count + r.nextInt(10) - 5;
        AnomalyDescriptor firstResult = first.process(point, stamp);
        AnomalyDescriptor secondResult = second.process(point, stamp);
        AnomalyDescriptor thirdResult = third.process(point, stamp);
        assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
        assertEquals(firstResult.getRCFScore(), thirdResult.getRCFScore(), 1e-10);
        ++count;
    }
}
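In the test above the round trip stays in memory (mapper.toState followed by mapper.toModel). To persist a model across processes, one option is protostuff runtime-schema serialization, which the project uses in its serialization examples. A minimal sketch, assuming the io.protostuff dependency is on the classpath and that the state and mapper classes live in com.amazon.randomcutforest.parkservices.state (both package names are assumptions to verify):

import com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest;
import com.amazon.randomcutforest.parkservices.state.ThresholdedRandomCutForestMapper;
import com.amazon.randomcutforest.parkservices.state.ThresholdedRandomCutForestState;
import io.protostuff.LinkedBuffer;
import io.protostuff.ProtostuffIOUtil;
import io.protostuff.Schema;
import io.protostuff.runtime.RuntimeSchema;

public class ForestPersistenceSketch {

    private static final Schema<ThresholdedRandomCutForestState> SCHEMA =
            RuntimeSchema.getSchema(ThresholdedRandomCutForestState.class);

    // Serialize a forest to bytes via its state object.
    static byte[] toBytes(ThresholdedRandomCutForest forest) {
        ThresholdedRandomCutForestMapper mapper = new ThresholdedRandomCutForestMapper();
        LinkedBuffer buffer = LinkedBuffer.allocate(4096);
        try {
            return ProtostuffIOUtil.toByteArray(mapper.toState(forest), SCHEMA, buffer);
        } finally {
            buffer.clear();
        }
    }

    // Restore a forest from previously serialized bytes.
    static ThresholdedRandomCutForest fromBytes(byte[] bytes) {
        ThresholdedRandomCutForestState state = SCHEMA.newMessage();
        ProtostuffIOUtil.mergeFrom(bytes, state, SCHEMA);
        return new ThresholdedRandomCutForestMapper().toModel(state);
    }
}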
Use of com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest in project random-cut-forest-by-aws by aws.
Class ThresholdedRandomCutForestMapperTest, method testRoundTripImputeInitial.
@ParameterizedTest
@MethodSource("args")
public void testRoundTripImputeInitial(TransformMethod transformMethod, ImputationMethod imputationMethod) {
    int sampleSize = 256;
    int baseDimensions = 2;
    int shingleSize = 4;
    int dimensions = baseDimensions * shingleSize;
    long seed = new Random().nextLong();
    // log the seed so a failing run can be reproduced
    System.out.println(seed);
    ThresholdedRandomCutForest first = new ThresholdedRandomCutForest.Builder<>().compact(true)
            .dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed)
            .forestMode(ForestMode.STREAMING_IMPUTE).internalShinglingEnabled(true).shingleSize(shingleSize)
            .transformMethod(transformMethod).imputationMethod(imputationMethod)
            .fillValues(new double[] { 1.0, 2.0 }).anomalyRate(0.01).build();
    ThresholdedRandomCutForest second = new ThresholdedRandomCutForest.Builder<>().compact(true)
            .dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed)
            .forestMode(ForestMode.STREAMING_IMPUTE).internalShinglingEnabled(true).shingleSize(shingleSize)
            .transformMethod(transformMethod).imputationMethod(imputationMethod)
            .fillValues(new double[] { 1.0, 2.0 }).anomalyRate(0.01).build();
    Random r = new Random(0);
    long count = 0;
    MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(sampleSize, 50, 100, 5,
            seed, baseDimensions);
    for (double[] point : dataWithKeys.data) {
        // roughly 90% of the points are processed; the rest are skipped so the
        // STREAMING_IMPUTE mode has gaps to fill
        if (r.nextDouble() > 0.1) {
            long stamp = 1000 * count + r.nextInt(10) - 5;
            AnomalyDescriptor firstResult = first.process(point, stamp);
            AnomalyDescriptor secondResult = second.process(point, stamp);
            assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
        }
        ++count;
        // serialize + deserialize after every point, exercising the round trip
        // across all intermediate impute states
        ThresholdedRandomCutForestMapper mapper = new ThresholdedRandomCutForestMapper();
        second = mapper.toModel(mapper.toState(second));
    }
}
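The @MethodSource("args") provider is not shown on this page. A plausible shape for it, offered here as a hypothetical reconstruction rather than the project's actual provider, is the Cartesian product of the two enums the test consumes; it would sit as a static method in the test class:

import java.util.Arrays;
import java.util.stream.Stream;
import org.junit.jupiter.params.provider.Arguments;

// Hypothetical provider for @MethodSource("args"): pairs every TransformMethod
// with every ImputationMethod. The real provider may enumerate fewer cases.
static Stream<Arguments> args() {
    return Arrays.stream(TransformMethod.values())
            .flatMap(transform -> Arrays.stream(ImputationMethod.values())
                    .map(impute -> Arguments.of(transform, impute)));
}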
Use of com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest in project random-cut-forest-by-aws by aws.
Class ThresholdedRandomCutForestMapperTest, method testConversions.
@Test
public void testConversions() {
    int dimensions = 10;
    for (int trials = 0; trials < 10; trials++) {
        long seed = new Random().nextLong();
        RandomCutForest forest = RandomCutForest.builder().compact(true).dimensions(dimensions)
                .precision(Precision.FLOAT_32).internalShinglingEnabled(false).randomSeed(seed).build();
        // note shingleSize == 1
        ThresholdedRandomCutForest first = new ThresholdedRandomCutForest.Builder<>().compact(true)
                .dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed)
                .internalShinglingEnabled(false).anomalyRate(0.01).build();
        Random r = new Random();
        // fix the iteration count up front; evaluating new Random().nextInt(1000)
        // inside the loop condition would redraw the bound on every iteration
        int warmup = new Random().nextInt(1000);
        for (int i = 0; i < warmup; i++) {
            double[] point = r.ints(dimensions, 0, 50).asDoubleStream().toArray();
            first.process(point, 0L);
            forest.update(point);
        }
        RandomCutForestMapper mapper = new RandomCutForestMapper();
        mapper.setSaveExecutorContextEnabled(true);
        mapper.setSaveTreeStateEnabled(true);
        mapper.setPartialTreeStateEnabled(true);
        RandomCutForest copyForest = mapper.toModel(mapper.toState(forest));
        ThresholdedRandomCutForest second = new ThresholdedRandomCutForest(copyForest, 0.01, null);
        // the wrapped copy should now track the original forest exactly
        int rounds = new Random().nextInt(1000);
        for (int i = 0; i < rounds; i++) {
            double[] point = r.ints(dimensions, 0, 50).asDoubleStream().toArray();
            AnomalyDescriptor firstResult = first.process(point, 0L);
            AnomalyDescriptor secondResult = second.process(point, 0L);
            assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
            assertEquals(firstResult.getRCFScore(), forest.getAnomalyScore(point), 1e-10);
            forest.update(point);
        }
        // serialize + deserialize
        ThresholdedRandomCutForestMapper newMapper = new ThresholdedRandomCutForestMapper();
        ThresholdedRandomCutForest third = newMapper.toModel(newMapper.toState(second));
        // update re-instantiated forest
        for (int i = 0; i < 100; i++) {
            double[] point = r.ints(dimensions, 0, 50).asDoubleStream().toArray();
            AnomalyDescriptor firstResult = first.process(point, 0L);
            AnomalyDescriptor secondResult = second.process(point, 0L);
            AnomalyDescriptor thirdResult = third.process(point, 0L);
            double score = forest.getAnomalyScore(point);
            assertEquals(score, firstResult.getRCFScore(), 1e-10);
            assertEquals(score, secondResult.getRCFScore(), 1e-10);
            assertEquals(score, thirdResult.getRCFScore(), 1e-10);
            assertEquals(firstResult.getDataConfidence(), thirdResult.getDataConfidence(), 1e-10);
            forest.update(point);
        }
    }
}
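The conversion in testConversions is the one piece of API on this page that does not go through ThresholdedRandomCutForestMapper: an existing plain RandomCutForest is copied and then wrapped with thresholding. Isolated as a sketch, using only calls that appear in the test above (the trailing null constructor argument is passed exactly as the test does):

// Copy an existing plain forest; the three mapper flags make the saved state
// complete enough to rebuild an equivalent executor and trees.
RandomCutForestMapper rcfMapper = new RandomCutForestMapper();
rcfMapper.setSaveExecutorContextEnabled(true);
rcfMapper.setSaveTreeStateEnabled(true);
rcfMapper.setPartialTreeStateEnabled(true);
RandomCutForest copy = rcfMapper.toModel(rcfMapper.toState(forest));

// Wrap the copy with thresholding at a 0.01 anomaly rate; the wrapped copy
// then produces the same scores as the original, as the assertions verify.
ThresholdedRandomCutForest thresholded = new ThresholdedRandomCutForest(copy, 0.01, null);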
Use of com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest in project random-cut-forest-by-aws by aws.
Class ThresholdedRandomCutForestMapperTest, method testRoundTripTimeAugmented.
@ParameterizedTest
@EnumSource(value = TransformMethod.class)
public void testRoundTripTimeAugmented(TransformMethod method) {
    int sampleSize = 256;
    int baseDimensions = 1;
    int shingleSize = 8;
    int dimensions = baseDimensions * shingleSize;
    long seed = new Random().nextLong();
    double value = 1.0 + 0.25 * new Random().nextDouble();
    ThresholdedRandomCutForest first = new ThresholdedRandomCutForest.Builder<>().compact(true)
            .dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed)
            .forestMode(ForestMode.TIME_AUGMENTED).internalShinglingEnabled(true).shingleSize(shingleSize)
            .transformMethod(method).anomalyRate(0.01).adjustThreshold(true)
            .weights(new double[] { 1.0 }).build();
    ThresholdedRandomCutForest second = new ThresholdedRandomCutForest.Builder<>().compact(true)
            .dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed)
            .forestMode(ForestMode.TIME_AUGMENTED).internalShinglingEnabled(true).shingleSize(shingleSize)
            .transformMethod(method).anomalyRate(0.01).adjustThreshold(true)
            .weights(new double[] { 1.0 }).build();
    first.setLowerThreshold(value);
    second.setLowerThreshold(value);
    Random r = new Random();
    long count = 0;
    MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(10 * sampleSize, 50, 100,
            5, seed, baseDimensions);
    for (double[] point : dataWithKeys.data) {
        long stamp = 100 * count + r.nextInt(10) - 5;
        AnomalyDescriptor firstResult = first.process(point, stamp);
        AnomalyDescriptor secondResult = second.process(point, stamp);
        ++count;
        assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
        if (firstResult.getAnomalyGrade() > 0) {
            assertEquals(secondResult.getAnomalyGrade(), firstResult.getAnomalyGrade(), 1e-10);
            // anything flagged must score at least the configured lower threshold
            assertTrue(firstResult.getRCFScore() >= value);
        }
    }
    // serialize + deserialize
    ThresholdedRandomCutForestMapper mapper = new ThresholdedRandomCutForestMapper();
    ThresholdedRandomCutForest third = mapper.toModel(mapper.toState(second));
    MultiDimDataWithKey testData = ShingledMultiDimDataWithKeys.getMultiDimData(100, 50, 100, 5, seed,
            baseDimensions);
    // update re-instantiated forest, continuing the timestamp sequence
    for (double[] point : testData.data) {
        long stamp = 100 * count + r.nextInt(10) - 5;
        AnomalyDescriptor firstResult = first.process(point, stamp);
        AnomalyDescriptor secondResult = second.process(point, stamp);
        AnomalyDescriptor thirdResult = third.process(point, stamp);
        assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
        assertEquals(firstResult.getRCFScore(), thirdResult.getRCFScore(), 1e-10);
        assertEquals(firstResult.getAnomalyGrade(), thirdResult.getAnomalyGrade(), 1e-10);
        ++count;
    }
}
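Outside of a test, the grade check above is the usual way to consume a TIME_AUGMENTED forest: process each point with its timestamp and act when the grade is positive. A minimal consumption sketch using only calls from these tests; stream and handleAnomaly are hypothetical placeholders:

ThresholdedRandomCutForest forest = new ThresholdedRandomCutForest.Builder<>()
        .dimensions(8).shingleSize(8)
        .forestMode(ForestMode.TIME_AUGMENTED)
        .internalShinglingEnabled(true)
        .anomalyRate(0.01)
        .build();

long stamp = 0;
for (double[] point : stream) { // stream: any Iterable<double[]>, assumed
    AnomalyDescriptor result = forest.process(point, stamp);
    stamp += 1000; // timestamps must increase; the tests use roughly this scale
    if (result.getAnomalyGrade() > 0) {
        // a grade in (0, 1] means the point is flagged; the score is the raw RCF score
        handleAnomaly(result.getRCFScore(), result.getAnomalyGrade());
    }
}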
Use of com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest in project random-cut-forest-by-aws by aws.
Class ThresholdedRandomCutForestMapperTest, method testRoundTripStandardShingleSizeOne.
@Test
public void testRoundTripStandardShingleSizeOne() {
    int dimensions = 10;
    for (int trials = 0; trials < 1; trials++) {
        long seed = new Random().nextLong();
        RandomCutForest.Builder<?> builder = RandomCutForest.builder().compact(true).dimensions(dimensions)
                .precision(Precision.FLOAT_32).randomSeed(seed);
        // note shingleSize == 1
        ThresholdedRandomCutForest first = new ThresholdedRandomCutForest.Builder<>().compact(true)
                .dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed)
                .internalShinglingEnabled(true).anomalyRate(0.01).build();
        ThresholdedRandomCutForest second = new ThresholdedRandomCutForest.Builder<>().compact(true)
                .dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed).anomalyRate(0.01)
                .forestMode(ForestMode.STANDARD).internalShinglingEnabled(false).build();
        RandomCutForest forest = builder.build();
        Random r = new Random();
        // fix the iteration count once rather than redrawing it in the loop condition
        int rounds = 2000 + new Random().nextInt(1000);
        for (int i = 0; i < rounds; i++) {
            double[] point = r.ints(dimensions, 0, 50).asDoubleStream().toArray();
            AnomalyDescriptor firstResult = first.process(point, 0L);
            AnomalyDescriptor secondResult = second.process(point, 0L);
            assertEquals(firstResult.getDataConfidence(), secondResult.getDataConfidence(), 1e-10);
            assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
            assertEquals(firstResult.getRCFScore(), forest.getAnomalyScore(point), 1e-10);
            forest.update(point);
        }
        // serialize + deserialize
        ThresholdedRandomCutForestMapper mapper = new ThresholdedRandomCutForestMapper();
        ThresholdedRandomCutForest third = mapper.toModel(mapper.toState(second));
        // update re-instantiated forest
        for (int i = 0; i < 100; i++) {
            double[] point = r.ints(dimensions, 0, 50).asDoubleStream().toArray();
            AnomalyDescriptor firstResult = first.process(point, 0L);
            AnomalyDescriptor secondResult = second.process(point, 0L);
            AnomalyDescriptor thirdResult = third.process(point, 0L);
            double score = forest.getAnomalyScore(point);
            assertEquals(score, firstResult.getRCFScore(), 1e-10);
            assertEquals(score, secondResult.getRCFScore(), 1e-10);
            assertEquals(score, thirdResult.getRCFScore(), 1e-10);
            assertEquals(firstResult.getDataConfidence(), secondResult.getDataConfidence(), 1e-10);
            forest.update(point);
        }
    }
}
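Every test on this page repeats the same round-trip-and-compare pattern. A reusable helper, hypothetical and not part of the test class, could condense the duplicated blocks:

// Hypothetical helper: round-trip a forest through the mapper, then assert the
// original and the reconstruction score a batch of points identically.
static void assertRoundTripScores(ThresholdedRandomCutForest forest, double[][] points, long startStamp) {
    ThresholdedRandomCutForestMapper mapper = new ThresholdedRandomCutForestMapper();
    ThresholdedRandomCutForest copy = mapper.toModel(mapper.toState(forest));
    long stamp = startStamp;
    for (double[] point : points) {
        AnomalyDescriptor expected = forest.process(point, stamp);
        AnomalyDescriptor actual = copy.process(point, stamp);
        assertEquals(expected.getRCFScore(), actual.getRCFScore(), 1e-10);
        stamp += 1000;
    }
}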