Use of com.amazon.randomcutforest.testutils.MultiDimDataWithKey in project random-cut-forest-by-aws by aws.
The class ConsistencyTest, method ExternalShinglingTest:
@Test
public void ExternalShinglingTest() {
    int sampleSize = 256;
    int baseDimensions = 1;
    int shingleSize = 4;
    int dimensions = baseDimensions * shingleSize;
    long seed = new Random().nextLong();
    // just once, since the test checks exact equality
    int numTrials = 1;
    int length = 400 * sampleSize;
    for (int i = 0; i < numTrials; i++) {
        RandomCutForest forest = RandomCutForest.builder().compact(true).dimensions(dimensions)
                .precision(Precision.FLOAT_32).internalShinglingEnabled(false).shingleSize(shingleSize)
                .randomSeed(seed).build();
        RandomCutForest copyForest = RandomCutForest.builder().compact(true).dimensions(dimensions)
                .precision(Precision.FLOAT_32).internalShinglingEnabled(false).shingleSize(1)
                .randomSeed(seed).build();
        ThresholdedRandomCutForest first = new ThresholdedRandomCutForest.Builder<>().compact(true)
                .dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed)
                .internalShinglingEnabled(false).shingleSize(shingleSize).anomalyRate(0.01).build();
        ThresholdedRandomCutForest second = new ThresholdedRandomCutForest.Builder<>().compact(true)
                .dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed)
                .internalShinglingEnabled(false).shingleSize(1).anomalyRate(0.01).build();
        MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.generateShingledDataWithKey(length, 50,
                shingleSize, baseDimensions, seed);
        int gradeDifference = 0;
        for (double[] point : dataWithKeys.data) {
            AnomalyDescriptor firstResult = first.process(point, 0L);
            AnomalyDescriptor secondResult = second.process(point, 0L);
            assertEquals(firstResult.getRCFScore(), forest.getAnomalyScore(point), 1e-10);
            assertEquals(firstResult.getRCFScore(), copyForest.getAnomalyScore(point), 1e-10);
            assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
            if ((firstResult.getAnomalyGrade() > 0) != (secondResult.getAnomalyGrade() > 0)) {
                // the thresholded forest uses the shingle size in its corrector step,
                // so the two grades are expected to differ on some points
                ++gradeDifference;
            }
            forest.update(point);
            copyForest.update(point);
        }
        assertTrue(gradeDifference > 0);
    }
}
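For readers new to the test utility itself: MultiDimDataWithKey is a plain data holder returned by the generators in ShingledMultiDimDataWithKeys. The tests above only read its data field; the sketch below also shows the change-point fields, whose names (changeIndices, changes) are assumed from their use elsewhere in the project rather than confirmed by the code shown here.

    // Minimal consumption sketch (assumptions flagged above): generate a shingled
    // stream and walk it while tracking the generator's injected change points.
    import com.amazon.randomcutforest.testutils.MultiDimDataWithKey;
    import com.amazon.randomcutforest.testutils.ShingledMultiDimDataWithKeys;

    public class MultiDimDataWithKeySketch {
        public static void main(String[] args) {
            int shingleSize = 4;
            int baseDimensions = 1;
            MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys
                    .generateShingledDataWithKey(1000, 50, shingleSize, baseDimensions, 0L);
            int keyCounter = 0;
            for (int i = 0; i < dataWithKeys.data.length; i++) {
                // changeIndices marks positions where the generator injected a change (assumed field name)
                if (keyCounter < dataWithKeys.changeIndices.length && dataWithKeys.changeIndices[keyCounter] == i) {
                    System.out.println("injected change at " + i);
                    keyCounter++;
                }
            }
        }
    }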
Use of com.amazon.randomcutforest.testutils.MultiDimDataWithKey in project random-cut-forest-by-aws by aws.
The class RandomCutForestShingledFunctionalTest, method InternalShinglingTest:
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void InternalShinglingTest(boolean rotation) {
    int sampleSize = 256;
    int baseDimensions = 2;
    int shingleSize = 2;
    int dimensions = baseDimensions * shingleSize;
    long seed = new Random().nextLong();
    System.out.println(seed);
    // test is exact equality, reducing the number of trials
    int numTrials = 1;
    int length = 4000 * sampleSize;
    for (int i = 0; i < numTrials; i++) {
        RandomCutForest first = new RandomCutForest.Builder<>().compact(true).dimensions(dimensions)
                .precision(Precision.FLOAT_32).randomSeed(seed).internalShinglingEnabled(true)
                .internalRotationEnabled(rotation).shingleSize(shingleSize).build();
        RandomCutForest second = new RandomCutForest.Builder<>().compact(true).dimensions(dimensions)
                .precision(Precision.FLOAT_32).randomSeed(seed).internalShinglingEnabled(false)
                .shingleSize(shingleSize).build();
        RandomCutForest third = new RandomCutForest.Builder<>().compact(true).dimensions(dimensions)
                .precision(Precision.FLOAT_32).randomSeed(seed).internalShinglingEnabled(false)
                .shingleSize(1).build();
        MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(length, 50, 100, 5,
                seed + i, baseDimensions);
        double[][] shingledData = generateShingledData(dataWithKeys.data, shingleSize, baseDimensions, rotation);
        assertEquals(shingledData.length, dataWithKeys.data.length - shingleSize + 1);
        int count = shingleSize - 1;
        // insert initial points
        for (int j = 0; j < shingleSize - 1; j++) {
            first.update(dataWithKeys.data[j]);
        }
        for (int j = 0; j < shingledData.length; j++) {
            // validate equality of points
            for (int y = 0; y < baseDimensions; y++) {
                int position = (rotation) ? (count % shingleSize) : shingleSize - 1;
                assertEquals(dataWithKeys.data[count][y], shingledData[j][position * baseDimensions + y], 1e-10);
            }
            double firstResult = first.getAnomalyScore(dataWithKeys.data[count]);
            first.update(dataWithKeys.data[count]);
            ++count;
            double secondResult = second.getAnomalyScore(shingledData[j]);
            second.update(shingledData[j]);
            double thirdResult = third.getAnomalyScore(shingledData[j]);
            third.update(shingledData[j]);
            assertEquals(firstResult, secondResult, 1e-10);
            assertEquals(secondResult, thirdResult, 1e-10);
        }
        PointStore store = (PointStore) first.getUpdateCoordinator().getStore();
        assertEquals(store.getCurrentStoreCapacity() * dimensions, store.getStore().length);
        store = (PointStore) second.getUpdateCoordinator().getStore();
        assertEquals(store.getCurrentStoreCapacity() * dimensions, store.getStore().length);
        store = (PointStore) third.getUpdateCoordinator().getStore();
        assertEquals(store.getCurrentStoreCapacity() * dimensions, store.getStore().length);
    }
}
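The helper generateShingledData used above is defined in the test class and not shown here. As a rough guide to what it must do, given the equality assertions in the loop (newest point in the last block for plain shingling, or in block count % shingleSize when rotation is enabled), a hypothetical reconstruction could look like the following; treat it as a sketch consistent with those assertions, not the project's actual implementation.

    // Hypothetical sketch of external shingling consistent with the assertions above.
    // Shingle j covers raw points j .. j + shingleSize - 1; the newest point lands in
    // the last block (sliding) or in block (timestamp % shingleSize) (rotated).
    static double[][] generateShingledData(double[][] data, int shingleSize, int baseDimensions, boolean rotation) {
        int length = data.length - shingleSize + 1;
        double[][] shingled = new double[length][shingleSize * baseDimensions];
        for (int j = 0; j < length; j++) {
            for (int k = 0; k < shingleSize; k++) {
                int timestamp = j + k; // index of the raw point being copied
                int block = rotation ? (timestamp % shingleSize) : k;
                System.arraycopy(data[timestamp], 0, shingled[j], block * baseDimensions, baseDimensions);
            }
        }
        return shingled;
    }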
Use of com.amazon.randomcutforest.testutils.MultiDimDataWithKey in project random-cut-forest-by-aws by aws.
The class ThresholdedRandomCutForestMapperTest, method testRoundTripStandardShingled:
@Test
public void testRoundTripStandardShingled() {
    int sampleSize = 256;
    int baseDimensions = 2;
    int shingleSize = 4;
    int dimensions = baseDimensions * shingleSize;
    long seed = new Random().nextLong();
    RandomCutForest.Builder<?> builder = RandomCutForest.builder().compact(true).dimensions(dimensions)
            .precision(Precision.FLOAT_32).randomSeed(seed);
    ThresholdedRandomCutForest first = new ThresholdedRandomCutForest.Builder<>().compact(true)
            .dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed).shingleSize(shingleSize)
            .anomalyRate(0.01).build();
    ThresholdedRandomCutForest second = new ThresholdedRandomCutForest.Builder<>().compact(true)
            .dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed).shingleSize(shingleSize)
            .anomalyRate(0.01).build();
    RandomCutForest forest = builder.build();
    // thresholds should not affect scores
    double value = 0.75 + 0.5 * new Random().nextDouble();
    first.setLowerThreshold(value);
    second.setLowerThreshold(value);
    Random r = new Random();
    MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.generateShingledDataWithKey(10 * sampleSize,
            50, shingleSize, baseDimensions, seed);
    for (double[] point : dataWithKeys.data) {
        AnomalyDescriptor firstResult = first.process(point, 0L);
        AnomalyDescriptor secondResult = second.process(point, 0L);
        assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
        assertEquals(firstResult.getRCFScore(), forest.getAnomalyScore(point), 1e-4);
        forest.update(point);
    }
    // serialize + deserialize
    ThresholdedRandomCutForestMapper mapper = new ThresholdedRandomCutForestMapper();
    ThresholdedRandomCutForest third = mapper.toModel(mapper.toState(second));
    MultiDimDataWithKey testData = ShingledMultiDimDataWithKeys.generateShingledDataWithKey(100, 50, shingleSize,
            baseDimensions, seed);
    // update re-instantiated forest
    for (double[] point : testData.data) {
        AnomalyDescriptor firstResult = first.process(point, 0L);
        AnomalyDescriptor secondResult = second.process(point, 0L);
        AnomalyDescriptor thirdResult = third.process(point, 0L);
        double score = forest.getAnomalyScore(point);
        assertEquals(score, firstResult.getRCFScore(), 1e-4);
        assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
        assertEquals(firstResult.getRCFScore(), thirdResult.getRCFScore(), 1e-10);
        assertEquals(firstResult.getDataConfidence(), thirdResult.getDataConfidence(), 1e-10);
        forest.update(point);
    }
}
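The essential pattern in the round trip above is: snapshot the live detector into a state object, persist that state by whatever mechanism you prefer (not shown in the test), and rebuild an equivalent detector later. A minimal sketch, using only the mapper calls that appear in the test and Java 10+ var to avoid naming the concrete state type:

    // Sketch of the save/restore cycle the test exercises; `detector` is any
    // ThresholdedRandomCutForest that has already processed part of the stream.
    static ThresholdedRandomCutForest roundTrip(ThresholdedRandomCutForest detector) {
        ThresholdedRandomCutForestMapper mapper = new ThresholdedRandomCutForestMapper();
        var state = mapper.toState(detector);   // snapshot of the full detector state
        // ... persist `state` somewhere, then later ...
        return mapper.toModel(state);           // rebuild an equivalent detector
    }

The assertions comparing second and third above verify exactly this: the rebuilt detector scores the continuation of the stream identically to the original.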
Use of com.amazon.randomcutforest.testutils.MultiDimDataWithKey in project random-cut-forest-by-aws by aws.
The class ThresholdedRandomCutForestMapperTest, method testRoundTripImpute:
@ParameterizedTest
@MethodSource("args")
public void testRoundTripImpute(TransformMethod transformMethod, ImputationMethod imputationMethod) {
    int sampleSize = 256;
    int baseDimensions = 1;
    int shingleSize = 8;
    int dimensions = baseDimensions * shingleSize;
    long seed = new Random().nextLong();
    ThresholdedRandomCutForest first = ThresholdedRandomCutForest.builder().compact(true).dimensions(dimensions)
            .precision(Precision.FLOAT_32).randomSeed(seed).forestMode(ForestMode.STREAMING_IMPUTE)
            .internalShinglingEnabled(true).shingleSize(shingleSize).transformMethod(transformMethod)
            .imputationMethod(imputationMethod).fillValues(new double[] { 1.0 }).anomalyRate(0.01).build();
    ThresholdedRandomCutForest second = new ThresholdedRandomCutForest.Builder<>().compact(true)
            .dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed)
            .forestMode(ForestMode.STREAMING_IMPUTE).internalShinglingEnabled(true).shingleSize(shingleSize)
            .transformMethod(transformMethod).imputationMethod(imputationMethod)
            .fillValues(new double[] { 1.0 }).anomalyRate(0.01).build();
    Random r = new Random();
    long count = 0;
    MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(10 * sampleSize, 50, 100, 5,
            seed, baseDimensions);
    for (double[] point : dataWithKeys.data) {
        if (r.nextDouble() > 0.1) {
            // drop roughly 10% of the points and jitter the timestamps slightly
            long stamp = 1000 * count + r.nextInt(10) - 5;
            AnomalyDescriptor firstResult = first.process(point, stamp);
            AnomalyDescriptor secondResult = second.process(point, stamp);
            assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
        }
        ++count;
    }
    // serialize + deserialize
    ThresholdedRandomCutForestMapper mapper = new ThresholdedRandomCutForestMapper();
    ThresholdedRandomCutForest third = mapper.toModel(mapper.toState(second));
    MultiDimDataWithKey testData = ShingledMultiDimDataWithKeys.getMultiDimData(100, 50, 100, 5, seed,
            baseDimensions);
    // update re-instantiated forest
    for (double[] point : testData.data) {
        long stamp = 1000 * count + r.nextInt(10) - 5;
        AnomalyDescriptor firstResult = first.process(point, stamp);
        // AnomalyDescriptor secondResult = second.process(point, stamp);
        AnomalyDescriptor thirdResult = third.process(point, stamp);
        // assertEquals(firstResult.getRcfScore(), secondResult.getRcfScore(), 1e-10);
        assertEquals(firstResult.getRCFScore(), thirdResult.getRCFScore(), 1e-10);
        ++count;
    }
}
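The test above feeds the detector an irregular stream: roughly 10% of points are dropped and timestamps are jittered, which is the situation STREAMING_IMPUTE targets (the forest shingles internally and imputes missing observations). A minimal configuration sketch follows; the specific TransformMethod and ImputationMethod constants chosen here are illustrative assumptions, since the test receives them as parameters.

    // Sketch of a STREAMING_IMPUTE detector fed with explicit, irregular timestamps.
    // dimensions = baseDimensions (1) * shingleSize (8), as in the test above.
    ThresholdedRandomCutForest detector = ThresholdedRandomCutForest.builder()
            .compact(true)
            .dimensions(8)
            .shingleSize(8)
            .internalShinglingEnabled(true)
            .precision(Precision.FLOAT_32)
            .randomSeed(42L)
            .forestMode(ForestMode.STREAMING_IMPUTE)
            .transformMethod(TransformMethod.NORMALIZE)    // illustrative choice
            .imputationMethod(ImputationMethod.PREVIOUS)   // illustrative choice
            .fillValues(new double[] { 1.0 })              // relevant mainly to fixed-value imputation
            .anomalyRate(0.01)
            .build();
    long stamp = 0L;
    Random r = new Random(0);
    for (int i = 0; i < 1000; i++) {
        stamp += 900 + r.nextInt(200);                     // irregular spacing between arrivals
        if (r.nextDouble() > 0.1) {                        // ~10% of observations go missing
            double[] point = new double[] { Math.sin(i * 0.1) + 0.01 * r.nextGaussian() };
            AnomalyDescriptor result = detector.process(point, stamp);
        }
    }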
Use of com.amazon.randomcutforest.testutils.MultiDimDataWithKey in project random-cut-forest-by-aws by aws.
The class ThresholdedRandomCutForestMapperTest, method testRoundTripStandardShingledInternal:
@Test
public void testRoundTripStandardShingledInternal() {
    int sampleSize = 256;
    int baseDimensions = 2;
    int shingleSize = 8;
    int dimensions = baseDimensions * shingleSize;
    long seed = new Random().nextLong();
    RandomCutForest forest = RandomCutForest.builder().compact(true).dimensions(dimensions)
            .precision(Precision.FLOAT_32).internalShinglingEnabled(true).shingleSize(shingleSize)
            .randomSeed(seed).build();
    ThresholdedRandomCutForest first = new ThresholdedRandomCutForest.Builder<>().compact(true)
            .dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed).internalShinglingEnabled(true)
            .shingleSize(shingleSize).anomalyRate(0.01).adjustThreshold(true).boundingBoxCacheFraction(0).build();
    ThresholdedRandomCutForest second = new ThresholdedRandomCutForest.Builder<>().compact(true)
            .dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed).internalShinglingEnabled(true)
            .shingleSize(shingleSize).anomalyRate(0.01).adjustThreshold(true).build();
    double value = 0.75 + 0.5 * new Random().nextDouble();
    first.setLowerThreshold(value);
    second.setLowerThreshold(value);
    Random r = new Random();
    MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(10 * sampleSize, 50, 100, 5,
            seed, baseDimensions);
    for (double[] point : dataWithKeys.data) {
        AnomalyDescriptor firstResult = first.process(point, 0L);
        AnomalyDescriptor secondResult = second.process(point, 0L);
        assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
        assertEquals(firstResult.getRCFScore(), forest.getAnomalyScore(point), 1e-4);
        if (firstResult.getAnomalyGrade() > 0) {
            assertEquals(secondResult.getAnomalyGrade(), firstResult.getAnomalyGrade(), 1e-10);
            assert (firstResult.getRCFScore() >= value);
        }
        forest.update(point);
    }
    // serialize + deserialize
    ThresholdedRandomCutForestMapper mapper = new ThresholdedRandomCutForestMapper();
    ThresholdedRandomCutForest third = mapper.toModel(mapper.toState(second));
    MultiDimDataWithKey testData = ShingledMultiDimDataWithKeys.getMultiDimData(100, 50, 100, 5, seed,
            baseDimensions);
    // update re-instantiated forest
    for (double[] point : testData.data) {
        AnomalyDescriptor firstResult = first.process(point, 0L);
        AnomalyDescriptor secondResult = second.process(point, 0L);
        AnomalyDescriptor thirdResult = third.process(point, 0L);
        double score = forest.getAnomalyScore(point);
        assertEquals(score, firstResult.getRCFScore(), 1e-4);
        assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
        assertEquals(firstResult.getRCFScore(), thirdResult.getRCFScore(), 1e-10);
        assertEquals(firstResult.getDataConfidence(), thirdResult.getDataConfidence(), 1e-10);
        forest.update(point);
    }
}
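One detail worth noting in this last test: first is built with boundingBoxCacheFraction(0) while second uses the default cache, and the equality assertions confirm that the cache setting does not change scores. My reading, stated as an assumption rather than something the test spells out, is that the cache fraction only trades memory against speed; a small sketch of toggling the knob:

    // Sketch: two identically seeded detectors that differ only in bounding-box caching.
    // 0 is assumed to disable the cache (less memory, slower); 1.0 to cache every box.
    ThresholdedRandomCutForest noCache = new ThresholdedRandomCutForest.Builder<>()
            .compact(true).dimensions(16).shingleSize(8).internalShinglingEnabled(true)
            .precision(Precision.FLOAT_32).randomSeed(0L).anomalyRate(0.01)
            .boundingBoxCacheFraction(0).build();
    ThresholdedRandomCutForest fullCache = new ThresholdedRandomCutForest.Builder<>()
            .compact(true).dimensions(16).shingleSize(8).internalShinglingEnabled(true)
            .precision(Precision.FLOAT_32).randomSeed(0L).anomalyRate(0.01)
            .boundingBoxCacheFraction(1.0).build();
    // fed the same stream, the two should produce identical scores; the test above
    // makes the analogous comparison between cache fraction 0 and the default.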