Search in sources :

Example 11 with MultiDimDataWithKey

use of com.amazon.randomcutforest.testutils.MultiDimDataWithKey in project random-cut-forest-by-aws by aws.

In the class ConsistencyTest, method ExternalShinglingTest.

/**
 * Feeds the same generated points to four models built from one random seed:
 * two {@code RandomCutForest} instances and two
 * {@code ThresholdedRandomCutForest} instances, each pair declaring a shingle
 * size of {@code shingleSize} and of 1 respectively, all with internal
 * shingling disabled.
 *
 * For every point the RCF scores of all four models must agree to within 1e-10.
 * The anomaly grades of the two thresholded forests, however, are required to
 * disagree (positive vs. non-positive) on at least one point.
 */
@Test
public void ExternalShinglingTest() {
    final int sampleSize = 256;
    final int baseDimensions = 1;
    final int shingleSize = 4;
    final int dimensions = baseDimensions * shingleSize;
    final long seed = new Random().nextLong();
    // exact equality is being checked, so a single trial is sufficient
    final int numTrials = 1;
    final int length = 400 * sampleSize;

    int trial = 0;
    while (trial < numTrials) {
        RandomCutForest forest = RandomCutForest.builder()
                .compact(true)
                .dimensions(dimensions)
                .precision(Precision.FLOAT_32)
                .internalShinglingEnabled(false)
                .shingleSize(shingleSize)
                .randomSeed(seed)
                .build();
        RandomCutForest copyForest = RandomCutForest.builder()
                .compact(true)
                .dimensions(dimensions)
                .precision(Precision.FLOAT_32)
                .internalShinglingEnabled(false)
                .shingleSize(1)
                .randomSeed(seed)
                .build();
        ThresholdedRandomCutForest first = new ThresholdedRandomCutForest.Builder<>()
                .compact(true)
                .dimensions(dimensions)
                .precision(Precision.FLOAT_32)
                .randomSeed(seed)
                .internalShinglingEnabled(false)
                .shingleSize(shingleSize)
                .anomalyRate(0.01)
                .build();
        ThresholdedRandomCutForest second = new ThresholdedRandomCutForest.Builder<>()
                .compact(true)
                .dimensions(dimensions)
                .precision(Precision.FLOAT_32)
                .randomSeed(seed)
                .internalShinglingEnabled(false)
                .shingleSize(1)
                .anomalyRate(0.01)
                .build();

        MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.generateShingledDataWithKey(length, 50,
                shingleSize, baseDimensions, seed);
        double[][] points = dataWithKeys.data;

        // number of points on which exactly one of the thresholded forests reports a positive grade
        int gradeMismatches = 0;
        for (int p = 0; p < points.length; p++) {
            double[] point = points[p];
            AnomalyDescriptor firstResult = first.process(point, 0L);
            AnomalyDescriptor secondResult = second.process(point, 0L);

            // scores are compared before the plain forests see the point
            assertEquals(firstResult.getRCFScore(), forest.getAnomalyScore(point), 1e-10);
            assertEquals(firstResult.getRCFScore(), copyForest.getAnomalyScore(point), 1e-10);
            assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);

            // the thresholded random cut forest uses the shingle size in the corrector
            // step, so the grades are supposed to differ
            boolean flaggedByFirst = firstResult.getAnomalyGrade() > 0;
            boolean flaggedBySecond = secondResult.getAnomalyGrade() > 0;
            if (flaggedByFirst != flaggedBySecond) {
                gradeMismatches += 1;
            }

            forest.update(point);
            copyForest.update(point);
        }
        assertTrue(gradeMismatches > 0);
        trial++;
    }
}
Also used : Random(java.util.Random) RandomCutForest(com.amazon.randomcutforest.RandomCutForest) MultiDimDataWithKey(com.amazon.randomcutforest.testutils.MultiDimDataWithKey) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 12 with MultiDimDataWithKey

use of com.amazon.randomcutforest.testutils.MultiDimDataWithKey in project random-cut-forest-by-aws by aws.

In the class RandomCutForestShingledFunctionalTest, method InternalShinglingTest.

/**
 * Compares a forest that shingles internally against two forests that are fed
 * externally shingled points (declaring shingle sizes {@code shingleSize} and
 * 1), all built from the same random seed.
 *
 * For each shingled point the test first checks that the most recent raw
 * point sits at the expected position inside the shingle, then asserts that
 * all three forests return the same anomaly score to within 1e-10. Finally it
 * checks, for each forest, that the point store's backing array length equals
 * its current capacity times {@code dimensions}.
 *
 * @param rotation whether internal rotation is enabled on the internally
 *                 shingling forest; also passed to the external shingle
 *                 generator
 */
@ParameterizedTest
@ValueSource(booleans = { true, false })
public void InternalShinglingTest(boolean rotation) {
    final int sampleSize = 256;
    final int baseDimensions = 2;
    final int shingleSize = 2;
    final int dimensions = baseDimensions * shingleSize;
    final long seed = new Random().nextLong();
    System.out.println(seed);
    // the test checks exact equality, so the number of trials is kept small
    final int numTrials = 1;
    final int length = 4000 * sampleSize;

    for (int trial = 0; trial < numTrials; trial++) {
        RandomCutForest first = new RandomCutForest.Builder<>()
                .compact(true)
                .dimensions(dimensions)
                .precision(Precision.FLOAT_32)
                .randomSeed(seed)
                .internalShinglingEnabled(true)
                .internalRotationEnabled(rotation)
                .shingleSize(shingleSize)
                .build();
        RandomCutForest second = new RandomCutForest.Builder<>()
                .compact(true)
                .dimensions(dimensions)
                .precision(Precision.FLOAT_32)
                .randomSeed(seed)
                .internalShinglingEnabled(false)
                .shingleSize(shingleSize)
                .build();
        RandomCutForest third = new RandomCutForest.Builder<>()
                .compact(true)
                .dimensions(dimensions)
                .precision(Precision.FLOAT_32)
                .randomSeed(seed)
                .internalShinglingEnabled(false)
                .shingleSize(1)
                .build();

        MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(length, 50, 100, 5,
                seed + trial, baseDimensions);
        double[][] rawPoints = dataWithKeys.data;
        double[][] shingledData = generateShingledData(rawPoints, shingleSize, baseDimensions, rotation);
        assertEquals(shingledData.length, rawPoints.length - shingleSize + 1);

        // prime the internally shingling forest with the first shingleSize - 1 raw
        // points; afterwards the cursor indexes the next raw point to be consumed
        int cursor = 0;
        while (cursor < shingleSize - 1) {
            first.update(rawPoints[cursor]);
            cursor++;
        }

        for (double[] shingle : shingledData) {
            double[] latest = rawPoints[cursor];

            // validate that the newest raw point is where the shingle says it is
            int slot = rotation ? (cursor % shingleSize) : shingleSize - 1;
            int offset = slot * baseDimensions;
            for (int d = 0; d < baseDimensions; d++) {
                assertEquals(latest[d], shingle[offset + d], 1e-10);
            }

            double firstResult = first.getAnomalyScore(latest);
            first.update(latest);
            cursor++;

            double secondResult = second.getAnomalyScore(shingle);
            second.update(shingle);

            double thirdResult = third.getAnomalyScore(shingle);
            third.update(shingle);

            assertEquals(firstResult, secondResult, 1e-10);
            assertEquals(secondResult, thirdResult, 1e-10);
        }

        // same store-size check for each forest, in the original order
        RandomCutForest[] forests = { first, second, third };
        for (RandomCutForest candidate : forests) {
            PointStore store = (PointStore) candidate.getUpdateCoordinator().getStore();
            assertEquals(store.getCurrentStoreCapacity() * dimensions, store.getStore().length);
        }
    }
}
Also used : PointStore(com.amazon.randomcutforest.store.PointStore) Random(java.util.Random) ShingleBuilder(com.amazon.randomcutforest.util.ShingleBuilder) MultiDimDataWithKey(com.amazon.randomcutforest.testutils.MultiDimDataWithKey) ValueSource(org.junit.jupiter.params.provider.ValueSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 13 with MultiDimDataWithKey

use of com.amazon.randomcutforest.testutils.MultiDimDataWithKey in project random-cut-forest-by-aws by aws.

In the class ThresholdedRandomCutForestMapperTest, method testRoundTripStandardShingled.

/**
 * Round-trips a {@code ThresholdedRandomCutForest} through
 * {@code ThresholdedRandomCutForestMapper} and checks that the restored model
 * keeps producing the same results as the models it was derived from.
 *
 * Two identically configured thresholded forests and one plain
 * {@code RandomCutForest} (all sharing a seed) are trained on the same points.
 * The thresholded forests must agree on the RCF score to 1e-10 and match the
 * plain forest to 1e-4. After the second thresholded forest is serialized and
 * deserialized, a further batch of points is processed and the restored model
 * must match the first on both RCF score and data confidence to 1e-10.
 */
@Test
public void testRoundTripStandardShingled() {
    int sampleSize = 256;
    int baseDimensions = 2;
    int shingleSize = 4;
    int dimensions = baseDimensions * shingleSize;
    long seed = new Random().nextLong();
    RandomCutForest.Builder<?> builder = RandomCutForest.builder().compact(true).dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed);
    ThresholdedRandomCutForest first = new ThresholdedRandomCutForest.Builder<>().compact(true).dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed).shingleSize(shingleSize).anomalyRate(0.01).build();
    ThresholdedRandomCutForest second = new ThresholdedRandomCutForest.Builder<>().compact(true).dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed).shingleSize(shingleSize).anomalyRate(0.01).build();
    RandomCutForest forest = builder.build();
    // thresholds should not affect scores; pick a random lower threshold in [0.75, 1.25)
    double value = 0.75 + 0.5 * new Random().nextDouble();
    first.setLowerThreshold(value);
    second.setLowerThreshold(value);
    MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.generateShingledDataWithKey(10 * sampleSize, 50, shingleSize, baseDimensions, seed);
    for (double[] point : dataWithKeys.data) {
        AnomalyDescriptor firstResult = first.process(point, 0L);
        AnomalyDescriptor secondResult = second.process(point, 0L);
        assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
        // the plain forest is scored before it is updated with the same point
        assertEquals(firstResult.getRCFScore(), forest.getAnomalyScore(point), 1e-4);
        forest.update(point);
    }
    // serialize + deserialize
    ThresholdedRandomCutForestMapper mapper = new ThresholdedRandomCutForestMapper();
    ThresholdedRandomCutForest third = mapper.toModel(mapper.toState(second));
    MultiDimDataWithKey testData = ShingledMultiDimDataWithKeys.generateShingledDataWithKey(100, 50, shingleSize, baseDimensions, seed);
    // update re-instantiated forest alongside the originals
    for (double[] point : testData.data) {
        AnomalyDescriptor firstResult = first.process(point, 0L);
        AnomalyDescriptor secondResult = second.process(point, 0L);
        AnomalyDescriptor thirdResult = third.process(point, 0L);
        double score = forest.getAnomalyScore(point);
        assertEquals(score, firstResult.getRCFScore(), 1e-4);
        assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
        assertEquals(firstResult.getRCFScore(), thirdResult.getRCFScore(), 1e-10);
        assertEquals(firstResult.getDataConfidence(), thirdResult.getDataConfidence(), 1e-10);
        forest.update(point);
    }
}
Also used : Random(java.util.Random) AnomalyDescriptor(com.amazon.randomcutforest.parkservices.AnomalyDescriptor) RandomCutForest(com.amazon.randomcutforest.RandomCutForest) ThresholdedRandomCutForest(com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest) MultiDimDataWithKey(com.amazon.randomcutforest.testutils.MultiDimDataWithKey) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 14 with MultiDimDataWithKey

use of com.amazon.randomcutforest.testutils.MultiDimDataWithKey in project random-cut-forest-by-aws by aws.

In the class ThresholdedRandomCutForestMapperTest, method testRoundTripImpute.

/**
 * Round-trips a {@code ThresholdedRandomCutForest} configured with
 * {@code ForestMode.STREAMING_IMPUTE} through
 * {@code ThresholdedRandomCutForestMapper} and checks that the restored model
 * scores subsequent points exactly like an untouched twin.
 *
 * During the first phase roughly one point in ten is skipped at random, so the
 * timestamps passed to the models contain gaps. Timestamps are
 * {@code 1000 * count} plus a jitter in [-5, 4].
 *
 * @param transformMethod  transform method applied to both models
 * @param imputationMethod imputation method applied to both models
 */
@ParameterizedTest
@MethodSource("args")
public void testRoundTripImpute(TransformMethod transformMethod, ImputationMethod imputationMethod) {
    int sampleSize = 256;
    int baseDimensions = 1;
    int shingleSize = 8;
    int dimensions = baseDimensions * shingleSize;
    long seed = new Random().nextLong();
    ThresholdedRandomCutForest first = ThresholdedRandomCutForest.builder().compact(true).dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed).forestMode(ForestMode.STREAMING_IMPUTE).internalShinglingEnabled(true).shingleSize(shingleSize).transformMethod(transformMethod).imputationMethod(imputationMethod).fillValues(new double[] { 1.0 }).anomalyRate(0.01).build();
    ThresholdedRandomCutForest second = new ThresholdedRandomCutForest.Builder<>().compact(true).dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed).forestMode(ForestMode.STREAMING_IMPUTE).internalShinglingEnabled(true).shingleSize(shingleSize).transformMethod(transformMethod).imputationMethod(imputationMethod).fillValues(new double[] { 1.0 }).anomalyRate(0.01).build();
    Random r = new Random();
    // count advances for every generated point, including skipped ones, so that
    // a skipped point leaves a gap in the timestamp sequence
    long count = 0;
    MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(10 * sampleSize, 50, 100, 5, seed, baseDimensions);
    for (double[] point : dataWithKeys.data) {
        // process about 90% of the points; the rest are dropped
        if (r.nextDouble() > 0.1) {
            long stamp = 1000 * count + r.nextInt(10) - 5;
            AnomalyDescriptor firstResult = first.process(point, stamp);
            AnomalyDescriptor secondResult = second.process(point, stamp);
            assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
        }
        ++count;
    }
    // serialize + deserialize
    ThresholdedRandomCutForestMapper mapper = new ThresholdedRandomCutForestMapper();
    ThresholdedRandomCutForest third = mapper.toModel(mapper.toState(second));
    MultiDimDataWithKey testData = ShingledMultiDimDataWithKeys.getMultiDimData(100, 50, 100, 5, seed, baseDimensions);
    // update re-instantiated forest; in this phase only `first` and the restored
    // `third` are advanced and compared
    for (double[] point : testData.data) {
        long stamp = 1000 * count + r.nextInt(10) - 5;
        AnomalyDescriptor firstResult = first.process(point, stamp);
        AnomalyDescriptor thirdResult = third.process(point, stamp);
        assertEquals(firstResult.getRCFScore(), thirdResult.getRCFScore(), 1e-10);
        ++count;
    }
}
Also used : Random(java.util.Random) AnomalyDescriptor(com.amazon.randomcutforest.parkservices.AnomalyDescriptor) MultiDimDataWithKey(com.amazon.randomcutforest.testutils.MultiDimDataWithKey) ThresholdedRandomCutForest(com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) MethodSource(org.junit.jupiter.params.provider.MethodSource)

Example 15 with MultiDimDataWithKey

use of com.amazon.randomcutforest.testutils.MultiDimDataWithKey in project random-cut-forest-by-aws by aws.

In the class ThresholdedRandomCutForestMapperTest, method testRoundTripStandardShingledInternal.

/**
 * Round-trips an internally shingling {@code ThresholdedRandomCutForest}
 * through {@code ThresholdedRandomCutForestMapper} and checks that the
 * restored model keeps agreeing with the originals.
 *
 * Two thresholded forests (the first built with
 * {@code boundingBoxCacheFraction(0)}, the second without that setting) and a
 * plain {@code RandomCutForest} share one seed and see the same points. The
 * thresholded forests must agree on the RCF score to 1e-10 and match the plain
 * forest to 1e-4. Whenever the first reports a positive anomaly grade, the
 * second must report the same grade and the score must be at least the
 * configured lower threshold. After serializing and deserializing the second
 * model, a further batch is processed and the restored model must match the
 * first on RCF score and data confidence to 1e-10.
 */
@Test
public void testRoundTripStandardShingledInternal() {
    int sampleSize = 256;
    int baseDimensions = 2;
    int shingleSize = 8;
    int dimensions = baseDimensions * shingleSize;
    long seed = new Random().nextLong();
    RandomCutForest forest = RandomCutForest.builder().compact(true).dimensions(dimensions).precision(Precision.FLOAT_32).internalShinglingEnabled(true).shingleSize(shingleSize).randomSeed(seed).build();
    ThresholdedRandomCutForest first = new ThresholdedRandomCutForest.Builder<>().compact(true).dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed).internalShinglingEnabled(true).shingleSize(shingleSize).anomalyRate(0.01).adjustThreshold(true).boundingBoxCacheFraction(0).build();
    ThresholdedRandomCutForest second = new ThresholdedRandomCutForest.Builder<>().compact(true).dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed).internalShinglingEnabled(true).shingleSize(shingleSize).anomalyRate(0.01).adjustThreshold(true).build();
    // random lower threshold in [0.75, 1.25), applied to both thresholded forests
    double value = 0.75 + 0.5 * new Random().nextDouble();
    first.setLowerThreshold(value);
    second.setLowerThreshold(value);
    MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(10 * sampleSize, 50, 100, 5, seed, baseDimensions);
    for (double[] point : dataWithKeys.data) {
        AnomalyDescriptor firstResult = first.process(point, 0L);
        AnomalyDescriptor secondResult = second.process(point, 0L);
        assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
        // the plain forest is scored before it is updated with the same point
        assertEquals(firstResult.getRCFScore(), forest.getAnomalyScore(point), 1e-4);
        if (firstResult.getAnomalyGrade() > 0) {
            assertEquals(secondResult.getAnomalyGrade(), firstResult.getAnomalyGrade(), 1e-10);
            // a JUnit assertion is used instead of the `assert` keyword, which is
            // skipped entirely when the JVM runs without -ea
            assertEquals(true, firstResult.getRCFScore() >= value);
        }
        forest.update(point);
    }
    // serialize + deserialize
    ThresholdedRandomCutForestMapper mapper = new ThresholdedRandomCutForestMapper();
    ThresholdedRandomCutForest third = mapper.toModel(mapper.toState(second));
    MultiDimDataWithKey testData = ShingledMultiDimDataWithKeys.getMultiDimData(100, 50, 100, 5, seed, baseDimensions);
    // update re-instantiated forest alongside the originals
    for (double[] point : testData.data) {
        AnomalyDescriptor firstResult = first.process(point, 0L);
        AnomalyDescriptor secondResult = second.process(point, 0L);
        AnomalyDescriptor thirdResult = third.process(point, 0L);
        double score = forest.getAnomalyScore(point);
        assertEquals(score, firstResult.getRCFScore(), 1e-4);
        assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
        assertEquals(firstResult.getRCFScore(), thirdResult.getRCFScore(), 1e-10);
        assertEquals(firstResult.getDataConfidence(), thirdResult.getDataConfidence(), 1e-10);
        forest.update(point);
    }
}
Also used : Random(java.util.Random) AnomalyDescriptor(com.amazon.randomcutforest.parkservices.AnomalyDescriptor) RandomCutForest(com.amazon.randomcutforest.RandomCutForest) ThresholdedRandomCutForest(com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest) MultiDimDataWithKey(com.amazon.randomcutforest.testutils.MultiDimDataWithKey) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Aggregations

MultiDimDataWithKey (com.amazon.randomcutforest.testutils.MultiDimDataWithKey)19 Random (java.util.Random)19 ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)15 AnomalyDescriptor (com.amazon.randomcutforest.parkservices.AnomalyDescriptor)13 ThresholdedRandomCutForest (com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest)13 EnumSource (org.junit.jupiter.params.provider.EnumSource)7 Precision (com.amazon.randomcutforest.config.Precision)6 Test (org.junit.jupiter.api.Test)5 RandomCutForest (com.amazon.randomcutforest.RandomCutForest)4 ThresholdedRandomCutForestMapper (com.amazon.randomcutforest.parkservices.state.ThresholdedRandomCutForestMapper)3 NormalMixtureTestData (com.amazon.randomcutforest.testutils.NormalMixtureTestData)2 MethodSource (org.junit.jupiter.params.provider.MethodSource)2 TransformMethod (com.amazon.randomcutforest.config.TransformMethod)1 PointStore (com.amazon.randomcutforest.store.PointStore)1 ShingleBuilder (com.amazon.randomcutforest.util.ShingleBuilder)1 ValueSource (org.junit.jupiter.params.provider.ValueSource)1