Search in sources :

Example 1 with ThresholdedRandomCutForestMapper

use of com.amazon.randomcutforest.parkservices.state.ThresholdedRandomCutForestMapper in project random-cut-forest-by-aws by aws.

In class ConsistencyTest, method MixedShinglingTest:

/**
 * Feeds one generated stream to three forests built from the same random seed:
 * one with internal shingling enabled, one that receives externally shingled
 * points, and one that receives the same shingled points but is configured with
 * shingle size 1. RCF scores are asserted equal across all three; anomaly
 * grades are asserted equal only between the first two.
 *
 * After {@code length} points the externally shingled forest is converted to
 * state and back through {@link ThresholdedRandomCutForestMapper}, and the
 * restored copy is asserted to match the internally shingled forest (score and
 * grade) on the remaining points.
 */
@Test
public void MixedShinglingTest() {
    final int sampleSize = 256;
    final int baseDimensions = 1;
    final int shingleSize = 4;
    final int dimensions = baseDimensions * shingleSize;
    final long seed = new Random().nextLong();
    System.out.println(seed);
    // test is exact equality, reducing the number of trials
    final int numTrials = 1;
    // and using fewer trees to speed up test
    final int numberOfTrees = 30;
    final int length = 2000 * sampleSize;
    final int testLength = length;
    // start of the last base-dimension block inside a shingled vector
    final int lastBlockOffset = (shingleSize - 1) * baseDimensions;
    final double tolerance = 1e-10;

    for (int trial = 0; trial < numTrials; trial++) {
        ThresholdedRandomCutForest internallyShingled = new ThresholdedRandomCutForest.Builder<>()
                .compact(true)
                .dimensions(dimensions)
                .precision(Precision.FLOAT_32)
                .randomSeed(seed)
                .numberOfTrees(numberOfTrees)
                .internalShinglingEnabled(true)
                .shingleSize(shingleSize)
                .anomalyRate(0.01)
                .build();
        ThresholdedRandomCutForest externallyShingled = new ThresholdedRandomCutForest.Builder<>()
                .compact(true)
                .dimensions(dimensions)
                .precision(Precision.FLOAT_32)
                .randomSeed(seed)
                .numberOfTrees(numberOfTrees)
                .internalShinglingEnabled(false)
                .shingleSize(shingleSize)
                .anomalyRate(0.01)
                .build();
        ThresholdedRandomCutForest unitShingle = new ThresholdedRandomCutForest.Builder<>()
                .compact(true)
                .dimensions(dimensions)
                .precision(Precision.FLOAT_32)
                .randomSeed(seed)
                .numberOfTrees(numberOfTrees)
                .internalShinglingEnabled(false)
                .shingleSize(1)
                .anomalyRate(0.01)
                .build();

        MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(length + testLength, 50,
                100, 5, seed + trial, baseDimensions);
        double[][] shingledData = generateShingledData(dataWithKeys.data, shingleSize, baseDimensions, false);
        assertEquals(shingledData.length, dataWithKeys.data.length - shingleSize + 1);

        // insert initial points; only the internally shingled forest consumes them
        int cursor = 0;
        while (cursor < shingleSize - 1) {
            internallyShingled.process(dataWithKeys.data[cursor], 0L);
            cursor++;
        }

        for (int row = 0; row < length; row++) {
            double[] shingledPoint = shingledData[row];
            // validate equality of points
            for (int d = 0; d < baseDimensions; d++) {
                assertEquals(dataWithKeys.data[cursor][d], shingledPoint[lastBlockOffset + d], tolerance);
            }
            AnomalyDescriptor internalResult = internallyShingled.process(dataWithKeys.data[cursor], 0L);
            cursor++;
            AnomalyDescriptor externalResult = externallyShingled.process(shingledPoint, 0L);
            AnomalyDescriptor unitResult = unitShingle.process(shingledPoint, 0L);

            assertEquals(internalResult.getRCFScore(), externalResult.getRCFScore(), tolerance);
            assertEquals(internalResult.getAnomalyGrade(), externalResult.getAnomalyGrade(), tolerance);
            // grades will not match between the first and third forests because the
            // thresholder has wrong info about shingle size; only scores are compared
            assertEquals(internalResult.getRCFScore(), unitResult.getRCFScore(), tolerance);
        }

        // round-trip the externally shingled forest through its serialized state
        ThresholdedRandomCutForestMapper mapper = new ThresholdedRandomCutForestMapper();
        ThresholdedRandomCutForest restored = mapper.toModel(mapper.toState(externallyShingled));

        for (int row = length; row < shingledData.length; row++) {
            double[] shingledPoint = shingledData[row];
            // validate equality of points
            for (int d = 0; d < baseDimensions; d++) {
                assertEquals(dataWithKeys.data[cursor][d], shingledPoint[lastBlockOffset + d], tolerance);
            }
            AnomalyDescriptor internalResult = internallyShingled.process(dataWithKeys.data[cursor], 0L);
            cursor++;
            AnomalyDescriptor externalResult = externallyShingled.process(shingledPoint, 0L);
            AnomalyDescriptor unitResult = unitShingle.process(shingledPoint, 0L);
            AnomalyDescriptor restoredResult = restored.process(shingledPoint, 0L);

            assertEquals(internalResult.getRCFScore(), externalResult.getRCFScore(), tolerance);
            assertEquals(internalResult.getAnomalyGrade(), externalResult.getAnomalyGrade(), tolerance);
            // grades will not match between the first and third forests because the
            // thresholder has wrong info about shingle size; only scores are compared
            assertEquals(internalResult.getRCFScore(), unitResult.getRCFScore(), tolerance);
            assertEquals(internalResult.getRCFScore(), restoredResult.getRCFScore(), tolerance);
            assertEquals(internalResult.getAnomalyGrade(), restoredResult.getAnomalyGrade(), tolerance);
        }
    }
}
Also used : Random(java.util.Random) ThresholdedRandomCutForestMapper(com.amazon.randomcutforest.parkservices.state.ThresholdedRandomCutForestMapper) MultiDimDataWithKey(com.amazon.randomcutforest.testutils.MultiDimDataWithKey) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 2 with ThresholdedRandomCutForestMapper

use of com.amazon.randomcutforest.parkservices.state.ThresholdedRandomCutForestMapper in project random-cut-forest-by-aws by aws.

In class ConsistencyTest, method TimeAugmentedTest:

/**
 * Compares a {@code ForestMode.STANDARD} forest with a
 * {@code ForestMode.TIME_AUGMENTED} forest that is otherwise configured
 * identically (including {@code weightTime(0)}), asserting that both report the
 * same RCF score and anomaly grade for every point.
 *
 * Half way through the stream the time-augmented forest is converted to state
 * and back through {@link ThresholdedRandomCutForestMapper}; the restored copy
 * is then asserted to match the standard forest on the second half, where the
 * timestamps use a different spacing.
 *
 * @param transformMethod transform configured on both forests; the test runs
 *                        once per constant of {@code TransformMethod}
 */
@ParameterizedTest
@EnumSource(TransformMethod.class)
public void TimeAugmentedTest(TransformMethod transformMethod) {
    int sampleSize = 256;
    int baseDimensions = 1;
    int shingleSize = 4;
    int dimensions = baseDimensions * shingleSize;
    // test is exact equality, reducing the number of trials
    int numTrials = 1;
    // and using fewer trees to speed up test
    int numberOfTrees = 30;
    int length = 10 * sampleSize;
    int dataSize = 2 * length;
    for (int i = 0; i < numTrials; i++) {
        Precision precision = Precision.FLOAT_32;
        long seed = new Random().nextLong();
        System.out.println("seed = " + seed);
        ThresholdedRandomCutForest first = ThresholdedRandomCutForest.builder().compact(true).dimensions(dimensions).randomSeed(0).numberOfTrees(numberOfTrees).shingleSize(shingleSize).sampleSize(sampleSize).internalShinglingEnabled(true).precision(precision).anomalyRate(0.01).forestMode(ForestMode.STANDARD).weightTime(0).transformMethod(transformMethod).normalizeTime(true).outputAfter(32).initialAcceptFraction(0.125).build();
        ThresholdedRandomCutForest second = ThresholdedRandomCutForest.builder().compact(true).dimensions(dimensions).randomSeed(0).numberOfTrees(numberOfTrees).shingleSize(shingleSize).sampleSize(sampleSize).internalShinglingEnabled(true).precision(precision).anomalyRate(0.01).forestMode(ForestMode.TIME_AUGMENTED).weightTime(0).transformMethod(transformMethod).normalizeTime(true).outputAfter(32).initialAcceptFraction(0.125).build();
        // ensuring that the parameters are the same; otherwise the grades/scores cannot
        // be the same
        // weightTime has to be 0 in the above
        first.setLowerThreshold(1.1);
        second.setLowerThreshold(1.1);
        first.setHorizon(0.75);
        second.setHorizon(0.75);
        Random noise = new Random(0);
        // change the last argument seed for a different run
        MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(dataSize + shingleSize - 1, 50, 100, 5, seed, baseDimensions);
        for (int j = 0; j < length; j++) {
            // long arithmetic keeps the timestamp exact regardless of stream length
            long timestamp = 100L * j + noise.nextInt(10) - 5;
            AnomalyDescriptor result = first.process(dataWithKeys.data[j], timestamp);
            AnomalyDescriptor test = second.process(dataWithKeys.data[j], timestamp);
            assertEquals(result.getRCFScore(), test.getRCFScore(), 1e-10);
            assertEquals(result.getAnomalyGrade(), test.getAnomalyGrade(), 1e-10);
        }
        // round-trip the time-augmented forest through its serialized state
        ThresholdedRandomCutForestMapper mapper = new ThresholdedRandomCutForestMapper();
        ThresholdedRandomCutForest third = mapper.toModel(mapper.toState(second));
        for (int j = length; j < 2 * length; j++) {
            // can be a different gap; indexing by j makes every iteration consume a
            // new point and keeps the timestamps advancing
            long timestamp = 150L * j + noise.nextInt(10) - 5;
            AnomalyDescriptor firstResult = first.process(dataWithKeys.data[j], timestamp);
            AnomalyDescriptor secondResult = second.process(dataWithKeys.data[j], timestamp);
            AnomalyDescriptor thirdResult = third.process(dataWithKeys.data[j], timestamp);
            assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
            assertEquals(firstResult.getAnomalyGrade(), secondResult.getAnomalyGrade(), 1e-10);
            assertEquals(firstResult.getRCFScore(), thirdResult.getRCFScore(), 1e-10);
            assertEquals(firstResult.getAnomalyGrade(), thirdResult.getAnomalyGrade(), 1e-10);
        }
    }
}
Also used : Random(java.util.Random) Precision(com.amazon.randomcutforest.config.Precision) ThresholdedRandomCutForestMapper(com.amazon.randomcutforest.parkservices.state.ThresholdedRandomCutForestMapper) MultiDimDataWithKey(com.amazon.randomcutforest.testutils.MultiDimDataWithKey) EnumSource(org.junit.jupiter.params.provider.EnumSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 3 with ThresholdedRandomCutForestMapper

use of com.amazon.randomcutforest.parkservices.state.ThresholdedRandomCutForestMapper in project random-cut-forest-by-aws by aws.

In class ConsistencyTest, method ImputeTest:

/**
 * Compares a {@code ForestMode.STANDARD} forest (always fed timestamp 0) with a
 * {@code ForestMode.STREAMING_IMPUTE} forest (fed evenly spaced, slightly
 * jittered timestamps) that is otherwise configured identically, asserting
 * equal RCF scores and anomaly grades to within 1e-6.
 *
 * Half way through the stream the streaming-impute forest is converted to state
 * and back through {@link ThresholdedRandomCutForestMapper}, and the restored
 * copy takes its place for the comparison on the second half.
 *
 * @param transformMethod transform configured on both forests; the test runs
 *                        once per constant of {@code TransformMethod}
 */
@ParameterizedTest
@EnumSource(TransformMethod.class)
public void ImputeTest(TransformMethod transformMethod) {
    final int sampleSize = 256;
    final int baseDimensions = 1;
    final int shingleSize = 4;
    final int dimensions = baseDimensions * shingleSize;
    // test is exact equality, reducing the number of trials
    final int numTrials = 1;
    // and using fewer trees to speed up test
    final int numberOfTrees = 30;
    final int length = 10 * sampleSize;
    final int dataSize = 2 * length;
    final double tolerance = 1e-6;

    for (int trial = 0; trial < numTrials; trial++) {
        long seed = new Random().nextLong();
        System.out.println("seed = " + seed);
        double[] weightVector = new double[] { 1.7, 4.2 };

        ThresholdedRandomCutForest standardForest = ThresholdedRandomCutForest.builder()
                .compact(true)
                .dimensions(dimensions)
                .randomSeed(0)
                .numberOfTrees(numberOfTrees)
                .shingleSize(shingleSize)
                .sampleSize(sampleSize)
                .internalShinglingEnabled(true)
                .precision(Precision.FLOAT_32)
                .anomalyRate(0.01)
                .forestMode(ForestMode.STANDARD)
                .weightTime(0)
                .transformMethod(transformMethod)
                .normalizeTime(true)
                .outputAfter(32)
                .initialAcceptFraction(0.125)
                .weights(weightVector)
                .build();
        ThresholdedRandomCutForest imputingForest = ThresholdedRandomCutForest.builder()
                .compact(true)
                .dimensions(dimensions)
                .randomSeed(0)
                .numberOfTrees(numberOfTrees)
                .shingleSize(shingleSize)
                .sampleSize(sampleSize)
                .internalShinglingEnabled(true)
                .precision(Precision.FLOAT_32)
                .anomalyRate(0.01)
                .forestMode(ForestMode.STREAMING_IMPUTE)
                .weightTime(0)
                .transformMethod(transformMethod)
                .normalizeTime(true)
                .outputAfter(32)
                .initialAcceptFraction(0.125)
                .weights(weightVector)
                .build();

        // ensuring that the parameters are the same; otherwise the grades/scores cannot
        // be the same
        // weightTime has to be 0 in the above
        standardForest.setLowerThreshold(1.1);
        imputingForest.setLowerThreshold(1.1);
        standardForest.setHorizon(0.75);
        imputingForest.setHorizon(0.75);

        Random jitter = new Random(0);
        // change the last argument seed for a different run
        MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(
                dataSize + shingleSize - 1, 50, 100, 5, seed, baseDimensions);

        for (int idx = 0; idx < length; idx++) {
            // gap has to be asymptotically same
            long timestamp = 100 * idx + jitter.nextInt(10) - 5;
            AnomalyDescriptor expected = standardForest.process(dataWithKeys.data[idx], 0L);
            AnomalyDescriptor actual = imputingForest.process(dataWithKeys.data[idx], timestamp);
            assertEquals(expected.getRCFScore(), actual.getRCFScore(), tolerance);
            assertEquals(expected.getAnomalyGrade(), actual.getAnomalyGrade(), tolerance);
        }

        // round-trip the streaming-impute forest through its serialized state
        ThresholdedRandomCutForestMapper mapper = new ThresholdedRandomCutForestMapper();
        ThresholdedRandomCutForest restoredForest = mapper.toModel(mapper.toState(imputingForest));

        for (int idx = length; idx < 2 * length; idx++) {
            // has to be the same gap
            long timestamp = 100 * idx + jitter.nextInt(10) - 5;
            AnomalyDescriptor expected = standardForest.process(dataWithKeys.data[idx], 0L);
            AnomalyDescriptor actual = restoredForest.process(dataWithKeys.data[idx], timestamp);
            assertEquals(expected.getRCFScore(), actual.getRCFScore(), tolerance);
            assertEquals(expected.getAnomalyGrade(), actual.getAnomalyGrade(), tolerance);
        }
    }
}
Also used : Random(java.util.Random) Precision(com.amazon.randomcutforest.config.Precision) ThresholdedRandomCutForestMapper(com.amazon.randomcutforest.parkservices.state.ThresholdedRandomCutForestMapper) MultiDimDataWithKey(com.amazon.randomcutforest.testutils.MultiDimDataWithKey) EnumSource(org.junit.jupiter.params.provider.EnumSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Aggregations

ThresholdedRandomCutForestMapper (com.amazon.randomcutforest.parkservices.state.ThresholdedRandomCutForestMapper)3 MultiDimDataWithKey (com.amazon.randomcutforest.testutils.MultiDimDataWithKey)3 Random (java.util.Random)3 ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)3 Precision (com.amazon.randomcutforest.config.Precision)2 EnumSource (org.junit.jupiter.params.provider.EnumSource)2 Test (org.junit.jupiter.api.Test)1