Example usage of com.amazon.randomcutforest.testutils.NormalMixtureTestData in the project random-cut-forest-by-aws by AWS.
From the class RandomCutForestFunctionalTest, method oneTimeSetUp:
@BeforeAll
public static void oneTimeSetUp() {
    // Shared forest configuration. The generated dataset is stochastic and
    // will contain different values on different runs.
    numberOfTrees = 100;
    sampleSize = 256;
    dimensions = 3;
    randomSeed = 123;

    parallelExecutionForest = RandomCutForest.builder()
            .numberOfTrees(numberOfTrees)
            .sampleSize(sampleSize)
            .dimensions(dimensions)
            .randomSeed(randomSeed)
            .centerOfMassEnabled(true)
            .storeSequenceIndexesEnabled(true)
            .build();

    // Same settings as above, but restricted to a single thread so the two
    // forests can be compared against each other.
    singleThreadedForest = RandomCutForest.builder()
            .numberOfTrees(numberOfTrees)
            .sampleSize(sampleSize)
            .dimensions(dimensions)
            .randomSeed(randomSeed)
            .centerOfMassEnabled(true)
            .storeSequenceIndexesEnabled(true)
            .parallelExecutionEnabled(false)
            .build();

    // Mixture-of-Gaussians parameters: a base cluster and an anomaly cluster
    // with the given means, deviations, and transition probabilities.
    dataSize = 10_000;
    baseMu = 0.0;
    baseSigma = 1.0;
    anomalyMu = 5.0;
    anomalySigma = 1.5;
    transitionToAnomalyProbability = 0.01;
    transitionToBaseProbability = 0.4;

    NormalMixtureTestData generator = new NormalMixtureTestData(baseMu, baseSigma, anomalyMu, anomalySigma,
            transitionToAnomalyProbability, transitionToBaseProbability);
    double[][] points = generator.generateTestData(dataSize, dimensions);

    // Feed both forests the identical stream of points.
    for (int index = 0; index < dataSize; index++) {
        parallelExecutionForest.update(points[index]);
        singleThreadedForest.update(points[index]);
    }
}
Example usage of com.amazon.randomcutforest.testutils.NormalMixtureTestData in the project random-cut-forest-by-aws by AWS.
From the class HyperTreeTest, method setup:
// ===========================================================
@BeforeAll
public static void setup() {
    // This is a tree test, so a single tree is sufficient.
    numberOfTrees = 1;
    sampleSize = 256;
    dimensions = 30;
    dataSize = 2000;

    // The "anomaly" cluster shares the base mean, and with a transition
    // probability of 0 it is never entered — effectively ignoring the
    // anomaly cluster for now.
    baseMu = 0.0;
    baseSigma = 1.0;
    anomalyMu = 0.0;
    anomalySigma = 1.5;
    transitionToAnomalyProbability = 0.0;
    transitionToBaseProbability = 1.0;

    generator = new NormalMixtureTestData(baseMu, baseSigma, anomalyMu, anomalySigma,
            transitionToAnomalyProbability, transitionToBaseProbability);
}
Example usage of com.amazon.randomcutforest.testutils.NormalMixtureTestData in the project random-cut-forest-by-aws by AWS.
From the class DynamicSampling, method run:
/**
 * Demonstrates how time decay controls the rate at which a forest adapts to a
 * change in the input distribution: {@code forest2} decays ten times faster
 * than {@code forest}, and {@code copyForest} is cloned from {@code forest}
 * and then forced to catch up by raising its decay fifty-fold.
 */
@Override
public void run() throws Exception {
    // Create and populate a random cut forest
    int dimensions = 4;
    int numberOfTrees = 50;
    int sampleSize = 256;
    Precision precision = Precision.FLOAT_64;
    int dataSize = 4 * sampleSize;
    NormalMixtureTestData testData = new NormalMixtureTestData();

    // Two forests identical except for time decay (set below).
    RandomCutForest forest = RandomCutForest.builder().compact(true).dimensions(dimensions).randomSeed(0)
            .numberOfTrees(numberOfTrees).sampleSize(sampleSize).precision(precision).build();
    RandomCutForest forest2 = RandomCutForest.builder().compact(true).dimensions(dimensions).randomSeed(0)
            .numberOfTrees(numberOfTrees).sampleSize(sampleSize).precision(precision).build();

    // camelCase per Java naming conventions (previously first_anomalies / second_anomalies)
    int firstAnomalies = 0;
    int secondAnomalies = 0;
    // forest2 forgets older points ten times faster, so it adapts more quickly
    forest2.setTimeDecay(10 * forest2.getTimeDecay());

    // Phase 1: both forests see the same base distribution.
    for (double[] point : testData.generateTestData(dataSize, dimensions)) {
        if (forest.getAnomalyScore(point) > 1.0) {
            firstAnomalies++;
        }
        if (forest2.getAnomalyScore(point) > 1.0) {
            secondAnomalies++;
        }
        forest.update(point);
        forest2.update(point);
    }
    System.out.println("Unusual scores: forest one " + firstAnomalies + ", second one " + secondAnomalies);
    // should be roughly equal

    // Phase 2: shift the distribution; the faster-decaying forest2 should
    // stop flagging the new regime as anomalous sooner.
    firstAnomalies = secondAnomalies = 0;
    testData = new NormalMixtureTestData(-3, 40);
    for (double[] point : testData.generateTestData(dataSize, dimensions)) {
        if (forest.getAnomalyScore(point) > 1.0) {
            firstAnomalies++;
        }
        if (forest2.getAnomalyScore(point) > 1.0) {
            secondAnomalies++;
        }
        forest.update(point);
        forest2.update(point);
    }
    System.out.println("Unusual scores: forest one " + firstAnomalies + ", second one " + secondAnomalies);
    // forest2 should adapt faster

    // Phase 3: clone forest via mapper round-trip, then force the copy to
    // adjust rapidly by raising its time decay.
    firstAnomalies = secondAnomalies = 0;
    RandomCutForestMapper mapper = new RandomCutForestMapper();
    mapper.setSaveExecutorContextEnabled(true);
    RandomCutForest copyForest = mapper.toModel(mapper.toState(forest));
    copyForest.setTimeDecay(50 * forest.getTimeDecay());
    // force an adjustment to catch up
    // NOTE(review): the second argument (-40) is negative; if it is a standard
    // deviation this looks unintended — confirm the NormalMixtureTestData
    // constructor semantics.
    testData = new NormalMixtureTestData(-10, -40);
    int forcedChangeAnomalies = 0;
    for (double[] point : testData.generateTestData(dataSize, dimensions)) {
        if (forest.getAnomalyScore(point) > 1.0) {
            firstAnomalies++;
        }
        if (forest2.getAnomalyScore(point) > 1.0) {
            secondAnomalies++;
        }
        if (copyForest.getAnomalyScore(point) > 1.0) {
            forcedChangeAnomalies++;
        }
        copyForest.update(point);
        forest.update(point);
        forest2.update(point);
    }
    // both should show the similar rate of adjustment
    System.out.println("Unusual scores: forest one " + firstAnomalies + ", second one " + secondAnomalies
            + ", forced (first) " + forcedChangeAnomalies);
}
Example usage of com.amazon.randomcutforest.testutils.NormalMixtureTestData in the project random-cut-forest-by-aws by AWS.
From the class Thresholded1DGaussianMix, method run:
@Override
public void run() throws Exception {
    // Create and populate a random cut forest
    int shingleSize = 4;
    int numberOfTrees = 50;
    int sampleSize = 256;
    Precision precision = Precision.FLOAT_32;
    int dataSize = 4 * sampleSize;
    // change this to try different number of attributes,
    // this parameter is not expected to be larger than 5 for this example
    int baseDimensions = 1;
    // count doubles as the timestamp handed to process() below
    int count = 0;
    int dimensions = baseDimensions * shingleSize;
    // presumably TIME_AUGMENTED appends the supplied timestamp as an extra
    // attribute — confirm against ForestMode documentation
    ThresholdedRandomCutForest forest = new ThresholdedRandomCutForest.Builder<>().compact(true).dimensions(dimensions).randomSeed(0).numberOfTrees(numberOfTrees).shingleSize(shingleSize).sampleSize(sampleSize).precision(precision).anomalyRate(0.01).forestMode(ForestMode.TIME_AUGMENTED).build();
    // NOTE(review): seed is printed but never passed to the generator below — confirm intent
    long seed = new Random().nextLong();
    System.out.println("Anomalies would correspond to a run, based on a change of state.");
    System.out.println("Each change is normal <-> anomaly; so after the second change the data is normal");
    System.out.println("seed = " + seed);
    // 1-D mixture: base cluster (mu=10, sigma=1) and anomaly cluster (mu=50, sigma=2)
    // with transition probabilities 0.01 (to anomaly) and 0.1 (back to base)
    NormalMixtureTestData normalMixtureTestData = new NormalMixtureTestData(10, 1.0, 50, 2.0, 0.01, 0.1);
    MultiDimDataWithKey dataWithKeys = normalMixtureTestData.generateTestDataWithKey(dataSize, 1, 0);
    // keyCounter walks changeIndices, the positions where the generator switched state
    int keyCounter = 0;
    for (double[] point : dataWithKeys.data) {
        AnomalyDescriptor result = forest.process(point, count);
        // announce a generator state change when the forest's internal
        // timestamp reaches the next change index
        if (keyCounter < dataWithKeys.changeIndices.length && result.getInternalTimeStamp() == dataWithKeys.changeIndices[keyCounter]) {
            System.out.println("timestamp " + (result.getInputTimestamp()) + " CHANGE");
            ++keyCounter;
        }
        // NOTE(review): this second check compares the raw count against the
        // same index list; when internal timestamp and count coincide only one
        // branch fires per index — confirm both checks are intended
        if (keyCounter < dataWithKeys.changeIndices.length && count == dataWithKeys.changeIndices[keyCounter]) {
            System.out.println("timestamp " + (count) + " CHANGE ");
            ++keyCounter;
        }
        // a non-zero grade means the point was flagged as anomalous
        if (result.getAnomalyGrade() != 0) {
            System.out.print("timestamp " + (count) + " RESULT value ");
            for (int i = 0; i < baseDimensions; i++) {
                System.out.print(result.getCurrentInput()[i] + ", ");
            }
            System.out.print("score " + result.getRCFScore() + ", grade " + result.getAnomalyGrade() + ", ");
            if (result.isExpectedValuesPresent()) {
                if (result.getRelativeIndex() != 0 && result.isStartOfAnomaly()) {
                    // the anomaly actually began a few steps earlier: print the
                    // past value that deviated and the value expected in its place
                    System.out.print(-result.getRelativeIndex() + " steps ago, instead of ");
                    for (int i = 0; i < baseDimensions; i++) {
                        System.out.print(result.getPastValues()[i] + ", ");
                    }
                    System.out.print("expected ");
                    for (int i = 0; i < baseDimensions; i++) {
                        System.out.print(result.getExpectedValuesList()[0][i] + ", ");
                        // print the deviation only when past and expected differ
                        if (result.getPastValues()[i] != result.getExpectedValuesList()[0][i]) {
                            System.out.print("( " + (result.getPastValues()[i] - result.getExpectedValuesList()[0][i]) + " ) ");
                        }
                    }
                } else {
                    // the anomaly is at the current point: print the expected
                    // value and (when different) the deviation from it
                    System.out.print("expected ");
                    for (int i = 0; i < baseDimensions; i++) {
                        System.out.print(result.getExpectedValuesList()[0][i] + ", ");
                        if (result.getCurrentInput()[i] != result.getExpectedValuesList()[0][i]) {
                            System.out.print("( " + (result.getCurrentInput()[i] - result.getExpectedValuesList()[0][i]) + " ) ");
                        }
                    }
                }
            }
            System.out.println();
        }
        ++count;
    }
}
Example usage of com.amazon.randomcutforest.testutils.NormalMixtureTestData in the project random-cut-forest-by-aws by AWS.
From the class ObjectStreamExample, method run:
/**
 * Round-trips a trained forest through Java object-stream serialization and
 * verifies the restored forest scores anomalies consistently with the original.
 */
@Override
public void run() throws Exception {
    // Build a forest and feed it a stream of mixture-of-Gaussians points.
    int dimensions = 10;
    int numberOfTrees = 50;
    int sampleSize = 256;
    Precision precision = Precision.FLOAT_32;
    RandomCutForest forest = RandomCutForest.builder().compact(true).dimensions(dimensions)
            .numberOfTrees(numberOfTrees).sampleSize(sampleSize).precision(precision).build();

    int dataSize = 1000 * sampleSize;
    NormalMixtureTestData testData = new NormalMixtureTestData();
    for (double[] point : testData.generateTestData(dataSize, dimensions)) {
        forest.update(point);
    }

    // Serialize the forest state to bytes and report the size.
    RandomCutForestMapper mapper = new RandomCutForestMapper();
    mapper.setSaveExecutorContextEnabled(true);
    System.out.printf("dimensions = %d, numberOfTrees = %d, sampleSize = %d, precision = %s%n", dimensions,
            numberOfTrees, sampleSize, precision);
    byte[] bytes = serialize(mapper.toState(forest));
    System.out.printf("Object output stream size = %d bytes%n", bytes.length);

    // Restore a second forest from the object stream and compare the anomaly
    // scores the two forests produce on fresh data.
    RandomCutForestState restoredState = (RandomCutForestState) deserialize(bytes);
    RandomCutForest restoredForest = mapper.toModel(restoredState);

    int testSize = 100;
    // tolerance scales with log2(sampleSize)
    double delta = Math.log(sampleSize) / Math.log(2) * 0.05;
    int differences = 0;
    int anomalies = 0;
    for (double[] point : testData.generateTestData(testSize, dimensions)) {
        double originalScore = forest.getAnomalyScore(point);
        double restoredScore = restoredForest.getAnomalyScore(point);
        // consider the point only if either forest scores it as an anomaly,
        // then check whether the two scores agree within tolerance
        if (originalScore > 1 || restoredScore > 1) {
            anomalies++;
            if (Math.abs(originalScore - restoredScore) > delta) {
                differences++;
            }
        }
        forest.update(point);
        restoredForest.update(point);
    }

    // first validate that this was a nontrivial test
    if (anomalies == 0) {
        throw new IllegalStateException("test data did not produce any anomalies");
    }
    // validate that the two forests agree on anomaly scores
    if (differences >= 0.01 * testSize) {
        throw new IllegalStateException("restored forest does not agree with original forest");
    }
    System.out.println("Looks good!");
}
Aggregations