Search in sources :

Example 6 with RandomCutForest

use of com.amazon.randomcutforest.RandomCutForest in project random-cut-forest-by-aws by aws.

the class JsonExample method run.

/**
 * Builds a compact random cut forest, serializes its state to JSON with
 * Jackson, restores a second forest from that JSON, and checks that both
 * forests produce similar anomaly scores on fresh test data.
 *
 * @throws IllegalStateException if no test point scores above 1 in either
 *                               forest, or if the two forests disagree by more
 *                               than the tolerance on at least 1% of the test
 *                               points
 * @throws Exception             if JSON serialization or deserialization fails
 */
@Override
public void run() throws Exception {
    // Create and populate a random cut forest
    int dimensions = 4;
    int numberOfTrees = 50;
    int sampleSize = 256;
    Precision precision = Precision.FLOAT_64;
    RandomCutForest forest = RandomCutForest.builder().compact(true).dimensions(dimensions).numberOfTrees(numberOfTrees).sampleSize(sampleSize).precision(precision).build();
    int dataSize = 4 * sampleSize;
    NormalMixtureTestData testData = new NormalMixtureTestData();
    for (double[] point : testData.generateTestData(dataSize, dimensions)) {
        forest.update(point);
    }
    // Convert to JSON and print the number of bytes
    RandomCutForestMapper mapper = new RandomCutForestMapper();
    mapper.setSaveExecutorContextEnabled(true);
    ObjectMapper jsonMapper = new ObjectMapper();
    String json = jsonMapper.writeValueAsString(mapper.toState(forest));
    System.out.printf("dimensions = %d, numberOfTrees = %d, sampleSize = %d, precision = %s%n", dimensions, numberOfTrees, sampleSize, precision);
    // Measure the size in UTF-8 explicitly; the no-arg getBytes() uses the
    // platform default charset, which would make the reported size vary by JVM.
    System.out.printf("JSON size = %d bytes%n", json.getBytes(java.nio.charset.StandardCharsets.UTF_8).length);
    // Restore from JSON and compare anomaly scores produced by the two forests
    RandomCutForest forest2 = mapper.toModel(jsonMapper.readValue(json, RandomCutForestState.class));
    int testSize = 100;
    // allowed score gap: 0.05 * log2(sampleSize)
    double delta = Math.log(sampleSize) / Math.log(2) * 0.05;
    int differences = 0;
    int anomalies = 0;
    for (double[] point : testData.generateTestData(testSize, dimensions)) {
        double score = forest.getAnomalyScore(point);
        double score2 = forest2.getAnomalyScore(point);
        // only points scored above 1 by at least one forest are compared; for
        // those, the two scores must be within delta of each other
        if (score > 1 || score2 > 1) {
            anomalies++;
            if (Math.abs(score - score2) > delta) {
                differences++;
            }
        }
        // keep both forests in step so later scores remain comparable
        forest.update(point);
        forest2.update(point);
    }
    // first validate that this was a nontrivial test
    if (anomalies == 0) {
        throw new IllegalStateException("test data did not produce any anomalies");
    }
    // validate that the two forests agree on anomaly scores
    if (differences >= 0.01 * testSize) {
        throw new IllegalStateException("restored forest does not agree with original forest");
    }
    System.out.println("Looks good!");
}
Also used : Precision(com.amazon.randomcutforest.config.Precision) RandomCutForest(com.amazon.randomcutforest.RandomCutForest) RandomCutForestMapper(com.amazon.randomcutforest.state.RandomCutForestMapper) NormalMixtureTestData(com.amazon.randomcutforest.testutils.NormalMixtureTestData) RandomCutForestState(com.amazon.randomcutforest.state.RandomCutForestState) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper)

Example 7 with RandomCutForest

use of com.amazon.randomcutforest.RandomCutForest in project random-cut-forest-by-aws by aws.

the class ProtostuffExample method run.

/**
 * Round-trips a compact random cut forest through protostuff: the forest is
 * trained, its state is serialized to a byte array, a second forest is rebuilt
 * from those bytes, and both forests are then scored on the same fresh points.
 *
 * @throws IllegalStateException if no test point scores above 1 in either
 *                               forest, or if the forests disagree by more than
 *                               the tolerance on at least 1% of the test points
 * @throws Exception             declared by the overridden method
 */
@Override
public void run() throws Exception {
    // Forest configuration
    final int dimensions = 10;
    final int numberOfTrees = 50;
    final int sampleSize = 256;
    final Precision precision = Precision.FLOAT_32;

    // Build the forest and train it on generated data
    RandomCutForest sourceForest = RandomCutForest.builder()
            .compact(true)
            .dimensions(dimensions)
            .numberOfTrees(numberOfTrees)
            .sampleSize(sampleSize)
            .precision(precision)
            .build();
    final int dataSize = 1000 * sampleSize;
    NormalMixtureTestData testData = new NormalMixtureTestData();
    for (double[] trainingPoint : testData.generateTestData(dataSize, dimensions)) {
        sourceForest.update(trainingPoint);
    }

    // Serialize the forest state with protostuff
    RandomCutForestMapper forestMapper = new RandomCutForestMapper();
    forestMapper.setSaveExecutorContextEnabled(true);
    Schema<RandomCutForestState> schema = RuntimeSchema.getSchema(RandomCutForestState.class);
    LinkedBuffer scratch = LinkedBuffer.allocate(512);
    byte[] serialized;
    try {
        serialized = ProtostuffIOUtil.toByteArray(forestMapper.toState(sourceForest), schema, scratch);
    } finally {
        // the buffer is cleared whether or not serialization succeeded
        scratch.clear();
    }
    System.out.printf("dimensions = %d, numberOfTrees = %d, sampleSize = %d, precision = %s%n", dimensions, numberOfTrees, sampleSize, precision);
    System.out.printf("protostuff size = %d bytes%n", serialized.length);

    // Rebuild a second forest from the serialized bytes
    RandomCutForestState restoredState = schema.newMessage();
    ProtostuffIOUtil.mergeFrom(serialized, restoredState, schema);
    RandomCutForest restoredForest = forestMapper.toModel(restoredState);

    // Score fresh points with both forests and count disagreements
    final int testSize = 100;
    // allowed score gap: 0.05 * log2(sampleSize)
    final double tolerance = Math.log(sampleSize) / Math.log(2) * 0.05;
    int disagreements = 0;
    int anomalyCount = 0;
    for (double[] sample : testData.generateTestData(testSize, dimensions)) {
        double sourceScore = sourceForest.getAnomalyScore(sample);
        double restoredScore = restoredForest.getAnomalyScore(sample);
        // only points that at least one forest scores above 1 are compared
        boolean flaggedByEither = sourceScore > 1 || restoredScore > 1;
        if (flaggedByEither) {
            anomalyCount++;
            boolean outsideTolerance = Math.abs(sourceScore - restoredScore) > tolerance;
            if (outsideTolerance) {
                disagreements++;
            }
        }
        // both forests receive every point so they stay in step
        sourceForest.update(sample);
        restoredForest.update(sample);
    }

    // a run without any anomalous point would not exercise the comparison
    if (anomalyCount == 0) {
        throw new IllegalStateException("test data did not produce any anomalies");
    }
    // fail when 1% or more of the test points disagree
    if (disagreements >= 0.01 * testSize) {
        throw new IllegalStateException("restored forest does not agree with original forest");
    }
    System.out.println("Looks good!");
}
Also used : LinkedBuffer(io.protostuff.LinkedBuffer) Precision(com.amazon.randomcutforest.config.Precision) RandomCutForest(com.amazon.randomcutforest.RandomCutForest) RandomCutForestMapper(com.amazon.randomcutforest.state.RandomCutForestMapper) NormalMixtureTestData(com.amazon.randomcutforest.testutils.NormalMixtureTestData) RandomCutForestState(com.amazon.randomcutforest.state.RandomCutForestState)

Example 8 with RandomCutForest

use of com.amazon.randomcutforest.RandomCutForest in project random-cut-forest-by-aws by aws.

the class ProtostuffExampleWithShingles method run.

/**
 * Trains a compact, shingled random cut forest, serializes its state with
 * protostuff (with tree state saving disabled on the mapper), rebuilds a
 * second forest from the bytes, and checks that both forests produce similar
 * anomaly scores on freshly generated shingled data.
 *
 * @throws IllegalStateException if the two forests disagree by more than the
 *                               tolerance on at least 1% of the test points
 * @throws Exception             declared by the overridden method
 */
@Override
public void run() throws Exception {
    // Create and populate a random cut forest
    int dimensions = 10;
    int numberOfTrees = 50;
    int sampleSize = 256;
    Precision precision = Precision.FLOAT_64;
    RandomCutForest forest = RandomCutForest.builder().compact(true).dimensions(dimensions).numberOfTrees(numberOfTrees).sampleSize(sampleSize).precision(precision).shingleSize(dimensions).build();
    int dataSize = 1000 * sampleSize;
    for (double[] point : generateShingledData(dataSize, dimensions, 0)) {
        forest.update(point);
    }
    // Convert to an array of bytes and print the size
    RandomCutForestMapper mapper = new RandomCutForestMapper();
    mapper.setSaveExecutorContextEnabled(true);
    // NOTE(review): presumably omits per-tree state from the serialized form so
    // trees are rebuilt on restore — confirm against RandomCutForestMapper
    mapper.setSaveTreeStateEnabled(false);
    Schema<RandomCutForestState> schema = RuntimeSchema.getSchema(RandomCutForestState.class);
    LinkedBuffer buffer = LinkedBuffer.allocate(512);
    byte[] bytes;
    try {
        RandomCutForestState state = mapper.toState(forest);
        bytes = ProtostuffIOUtil.toByteArray(state, schema, buffer);
    } finally {
        buffer.clear();
    }
    System.out.printf("dimensions = %d, numberOfTrees = %d, sampleSize = %d, precision = %s%n", dimensions, numberOfTrees, sampleSize, precision);
    System.out.printf("protostuff size = %d bytes%n", bytes.length);
    // Restore from protostuff and compare anomaly scores produced by the two
    // forests
    RandomCutForestState state2 = schema.newMessage();
    ProtostuffIOUtil.mergeFrom(bytes, state2, schema);
    RandomCutForest forest2 = mapper.toModel(state2);
    int testSize = 10000;
    // allowed score gap: 0.05 * log2(sampleSize)
    double delta = Math.log(sampleSize) / Math.log(2) * 0.05;
    int differences = 0;
    // NOTE(review): counted but never validated here, unlike the sibling
    // examples that throw when no anomalies are seen — confirm this is intended
    int anomalies = 0;
    for (double[] point : generateShingledData(testSize, dimensions, 2)) {
        double score = forest.getAnomalyScore(point);
        double score2 = forest2.getAnomalyScore(point);
        // only points scored above 1 by at least one forest are compared; for
        // those, the two scores must be within delta of each other
        if (score > 1 || score2 > 1) {
            anomalies++;
            if (Math.abs(score - score2) > delta) {
                differences++;
            }
        }
        // keep both forests in step so later scores remain comparable
        forest.update(point);
        forest2.update(point);
    }
    // validate that the two forests agree on anomaly scores
    if (differences >= 0.01 * testSize) {
        throw new IllegalStateException("restored forest does not agree with original forest");
    }
    System.out.println("Looks good!");
}
Also used : LinkedBuffer(io.protostuff.LinkedBuffer) Precision(com.amazon.randomcutforest.config.Precision) RandomCutForest(com.amazon.randomcutforest.RandomCutForest) RandomCutForestMapper(com.amazon.randomcutforest.state.RandomCutForestMapper) RandomCutForestState(com.amazon.randomcutforest.state.RandomCutForestState)

Example 9 with RandomCutForest

use of com.amazon.randomcutforest.RandomCutForest in project random-cut-forest-by-aws by aws.

the class DynamicThroughput method run.

/**
 * Compares two identically configured and identically seeded forests while
 * the bounding box cache fraction of the second one is set to 0, 0.25, 0.5,
 * 0.75 and 1.0 in turn. Both forests score and ingest the same points; any
 * score gap above 1e-10 counts as a mismatch. The time spent on the final
 * {@code sampleSize} points is printed for each cache fraction.
 *
 * @throws IllegalStateException if any pair of scores differs by more than 1e-10
 * @throws Exception             declared by the overridden method
 */
@Override
public void run() throws Exception {
    // Shared configuration for every round
    final int dimensions = 4;
    final int numberOfTrees = 50;
    final int sampleSize = 256;
    final Precision precision = Precision.FLOAT_64;
    final int dataSize = 10 * sampleSize;
    NormalMixtureTestData testData = new NormalMixtureTestData();
    // generate data once to eliminate caching issues
    testData.generateTestData(dataSize, dimensions);
    testData.generateTestData(sampleSize, dimensions);

    for (int round = 0; round < 5; round++) {
        final double cacheFraction = round * 0.25;
        RandomCutForest baseline = RandomCutForest.builder()
                .compact(true)
                .dimensions(dimensions)
                .randomSeed(0)
                .numberOfTrees(numberOfTrees)
                .sampleSize(sampleSize)
                .precision(precision)
                .build();
        RandomCutForest tuned = RandomCutForest.builder()
                .compact(true)
                .dimensions(dimensions)
                .randomSeed(0)
                .numberOfTrees(numberOfTrees)
                .sampleSize(sampleSize)
                .precision(precision)
                .build();
        // only the second forest has its cache fraction changed
        tuned.setBoundingBoxCacheFraction(cacheFraction);

        int mismatches = 0;

        // untimed phase: score and ingest dataSize points
        for (double[] sample : testData.generateTestData(dataSize, dimensions)) {
            double baselineScore = baseline.getAnomalyScore(sample);
            double tunedScore = tuned.getAnomalyScore(sample);
            if (Math.abs(baselineScore - tunedScore) > 1e-10) {
                mismatches++;
            }
            baseline.update(sample);
            tuned.update(sample);
        }

        // timed phase: the same work on sampleSize further points
        Instant start = Instant.now();
        for (double[] sample : testData.generateTestData(sampleSize, dimensions)) {
            double baselineScore = baseline.getAnomalyScore(sample);
            double tunedScore = tuned.getAnomalyScore(sample);
            if (Math.abs(baselineScore - tunedScore) > 1e-10) {
                mismatches++;
            }
            baseline.update(sample);
            tuned.update(sample);
        }
        Instant finish = Instant.now();

        // the cache fraction must not change any score
        if (mismatches > 0) {
            throw new IllegalStateException("score mismatch");
        }
        System.out.println("So far so good! Caching fraction = " + cacheFraction + ", Time =" + Duration.between(start, finish).toMillis() + " ms (note only one forest is changing)");
    }
}
Also used : Precision(com.amazon.randomcutforest.config.Precision) RandomCutForest(com.amazon.randomcutforest.RandomCutForest) Instant(java.time.Instant) NormalMixtureTestData(com.amazon.randomcutforest.testutils.NormalMixtureTestData)

Example 10 with RandomCutForest

use of com.amazon.randomcutforest.RandomCutForest in project random-cut-forest-by-aws by aws.

the class BoxCacheTest method testChangingBoundingBoxFloat32.

/**
 * Checks that anomaly scores of a FLOAT_32 forest do not depend on the
 * bounding box cache fraction. Two forests are built with the same random seed,
 * one starting at cache fraction 0 and one fixed at 1. Every {@code sampleSize}
 * points the first forest's cache fraction is set to a new random value, and
 * the scores of both forests must be equal for every point.
 *
 * <p>All randomness is derived from a single test seed, which is reported in
 * the assertion message so a failing run can be replayed.
 */
@Test
public void testChangingBoundingBoxFloat32() {
    int dimensions = 4;
    int numberOfTrees = 1;
    int sampleSize = 64;
    int dataSize = 1000 * sampleSize;
    // Seed the test's Random explicitly; an unseeded Random would make the
    // sequence of cache fractions impossible to reproduce after a failure.
    long testSeed = new Random().nextLong();
    Random random = new Random(testSeed);
    long seed = random.nextLong();
    String failureContext = "test seed = " + testSeed;
    double[][] big = RandomCutForestTest.generateShingledData(dataSize, dimensions, 2);
    RandomCutForest forest = RandomCutForest.builder().compact(true).dimensions(dimensions).numberOfTrees(numberOfTrees).sampleSize(sampleSize).precision(Precision.FLOAT_32).randomSeed(seed).boundingBoxCacheFraction(0).build();
    RandomCutForest otherForest = RandomCutForest.builder().compact(true).dimensions(dimensions).numberOfTrees(numberOfTrees).sampleSize(sampleSize).precision(Precision.FLOAT_32).randomSeed(seed).boundingBoxCacheFraction(1).build();
    int num = 0;
    for (double[] point : big) {
        ++num;
        // change the cache fraction of the first forest once per sampleSize points
        if (num % sampleSize == 0) {
            forest.setBoundingBoxCacheFraction(random.nextDouble());
        }
        assertEquals(forest.getAnomalyScore(point), otherForest.getAnomalyScore(point), failureContext);
        forest.update(point);
        otherForest.update(point);
    }
}
Also used : Random(java.util.Random) RandomCutForest(com.amazon.randomcutforest.RandomCutForest) Test(org.junit.jupiter.api.Test) RandomCutForestTest(com.amazon.randomcutforest.RandomCutForestTest)

Aggregations

RandomCutForest (com.amazon.randomcutforest.RandomCutForest) 33, Random (java.util.Random) 14, RandomCutForestMapper (com.amazon.randomcutforest.state.RandomCutForestMapper) 11, Precision (com.amazon.randomcutforest.config.Precision) 10, RandomCutForestState (com.amazon.randomcutforest.state.RandomCutForestState) 10, Test (org.junit.jupiter.api.Test) 10, ParameterizedTest (org.junit.jupiter.params.ParameterizedTest) 10, NormalMixtureTestData (com.amazon.randomcutforest.testutils.NormalMixtureTestData) 7, ThresholdedRandomCutForest (com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest) 5, AnomalyDescriptor (com.amazon.randomcutforest.parkservices.AnomalyDescriptor) 4, CompactSampler (com.amazon.randomcutforest.sampler.CompactSampler) 4, MultiDimDataWithKey (com.amazon.randomcutforest.testutils.MultiDimDataWithKey) 4, ArrayList (java.util.ArrayList) 4, ComponentList (com.amazon.randomcutforest.ComponentList) 3, PointStoreCoordinator (com.amazon.randomcutforest.executor.PointStoreCoordinator) 3, CompactSamplerMapper (com.amazon.randomcutforest.state.sampler.CompactSamplerMapper) 3, CompactSamplerState (com.amazon.randomcutforest.state.sampler.CompactSamplerState) 3, PointStoreMapper (com.amazon.randomcutforest.state.store.PointStoreMapper) 3, CompactRandomCutTreeContext (com.amazon.randomcutforest.state.tree.CompactRandomCutTreeContext) 3, IPointStore (com.amazon.randomcutforest.store.IPointStore) 3