Search in sources:

Example 1 with RandomCutForest

use of com.amazon.randomcutforest.RandomCutForest in project random-cut-forest-by-aws by aws.

the class ConsistencyTest method InternalShinglingTest.

/**
 * Feeds the same generated data to a {@code ThresholdedRandomCutForest} and to
 * a plain {@code RandomCutForest}, both built with the same random seed and
 * with internal shingling enabled, and asserts that the RCF score reported by
 * the former agrees with the anomaly score of the latter to within 1e-10 for
 * every point.
 */
@Test
public void InternalShinglingTest() {
    final int sampleSize = 256;
    final int baseDimensions = 2;
    final int shingleSize = 4;
    final int dimensions = baseDimensions * shingleSize;
    final long seed = new Random().nextLong();
    // one trial is enough: the two models are compared point by point
    final int numTrials = 1;
    final int length = 400 * sampleSize;

    for (int trial = 0; trial < numTrials; trial++) {
        RandomCutForest forest = RandomCutForest.builder()
                .compact(true)
                .dimensions(dimensions)
                .precision(Precision.FLOAT_32)
                .internalShinglingEnabled(true)
                .shingleSize(shingleSize)
                .randomSeed(seed)
                .build();
        ThresholdedRandomCutForest thresholded = new ThresholdedRandomCutForest.Builder<>()
                .compact(true)
                .dimensions(dimensions)
                .precision(Precision.FLOAT_32)
                .randomSeed(seed)
                .internalShinglingEnabled(true)
                .shingleSize(shingleSize)
                .anomalyRate(0.01)
                .build();
        MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(
                length, 50, 100, 5, seed + trial, baseDimensions);

        for (double[] point : dataWithKeys.data) {
            // score with the thresholded model first, then compare against the
            // plain forest before that forest sees the point
            AnomalyDescriptor descriptor = thresholded.process(point, 0L);
            assertEquals(descriptor.getRCFScore(), forest.getAnomalyScore(point), 1e-10);
            forest.update(point);
        }
    }
}
Also used : Random(java.util.Random) RandomCutForest(com.amazon.randomcutforest.RandomCutForest) MultiDimDataWithKey(com.amazon.randomcutforest.testutils.MultiDimDataWithKey) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 2 with RandomCutForest

use of com.amazon.randomcutforest.RandomCutForest in project random-cut-forest-by-aws by aws.

the class ThresholdedRandomCutForestMapperTest method testConversions.

/**
 * Verifies that a {@code ThresholdedRandomCutForest} wrapped around a
 * {@code RandomCutForest} that went through {@code RandomCutForestMapper}, and
 * a further copy produced by {@code ThresholdedRandomCutForestMapper}, report
 * the same RCF scores (to within 1e-10) as a reference forest and a reference
 * thresholded forest that were built with the same seed and fed the same
 * points.
 */
@Test
public void testConversions() {
    int dimensions = 10;
    for (int trials = 0; trials < 10; trials++) {
        long seed = new Random().nextLong();
        RandomCutForest forest = RandomCutForest.builder().compact(true).dimensions(dimensions).precision(Precision.FLOAT_32).internalShinglingEnabled(false).randomSeed(seed).build();
        // note shingleSize == 1
        ThresholdedRandomCutForest first = new ThresholdedRandomCutForest.Builder<>().compact(true).dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed).internalShinglingEnabled(false).anomalyRate(0.01).build();
        Random r = new Random();
        // Draw the number of warm-up points once. Putting the random draw in the
        // loop condition would re-draw the bound (and allocate a new Random) on
        // every iteration, which makes the loop terminate far earlier than a
        // uniform draw from [0, 1000) suggests.
        int warmUpPoints = r.nextInt(1000);
        for (int i = 0; i < warmUpPoints; i++) {
            double[] point = r.ints(dimensions, 0, 50).asDoubleStream().toArray();
            first.process(point, 0L);
            forest.update(point);
        }
        RandomCutForestMapper mapper = new RandomCutForestMapper();
        mapper.setSaveExecutorContextEnabled(true);
        mapper.setSaveTreeStateEnabled(true);
        mapper.setPartialTreeStateEnabled(true);
        RandomCutForest copyForest = mapper.toModel(mapper.toState(forest));
        ThresholdedRandomCutForest second = new ThresholdedRandomCutForest(copyForest, 0.01, null);
        // keep feeding the original and the copy; the bound is again drawn once
        int comparisonPoints = r.nextInt(1000);
        for (int i = 0; i < comparisonPoints; i++) {
            double[] point = r.ints(dimensions, 0, 50).asDoubleStream().toArray();
            AnomalyDescriptor firstResult = first.process(point, 0L);
            AnomalyDescriptor secondResult = second.process(point, 0L);
            assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
            assertEquals(firstResult.getRCFScore(), forest.getAnomalyScore(point), 1e-10);
            forest.update(point);
        }
        // serialize + deserialize
        ThresholdedRandomCutForestMapper newMapper = new ThresholdedRandomCutForestMapper();
        ThresholdedRandomCutForest third = newMapper.toModel(newMapper.toState(second));
        // update re-instantiated forest
        for (int i = 0; i < 100; i++) {
            double[] point = r.ints(dimensions, 0, 50).asDoubleStream().toArray();
            AnomalyDescriptor firstResult = first.process(point, 0L);
            AnomalyDescriptor secondResult = second.process(point, 0L);
            AnomalyDescriptor thirdResult = third.process(point, 0L);
            double score = forest.getAnomalyScore(point);
            assertEquals(score, firstResult.getRCFScore(), 1e-10);
            assertEquals(score, secondResult.getRCFScore(), 1e-10);
            assertEquals(score, thirdResult.getRCFScore(), 1e-10);
            assertEquals(firstResult.getDataConfidence(), thirdResult.getDataConfidence(), 1e-10);
            forest.update(point);
        }
    }
}
Also used : Random(java.util.Random) AnomalyDescriptor(com.amazon.randomcutforest.parkservices.AnomalyDescriptor) RandomCutForest(com.amazon.randomcutforest.RandomCutForest) ThresholdedRandomCutForest(com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest) RandomCutForestMapper(com.amazon.randomcutforest.state.RandomCutForestMapper) ThresholdedRandomCutForest(com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 3 with RandomCutForest

use of com.amazon.randomcutforest.RandomCutForest in project random-cut-forest-by-aws by aws.

the class ThresholdedRandomCutForestMapperTest method testRoundTripStandardShingleSizeOne.

/**
 * Checks that, with no shingle size configured (see the in-line note), two
 * {@code ThresholdedRandomCutForest} instances that differ only in whether
 * internal shingling is enabled (the second also sets
 * {@code ForestMode.STANDARD}) agree with each other and with a plain
 * {@code RandomCutForest} built from the same seed, and that a copy of the
 * second model produced by {@code ThresholdedRandomCutForestMapper} keeps
 * reporting the same RCF scores (to within 1e-10).
 */
@Test
public void testRoundTripStandardShingleSizeOne() {
    int dimensions = 10;
    for (int trials = 0; trials < 1; trials++) {
        long seed = new Random().nextLong();
        RandomCutForest.Builder<?> builder = RandomCutForest.builder().compact(true).dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed);
        // note shingleSize == 1
        ThresholdedRandomCutForest first = new ThresholdedRandomCutForest.Builder<>().compact(true).dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed).internalShinglingEnabled(true).anomalyRate(0.01).build();
        ThresholdedRandomCutForest second = new ThresholdedRandomCutForest.Builder<>().compact(true).dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed).anomalyRate(0.01).forestMode(ForestMode.STANDARD).internalShinglingEnabled(false).build();
        RandomCutForest forest = builder.build();
        Random r = new Random();
        // Draw the number of points once. Evaluating the random expression in
        // the loop condition would re-draw the bound (and allocate a new Random)
        // on every iteration, so the loop would stop almost immediately after
        // 2000 points instead of at a uniformly chosen count in [2000, 3000).
        int pointCount = 2000 + r.nextInt(1000);
        for (int i = 0; i < pointCount; i++) {
            double[] point = r.ints(dimensions, 0, 50).asDoubleStream().toArray();
            AnomalyDescriptor firstResult = first.process(point, 0L);
            AnomalyDescriptor secondResult = second.process(point, 0L);
            assertEquals(firstResult.getDataConfidence(), secondResult.getDataConfidence(), 1e-10);
            assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
            assertEquals(firstResult.getRCFScore(), forest.getAnomalyScore(point), 1e-10);
            forest.update(point);
        }
        // serialize + deserialize
        ThresholdedRandomCutForestMapper mapper = new ThresholdedRandomCutForestMapper();
        ThresholdedRandomCutForest third = mapper.toModel(mapper.toState(second));
        // update re-instantiated forest
        for (int i = 0; i < 100; i++) {
            double[] point = r.ints(dimensions, 0, 50).asDoubleStream().toArray();
            AnomalyDescriptor firstResult = first.process(point, 0L);
            AnomalyDescriptor secondResult = second.process(point, 0L);
            AnomalyDescriptor thirdResult = third.process(point, 0L);
            double score = forest.getAnomalyScore(point);
            assertEquals(score, firstResult.getRCFScore(), 1e-10);
            assertEquals(score, secondResult.getRCFScore(), 1e-10);
            assertEquals(score, thirdResult.getRCFScore(), 1e-10);
            assertEquals(firstResult.getDataConfidence(), secondResult.getDataConfidence(), 1e-10);
            forest.update(point);
        }
    }
}
Also used : Random(java.util.Random) AnomalyDescriptor(com.amazon.randomcutforest.parkservices.AnomalyDescriptor) RandomCutForest(com.amazon.randomcutforest.RandomCutForest) ThresholdedRandomCutForest(com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest) ThresholdedRandomCutForest(com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 4 with RandomCutForest

use of com.amazon.randomcutforest.RandomCutForest in project random-cut-forest-by-aws by aws.

the class ProtostuffExampleWithDynamicLambda method run.

/**
 * Builds and populates a compact forest, serializes its state with protostuff,
 * restores a second forest from the bytes, multiplies the time decay of both
 * forests by 10, and then checks that the per-tree sampler bookkeeping matches
 * and that the two forests produce similar anomaly scores on fresh test data.
 *
 * @throws IllegalStateException if the samplers of the two forests disagree,
 *                               if the test data yields no anomalies, or if
 *                               too many anomaly scores differ by more than the
 *                               allowed delta
 * @throws Exception             declared by the overridden method
 */
@Override
public void run() throws Exception {
    // Create and populate a random cut forest
    int dimensions = 4;
    int numberOfTrees = 50;
    int sampleSize = 256;
    Precision precision = Precision.FLOAT_64;
    RandomCutForest forest = RandomCutForest.builder().compact(true).dimensions(dimensions).numberOfTrees(numberOfTrees).sampleSize(sampleSize).precision(precision).build();
    int dataSize = 4 * sampleSize;
    NormalMixtureTestData testData = new NormalMixtureTestData();
    for (double[] point : testData.generateTestData(dataSize, dimensions)) {
        forest.update(point);
    }
    // Convert to an array of bytes and print the size
    RandomCutForestMapper mapper = new RandomCutForestMapper();
    mapper.setSaveExecutorContextEnabled(true);
    Schema<RandomCutForestState> schema = RuntimeSchema.getSchema(RandomCutForestState.class);
    LinkedBuffer buffer = LinkedBuffer.allocate(512);
    byte[] bytes;
    try {
        RandomCutForestState state = mapper.toState(forest);
        bytes = ProtostuffIOUtil.toByteArray(state, schema, buffer);
    } finally {
        buffer.clear();
    }
    System.out.printf("dimensions = %d, numberOfTrees = %d, sampleSize = %d, precision = %s%n", dimensions, numberOfTrees, sampleSize, precision);
    System.out.printf("protostuff size = %d bytes%n", bytes.length);
    // Restore from protostuff and compare anomaly scores produced by the two
    // forests
    RandomCutForestState state2 = schema.newMessage();
    ProtostuffIOUtil.mergeFrom(bytes, state2, schema);
    RandomCutForest forest2 = mapper.toModel(state2);
    // change the time decay on both forests after the restore
    forest.setTimeDecay(10 * forest.getTimeDecay());
    forest2.setTimeDecay(10 * forest2.getTimeDecay());
    for (int i = 0; i < numberOfTrees; i++) {
        CompactSampler sampler = (CompactSampler) ((SamplerPlusTree) forest.getComponents().get(i)).getSampler();
        CompactSampler sampler2 = (CompactSampler) ((SamplerPlusTree) forest2.getComponents().get(i)).getSampler();
        if (sampler.getMaxSequenceIndex() != sampler2.getMaxSequenceIndex()) {
            throw new IllegalStateException("Incorrect sampler state");
        }
        if (sampler.getMostRecentTimeDecayUpdate() != sampler2.getMostRecentTimeDecayUpdate()) {
            throw new IllegalStateException("Incorrect sampler state");
        }
        // the decay update is expected to be recorded at the last of the
        // dataSize points seen so far
        if (sampler2.getMostRecentTimeDecayUpdate() != dataSize - 1) {
            throw new IllegalStateException("Incorrect sampler state");
        }
    }
    int testSize = 100;
    // tolerated score difference: 5% of log2(sampleSize)
    double delta = Math.log(sampleSize) / Math.log(2) * 0.05;
    int differences = 0;
    int anomalies = 0;
    for (double[] point : testData.generateTestData(testSize, dimensions)) {
        double score = forest.getAnomalyScore(point);
        double score2 = forest2.getAnomalyScore(point);
        // only compare the scores of points that at least one of the two
        // forests scores above 1
        if (score > 1 || score2 > 1) {
            anomalies++;
            if (Math.abs(score - score2) > delta) {
                differences++;
            }
        }
        forest.update(point);
        forest2.update(point);
    }
    // first validate that this was a nontrivial test
    if (anomalies == 0) {
        throw new IllegalStateException("test data did not produce any anomalies");
    }
    // validate that the two forests agree on anomaly scores
    if (differences >= 0.01 * testSize) {
        throw new IllegalStateException("restored forest does not agree with original forest");
    }
    System.out.println("Looks good!");
}
Also used : LinkedBuffer(io.protostuff.LinkedBuffer) CompactSampler(com.amazon.randomcutforest.sampler.CompactSampler) RandomCutForest(com.amazon.randomcutforest.RandomCutForest) RandomCutForestState(com.amazon.randomcutforest.state.RandomCutForestState) Precision(com.amazon.randomcutforest.config.Precision) RandomCutForestMapper(com.amazon.randomcutforest.state.RandomCutForestMapper) NormalMixtureTestData(com.amazon.randomcutforest.testutils.NormalMixtureTestData)

Example 5 with RandomCutForest

use of com.amazon.randomcutforest.RandomCutForest in project random-cut-forest-by-aws by aws.

the class DynamicDensity method run.

/**
 * Writes the file {@code dynamic_density_example}. For every rotation step
 * (0 to 358 degrees in steps of 2) the forest is updated with the rotated data
 * and two gnuplot data blocks are appended, each terminated by two blank
 * lines: first the density value at every rotated data point, then a grid of
 * normalized direction vectors derived from the directional density.
 *
 * <p>
 * Using gnuplot one can plot the directions to higher density via
 *
 * <pre>
 * do for [i=0:358:2] {plot "dynamic_density_example" index (i+1) u 1:2:3:4 w vectors t ""}
 * </pre>
 *
 * or the raw density at the points via
 *
 * <pre>
 * do for [i=0:358:2] {plot "dynamic_density_example" index i w p pt 7 palette t ""}
 * </pre>
 *
 * @throws Exception for example if the output file cannot be created or
 *                   written
 */
@Override
public void run() throws Exception {
    int newDimensions = 2;
    long randomSeed = 123;
    RandomCutForest newForest = RandomCutForest.builder().numberOfTrees(100).sampleSize(256).dimensions(newDimensions).randomSeed(randomSeed).timeDecay(1.0 / 800).centerOfMassEnabled(true).build();
    String name = "dynamic_density_example";
    // try-with-resources closes (and flushes) the writer even when an
    // exception is thrown part-way through
    try (BufferedWriter file = new BufferedWriter(new FileWriter(name))) {
        double[][] data = generate(1000);
        double[] queryPoint;
        for (int degree = 0; degree < 360; degree += 2) {
            // feed the data rotated by the current angle into the forest
            for (double[] datum : data) {
                newForest.update(rotateClockWise(datum, -2 * PI * degree / 360));
            }
            // first block: density at each rotated data point
            for (double[] datum : data) {
                queryPoint = rotateClockWise(datum, -2 * PI * degree / 360);
                DensityOutput density = newForest.getSimpleDensity(queryPoint);
                double value = density.getDensity(0.001, 2);
                file.append(queryPoint[0] + " " + queryPoint[1] + " " + value + "\n");
            }
            // two blank lines separate gnuplot data blocks ("index")
            file.append("\n");
            file.append("\n");
            // second block: direction vectors on a grid over (-1, 1) x (-1, 1)
            for (double x = -0.95; x < 1; x += 0.1) {
                for (double y = -0.95; y < 1; y += 0.1) {
                    DensityOutput density = newForest.getSimpleDensity(new double[] { x, y });
                    double aboveInY = density.getDirectionalDensity(0.001, 2).low[1];
                    double belowInY = density.getDirectionalDensity(0.001, 2).high[1];
                    double toTheLeft = density.getDirectionalDensity(0.001, 2).high[0];
                    double toTheRight = density.getDirectionalDensity(0.001, 2).low[0];
                    double len = Math.sqrt(aboveInY * aboveInY + belowInY * belowInY + toTheLeft * toTheLeft + toTheRight * toTheRight);
                    // scale each vector to a fixed length of 0.05 for plotting
                    file.append(x + " " + y + " " + ((toTheRight - toTheLeft) * 0.05 / len) + " " + ((aboveInY - belowInY) * 0.05 / len) + "\n");
                }
            }
            file.append("\n");
            file.append("\n");
        }
    }
}
Also used : DensityOutput(com.amazon.randomcutforest.returntypes.DensityOutput) RandomCutForest(com.amazon.randomcutforest.RandomCutForest) FileWriter(java.io.FileWriter) BufferedWriter(java.io.BufferedWriter)

Aggregations

RandomCutForest (com.amazon.randomcutforest.RandomCutForest): 33; Random (java.util.Random): 14; RandomCutForestMapper (com.amazon.randomcutforest.state.RandomCutForestMapper): 11; Precision (com.amazon.randomcutforest.config.Precision): 10; RandomCutForestState (com.amazon.randomcutforest.state.RandomCutForestState): 10; Test (org.junit.jupiter.api.Test): 10; ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 10; NormalMixtureTestData (com.amazon.randomcutforest.testutils.NormalMixtureTestData): 7; ThresholdedRandomCutForest (com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest): 5; AnomalyDescriptor (com.amazon.randomcutforest.parkservices.AnomalyDescriptor): 4; CompactSampler (com.amazon.randomcutforest.sampler.CompactSampler): 4; MultiDimDataWithKey (com.amazon.randomcutforest.testutils.MultiDimDataWithKey): 4; ArrayList (java.util.ArrayList): 4; ComponentList (com.amazon.randomcutforest.ComponentList): 3; PointStoreCoordinator (com.amazon.randomcutforest.executor.PointStoreCoordinator): 3; CompactSamplerMapper (com.amazon.randomcutforest.state.sampler.CompactSamplerMapper): 3; CompactSamplerState (com.amazon.randomcutforest.state.sampler.CompactSamplerState): 3; PointStoreMapper (com.amazon.randomcutforest.state.store.PointStoreMapper): 3; CompactRandomCutTreeContext (com.amazon.randomcutforest.state.tree.CompactRandomCutTreeContext): 3; IPointStore (com.amazon.randomcutforest.store.IPointStore): 3