Search in sources :

Example 6 with Precision

use of com.amazon.randomcutforest.config.Precision in project random-cut-forest-by-aws by aws.

In the class ProtostuffExampleWithShingles, the method run:

// Serializes a shingled RandomCutForest to protostuff bytes, restores it, and
// verifies that the restored forest produces anomaly scores consistent with the
// original while both forests continue to be updated with the same points.
@Override
public void run() throws Exception {
    // Create and populate a random cut forest
    int dimensions = 10;
    int numberOfTrees = 50;
    int sampleSize = 256;
    Precision precision = Precision.FLOAT_64;
    RandomCutForest forest = RandomCutForest.builder().compact(true).dimensions(dimensions).numberOfTrees(numberOfTrees).sampleSize(sampleSize).precision(precision).shingleSize(dimensions).build();
    int dataSize = 1000 * sampleSize;
    for (double[] point : generateShingledData(dataSize, dimensions, 0)) {
        forest.update(point);
    }
    // Convert to an array of bytes and print the size
    RandomCutForestMapper mapper = new RandomCutForestMapper();
    mapper.setSaveExecutorContextEnabled(true);
    mapper.setSaveTreeStateEnabled(false);
    Schema<RandomCutForestState> schema = RuntimeSchema.getSchema(RandomCutForestState.class);
    LinkedBuffer buffer = LinkedBuffer.allocate(512);
    byte[] bytes;
    try {
        RandomCutForestState state = mapper.toState(forest);
        bytes = ProtostuffIOUtil.toByteArray(state, schema, buffer);
    } finally {
        // per protostuff convention, clear the buffer after use so it can be reused
        buffer.clear();
    }
    System.out.printf("dimensions = %d, numberOfTrees = %d, sampleSize = %d, precision = %s%n", dimensions, numberOfTrees, sampleSize, precision);
    System.out.printf("protostuff size = %d bytes%n", bytes.length);
    // Restore from protostuff and compare anomaly scores produced by the two
    // forests
    RandomCutForestState state2 = schema.newMessage();
    ProtostuffIOUtil.mergeFrom(bytes, state2, schema);
    RandomCutForest forest2 = mapper.toModel(state2);
    int testSize = 10000;
    // tolerance for score differences, proportional to log2(sampleSize)
    double delta = Math.log(sampleSize) / Math.log(2) * 0.05;
    int differences = 0;
    int anomalies = 0;
    for (double[] point : generateShingledData(testSize, dimensions, 2)) {
        double score = forest.getAnomalyScore(point);
        double score2 = forest2.getAnomalyScore(point);
        // we expect a point scored as an anomaly by one forest to be
        // also scored as an anomaly by the other forest
        if (score > 1 || score2 > 1) {
            anomalies++;
            if (Math.abs(score - score2) > delta) {
                differences++;
            }
        }
        forest.update(point);
        forest2.update(point);
    }
    // validate that the two forests agree on anomaly scores
    if (differences >= 0.01 * testSize) {
        throw new IllegalStateException("restored forest does not agree with original forest");
    }
    System.out.println("Looks good!");
}
Also used : LinkedBuffer(io.protostuff.LinkedBuffer) Precision(com.amazon.randomcutforest.config.Precision) RandomCutForest(com.amazon.randomcutforest.RandomCutForest) RandomCutForestMapper(com.amazon.randomcutforest.state.RandomCutForestMapper) RandomCutForestState(com.amazon.randomcutforest.state.RandomCutForestState)

Example 7 with Precision

use of com.amazon.randomcutforest.config.Precision in project random-cut-forest-by-aws by aws.

In the class DynamicThroughput, the method run:

// Measures scoring/update throughput for different bounding-box cache fractions
// and verifies that caching does not change results: two identically seeded
// forests must produce exactly the same anomaly scores regardless of the
// cache fraction configured on the second forest.
@Override
public void run() throws Exception {
    // Create and populate a random cut forest
    int dimensions = 4;
    int numberOfTrees = 50;
    int sampleSize = 256;
    Precision precision = Precision.FLOAT_64;
    int dataSize = 10 * sampleSize;
    NormalMixtureTestData testData = new NormalMixtureTestData();
    // generate data once to eliminate caching issues
    testData.generateTestData(dataSize, dimensions);
    testData.generateTestData(sampleSize, dimensions);
    for (int i = 0; i < 5; i++) {
        RandomCutForest forest = RandomCutForest.builder().compact(true).dimensions(dimensions).randomSeed(0).numberOfTrees(numberOfTrees).sampleSize(sampleSize).precision(precision).build();
        RandomCutForest forest2 = RandomCutForest.builder().compact(true).dimensions(dimensions).randomSeed(0).numberOfTrees(numberOfTrees).sampleSize(sampleSize).precision(precision).build();
        forest2.setBoundingBoxCacheFraction(i * 0.25);
        // counts points where the two forests disagree; must remain zero
        int mismatches = 0;
        for (double[] point : testData.generateTestData(dataSize, dimensions)) {
            double score = forest.getAnomalyScore(point);
            double score2 = forest2.getAnomalyScore(point);
            if (Math.abs(score - score2) > 1e-10) {
                mismatches++;
            }
            forest.update(point);
            forest2.update(point);
        }
        // time only this second batch, after both forests are warmed up
        Instant start = Instant.now();
        for (double[] point : testData.generateTestData(sampleSize, dimensions)) {
            double score = forest.getAnomalyScore(point);
            double score2 = forest2.getAnomalyScore(point);
            if (Math.abs(score - score2) > 1e-10) {
                mismatches++;
            }
            forest.update(point);
            forest2.update(point);
        }
        Instant finish = Instant.now();
        // validate that the cache fraction did not change any score
        if (mismatches > 0) {
            throw new IllegalStateException("score mismatch");
        }
        System.out.println("So far so good! Caching fraction = " + (i * 0.25) + ", Time =" + Duration.between(start, finish).toMillis() + " ms (note only one forest is changing)");
    }
}
Also used : Precision(com.amazon.randomcutforest.config.Precision) RandomCutForest(com.amazon.randomcutforest.RandomCutForest) Instant(java.time.Instant) NormalMixtureTestData(com.amazon.randomcutforest.testutils.NormalMixtureTestData)

Example 8 with Precision

use of com.amazon.randomcutforest.config.Precision in project random-cut-forest-by-aws by aws.

In the class ThresholdedInternalShinglingExample, the method run:

// Demonstrates internal shingling with ThresholdedRandomCutForest: two forests
// with identical parameters (one STANDARD, one TIME_AUGMENTED with time weight 0)
// are checked to produce the same scores/grades, and details are printed for
// every point whose anomaly grade is nonzero.
@Override
public void run() throws Exception {
    // Create and populate a random cut forest
    int shingleSize = 4;
    int numberOfTrees = 50;
    int sampleSize = 256;
    Precision precision = Precision.FLOAT_32;
    int dataSize = 4 * sampleSize;
    // change this to try different number of attributes,
    // this parameter is not expected to be larger than 5 for this example
    int baseDimensions = 1;
    long count = 0;
    // total dimensions seen by the forest = input attributes x shingle size
    int dimensions = baseDimensions * shingleSize;
    TransformMethod transformMethod = TransformMethod.NORMALIZE_DIFFERENCE;
    ThresholdedRandomCutForest forest = ThresholdedRandomCutForest.builder().compact(true).dimensions(dimensions).randomSeed(0).numberOfTrees(numberOfTrees).shingleSize(shingleSize).sampleSize(sampleSize).internalShinglingEnabled(true).precision(precision).anomalyRate(0.01).forestMode(ForestMode.STANDARD).weightTime(0).transformMethod(transformMethod).normalizeTime(true).outputAfter(32).initialAcceptFraction(0.125).build();
    ThresholdedRandomCutForest second = ThresholdedRandomCutForest.builder().compact(true).dimensions(dimensions).randomSeed(0).numberOfTrees(numberOfTrees).shingleSize(shingleSize).sampleSize(sampleSize).internalShinglingEnabled(true).precision(precision).anomalyRate(0.01).forestMode(ForestMode.TIME_AUGMENTED).weightTime(0).transformMethod(transformMethod).normalizeTime(true).outputAfter(32).initialAcceptFraction(0.125).build();
    // ensuring that the parameters are the same; otherwise the grades/scores cannot
    // be the same
    // weighTime has to be 0
    forest.setLowerThreshold(1.1);
    second.setLowerThreshold(1.1);
    forest.setHorizon(0.75);
    second.setHorizon(0.75);
    // data seed is random; it is printed so a run can be reproduced
    long seed = new Random().nextLong();
    Random noise = new Random(0);
    System.out.println("seed = " + seed);
    // change the last argument seed for a different run
    MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(dataSize + shingleSize - 1, 50, 100, 5, seed, baseDimensions);
    int keyCounter = 0;
    for (double[] point : dataWithKeys.data) {
        // idea is that we expect the arrival order to be roughly 100 apart (say
        // seconds)
        // then the noise corresponds to a jitter; one can try TIME_AUGMENTED and
        // .normalizeTime(true)
        long timestamp = 100 * count + noise.nextInt(10) - 5;
        AnomalyDescriptor result = forest.process(point, timestamp);
        AnomalyDescriptor test = second.process(point, timestamp);
        // with weightTime(0) the two modes must agree exactly (up to 1e-10)
        checkArgument(Math.abs(result.getRCFScore() - test.getRCFScore()) < 1e-10, " error");
        checkArgument(Math.abs(result.getAnomalyGrade() - test.getAnomalyGrade()) < 1e-10, " error");
        // announce injected distribution changes when their index is reached
        if (keyCounter < dataWithKeys.changeIndices.length && count == dataWithKeys.changeIndices[keyCounter]) {
            System.out.println("timestamp " + count + " CHANGE " + Arrays.toString(dataWithKeys.changes[keyCounter]));
            ++keyCounter;
        }
        // print details for every point flagged as anomalous (nonzero grade)
        if (result.getAnomalyGrade() != 0) {
            System.out.print("timestamp " + count + " RESULT value " + result.getInternalTimeStamp() + " ");
            for (int i = 0; i < baseDimensions; i++) {
                System.out.print(result.getCurrentInput()[i] + ", ");
            }
            System.out.print("score " + result.getRCFScore() + ", grade " + result.getAnomalyGrade() + ", ");
            // a negative relative index means the anomaly actually started earlier
            if (result.getRelativeIndex() != 0 && result.isStartOfAnomaly()) {
                System.out.print(-result.getRelativeIndex() + " steps ago, ");
            }
            if (result.isExpectedValuesPresent()) {
                if (result.getRelativeIndex() != 0 && result.isStartOfAnomaly()) {
                    // the anomaly refers to a past point: show past vs expected values
                    System.out.print("instead of ");
                    for (int i = 0; i < baseDimensions; i++) {
                        System.out.print(result.getPastValues()[i] + ", ");
                    }
                    System.out.print("expected ");
                    for (int i = 0; i < baseDimensions; i++) {
                        System.out.print(result.getExpectedValuesList()[0][i] + ", ");
                        if (result.getPastValues()[i] != result.getExpectedValuesList()[0][i]) {
                            System.out.print("( " + (result.getPastValues()[i] - result.getExpectedValuesList()[0][i]) + " ) ");
                        }
                    }
                } else {
                    // the anomaly is the current point: show current vs expected values
                    System.out.print("expected ");
                    for (int i = 0; i < baseDimensions; i++) {
                        System.out.print(result.getExpectedValuesList()[0][i] + ", ");
                        if (result.getCurrentInput()[i] != result.getExpectedValuesList()[0][i]) {
                            System.out.print("( " + (result.getCurrentInput()[i] - result.getExpectedValuesList()[0][i]) + " ) ");
                        }
                    }
                }
            } else {
                System.out.print("insufficient data to provide expected values");
            }
            System.out.println();
        }
        ++count;
    }
}
Also used : Random(java.util.Random) Precision(com.amazon.randomcutforest.config.Precision) AnomalyDescriptor(com.amazon.randomcutforest.parkservices.AnomalyDescriptor) MultiDimDataWithKey(com.amazon.randomcutforest.testutils.MultiDimDataWithKey) TransformMethod(com.amazon.randomcutforest.config.TransformMethod) ThresholdedRandomCutForest(com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest)

Example 9 with Precision

use of com.amazon.randomcutforest.config.Precision in project random-cut-forest-by-aws by aws.

In the class V1JsonToV3StateConverter, the method convert:

/**
 * Merges a collection of RCF-1.0 models with the same model parameters into a
 * single forest and fixes the number of trees in the new model (which has to be
 * less than or equal to the sum over the old models). The conversion uses the
 * execution context of the first forest and can be adjusted subsequently by
 * setters.
 *
 * @param serializedForests A non-empty list of forests (together having more
 *                          trees than numberOfTrees)
 * @param numberOfTrees     the new number of trees
 * @param precision         the precision of the new forest
 * @return a merged RCF with the first numberOfTrees trees
 */
public RandomCutForestState convert(List<V1SerializedRandomCutForest> serializedForests, int numberOfTrees, Precision precision) {
    checkArgument(serializedForests.size() > 0, "incorrect usage of convert");
    checkArgument(numberOfTrees > 0, "incorrect parameter");
    // the merged forest cannot have more trees than the input forests combined
    int totalTrees = serializedForests.stream().mapToInt(V1SerializedRandomCutForest::getNumberOfTrees).sum();
    checkArgument(totalTrees >= numberOfTrees, "incorrect parameters");
    RandomCutForestState state = new RandomCutForestState();
    state.setNumberOfTrees(numberOfTrees);
    // shared model parameters are taken from the first forest
    state.setDimensions(serializedForests.get(0).getDimensions());
    state.setTimeDecay(serializedForests.get(0).getLambda());
    state.setSampleSize(serializedForests.get(0).getSampleSize());
    state.setShingleSize(1);
    state.setCenterOfMassEnabled(serializedForests.get(0).isCenterOfMassEnabled());
    state.setOutputAfter(serializedForests.get(0).getOutputAfter());
    state.setStoreSequenceIndexesEnabled(serializedForests.get(0).isStoreSequenceIndexesEnabled());
    state.setTotalUpdates(serializedForests.get(0).getExecutor().getExecutor().getTotalUpdates());
    // fixed v3 state settings for the converted model
    state.setCompact(true);
    state.setInternalShinglingEnabled(false);
    state.setBoundingBoxCacheFraction(1.0);
    state.setSaveSamplerStateEnabled(true);
    state.setSaveTreeStateEnabled(false);
    state.setSaveCoordinatorStateEnabled(true);
    state.setPrecision(precision.name());
    state.setCompressed(false);
    state.setPartialTreeState(false);
    // the execution context mirrors the first forest and may be adjusted later
    ExecutionContext executionContext = new ExecutionContext();
    executionContext.setParallelExecutionEnabled(serializedForests.get(0).isParallelExecutionEnabled());
    executionContext.setThreadPoolSize(serializedForests.get(0).getThreadPoolSize());
    state.setExecutionContext(executionContext);
    SamplerConverter samplerConverter = new SamplerConverter(state.getDimensions(), state.getNumberOfTrees() * state.getSampleSize() + 1, precision, numberOfTrees);
    // pull samplers from all forests in order, keeping only the first numberOfTrees
    serializedForests.stream().flatMap(f -> Arrays.stream(f.getExecutor().getExecutor().getTreeUpdaters())).limit(numberOfTrees).map(V1SerializedRandomCutForest.TreeUpdater::getSampler).forEach(samplerConverter::addSampler);
    state.setPointStoreState(samplerConverter.getPointStoreState(precision));
    state.setCompactSamplerStates(samplerConverter.compactSamplerStates);
    return state;
}
Also used : Arrays(java.util.Arrays) RandomCutForestState(com.amazon.randomcutforest.state.RandomCutForestState) ExecutionContext(com.amazon.randomcutforest.state.ExecutionContext) CompactSamplerState(com.amazon.randomcutforest.state.sampler.CompactSamplerState) URL(java.net.URL) Precision(com.amazon.randomcutforest.config.Precision) CommonUtils.checkArgument(com.amazon.randomcutforest.CommonUtils.checkArgument) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) IOException(java.io.IOException) Reader(java.io.Reader) PointStoreState(com.amazon.randomcutforest.state.store.PointStoreState) RandomCutTree(com.amazon.randomcutforest.tree.RandomCutTree) ArrayList(java.util.ArrayList) PointStore(com.amazon.randomcutforest.store.PointStore) ITree(com.amazon.randomcutforest.tree.ITree) List(java.util.List) Optional(java.util.Optional) PointStoreMapper(com.amazon.randomcutforest.state.store.PointStoreMapper) IPointStore(com.amazon.randomcutforest.store.IPointStore) Collections(java.util.Collections) ExecutionContext(com.amazon.randomcutforest.state.ExecutionContext) RandomCutForestState(com.amazon.randomcutforest.state.RandomCutForestState)

Example 10 with Precision

use of com.amazon.randomcutforest.config.Precision in project random-cut-forest-by-aws by aws.

In the class ConsistencyTest, the method ImputeTest:

// Verifies that STREAMING_IMPUTE mode (fed timestamps with roughly uniform,
// jittered gaps) produces the same scores/grades as STANDARD mode, and that
// this agreement survives a mapper round trip of the imputing forest.
@ParameterizedTest
@EnumSource(TransformMethod.class)
public void ImputeTest(TransformMethod transformMethod) {
    int sampleSize = 256;
    int baseDimensions = 1;
    int shingleSize = 4;
    int dimensions = baseDimensions * shingleSize;
    // test is exact equality, reducing the number of trials
    int numTrials = 1;
    // and using fewer trees to speed up test
    int numberOfTrees = 30;
    int length = 10 * sampleSize;
    int dataSize = 2 * length;
    for (int i = 0; i < numTrials; i++) {
        Precision precision = Precision.FLOAT_32;
        // data seed is random; it is printed so a failing run can be reproduced
        long seed = new Random().nextLong();
        System.out.println("seed = " + seed);
        double[] weights = new double[] { 1.7, 4.2 };
        // 'first' runs in STANDARD mode, 'second' in STREAMING_IMPUTE; all other
        // builder parameters (including weightTime(0)) are identical
        ThresholdedRandomCutForest first = ThresholdedRandomCutForest.builder().compact(true).dimensions(dimensions).randomSeed(0).numberOfTrees(numberOfTrees).shingleSize(shingleSize).sampleSize(sampleSize).internalShinglingEnabled(true).precision(precision).anomalyRate(0.01).forestMode(ForestMode.STANDARD).weightTime(0).transformMethod(transformMethod).normalizeTime(true).outputAfter(32).initialAcceptFraction(0.125).weights(weights).build();
        ThresholdedRandomCutForest second = ThresholdedRandomCutForest.builder().compact(true).dimensions(dimensions).randomSeed(0).numberOfTrees(numberOfTrees).shingleSize(shingleSize).sampleSize(sampleSize).internalShinglingEnabled(true).precision(precision).anomalyRate(0.01).forestMode(ForestMode.STREAMING_IMPUTE).weightTime(0).transformMethod(transformMethod).normalizeTime(true).outputAfter(32).initialAcceptFraction(0.125).weights(weights).build();
        // ensuring that the parameters are the same; otherwise the grades/scores cannot
        // be the same
        // weighTime has to be 0 in the above
        first.setLowerThreshold(1.1);
        second.setLowerThreshold(1.1);
        first.setHorizon(0.75);
        second.setHorizon(0.75);
        Random noise = new Random(0);
        // change the last argument seed for a different run
        MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(dataSize + shingleSize - 1, 50, 100, 5, seed, baseDimensions);
        for (int j = 0; j < length; j++) {
            // gap has to be asymptotically same
            long timestamp = 100 * j + noise.nextInt(10) - 5;
            // note: 'first' gets a constant timestamp (0L), 'second' the jittered one
            AnomalyDescriptor result = first.process(dataWithKeys.data[j], 0L);
            AnomalyDescriptor test = second.process(dataWithKeys.data[j], timestamp);
            assertEquals(result.getRCFScore(), test.getRCFScore(), 1e-6);
            assertEquals(result.getAnomalyGrade(), test.getAnomalyGrade(), 1e-6);
        }
        // round-trip the imputing forest through its mapper and continue the comparison
        ThresholdedRandomCutForestMapper mapper = new ThresholdedRandomCutForestMapper();
        ThresholdedRandomCutForest third = mapper.toModel(mapper.toState(second));
        for (int j = length; j < 2 * length; j++) {
            // has to be the same gap
            long timestamp = 100 * j + noise.nextInt(10) - 5;
            AnomalyDescriptor firstResult = first.process(dataWithKeys.data[j], 0L);
            AnomalyDescriptor thirdResult = third.process(dataWithKeys.data[j], timestamp);
            assertEquals(firstResult.getRCFScore(), thirdResult.getRCFScore(), 1e-6);
            assertEquals(firstResult.getAnomalyGrade(), thirdResult.getAnomalyGrade(), 1e-6);
        }
    }
}
Also used : Random(java.util.Random) Precision(com.amazon.randomcutforest.config.Precision) ThresholdedRandomCutForestMapper(com.amazon.randomcutforest.parkservices.state.ThresholdedRandomCutForestMapper) MultiDimDataWithKey(com.amazon.randomcutforest.testutils.MultiDimDataWithKey) EnumSource(org.junit.jupiter.params.provider.EnumSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Aggregations

Precision (com.amazon.randomcutforest.config.Precision)17 RandomCutForest (com.amazon.randomcutforest.RandomCutForest)8 NormalMixtureTestData (com.amazon.randomcutforest.testutils.NormalMixtureTestData)8 Random (java.util.Random)8 MultiDimDataWithKey (com.amazon.randomcutforest.testutils.MultiDimDataWithKey)6 RandomCutForestState (com.amazon.randomcutforest.state.RandomCutForestState)5 AnomalyDescriptor (com.amazon.randomcutforest.parkservices.AnomalyDescriptor)4 ThresholdedRandomCutForest (com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest)4 RandomCutForestMapper (com.amazon.randomcutforest.state.RandomCutForestMapper)4 CommonUtils.checkArgument (com.amazon.randomcutforest.CommonUtils.checkArgument)3 CompactSampler (com.amazon.randomcutforest.sampler.CompactSampler)3 LinkedBuffer (io.protostuff.LinkedBuffer)3 CommonUtils.checkNotNull (com.amazon.randomcutforest.CommonUtils.checkNotNull)2 CommonUtils.toDoubleArray (com.amazon.randomcutforest.CommonUtils.toDoubleArray)2 CommonUtils.toFloatArray (com.amazon.randomcutforest.CommonUtils.toFloatArray)2 AnomalyAttributionVisitor (com.amazon.randomcutforest.anomalydetection.AnomalyAttributionVisitor)2 AnomalyScoreVisitor (com.amazon.randomcutforest.anomalydetection.AnomalyScoreVisitor)2 DynamicAttributionVisitor (com.amazon.randomcutforest.anomalydetection.DynamicAttributionVisitor)2 DynamicScoreVisitor (com.amazon.randomcutforest.anomalydetection.DynamicScoreVisitor)2 SimulatedTransductiveScalarScoreVisitor (com.amazon.randomcutforest.anomalydetection.SimulatedTransductiveScalarScoreVisitor)2