Example 6 with NormalMixtureTestData

Use of com.amazon.randomcutforest.testutils.NormalMixtureTestData in the project random-cut-forest-by-aws by aws.

From the class RandomCutForestMapperTest, method testRoundTripForCompactForest:

@ParameterizedTest
@MethodSource("compactForestProvider")
public void testRoundTripForCompactForest(RandomCutForest forest) {
    NormalMixtureTestData testData = new NormalMixtureTestData();
    for (double[] point : testData.generateTestData(sampleSize, dimensions)) {
        forest.update(point);
    }
    RandomCutForest forest2 = mapper.toModel(mapper.toState(forest));
    assertCompactForestEquals(forest, forest2);
}
Also used: RandomCutForest (com.amazon.randomcutforest.RandomCutForest), NormalMixtureTestData (com.amazon.randomcutforest.testutils.NormalMixtureTestData), ParameterizedTest (org.junit.jupiter.params.ParameterizedTest), MethodSource (org.junit.jupiter.params.provider.MethodSource)
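
The excerpt references several fixtures (mapper, sampleSize, dimensions, and the compactForestProvider method source) that are defined elsewhere in the class. A minimal sketch of what they might look like, assuming JUnit 5; the values and initializers are illustrative assumptions, only the names come from the excerpt:

import java.util.stream.Stream;
import com.amazon.randomcutforest.RandomCutForest;
import com.amazon.randomcutforest.state.RandomCutForestMapper;

public class RandomCutForestMapperTest {

    // Illustrative fixture values, not the project's actual setup.
    private static final int sampleSize = 256;
    private static final int dimensions = 3;
    private final RandomCutForestMapper mapper = new RandomCutForestMapper();

    // @MethodSource("compactForestProvider") resolves to this static factory.
    public static Stream<RandomCutForest> compactForestProvider() {
        return Stream.of(RandomCutForest.builder()
                .dimensions(dimensions)
                .sampleSize(sampleSize)
                .compact(true)
                .build());
    }
}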

Example 7 with NormalMixtureTestData

Use of com.amazon.randomcutforest.testutils.NormalMixtureTestData in the project random-cut-forest-by-aws by aws.

From the class AttributionExamplesFunctionalTest, method attributionUnMaskingTest:

@Test
public void attributionUnMaskingTest() {
    // starts with the same setup as rrcfTest; data corresponds to two small
    // clusters at x = +/-5.0
    // queries q_1 = (0,0,0, ..., 0)
    // inserts updates near (0,0,0, ..., 0) a few times
    // queries q_2 = (0,1,0, ..., 0)
    // attribution of q_2 is now affected by q_1 (which is still an anomaly)
    int newDimensions = 30;
    randomSeed = 179;
    sampleSize = 256;
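    // randomizing boundingBoxCacheFraction exercises partial bounding-box
    // caching; the small timeDecay gradually down-weights older points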
    RandomCutForest newForest = RandomCutForest.builder().numberOfTrees(100).sampleSize(sampleSize).dimensions(newDimensions).randomSeed(randomSeed).compact(true).boundingBoxCacheFraction(new Random().nextDouble()).timeDecay(1e-5).build();
    dataSize = 2000 + 5;
    baseMu = 0.0;
    baseSigma = 1.0;
    anomalyMu = 0.0;
    anomalySigma = 1.5;
    transitionToAnomalyProbability = 0.0;
    // ignoring anomaly cluster for now
    transitionToBaseProbability = 1.0;
    Random prg = new Random(0);
    NormalMixtureTestData generator = new NormalMixtureTestData(baseMu, baseSigma, anomalyMu, anomalySigma, transitionToAnomalyProbability, transitionToBaseProbability);
    double[][] data = generator.generateTestData(dataSize, newDimensions, 100);
    for (int i = 0; i < 2000; i++) {
        // shrink, shift at random
        for (int j = 0; j < newDimensions; j++) data[i][j] *= 0.01;
        if (prg.nextDouble() < 0.5)
            data[i][0] += 5.0;
        else
            data[i][0] -= 5.0;
        newForest.update(data[i]);
    }
    float[] queryOne = new float[newDimensions];
    float[] queryTwo = new float[newDimensions];
    queryTwo[1] = 1;
    double originalScoreTwo = newForest.getAnomalyScore(queryTwo);
    // testing approximation with precision 0 (no approximation)
    DiVector originalAttrTwo = newForest.getApproximateDynamicAttribution(queryTwo, 0, true, 0, CommonUtils::defaultScoreSeenFunction, CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
    originalAttrTwo.componentwiseTransform(x -> CommonUtils.defaultScalarNormalizerFunction(x, sampleSize));
    assertTrue(originalScoreTwo > 3.0);
    assertEquals(originalScoreTwo, originalAttrTwo.getHighLowSum(), 1E-5);
    // due to -5 cluster
    assertTrue(originalAttrTwo.high[0] > 0.75);
    // due to +5 cluster
    assertTrue(originalAttrTwo.low[0] > 0.75);
    // due to +1 in query
    assertTrue(originalAttrTwo.high[1] > 1);
    assertTrue(originalAttrTwo.getHighLowSum(0) > originalAttrTwo.getHighLowSum(1));
    double apx = newForest.getApproximateDynamicScore(queryTwo, 0.1, true, 0, CommonUtils::defaultScoreSeenFunction, CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
    assertEquals(originalScoreTwo, CommonUtils.defaultScalarNormalizerFunction(apx, sampleSize), 0.2);
    assertEquals(apx, newForest.getApproximateDynamicAttribution(queryTwo, 0.1, true, 0, CommonUtils::defaultScoreSeenFunction, CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction).getHighLowSum(), 1e-5);
    // insert a few more (shrunken) points near the origin; queryOne itself is
    // inserted later to make sure it is sampled
    for (int i = 2000; i < 2000 + 5; i++) {
        double score = newForest.getAnomalyScore(queryOne);
        double score2 = newForest.getAnomalyScore(queryTwo);
        DiVector attr2 = newForest.getDynamicAttribution(queryTwo, 0, CommonUtils::defaultScoreSeenFunction, CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
        attr2.componentwiseTransform(x -> CommonUtils.defaultScalarNormalizerFunction(x, sampleSize));
        double score3 = newForest.getDynamicScore(queryTwo, 1, CommonUtils::defaultScoreSeenFunction, CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
        score3 = CommonUtils.defaultScalarNormalizerFunction(score3, sampleSize);
        DiVector attr3 = newForest.getDynamicAttribution(queryTwo, 1, CommonUtils::defaultScoreSeenFunction, CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
        attr3.componentwiseTransform(x -> CommonUtils.defaultScalarNormalizerFunction(x, sampleSize));
        // verify
        assertTrue(score > 2.0);
        assertTrue(score2 > 2.0);
        assertTrue(score3 > 2.0);
        assertEquals(attr2.getHighLowSum(), score2, 1E-5);
        assertEquals(attr3.getHighLowSum(), score3, 1E-5);
        // shrink the new point as before; these are 5 different anomalous points
        for (int j = 0; j < newDimensions; j++) data[i][j] *= 0.01;
        newForest.update(data[i]);
    }
    double midScoreTwo = newForest.getAnomalyScore(queryTwo);
    DiVector midAttrTwo = newForest.getDynamicAttribution(queryTwo, 0, CommonUtils::defaultScoreSeenFunction, CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
    midAttrTwo.componentwiseTransform(x -> CommonUtils.defaultScalarNormalizerFunction(x, sampleSize));
    assertTrue(midScoreTwo > 2.5);
    assertEquals(midScoreTwo, midAttrTwo.getHighLowSum(), 1E-5);
    // due to +1 in query
    assertTrue(midAttrTwo.high[1] > 1);
    assertTrue(midAttrTwo.getHighLowSum(0) < 1.2 * midAttrTwo.high[1]);
    // reversal of the dominant dimension: still an anomaly, but the attribution
    // is masked by the recently inserted points near the origin
    double midUnmaskedScore = newForest.getDynamicScore(queryTwo, 1, CommonUtils::defaultScoreSeenFunction, CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
    midUnmaskedScore = CommonUtils.defaultScalarNormalizerFunction(midUnmaskedScore, sampleSize);
    DiVector midUnmaskedAttr = newForest.getDynamicAttribution(queryTwo, 1, CommonUtils::defaultScoreSeenFunction, CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
    midUnmaskedAttr.componentwiseTransform(x -> CommonUtils.defaultScalarNormalizerFunction(x, sampleSize));
    assertTrue(midUnmaskedScore > 3.0);
    assertEquals(midUnmaskedScore, midUnmaskedAttr.getHighLowSum(), 1E-5);
    // due to +1 in query
    assertTrue(midUnmaskedAttr.high[1] > 1);
    assertTrue(midUnmaskedAttr.getHighLowSum(0) > midUnmaskedAttr.getHighLowSum(1));
    // a few more identical updates: insert queryOne five times so it is sampled
    for (int i = 2005; i < 2010; i++) {
        newForest.update(queryOne);
    }
    double finalScoreTwo = newForest.getAnomalyScore(queryTwo);
    DiVector finalAttrTwo = newForest.getDynamicAttribution(queryTwo, 0, CommonUtils::defaultScoreSeenFunction, CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
    finalAttrTwo.componentwiseTransform(x -> CommonUtils.defaultScalarNormalizerFunction(x, sampleSize));
    assertTrue(finalScoreTwo > 2.5);
    assertEquals(finalScoreTwo, finalAttrTwo.getHighLowSum(), 1E-5);
    // due to +1 in query
    assertTrue(finalAttrTwo.high[1] > 1);
    assertTrue(2 * finalAttrTwo.getHighLowSum(0) < finalAttrTwo.high[1]);
    // the drop in high[0] and low[0] is steep and the attribution has shifted;
    // now repeat the unmasked evaluation with a different threshold
    double finalUnmaskedScore = newForest.getDynamicScore(queryTwo, 5, CommonUtils::defaultScoreSeenFunction, CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
    finalUnmaskedScore = CommonUtils.defaultScalarNormalizerFunction(finalUnmaskedScore, sampleSize);
    DiVector finalUnmaskedAttr = newForest.getDynamicAttribution(queryTwo, 5, CommonUtils::defaultScoreSeenFunction, CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
    finalUnmaskedAttr.componentwiseTransform(x -> CommonUtils.defaultScalarNormalizerFunction(x, sampleSize));
    assertTrue(finalUnmaskedScore > 3.0);
    assertEquals(finalUnmaskedScore, finalUnmaskedAttr.getHighLowSum(), 1E-5);
    // due to +1 in query
    assertTrue(finalUnmaskedAttr.high[1] > 1);
    assertTrue(finalUnmaskedAttr.getHighLowSum(0) > 0.8 * finalUnmaskedAttr.getHighLowSum(1));
    // the attributions in dimension 0 continue to be reduced, but do not vanish
    // or become small as in the masked case; the gap is not a factor of 4
}
Also used: Random (java.util.Random), DiVector (com.amazon.randomcutforest.returntypes.DiVector), NormalMixtureTestData (com.amazon.randomcutforest.testutils.NormalMixtureTestData), Test (org.junit.jupiter.api.Test)
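
For orientation, the six constructor arguments used above configure a two-component Gaussian mixture. A minimal sketch; the argument meanings are inferred from the field names in these tests rather than from library documentation, and the seed interpretation of the third generateTestData argument is an assumption:

    NormalMixtureTestData generator = new NormalMixtureTestData(
            0.0,   // baseMu: mean of the base component
            1.0,   // baseSigma: standard deviation of the base component
            5.0,   // anomalyMu: mean of the anomaly component
            1.5,   // anomalySigma: standard deviation of the anomaly component
            0.01,  // transitionToAnomalyProbability
            0.4);  // transitionToBaseProbability
    // Both overloads appear in these examples; the third argument is presumably a seed.
    double[][] data = generator.generateTestData(1000, 2);
    double[][] seeded = generator.generateTestData(1000, 2, 42);

A no-argument constructor with default parameters is also used in the examples above.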

Example 8 with NormalMixtureTestData

Use of com.amazon.randomcutforest.testutils.NormalMixtureTestData in the project random-cut-forest-by-aws by aws.

From the class RandomCutForestConsistencyFunctionalTest, method testConsistentScoring:

@Test
public void testConsistentScoring() {
    RandomCutForest.Builder<?> builder = RandomCutForest.builder().dimensions(dimensions).sampleSize(sampleSize).randomSeed(randomSeed);
    RandomCutForest pointerCachedSequential = builder.compact(false).boundingBoxCacheFraction(1.0).parallelExecutionEnabled(false).build();
    RandomCutForest pointerCachedParallel = builder.compact(false).boundingBoxCacheFraction(1.0).parallelExecutionEnabled(true).build();
    RandomCutForest pointerCachedRandomSequential = builder.compact(false).boundingBoxCacheFraction(new Random().nextDouble()).parallelExecutionEnabled(false).build();
    RandomCutForest pointerCachedRandomParallel = builder.compact(false).boundingBoxCacheFraction(new Random().nextDouble()).parallelExecutionEnabled(true).build();
    RandomCutForest pointerUncachedSequential = builder.compact(false).boundingBoxCacheFraction(0.0).parallelExecutionEnabled(false).build();
    RandomCutForest pointerUncachedParallel = builder.compact(false).boundingBoxCacheFraction(0.0).parallelExecutionEnabled(true).build();
    RandomCutForest compactCachedSequential = builder.compact(true).boundingBoxCacheFraction(1.0).parallelExecutionEnabled(false).build();
    RandomCutForest compactCachedParallel = builder.compact(true).boundingBoxCacheFraction(1.0).parallelExecutionEnabled(true).build();
    RandomCutForest compactUncachedSequential = builder.compact(true).boundingBoxCacheFraction(0.0).parallelExecutionEnabled(false).build();
    RandomCutForest compactUncachedParallel = builder.compact(true).boundingBoxCacheFraction(0.0).parallelExecutionEnabled(true).build();
    RandomCutForest compactCachedRandomSequential = builder.compact(true).boundingBoxCacheFraction(new Random().nextDouble()).parallelExecutionEnabled(false).build();
    RandomCutForest compactCachedRandomParallel = builder.compact(true).boundingBoxCacheFraction(new Random().nextDouble()).parallelExecutionEnabled(true).build();
    NormalMixtureTestData testData = new NormalMixtureTestData();
    double delta = 1e-10;
    int anomalies = 0;
    for (double[] point : testData.generateTestData(testSize, dimensions, 99)) {
        double score = pointerCachedSequential.getAnomalyScore(point);
        if (score > 0) {
            anomalies++;
        }
        assertEquals(score, pointerCachedParallel.getAnomalyScore(point), delta);
        assertEquals(score, pointerUncachedSequential.getAnomalyScore(point), delta);
        assertEquals(score, pointerUncachedParallel.getAnomalyScore(point), delta);
        assertEquals(score, compactCachedSequential.getAnomalyScore(point), delta);
        assertEquals(score, compactCachedParallel.getAnomalyScore(point), delta);
        assertEquals(score, compactUncachedSequential.getAnomalyScore(point), delta);
        assertEquals(score, compactUncachedParallel.getAnomalyScore(point), delta);
        assertEquals(score, pointerCachedRandomSequential.getAnomalyScore(point), delta);
        assertEquals(score, pointerCachedRandomParallel.getAnomalyScore(point), delta);
        assertEquals(score, compactCachedRandomSequential.getAnomalyScore(point), delta);
        assertEquals(score, compactCachedRandomParallel.getAnomalyScore(point), delta);
        pointerCachedSequential.update(point);
        pointerCachedParallel.update(point);
        pointerUncachedSequential.update(point);
        pointerUncachedParallel.update(point);
        pointerCachedRandomSequential.update(point);
        pointerCachedRandomParallel.update(point);
        compactCachedSequential.update(point);
        compactCachedParallel.update(point);
        compactUncachedSequential.update(point);
        compactUncachedParallel.update(point);
        compactCachedRandomSequential.update(point);
        compactCachedRandomParallel.update(point);
    }
    // verify that the test is nontrivial
    assertTrue(anomalies > 0);
}
Also used: Random (java.util.Random), NormalMixtureTestData (com.amazon.randomcutforest.testutils.NormalMixtureTestData), Test (org.junit.jupiter.api.Test)
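
Note the builder reuse in this test: one mutable builder is reconfigured before every build() call, so all twelve forests share dimensions, sampleSize, and randomSeed and differ only in the options set last. A condensed sketch of the pattern, with assumed values:

    RandomCutForest.Builder<?> builder = RandomCutForest.builder()
            .dimensions(3).sampleSize(256).randomSeed(42L);
    // The chained setters mutate and return the same builder, so both forests
    // inherit the shared seed; with identical seeds and identical update
    // streams, their anomaly scores should agree, as the test asserts.
    RandomCutForest sequential = builder.parallelExecutionEnabled(false).build();
    RandomCutForest parallel = builder.parallelExecutionEnabled(true).build();

Since getAnomalyScore does not modify the forest, the variants stay in lockstep as long as each one receives the same sequence of update calls.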

Example 9 with NormalMixtureTestData

Use of com.amazon.randomcutforest.testutils.NormalMixtureTestData in the project random-cut-forest-by-aws by aws.

From the class RandomCutForestShingledFunctionalTest, method oneTimeSetUp:

@BeforeAll
public static void oneTimeSetUp() {
    numberOfTrees = 100;
    sampleSize = 256;
    dimensions = 2;
    randomSeed = 123;
    shingleSize = 3;
    shingleBuilder = new ShingleBuilder(dimensions, shingleSize);
    forest = RandomCutForest.builder().numberOfTrees(numberOfTrees).sampleSize(sampleSize).dimensions(shingleBuilder.getShingledPointSize()).randomSeed(randomSeed).centerOfMassEnabled(true).storeSequenceIndexesEnabled(true).build();
    dataSize = 10_000;
    baseMu = 0.0;
    baseSigma = 1.0;
    anomalyMu = 5.0;
    anomalySigma = 1.5;
    transitionToAnomalyProbability = 0.01;
    transitionToBaseProbability = 0.4;
    NormalMixtureTestData generator = new NormalMixtureTestData(baseMu, baseSigma, anomalyMu, anomalySigma, transitionToAnomalyProbability, transitionToBaseProbability);
    double[][] data = generator.generateTestData(dataSize, dimensions);
    for (int i = 0; i < dataSize; i++) {
        shingleBuilder.addPoint(data[i]);
        if (shingleBuilder.isFull()) {
            forest.update(shingleBuilder.getShingle());
        }
    }
}
Also used: ShingleBuilder (com.amazon.randomcutforest.util.ShingleBuilder), NormalMixtureTestData (com.amazon.randomcutforest.testutils.NormalMixtureTestData), BeforeAll (org.junit.jupiter.api.BeforeAll)
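
The shingling flow above, reduced to its essentials. This sketch reuses only the calls seen in the excerpt (addPoint, isFull, getShingle, getShingledPointSize); the scoring line and the reliance on builder defaults are illustrative additions:

    // 2 input dimensions, shingle size 3 => shingled points have 6 dimensions
    ShingleBuilder shingleBuilder = new ShingleBuilder(2, 3);
    RandomCutForest forest = RandomCutForest.builder()
            .dimensions(shingleBuilder.getShingledPointSize())
            .sampleSize(256)
            .build();
    double[] point = { 0.5, -1.2 };
    shingleBuilder.addPoint(point);
    // the shingle only becomes full once shingleSize points have been added
    if (shingleBuilder.isFull()) {
        double score = forest.getAnomalyScore(shingleBuilder.getShingle());
        forest.update(shingleBuilder.getShingle());
    }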

Example 10 with NormalMixtureTestData

Use of com.amazon.randomcutforest.testutils.NormalMixtureTestData in the project random-cut-forest-by-aws by aws.

From the class ConditionalFieldTest, method SimpleTest:

@Test
public void SimpleTest() {
    int newDimensions = 30;
    randomSeed = 101;
    sampleSize = 256;
    RandomCutForest newForest = RandomCutForest.builder().numberOfTrees(100).sampleSize(sampleSize).dimensions(newDimensions).randomSeed(randomSeed).compact(true).boundingBoxCacheFraction(0.0).build();
    dataSize = 2000 + 5;
    baseMu = 0.0;
    baseSigma = 1.0;
    anomalyMu = 0.0;
    anomalySigma = 1.0;
    transitionToAnomalyProbability = 0.0;
    // ignoring anomaly cluster for now
    transitionToBaseProbability = 1.0;
    Random prg = new Random(0);
    NormalMixtureTestData generator = new NormalMixtureTestData(baseMu, baseSigma, anomalyMu, anomalySigma, transitionToAnomalyProbability, transitionToBaseProbability);
    double[][] data = generator.generateTestData(dataSize, newDimensions, 100);
    for (int i = 0; i < 2000; i++) {
        // shrink, shift at random
        for (int j = 0; j < newDimensions; j++) data[i][j] *= 0.01;
        if (prg.nextDouble() < 0.5)
            data[i][0] += 5.0;
        else
            data[i][0] -= 5.0;
        newForest.update(data[i]);
    }
    float[] queryOne = new float[newDimensions];
    float[] queryTwo = new float[newDimensions];
    queryTwo[1] = 1;
    ConditionalSampleSummary summary = newForest.getConditionalFieldSummary(queryOne, 1, new int[] { 0 }, 1);
    assert (summary.summaryPoints.length == 2);
    assert (summary.relativeLikelihood.length == 2);
    assert (Math.abs(summary.summaryPoints[0][0] - 5.0) < 0.01 || Math.abs(summary.summaryPoints[0][0] + 5.0) < 0.01);
    assert (Math.abs(summary.summaryPoints[1][0] - 5.0) < 0.01 || Math.abs(summary.summaryPoints[1][0] + 5.0) < 0.01);
    assert (summary.relativeLikelihood[0] > 0.25);
    assert (summary.relativeLikelihood[1] > 0.25);
    summary = newForest.getConditionalFieldSummary(queryTwo, 1, new int[] { 0 }, 1);
    assert (summary.summaryPoints.length == 2);
    assert (summary.relativeLikelihood.length == 2);
    assertEquals(summary.summaryPoints[0][1], 1, 1e-6);
    assertEquals(summary.summaryPoints[1][1], 1, 1e-6);
    assert (Math.abs(summary.summaryPoints[0][0] - 5.0) < 0.01 || Math.abs(summary.summaryPoints[0][0] + 5.0) < 0.01);
    assert (Math.abs(summary.summaryPoints[1][0] - 5.0) < 0.01 || Math.abs(summary.summaryPoints[1][0] + 5.0) < 0.01);
    assert (summary.relativeLikelihood[0] > 0.25);
    assert (summary.relativeLikelihood[1] > 0.25);
}
Also used: ConditionalSampleSummary (com.amazon.randomcutforest.returntypes.ConditionalSampleSummary), Random (java.util.Random), NormalMixtureTestData (com.amazon.randomcutforest.testutils.NormalMixtureTestData), Test (org.junit.jupiter.api.Test)
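
Two remarks on this test. First, it mixes JUnit's assertEquals with bare Java assert statements; the latter are no-ops unless the JVM is started with -ea. Second, a small hypothetical helper (not part of the library) for inspecting the summary, with field meanings inferred from the assertions above rather than from documented semantics:

    // Print each candidate completion of the query and its relative likelihood.
    static void printSummary(ConditionalSampleSummary summary) {
        for (int k = 0; k < summary.summaryPoints.length; k++) {
            System.out.println(java.util.Arrays.toString(summary.summaryPoints[k])
                    + "  relativeLikelihood=" + summary.relativeLikelihood[k]);
        }
    }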

Aggregations

NormalMixtureTestData (com.amazon.randomcutforest.testutils.NormalMixtureTestData): 20
Precision (com.amazon.randomcutforest.config.Precision): 8
RandomCutForest (com.amazon.randomcutforest.RandomCutForest): 7
Random (java.util.Random): 7
Test (org.junit.jupiter.api.Test): 6
RandomCutForestMapper (com.amazon.randomcutforest.state.RandomCutForestMapper): 5
DiVector (com.amazon.randomcutforest.returntypes.DiVector): 4
RandomCutForestState (com.amazon.randomcutforest.state.RandomCutForestState): 4
ParameterizedTest (org.junit.jupiter.params.ParameterizedTest): 4
BeforeAll (org.junit.jupiter.api.BeforeAll): 3
AnomalyDescriptor (com.amazon.randomcutforest.parkservices.AnomalyDescriptor): 2
ThresholdedRandomCutForest (com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest): 2
MultiDimDataWithKey (com.amazon.randomcutforest.testutils.MultiDimDataWithKey): 2
LinkedBuffer (io.protostuff.LinkedBuffer): 2
ArgumentsSource (org.junit.jupiter.params.provider.ArgumentsSource): 2
ConditionalSampleSummary (com.amazon.randomcutforest.returntypes.ConditionalSampleSummary): 1
CompactSampler (com.amazon.randomcutforest.sampler.CompactSampler): 1
ShingleBuilder (com.amazon.randomcutforest.util.ShingleBuilder): 1
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 1
Instant (java.time.Instant): 1