Search in sources :

Example 11 with NormalMixtureTestData

Use of NormalMixtureTestData in project random-cut-forest-by-aws by aws.

the class RandomCutForestFunctionalTest method testSideEffectsB.

/**
 * Verifies that the anomaly score and the anomaly attribution reported for the
 * point {0, 0, 0} stay in sync with the attribution captured at the start of
 * the test.
 *
 * NOTE(review): this excerpt is missing the loop body and the closing braces,
 * so what happens to the forest between the two measurements is not visible
 * here -- confirm against the original source.
 *
 * @param forest the forest under test, supplied by the parameterized test source
 */
public void testSideEffectsB(RandomCutForest forest) {
    /* the changes to score and attribution should be in sync */
    // Baseline attribution, captured before any of the generated data is touched.
    DiVector initial = forest.getAnomalyAttribution(new double[] { 0.0, 0.0, 0.0 });
    NormalMixtureTestData generator2 = new NormalMixtureTestData(baseMu, baseSigma, anomalyMu, anomalySigma, transitionToAnomalyProbability, transitionToBaseProbability);
    double[][] newData = generator2.generateTestData(dataSize, dimensions);
    // NOTE(review): loop body not shown in this excerpt; newData is not
    // referenced by any visible statement.
    for (int i = 0; i < dataSize; i++) {
    // Re-query the same point and compare against the baseline.
    double newScore = forest.getAnomalyScore(new double[] { 0.0, 0.0, 0.0 });
    DiVector newVector = forest.getAnomalyAttribution(new double[] { 0.0, 0.0, 0.0 });
    // NOTE(review): this tolerance is 10E-10 (i.e. 1E-9) while the assertions
    // below use 1E-10 -- confirm whether the difference is intentional.
    assertEquals(initial.getHighLowSum(), newVector.getHighLowSum(), 10E-10);
    // The total of the baseline attribution must match the freshly computed score.
    assertEquals(initial.getHighLowSum(), newScore, 1E-10);
    // The per-index high and low components must match as well.
    assertArrayEquals(initial.high, newVector.high, 1E-10);
    assertArrayEquals(initial.low, newVector.low, 1E-10);
Also used : DiVector, NormalMixtureTestData, ParameterizedTest (org.junit.jupiter.params.ParameterizedTest), ArgumentsSource (org.junit.jupiter.params.provider.ArgumentsSource)

Example 12 with NormalMixtureTestData

Use of NormalMixtureTestData in project random-cut-forest-by-aws by aws.

the class RandomCutForestFunctionalTest method testShadowBuffer.

// Builds a dedicated forest, queries the attribution and score of the point
// {-8, -8, 0} at three stages (initial, after a 5-iteration loop, after a
// 5000-iteration loop) and asserts how the attribution components relate to
// the score at each stage.
// NOTE(review): in this excerpt the block-comment delimiters, the loop bodies
// and the closing braces are missing -- confirm against the original source.
public void testShadowBuffer() {
     * This test checks that the attribution *DOES NOT* change as a ratio as more
     * copies of the points are added. The shadowbox in
     * the @DirectionalAttributionVisitor allows us to simulate a deletion without
     * performing a deletion.
     * The goal is to measure the attribution and have many copies of the same point
     * and eventually the attribution will become uniform in all directions.
     * we create a new forest so that other tests are unaffected.
    // Forest configuration used only by this test.
    numberOfTrees = 100;
    sampleSize = 256;
    dimensions = 3;
    randomSeed = 123;
    RandomCutForest newForest = RandomCutForest.builder().numberOfTrees(numberOfTrees).sampleSize(sampleSize).dimensions(dimensions).randomSeed(randomSeed).centerOfMassEnabled(true).timeDecay(1e-5).storeSequenceIndexesEnabled(true).build();
    // Parameters handed to the NormalMixtureTestData generator below.
    dataSize = 10_000;
    baseMu = 0.0;
    baseSigma = 1.0;
    anomalyMu = 5.0;
    anomalySigma = 1.5;
    transitionToAnomalyProbability = 0.01;
    transitionToBaseProbability = 0.4;
    NormalMixtureTestData generator = new NormalMixtureTestData(baseMu, baseSigma, anomalyMu, anomalySigma, transitionToAnomalyProbability, transitionToBaseProbability);
    double[][] data = generator.generateTestData(dataSize, dimensions);
    // NOTE(review): loop body not shown in this excerpt; presumably the
    // generated data is fed to newForest here -- verify.
    for (int i = 0; i < dataSize; i++) {
    // Stage 1: initial measurement of the query point.
    double[] point = new double[] { -8.0, -8.0, 0.0 };
    DiVector result = newForest.getAnomalyAttribution(point);
    double score = newForest.getAnomalyScore(point);
    // The attribution total must agree with the score.
    assertEquals(score, result.getHighLowSum(), 1E-5);
    assertTrue(score > 2);
    assertTrue(result.getHighLowSum(2) < 0.2);
    // 256/10_000
    // NOTE(review): loop body not shown in this excerpt.
    for (int i = 0; i < 5; i++) {
    // Stage 2: the score must have dropped relative to stage 1.
    DiVector newResult = newForest.getAnomalyAttribution(point);
    double newScore = newForest.getAnomalyScore(point);
    assertEquals(newScore, newResult.getHighLowSum(), 1E-5);
    assertTrue(newScore < score);
    for (int j = 0; j < 3; j++) {
        // relationship holds at larger values
        // Cross-multiplied ratio check: newResult/newScore vs. result/score,
        // with an absolute tolerance of 0.1 * score.
        if (result.high[j] > 0.2) {
            assertEquals(score * newResult.high[j], newScore * result.high[j], 0.1 * score);
        } else {
            assertTrue(newResult.high[j] < 0.2);
        if (result.low[j] > 0.2) {
            assertEquals(score * newResult.low[j], newScore * result.low[j], 0.1 * score);
        } else {
            assertTrue(newResult.low[j] < 0.2);
    // this will make the point an inlier
    // NOTE(review): loop body not shown in this excerpt.
    for (int i = 0; i < 5000; i++) {
    // Stage 3: the score must now be below 1.
    DiVector finalResult = newForest.getAnomalyAttribution(point);
    double finalScore = newForest.getAnomalyScore(point);
    assertTrue(finalScore < 1);
    assertEquals(finalScore, finalResult.getHighLowSum(), 1E-5);
    for (int j = 0; j < 3; j++) {
        // relationship holds at larger values
        if (finalResult.high[j] > 0.2) {
            assertEquals(score * finalResult.high[j], finalScore * result.high[j], 0.1 * score);
        } else {
            // NOTE(review): this asserts on newResult (stage 2) while the
            // matching low-branch below asserts on finalResult -- possibly a
            // copy-paste slip; confirm the intended variable.
            assertTrue(newResult.high[j] < 0.2);
        if (finalResult.low[j] > 0.2) {
            assertEquals(score * finalResult.low[j], finalScore * result.low[j], 0.1 * score);
        } else {
            assertTrue(finalResult.low[j] < 0.2);
Also used : DiVector, NormalMixtureTestData, Test (org.junit.jupiter.api.Test), ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)

Example 13 with NormalMixtureTestData

Use of NormalMixtureTestData in project random-cut-forest-by-aws by aws.

the class AttributionExamplesFunctionalTest method RRCFattributionTest.

// Measures the score and attribution of queryTwo at three checkpoints
// (original, mid, final) on a 30-dimensional forest and asserts how the
// attribution at index 0 compares with the attribution at index 1 at each one.
// NOTE(review): in this excerpt several loop bodies, an apparent `else` and
// the closing braces are missing -- confirm against the original source.
public void RRCFattributionTest() {
    // starts with the same setup as rrcfTest; data corresponds to two small
    // clusters at x=+/-5.0
    // queries q_1=(0,0,0, ..., 0)
    // inserts updates (0,1,0, ..., 0) a few times
    // queries q_2=(0,1,0, ..., 0)
    // attribution of q_2 is now affected by q_1 (which is still an anomaly)
    int newDimensions = 30;
    randomSeed = 101;
    sampleSize = 256;
    RandomCutForest newForest = RandomCutForest.builder().numberOfTrees(100).sampleSize(sampleSize).dimensions(newDimensions).randomSeed(randomSeed).compact(true).boundingBoxCacheFraction(0.0).build();
    // 2000 cluster rows plus 5 extra rows handled by the second loop below.
    dataSize = 2000 + 5;
    baseMu = 0.0;
    baseSigma = 1.0;
    anomalyMu = 0.0;
    anomalySigma = 1.0;
    transitionToAnomalyProbability = 0.0;
    // ignoring anomaly cluster for now
    transitionToBaseProbability = 1.0;
    // Fixed seed, so the random shift chosen per row is repeatable.
    Random prg = new Random(0);
    NormalMixtureTestData generator = new NormalMixtureTestData(baseMu, baseSigma, anomalyMu, anomalySigma, transitionToAnomalyProbability, transitionToBaseProbability);
    // NOTE(review): the third argument (100) is presumably a seed -- verify
    // against NormalMixtureTestData.generateTestData.
    double[][] data = generator.generateTestData(dataSize, newDimensions, 100);
    for (int i = 0; i < 2000; i++) {
        // shrink, shift at random
        for (int j = 0; j < newDimensions; j++) data[i][j] *= 0.01;
        // NOTE(review): an `else` appears to be missing between the next two
        // statements in this excerpt; the later "-5 cluster" / "+5 cluster"
        // comments suggest the shift is either +5.0 or -5.0 -- confirm.
        if (prg.nextDouble() < 0.5)
            data[i][0] += 5.0;
            data[i][0] -= 5.0;
    // queryOne is the all-zero point; queryTwo differs only at index 1.
    double[] queryOne = new double[newDimensions];
    double[] queryTwo = new double[newDimensions];
    queryTwo[1] = 1;
    // Checkpoint 1: original measurements for queryTwo.
    double originalScoreTwo = newForest.getAnomalyScore(queryTwo);
    DiVector originalAttrTwo = newForest.getAnomalyAttribution(queryTwo);
    assertTrue(originalScoreTwo > 3.0);
    assertEquals(originalScoreTwo, originalAttrTwo.getHighLowSum(), 1E-5);
    // due to -5 cluster
    assertTrue(originalAttrTwo.high[0] > 1.0);
    // due to +5 cluster
    assertTrue(originalAttrTwo.low[0] > 1.0);
    // due to +1 in query
    assertTrue(originalAttrTwo.high[1] > 1);
    assertTrue(originalAttrTwo.getHighLowSum(0) > 1.1 * originalAttrTwo.getHighLowSum(1));
    // we insert queryOne a few times to make sure it is sampled
    for (int i = 2000; i < 2000 + 5; i++) {
        double score = newForest.getAnomalyScore(queryOne);
        double score2 = newForest.getAnomalyScore(queryTwo);
        DiVector attr2 = newForest.getAnomalyAttribution(queryTwo);
        // verify
        assertTrue(score > 2.0);
        assertTrue(score2 > 2.0);
        assertEquals(attr2.getHighLowSum(), score2, 1E-5);
        // Shrink the remaining rows the same way as the first 2000.
        for (int j = 0; j < newDimensions; j++) data[i][j] *= 0.01;
    // 5 different anomalous points
    // Checkpoint 2: mid measurements for queryTwo.
    double midScoreTwo = newForest.getAnomalyScore(queryTwo);
    DiVector midAttrTwo = newForest.getAnomalyAttribution(queryTwo);
    assertTrue(midScoreTwo > 2.4);
    assertEquals(midScoreTwo, midAttrTwo.getHighLowSum(), 1E-5);
    // due to -5 cluster !!!
    assertTrue(midAttrTwo.high[0] < 1);
    // due to +5 cluster !!!
    assertTrue(midAttrTwo.low[0] < 1);
    // due to +1 in query
    assertTrue(midAttrTwo.high[1] > 1);
    assertTrue(midAttrTwo.getHighLowSum(0) < 1.1 * midAttrTwo.high[1]);
    // a few more updates, which are identical
    // NOTE(review): loop body not shown in this excerpt.
    for (int i = 2005; i < 2010; i++) {
    // Checkpoint 3: final measurements for queryTwo.
    double finalScoreTwo = newForest.getAnomalyScore(queryTwo);
    DiVector finalAttrTwo = newForest.getAnomalyAttribution(queryTwo);
    assertTrue(finalScoreTwo > 2.4);
    assertEquals(finalScoreTwo, finalAttrTwo.getHighLowSum(), 1E-5);
    // due to -5 cluster !!!
    assertTrue(finalAttrTwo.high[0] < 0.5);
    // due to +5 cluster !!!
    assertTrue(finalAttrTwo.low[0] < 0.5);
    // due to +1 in query
    assertTrue(finalAttrTwo.high[1] > 1);
    assertTrue(2.5 * finalAttrTwo.getHighLowSum(0) < finalAttrTwo.high[1]);
// the drop in high[0] and low[0] is steep and the attribution has shifted
Also used : Random (java.util.Random), DiVector, NormalMixtureTestData, Test (org.junit.jupiter.api.Test)

Example 14 with NormalMixtureTestData

Use of NormalMixtureTestData in project random-cut-forest-by-aws by aws.

the class RandomCutForestConsistencyFunctionalTest method testConsistentScoringSinglePrecision.

// Builds several forests from one shared builder, varying the bounding-box
// cache fraction, precision and parallel execution, and asserts that their
// anomaly scores agree with compactFloatCached: within 1e-10 for the other
// FLOAT_32 forests and within 1e-2 for the FLOAT_64 forest.
// NOTE(review): in this excerpt the body of the `if`, any statements that
// feed points into the forests, and the closing braces are missing -- confirm
// against the original source.
public void testConsistentScoringSinglePrecision() {
    RandomCutForest.Builder<?> builder = RandomCutForest.builder().dimensions(dimensions).sampleSize(sampleSize).randomSeed(randomSeed).parallelExecutionEnabled(false).compact(true);
    RandomCutForest compactFloatCached = builder.boundingBoxCacheFraction(1.0).precision(Precision.FLOAT_32).build();
    // NOTE(review): compactFloatCachedParallel is not referenced by any
    // assertion visible in this excerpt.
    // NOTE(review): the same builder instance is reused for every forest below;
    // if the builder is mutated in place, parallelExecutionEnabled(true) set
    // here would carry over to the later "non-parallel" forests -- verify.
    RandomCutForest compactFloatCachedParallel = builder.boundingBoxCacheFraction(1.0).precision(Precision.FLOAT_32).parallelExecutionEnabled(true).build();
    RandomCutForest compactFloatUncached = builder.boundingBoxCacheFraction(0.0).precision(Precision.FLOAT_32).build();
    // The cache fraction comes from an unseeded Random, so it differs per run.
    RandomCutForest compactFloatCachedRandom = builder.boundingBoxCacheFraction(new Random().nextDouble()).precision(Precision.FLOAT_32).build();
    RandomCutForest compactFloatCachedRandomParallel = builder.boundingBoxCacheFraction(new Random().nextDouble()).precision(Precision.FLOAT_32).parallelExecutionEnabled(true).build();
    RandomCutForest compactFloatUncachedParallel = builder.boundingBoxCacheFraction(0.0).precision(Precision.FLOAT_32).parallelExecutionEnabled(true).build();
    RandomCutForest compactDoubleCached = builder.boundingBoxCacheFraction(1.0).precision(Precision.FLOAT_64).build();
    NormalMixtureTestData testData = new NormalMixtureTestData();
    int anomalies = 0;
    for (double[] point : testData.generateTestData(testSize, dimensions, 99)) {
        // compactFloatCached provides the reference score for this point.
        double score = compactFloatCached.getAnomalyScore(point);
        // NOTE(review): the body of this `if` is not shown; presumably it
        // increments `anomalies`, which is asserted positive below -- verify.
        if (score > 0) {
        assertEquals(score, compactFloatUncached.getAnomalyScore(point), 1e-10);
        assertEquals(score, compactFloatUncachedParallel.getAnomalyScore(point), 1e-10);
        assertEquals(score, compactFloatCachedRandom.getAnomalyScore(point), 1e-10);
        assertEquals(score, compactFloatCachedRandomParallel.getAnomalyScore(point), 1e-10);
        // we expect some loss of precision when comparing to the score computed as a
        // double
        assertEquals(score, compactDoubleCached.getAnomalyScore(point), 1e-2);
    // verify that the test is nontrivial
    assertTrue(anomalies > 0);
Also used : Random (java.util.Random), NormalMixtureTestData, Test (org.junit.jupiter.api.Test)

Example 15 with NormalMixtureTestData

Use of NormalMixtureTestData in project random-cut-forest-by-aws by aws.

the class RandomCutForestFunctionalTest method testSideEffectsA.

/**
 * Verifies that the anomaly score reported for the point {0, 0, 0} is the
 * same (within 10E-10) before and after the loop over the generated data.
 *
 * NOTE(review): this excerpt is missing the loop body and the closing braces,
 * so what happens to the forest between the two measurements is not visible
 * here -- confirm against the original source.
 *
 * @param forest the forest under test, supplied by the parameterized test source
 */
public void testSideEffectsA(RandomCutForest forest) {
    // Baseline score, captured before any of the generated data is touched.
    double score = forest.getAnomalyScore(new double[] { 0.0, 0.0, 0.0 });
    NormalMixtureTestData generator2 = new NormalMixtureTestData(baseMu, baseSigma, anomalyMu, anomalySigma, transitionToAnomalyProbability, transitionToBaseProbability);
    double[][] newData = generator2.generateTestData(dataSize, dimensions);
    // NOTE(review): loop body not shown in this excerpt; newData is not
    // referenced by any visible statement.
    for (int i = 0; i < dataSize; i++) {
    // Re-query the same point; the score must match the baseline.
    double newScore = forest.getAnomalyScore(new double[] { 0.0, 0.0, 0.0 });
    assertEquals(score, newScore, 10E-10);
Also used : NormalMixtureTestData, ParameterizedTest (org.junit.jupiter.params.ParameterizedTest), ArgumentsSource (org.junit.jupiter.params.provider.ArgumentsSource)


NormalMixtureTestData ( Precision ( RandomCutForest ( Random (java.util.Random)7 Test (org.junit.jupiter.api.Test)6 RandomCutForestMapper ( DiVector ( RandomCutForestState ( ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)4 BeforeAll (org.junit.jupiter.api.BeforeAll)3 AnomalyDescriptor ( ThresholdedRandomCutForest ( MultiDimDataWithKey ( LinkedBuffer (io.protostuff.LinkedBuffer)2 ArgumentsSource (org.junit.jupiter.params.provider.ArgumentsSource)2 ConditionalSampleSummary ( CompactSampler ( ShingleBuilder ( ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)1 Instant (java.time.Instant)1