Search in sources:

Example 11 with DiVector

use of com.amazon.randomcutforest.returntypes.DiVector in project random-cut-forest-by-aws by aws.

From the class RandomCutForestShingledBenchmark, the method attributionAndUpdate.

@Benchmark
@OperationsPerInvocation(DATA_SIZE)
public RandomCutForest attributionAndUpdate(BenchmarkState state, Blackhole blackhole) {
    double[][] data = state.data;
    forest = state.forest;
    // Holds the most recent attribution. The original code pre-allocated
    // new DiVector(forest.getDimensions()) here, but that object was
    // unconditionally overwritten on the first loop iteration -- a dead
    // allocation that only adds noise to the benchmark. Start at null instead.
    DiVector vector = null;
    for (int i = INITIAL_DATA_SIZE; i < data.length; i++) {
        // Query attribution first, then fold the same point into the model,
        // mimicking a streaming score-then-update workload.
        vector = forest.getAnomalyAttribution(data[i]);
        forest.update(data[i]);
    }
    // Consume the last result so the JIT cannot dead-code-eliminate the loop.
    blackhole.consume(vector);
    return forest;
}
Also used : DiVector(com.amazon.randomcutforest.returntypes.DiVector) Benchmark(org.openjdk.jmh.annotations.Benchmark) OperationsPerInvocation(org.openjdk.jmh.annotations.OperationsPerInvocation)

Example 12 with DiVector

use of com.amazon.randomcutforest.returntypes.DiVector in project random-cut-forest-by-aws by aws.

From the class AbstractAttributionVisitor, the method getResult.

/**
 * Apply the normalization function used by the corresponding scoring visitor
 * to every coordinate of the directional attribution, transforming a copy of
 * the data in place. The function must be associative in its first argument;
 * that is, fn(x1, y) + fn(x2, y) = fn(x1 + x2, y).
 *
 * @return The normalized attribution vector.
 */
@Override
public DiVector getResult() {
    DiVector normalized = new DiVector(directionalAttribution);
    normalized.componentwiseTransform(
            value -> CommonUtils.defaultScalarNormalizerFunction(value, treeMass));
    return normalized;
}
Also used : DiVector(com.amazon.randomcutforest.returntypes.DiVector)

Example 13 with DiVector

use of com.amazon.randomcutforest.returntypes.DiVector in project random-cut-forest-by-aws by aws.

From the class AttributionExamplesFunctionalTest, the method RRCFattributionTest.

@Test
public void RRCFattributionTest() {
    // Verifies that repeatedly inserting a new point q_1 shifts the anomaly
    // attribution of a nearby point q_2, even though q_2's raw score stays high.
    // NOTE(review): the exact numeric thresholds below depend on the fixed
    // randomSeed and the precise order of updates/queries -- do not reorder.
    //
    // starts with the same setup as rrcfTest; data corresponds to two small
    // clusters at x=+/-5.0
    // queries q_1=(0,0,0, ..., 0)
    // inserts updates (0,1,0, ..., 0) a few times
    // queries q_2=(0,1,0, ..., 0)
    // attribution of q_2 is now affected by q_1 (which is still an anomaly)
    int newDimensions = 30;
    randomSeed = 101;
    sampleSize = 256;
    RandomCutForest newForest = RandomCutForest.builder().numberOfTrees(100).sampleSize(sampleSize).dimensions(newDimensions).randomSeed(randomSeed).compact(true).boundingBoxCacheFraction(0.0).build();
    dataSize = 2000 + 5;
    baseMu = 0.0;
    baseSigma = 1.0;
    anomalyMu = 0.0;
    anomalySigma = 1.0;
    transitionToAnomalyProbability = 0.0;
    // ignoring anomaly cluster for now
    transitionToBaseProbability = 1.0;
    Random prg = new Random(0);
    NormalMixtureTestData generator = new NormalMixtureTestData(baseMu, baseSigma, anomalyMu, anomalySigma, transitionToAnomalyProbability, transitionToBaseProbability);
    double[][] data = generator.generateTestData(dataSize, newDimensions, 100);
    // Build the two clusters: shrink all coordinates to near zero, then push
    // the first coordinate to +5 or -5 at random, and feed into the forest.
    for (int i = 0; i < 2000; i++) {
        // shrink, shift at random
        for (int j = 0; j < newDimensions; j++) data[i][j] *= 0.01;
        if (prg.nextDouble() < 0.5)
            data[i][0] += 5.0;
        else
            data[i][0] -= 5.0;
        newForest.update(data[i]);
    }
    // queryOne = origin (q_1); queryTwo = unit vector along dimension 1 (q_2).
    double[] queryOne = new double[newDimensions];
    double[] queryTwo = new double[newDimensions];
    queryTwo[1] = 1;
    double originalScoreTwo = newForest.getAnomalyScore(queryTwo);
    DiVector originalAttrTwo = newForest.getAnomalyAttribution(queryTwo);
    assertTrue(originalScoreTwo > 3.0);
    // Attribution components must sum to the anomaly score.
    assertEquals(originalScoreTwo, originalAttrTwo.getHighLowSum(), 1E-5);
    // due to -5 cluster
    assertTrue(originalAttrTwo.high[0] > 1.0);
    // due to +5 cluster
    assertTrue(originalAttrTwo.low[0] > 1.0);
    // due to +1 in query
    assertTrue(originalAttrTwo.high[1] > 1);
    // Initially dimension 0 (the cluster axis) dominates the attribution.
    assertTrue(originalAttrTwo.getHighLowSum(0) > 1.1 * originalAttrTwo.getHighLowSum(1));
    // we insert queryOne a few times to make sure it is sampled
    for (int i = 2000; i < 2000 + 5; i++) {
        double score = newForest.getAnomalyScore(queryOne);
        double score2 = newForest.getAnomalyScore(queryTwo);
        DiVector attr2 = newForest.getAnomalyAttribution(queryTwo);
        // verify
        assertTrue(score > 2.0);
        assertTrue(score2 > 2.0);
        assertEquals(attr2.getHighLowSum(), score2, 1E-5);
        // Shrink the next data point toward the origin so the update is
        // effectively an insertion near q_1.
        for (int j = 0; j < newDimensions; j++) data[i][j] *= 0.01;
        newForest.update(data[i]);
    // 5 different anomalous points
    }
    double midScoreTwo = newForest.getAnomalyScore(queryTwo);
    DiVector midAttrTwo = newForest.getAnomalyAttribution(queryTwo);
    assertTrue(midScoreTwo > 2.4);
    assertEquals(midScoreTwo, midAttrTwo.getHighLowSum(), 1E-5);
    // due to -5 cluster !!!
    assertTrue(midAttrTwo.high[0] < 1);
    // due to +5 cluster !!!
    assertTrue(midAttrTwo.low[0] < 1);
    // due to +1 in query
    assertTrue(midAttrTwo.high[1] > 1);
    // Attribution has begun shifting from dimension 0 toward dimension 1.
    assertTrue(midAttrTwo.getHighLowSum(0) < 1.1 * midAttrTwo.high[1]);
    // a few more updates, which are identical
    for (int i = 2005; i < 2010; i++) {
        newForest.update(queryOne);
    }
    double finalScoreTwo = newForest.getAnomalyScore(queryTwo);
    DiVector finalAttrTwo = newForest.getAnomalyAttribution(queryTwo);
    assertTrue(finalScoreTwo > 2.4);
    assertEquals(finalScoreTwo, finalAttrTwo.getHighLowSum(), 1E-5);
    // due to -5 cluster !!!
    assertTrue(finalAttrTwo.high[0] < 0.5);
    // due to +5 cluster !!!
    assertTrue(finalAttrTwo.low[0] < 0.5);
    // due to +1 in query
    assertTrue(finalAttrTwo.high[1] > 1);
    // Dimension 1 now dominates by a wide margin.
    assertTrue(2.5 * finalAttrTwo.getHighLowSum(0) < finalAttrTwo.high[1]);
// the drop in high[0] and low[0] is steep and the attribution has shifted
}
Also used : Random(java.util.Random) DiVector(com.amazon.randomcutforest.returntypes.DiVector) NormalMixtureTestData(com.amazon.randomcutforest.testutils.NormalMixtureTestData) Test(org.junit.jupiter.api.Test)

Example 14 with DiVector

use of com.amazon.randomcutforest.returntypes.DiVector in project random-cut-forest-by-aws by aws.

From the class RandomCutForestFunctionalTest, the method testGetAnomalyAttribution.

@ParameterizedTest
@ArgumentsSource(TestForestProvider.class)
public void testGetAnomalyAttribution(RandomCutForest forest) {
    /* This method checks that the scores and attributions are consistent */
    double[] point = { 0.0, 0.0, 0.0 };
    DiVector exactAttribution = forest.getAnomalyAttribution(point);
    double exactScore = forest.getAnomalyScore(point);
    // A previously seen, non-anomalous point contributes little per dimension.
    for (int dim = 0; dim < 3; dim++) {
        assertTrue(exactAttribution.getHighLowSum(dim) < 0.5);
    }
    assertTrue(exactScore < 1.0);
    // Attribution components must sum (almost exactly) to the score.
    assertEquals(exactScore, exactAttribution.getHighLowSum(), 1E-10);
    DiVector approxAttribution = forest.getApproximateAnomalyAttribution(point);
    double approxScore = forest.getApproximateAnomalyScore(point);
    for (int dim = 0; dim < 3; dim++) {
        assertTrue(approxAttribution.getHighLowSum(dim) < 0.5);
    }
    // The approximate variants only track the exact ones loosely.
    assertEquals(approxScore, approxAttribution.getHighLowSum(), 0.1);
    assertEquals(exactAttribution.getHighLowSum(), approxAttribution.getHighLowSum(), 0.1);
}
Also used : DiVector(com.amazon.randomcutforest.returntypes.DiVector) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) ArgumentsSource(org.junit.jupiter.params.provider.ArgumentsSource)

Example 15 with DiVector

use of com.amazon.randomcutforest.returntypes.DiVector in project random-cut-forest-by-aws by aws.

From the class RandomCutForestFunctionalTest, the method testMultipleAttributions.

@ParameterizedTest
@ArgumentsSource(TestForestProvider.class)
public void testMultipleAttributions(RandomCutForest forest) {
    /**
     * We will test the attribution over random runs. Narrow tests can fail -- we
     * will keep track of the aggregate number of narrow tests and test for large
     * characterization that would be misleading in failure.
     */
    // BUG FIX: in the original, each "assertTrue(result.getHighLowSum() > 1.0)"
    // after the first was evaluated BEFORE result was recomputed for the new
    // point, so it checked the stale attribution of the previous point (and the
    // final point's sum was never checked at all). Each of those assertions is
    // now placed after the corresponding getAnomalyAttribution call.
    int hardPass = 0;
    int causal = 0;
    double[] point = { 6.0, 0.0, 0.0 };
    DiVector result = forest.getAnomalyAttribution(point);
    assertTrue(result.getHighLowSum() > 1.0);
    assertTrue(result.low[0] < 0.2);
    if (result.getHighLowSum(1) < 0.5)
        ++hardPass;
    if (result.getHighLowSum(2) < 0.5)
        ++hardPass;
    assertTrue(result.getHighLowSum(1) + result.getHighLowSum(2) < 1.0);
    assertTrue(result.high[0] > forest.getAnomalyScore(point) / 3);
    if (result.high[0] > 0.5 * forest.getAnomalyScore(point))
        ++causal;
    // the last line states that first coordinate was high and was a majority
    // contributor to the score
    // the previous test states that the contribution is twice the average of the 12
    // possible contributors.
    // these tests all subparts of the score at once
    point = new double[] { -6.0, 0.0, 0.0 };
    result = forest.getAnomalyAttribution(point);
    assertTrue(result.getHighLowSum() > 1.0);
    assertTrue(result.high[0] < 0.5);
    if (result.getHighLowSum(1) < 0.5)
        ++hardPass;
    if (result.getHighLowSum(2) < 0.5)
        ++hardPass;
    assertTrue(result.low[0] > forest.getAnomalyScore(point) / 3);
    if (result.low[0] > 0.5 * forest.getAnomalyScore(point))
        ++causal;
    point = new double[] { 0.0, 6.0, 0.0 };
    result = forest.getAnomalyAttribution(point);
    assertTrue(result.getHighLowSum() > 1.0);
    if (result.getHighLowSum(0) < 0.5)
        ++hardPass;
    if (result.getHighLowSum(2) < 0.5)
        ++hardPass;
    assertTrue(result.low[1] < 0.5);
    assertTrue(result.high[1] > forest.getAnomalyScore(point) / 3);
    if (result.high[1] > 0.5 * forest.getAnomalyScore(point))
        ++causal;
    point = new double[] { 0.0, -6.0, 0.0 };
    result = forest.getAnomalyAttribution(point);
    assertTrue(result.getHighLowSum() > 1.0);
    if (result.getHighLowSum(0) < 0.5)
        ++hardPass;
    if (result.getHighLowSum(2) < 0.5)
        ++hardPass;
    assertTrue(result.high[1] < 0.5);
    assertTrue(result.low[1] > forest.getAnomalyScore(point) / 3);
    if (result.low[1] > 0.5 * forest.getAnomalyScore(point))
        ++causal;
    point = new double[] { 0.0, 0.0, 6.0 };
    result = forest.getAnomalyAttribution(point);
    assertTrue(result.getHighLowSum() > 1.0);
    if (result.getHighLowSum(0) < 0.5)
        ++hardPass;
    if (result.getHighLowSum(1) < 0.5)
        ++hardPass;
    assertTrue(result.low[2] < 0.5);
    assertTrue(result.high[2] > forest.getAnomalyScore(point) / 3);
    if (result.high[2] > 0.5 * forest.getAnomalyScore(point))
        ++causal;
    point = new double[] { 0.0, 0.0, -6.0 };
    result = forest.getAnomalyAttribution(point);
    assertTrue(result.getHighLowSum() > 1.0);
    if (result.getHighLowSum(0) < 0.5)
        ++hardPass;
    if (result.getHighLowSum(1) < 0.5)
        ++hardPass;
    assertTrue(result.high[2] < 0.5);
    assertTrue(result.low[2] > forest.getAnomalyScore(point) / 3);
    if (result.low[2] > 0.5 * forest.getAnomalyScore(point))
        ++causal;
    // maximum is 6; there can be skew in one direction
    assertTrue(causal >= 5);
    point = new double[] { -3.0, 0.0, 0.0 };
    result = forest.getAnomalyAttribution(point);
    assertTrue(result.high[0] < 0.5);
    if (result.getHighLowSum(1) < 0.5)
        ++hardPass;
    if (result.getHighLowSum(2) < 0.5)
        ++hardPass;
    assertTrue(result.low[0] > forest.getAnomalyScore(point) / 3);
    /*
         * For multiple causes, the relationship of scores only hold for larger
         * distances.
         */
    point = new double[] { -3.0, 6.0, 0.0 };
    result = forest.getAnomalyAttribution(point);
    assertTrue(result.getHighLowSum() > 1.0);
    if (result.low[0] > 0.5)
        ++hardPass;
    assertTrue(result.high[0] < 0.5);
    assertTrue(result.low[1] < 0.5);
    assertTrue(result.high[1] > 0.5);
    if (result.high[1] > 0.9)
        ++hardPass;
    assertTrue(result.getHighLowSum(2) < 0.5);
    // The two causes together should explain most of the score.
    assertTrue(result.high[1] + result.low[0] > 0.8 * forest.getAnomalyScore(point));
    point = new double[] { 6.0, -3.0, 0.0 };
    result = forest.getAnomalyAttribution(point);
    assertTrue(result.getHighLowSum() > 1.0);
    assertTrue(result.low[0] < 0.5);
    assertTrue(result.high[0] > 0.5);
    if (result.high[0] > 0.9)
        ++hardPass;
    if (result.low[1] > 0.5)
        ++hardPass;
    assertTrue(result.high[1] < 0.5);
    assertTrue(result.getHighLowSum(2) < 0.5);
    assertTrue(result.high[0] + result.low[1] > 0.8 * forest.getAnomalyScore(point));
    point = new double[] { 20.0, -10.0, 0.0 };
    result = forest.getAnomalyAttribution(point);
    assertTrue(result.getHighLowSum() > 1.0);
    assertTrue(result.high[0] + result.low[1] > 0.8 * forest.getAnomalyScore(point));
    if (result.high[0] > 1.8 * result.low[1])
        ++hardPass;
    if (result.low[1] > result.high[0] / 2.2)
        ++hardPass;
    // maximum is 20
    assertTrue(hardPass >= 15);
}
Also used : DiVector(com.amazon.randomcutforest.returntypes.DiVector) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest) ArgumentsSource(org.junit.jupiter.params.provider.ArgumentsSource)

Aggregations

DiVector (com.amazon.randomcutforest.returntypes.DiVector)24 Test (org.junit.jupiter.api.Test)11 SamplerPlusTree (com.amazon.randomcutforest.executor.SamplerPlusTree)6 Random (java.util.Random)6 OneSidedConvergingDiVectorAccumulator (com.amazon.randomcutforest.returntypes.OneSidedConvergingDiVectorAccumulator)5 CommonUtils.checkArgument (com.amazon.randomcutforest.CommonUtils.checkArgument)4 CommonUtils.checkNotNull (com.amazon.randomcutforest.CommonUtils.checkNotNull)4 CommonUtils.toDoubleArray (com.amazon.randomcutforest.CommonUtils.toDoubleArray)4 CommonUtils.toFloatArray (com.amazon.randomcutforest.CommonUtils.toFloatArray)4 AnomalyAttributionVisitor (com.amazon.randomcutforest.anomalydetection.AnomalyAttributionVisitor)4 AnomalyScoreVisitor (com.amazon.randomcutforest.anomalydetection.AnomalyScoreVisitor)4 DynamicAttributionVisitor (com.amazon.randomcutforest.anomalydetection.DynamicAttributionVisitor)4 DynamicScoreVisitor (com.amazon.randomcutforest.anomalydetection.DynamicScoreVisitor)4 SimulatedTransductiveScalarScoreVisitor (com.amazon.randomcutforest.anomalydetection.SimulatedTransductiveScalarScoreVisitor)4 Config (com.amazon.randomcutforest.config.Config)4 Precision (com.amazon.randomcutforest.config.Precision)4 AbstractForestTraversalExecutor (com.amazon.randomcutforest.executor.AbstractForestTraversalExecutor)4 AbstractForestUpdateExecutor (com.amazon.randomcutforest.executor.AbstractForestUpdateExecutor)4 IStateCoordinator (com.amazon.randomcutforest.executor.IStateCoordinator)4 ParallelForestTraversalExecutor (com.amazon.randomcutforest.executor.ParallelForestTraversalExecutor)4