Example 1 with DiVector

Use of com.amazon.randomcutforest.returntypes.DiVector in project random-cut-forest-by-aws by aws.

From the class PredictorCorrector, method trigger.

/**
 * In a high-score region with a previous anomaly, we use this to determine if
 * the "residual contribution" since the last anomaly would have sufficed to
 * trigger anomaly designation on its own.
 *
 * @param candidate                  attribution of the current point under
 *                                   consideration
 * @param gap                        how long ago the previous anomaly occurred
 * @param baseDimension              number of input attributes/variables
 *                                   (before shingling)
 * @param ideal                      a form of expected attribution; can be null
 *                                   if there was no previous anomaly in the
 *                                   shingle
 * @param previousIsPotentialAnomaly whether the previous point is a potential
 *                                   anomaly
 * @param lastAnomalyDescriptor      descriptor of the last anomaly, supplying
 *                                   its attribution and RCF score
 * @return true if the residual (extrapolated) score would trigger anomaly
 *         designation on its own, false otherwise
 */
protected boolean trigger(DiVector candidate, int gap, int baseDimension, DiVector ideal, boolean previousIsPotentialAnomaly, IRCFComputeDescriptor lastAnomalyDescriptor) {
    DiVector lastAnomalyAttribution = lastAnomalyDescriptor.getAttribution();
    double lastAnomalyScore = lastAnomalyDescriptor.getRCFScore();
    if (lastAnomalyAttribution == null) {
        return true;
    }
    checkArgument(lastAnomalyAttribution.getDimensions() == candidate.getDimensions(), "DiVector dimensions must match");
    int dimensions = candidate.getDimensions();
    int difference = baseDimension * gap;
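    // difference < dimensions: the previous anomaly still overlaps the current shingle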
    if (difference < dimensions) {
        if (ideal == null) {
            double remainder = 0;
            for (int i = dimensions - difference; i < dimensions; i++) {
                remainder += candidate.getHighLowSum(i);
            }
            return thresholder.getAnomalyGrade(remainder * dimensions / difference, previousIsPotentialAnomaly, triggerFactor) > 0;
        } else {
            double differentialRemainder = 0;
            for (int i = dimensions - difference; i < dimensions; i++) {
                differentialRemainder += Math.abs(candidate.low[i] - ideal.low[i]) + Math.abs(candidate.high[i] - ideal.high[i]);
            }
            return (differentialRemainder > ignoreSimilarFactor * lastAnomalyScore) && thresholder.getAnomalyGrade(differentialRemainder * dimensions / difference, previousIsPotentialAnomaly, triggerFactor) > 0;
        }
    } else {
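        // the previous anomaly has rotated out of the shingle; optionally suppress near-duplicate attributions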
        if (!ignoreSimilar) {
            return true;
        }
        double sum = 0;
        for (int i = 0; i < dimensions; i++) {
            sum += Math.abs(lastAnomalyAttribution.high[i] - candidate.high[i]) + Math.abs(lastAnomalyAttribution.low[i] - candidate.low[i]);
        }
        return (sum > ignoreSimilarFactor * lastAnomalyScore);
    }
}
Also used: DiVector (com.amazon.randomcutforest.returntypes.DiVector)
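
The arithmetic in the ideal == null branch is easiest to see with concrete numbers. The following is a minimal standalone sketch (the class and the helper residualScore are hypothetical, written here only for illustration) of the remainder * dimensions / difference extrapolation: with baseDimension = 1 and gap = 2 on a shingle of size 4, only the last two shingle entries arrived after the previous anomaly, so their summed attribution is scaled by 4/2 = 2 to estimate what a full shingle of such values would score.

import com.amazon.randomcutforest.returntypes.DiVector;

public class ResidualScoreSketch {

    // Hypothetical helper mirroring the ideal == null branch of trigger():
    // sum the attribution of the shingle entries that arrived after the last
    // anomaly, then scale the sum up to a full shingle.
    static double residualScore(DiVector candidate, int gap, int baseDimension) {
        int dimensions = candidate.getDimensions();
        int difference = baseDimension * gap;
        if (difference >= dimensions) {
            // no overlap with the previous anomaly; the full score stands
            // (trigger() applies the similarity check in this case instead)
            return candidate.getHighLowSum();
        }
        double remainder = 0;
        for (int i = dimensions - difference; i < dimensions; i++) {
            remainder += candidate.getHighLowSum(i);
        }
        return remainder * dimensions / difference;
    }

    public static void main(String[] args) {
        // shingle of 4 one-dimensional entries; attribution concentrated at the end
        DiVector candidate = new DiVector(4);
        candidate.high[2] = 0.5;
        candidate.high[3] = 0.7;
        // the last two entries are new, so their sum 1.2 is scaled by 4/2 = 2;
        // prints 2.4, the value trigger() would pass to the thresholder
        System.out.println(residualScore(candidate, 2, 1));
    }
}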

Example 2 with DiVector

Use of com.amazon.randomcutforest.returntypes.DiVector in project random-cut-forest-by-aws by aws.

From the class AttributionExamplesFunctionalTest, method attributionUnMaskingTest.

@Test
public void attributionUnMaskingTest() {
    // starts with the same setup as rrcfTest; data corresponds to two small
    // clusters at x=+/-5.0
    // queries q_1=(0,0,0, ..., 0)
    // inserts updates (0,1,0, ..., 0) a few times
    // queries q_2=(0,1,0, ..., 0)
    // attribution of q_2 is now affected by q_1 (which is still an anomaly)
    int newDimensions = 30;
    randomSeed = 179;
    sampleSize = 256;
    RandomCutForest newForest = RandomCutForest.builder().numberOfTrees(100).sampleSize(sampleSize).dimensions(newDimensions).randomSeed(randomSeed).compact(true).boundingBoxCacheFraction(new Random().nextDouble()).timeDecay(1e-5).build();
    dataSize = 2000 + 5;
    baseMu = 0.0;
    baseSigma = 1.0;
    anomalyMu = 0.0;
    anomalySigma = 1.5;
    transitionToAnomalyProbability = 0.0;
    // ignoring anomaly cluster for now
    transitionToBaseProbability = 1.0;
    Random prg = new Random(0);
    NormalMixtureTestData generator = new NormalMixtureTestData(baseMu, baseSigma, anomalyMu, anomalySigma, transitionToAnomalyProbability, transitionToBaseProbability);
    double[][] data = generator.generateTestData(dataSize, newDimensions, 100);
    for (int i = 0; i < 2000; i++) {
        // shrink, shift at random
        for (int j = 0; j < newDimensions; j++) data[i][j] *= 0.01;
        if (prg.nextDouble() < 0.5)
            data[i][0] += 5.0;
        else
            data[i][0] -= 5.0;
        newForest.update(data[i]);
    }
    float[] queryOne = new float[newDimensions];
    float[] queryTwo = new float[newDimensions];
    queryTwo[1] = 1;
    double originalScoreTwo = newForest.getAnomalyScore(queryTwo);
    // testing approximation with precision 0 (no approximation)
    DiVector originalAttrTwo = newForest.getApproximateDynamicAttribution(queryTwo, 0, true, 0, CommonUtils::defaultScoreSeenFunction, CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
    originalAttrTwo.componentwiseTransform(x -> CommonUtils.defaultScalarNormalizerFunction(x, sampleSize));
    assertTrue(originalScoreTwo > 3.0);
    assertEquals(originalScoreTwo, originalAttrTwo.getHighLowSum(), 1E-5);
    // due to -5 cluster
    assertTrue(originalAttrTwo.high[0] > 0.75);
    // due to +5 cluster
    assertTrue(originalAttrTwo.low[0] > 0.75);
    // due to +1 in query
    assertTrue(originalAttrTwo.high[1] > 1);
    assertTrue(originalAttrTwo.getHighLowSum(0) > originalAttrTwo.getHighLowSum(1));
    double apx = newForest.getApproximateDynamicScore(queryTwo, 0.1, true, 0, CommonUtils::defaultScoreSeenFunction, CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
    assertEquals(originalScoreTwo, CommonUtils.defaultScalarNormalizerFunction(apx, sampleSize), 0.2);
    assertEquals(apx, newForest.getApproximateDynamicAttribution(queryTwo, 0.1, true, 0, CommonUtils::defaultScoreSeenFunction, CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction).getHighLowSum(), 1e-5);
    // insert a few near-zero points (close to queryOne) so that region gets sampled
    for (int i = 2000; i < 2000 + 5; i++) {
        double score = newForest.getAnomalyScore(queryOne);
        double score2 = newForest.getAnomalyScore(queryTwo);
        DiVector attr2 = newForest.getDynamicAttribution(queryTwo, 0, CommonUtils::defaultScoreSeenFunction, CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
        attr2.componentwiseTransform(x -> CommonUtils.defaultScalarNormalizerFunction(x, sampleSize));
        double score3 = newForest.getDynamicScore(queryTwo, 1, CommonUtils::defaultScoreSeenFunction, CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
        score3 = CommonUtils.defaultScalarNormalizerFunction(score3, sampleSize);
        DiVector attr3 = newForest.getDynamicAttribution(queryTwo, 1, CommonUtils::defaultScoreSeenFunction, CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
        attr3.componentwiseTransform(x -> CommonUtils.defaultScalarNormalizerFunction(x, sampleSize));
        // verify
        assertTrue(score > 2.0);
        assertTrue(score2 > 2.0);
        assertTrue(score3 > 2.0);
        assertEquals(attr2.getHighLowSum(), score2, 1E-5);
        assertEquals(attr3.getHighLowSum(), score3, 1E-5);
        for (int j = 0; j < newDimensions; j++) data[i][j] *= 0.01;
        newForest.update(data[i]);
    // 5 different anomalous points
    }
    double midScoreTwo = newForest.getAnomalyScore(queryTwo);
    DiVector midAttrTwo = newForest.getDynamicAttribution(queryTwo, 0, CommonUtils::defaultScoreSeenFunction, CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
    midAttrTwo.componentwiseTransform(x -> CommonUtils.defaultScalarNormalizerFunction(x, sampleSize));
    assertTrue(midScoreTwo > 2.5);
    assertEquals(midScoreTwo, midAttrTwo.getHighLowSum(), 1E-5);
    // due to +1 in query
    assertTrue(midAttrTwo.high[1] > 1);
    assertTrue(midAttrTwo.getHighLowSum(0) < 1.2 * midAttrTwo.high[1]);
    // reversal of the dominant dimension:
    // still an anomaly, but the attribution is masked by the recently inserted points
    double midUnmaskedScore = newForest.getDynamicScore(queryTwo, 1, CommonUtils::defaultScoreSeenFunction, CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
    midUnmaskedScore = CommonUtils.defaultScalarNormalizerFunction(midUnmaskedScore, sampleSize);
    DiVector midUnmaskedAttr = newForest.getDynamicAttribution(queryTwo, 1, CommonUtils::defaultScoreSeenFunction, CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
    midUnmaskedAttr.componentwiseTransform(x -> CommonUtils.defaultScalarNormalizerFunction(x, sampleSize));
    assertTrue(midUnmaskedScore > 3.0);
    assertEquals(midUnmaskedScore, midUnmaskedAttr.getHighLowSum(), 1E-5);
    // due to +1 in query
    assertTrue(midUnmaskedAttr.high[1] > 1);
    assertTrue(midUnmaskedAttr.getHighLowSum(0) > midUnmaskedAttr.getHighLowSum(1));
    // a few more identical updates, inserting queryOne itself
    for (int i = 2005; i < 2010; i++) {
        newForest.update(queryOne);
    }
    double finalScoreTwo = newForest.getAnomalyScore(queryTwo);
    DiVector finalAttrTwo = newForest.getDynamicAttribution(queryTwo, 0, CommonUtils::defaultScoreSeenFunction, CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
    finalAttrTwo.componentwiseTransform(x -> CommonUtils.defaultScalarNormalizerFunction(x, sampleSize));
    assertTrue(finalScoreTwo > 2.5);
    assertEquals(finalScoreTwo, finalAttrTwo.getHighLowSum(), 1E-5);
    // due to +1 in query
    assertTrue(finalAttrTwo.high[1] > 1);
    assertTrue(2 * finalAttrTwo.getHighLowSum(0) < finalAttrTwo.high[1]);
    // the drop in high[0] and low[0] is steep and the attribution has shifted;
    // repeat with a higher ignore-leaf mass threshold
    double finalUnmaskedScore = newForest.getDynamicScore(queryTwo, 5, CommonUtils::defaultScoreSeenFunction, CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
    finalUnmaskedScore = CommonUtils.defaultScalarNormalizerFunction(finalUnmaskedScore, sampleSize);
    DiVector finalUnmaskedAttr = newForest.getDynamicAttribution(queryTwo, 5, CommonUtils::defaultScoreSeenFunction, CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
    finalUnmaskedAttr.componentwiseTransform(x -> CommonUtils.defaultScalarNormalizerFunction(x, sampleSize));
    assertTrue(finalUnmaskedScore > 3.0);
    assertEquals(finalUnmaskedScore, finalUnmaskedAttr.getHighLowSum(), 1E-5);
    // due to +1 in query
    assertTrue(finalUnmaskedAttr.high[1] > 1);
    assertTrue(finalUnmaskedAttr.getHighLowSum(0) > 0.8 * finalUnmaskedAttr.getHighLowSum(1));
// the attributions in dimension 0 continue to be reduced, but do not vanish
// or become small as in the other case; the gap is not a factor of 4
}
Also used: Random (java.util.Random), DiVector (com.amazon.randomcutforest.returntypes.DiVector), NormalMixtureTestData (com.amazon.randomcutforest.testutils.NormalMixtureTestData), Test (org.junit.jupiter.api.Test)
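
The test repeats one pattern throughout: score a point, fetch its attribution at two ignore-leaf settings, and normalize both. Below is a condensed sketch of just that comparison, assuming an already-populated RandomCutForest (named forest here); it reuses only calls that appear in the test, with the second argument of getDynamicAttribution being the leaf-mass threshold below which leaves are ignored. A large shift in the dimension-0 share between the two calls is exactly the masking effect the test asserts.

import com.amazon.randomcutforest.CommonUtils;
import com.amazon.randomcutforest.RandomCutForest;
import com.amazon.randomcutforest.returntypes.DiVector;

public class UnmaskingSketch {

    // Compute a point's attribution with and without ignoring low-mass leaves.
    // A large shift in a dimension's share between the two calls indicates that
    // recently inserted duplicates are masking that dimension's contribution.
    static void compareAttributions(RandomCutForest forest, float[] point, int sampleSize) {
        // threshold 0: every leaf counts
        DiVector masked = forest.getDynamicAttribution(point, 0, CommonUtils::defaultScoreSeenFunction,
                CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
        masked.componentwiseTransform(x -> CommonUtils.defaultScalarNormalizerFunction(x, sampleSize));
        // threshold 1: leaves holding a single (recently inserted) point are skipped
        DiVector unmasked = forest.getDynamicAttribution(point, 1, CommonUtils::defaultScoreSeenFunction,
                CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
        unmasked.componentwiseTransform(x -> CommonUtils.defaultScalarNormalizerFunction(x, sampleSize));
        System.out.println("dim 0, threshold 0: " + masked.getHighLowSum(0));
        System.out.println("dim 0, threshold 1: " + unmasked.getHighLowSum(0));
    }
}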

Example 3 with DiVector

Use of com.amazon.randomcutforest.returntypes.DiVector in project random-cut-forest-by-aws by aws.

From the class RandomCutForestTest, method testGetApproximateAnomalyAttribution.

@Test
public void testGetApproximateAnomalyAttribution() {
    float[] point = { 1.2f, -3.4f };
    DiVector zero = new DiVector(dimensions);
    DiVector result = forest.getApproximateAnomalyAttribution(point);
    assertFalse(forest.isOutputReady());
    assertArrayEquals(zero.high, result.high, EPSILON);
    assertArrayEquals(zero.low, result.low, EPSILON);
    doReturn(true).when(forest).isOutputReady();
    ConvergingAccumulator<DiVector> accumulator = new OneSidedConvergingDiVectorAccumulator(dimensions, RandomCutForest.DEFAULT_APPROXIMATE_ANOMALY_SCORE_HIGH_IS_CRITICAL, RandomCutForest.DEFAULT_APPROXIMATE_DYNAMIC_SCORE_PRECISION, RandomCutForest.DEFAULT_APPROXIMATE_DYNAMIC_SCORE_MIN_VALUES_ACCEPTED, numberOfTrees);
    for (int i = 0; i < numberOfTrees; i++) {
        SamplerPlusTree<Integer, float[]> component = (SamplerPlusTree<Integer, float[]>) components.get(i);
        ITree<Integer, float[]> tree = component.getTree();
        DiVector treeResult = new DiVector(dimensions);
        for (int j = 0; j < dimensions; j++) {
            treeResult.high[j] = Math.random();
            treeResult.low[j] = Math.random();
        }
        when(tree.traverse(aryEq(point), any(VisitorFactory.class))).thenReturn(treeResult);
        when(tree.getMass()).thenReturn(256);
        if (!accumulator.isConverged()) {
            accumulator.accept(treeResult);
        }
    }
    DiVector expectedResult = accumulator.getAccumulatedValue().scale(1.0 / accumulator.getValuesAccepted());
    result = forest.getApproximateAnomalyAttribution(point);
    assertArrayEquals(expectedResult.high, result.high, EPSILON);
    assertArrayEquals(expectedResult.low, result.low, EPSILON);
}
Also used: DiVector (com.amazon.randomcutforest.returntypes.DiVector), OneSidedConvergingDiVectorAccumulator (com.amazon.randomcutforest.returntypes.OneSidedConvergingDiVectorAccumulator), SamplerPlusTree (com.amazon.randomcutforest.executor.SamplerPlusTree), Test (org.junit.jupiter.api.Test)
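
The loop above mirrors the aggregation the forest performs for approximate attribution: per-tree DiVectors are fed into a converging accumulator, and the values accepted before convergence are averaged. A condensed sketch of that aggregation step, assuming the per-tree results are already available (class and method names are illustrative only, and the import location of ConvergingAccumulator is assumed to match the other return types):

import java.util.List;

import com.amazon.randomcutforest.RandomCutForest;
import com.amazon.randomcutforest.returntypes.ConvergingAccumulator;
import com.amazon.randomcutforest.returntypes.DiVector;
import com.amazon.randomcutforest.returntypes.OneSidedConvergingDiVectorAccumulator;

public class AttributionAggregationSketch {

    // Feed per-tree attributions into the accumulator until it converges,
    // then average over the values actually accepted.
    static DiVector aggregate(List<DiVector> treeResults, int dimensions, int numberOfTrees) {
        ConvergingAccumulator<DiVector> accumulator = new OneSidedConvergingDiVectorAccumulator(dimensions,
                RandomCutForest.DEFAULT_APPROXIMATE_ANOMALY_SCORE_HIGH_IS_CRITICAL,
                RandomCutForest.DEFAULT_APPROXIMATE_DYNAMIC_SCORE_PRECISION,
                RandomCutForest.DEFAULT_APPROXIMATE_DYNAMIC_SCORE_MIN_VALUES_ACCEPTED, numberOfTrees);
        for (DiVector treeResult : treeResults) {
            if (accumulator.isConverged()) {
                break;
            }
            accumulator.accept(treeResult);
        }
        return accumulator.getAccumulatedValue().scale(1.0 / accumulator.getValuesAccepted());
    }
}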

Example 4 with DiVector

Use of com.amazon.randomcutforest.returntypes.DiVector in project random-cut-forest-by-aws by aws.

From the class AnomalyAttributionVisitorTest, method testAccept.

@Test
public void testAccept() {
    float[] pointToScore = { 0.0f, 0.0f };
    int treeMass = 50;
    AnomalyAttributionVisitor visitor = new AnomalyAttributionVisitor(pointToScore, treeMass, 0);
    INodeView leafNode = mock(NodeView.class);
    float[] point = new float[] { 1.0f, -2.0f };
    when(leafNode.getLeafPoint()).thenReturn(point);
    when(leafNode.getBoundingBox()).thenReturn(new BoundingBox(point, point));
    int leafMass = 3;
    when(leafNode.getMass()).thenReturn(leafMass);
    int depth = 4;
    visitor.acceptLeaf(leafNode, depth);
    DiVector result = visitor.getResult();
    double expectedScoreSum = defaultScoreUnseenFunction(depth, leafNode.getMass());
    double sumOfNewRange = 1.0 + 2.0;
    double[] expectedUnnormalizedLow = new double[] { expectedScoreSum * 1.0 / sumOfNewRange, 0.0 };
    double[] expectedUnnormalizedHigh = new double[] { 0.0, expectedScoreSum * 2.0 / sumOfNewRange };
    for (int i = 0; i < pointToScore.length; i++) {
        assertEquals(defaultScalarNormalizerFunction(expectedUnnormalizedLow[i], treeMass), result.low[i], EPSILON);
        assertEquals(defaultScalarNormalizerFunction(expectedUnnormalizedHigh[i], treeMass), result.high[i], EPSILON);
    }
    // parent does not contain pointToScore
    depth--;
    INodeView sibling = mock(NodeView.class);
    int siblingMass = 2;
    when(sibling.getMass()).thenReturn(siblingMass);
    INodeView parent = mock(NodeView.class);
    int parentMass = leafMass + siblingMass;
    when(parent.getMass()).thenReturn(parentMass);
    BoundingBox boundingBox = new BoundingBox(point, new float[] { 2.0f, -0.5f });
    when(parent.getBoundingBox()).thenReturn(boundingBox);
    visitor.accept(parent, depth);
    result = visitor.getResult();
    double expectedSumOfNewRange2 = 2.0 + 2.0;
    double expectedProbOfCut2 = (1.0 + 0.5) / expectedSumOfNewRange2;
    double[] expectedDifferenceInRangeVector2 = { 0.0, 1.0, 0.5, 0.0 };
    double expectedScore2 = defaultScoreUnseenFunction(depth, parent.getMass());
    double[] expectedUnnormalizedLow2 = new double[pointToScore.length];
    double[] expectedUnnormalizedHigh2 = new double[pointToScore.length];
    for (int i = 0; i < pointToScore.length; i++) {
        double prob = expectedDifferenceInRangeVector2[2 * i] / expectedSumOfNewRange2;
        expectedUnnormalizedHigh2[i] = prob * expectedScore2 + (1 - expectedProbOfCut2) * expectedUnnormalizedHigh[i];
        prob = expectedDifferenceInRangeVector2[2 * i + 1] / expectedSumOfNewRange2;
        expectedUnnormalizedLow2[i] = prob * expectedScore2 + (1 - expectedProbOfCut2) * expectedUnnormalizedLow[i];
    }
    for (int i = 0; i < pointToScore.length; i++) {
        assertEquals(defaultScalarNormalizerFunction(expectedUnnormalizedLow2[i], treeMass), result.low[i], EPSILON);
        assertEquals(defaultScalarNormalizerFunction(expectedUnnormalizedHigh2[i], treeMass), result.high[i], EPSILON);
    }
    // grandparent contains pointToScore
    assertFalse(visitor.pointInsideBox);
    depth--;
    INodeView grandParent = mock(NodeView.class);
    when(grandParent.getMass()).thenReturn(parentMass + 2);
    when(grandParent.getBoundingBox()).thenReturn(boundingBox.getMergedBox(new BoundingBox(new float[] { -1.0f, 1.0f }).getMergedBox(new float[] { -0.5f, -1.5f })));
    visitor.accept(grandParent, depth);
    result = visitor.getResult();
    for (int i = 0; i < pointToScore.length; i++) {
        assertEquals(defaultScalarNormalizerFunction(expectedUnnormalizedLow2[i], treeMass), result.low[i], EPSILON);
        assertEquals(defaultScalarNormalizerFunction(expectedUnnormalizedHigh2[i], treeMass), result.high[i], EPSILON);
    }
}
Also used: DiVector (com.amazon.randomcutforest.returntypes.DiVector), BoundingBox (com.amazon.randomcutforest.tree.BoundingBox), INodeView (com.amazon.randomcutforest.tree.INodeView), Test (org.junit.jupiter.api.Test)
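
The expected values in this test encode the visitor's update rule: at an ancestor that does not contain the point, each directional attribution becomes p * score + (1 - probOfCut) * previousAttribution, where p is the fraction of the total range growth contributed by extending the bounding box toward the point on that side. The standalone sketch below (hypothetical helper, plain arrays instead of the library's BoundingBox) reproduces the growth vector {0.0, 1.0, 0.5, 0.0} and the cut probability used above.

public class ProbOfCutSketch {

    // For each dimension, how far must the box grow on each side to include
    // the point? Returns {high[0], low[0], high[1], low[1], ...}, mirroring
    // expectedDifferenceInRangeVector2 in the test above.
    static double[] rangeGrowth(float[] point, float[] boxMin, float[] boxMax) {
        double[] growth = new double[2 * point.length];
        for (int i = 0; i < point.length; i++) {
            growth[2 * i] = Math.max(0.0, point[i] - boxMax[i]); // growth on the high side
            growth[2 * i + 1] = Math.max(0.0, boxMin[i] - point[i]); // growth on the low side
        }
        return growth;
    }

    public static void main(String[] args) {
        // parent box from the test spans [1.0, 2.0] x [-2.0, -0.5]; point is (0, 0)
        float[] point = { 0.0f, 0.0f };
        float[] boxMin = { 1.0f, -2.0f };
        float[] boxMax = { 2.0f, -0.5f };
        double[] growth = rangeGrowth(point, boxMin, boxMax);
        // growth = {0.0, 1.0, 0.5, 0.0}: extend 1.0 below in dim 0, 0.5 above in dim 1
        double sumOfNewRange = 2.0 + 2.0; // ranges of the merged box: [0,2] and [-2,0]
        double probOfCut = (growth[1] + growth[2]) / sumOfNewRange;
        System.out.println(probOfCut); // 0.375, matching expectedProbOfCut2
    }
}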

Example 5 with DiVector

Use of com.amazon.randomcutforest.returntypes.DiVector in project random-cut-forest-by-aws by aws.

From the class AnomalyAttributionVisitorTest, method testNewWithIgnoreOptions.

@Test
public void testNewWithIgnoreOptions() {
    float[] point = new float[] { 1.1f, -2.2f, 3.3f };
    int treeMass = 99;
    AnomalyAttributionVisitor visitor = new AnomalyAttributionVisitor(point, treeMass, 7);
    assertFalse(visitor.pointInsideBox);
    for (int i = 0; i < point.length; i++) {
        assertFalse(visitor.coordInsideBox[i]);
    }
    assertTrue(visitor.ignoreLeaf);
    assertEquals(7, visitor.ignoreLeafMassThreshold);
    DiVector result = visitor.getResult();
    double[] zero = new double[point.length];
    assertArrayEquals(zero, result.high);
    assertArrayEquals(zero, result.low);
}
Also used: DiVector (com.amazon.randomcutforest.returntypes.DiVector), Test (org.junit.jupiter.api.Test)

Aggregations

DiVector (com.amazon.randomcutforest.returntypes.DiVector): 24
Test (org.junit.jupiter.api.Test): 11
SamplerPlusTree (com.amazon.randomcutforest.executor.SamplerPlusTree): 6
Random (java.util.Random): 6
OneSidedConvergingDiVectorAccumulator (com.amazon.randomcutforest.returntypes.OneSidedConvergingDiVectorAccumulator): 5
CommonUtils.checkArgument (com.amazon.randomcutforest.CommonUtils.checkArgument): 4
CommonUtils.checkNotNull (com.amazon.randomcutforest.CommonUtils.checkNotNull): 4
CommonUtils.toDoubleArray (com.amazon.randomcutforest.CommonUtils.toDoubleArray): 4
CommonUtils.toFloatArray (com.amazon.randomcutforest.CommonUtils.toFloatArray): 4
AnomalyAttributionVisitor (com.amazon.randomcutforest.anomalydetection.AnomalyAttributionVisitor): 4
AnomalyScoreVisitor (com.amazon.randomcutforest.anomalydetection.AnomalyScoreVisitor): 4
DynamicAttributionVisitor (com.amazon.randomcutforest.anomalydetection.DynamicAttributionVisitor): 4
DynamicScoreVisitor (com.amazon.randomcutforest.anomalydetection.DynamicScoreVisitor): 4
SimulatedTransductiveScalarScoreVisitor (com.amazon.randomcutforest.anomalydetection.SimulatedTransductiveScalarScoreVisitor): 4
Config (com.amazon.randomcutforest.config.Config): 4
Precision (com.amazon.randomcutforest.config.Precision): 4
AbstractForestTraversalExecutor (com.amazon.randomcutforest.executor.AbstractForestTraversalExecutor): 4
AbstractForestUpdateExecutor (com.amazon.randomcutforest.executor.AbstractForestUpdateExecutor): 4
IStateCoordinator (com.amazon.randomcutforest.executor.IStateCoordinator): 4
ParallelForestTraversalExecutor (com.amazon.randomcutforest.executor.ParallelForestTraversalExecutor): 4