Use of com.amazon.randomcutforest.returntypes.DiVector in project random-cut-forest-by-aws by aws.
The class PredictorCorrector, method trigger.
/**
 * In a high score region with a previous anomaly, we use this to determine if
 * the "residual contribution" since the last anomaly would have sufficed to
 * trigger an anomaly designation on its own.
 *
 * @param candidate                  attribution of the current point in
 *                                   consideration
 * @param gap                        how long ago the previous anomaly occurred
 * @param baseDimension              number of input attributes/variables
 *                                   (before shingling)
 * @param ideal                      a form of expected attribution; can be null
 *                                   if there was no previous anomaly in the
 *                                   shingle
 * @param previousIsPotentialAnomaly is the previous point a potential anomaly
 * @param lastAnomalyDescriptor      descriptor of the most recent anomaly,
 *                                   carrying its attribution and RCF score
 * @return true/false if the residual (extrapolated) score would trigger anomaly
 *         designation
 */
protected boolean trigger(DiVector candidate, int gap, int baseDimension, DiVector ideal,
        boolean previousIsPotentialAnomaly, IRCFComputeDescriptor lastAnomalyDescriptor) {
    DiVector lastAnomalyAttribution = lastAnomalyDescriptor.getAttribution();
    double lastAnomalyScore = lastAnomalyDescriptor.getRCFScore();
    if (lastAnomalyAttribution == null) {
        // no previous attribution to compare against; trigger unconditionally
        return true;
    }
    checkArgument(lastAnomalyAttribution.getDimensions() == candidate.getDimensions(), " error in DiVectors");
    int dimensions = candidate.getDimensions();

    // number of (shingled) coordinates that arrived after the last anomaly
    int difference = baseDimension * gap;

    if (difference < dimensions) {
        // the previous anomaly still occupies part of the shingle
        if (ideal == null) {
            // sum the attribution of the trailing, post-anomaly coordinates ...
            double remainder = 0;
            for (int i = dimensions - difference; i < dimensions; i++) {
                remainder += candidate.getHighLowSum(i);
            }
            // ... and extrapolate that partial sum to the full shingle
            return thresholder.getAnomalyGrade(remainder * dimensions / difference, previousIsPotentialAnomaly,
                    triggerFactor) > 0;
        } else {
            // compare against the expected attribution, coordinate by coordinate
            double differentialRemainder = 0;
            for (int i = dimensions - difference; i < dimensions; i++) {
                differentialRemainder += Math.abs(candidate.low[i] - ideal.low[i])
                        + Math.abs(candidate.high[i] - ideal.high[i]);
            }
            return (differentialRemainder > ignoreSimilarFactor * lastAnomalyScore)
                    && thresholder.getAnomalyGrade(differentialRemainder * dimensions / difference,
                            previousIsPotentialAnomaly, triggerFactor) > 0;
        }
    } else {
        // the previous anomaly has rotated out of the shingle entirely
        if (!ignoreSimilar) {
            return true;
        }
        // trigger only if the attribution differs enough from the last anomaly
        double sum = 0;
        for (int i = 0; i < dimensions; i++) {
            sum += Math.abs(lastAnomalyAttribution.high[i] - candidate.high[i])
                    + Math.abs(lastAnomalyAttribution.low[i] - candidate.low[i]);
        }
        return (sum > ignoreSimilarFactor * lastAnomalyScore);
    }
}
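The extrapolation in the first branch scales the residual attribution back up to a full-shingle score via remainder * dimensions / difference. A minimal sketch of that arithmetic with hand-picked values (the DiVector is built by hand purely for illustration; assumes import of com.amazon.randomcutforest.returntypes.DiVector):

// a shingle of 8 coordinates (baseDimension = 2, shingle size 4) where the
// previous anomaly occurred gap = 1 step ago, so only difference = 2
// trailing coordinates arrived after it
DiVector candidate = new DiVector(8);
for (int i = 0; i < 8; i++) {
    candidate.high[i] = 0.1; // uniform attribution, for illustration only
}
int baseDimension = 2;
int gap = 1;
int dimensions = candidate.getDimensions();
int difference = baseDimension * gap;
double remainder = 0;
for (int i = dimensions - difference; i < dimensions; i++) {
    remainder += candidate.getHighLowSum(i); // 0.1 + 0.1 = 0.2
}
// extrapolated full-shingle score: 0.2 * 8 / 2 = 0.8, which is what the
// method above hands to thresholder.getAnomalyGrade
double extrapolated = remainder * dimensions / difference;

The else branch applies the same scaling to differentialRemainder, the coordinate-wise deviation from the expected attribution.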
The class AttributionExamplesFunctionalTest, method attributionUnMaskingTest.
@Test
public void attributionUnMaskingTest() {
    // starts with the same setup as rrcfTest; data corresponds to two small
    // clusters at x = +/-5.0
    // queries q_1 = (0, 0, 0, ..., 0)
    // inserts updates (0, 1, 0, ..., 0) a few times
    // queries q_2 = (0, 1, 0, ..., 0)
    // attribution of q_2 is now affected by q_1 (which is still an anomaly)
    int newDimensions = 30;
    randomSeed = 179;
    sampleSize = 256;
    RandomCutForest newForest = RandomCutForest.builder().numberOfTrees(100).sampleSize(sampleSize)
            .dimensions(newDimensions).randomSeed(randomSeed).compact(true)
            .boundingBoxCacheFraction(new Random().nextDouble()).timeDecay(1e-5).build();

    dataSize = 2000 + 5;
    baseMu = 0.0;
    baseSigma = 1.0;
    anomalyMu = 0.0;
    anomalySigma = 1.5;
    transitionToAnomalyProbability = 0.0;
    // ignoring the anomaly cluster for now
    transitionToBaseProbability = 1.0;
    Random prg = new Random(0);
    NormalMixtureTestData generator = new NormalMixtureTestData(baseMu, baseSigma, anomalyMu, anomalySigma,
            transitionToAnomalyProbability, transitionToBaseProbability);
    double[][] data = generator.generateTestData(dataSize, newDimensions, 100);

    for (int i = 0; i < 2000; i++) {
        // shrink, then shift at random
        for (int j = 0; j < newDimensions; j++) {
            data[i][j] *= 0.01;
        }
        if (prg.nextDouble() < 0.5) {
            data[i][0] += 5.0;
        } else {
            data[i][0] -= 5.0;
        }
        newForest.update(data[i]);
    }
    float[] queryOne = new float[30];
    float[] queryTwo = new float[30];
    queryTwo[1] = 1;
    double originalScoreTwo = newForest.getAnomalyScore(queryTwo);
    // testing approximation with precision 0 (no approximation)
    DiVector originalAttrTwo = newForest.getApproximateDynamicAttribution(queryTwo, 0, true, 0,
            CommonUtils::defaultScoreSeenFunction, CommonUtils::defaultScoreUnseenFunction,
            CommonUtils::defaultDampFunction);
    originalAttrTwo.componentwiseTransform(x -> CommonUtils.defaultScalarNormalizerFunction(x, sampleSize));

    assertTrue(originalScoreTwo > 3.0);
    assertEquals(originalScoreTwo, originalAttrTwo.getHighLowSum(), 1E-5);
    // due to the -5 cluster
    assertTrue(originalAttrTwo.high[0] > 0.75);
    // due to the +5 cluster
    assertTrue(originalAttrTwo.low[0] > 0.75);
    // due to the +1 in the query
    assertTrue(originalAttrTwo.high[1] > 1);
    assertTrue(originalAttrTwo.getHighLowSum(0) > originalAttrTwo.getHighLowSum(1));

    double apx = newForest.getApproximateDynamicScore(queryTwo, 0.1, true, 0, CommonUtils::defaultScoreSeenFunction,
            CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
    assertEquals(originalScoreTwo, CommonUtils.defaultScalarNormalizerFunction(apx, sampleSize), 0.2);
    assertEquals(apx, newForest.getApproximateDynamicAttribution(queryTwo, 0.1, true, 0,
            CommonUtils::defaultScoreSeenFunction, CommonUtils::defaultScoreUnseenFunction,
            CommonUtils::defaultDampFunction).getHighLowSum(), 1e-5);
    // we insert (near-duplicates of) queryOne a few times to make sure it is sampled
    for (int i = 2000; i < 2000 + 5; i++) {
        double score = newForest.getAnomalyScore(queryOne);
        double score2 = newForest.getAnomalyScore(queryTwo);
        DiVector attr2 = newForest.getDynamicAttribution(queryTwo, 0, CommonUtils::defaultScoreSeenFunction,
                CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
        attr2.componentwiseTransform(x -> CommonUtils.defaultScalarNormalizerFunction(x, sampleSize));
        double score3 = newForest.getDynamicScore(queryTwo, 1, CommonUtils::defaultScoreSeenFunction,
                CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
        score3 = CommonUtils.defaultScalarNormalizerFunction(score3, sampleSize);
        DiVector attr3 = newForest.getDynamicAttribution(queryTwo, 1, CommonUtils::defaultScoreSeenFunction,
                CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
        attr3.componentwiseTransform(x -> CommonUtils.defaultScalarNormalizerFunction(x, sampleSize));
        // verify
        assertTrue(score > 2.0);
        assertTrue(score2 > 2.0);
        assertTrue(score3 > 2.0);
        assertEquals(attr2.getHighLowSum(), score2, 1E-5);
        assertEquals(attr3.getHighLowSum(), score3, 1E-5);
        for (int j = 0; j < newDimensions; j++) {
            data[i][j] *= 0.01;
        }
        newForest.update(data[i]);
        // 5 different anomalous points
    }
    double midScoreTwo = newForest.getAnomalyScore(queryTwo);
    DiVector midAttrTwo = newForest.getDynamicAttribution(queryTwo, 0, CommonUtils::defaultScoreSeenFunction,
            CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
    midAttrTwo.componentwiseTransform(x -> CommonUtils.defaultScalarNormalizerFunction(x, sampleSize));
    assertTrue(midScoreTwo > 2.5);
    assertEquals(midScoreTwo, midAttrTwo.getHighLowSum(), 1E-5);
    // due to the +1 in the query
    assertTrue(midAttrTwo.high[1] > 1);
    assertTrue(midAttrTwo.getHighLowSum(0) < 1.2 * midAttrTwo.high[1]);
    // reversal of the dominant dimension: still an anomaly, but the
    // attribution is masked by the recently inserted points
    double midUnmaskedScore = newForest.getDynamicScore(queryTwo, 1, CommonUtils::defaultScoreSeenFunction,
            CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
    midUnmaskedScore = CommonUtils.defaultScalarNormalizerFunction(midUnmaskedScore, sampleSize);
    DiVector midUnmaskedAttr = newForest.getDynamicAttribution(queryTwo, 1, CommonUtils::defaultScoreSeenFunction,
            CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
    midUnmaskedAttr.componentwiseTransform(x -> CommonUtils.defaultScalarNormalizerFunction(x, sampleSize));
    assertTrue(midUnmaskedScore > 3.0);
    assertEquals(midUnmaskedScore, midUnmaskedAttr.getHighLowSum(), 1E-5);
    // due to the +1 in the query
    assertTrue(midUnmaskedAttr.high[1] > 1);
    assertTrue(midUnmaskedAttr.getHighLowSum(0) > midUnmaskedAttr.getHighLowSum(1));

    // a few more identical updates
    for (int i = 2005; i < 2010; i++) {
        newForest.update(queryOne);
    }
    double finalScoreTwo = newForest.getAnomalyScore(queryTwo);
    DiVector finalAttrTwo = newForest.getDynamicAttribution(queryTwo, 0, CommonUtils::defaultScoreSeenFunction,
            CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
    finalAttrTwo.componentwiseTransform(x -> CommonUtils.defaultScalarNormalizerFunction(x, sampleSize));
    assertTrue(finalScoreTwo > 2.5);
    assertEquals(finalScoreTwo, finalAttrTwo.getHighLowSum(), 1E-5);
    // due to the +1 in the query
    assertTrue(finalAttrTwo.high[1] > 1);
    assertTrue(2 * finalAttrTwo.getHighLowSum(0) < finalAttrTwo.high[1]);
    // the drop in high[0] and low[0] is steep and the attribution has shifted

    // different thresholds
    double finalUnmaskedScore = newForest.getDynamicScore(queryTwo, 5, CommonUtils::defaultScoreSeenFunction,
            CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
    finalUnmaskedScore = CommonUtils.defaultScalarNormalizerFunction(finalUnmaskedScore, sampleSize);
    DiVector finalUnmaskedAttr = newForest.getDynamicAttribution(queryTwo, 5, CommonUtils::defaultScoreSeenFunction,
            CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
    finalUnmaskedAttr.componentwiseTransform(x -> CommonUtils.defaultScalarNormalizerFunction(x, sampleSize));
    assertTrue(finalUnmaskedScore > 3.0);
    assertEquals(finalUnmaskedScore, finalUnmaskedAttr.getHighLowSum(), 1E-5);
    // due to the +1 in the query
    assertTrue(finalUnmaskedAttr.high[1] > 1);
    assertTrue(finalUnmaskedAttr.getHighLowSum(0) > 0.8 * finalUnmaskedAttr.getHighLowSum(1));
    // the attributions in dimension 0 continue to be reduced, but do not vanish
    // or become small as in the other case; the gap is not a factor of 4
}
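For readers skimming the assertions above: DiVector stores directional attribution as two parallel arrays, high and low, with one entry per (shingled) dimension. getHighLowSum(i) totals one coordinate, getHighLowSum() totals all of them, and componentwiseTransform applies a function to every entry in place. A minimal sketch with hand-picked values (not produced by a forest):

DiVector v = new DiVector(2);
v.high[0] = 0.6; // share of the score from the query sitting above the data in dim 0
v.low[0] = 0.1;  // share from the query sitting below the data in dim 0
v.high[1] = 0.3;
assertEquals(0.7, v.getHighLowSum(0), 1e-9); // 0.6 + 0.1
assertEquals(1.0, v.getHighLowSum(), 1e-9);  // sum over all dimensions
v.componentwiseTransform(x -> 2 * x);        // e.g. a normalizer, applied in place
assertEquals(2.0, v.getHighLowSum(), 1e-9);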
The class RandomCutForestTest, method testGetApproximateAnomalyAttribution.
@Test
public void testGetApproximateAnomalyAttribution() {
    float[] point = { 1.2f, -3.4f };
    DiVector zero = new DiVector(dimensions);
    DiVector result = forest.getApproximateAnomalyAttribution(point);

    // before the forest is output-ready, the attribution is all zeros
    assertFalse(forest.isOutputReady());
    assertArrayEquals(zero.high, result.high, EPSILON);
    assertArrayEquals(zero.low, result.low, EPSILON);

    doReturn(true).when(forest).isOutputReady();
    ConvergingAccumulator<DiVector> accumulator = new OneSidedConvergingDiVectorAccumulator(dimensions,
            RandomCutForest.DEFAULT_APPROXIMATE_ANOMALY_SCORE_HIGH_IS_CRITICAL,
            RandomCutForest.DEFAULT_APPROXIMATE_DYNAMIC_SCORE_PRECISION,
            RandomCutForest.DEFAULT_APPROXIMATE_DYNAMIC_SCORE_MIN_VALUES_ACCEPTED, numberOfTrees);

    for (int i = 0; i < numberOfTrees; i++) {
        SamplerPlusTree<Integer, float[]> component = (SamplerPlusTree<Integer, float[]>) components.get(i);
        ITree<Integer, float[]> tree = component.getTree();
        DiVector treeResult = new DiVector(dimensions);
        for (int j = 0; j < dimensions; j++) {
            treeResult.high[j] = Math.random();
            treeResult.low[j] = Math.random();
        }
        when(tree.traverse(aryEq(point), any(VisitorFactory.class))).thenReturn(treeResult);
        when(tree.getMass()).thenReturn(256);
        if (!accumulator.isConverged()) {
            accumulator.accept(treeResult);
        }
    }

    // the expected answer is the accumulated DiVector averaged over the
    // number of values the accumulator accepted before converging
    DiVector expectedResult = accumulator.getAccumulatedValue().scale(1.0 / accumulator.getValuesAccepted());
    result = forest.getApproximateAnomalyAttribution(point);
    assertArrayEquals(expectedResult.high, result.high, EPSILON);
    assertArrayEquals(expectedResult.low, result.low, EPSILON);
}
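The "approximate" attribution works because the accumulator can declare convergence before every tree is visited, and the final answer is the accumulated sum divided by the number of accepted values. A simplified, hypothetical scalar version of that early-stopping idea (not the library's OneSidedConvergingDiVectorAccumulator, whose one-sided convergence test is more involved):

// accept per-tree scores until the running mean stabilizes within `precision`
double sum = 0;
int accepted = 0;
double previousMean = Double.MAX_VALUE;
double precision = 0.1;    // assumed tolerance, for illustration
int minValuesAccepted = 5; // assumed warm-up before convergence may be declared
double[] treeScores = { 1.1, 0.9, 1.0, 1.05, 0.95, 1.0, 1.02 };
for (double score : treeScores) {
    sum += score;
    accepted++;
    double mean = sum / accepted;
    if (accepted >= minValuesAccepted && Math.abs(mean - previousMean) < precision) {
        break; // converged: the remaining trees need not be traversed
    }
    previousMean = mean;
}
// mirrors getAccumulatedValue().scale(1.0 / getValuesAccepted()) above
double approximateScore = sum / accepted;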
The class AnomalyAttributionVisitorTest, method testAccept.
@Test
public void testAccept() {
    float[] pointToScore = { 0.0f, 0.0f };
    int treeMass = 50;
    AnomalyAttributionVisitor visitor = new AnomalyAttributionVisitor(pointToScore, treeMass, 0);

    INodeView leafNode = mock(NodeView.class);
    float[] point = new float[] { 1.0f, -2.0f };
    when(leafNode.getLeafPoint()).thenReturn(point);
    when(leafNode.getBoundingBox()).thenReturn(new BoundingBox(point, point));
    int leafMass = 3;
    when(leafNode.getMass()).thenReturn(leafMass);
    int depth = 4;
    visitor.acceptLeaf(leafNode, depth);
    DiVector result = visitor.getResult();

    double expectedScoreSum = defaultScoreUnseenFunction(depth, leafNode.getMass());
    double sumOfNewRange = 1.0 + 2.0;
    double[] expectedUnnormalizedLow = new double[] { expectedScoreSum * 1.0 / sumOfNewRange, 0.0 };
    double[] expectedUnnormalizedHigh = new double[] { 0.0, expectedScoreSum * 2.0 / sumOfNewRange };
    for (int i = 0; i < pointToScore.length; i++) {
        assertEquals(defaultScalarNormalizerFunction(expectedUnnormalizedLow[i], treeMass), result.low[i], EPSILON);
        assertEquals(defaultScalarNormalizerFunction(expectedUnnormalizedHigh[i], treeMass), result.high[i], EPSILON);
    }

    // parent does not contain pointToScore
    depth--;
    INodeView sibling = mock(NodeView.class);
    int siblingMass = 2;
    when(sibling.getMass()).thenReturn(siblingMass);
    INodeView parent = mock(NodeView.class);
    int parentMass = leafMass + siblingMass;
    when(parent.getMass()).thenReturn(parentMass);
    BoundingBox boundingBox = new BoundingBox(point, new float[] { 2.0f, -0.5f });
    when(parent.getBoundingBox()).thenReturn(boundingBox);
    visitor.accept(parent, depth);
    result = visitor.getResult();

    double expectedSumOfNewRange2 = 2.0 + 2.0;
    double expectedProbOfCut2 = (1.0 + 0.5) / expectedSumOfNewRange2;
    double[] expectedDifferenceInRangeVector2 = { 0.0, 1.0, 0.5, 0.0 };
    double expectedScore2 = defaultScoreUnseenFunction(depth, parent.getMass());
    double[] expectedUnnormalizedLow2 = new double[pointToScore.length];
    double[] expectedUnnormalizedHigh2 = new double[pointToScore.length];
    for (int i = 0; i < pointToScore.length; i++) {
        double prob = expectedDifferenceInRangeVector2[2 * i] / expectedSumOfNewRange2;
        expectedUnnormalizedHigh2[i] = prob * expectedScore2 + (1 - expectedProbOfCut2) * expectedUnnormalizedHigh[i];
        prob = expectedDifferenceInRangeVector2[2 * i + 1] / expectedSumOfNewRange2;
        expectedUnnormalizedLow2[i] = prob * expectedScore2 + (1 - expectedProbOfCut2) * expectedUnnormalizedLow[i];
    }
    for (int i = 0; i < pointToScore.length; i++) {
        assertEquals(defaultScalarNormalizerFunction(expectedUnnormalizedLow2[i], treeMass), result.low[i], EPSILON);
        assertEquals(defaultScalarNormalizerFunction(expectedUnnormalizedHigh2[i], treeMass), result.high[i], EPSILON);
    }

    // grandparent contains pointToScore, so the result no longer changes
    assertFalse(visitor.pointInsideBox);
    depth--;
    INodeView grandParent = mock(NodeView.class);
    when(grandParent.getMass()).thenReturn(parentMass + 2);
    when(grandParent.getBoundingBox()).thenReturn(boundingBox
            .getMergedBox(new BoundingBox(new float[] { -1.0f, 1.0f }).getMergedBox(new float[] { -0.5f, -1.5f })));
    visitor.accept(grandParent, depth);
    result = visitor.getResult();
    for (int i = 0; i < pointToScore.length; i++) {
        assertEquals(defaultScalarNormalizerFunction(expectedUnnormalizedLow2[i], treeMass), result.low[i], EPSILON);
        assertEquals(defaultScalarNormalizerFunction(expectedUnnormalizedHigh2[i], treeMass), result.high[i], EPSILON);
    }
}
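The leaf-step expectations above follow from how merging the query into the leaf's bounding box grows it. Worked numbers, using the same values as the test (defaultScoreUnseenFunction is the statically imported CommonUtils default, as in the test):

// query q = (0, 0), leaf p = (1, -2); merging q into the box {p} adds
//   dim 0: 1.0 of new range on the low side  (q[0] = 0 is below the box at 1)
//   dim 1: 2.0 of new range on the high side (q[1] = 0 is above the box at -2)
double score = defaultScoreUnseenFunction(4, 3); // depth 4, leaf mass 3
double sumOfNewRange = 1.0 + 2.0;
double low0 = score * 1.0 / sumOfNewRange;  // dim 0's share lands in low[0]
double high1 = score * 2.0 / sumOfNewRange; // dim 1's share lands in high[1]
// high[0] and low[1] receive nothing, matching the zeros in the expected arrays

The parent step repeats the same split on the newly added range, discounted by (1 - probability of cut), which is why expectedUnnormalizedHigh2 blends the new share with the previous one.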
The class AnomalyAttributionVisitorTest, method testNewWithIgnoreOptions.
@Test
public void testNewWithIgnoreOptions() {
    float[] point = new float[] { 1.1f, -2.2f, 3.3f };
    int treeMass = 99;
    AnomalyAttributionVisitor visitor = new AnomalyAttributionVisitor(point, treeMass, 7);

    // a freshly constructed visitor has seen no bounding boxes ...
    assertFalse(visitor.pointInsideBox);
    for (int i = 0; i < point.length; i++) {
        assertFalse(visitor.coordInsideBox[i]);
    }
    // ... and records the ignore-leaf settings passed to the constructor
    assertTrue(visitor.ignoreLeaf);
    assertEquals(7, visitor.ignoreLeafMassThreshold);

    DiVector result = visitor.getResult();
    double[] zero = new double[point.length];
    assertArrayEquals(zero, result.high);
    assertArrayEquals(zero, result.low);
}
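A sketch of the two configurations side by side (an inference from the constructor shape and the tests above, not documented API behavior): the third constructor argument appears to be the leaf-mass threshold, with 0 meaning every leaf contributes attribution (the testAccept configuration) and a positive value meaning small-mass leaves are ignored, which is the mechanism the unmasking test exercises through the second argument of getDynamicAttribution:

float[] q = new float[] { 1.1f, -2.2f, 3.3f };
// threshold 0: no leaves ignored (as in testAccept)
AnomalyAttributionVisitor plain = new AnomalyAttributionVisitor(q, 99, 0);
// threshold 7: leaves of small mass are ignored when attributing the score
AnomalyAttributionVisitor unmasking = new AnomalyAttributionVisitor(q, 99, 7);
assertTrue(unmasking.ignoreLeaf);
assertEquals(7, unmasking.ignoreLeafMassThreshold);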