Use of com.amazon.randomcutforest.testutils.NormalMixtureTestData in project random-cut-forest-by-aws by aws.
Class RandomCutForestMapperTest, method testRoundTripForCompactForest.
@ParameterizedTest
@MethodSource("compactForestProvider")
public void testRoundTripForCompactForest(RandomCutForest forest) {
    NormalMixtureTestData testData = new NormalMixtureTestData();
    for (double[] point : testData.generateTestData(sampleSize, dimensions)) {
        forest.update(point);
    }
    RandomCutForest forest2 = mapper.toModel(mapper.toState(forest));
    assertCompactForestEquals(forest, forest2);
}
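For context, a minimal standalone sketch of the same round trip, assuming illustrative values for sampleSize and dimensions (the test reads these from fixture fields) and the RandomCutForestMapper from com.amazon.randomcutforest.state; the saveExecutorContextEnabled flag follows the project's serialization example.

import com.amazon.randomcutforest.RandomCutForest;
import com.amazon.randomcutforest.state.RandomCutForestMapper;
import com.amazon.randomcutforest.state.RandomCutForestState;
import com.amazon.randomcutforest.testutils.NormalMixtureTestData;

public class RoundTripSketch {
    public static void main(String[] args) {
        int dimensions = 3;   // illustrative; the test takes these from its fixture
        int sampleSize = 256;
        RandomCutForest forest = RandomCutForest.builder()
                .compact(true).dimensions(dimensions).sampleSize(sampleSize).randomSeed(0).build();
        NormalMixtureTestData testData = new NormalMixtureTestData();
        for (double[] point : testData.generateTestData(sampleSize, dimensions)) {
            forest.update(point);
        }
        RandomCutForestMapper mapper = new RandomCutForestMapper();
        mapper.setSaveExecutorContextEnabled(true); // keep executor settings so the state can be rebuilt
        RandomCutForestState state = mapper.toState(forest);
        RandomCutForest restored = mapper.toModel(state);
        // the restored forest should score points like the original
        double[] probe = new double[dimensions];
        System.out.println(forest.getAnomalyScore(probe) + " vs " + restored.getAnomalyScore(probe));
    }
}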
Use of com.amazon.randomcutforest.testutils.NormalMixtureTestData in project random-cut-forest-by-aws by aws.
Class AttributionExamplesFunctionalTest, method attributionUnMaskingTest.
@Test
public void attributionUnMaskingTest() {
    // starts with the same setup as rrcfTest; data corresponds to two small
    // clusters at x=+/-5.0
    // queries q_1=(0,0,0, ..., 0)
    // inserts updates (0,1,0, ..., 0) a few times
    // queries q_2=(0,1,0, ..., 0)
    // attribution of q_2 is now affected by q_1 (which is still an anomaly)
    int newDimensions = 30;
    randomSeed = 179;
    sampleSize = 256;
    RandomCutForest newForest = RandomCutForest.builder().numberOfTrees(100).sampleSize(sampleSize)
            .dimensions(newDimensions).randomSeed(randomSeed).compact(true)
            .boundingBoxCacheFraction(new Random().nextDouble()).timeDecay(1e-5).build();
    dataSize = 2000 + 5;
    baseMu = 0.0;
    baseSigma = 1.0;
    anomalyMu = 0.0;
    anomalySigma = 1.5;
    transitionToAnomalyProbability = 0.0;
    // ignoring anomaly cluster for now
    transitionToBaseProbability = 1.0;
    Random prg = new Random(0);
    NormalMixtureTestData generator = new NormalMixtureTestData(baseMu, baseSigma, anomalyMu, anomalySigma,
            transitionToAnomalyProbability, transitionToBaseProbability);
    double[][] data = generator.generateTestData(dataSize, newDimensions, 100);
    for (int i = 0; i < 2000; i++) {
        // shrink, shift at random
        for (int j = 0; j < newDimensions; j++) {
            data[i][j] *= 0.01;
        }
        if (prg.nextDouble() < 0.5) {
            data[i][0] += 5.0;
        } else {
            data[i][0] -= 5.0;
        }
        newForest.update(data[i]);
    }
    float[] queryOne = new float[30];
    float[] queryTwo = new float[30];
    queryTwo[1] = 1;
    double originalScoreTwo = newForest.getAnomalyScore(queryTwo);
    // testing approximation with precision 0 (no approximation)
    DiVector originalAttrTwo = newForest.getApproximateDynamicAttribution(queryTwo, 0, true, 0,
            CommonUtils::defaultScoreSeenFunction, CommonUtils::defaultScoreUnseenFunction,
            CommonUtils::defaultDampFunction);
    originalAttrTwo.componentwiseTransform(x -> CommonUtils.defaultScalarNormalizerFunction(x, sampleSize));
    assertTrue(originalScoreTwo > 3.0);
    assertEquals(originalScoreTwo, originalAttrTwo.getHighLowSum(), 1E-5);
    // due to -5 cluster
    assertTrue(originalAttrTwo.high[0] > 0.75);
    // due to +5 cluster
    assertTrue(originalAttrTwo.low[0] > 0.75);
    // due to +1 in query
    assertTrue(originalAttrTwo.high[1] > 1);
    assertTrue(originalAttrTwo.getHighLowSum(0) > originalAttrTwo.getHighLowSum(1));
    double apx = newForest.getApproximateDynamicScore(queryTwo, 0.1, true, 0,
            CommonUtils::defaultScoreSeenFunction, CommonUtils::defaultScoreUnseenFunction,
            CommonUtils::defaultDampFunction);
    assertEquals(originalScoreTwo, CommonUtils.defaultScalarNormalizerFunction(apx, sampleSize), 0.2);
    assertEquals(apx, newForest.getApproximateDynamicAttribution(queryTwo, 0.1, true, 0,
            CommonUtils::defaultScoreSeenFunction, CommonUtils::defaultScoreUnseenFunction,
            CommonUtils::defaultDampFunction).getHighLowSum(), 1e-5);
    // we insert queryOne a few times to make sure it is sampled
    for (int i = 2000; i < 2000 + 5; i++) {
        double score = newForest.getAnomalyScore(queryOne);
        double score2 = newForest.getAnomalyScore(queryTwo);
        DiVector attr2 = newForest.getDynamicAttribution(queryTwo, 0, CommonUtils::defaultScoreSeenFunction,
                CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
        attr2.componentwiseTransform(x -> CommonUtils.defaultScalarNormalizerFunction(x, sampleSize));
        double score3 = newForest.getDynamicScore(queryTwo, 1, CommonUtils::defaultScoreSeenFunction,
                CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
        score3 = CommonUtils.defaultScalarNormalizerFunction(score3, sampleSize);
        DiVector attr3 = newForest.getDynamicAttribution(queryTwo, 1, CommonUtils::defaultScoreSeenFunction,
                CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
        attr3.componentwiseTransform(x -> CommonUtils.defaultScalarNormalizerFunction(x, sampleSize));
        // verify
        assertTrue(score > 2.0);
        assertTrue(score2 > 2.0);
        assertTrue(score3 > 2.0);
        assertEquals(attr2.getHighLowSum(), score2, 1E-5);
        assertEquals(attr3.getHighLowSum(), score3, 1E-5);
        for (int j = 0; j < newDimensions; j++) {
            data[i][j] *= 0.01;
        }
        newForest.update(data[i]);
        // 5 different anomalous points
    }
    double midScoreTwo = newForest.getAnomalyScore(queryTwo);
    DiVector midAttrTwo = newForest.getDynamicAttribution(queryTwo, 0, CommonUtils::defaultScoreSeenFunction,
            CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
    midAttrTwo.componentwiseTransform(x -> CommonUtils.defaultScalarNormalizerFunction(x, sampleSize));
    assertTrue(midScoreTwo > 2.5);
    assertEquals(midScoreTwo, midAttrTwo.getHighLowSum(), 1E-5);
    // due to +1 in query
    assertTrue(midAttrTwo.high[1] > 1);
    assertTrue(midAttrTwo.getHighLowSum(0) < 1.2 * midAttrTwo.high[1]);
    // reversal of the dominant dimension
    // still an anomaly; but the attribution is masked by points
    double midUnmaskedScore = newForest.getDynamicScore(queryTwo, 1, CommonUtils::defaultScoreSeenFunction,
            CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
    midUnmaskedScore = CommonUtils.defaultScalarNormalizerFunction(midUnmaskedScore, sampleSize);
    DiVector midUnmaskedAttr = newForest.getDynamicAttribution(queryTwo, 1, CommonUtils::defaultScoreSeenFunction,
            CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
    midUnmaskedAttr.componentwiseTransform(x -> CommonUtils.defaultScalarNormalizerFunction(x, sampleSize));
    assertTrue(midUnmaskedScore > 3.0);
    assertEquals(midUnmaskedScore, midUnmaskedAttr.getHighLowSum(), 1E-5);
    // due to +1 in query
    assertTrue(midUnmaskedAttr.high[1] > 1);
    assertTrue(midUnmaskedAttr.getHighLowSum(0) > midUnmaskedAttr.getHighLowSum(1));
    // a few more updates, which are identical
    for (int i = 2005; i < 2010; i++) {
        newForest.update(queryOne);
    }
    double finalScoreTwo = newForest.getAnomalyScore(queryTwo);
    DiVector finalAttrTwo = newForest.getDynamicAttribution(queryTwo, 0, CommonUtils::defaultScoreSeenFunction,
            CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
    finalAttrTwo.componentwiseTransform(x -> CommonUtils.defaultScalarNormalizerFunction(x, sampleSize));
    assertTrue(finalScoreTwo > 2.5);
    assertEquals(finalScoreTwo, finalAttrTwo.getHighLowSum(), 1E-5);
    // due to +1 in query
    assertTrue(finalAttrTwo.high[1] > 1);
    assertTrue(2 * finalAttrTwo.getHighLowSum(0) < finalAttrTwo.high[1]);
    // the drop in high[0] and low[0] is steep and the attribution has shifted
    // different thresholds
    double finalUnmaskedScore = newForest.getDynamicScore(queryTwo, 5, CommonUtils::defaultScoreSeenFunction,
            CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
    finalUnmaskedScore = CommonUtils.defaultScalarNormalizerFunction(finalUnmaskedScore, sampleSize);
    DiVector finalUnmaskedAttr = newForest.getDynamicAttribution(queryTwo, 5, CommonUtils::defaultScoreSeenFunction,
            CommonUtils::defaultScoreUnseenFunction, CommonUtils::defaultDampFunction);
    finalUnmaskedAttr.componentwiseTransform(x -> CommonUtils.defaultScalarNormalizerFunction(x, sampleSize));
    assertTrue(finalUnmaskedScore > 3.0);
    assertEquals(finalUnmaskedScore, finalUnmaskedAttr.getHighLowSum(), 1E-5);
    // due to +1 in query
    assertTrue(finalUnmaskedAttr.high[1] > 1);
    assertTrue(finalUnmaskedAttr.getHighLowSum(0) > 0.8 * finalUnmaskedAttr.getHighLowSum(1));
    // the attributions in dimension 0 continue to be reduced, but do not vanish
    // or become small as in the other case; the gap is not a factor of 4
}
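The same per-dimension attribution idea can be sketched without the dynamic-scoring plumbing. The rough, self-contained example below uses getAnomalyScore and getAnomalyAttribution (assumed here as the default-function counterparts of the test's getDynamicScore/getDynamicAttribution) on the same two-cluster data; constants are illustrative and the test's assertions are replaced by printouts.

import java.util.Random;

import com.amazon.randomcutforest.RandomCutForest;
import com.amazon.randomcutforest.returntypes.DiVector;
import com.amazon.randomcutforest.testutils.NormalMixtureTestData;

public class AttributionSketch {
    public static void main(String[] args) {
        int dimensions = 30;
        RandomCutForest forest = RandomCutForest.builder()
                .dimensions(dimensions).sampleSize(256).numberOfTrees(100).randomSeed(179).build();
        // two clusters at x = +/-5 in dimension 0, all other coordinates near 0
        NormalMixtureTestData generator = new NormalMixtureTestData(0.0, 1.0, 0.0, 1.5, 0.0, 1.0);
        Random prg = new Random(0);
        for (double[] point : generator.generateTestData(2000, dimensions)) {
            for (int j = 0; j < dimensions; j++) point[j] *= 0.01;
            point[0] += (prg.nextDouble() < 0.5) ? 5.0 : -5.0;
            forest.update(point);
        }
        double[] query = new double[dimensions];
        query[1] = 1.0; // the q_2 = (0,1,0,...,0) query from the test
        double score = forest.getAnomalyScore(query);
        DiVector attribution = forest.getAnomalyAttribution(query);
        // the per-dimension high/low contributions add back up to the score
        System.out.println(score + " ~= " + attribution.getHighLowSum());
        System.out.println("dimension 0: " + attribution.getHighLowSum(0)
                + ", dimension 1 (high side): " + attribution.high[1]);
    }
}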
Use of com.amazon.randomcutforest.testutils.NormalMixtureTestData in project random-cut-forest-by-aws by aws.
Class RandomCutForestConsistencyFunctionalTest, method testConsistentScoring.
@Test
public void testConsistentScoring() {
    RandomCutForest.Builder<?> builder = RandomCutForest.builder().dimensions(dimensions).sampleSize(sampleSize).randomSeed(randomSeed);
    RandomCutForest pointerCachedSequential = builder.compact(false).boundingBoxCacheFraction(1.0).parallelExecutionEnabled(false).build();
    RandomCutForest pointerCachedParallel = builder.compact(false).boundingBoxCacheFraction(1.0).parallelExecutionEnabled(true).build();
    RandomCutForest pointerCachedRandomSequential = builder.compact(false).boundingBoxCacheFraction(new Random().nextDouble()).parallelExecutionEnabled(false).build();
    RandomCutForest pointerCachedRandomParallel = builder.compact(false).boundingBoxCacheFraction(new Random().nextDouble()).parallelExecutionEnabled(true).build();
    RandomCutForest pointerUncachedSequential = builder.compact(false).boundingBoxCacheFraction(0.0).parallelExecutionEnabled(false).build();
    RandomCutForest pointerUncachedParallel = builder.compact(false).boundingBoxCacheFraction(0.0).parallelExecutionEnabled(true).build();
    RandomCutForest compactCachedSequential = builder.compact(true).boundingBoxCacheFraction(1.0).parallelExecutionEnabled(false).build();
    RandomCutForest compactCachedParallel = builder.compact(true).boundingBoxCacheFraction(1.0).parallelExecutionEnabled(true).build();
    RandomCutForest compactUncachedSequential = builder.compact(true).boundingBoxCacheFraction(0.0).parallelExecutionEnabled(false).build();
    RandomCutForest compactUncachedParallel = builder.compact(true).boundingBoxCacheFraction(0.0).parallelExecutionEnabled(true).build();
    RandomCutForest compactCachedRandomSequential = builder.compact(true).boundingBoxCacheFraction(new Random().nextDouble()).parallelExecutionEnabled(false).build();
    RandomCutForest compactCachedRandomParallel = builder.compact(true).boundingBoxCacheFraction(new Random().nextDouble()).parallelExecutionEnabled(true).build();
    NormalMixtureTestData testData = new NormalMixtureTestData();
    double delta = 1e-10;
    int anomalies = 0;
    for (double[] point : testData.generateTestData(testSize, dimensions, 99)) {
        double score = pointerCachedSequential.getAnomalyScore(point);
        if (score > 0) {
            anomalies++;
        }
        assertEquals(score, pointerCachedParallel.getAnomalyScore(point), delta);
        assertEquals(score, pointerUncachedSequential.getAnomalyScore(point), delta);
        assertEquals(score, pointerUncachedParallel.getAnomalyScore(point), delta);
        assertEquals(score, compactCachedSequential.getAnomalyScore(point), delta);
        assertEquals(score, compactCachedParallel.getAnomalyScore(point), delta);
        assertEquals(score, compactUncachedSequential.getAnomalyScore(point), delta);
        assertEquals(score, compactUncachedParallel.getAnomalyScore(point), delta);
        assertEquals(score, pointerCachedRandomSequential.getAnomalyScore(point), delta);
        assertEquals(score, pointerCachedRandomParallel.getAnomalyScore(point), delta);
        assertEquals(score, compactCachedRandomSequential.getAnomalyScore(point), delta);
        assertEquals(score, compactCachedRandomParallel.getAnomalyScore(point), delta);
        pointerCachedSequential.update(point);
        pointerCachedParallel.update(point);
        pointerUncachedSequential.update(point);
        pointerUncachedParallel.update(point);
        pointerCachedRandomSequential.update(point);
        pointerCachedRandomParallel.update(point);
        compactCachedSequential.update(point);
        compactCachedParallel.update(point);
        compactUncachedSequential.update(point);
        compactUncachedParallel.update(point);
        compactCachedRandomSequential.update(point);
        compactCachedRandomParallel.update(point);
    }
    // verify that the test is nontrivial
    assertTrue(anomalies > 0);
}
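A pared-down sketch of the same consistency check, comparing only a sequential and a parallel forest built with identically seeded builders; the class name, data size, and seed below are illustrative, and the test framework's assertions are replaced by a plain check.

import com.amazon.randomcutforest.RandomCutForest;
import com.amazon.randomcutforest.testutils.NormalMixtureTestData;

public class ConsistencySketch {
    public static void main(String[] args) {
        int dimensions = 5;
        int sampleSize = 256;
        long seed = 123L;
        // two configurations that differ only in execution strategy, not in randomness
        RandomCutForest sequential = RandomCutForest.builder().dimensions(dimensions).sampleSize(sampleSize)
                .randomSeed(seed).parallelExecutionEnabled(false).build();
        RandomCutForest parallel = RandomCutForest.builder().dimensions(dimensions).sampleSize(sampleSize)
                .randomSeed(seed).parallelExecutionEnabled(true).build();
        NormalMixtureTestData testData = new NormalMixtureTestData();
        for (double[] point : testData.generateTestData(1000, dimensions)) {
            double a = sequential.getAnomalyScore(point);
            double b = parallel.getAnomalyScore(point);
            if (Math.abs(a - b) > 1e-10) {
                throw new AssertionError("scores diverged: " + a + " vs " + b);
            }
            sequential.update(point);
            parallel.update(point);
        }
        System.out.println("scores agreed on every point");
    }
}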
Use of com.amazon.randomcutforest.testutils.NormalMixtureTestData in project random-cut-forest-by-aws by aws.
Class RandomCutForestShingledFunctionalTest, method oneTimeSetUp.
@BeforeAll
public static void oneTimeSetUp() {
    numberOfTrees = 100;
    sampleSize = 256;
    dimensions = 2;
    randomSeed = 123;
    shingleSize = 3;
    shingleBuilder = new ShingleBuilder(dimensions, shingleSize);
    forest = RandomCutForest.builder().numberOfTrees(numberOfTrees).sampleSize(sampleSize)
            .dimensions(shingleBuilder.getShingledPointSize()).randomSeed(randomSeed).centerOfMassEnabled(true)
            .storeSequenceIndexesEnabled(true).build();
    dataSize = 10_000;
    baseMu = 0.0;
    baseSigma = 1.0;
    anomalyMu = 5.0;
    anomalySigma = 1.5;
    transitionToAnomalyProbability = 0.01;
    transitionToBaseProbability = 0.4;
    NormalMixtureTestData generator = new NormalMixtureTestData(baseMu, baseSigma, anomalyMu, anomalySigma,
            transitionToAnomalyProbability, transitionToBaseProbability);
    double[][] data = generator.generateTestData(dataSize, dimensions);
    for (int i = 0; i < dataSize; i++) {
        shingleBuilder.addPoint(data[i]);
        if (shingleBuilder.isFull()) {
            forest.update(shingleBuilder.getShingle());
        }
    }
}
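For reference, a self-contained sketch of the same shingling pattern: a ShingleBuilder turns a stream of 2-dimensional points into 6-dimensional shingled points that the forest scores and ingests. The ShingleBuilder import path (com.amazon.randomcutforest.util) is an assumption, and the constants mirror the fixture values above.

import com.amazon.randomcutforest.RandomCutForest;
import com.amazon.randomcutforest.testutils.NormalMixtureTestData;
import com.amazon.randomcutforest.util.ShingleBuilder; // assumed package for ShingleBuilder

public class ShingleSketch {
    public static void main(String[] args) {
        int dimensions = 2;
        int shingleSize = 3;
        ShingleBuilder shingleBuilder = new ShingleBuilder(dimensions, shingleSize);
        RandomCutForest forest = RandomCutForest.builder()
                .dimensions(shingleBuilder.getShingledPointSize()) // 2 * 3 = 6
                .sampleSize(256).numberOfTrees(100).randomSeed(123).build();
        NormalMixtureTestData generator = new NormalMixtureTestData(0.0, 1.0, 5.0, 1.5, 0.01, 0.4);
        for (double[] point : generator.generateTestData(10_000, dimensions)) {
            shingleBuilder.addPoint(point);
            if (shingleBuilder.isFull()) {
                double[] shingle = shingleBuilder.getShingle();
                double score = forest.getAnomalyScore(shingle); // score before updating, as in the tests
                forest.update(shingle);
                if (score > 2.0) {
                    System.out.println("possible anomaly, score " + score);
                }
            }
        }
    }
}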
Use of com.amazon.randomcutforest.testutils.NormalMixtureTestData in project random-cut-forest-by-aws by aws.
Class ConditionalFieldTest, method SimpleTest.
@Test
public void SimpleTest() {
    int newDimensions = 30;
    randomSeed = 101;
    sampleSize = 256;
    RandomCutForest newForest = RandomCutForest.builder().numberOfTrees(100).sampleSize(sampleSize)
            .dimensions(newDimensions).randomSeed(randomSeed).compact(true).boundingBoxCacheFraction(0.0).build();
    dataSize = 2000 + 5;
    baseMu = 0.0;
    baseSigma = 1.0;
    anomalyMu = 0.0;
    anomalySigma = 1.0;
    transitionToAnomalyProbability = 0.0;
    // ignoring anomaly cluster for now
    transitionToBaseProbability = 1.0;
    Random prg = new Random(0);
    NormalMixtureTestData generator = new NormalMixtureTestData(baseMu, baseSigma, anomalyMu, anomalySigma,
            transitionToAnomalyProbability, transitionToBaseProbability);
    double[][] data = generator.generateTestData(dataSize, newDimensions, 100);
    for (int i = 0; i < 2000; i++) {
        // shrink, shift at random
        for (int j = 0; j < newDimensions; j++) {
            data[i][j] *= 0.01;
        }
        if (prg.nextDouble() < 0.5) {
            data[i][0] += 5.0;
        } else {
            data[i][0] -= 5.0;
        }
        newForest.update(data[i]);
    }
    float[] queryOne = new float[newDimensions];
    float[] queryTwo = new float[newDimensions];
    queryTwo[1] = 1;
    ConditionalSampleSummary summary = newForest.getConditionalFieldSummary(queryOne, 1, new int[] { 0 }, 1);
    assert (summary.summaryPoints.length == 2);
    assert (summary.relativeLikelihood.length == 2);
    assert (Math.abs(summary.summaryPoints[0][0] - 5.0) < 0.01 || Math.abs(summary.summaryPoints[0][0] + 5.0) < 0.01);
    assert (Math.abs(summary.summaryPoints[1][0] - 5.0) < 0.01 || Math.abs(summary.summaryPoints[1][0] + 5.0) < 0.01);
    assert (summary.relativeLikelihood[0] > 0.25);
    assert (summary.relativeLikelihood[1] > 0.25);
    summary = newForest.getConditionalFieldSummary(queryTwo, 1, new int[] { 0 }, 1);
    assert (summary.summaryPoints.length == 2);
    assert (summary.relativeLikelihood.length == 2);
    assertEquals(summary.summaryPoints[0][1], 1, 1e-6);
    assertEquals(summary.summaryPoints[1][1], 1, 1e-6);
    assert (Math.abs(summary.summaryPoints[0][0] - 5.0) < 0.01 || Math.abs(summary.summaryPoints[0][0] + 5.0) < 0.01);
    assert (Math.abs(summary.summaryPoints[1][0] - 5.0) < 0.01 || Math.abs(summary.summaryPoints[1][0] + 5.0) < 0.01);
    assert (summary.relativeLikelihood[0] > 0.25);
    assert (summary.relativeLikelihood[1] > 0.25);
}
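As a related single-point sketch (the test above exercises the two-mode summary; imputeMissingValues is assumed here as the simpler cousin that returns one completed point), the forest can be asked to fill in the masked dimension 0, and the imputed value should land near one of the two learned clusters at +/-5:

import java.util.Random;

import com.amazon.randomcutforest.RandomCutForest;
import com.amazon.randomcutforest.testutils.NormalMixtureTestData;

public class ConditionalFieldSketch {
    public static void main(String[] args) {
        int dimensions = 30;
        RandomCutForest forest = RandomCutForest.builder()
                .dimensions(dimensions).sampleSize(256).numberOfTrees(100).randomSeed(101).compact(true).build();
        NormalMixtureTestData generator = new NormalMixtureTestData(0.0, 1.0, 0.0, 1.0, 0.0, 1.0);
        double[][] data = generator.generateTestData(2000, dimensions, 100);
        Random prg = new Random(0);
        for (double[] point : data) {
            for (int j = 0; j < dimensions; j++) point[j] *= 0.01;
            point[0] += (prg.nextDouble() < 0.5) ? 5.0 : -5.0; // two clusters at x = +/-5
            forest.update(point);
        }
        // ask the forest to fill in dimension 0 for a query that is otherwise near the origin
        double[] query = new double[dimensions];
        double[] imputed = forest.imputeMissingValues(query, 1, new int[] { 0 });
        System.out.println("imputed dimension 0: " + imputed[0]); // expected near +5 or -5
    }
}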