Use of com.amazon.randomcutforest.testutils.NormalMixtureTestData in project random-cut-forest-by-aws by aws.
From class RandomCutForestFunctionalTest, method testSideEffectsB.
@ParameterizedTest
@ArgumentsSource(TestForestProvider.class)
public void testSideEffectsB(RandomCutForest forest) {
    /* the changes to score and attribution should be in sync */
    DiVector initial = forest.getAnomalyAttribution(new double[] { 0.0, 0.0, 0.0 });
    NormalMixtureTestData generator2 = new NormalMixtureTestData(baseMu, baseSigma, anomalyMu, anomalySigma,
            transitionToAnomalyProbability, transitionToBaseProbability);
    double[][] newData = generator2.generateTestData(dataSize, dimensions);
    for (int i = 0; i < dataSize; i++) {
        forest.getAnomalyAttribution(newData[i]);
    }
    double newScore = forest.getAnomalyScore(new double[] { 0.0, 0.0, 0.0 });
    DiVector newVector = forest.getAnomalyAttribution(new double[] { 0.0, 0.0, 0.0 });
    assertEquals(initial.getHighLowSum(), newVector.getHighLowSum(), 10E-10);
    assertEquals(initial.getHighLowSum(), newScore, 1E-10);
    assertArrayEquals(initial.high, newVector.high, 1E-10);
    assertArrayEquals(initial.low, newVector.low, 1E-10);
}
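The method above reads fields such as baseMu and dataSize from the enclosing test class. A minimal sketch of that shared fixture, with illustrative values (mirroring the assignments in testShadowBuffer below; the actual setup in RandomCutForestFunctionalTest may differ):

// Hypothetical shared fixture; the names mirror the fields used above, the values are illustrative.
private static int dataSize = 10_000;
private static int dimensions = 3;
private static double baseMu = 0.0;
private static double baseSigma = 1.0;
private static double anomalyMu = 5.0;
private static double anomalySigma = 1.5;
private static double transitionToAnomalyProbability = 0.01;
private static double transitionToBaseProbability = 0.4;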
Use of com.amazon.randomcutforest.testutils.NormalMixtureTestData in project random-cut-forest-by-aws by aws.
From class RandomCutForestFunctionalTest, method testShadowBuffer.
@Test
public void testShadowBuffer() {
    /**
     * This test checks that the attribution does *not* change as a ratio while
     * more copies of a point are added. The shadow box in the
     * DirectionalAttributionVisitor allows us to simulate a deletion without
     * performing one. As many copies of the same point accumulate, the point
     * becomes an inlier and the attribution eventually flattens toward uniform
     * across directions.
     *
     * We create a new forest so that the other tests are unaffected.
     */
    numberOfTrees = 100;
    sampleSize = 256;
    dimensions = 3;
    randomSeed = 123;
    RandomCutForest newForest = RandomCutForest.builder().numberOfTrees(numberOfTrees).sampleSize(sampleSize)
            .dimensions(dimensions).randomSeed(randomSeed).centerOfMassEnabled(true).timeDecay(1e-5)
            .storeSequenceIndexesEnabled(true).build();
    dataSize = 10_000;
    baseMu = 0.0;
    baseSigma = 1.0;
    anomalyMu = 5.0;
    anomalySigma = 1.5;
    transitionToAnomalyProbability = 0.01;
    transitionToBaseProbability = 0.4;
    NormalMixtureTestData generator = new NormalMixtureTestData(baseMu, baseSigma, anomalyMu, anomalySigma,
            transitionToAnomalyProbability, transitionToBaseProbability);
    double[][] data = generator.generateTestData(dataSize, dimensions);
    for (int i = 0; i < dataSize; i++) {
        newForest.update(data[i]);
    }
    double[] point = new double[] { -8.0, -8.0, 0.0 };
    DiVector result = newForest.getAnomalyAttribution(point);
    double score = newForest.getAnomalyScore(point);
    assertEquals(score, result.getHighLowSum(), 1E-5);
    assertTrue(score > 2);
    assertTrue(result.getHighLowSum(2) < 0.2);
    // 256/10_000
    for (int i = 0; i < 5; i++) {
        newForest.update(point);
    }
    DiVector newResult = newForest.getAnomalyAttribution(point);
    double newScore = newForest.getAnomalyScore(point);
    assertEquals(newScore, newResult.getHighLowSum(), 1E-5);
    assertTrue(newScore < score);
    for (int j = 0; j < 3; j++) {
        // the proportional relationship holds at larger attribution values
        if (result.high[j] > 0.2) {
            assertEquals(score * newResult.high[j], newScore * result.high[j], 0.1 * score);
        } else {
            assertTrue(newResult.high[j] < 0.2);
        }
        if (result.low[j] > 0.2) {
            assertEquals(score * newResult.low[j], newScore * result.low[j], 0.1 * score);
        } else {
            assertTrue(newResult.low[j] < 0.2);
        }
    }
    // this will make the point an inlier
    for (int i = 0; i < 5000; i++) {
        newForest.update(point);
    }
    DiVector finalResult = newForest.getAnomalyAttribution(point);
    double finalScore = newForest.getAnomalyScore(point);
    assertTrue(finalScore < 1);
    assertEquals(finalScore, finalResult.getHighLowSum(), 1E-5);
    for (int j = 0; j < 3; j++) {
        // the proportional relationship holds at larger attribution values
        if (finalResult.high[j] > 0.2) {
            assertEquals(score * finalResult.high[j], finalScore * result.high[j], 0.1 * score);
        } else {
            // note: the original snippet checked newResult.high[j] here, which looks
            // like a copy-paste slip; the symmetric low-side branch uses finalResult
            assertTrue(finalResult.high[j] < 0.2);
        }
        if (finalResult.low[j] > 0.2) {
            assertEquals(score * finalResult.low[j], finalScore * result.low[j], 0.1 * score);
        } else {
            assertTrue(finalResult.low[j] < 0.2);
        }
    }
}
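The ratio check in the loops above is easier to read when rearranged: score * newResult.high[j] ≈ newScore * result.high[j] is the cross-multiplied form of newResult.high[j] / newScore ≈ result.high[j] / score, i.e. each direction's share of the total score is preserved. A hypothetical helper (not part of the library) that makes the intent explicit:

// Hypothetical helper, not part of the RCF library: asserts that the fraction of
// the score attributed to one direction is unchanged between two measurements.
static void assertProportionalAttribution(double oldScore, double oldAttribution,
        double newScore, double newAttribution, double tolerance) {
    // cross-multiplied to avoid dividing by a near-zero score
    assertEquals(oldScore * newAttribution, newScore * oldAttribution, tolerance);
}

With this helper, the loop body would read assertProportionalAttribution(score, result.high[j], newScore, newResult.high[j], 0.1 * score).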
Use of com.amazon.randomcutforest.testutils.NormalMixtureTestData in project random-cut-forest-by-aws by aws.
From class AttributionExamplesFunctionalTest, method RRCFattributionTest.
@Test
public void RRCFattributionTest() {
    // starts with the same setup as rrcfTest; data corresponds to two small
    // clusters at x = +/-5.0
    // queries q_1 = (0, 0, 0, ..., 0)
    // inserts points near q_1 = (0, 0, ..., 0) a few times
    // queries q_2 = (0, 1, 0, ..., 0)
    // attribution of q_2 is now affected by q_1 (which is still an anomaly)
    int newDimensions = 30;
    randomSeed = 101;
    sampleSize = 256;
    RandomCutForest newForest = RandomCutForest.builder().numberOfTrees(100).sampleSize(sampleSize)
            .dimensions(newDimensions).randomSeed(randomSeed).compact(true).boundingBoxCacheFraction(0.0).build();
    dataSize = 2000 + 5;
    baseMu = 0.0;
    baseSigma = 1.0;
    anomalyMu = 0.0;
    anomalySigma = 1.0;
    transitionToAnomalyProbability = 0.0;
    // ignoring the anomaly cluster for now
    transitionToBaseProbability = 1.0;
    Random prg = new Random(0);
    NormalMixtureTestData generator = new NormalMixtureTestData(baseMu, baseSigma, anomalyMu, anomalySigma,
            transitionToAnomalyProbability, transitionToBaseProbability);
    double[][] data = generator.generateTestData(dataSize, newDimensions, 100);
    for (int i = 0; i < 2000; i++) {
        // shrink toward the origin, then shift the first coordinate to +/-5 at random
        for (int j = 0; j < newDimensions; j++) {
            data[i][j] *= 0.01;
        }
        if (prg.nextDouble() < 0.5) {
            data[i][0] += 5.0;
        } else {
            data[i][0] -= 5.0;
        }
        newForest.update(data[i]);
    }
    double[] queryOne = new double[newDimensions];
    double[] queryTwo = new double[newDimensions];
    queryTwo[1] = 1;
    double originalScoreTwo = newForest.getAnomalyScore(queryTwo);
    DiVector originalAttrTwo = newForest.getAnomalyAttribution(queryTwo);
    assertTrue(originalScoreTwo > 3.0);
    assertEquals(originalScoreTwo, originalAttrTwo.getHighLowSum(), 1E-5);
    assertTrue(originalAttrTwo.high[0] > 1.0); // due to the -5 cluster
    assertTrue(originalAttrTwo.low[0] > 1.0); // due to the +5 cluster
    assertTrue(originalAttrTwo.high[1] > 1); // due to the +1 in the query
    assertTrue(originalAttrTwo.getHighLowSum(0) > 1.1 * originalAttrTwo.getHighLowSum(1));
    // we insert points near queryOne a few times to make sure the region is sampled
    for (int i = 2000; i < 2000 + 5; i++) {
        double score = newForest.getAnomalyScore(queryOne);
        double score2 = newForest.getAnomalyScore(queryTwo);
        DiVector attr2 = newForest.getAnomalyAttribution(queryTwo);
        assertTrue(score > 2.0);
        assertTrue(score2 > 2.0);
        assertEquals(attr2.getHighLowSum(), score2, 1E-5);
        for (int j = 0; j < newDimensions; j++) {
            data[i][j] *= 0.01;
        }
        newForest.update(data[i]); // 5 different anomalous points
    }
    double midScoreTwo = newForest.getAnomalyScore(queryTwo);
    DiVector midAttrTwo = newForest.getAnomalyAttribution(queryTwo);
    assertTrue(midScoreTwo > 2.4);
    assertEquals(midScoreTwo, midAttrTwo.getHighLowSum(), 1E-5);
    assertTrue(midAttrTwo.high[0] < 1); // the -5 cluster now explains less of the score
    assertTrue(midAttrTwo.low[0] < 1); // likewise for the +5 cluster
    assertTrue(midAttrTwo.high[1] > 1); // due to the +1 in the query
    assertTrue(midAttrTwo.getHighLowSum(0) < 1.1 * midAttrTwo.high[1]);
    // a few more updates, all identical to queryOne
    for (int i = 2005; i < 2010; i++) {
        newForest.update(queryOne);
    }
    double finalScoreTwo = newForest.getAnomalyScore(queryTwo);
    DiVector finalAttrTwo = newForest.getAnomalyAttribution(queryTwo);
    assertTrue(finalScoreTwo > 2.4);
    assertEquals(finalScoreTwo, finalAttrTwo.getHighLowSum(), 1E-5);
    assertTrue(finalAttrTwo.high[0] < 0.5); // the -5 cluster's contribution keeps dropping
    assertTrue(finalAttrTwo.low[0] < 0.5); // likewise for the +5 cluster
    assertTrue(finalAttrTwo.high[1] > 1); // due to the +1 in the query
    assertTrue(2.5 * finalAttrTwo.getHighLowSum(0) < finalAttrTwo.high[1]);
    // the drop in high[0] and low[0] is steep and the attribution has shifted toward dimension 1
}
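For readers new to DiVector: it carries one high and one low contribution per dimension, and getHighLowSum() totals them to match the anomaly score. A minimal sketch of reading an attribution, assuming a forest trained as in the test above; the high/low interpretation follows the comments in the tests rather than a formal API guarantee:

// Sketch: interpreting a DiVector attribution; assumes `newForest` was trained as above.
double[] query = new double[30];
query[1] = 1.0;
double score = newForest.getAnomalyScore(query);
DiVector attribution = newForest.getAnomalyAttribution(query);
for (int j = 0; j < attribution.high.length; j++) {
    // high[j]: score contribution from the query sitting above nearby points in dimension j;
    // low[j]: contribution from it sitting below them
    System.out.printf("dim %d: high=%.3f, low=%.3f%n", j, attribution.high[j], attribution.low[j]);
}
assertEquals(score, attribution.getHighLowSum(), 1E-5); // directional parts sum to the score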
Use of com.amazon.randomcutforest.testutils.NormalMixtureTestData in project random-cut-forest-by-aws by aws.
From class RandomCutForestConsistencyFunctionalTest, method testConsistentScoringSinglePrecision.
@Test
public void testConsistentScoringSinglePrecision() {
    RandomCutForest.Builder<?> builder = RandomCutForest.builder().dimensions(dimensions).sampleSize(sampleSize)
            .randomSeed(randomSeed).parallelExecutionEnabled(false).compact(true);
    RandomCutForest compactFloatCached = builder.boundingBoxCacheFraction(1.0)
            .precision(Precision.FLOAT_32).build();
    RandomCutForest compactFloatCachedParallel = builder.boundingBoxCacheFraction(1.0)
            .precision(Precision.FLOAT_32).parallelExecutionEnabled(true).build();
    RandomCutForest compactFloatUncached = builder.boundingBoxCacheFraction(0.0)
            .precision(Precision.FLOAT_32).build();
    RandomCutForest compactFloatCachedRandom = builder.boundingBoxCacheFraction(new Random().nextDouble())
            .precision(Precision.FLOAT_32).build();
    RandomCutForest compactFloatCachedRandomParallel = builder.boundingBoxCacheFraction(new Random().nextDouble())
            .precision(Precision.FLOAT_32).parallelExecutionEnabled(true).build();
    RandomCutForest compactFloatUncachedParallel = builder.boundingBoxCacheFraction(0.0)
            .precision(Precision.FLOAT_32).parallelExecutionEnabled(true).build();
    RandomCutForest compactDoubleCached = builder.boundingBoxCacheFraction(1.0)
            .precision(Precision.FLOAT_64).build();
    NormalMixtureTestData testData = new NormalMixtureTestData();
    int anomalies = 0;
    for (double[] point : testData.generateTestData(testSize, dimensions, 99)) {
        double score = compactFloatCached.getAnomalyScore(point);
        if (score > 0) {
            anomalies++;
        }
        assertEquals(score, compactFloatUncached.getAnomalyScore(point), 1e-10);
        assertEquals(score, compactFloatUncachedParallel.getAnomalyScore(point), 1e-10);
        assertEquals(score, compactFloatCachedRandom.getAnomalyScore(point), 1e-10);
        assertEquals(score, compactFloatCachedRandomParallel.getAnomalyScore(point), 1e-10);
        // we expect some loss of precision when comparing to the score computed as a double
        assertEquals(score, compactDoubleCached.getAnomalyScore(point), 1e-2);
        compactFloatCached.update(point);
        compactFloatCachedParallel.update(point);
        compactFloatUncached.update(point);
        compactFloatUncachedParallel.update(point);
        compactFloatCachedRandom.update(point);
        compactFloatCachedRandomParallel.update(point);
        compactDoubleCached.update(point);
    }
    // verify that the test is nontrivial
    assertTrue(anomalies > 0);
}
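The pattern the test relies on generalizes: two forests built with the same randomSeed and fed the same updates produce identical scores regardless of bounding-box caching or parallelism settings. A minimal standalone sketch of that idea, with illustrative parameter values:

// Sketch: same seed + same updates => same scores; parameter values are illustrative.
RandomCutForest a = RandomCutForest.builder().dimensions(3).sampleSize(256).randomSeed(42)
        .boundingBoxCacheFraction(1.0).build();
RandomCutForest b = RandomCutForest.builder().dimensions(3).sampleSize(256).randomSeed(42)
        .boundingBoxCacheFraction(0.0).build();
NormalMixtureTestData generator = new NormalMixtureTestData();
for (double[] point : generator.generateTestData(1000, 3, 7)) {
    assertEquals(a.getAnomalyScore(point), b.getAnomalyScore(point), 1e-10);
    a.update(point);
    b.update(point);
}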
Use of com.amazon.randomcutforest.testutils.NormalMixtureTestData in project random-cut-forest-by-aws by aws.
From class RandomCutForestFunctionalTest, method testSideEffectsA.
@ParameterizedTest
@ArgumentsSource(TestForestProvider.class)
public void testSideEffectsA(RandomCutForest forest) {
    double score = forest.getAnomalyScore(new double[] { 0.0, 0.0, 0.0 });
    NormalMixtureTestData generator2 = new NormalMixtureTestData(baseMu, baseSigma, anomalyMu, anomalySigma,
            transitionToAnomalyProbability, transitionToBaseProbability);
    double[][] newData = generator2.generateTestData(dataSize, dimensions);
    for (int i = 0; i < dataSize; i++) {
        forest.getAnomalyScore(newData[i]);
    }
    double newScore = forest.getAnomalyScore(new double[] { 0.0, 0.0, 0.0 });
    assertEquals(score, newScore, 10E-10);
}
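Taken together, these tests exercise NormalMixtureTestData as a reusable generator: judging by its parameter names, a two-component Gaussian mixture driven by transitions between a base and an anomaly regime. A minimal standalone usage sketch, with illustrative parameter values and an illustrative score threshold:

import com.amazon.randomcutforest.RandomCutForest;
import com.amazon.randomcutforest.testutils.NormalMixtureTestData;

public class NormalMixtureExample {
    public static void main(String[] args) {
        // mostly N(0, 1) points, occasionally jumping to N(5, 1.5); values are illustrative
        NormalMixtureTestData generator = new NormalMixtureTestData(0.0, 1.0, 5.0, 1.5, 0.01, 0.4);
        double[][] data = generator.generateTestData(10_000, 3);
        RandomCutForest forest = RandomCutForest.builder().dimensions(3).sampleSize(256).randomSeed(0).build();
        for (double[] point : data) {
            double score = forest.getAnomalyScore(point);
            if (score > 1.0) { // threshold chosen for illustration only
                System.out.println("possible anomaly, score = " + score);
            }
            forest.update(point);
        }
    }
}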