Example usage of com.amazon.randomcutforest.testutils.NormalMixtureTestData in the project random-cut-forest-by-aws by AWS.
From the class RandomCutForestFunctionalTest, method oneTimeSetUp:
@BeforeAll
public static void oneTimeSetUp() {
    // Shared forest configuration. The generated dataset is stochastic and
    // will contain different values on different runs.
    numberOfTrees = 100;
    sampleSize = 256;
    dimensions = 3;
    randomSeed = 123;

    parallelExecutionForest = RandomCutForest.builder()
            .numberOfTrees(numberOfTrees)
            .sampleSize(sampleSize)
            .dimensions(dimensions)
            .randomSeed(randomSeed)
            .centerOfMassEnabled(true)
            .storeSequenceIndexesEnabled(true)
            .build();

    // Same settings as above, but restricted to a single thread so the two
    // forests can be compared against each other.
    singleThreadedForest = RandomCutForest.builder()
            .numberOfTrees(numberOfTrees)
            .sampleSize(sampleSize)
            .dimensions(dimensions)
            .randomSeed(randomSeed)
            .centerOfMassEnabled(true)
            .storeSequenceIndexesEnabled(true)
            .parallelExecutionEnabled(false)
            .build();

    // Mixture-of-Gaussians parameters: a base cluster and an anomaly cluster
    // with the given means, deviations, and transition probabilities.
    dataSize = 10_000;
    baseMu = 0.0;
    baseSigma = 1.0;
    anomalyMu = 5.0;
    anomalySigma = 1.5;
    transitionToAnomalyProbability = 0.01;
    transitionToBaseProbability = 0.4;

    NormalMixtureTestData generator = new NormalMixtureTestData(baseMu, baseSigma, anomalyMu, anomalySigma,
            transitionToAnomalyProbability, transitionToBaseProbability);
    double[][] points = generator.generateTestData(dataSize, dimensions);

    // Feed both forests the identical stream of points.
    for (int index = 0; index < dataSize; index++) {
        parallelExecutionForest.update(points[index]);
        singleThreadedForest.update(points[index]);
    }
}
Example usage of com.amazon.randomcutforest.testutils.NormalMixtureTestData in the project random-cut-forest-by-aws by AWS.
From the class HyperTreeTest, method setup:
// ===========================================================
@BeforeAll
public static void setup() {
    // This is a tree test, so a single tree is sufficient.
    numberOfTrees = 1;
    sampleSize = 256;
    dimensions = 30;
    dataSize = 2000;

    // The "anomaly" cluster shares the base mean, and with a transition
    // probability of 0 it is never entered — effectively ignoring the
    // anomaly cluster for now.
    baseMu = 0.0;
    baseSigma = 1.0;
    anomalyMu = 0.0;
    anomalySigma = 1.5;
    transitionToAnomalyProbability = 0.0;
    transitionToBaseProbability = 1.0;

    generator = new NormalMixtureTestData(baseMu, baseSigma, anomalyMu, anomalySigma,
            transitionToAnomalyProbability, transitionToBaseProbability);
}
Example usage of com.amazon.randomcutforest.testutils.NormalMixtureTestData in the project random-cut-forest-by-aws by AWS.
From the class DynamicSampling, method run:
/**
 * Demonstrates how time decay controls the rate at which a forest adapts to a
 * change in the input distribution: {@code forest2} decays ten times faster
 * than {@code forest}, and {@code copyForest} is cloned from {@code forest}
 * and then forced to catch up by raising its decay fifty-fold.
 */
@Override
public void run() throws Exception {
    // Create and populate a random cut forest
    int dimensions = 4;
    int numberOfTrees = 50;
    int sampleSize = 256;
    Precision precision = Precision.FLOAT_64;
    int dataSize = 4 * sampleSize;
    NormalMixtureTestData testData = new NormalMixtureTestData();

    // Two forests identical except for time decay (set below).
    RandomCutForest forest = RandomCutForest.builder().compact(true).dimensions(dimensions).randomSeed(0)
            .numberOfTrees(numberOfTrees).sampleSize(sampleSize).precision(precision).build();
    RandomCutForest forest2 = RandomCutForest.builder().compact(true).dimensions(dimensions).randomSeed(0)
            .numberOfTrees(numberOfTrees).sampleSize(sampleSize).precision(precision).build();

    // camelCase per Java naming conventions (previously first_anomalies / second_anomalies)
    int firstAnomalies = 0;
    int secondAnomalies = 0;
    // forest2 forgets older points ten times faster, so it adapts more quickly
    forest2.setTimeDecay(10 * forest2.getTimeDecay());

    // Phase 1: both forests see the same base distribution.
    for (double[] point : testData.generateTestData(dataSize, dimensions)) {
        if (forest.getAnomalyScore(point) > 1.0) {
            firstAnomalies++;
        }
        if (forest2.getAnomalyScore(point) > 1.0) {
            secondAnomalies++;
        }
        forest.update(point);
        forest2.update(point);
    }
    System.out.println("Unusual scores: forest one " + firstAnomalies + ", second one " + secondAnomalies);
    // should be roughly equal

    // Phase 2: shift the distribution; the faster-decaying forest2 should
    // stop flagging the new regime as anomalous sooner.
    firstAnomalies = secondAnomalies = 0;
    testData = new NormalMixtureTestData(-3, 40);
    for (double[] point : testData.generateTestData(dataSize, dimensions)) {
        if (forest.getAnomalyScore(point) > 1.0) {
            firstAnomalies++;
        }
        if (forest2.getAnomalyScore(point) > 1.0) {
            secondAnomalies++;
        }
        forest.update(point);
        forest2.update(point);
    }
    System.out.println("Unusual scores: forest one " + firstAnomalies + ", second one " + secondAnomalies);
    // forest2 should adapt faster

    // Phase 3: clone forest via mapper round-trip, then force the copy to
    // adjust rapidly by raising its time decay.
    firstAnomalies = secondAnomalies = 0;
    RandomCutForestMapper mapper = new RandomCutForestMapper();
    mapper.setSaveExecutorContextEnabled(true);
    RandomCutForest copyForest = mapper.toModel(mapper.toState(forest));
    copyForest.setTimeDecay(50 * forest.getTimeDecay());
    // force an adjustment to catch up
    // NOTE(review): the second argument (-40) is negative; if it is a standard
    // deviation this looks unintended — confirm the NormalMixtureTestData
    // constructor semantics.
    testData = new NormalMixtureTestData(-10, -40);
    int forcedChangeAnomalies = 0;
    for (double[] point : testData.generateTestData(dataSize, dimensions)) {
        if (forest.getAnomalyScore(point) > 1.0) {
            firstAnomalies++;
        }
        if (forest2.getAnomalyScore(point) > 1.0) {
            secondAnomalies++;
        }
        if (copyForest.getAnomalyScore(point) > 1.0) {
            forcedChangeAnomalies++;
        }
        copyForest.update(point);
        forest.update(point);
        forest2.update(point);
    }
    // both should show the similar rate of adjustment
    System.out.println("Unusual scores: forest one " + firstAnomalies + ", second one " + secondAnomalies
            + ", forced (first) " + forcedChangeAnomalies);
}
Example usage of com.amazon.randomcutforest.testutils.NormalMixtureTestData in the project random-cut-forest-by-aws by AWS.
From the class Thresholded1DGaussianMix, method run:
@Override
public void run() throws Exception {
    // Create and populate a random cut forest
    int shingleSize = 4;
    int numberOfTrees = 50;
    int sampleSize = 256;
    Precision precision = Precision.FLOAT_32;
    int dataSize = 4 * sampleSize;
    // change this to try different number of attributes,
    // this parameter is not expected to be larger than 5 for this example
    int baseDimensions = 1;
    // count doubles as the timestamp handed to process() below
    int count = 0;
    int dimensions = baseDimensions * shingleSize;
    // presumably TIME_AUGMENTED appends the supplied timestamp as an extra
    // attribute — confirm against ForestMode documentation
    ThresholdedRandomCutForest forest = new ThresholdedRandomCutForest.Builder<>().compact(true).dimensions(dimensions).randomSeed(0).numberOfTrees(numberOfTrees).shingleSize(shingleSize).sampleSize(sampleSize).precision(precision).anomalyRate(0.01).forestMode(ForestMode.TIME_AUGMENTED).build();
    // NOTE(review): seed is printed but never passed to the generator below — confirm intent
    long seed = new Random().nextLong();
    System.out.println("Anomalies would correspond to a run, based on a change of state.");
    System.out.println("Each change is normal <-> anomaly; so after the second change the data is normal");
    System.out.println("seed = " + seed);
    // 1-D mixture: base cluster (mu=10, sigma=1) and anomaly cluster (mu=50, sigma=2)
    // with transition probabilities 0.01 (to anomaly) and 0.1 (back to base)
    NormalMixtureTestData normalMixtureTestData = new NormalMixtureTestData(10, 1.0, 50, 2.0, 0.01, 0.1);
    MultiDimDataWithKey dataWithKeys = normalMixtureTestData.generateTestDataWithKey(dataSize, 1, 0);
    // keyCounter walks changeIndices, the positions where the generator switched state
    int keyCounter = 0;
    for (double[] point : dataWithKeys.data) {
        AnomalyDescriptor result = forest.process(point, count);
        // announce a generator state change when the forest's internal
        // timestamp reaches the next change index
        if (keyCounter < dataWithKeys.changeIndices.length && result.getInternalTimeStamp() == dataWithKeys.changeIndices[keyCounter]) {
            System.out.println("timestamp " + (result.getInputTimestamp()) + " CHANGE");
            ++keyCounter;
        }
        // NOTE(review): this second check compares the raw count against the
        // same index list; when internal timestamp and count coincide only one
        // branch fires per index — confirm both checks are intended
        if (keyCounter < dataWithKeys.changeIndices.length && count == dataWithKeys.changeIndices[keyCounter]) {
            System.out.println("timestamp " + (count) + " CHANGE ");
            ++keyCounter;
        }
        // a non-zero grade means the point was flagged as anomalous
        if (result.getAnomalyGrade() != 0) {
            System.out.print("timestamp " + (count) + " RESULT value ");
            for (int i = 0; i < baseDimensions; i++) {
                System.out.print(result.getCurrentInput()[i] + ", ");
            }
            System.out.print("score " + result.getRCFScore() + ", grade " + result.getAnomalyGrade() + ", ");
            if (result.isExpectedValuesPresent()) {
                if (result.getRelativeIndex() != 0 && result.isStartOfAnomaly()) {
                    // the anomaly actually began a few steps earlier: print the
                    // past value that deviated and the value expected in its place
                    System.out.print(-result.getRelativeIndex() + " steps ago, instead of ");
                    for (int i = 0; i < baseDimensions; i++) {
                        System.out.print(result.getPastValues()[i] + ", ");
                    }
                    System.out.print("expected ");
                    for (int i = 0; i < baseDimensions; i++) {
                        System.out.print(result.getExpectedValuesList()[0][i] + ", ");
                        // print the deviation only when past and expected differ
                        if (result.getPastValues()[i] != result.getExpectedValuesList()[0][i]) {
                            System.out.print("( " + (result.getPastValues()[i] - result.getExpectedValuesList()[0][i]) + " ) ");
                        }
                    }
                } else {
                    // the anomaly is at the current point: print the expected
                    // value and (when different) the deviation from it
                    System.out.print("expected ");
                    for (int i = 0; i < baseDimensions; i++) {
                        System.out.print(result.getExpectedValuesList()[0][i] + ", ");
                        if (result.getCurrentInput()[i] != result.getExpectedValuesList()[0][i]) {
                            System.out.print("( " + (result.getCurrentInput()[i] - result.getExpectedValuesList()[0][i]) + " ) ");
                        }
                    }
                }
            }
            System.out.println();
        }
        ++count;
    }
}
Example usage of com.amazon.randomcutforest.testutils.NormalMixtureTestData in the project random-cut-forest-by-aws by AWS.
From the class ObjectStreamExample, method run:
/**
 * Round-trips a trained forest through Java object-stream serialization and
 * verifies the restored forest scores anomalies consistently with the original.
 */
@Override
public void run() throws Exception {
    // Build a forest and feed it a stream of mixture-of-Gaussians points.
    int dimensions = 10;
    int numberOfTrees = 50;
    int sampleSize = 256;
    Precision precision = Precision.FLOAT_32;
    RandomCutForest forest = RandomCutForest.builder().compact(true).dimensions(dimensions)
            .numberOfTrees(numberOfTrees).sampleSize(sampleSize).precision(precision).build();

    int dataSize = 1000 * sampleSize;
    NormalMixtureTestData testData = new NormalMixtureTestData();
    for (double[] point : testData.generateTestData(dataSize, dimensions)) {
        forest.update(point);
    }

    // Serialize the forest state to bytes and report the size.
    RandomCutForestMapper mapper = new RandomCutForestMapper();
    mapper.setSaveExecutorContextEnabled(true);
    System.out.printf("dimensions = %d, numberOfTrees = %d, sampleSize = %d, precision = %s%n", dimensions,
            numberOfTrees, sampleSize, precision);
    byte[] bytes = serialize(mapper.toState(forest));
    System.out.printf("Object output stream size = %d bytes%n", bytes.length);

    // Restore a second forest from the object stream and compare the anomaly
    // scores the two forests produce on fresh data.
    RandomCutForestState restoredState = (RandomCutForestState) deserialize(bytes);
    RandomCutForest restoredForest = mapper.toModel(restoredState);

    int testSize = 100;
    // tolerance scales with log2(sampleSize)
    double delta = Math.log(sampleSize) / Math.log(2) * 0.05;
    int differences = 0;
    int anomalies = 0;
    for (double[] point : testData.generateTestData(testSize, dimensions)) {
        double originalScore = forest.getAnomalyScore(point);
        double restoredScore = restoredForest.getAnomalyScore(point);
        // consider the point only if either forest scores it as an anomaly,
        // then check whether the two scores agree within tolerance
        if (originalScore > 1 || restoredScore > 1) {
            anomalies++;
            if (Math.abs(originalScore - restoredScore) > delta) {
                differences++;
            }
        }
        forest.update(point);
        restoredForest.update(point);
    }

    // first validate that this was a nontrivial test
    if (anomalies == 0) {
        throw new IllegalStateException("test data did not produce any anomalies");
    }
    // validate that the two forests agree on anomaly scores
    if (differences >= 0.01 * testSize) {
        throw new IllegalStateException("restored forest does not agree with original forest");
    }
    System.out.println("Looks good!");
}
Aggregations