use of com.amazon.randomcutforest.config.Precision in project random-cut-forest-by-aws by aws.
the class ConsistencyTest method TimeAugmentedTest.
@ParameterizedTest
@EnumSource(TransformMethod.class)
public void TimeAugmentedTest(TransformMethod transformMethod) {
int sampleSize = 256;
int baseDimensions = 1;
int shingleSize = 4;
int dimensions = baseDimensions * shingleSize;
// test is exact equality, reducing the number of trials
int numTrials = 1;
// and using fewer trees to speed up test
int numberOfTrees = 30;
int length = 10 * sampleSize;
int dataSize = 2 * length;
for (int i = 0; i < numTrials; i++) {
Precision precision = Precision.FLOAT_32;
long seed = new Random().nextLong();
System.out.println("seed = " + seed);
// TransformMethod transformMethod = TransformMethod.NONE;
ThresholdedRandomCutForest first = ThresholdedRandomCutForest.builder().compact(true).dimensions(dimensions)
        .randomSeed(0).numberOfTrees(numberOfTrees).shingleSize(shingleSize).sampleSize(sampleSize)
        .internalShinglingEnabled(true).precision(precision).anomalyRate(0.01).forestMode(ForestMode.STANDARD)
        .weightTime(0).transformMethod(transformMethod).normalizeTime(true).outputAfter(32)
        .initialAcceptFraction(0.125).build();
ThresholdedRandomCutForest second = ThresholdedRandomCutForest.builder().compact(true).dimensions(dimensions)
        .randomSeed(0).numberOfTrees(numberOfTrees).shingleSize(shingleSize).sampleSize(sampleSize)
        .internalShinglingEnabled(true).precision(precision).anomalyRate(0.01)
        .forestMode(ForestMode.TIME_AUGMENTED).weightTime(0).transformMethod(transformMethod)
        .normalizeTime(true).outputAfter(32).initialAcceptFraction(0.125).build();
// ensuring that the parameters are the same; otherwise the grades/scores cannot
// be the same
// weightTime has to be 0 in the above
first.setLowerThreshold(1.1);
second.setLowerThreshold(1.1);
first.setHorizon(0.75);
second.setHorizon(0.75);
Random noise = new Random(0);
// change the last argument seed for a different run
MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(dataSize + shingleSize - 1, 50, 100, 5, seed, baseDimensions);
int count = 0;
for (int j = 0; j < length; j++) {
long timestamp = 100 * count + noise.nextInt(10) - 5;
AnomalyDescriptor result = first.process(dataWithKeys.data[j], timestamp);
AnomalyDescriptor test = second.process(dataWithKeys.data[j], timestamp);
checkArgument(Math.abs(result.getRCFScore() - test.getRCFScore()) < 1e-10, " error");
checkArgument(Math.abs(result.getAnomalyGrade() - test.getAnomalyGrade()) < 1e-10, " error");
++count;
}
ThresholdedRandomCutForestMapper mapper = new ThresholdedRandomCutForestMapper();
ThresholdedRandomCutForest third = mapper.toModel(mapper.toState(second));
for (int j = length; j < 2 * length; j++) {
// can be a different gap
long timestamp = 150 * count + noise.nextInt(10) - 5;
AnomalyDescriptor firstResult = first.process(dataWithKeys.data[count], timestamp);
AnomalyDescriptor secondResult = second.process(dataWithKeys.data[count], timestamp);
AnomalyDescriptor thirdResult = third.process(dataWithKeys.data[count], timestamp);
assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
assertEquals(firstResult.getAnomalyGrade(), secondResult.getAnomalyGrade(), 1e-10);
assertEquals(firstResult.getRCFScore(), thirdResult.getRCFScore(), 1e-10);
assertEquals(firstResult.getAnomalyGrade(), thirdResult.getAnomalyGrade(), 1e-10);
++count;
}
}
}
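The mapper round trip above (toState followed by toModel) is also the natural hook for persisting a ThresholdedRandomCutForest. The fragment below is not part of the test; it continues from the test's second forest and assumes that the state object returned by ThresholdedRandomCutForestMapper (here called ThresholdedRandomCutForestState) round-trips cleanly through Jackson, mirroring the JsonExample further down.
// sketch only: the state class name and its Jackson compatibility are assumptions
ThresholdedRandomCutForestMapper trcfMapper = new ThresholdedRandomCutForestMapper();
ObjectMapper jsonMapper = new ObjectMapper();
// serialize the mapper state, not the forest object itself
String json = jsonMapper.writeValueAsString(trcfMapper.toState(second));
// restore and continue processing; scores should match the original, as the test above checks
ThresholdedRandomCutForest restored = trcfMapper.toModel(jsonMapper.readValue(json, ThresholdedRandomCutForestState.class));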
use of com.amazon.randomcutforest.config.Precision in project random-cut-forest-by-aws by aws.
the class ThresholdedTime method run.
@Override
public void run() throws Exception {
// Create and populate a random cut forest
int shingleSize = 4;
int numberOfTrees = 50;
int sampleSize = 256;
Precision precision = Precision.FLOAT_32;
int dataSize = 4 * sampleSize;
// change this to try different number of attributes,
// this parameter is not expected to be larger than 5 for this example
int baseDimensions = 1;
int count = 0;
int dimensions = baseDimensions * shingleSize;
ThresholdedRandomCutForest forest = new ThresholdedRandomCutForest.Builder<>().compact(true).dimensions(dimensions)
        .randomSeed(0).numberOfTrees(numberOfTrees).shingleSize(shingleSize).sampleSize(sampleSize)
        .internalShinglingEnabled(true).precision(precision).anomalyRate(0.01)
        .forestMode(ForestMode.TIME_AUGMENTED).normalizeTime(true).build();
long seed = new Random().nextLong();
double[] data = new double[] { 1.0 };
System.out.println("seed = " + seed);
NormalMixtureTestData normalMixtureTestData = new NormalMixtureTestData(10, 50);
MultiDimDataWithKey dataWithKeys = normalMixtureTestData.generateTestDataWithKey(dataSize, 1, 0);
/*
 * the data alternates between the two distributions (normal -> anomalous -> normal),
 * starting from the normal state
 */
boolean anomalyState = false;
int keyCounter = 0;
for (double[] point : dataWithKeys.data) {
// the processed value is constant; only the timestamp varies, so any anomalies
// stem from irregular gaps in the (noisy) arrival times derived from point[0]
long time = (long) (1000L * count + Math.floor(10 * point[0]));
AnomalyDescriptor result = forest.process(data, time);
if (keyCounter < dataWithKeys.changeIndices.length && count == dataWithKeys.changeIndices[keyCounter]) {
System.out.print("Sequence " + count + " stamp " + (result.getInternalTimeStamp()) + " CHANGE ");
if (!anomalyState) {
System.out.println(" to Distribution 1 ");
} else {
System.out.println(" to Distribution 0 ");
}
anomalyState = !anomalyState;
++keyCounter;
}
if (result.getAnomalyGrade() != 0) {
System.out.print("Sequence " + count + " stamp " + (result.getInternalTimeStamp()) + " RESULT ");
System.out.print("score " + result.getRCFScore() + ", grade " + result.getAnomalyGrade() + ", ");
if (result.isExpectedValuesPresent()) {
if (result.getRelativeIndex() != 0 && result.isStartOfAnomaly()) {
System.out.print(-result.getRelativeIndex() + " steps ago, instead of stamp " + result.getPastTimeStamp());
System.out.print(", expected timestamp " + result.getExpectedTimeStamp() + " (" + (result.getPastTimeStamp() - result.getExpectedTimeStamp()) + ")");
} else {
System.out.print("expected " + result.getExpectedTimeStamp() + " (" + (result.getInternalTimeStamp() - result.getExpectedTimeStamp()) + ")");
}
}
System.out.println();
}
++count;
}
}
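Because the processed value never changes, the forest above reacts only to the arrival-time pattern. The sketch below is not part of the example; it reuses the builder parameters from run() and injects a single large gap to show the TIME_AUGMENTED mode reacting to irregular arrival times (whether the final grade is nonzero depends on the thresholder's warmed-up state).
ThresholdedRandomCutForest f = ThresholdedRandomCutForest.builder().compact(true).dimensions(4)
        .shingleSize(4).sampleSize(256).internalShinglingEnabled(true).precision(Precision.FLOAT_32)
        .anomalyRate(0.01).forestMode(ForestMode.TIME_AUGMENTED).normalizeTime(true).build();
double[] value = new double[] { 1.0 };
long time = 0L;
for (int i = 0; i < 1000; i++) {
    // regular arrivals, 1000 time units apart
    f.process(value, time);
    time += 1000L;
}
// an arrival that is late by an order of magnitude should stand out in the time dimension
AnomalyDescriptor late = f.process(value, time + 10000L);
System.out.println("score " + late.getRCFScore() + ", grade " + late.getAnomalyGrade());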
use of com.amazon.randomcutforest.config.Precision in project random-cut-forest-by-aws by aws.
the class ProtostuffExampleWithDynamicLambda method run.
@Override
public void run() throws Exception {
// Create and populate a random cut forest
int dimensions = 4;
int numberOfTrees = 50;
int sampleSize = 256;
Precision precision = Precision.FLOAT_64;
RandomCutForest forest = RandomCutForest.builder().compact(true).dimensions(dimensions)
        .numberOfTrees(numberOfTrees).sampleSize(sampleSize).precision(precision).build();
int dataSize = 4 * sampleSize;
NormalMixtureTestData testData = new NormalMixtureTestData();
for (double[] point : testData.generateTestData(dataSize, dimensions)) {
forest.update(point);
}
// Convert to an array of bytes and print the size
RandomCutForestMapper mapper = new RandomCutForestMapper();
mapper.setSaveExecutorContextEnabled(true);
Schema<RandomCutForestState> schema = RuntimeSchema.getSchema(RandomCutForestState.class);
LinkedBuffer buffer = LinkedBuffer.allocate(512);
byte[] bytes;
try {
RandomCutForestState state = mapper.toState(forest);
bytes = ProtostuffIOUtil.toByteArray(state, schema, buffer);
} finally {
buffer.clear();
}
System.out.printf("dimensions = %d, numberOfTrees = %d, sampleSize = %d, precision = %s%n", dimensions, numberOfTrees, sampleSize, precision);
System.out.printf("protostuff size = %d bytes%n", bytes.length);
// Restore from protostuff and compare anomaly scores produced by the two
// forests
RandomCutForestState state2 = schema.newMessage();
ProtostuffIOUtil.mergeFrom(bytes, state2, schema);
RandomCutForest forest2 = mapper.toModel(state2);
double saveLambda = forest.getTimeDecay();
forest.setTimeDecay(10 * forest.getTimeDecay());
forest2.setTimeDecay(10 * forest2.getTimeDecay());
for (int i = 0; i < numberOfTrees; i++) {
CompactSampler sampler = (CompactSampler) ((SamplerPlusTree) forest.getComponents().get(i)).getSampler();
CompactSampler sampler2 = (CompactSampler) ((SamplerPlusTree) forest2.getComponents().get(i)).getSampler();
if (sampler.getMaxSequenceIndex() != sampler2.getMaxSequenceIndex()) {
throw new IllegalStateException("Incorrect sampler state");
}
if (sampler.getMostRecentTimeDecayUpdate() != sampler2.getMostRecentTimeDecayUpdate()) {
throw new IllegalStateException("Incorrect sampler state");
}
if (sampler2.getMostRecentTimeDecayUpdate() != dataSize - 1) {
throw new IllegalStateException("Incorrect sampler state");
}
}
int testSize = 100;
// score tolerance: 5% of log_2(sampleSize); for sampleSize = 256 this is 8 * 0.05 = 0.4
double delta = Math.log(sampleSize) / Math.log(2) * 0.05;
int differences = 0;
int anomalies = 0;
for (double[] point : testData.generateTestData(testSize, dimensions)) {
double score = forest.getAnomalyScore(point);
double score2 = forest2.getAnomalyScore(point);
// a point scored as an anomaly (score > 1) by one forest should also be scored similarly by the other forest
if (score > 1 || score2 > 1) {
anomalies++;
if (Math.abs(score - score2) > delta) {
differences++;
}
}
forest.update(point);
forest2.update(point);
}
// first validate that this was a nontrivial test
if (anomalies == 0) {
throw new IllegalStateException("test data did not produce any anomalies");
}
// validate that the two forests agree on anomaly scores
if (differences >= 0.01 * testSize) {
throw new IllegalStateException("restored forest does not agree with original forest");
}
System.out.println("Looks good!");
}
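The protostuff round trip above is boilerplate that can be factored into small helpers. A sketch (not part of the example, using only the calls already shown in run()):
static byte[] toBytes(RandomCutForest forest) {
    RandomCutForestMapper mapper = new RandomCutForestMapper();
    mapper.setSaveExecutorContextEnabled(true);
    Schema<RandomCutForestState> schema = RuntimeSchema.getSchema(RandomCutForestState.class);
    LinkedBuffer buffer = LinkedBuffer.allocate(512);
    try {
        // serialize the mapper state, not the forest itself
        return ProtostuffIOUtil.toByteArray(mapper.toState(forest), schema, buffer);
    } finally {
        buffer.clear();
    }
}

static RandomCutForest fromBytes(byte[] bytes) {
    Schema<RandomCutForestState> schema = RuntimeSchema.getSchema(RandomCutForestState.class);
    RandomCutForestState state = schema.newMessage();
    ProtostuffIOUtil.mergeFrom(bytes, state, schema);
    return new RandomCutForestMapper().toModel(state);
}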
use of com.amazon.randomcutforest.config.Precision in project random-cut-forest-by-aws by aws.
the class JsonExample method run.
@Override
public void run() throws Exception {
// Create and populate a random cut forest
int dimensions = 4;
int numberOfTrees = 50;
int sampleSize = 256;
Precision precision = Precision.FLOAT_64;
RandomCutForest forest = RandomCutForest.builder().compact(true).dimensions(dimensions)
        .numberOfTrees(numberOfTrees).sampleSize(sampleSize).precision(precision).build();
int dataSize = 4 * sampleSize;
NormalMixtureTestData testData = new NormalMixtureTestData();
for (double[] point : testData.generateTestData(dataSize, dimensions)) {
forest.update(point);
}
// Convert to JSON and print the number of bytes
RandomCutForestMapper mapper = new RandomCutForestMapper();
mapper.setSaveExecutorContextEnabled(true);
ObjectMapper jsonMapper = new ObjectMapper();
String json = jsonMapper.writeValueAsString(mapper.toState(forest));
System.out.printf("dimensions = %d, numberOfTrees = %d, sampleSize = %d, precision = %s%n", dimensions, numberOfTrees, sampleSize, precision);
System.out.printf("JSON size = %d bytes%n", json.getBytes().length);
// Restore from JSON and compare anomaly scores produced by the two forests
RandomCutForest forest2 = mapper.toModel(jsonMapper.readValue(json, RandomCutForestState.class));
int testSize = 100;
double delta = Math.log(sampleSize) / Math.log(2) * 0.05;
int differences = 0;
int anomalies = 0;
for (double[] point : testData.generateTestData(testSize, dimensions)) {
double score = forest.getAnomalyScore(point);
double score2 = forest2.getAnomalyScore(point);
// a point scored as an anomaly (score > 1) by one forest should also be scored similarly by the other forest
if (score > 1 || score2 > 1) {
anomalies++;
if (Math.abs(score - score2) > delta) {
differences++;
}
}
forest.update(point);
forest2.update(point);
}
// first validate that this was a nontrivial test
if (anomalies == 0) {
throw new IllegalStateException("test data did not produce any anomalies");
}
// validate that the two forests agree on anomaly scores
if (differences >= 0.01 * testSize) {
throw new IllegalStateException("restored forest does not agree with original forest");
}
System.out.println("Looks good!");
}
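The example keeps the JSON in memory only; persisting it to disk and reading it back needs nothing beyond the JDK. A sketch with an arbitrary file name:
// requires java.nio.file.{Files, Path, Paths} and java.nio.charset.StandardCharsets
Path path = Paths.get("forest-state.json");
Files.write(path, json.getBytes(StandardCharsets.UTF_8));
String restoredJson = new String(Files.readAllBytes(path), StandardCharsets.UTF_8);
RandomCutForest restored = mapper.toModel(jsonMapper.readValue(restoredJson, RandomCutForestState.class));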
use of com.amazon.randomcutforest.config.Precision in project random-cut-forest-by-aws by aws.
the class ProtostuffExample method run.
@Override
public void run() throws Exception {
// Create and populate a random cut forest
int dimensions = 10;
int numberOfTrees = 50;
int sampleSize = 256;
Precision precision = Precision.FLOAT_32;
RandomCutForest forest = RandomCutForest.builder().compact(true).dimensions(dimensions)
        .numberOfTrees(numberOfTrees).sampleSize(sampleSize).precision(precision).build();
int dataSize = 1000 * sampleSize;
NormalMixtureTestData testData = new NormalMixtureTestData();
for (double[] point : testData.generateTestData(dataSize, dimensions)) {
forest.update(point);
}
// Convert to an array of bytes and print the size
RandomCutForestMapper mapper = new RandomCutForestMapper();
mapper.setSaveExecutorContextEnabled(true);
Schema<RandomCutForestState> schema = RuntimeSchema.getSchema(RandomCutForestState.class);
LinkedBuffer buffer = LinkedBuffer.allocate(512);
byte[] bytes;
try {
RandomCutForestState state = mapper.toState(forest);
bytes = ProtostuffIOUtil.toByteArray(state, schema, buffer);
} finally {
buffer.clear();
}
System.out.printf("dimensions = %d, numberOfTrees = %d, sampleSize = %d, precision = %s%n", dimensions, numberOfTrees, sampleSize, precision);
System.out.printf("protostuff size = %d bytes%n", bytes.length);
// Restore from protostuff and compare anomaly scores produced by the two
// forests
RandomCutForestState state2 = schema.newMessage();
ProtostuffIOUtil.mergeFrom(bytes, state2, schema);
RandomCutForest forest2 = mapper.toModel(state2);
int testSize = 100;
double delta = Math.log(sampleSize) / Math.log(2) * 0.05;
int differences = 0;
int anomalies = 0;
for (double[] point : testData.generateTestData(testSize, dimensions)) {
double score = forest.getAnomalyScore(point);
double score2 = forest2.getAnomalyScore(point);
// a point scored as an anomaly (score > 1) by one forest should also be scored similarly by the other forest
if (score > 1 || score2 > 1) {
anomalies++;
if (Math.abs(score - score2) > delta) {
differences++;
}
}
forest.update(point);
forest2.update(point);
}
// first validate that this was a nontrivial test
if (anomalies == 0) {
throw new IllegalStateException("test data did not produce any anomalies");
}
// validate that the two forests agree on anomaly scores
if (differences >= 0.01 * testSize) {
throw new IllegalStateException("restored forest does not agree with original forest");
}
System.out.println("Looks good!");
}