use of com.amazon.randomcutforest.RandomCutForest in project random-cut-forest-by-aws by aws.
the class JsonExample method run.
/**
 * Round-trips a random cut forest through JSON and checks that the restored
 * forest scores points like the original.
 *
 * <p>A compact forest is built and fed {@code 4 * sampleSize} generated points,
 * converted to a {@code RandomCutForestState}, serialized with Jackson, and
 * restored. Both forests then score (and are updated with) 100 fresh points.
 *
 * @throws IllegalStateException if no test point scored above 1 in either
 *                               forest, or if too many of those points have
 *                               scores that differ by more than the tolerance
 * @throws Exception             if JSON serialization or deserialization fails
 */
@Override
public void run() throws Exception {
    // Create and populate a random cut forest
    int dimensions = 4;
    int numberOfTrees = 50;
    int sampleSize = 256;
    Precision precision = Precision.FLOAT_64;
    RandomCutForest forest = RandomCutForest.builder().compact(true).dimensions(dimensions).numberOfTrees(numberOfTrees).sampleSize(sampleSize).precision(precision).build();
    int dataSize = 4 * sampleSize;
    NormalMixtureTestData testData = new NormalMixtureTestData();
    for (double[] point : testData.generateTestData(dataSize, dimensions)) {
        forest.update(point);
    }

    // Convert to JSON and print the number of bytes
    RandomCutForestMapper mapper = new RandomCutForestMapper();
    mapper.setSaveExecutorContextEnabled(true);
    ObjectMapper jsonMapper = new ObjectMapper();
    String json = jsonMapper.writeValueAsString(mapper.toState(forest));
    System.out.printf("dimensions = %d, numberOfTrees = %d, sampleSize = %d, precision = %s%n", dimensions, numberOfTrees, sampleSize, precision);
    // Encode explicitly as UTF-8 so the reported size does not depend on the
    // platform default charset of the JVM running the example.
    System.out.printf("JSON size = %d bytes%n", json.getBytes(java.nio.charset.StandardCharsets.UTF_8).length);

    // Restore from JSON and compare anomaly scores produced by the two forests
    RandomCutForest forest2 = mapper.toModel(jsonMapper.readValue(json, RandomCutForestState.class));
    int testSize = 100;
    // Tolerance for score disagreement: 5% of log2(sampleSize)
    double delta = Math.log(sampleSize) / Math.log(2) * 0.05;
    int differences = 0;
    int anomalies = 0;
    for (double[] point : testData.generateTestData(testSize, dimensions)) {
        double score = forest.getAnomalyScore(point);
        double score2 = forest2.getAnomalyScore(point);
        // Only compare points that at least one of the forests scores above 1
        if (score > 1 || score2 > 1) {
            anomalies++;
            if (Math.abs(score - score2) > delta) {
                differences++;
            }
        }
        // Keep both forests in lock-step so later scores remain comparable
        forest.update(point);
        forest2.update(point);
    }

    // first validate that this was a nontrivial test
    if (anomalies == 0) {
        throw new IllegalStateException("test data did not produce any anomalies");
    }
    // validate that the two forests agree on anomaly scores
    if (differences >= 0.01 * testSize) {
        throw new IllegalStateException("restored forest does not agree with original forest");
    }
    System.out.println("Looks good!");
}
use of com.amazon.randomcutforest.RandomCutForest in project random-cut-forest-by-aws by aws.
the class ProtostuffExample method run.
/**
 * Round-trips a random cut forest through protostuff and checks that the
 * restored forest scores points like the original.
 *
 * <p>A compact FLOAT_32 forest is fed {@code 1000 * sampleSize} generated
 * points, its state is written to a byte array and read back, and both forests
 * then score (and are updated with) 100 fresh points.
 *
 * @throws IllegalStateException if no test point scored above 1 in either
 *                               forest, or if too many of those points have
 *                               scores that differ by more than the tolerance
 * @throws Exception             declared by the overridden method
 */
@Override
public void run() throws Exception {
    // --- build and train the original forest ---
    int dimensions = 10;
    int numberOfTrees = 50;
    int sampleSize = 256;
    Precision precision = Precision.FLOAT_32;

    RandomCutForest forest = RandomCutForest.builder()
            .compact(true)
            .dimensions(dimensions)
            .numberOfTrees(numberOfTrees)
            .sampleSize(sampleSize)
            .precision(precision)
            .build();

    int trainingSize = 1000 * sampleSize;
    NormalMixtureTestData testData = new NormalMixtureTestData();
    double[][] trainingPoints = testData.generateTestData(trainingSize, dimensions);
    for (double[] sample : trainingPoints) {
        forest.update(sample);
    }

    // --- serialize the forest state with protostuff ---
    RandomCutForestMapper mapper = new RandomCutForestMapper();
    mapper.setSaveExecutorContextEnabled(true);

    Schema<RandomCutForestState> schema = RuntimeSchema.getSchema(RandomCutForestState.class);
    LinkedBuffer buffer = LinkedBuffer.allocate(512);
    byte[] serialized;
    try {
        RandomCutForestState originalState = mapper.toState(forest);
        serialized = ProtostuffIOUtil.toByteArray(originalState, schema, buffer);
    } finally {
        // always reset the buffer, even if serialization throws
        buffer.clear();
    }

    System.out.printf("dimensions = %d, numberOfTrees = %d, sampleSize = %d, precision = %s%n", dimensions, numberOfTrees, sampleSize, precision);
    System.out.printf("protostuff size = %d bytes%n", serialized.length);

    // --- deserialize into a second forest ---
    RandomCutForestState restoredState = schema.newMessage();
    ProtostuffIOUtil.mergeFrom(serialized, restoredState, schema);
    RandomCutForest forest2 = mapper.toModel(restoredState);

    // --- score the same fresh points with both forests ---
    int testSize = 100;
    // tolerance for score disagreement: 5% of log2(sampleSize)
    double delta = Math.log(sampleSize) / Math.log(2) * 0.05;
    int disagreements = 0;
    int flaggedPoints = 0;

    for (double[] sample : testData.generateTestData(testSize, dimensions)) {
        double originalScore = forest.getAnomalyScore(sample);
        double restoredScore = forest2.getAnomalyScore(sample);

        // only points that at least one forest scores above 1 are compared
        boolean flagged = originalScore > 1 || restoredScore > 1;
        if (flagged) {
            flaggedPoints++;
            double gap = Math.abs(originalScore - restoredScore);
            if (gap > delta) {
                disagreements++;
            }
        }

        // keep both forests in lock-step
        forest.update(sample);
        forest2.update(sample);
    }

    // the comparison is meaningless if nothing was ever flagged
    if (flaggedPoints == 0) {
        throw new IllegalStateException("test data did not produce any anomalies");
    }

    // the two forests must agree on (almost) all flagged points
    double allowedDisagreements = 0.01 * testSize;
    if (disagreements >= allowedDisagreements) {
        throw new IllegalStateException("restored forest does not agree with original forest");
    }

    System.out.println("Looks good!");
}
use of com.amazon.randomcutforest.RandomCutForest in project random-cut-forest-by-aws by aws.
the class ProtostuffExampleWithShingles method run.
/**
 * Round-trips a shingled random cut forest through protostuff with tree state
 * excluded, and checks that the restored forest scores points like the
 * original.
 *
 * <p>The forest is built with {@code shingleSize} equal to {@code dimensions}
 * and fed {@code 1000 * sampleSize} shingled points. Its state is serialized
 * with {@code setSaveTreeStateEnabled(false)}, read back, and both forests then
 * score (and are updated with) 10000 fresh shingled points.
 *
 * @throws IllegalStateException if too many points scored above 1 by either
 *                               forest have scores that differ by more than
 *                               the tolerance
 * @throws Exception             declared by the overridden method
 */
@Override
public void run() throws Exception {
    // Create and populate a random cut forest
    int dimensions = 10;
    int numberOfTrees = 50;
    int sampleSize = 256;
    Precision precision = Precision.FLOAT_64;
    RandomCutForest forest = RandomCutForest.builder().compact(true).dimensions(dimensions).numberOfTrees(numberOfTrees).sampleSize(sampleSize).precision(precision).shingleSize(dimensions).build();
    int dataSize = 1000 * sampleSize;
    for (double[] point : generateShingledData(dataSize, dimensions, 0)) {
        forest.update(point);
    }

    // Convert to an array of bytes and print the size
    RandomCutForestMapper mapper = new RandomCutForestMapper();
    mapper.setSaveExecutorContextEnabled(true);
    // Tree state is left out of the serialized form.
    // NOTE(review): presumably the mapper rebuilds the trees from the saved
    // samples on restore - confirm against RandomCutForestMapper.
    mapper.setSaveTreeStateEnabled(false);
    Schema<RandomCutForestState> schema = RuntimeSchema.getSchema(RandomCutForestState.class);
    LinkedBuffer buffer = LinkedBuffer.allocate(512);
    byte[] bytes;
    try {
        RandomCutForestState state = mapper.toState(forest);
        bytes = ProtostuffIOUtil.toByteArray(state, schema, buffer);
    } finally {
        buffer.clear();
    }
    System.out.printf("dimensions = %d, numberOfTrees = %d, sampleSize = %d, precision = %s%n", dimensions, numberOfTrees, sampleSize, precision);
    System.out.printf("protostuff size = %d bytes%n", bytes.length);

    // Restore from protostuff and compare anomaly scores produced by the two
    // forests
    RandomCutForestState state2 = schema.newMessage();
    ProtostuffIOUtil.mergeFrom(bytes, state2, schema);
    RandomCutForest forest2 = mapper.toModel(state2);
    int testSize = 10000;
    // Tolerance for score disagreement: 5% of log2(sampleSize)
    double delta = Math.log(sampleSize) / Math.log(2) * 0.05;
    int differences = 0;
    for (double[] point : generateShingledData(testSize, dimensions, 2)) {
        double score = forest.getAnomalyScore(point);
        double score2 = forest2.getAnomalyScore(point);
        // Only points that at least one forest scores above 1 are compared
        if ((score > 1 || score2 > 1) && Math.abs(score - score2) > delta) {
            differences++;
        }
        // Keep both forests in lock-step so later scores remain comparable
        forest.update(point);
        forest2.update(point);
    }

    // validate that the two forests agree on anomaly scores
    if (differences >= 0.01 * testSize) {
        throw new IllegalStateException("restored forest does not agree with original forest");
    }
    System.out.println("Looks good!");
}
use of com.amazon.randomcutforest.RandomCutForest in project random-cut-forest-by-aws by aws.
the class DynamicThroughput method run.
/**
 * Compares a forest using its default bounding box cache setting against an
 * identically seeded forest whose cache fraction is set to 0, 0.25, 0.5, 0.75
 * and 1.0 in turn.
 *
 * <p>In each round both forests score and ingest the same points. Any score
 * difference larger than {@code 1e-10} counts as a mismatch. The second pass
 * over {@code sampleSize} points is timed and the elapsed time is printed.
 *
 * @throws IllegalStateException if the two forests ever disagree on a score
 * @throws Exception             declared by the overridden method
 */
@Override
public void run() throws Exception {
    int dimensions = 4;
    int numberOfTrees = 50;
    int sampleSize = 256;
    Precision precision = Precision.FLOAT_64;
    int dataSize = 10 * sampleSize;

    NormalMixtureTestData testData = new NormalMixtureTestData();
    // generate data once to eliminate caching issues; results are discarded
    testData.generateTestData(dataSize, dimensions);
    testData.generateTestData(sampleSize, dimensions);

    for (int round = 0; round < 5; round++) {
        double cacheFraction = round * 0.25;

        // two forests with the same seed and configuration
        RandomCutForest forest = RandomCutForest.builder()
                .compact(true)
                .dimensions(dimensions)
                .randomSeed(0)
                .numberOfTrees(numberOfTrees)
                .sampleSize(sampleSize)
                .precision(precision)
                .build();
        RandomCutForest forest2 = RandomCutForest.builder()
                .compact(true)
                .dimensions(dimensions)
                .randomSeed(0)
                .numberOfTrees(numberOfTrees)
                .sampleSize(sampleSize)
                .precision(precision)
                .build();
        // only the second forest gets its cache fraction changed
        forest2.setBoundingBoxCacheFraction(cacheFraction);

        int mismatches = 0;

        // untimed pass: fill both forests while checking score agreement
        double[][] warmupPoints = testData.generateTestData(dataSize, dimensions);
        for (double[] sample : warmupPoints) {
            double firstScore = forest.getAnomalyScore(sample);
            double secondScore = forest2.getAnomalyScore(sample);
            double gap = Math.abs(firstScore - secondScore);
            if (gap > 1e-10) {
                mismatches++;
            }
            forest.update(sample);
            forest2.update(sample);
        }

        // timed pass: the window includes generating the points
        Instant start = Instant.now();
        double[][] timedPoints = testData.generateTestData(sampleSize, dimensions);
        for (double[] sample : timedPoints) {
            double firstScore = forest.getAnomalyScore(sample);
            double secondScore = forest2.getAnomalyScore(sample);
            double gap = Math.abs(firstScore - secondScore);
            if (gap > 1e-10) {
                mismatches++;
            }
            forest.update(sample);
            forest2.update(sample);
        }
        Instant finish = Instant.now();
        long elapsedMillis = Duration.between(start, finish).toMillis();

        // the cache setting must not change any score
        if (mismatches > 0) {
            throw new IllegalStateException("score mismatch");
        }

        System.out.println("So far so good! Caching fraction = " + cacheFraction + ", Time =" + elapsedMillis + " ms (note only one forest is changing)");
    }
}
use of com.amazon.randomcutforest.RandomCutForest in project random-cut-forest-by-aws by aws.
the class BoxCacheTest method testChangingBoundingBoxFloat32.
/**
 * Checks that a FLOAT_32 forest whose bounding box cache fraction is changed
 * at random during the run produces the same anomaly scores as an identically
 * seeded forest with a fixed cache fraction of 1.
 *
 * <p>The first forest starts with a cache fraction of 0 and gets a new random
 * fraction after every {@code sampleSize} points.
 */
@Test
public void testChangingBoundingBoxFloat32() {
    int dimensions = 4;
    int numberOfTrees = 1;
    int sampleSize = 64;
    int dataSize = 1000 * sampleSize;

    // one generator supplies both the shared forest seed and the cache fractions
    Random random = new Random();
    long seed = random.nextLong();

    double[][] big = RandomCutForestTest.generateShingledData(dataSize, dimensions, 2);

    RandomCutForest forest = RandomCutForest.builder()
            .compact(true)
            .dimensions(dimensions)
            .numberOfTrees(numberOfTrees)
            .sampleSize(sampleSize)
            .precision(Precision.FLOAT_32)
            .randomSeed(seed)
            .boundingBoxCacheFraction(0)
            .build();
    RandomCutForest otherForest = RandomCutForest.builder()
            .compact(true)
            .dimensions(dimensions)
            .numberOfTrees(numberOfTrees)
            .sampleSize(sampleSize)
            .precision(Precision.FLOAT_32)
            .randomSeed(seed)
            .boundingBoxCacheFraction(1)
            .build();

    for (int index = 0; index < big.length; index++) {
        double[] point = big[index];

        // change the cache fraction on every sampleSize-th point (1-based)
        int seen = index + 1;
        if (seen % sampleSize == 0) {
            forest.setBoundingBoxCacheFraction(random.nextDouble());
        }

        double changingScore = forest.getAnomalyScore(point);
        double fixedScore = otherForest.getAnomalyScore(point);
        assertEquals(changingScore, fixedScore);

        forest.update(point);
        otherForest.update(point);
    }
}
Aggregations