Use of com.amazon.randomcutforest.state.RandomCutForestState in project random-cut-forest-by-aws by aws.
Class ProtostuffExampleWithDynamicLambda, method run.
/**
 * Serializes a populated compact forest with protostuff, restores it, multiplies the time
 * decay of both copies by 10, and verifies that the two forests remain consistent.
 *
 * <p>Two things are checked. First, for every tree, the samplers of the original and the
 * restored forest must report the same maximum sequence index and the same most recent
 * time-decay update, and that update must equal {@code dataSize - 1}. Second, on fresh test
 * data, the anomaly scores of the two forests must agree to within 5% of
 * {@code log2(sampleSize)} for every point that either forest scores above 1.
 *
 * @throws IllegalStateException if the sampler state differs, if no test point scored above 1
 *         in either forest, or if the number of disagreeing scores reaches 1% of the test size
 * @throws Exception if one of the library calls fails; nothing checked is thrown explicitly
 *         in this body
 */
@Override
public void run() throws Exception {
    // Create and populate a random cut forest
    int dimensions = 4;
    int numberOfTrees = 50;
    int sampleSize = 256;
    Precision precision = Precision.FLOAT_64;
    RandomCutForest forest = RandomCutForest.builder()
            .compact(true)
            .dimensions(dimensions)
            .numberOfTrees(numberOfTrees)
            .sampleSize(sampleSize)
            .precision(precision)
            .build();
    int dataSize = 4 * sampleSize;
    NormalMixtureTestData testData = new NormalMixtureTestData();
    for (double[] point : testData.generateTestData(dataSize, dimensions)) {
        forest.update(point);
    }

    // Convert to an array of bytes and print the size
    RandomCutForestMapper mapper = new RandomCutForestMapper();
    mapper.setSaveExecutorContextEnabled(true);
    Schema<RandomCutForestState> schema = RuntimeSchema.getSchema(RandomCutForestState.class);
    LinkedBuffer buffer = LinkedBuffer.allocate(512);
    byte[] bytes;
    try {
        RandomCutForestState state = mapper.toState(forest);
        bytes = ProtostuffIOUtil.toByteArray(state, schema, buffer);
    } finally {
        // Reset the buffer whether or not serialization succeeded.
        buffer.clear();
    }
    System.out.printf("dimensions = %d, numberOfTrees = %d, sampleSize = %d, precision = %s%n", dimensions, numberOfTrees, sampleSize, precision);
    System.out.printf("protostuff size = %d bytes%n", bytes.length);

    // Restore from protostuff
    RandomCutForestState state2 = schema.newMessage();
    ProtostuffIOUtil.mergeFrom(bytes, state2, schema);
    RandomCutForest forest2 = mapper.toModel(state2);

    // Apply the same time-decay change to the original and to the restored forest.
    forest.setTimeDecay(10 * forest.getTimeDecay());
    forest2.setTimeDecay(10 * forest2.getTimeDecay());

    // The samplers of corresponding trees must have recorded the change identically.
    for (int i = 0; i < numberOfTrees; i++) {
        CompactSampler sampler = (CompactSampler) ((SamplerPlusTree) forest.getComponents().get(i)).getSampler();
        CompactSampler sampler2 = (CompactSampler) ((SamplerPlusTree) forest2.getComponents().get(i)).getSampler();
        if (sampler.getMaxSequenceIndex() != sampler2.getMaxSequenceIndex()) {
            throw new IllegalStateException("Incorrect sampler state");
        }
        if (sampler.getMostRecentTimeDecayUpdate() != sampler2.getMostRecentTimeDecayUpdate()) {
            throw new IllegalStateException("Incorrect sampler state");
        }
        // dataSize - 1 is presumably the zero-based index of the last point fed to the
        // forest before the decay was changed -- confirm against the sampler implementation.
        if (sampler2.getMostRecentTimeDecayUpdate() != dataSize - 1) {
            throw new IllegalStateException("Incorrect sampler state");
        }
    }

    // Compare anomaly scores produced by the two forests
    int testSize = 100;
    // Tolerance: 5% of log2(sampleSize).
    double delta = Math.log(sampleSize) / Math.log(2) * 0.05;
    int differences = 0;
    int anomalies = 0;
    for (double[] point : testData.generateTestData(testSize, dimensions)) {
        double score = forest.getAnomalyScore(point);
        double score2 = forest2.getAnomalyScore(point);
        // count the point when either forest scores it above 1
        if (score > 1 || score2 > 1) {
            anomalies++;
            if (Math.abs(score - score2) > delta) {
                differences++;
            }
        }
        // Keep both forests in lock step so later scores stay comparable.
        forest.update(point);
        forest2.update(point);
    }

    // first validate that this was a nontrivial test
    if (anomalies == 0) {
        throw new IllegalStateException("test data did not produce any anomalies");
    }
    // validate that the two forests agree on anomaly scores; with testSize = 100 a single
    // disagreement already reaches the 1% threshold
    if (differences >= 0.01 * testSize) {
        throw new IllegalStateException("restored forest does not agree with original forest");
    }
    System.out.println("Looks good!");
}
Use of com.amazon.randomcutforest.state.RandomCutForestState in project random-cut-forest-by-aws by aws.
Class ProtostuffExample, method run.
/**
 * Trains a compact forest, writes its state to a protostuff byte array, reads the state back
 * into a second forest, and checks that both forests score fresh test points alike.
 *
 * <p>Only points that at least one forest scores above 1 are compared; two scores count as
 * different when they are more than 5% of {@code log2(sampleSize)} apart.
 *
 * @throws IllegalStateException if no test point scored above 1 in either forest, or if the
 *         number of differing scores reaches 1% of the test size
 * @throws Exception if one of the library calls fails; nothing checked is thrown explicitly
 *         in this body
 */
@Override
public void run() throws Exception {
    final int dims = 10;
    final int treeCount = 50;
    final int samplesPerTree = 256;
    final Precision floatPrecision = Precision.FLOAT_32;

    // Build the forest and feed it the training stream.
    final RandomCutForest original = RandomCutForest.builder()
            .compact(true)
            .dimensions(dims)
            .numberOfTrees(treeCount)
            .sampleSize(samplesPerTree)
            .precision(floatPrecision)
            .build();
    final int trainingSize = 1000 * samplesPerTree;
    final NormalMixtureTestData generator = new NormalMixtureTestData();
    for (double[] trainingPoint : generator.generateTestData(trainingSize, dims)) {
        original.update(trainingPoint);
    }

    // Serialize the forest state and report how large it is.
    final RandomCutForestMapper stateMapper = new RandomCutForestMapper();
    stateMapper.setSaveExecutorContextEnabled(true);
    final Schema<RandomCutForestState> stateSchema = RuntimeSchema.getSchema(RandomCutForestState.class);
    final LinkedBuffer scratch = LinkedBuffer.allocate(512);
    final byte[] serialized;
    try {
        final RandomCutForestState snapshot = stateMapper.toState(original);
        serialized = ProtostuffIOUtil.toByteArray(snapshot, stateSchema, scratch);
    } finally {
        scratch.clear();
    }
    System.out.printf("dimensions = %d, numberOfTrees = %d, sampleSize = %d, precision = %s%n", dims, treeCount, samplesPerTree, floatPrecision);
    System.out.printf("protostuff size = %d bytes%n", serialized.length);

    // Rebuild a second forest from the serialized bytes.
    final RandomCutForestState decoded = stateSchema.newMessage();
    ProtostuffIOUtil.mergeFrom(serialized, decoded, stateSchema);
    final RandomCutForest restored = stateMapper.toModel(decoded);

    // Score fresh points with both forests and tally the disagreements.
    final int testSize = 100;
    final double log2OfSampleSize = Math.log(samplesPerTree) / Math.log(2);
    final double tolerance = log2OfSampleSize * 0.05;
    int mismatches = 0;
    int flagged = 0;
    for (double[] testPoint : generator.generateTestData(testSize, dims)) {
        final double originalScore = original.getAnomalyScore(testPoint);
        final double restoredScore = restored.getAnomalyScore(testPoint);

        // A point is of interest when either forest scores it above 1.
        final boolean flaggedByEither = originalScore > 1 || restoredScore > 1;
        if (flaggedByEither) {
            flagged++;
            final double gap = Math.abs(originalScore - restoredScore);
            if (gap > tolerance) {
                mismatches++;
            }
        }

        // Both forests keep learning from the same stream.
        original.update(testPoint);
        restored.update(testPoint);
    }

    // A run without any flagged point proves nothing.
    if (flagged == 0) {
        throw new IllegalStateException("test data did not produce any anomalies");
    }

    // Too many disagreements mean the round trip altered the model.
    final double mismatchLimit = 0.01 * testSize;
    if (mismatches >= mismatchLimit) {
        throw new IllegalStateException("restored forest does not agree with original forest");
    }

    System.out.println("Looks good!");
}
Use of com.amazon.randomcutforest.state.RandomCutForestState in project random-cut-forest-by-aws by aws.
Class ProtostuffExampleWithShingles, method run.
/**
 * Trains a compact forest on shingled data, serializes its state with protostuff while tree
 * state saving is disabled, restores a second forest from the bytes, and checks that both
 * forests score fresh shingled test points alike.
 *
 * <p>Only points that at least one forest scores above 1 are compared; two scores count as
 * different when they are more than 5% of {@code log2(sampleSize)} apart. The run fails once
 * the number of differing scores reaches 1% of the test size.
 *
 * @throws IllegalStateException if the restored forest disagrees with the original too often
 * @throws Exception if one of the library calls fails; nothing checked is thrown explicitly
 *         in this body
 */
@Override
public void run() throws Exception {
    // Create and populate a random cut forest
    int dimensions = 10;
    int numberOfTrees = 50;
    int sampleSize = 256;
    Precision precision = Precision.FLOAT_64;
    RandomCutForest forest = RandomCutForest.builder()
            .compact(true)
            .dimensions(dimensions)
            .numberOfTrees(numberOfTrees)
            .sampleSize(sampleSize)
            .precision(precision)
            .shingleSize(dimensions)
            .build();
    int dataSize = 1000 * sampleSize;
    // NOTE(review): the third argument of generateShingledData is presumably a seed -- confirm.
    for (double[] point : generateShingledData(dataSize, dimensions, 0)) {
        forest.update(point);
    }

    // Convert to an array of bytes and print the size
    RandomCutForestMapper mapper = new RandomCutForestMapper();
    mapper.setSaveExecutorContextEnabled(true);
    // Tree state is deliberately left out of the serialized form in this example.
    mapper.setSaveTreeStateEnabled(false);
    Schema<RandomCutForestState> schema = RuntimeSchema.getSchema(RandomCutForestState.class);
    LinkedBuffer buffer = LinkedBuffer.allocate(512);
    byte[] bytes;
    try {
        RandomCutForestState state = mapper.toState(forest);
        bytes = ProtostuffIOUtil.toByteArray(state, schema, buffer);
    } finally {
        // Reset the buffer whether or not serialization succeeded.
        buffer.clear();
    }
    System.out.printf("dimensions = %d, numberOfTrees = %d, sampleSize = %d, precision = %s%n", dimensions, numberOfTrees, sampleSize, precision);
    System.out.printf("protostuff size = %d bytes%n", bytes.length);

    // Restore from protostuff and compare anomaly scores produced by the two
    // forests
    RandomCutForestState state2 = schema.newMessage();
    ProtostuffIOUtil.mergeFrom(bytes, state2, schema);
    RandomCutForest forest2 = mapper.toModel(state2);

    int testSize = 10000;
    // Tolerance: 5% of log2(sampleSize).
    double delta = Math.log(sampleSize) / Math.log(2) * 0.05;
    int differences = 0;
    // NOTE(review): anomalies is tallied but never checked in this method; consider failing
    // when it stays at zero so that a trivially passing run is detected.
    int anomalies = 0;
    for (double[] point : generateShingledData(testSize, dimensions, 2)) {
        double score = forest.getAnomalyScore(point);
        double score2 = forest2.getAnomalyScore(point);
        // count the point when either forest scores it above 1
        if (score > 1 || score2 > 1) {
            anomalies++;
            if (Math.abs(score - score2) > delta) {
                differences++;
            }
        }
        // Keep both forests in lock step so later scores stay comparable.
        forest.update(point);
        forest2.update(point);
    }

    // validate that the two forests agree on anomaly scores
    if (differences >= 0.01 * testSize) {
        throw new IllegalStateException("restored forest does not agree with original forest");
    }
    System.out.println("Looks good!");
}
Use of com.amazon.randomcutforest.state.RandomCutForestState in project random-cut-forest-by-aws by aws.
Class RandomCutForestTest, method testUpdateAfterRoundTripLargeNodeStore.
/**
 * Verifies that a single-tree compact forest with a sample size of 20000 keeps producing the
 * same anomaly scores as its copy after a round trip through {@link RandomCutForestState}.
 *
 * <p>Each of the 10 trials feeds between 30000 and 30299 random integer-valued points to a
 * fresh forest, maps it to state and back, and then scores and updates both forests with
 * 10000 further points, requiring the scores to match within {@code 1E-10}.
 */
@Test
public void testUpdateAfterRoundTripLargeNodeStore() {
    int dimensions = 5;
    for (int trials = 0; trials < 10; trials++) {
        RandomCutForest forest = RandomCutForest.builder()
                .compact(true)
                .dimensions(dimensions)
                .numberOfTrees(1)
                .sampleSize(20000)
                .precision(Precision.FLOAT_32)
                .build();
        Random r = new Random();

        // Draw the randomized number of warm-up updates once. Re-drawing the bound in the
        // loop condition allocated a new Random per iteration and skewed the count towards
        // 30000, because the loop stopped at the first small draw.
        int numberOfUpdates = 30000 + r.nextInt(300);
        for (int i = 0; i < numberOfUpdates; i++) {
            forest.update(r.ints(dimensions, 0, 50).asDoubleStream().toArray());
        }

        // serialize + deserialize
        RandomCutForestMapper mapper = new RandomCutForestMapper();
        mapper.setSaveTreeStateEnabled(true);
        mapper.setSaveExecutorContextEnabled(true);
        RandomCutForestState state = mapper.toState(forest);
        RandomCutForest forest2 = mapper.toModel(state);

        // update re-instantiated forest, checking the score before every update
        for (int i = 0; i < 10000; i++) {
            double[] point = r.ints(dimensions, 0, 50).asDoubleStream().toArray();
            double score = forest.getAnomalyScore(point);
            assertEquals(score, forest2.getAnomalyScore(point), 1E-10);
            forest2.update(point);
            forest.update(point);
        }
    }
}
Use of com.amazon.randomcutforest.state.RandomCutForestState in project random-cut-forest-by-aws by aws.
Class StateMapperShingledBenchmark, method roundTripFromState.
/**
 * Benchmarks repeated state-to-model-to-state round trips.
 *
 * <p>Starting from the forest state held by the benchmark state, each of the
 * {@code NUM_TEST_SAMPLES} iterations builds a new mapper, restores a forest from the current
 * state, scores and then updates it with the next test sample, and maps the forest back to
 * state for the following iteration.
 *
 * @param state     supplies the initial forest state, the test samples and the
 *                  save-tree-state flag
 * @param blackhole receives every anomaly score
 * @return the forest state produced by the last iteration
 */
@Benchmark
@OperationsPerInvocation(NUM_TEST_SAMPLES)
public RandomCutForestState roundTripFromState(BenchmarkState state, Blackhole blackhole) {
    RandomCutForestState current = state.forestState;
    final double[][] samples = state.testData;

    int index = 0;
    while (index < NUM_TEST_SAMPLES) {
        // A new mapper is configured on every pass, so its setup is part of the measured work.
        final RandomCutForestMapper roundTripMapper = new RandomCutForestMapper();
        roundTripMapper.setSaveExecutorContextEnabled(true);
        roundTripMapper.setSaveTreeStateEnabled(state.saveTreeState);

        final RandomCutForest restored = roundTripMapper.toModel(current);
        final double[] sample = samples[index];

        // Score first, then learn from the same sample.
        blackhole.consume(restored.getAnomalyScore(sample));
        restored.update(sample);

        current = roundTripMapper.toState(restored);
        index++;
    }

    return current;
}
Aggregations