Use of com.amazon.randomcutforest.RandomCutForest in project random-cut-forest-by-aws by aws.
The class ConsistencyTest, method InternalShinglingTest.
@Test
public void InternalShinglingTest() {
int sampleSize = 256;
int baseDimensions = 2;
int shingleSize = 4;
int dimensions = baseDimensions * shingleSize;
long seed = new Random().nextLong();
// a single trial, since the test asserts exact equality
int numTrials = 1;
int length = 400 * sampleSize;
for (int i = 0; i < numTrials; i++) {
RandomCutForest forest = RandomCutForest.builder().compact(true).dimensions(dimensions)
        .precision(Precision.FLOAT_32).internalShinglingEnabled(true).shingleSize(shingleSize)
        .randomSeed(seed).build();
ThresholdedRandomCutForest first = new ThresholdedRandomCutForest.Builder<>().compact(true)
        .dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed)
        .internalShinglingEnabled(true).shingleSize(shingleSize).anomalyRate(0.01).build();
MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(length, 50, 100, 5,
        seed + i, baseDimensions);
for (double[] point : dataWithKeys.data) {
AnomalyDescriptor firstResult = first.process(point, 0L);
assertEquals(firstResult.getRCFScore(), forest.getAnomalyScore(point), 1e-10);
forest.update(point);
}
}
}
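Both models are seeded identically and consume the same 2-dimensional stream; with internalShinglingEnabled(true) and shingleSize 4, each maintains its own sliding window and scores the resulting 8-dimensional shingles, which is why the assertion can demand exact equality. As a rough sketch of that transformation, the following illustrative class (not part of the library) shows how a window of shingleSize points flattens into one shingled vector:

import java.util.Arrays;

// Illustrative only: mimics how a shingle of size 4 over 2-dimensional
// points yields the 8-dimensional vectors the forest actually stores.
class ShingleSketch {
    static double[] shingle(double[][] window) {
        // window holds the last shingleSize points, oldest first
        double[] out = new double[window.length * window[0].length];
        int pos = 0;
        for (double[] point : window) {
            for (double v : point) {
                out[pos++] = v;
            }
        }
        return out;
    }

    public static void main(String[] args) {
        double[][] window = { { 1, 2 }, { 3, 4 }, { 5, 6 }, { 7, 8 } };
        // prints [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]
        System.out.println(Arrays.toString(shingle(window)));
    }
}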
Use of com.amazon.randomcutforest.RandomCutForest in project random-cut-forest-by-aws by aws.
The class ThresholdedRandomCutForestMapperTest, method testConversions.
@Test
public void testConversions() {
int dimensions = 10;
for (int trials = 0; trials < 10; trials++) {
long seed = new Random().nextLong();
RandomCutForest forest = RandomCutForest.builder().compact(true).dimensions(dimensions)
        .precision(Precision.FLOAT_32).internalShinglingEnabled(false).randomSeed(seed).build();
// note shingleSize == 1
ThresholdedRandomCutForest first = new ThresholdedRandomCutForest.Builder<>().compact(true)
        .dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed)
        .internalShinglingEnabled(false).anomalyRate(0.01).build();
Random r = new Random();
// draw the bound once: re-evaluating new Random().nextInt(1000) in the loop
// condition would change the bound on every iteration
int warmupCount = r.nextInt(1000);
for (int i = 0; i < warmupCount; i++) {
double[] point = r.ints(dimensions, 0, 50).asDoubleStream().toArray();
first.process(point, 0L);
forest.update(point);
}
RandomCutForestMapper mapper = new RandomCutForestMapper();
mapper.setSaveExecutorContextEnabled(true);
mapper.setSaveTreeStateEnabled(true);
mapper.setPartialTreeStateEnabled(true);
RandomCutForest copyForest = mapper.toModel(mapper.toState(forest));
ThresholdedRandomCutForest second = new ThresholdedRandomCutForest(copyForest, 0.01, null);
// compare the restored copy against the original and the underlying forest
int checkCount = r.nextInt(1000);
for (int i = 0; i < checkCount; i++) {
double[] point = r.ints(dimensions, 0, 50).asDoubleStream().toArray();
AnomalyDescriptor firstResult = first.process(point, 0L);
AnomalyDescriptor secondResult = second.process(point, 0L);
assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
assertEquals(firstResult.getRCFScore(), forest.getAnomalyScore(point), 1e-10);
forest.update(point);
}
// serialize + deserialize
ThresholdedRandomCutForestMapper newMapper = new ThresholdedRandomCutForestMapper();
ThresholdedRandomCutForest third = newMapper.toModel(newMapper.toState(second));
// update re-instantiated forest
for (int i = 0; i < 100; i++) {
double[] point = r.ints(dimensions, 0, 50).asDoubleStream().toArray();
AnomalyDescriptor firstResult = first.process(point, 0L);
AnomalyDescriptor secondResult = second.process(point, 0L);
AnomalyDescriptor thirdResult = third.process(point, 0L);
double score = forest.getAnomalyScore(point);
assertEquals(score, firstResult.getRCFScore(), 1e-10);
assertEquals(score, secondResult.getRCFScore(), 1e-10);
assertEquals(score, thirdResult.getRCFScore(), 1e-10);
assertEquals(firstResult.getDataConfidence(), thirdResult.getDataConfidence(), 1e-10);
forest.update(point);
}
}
}
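The round trip above depends on the mapper flags: setSaveExecutorContextEnabled(true) stores the executor context in the state, and the tree-state flags control whether trees are serialized directly or rebuilt from the samplers. A minimal sketch of just the round trip, assuming the same imports as the test (the point values are arbitrary):

RandomCutForest original = RandomCutForest.builder().compact(true).dimensions(2)
        .precision(Precision.FLOAT_32).randomSeed(42L).build();
Random random = new Random(0);
for (int i = 0; i < 1000; i++) {
    original.update(new double[] { random.nextDouble(), random.nextDouble() });
}
RandomCutForestMapper mapper = new RandomCutForestMapper();
mapper.setSaveExecutorContextEnabled(true);
// toState produces a serializable POJO; toModel rebuilds an equivalent forest
RandomCutForest restored = mapper.toModel(mapper.toState(original));
// the original and the restored copy score identically
double[] probe = { 0.5, 0.5 };
System.out.println(original.getAnomalyScore(probe) + " == " + restored.getAnomalyScore(probe));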
Use of com.amazon.randomcutforest.RandomCutForest in project random-cut-forest-by-aws by aws.
The class ThresholdedRandomCutForestMapperTest, method testRoundTripStandardShingleSizeOne.
@Test
public void testRoundTripStandardShingleSizeOne() {
int dimensions = 10;
for (int trials = 0; trials < 1; trials++) {
long seed = new Random().nextLong();
RandomCutForest.Builder<?> builder = RandomCutForest.builder().compact(true)
        .dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed);
// note shingleSize == 1
ThresholdedRandomCutForest first = new ThresholdedRandomCutForest.Builder<>().compact(true)
        .dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed)
        .internalShinglingEnabled(true).anomalyRate(0.01).build();
ThresholdedRandomCutForest second = new ThresholdedRandomCutForest.Builder<>().compact(true)
        .dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed)
        .anomalyRate(0.01).forestMode(ForestMode.STANDARD).internalShinglingEnabled(false).build();
RandomCutForest forest = builder.build();
Random r = new Random();
// fix the iteration count up front instead of re-drawing it in the loop condition
int rounds = 2000 + r.nextInt(1000);
for (int i = 0; i < rounds; i++) {
double[] point = r.ints(dimensions, 0, 50).asDoubleStream().toArray();
AnomalyDescriptor firstResult = first.process(point, 0L);
AnomalyDescriptor secondResult = second.process(point, 0L);
assertEquals(firstResult.getDataConfidence(), secondResult.getDataConfidence(), 1e-10);
assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
assertEquals(firstResult.getRCFScore(), forest.getAnomalyScore(point), 1e-10);
forest.update(point);
}
// serialize + deserialize
ThresholdedRandomCutForestMapper mapper = new ThresholdedRandomCutForestMapper();
ThresholdedRandomCutForest third = mapper.toModel(mapper.toState(second));
// update re-instantiated forest
for (int i = 0; i < 100; i++) {
double[] point = r.ints(dimensions, 0, 50).asDoubleStream().toArray();
AnomalyDescriptor firstResult = first.process(point, 0L);
AnomalyDescriptor secondResult = second.process(point, 0L);
AnomalyDescriptor thirdResult = third.process(point, 0L);
double score = forest.getAnomalyScore(point);
assertEquals(score, firstResult.getRCFScore(), 1e-10);
assertEquals(score, secondResult.getRCFScore(), 1e-10);
assertEquals(score, thirdResult.getRCFScore(), 1e-10);
assertEquals(firstResult.getDataConfidence(), secondResult.getDataConfidence(), 1e-10);
forest.update(point);
}
}
}
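For the thresholded variant, ThresholdedRandomCutForestMapper captures the thresholder state along with the forest, which is why getDataConfidence also agrees after the round trip. A trimmed sketch of just that round trip, assuming the same imports as the test:

ThresholdedRandomCutForest model = new ThresholdedRandomCutForest.Builder<>().compact(true)
        .dimensions(2).precision(Precision.FLOAT_32).randomSeed(7L).anomalyRate(0.01).build();
Random random = new Random(0);
for (int i = 0; i < 1000; i++) {
    model.process(new double[] { random.nextDouble(), random.nextDouble() }, 0L);
}
ThresholdedRandomCutForestMapper mapper = new ThresholdedRandomCutForestMapper();
ThresholdedRandomCutForest restored = mapper.toModel(mapper.toState(model));
// both copies now produce the same score and confidence for the same input
double[] probe = { 0.5, 0.5 };
AnomalyDescriptor a = model.process(probe, 0L);
AnomalyDescriptor b = restored.process(probe, 0L);
System.out.println(a.getRCFScore() + " == " + b.getRCFScore());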
Use of com.amazon.randomcutforest.RandomCutForest in project random-cut-forest-by-aws by aws.
The class ProtostuffExampleWithDynamicLambda, method run.
@Override
public void run() throws Exception {
// Create and populate a random cut forest
int dimensions = 4;
int numberOfTrees = 50;
int sampleSize = 256;
Precision precision = Precision.FLOAT_64;
RandomCutForest forest = RandomCutForest.builder().compact(true).dimensions(dimensions)
        .numberOfTrees(numberOfTrees).sampleSize(sampleSize).precision(precision).build();
int dataSize = 4 * sampleSize;
NormalMixtureTestData testData = new NormalMixtureTestData();
for (double[] point : testData.generateTestData(dataSize, dimensions)) {
forest.update(point);
}
// Convert to an array of bytes and print the size
RandomCutForestMapper mapper = new RandomCutForestMapper();
mapper.setSaveExecutorContextEnabled(true);
Schema<RandomCutForestState> schema = RuntimeSchema.getSchema(RandomCutForestState.class);
LinkedBuffer buffer = LinkedBuffer.allocate(512);
byte[] bytes;
try {
RandomCutForestState state = mapper.toState(forest);
bytes = ProtostuffIOUtil.toByteArray(state, schema, buffer);
} finally {
buffer.clear();
}
System.out.printf("dimensions = %d, numberOfTrees = %d, sampleSize = %d, precision = %s%n", dimensions, numberOfTrees, sampleSize, precision);
System.out.printf("protostuff size = %d bytes%n", bytes.length);
// Restore from protostuff and compare anomaly scores produced by the two
// forests
RandomCutForestState state2 = schema.newMessage();
ProtostuffIOUtil.mergeFrom(bytes, state2, schema);
RandomCutForest forest2 = mapper.toModel(state2);
// dynamically change the time decay (the "lambda" of the class name) on both forests
forest.setTimeDecay(10 * forest.getTimeDecay());
forest2.setTimeDecay(10 * forest2.getTimeDecay());
for (int i = 0; i < numberOfTrees; i++) {
CompactSampler sampler = (CompactSampler) ((SamplerPlusTree) forest.getComponents().get(i)).getSampler();
CompactSampler sampler2 = (CompactSampler) ((SamplerPlusTree) forest2.getComponents().get(i)).getSampler();
if (sampler.getMaxSequenceIndex() != sampler2.getMaxSequenceIndex()) {
throw new IllegalStateException("Incorrect sampler state");
}
if (sampler.getMostRecentTimeDecayUpdate() != sampler2.getMostRecentTimeDecayUpdate()) {
throw new IllegalStateException("Incorrect sampler state");
}
if (sampler2.getMostRecentTimeDecayUpdate() != dataSize - 1) {
throw new IllegalStateException("Incorrect sampler state");
}
}
int testSize = 100;
// allow the scores to differ by up to 5% of log_2(sampleSize)
double delta = Math.log(sampleSize) / Math.log(2) * 0.05;
int differences = 0;
int anomalies = 0;
for (double[] point : testData.generateTestData(testSize, dimensions)) {
double score = forest.getAnomalyScore(point);
double score2 = forest2.getAnomalyScore(point);
// a point scored as an anomaly by either forest should receive a
// similar score (within delta) from the other
if (score > 1 || score2 > 1) {
anomalies++;
if (Math.abs(score - score2) > delta) {
differences++;
}
}
forest.update(point);
forest2.update(point);
}
// first validate that this was a nontrivial test
if (anomalies == 0) {
throw new IllegalStateException("test data did not produce any anomalies");
}
// validate that the two forests agree on anomaly scores
if (differences >= 0.01 * testSize) {
throw new IllegalStateException("restored forest does not agree with original forest");
}
System.out.println("Looks good!");
}
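The serialize and deserialize steps above factor naturally into small helpers. The sketch below wraps the same protostuff calls used in the example; the class and method names are illustrative, and the import paths for the state classes assume the library's state package (verify against the version in use):

import com.amazon.randomcutforest.RandomCutForest;
import com.amazon.randomcutforest.state.RandomCutForestMapper;
import com.amazon.randomcutforest.state.RandomCutForestState;
import io.protostuff.LinkedBuffer;
import io.protostuff.ProtostuffIOUtil;
import io.protostuff.Schema;
import io.protostuff.runtime.RuntimeSchema;

// Illustrative helpers: the same protostuff round trip as the example, factored out.
final class ForestSerDe {
    private static final Schema<RandomCutForestState> SCHEMA =
            RuntimeSchema.getSchema(RandomCutForestState.class);

    static byte[] serialize(RandomCutForest forest) {
        RandomCutForestMapper mapper = new RandomCutForestMapper();
        mapper.setSaveExecutorContextEnabled(true);
        LinkedBuffer buffer = LinkedBuffer.allocate(512);
        try {
            return ProtostuffIOUtil.toByteArray(mapper.toState(forest), SCHEMA, buffer);
        } finally {
            buffer.clear(); // buffers are reusable; clear before the next use
        }
    }

    static RandomCutForest deserialize(byte[] bytes) {
        RandomCutForestState state = SCHEMA.newMessage();
        ProtostuffIOUtil.mergeFrom(bytes, state, SCHEMA);
        return new RandomCutForestMapper().toModel(state);
    }
}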
Use of com.amazon.randomcutforest.RandomCutForest in project random-cut-forest-by-aws by aws.
The class DynamicDensity, method run.
/**
 * Plots the dynamic density example. The output can be visualized with any
 * plotting tool; in gnuplot, the directions to higher density can be drawn via
 *
 *   do for [i=0:358:2] {plot "dynamic_density_example" index (i+1) u 1:2:3:4 w vectors t ""}
 *
 * and the raw density at the points via
 *
 *   do for [i=0:358:2] {plot "dynamic_density_example" index i w p pt 7 palette t ""}
 *
 * @throws Exception
 */
@Override
public void run() throws Exception {
int newDimensions = 2;
long randomSeed = 123;
RandomCutForest newForest = RandomCutForest.builder().numberOfTrees(100).sampleSize(256)
        .dimensions(newDimensions).randomSeed(randomSeed).timeDecay(1.0 / 800)
        .centerOfMassEnabled(true).build();
String name = "dynamic_density_example";
BufferedWriter file = new BufferedWriter(new FileWriter(name));
double[][] data = generate(1000);
double[] queryPoint;
for (int degree = 0; degree < 360; degree += 2) {
for (double[] datum : data) {
newForest.update(rotateClockWise(datum, -2 * PI * degree / 360));
}
for (double[] datum : data) {
queryPoint = rotateClockWise(datum, -2 * PI * degree / 360);
DensityOutput density = newForest.getSimpleDensity(queryPoint);
double value = density.getDensity(0.001, 2);
file.append(queryPoint[0] + " " + queryPoint[1] + " " + value + "\n");
}
// two blank lines separate gnuplot data sets (the "index" blocks)
file.append("\n");
file.append("\n");
for (double x = -0.95; x < 1; x += 0.1) {
for (double y = -0.95; y < 1; y += 0.1) {
DensityOutput density = newForest.getSimpleDensity(new double[] { x, y });
// query the directional density once and reuse its components
DiVector directional = density.getDirectionalDensity(0.001, 2);
double aboveInY = directional.low[1];
double belowInY = directional.high[1];
double toTheLeft = directional.high[0];
double toTheRight = directional.low[0];
double len = Math.sqrt(aboveInY * aboveInY + belowInY * belowInY + toTheLeft * toTheLeft
        + toTheRight * toTheRight);
file.append(x + " " + y + " " + ((toTheRight - toTheLeft) * 0.05 / len) + " "
        + ((aboveInY - belowInY) * 0.05 / len) + "\n");
}
}
file.append("\n");
file.append("\n");
}
file.close();
}
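The helpers generate and rotateClockWise are elided from the snippet. A plausible rotateClockWise is the standard 2-D rotation with the sign convention flipped for clockwise motion; this is an assumption about the helper, not necessarily the project's exact code:

// Assumed implementation of the elided helper: rotates a 2-D point
// clockwise by the given angle in radians.
static double[] rotateClockWise(double[] point, double theta) {
    return new double[] { Math.cos(theta) * point[0] + Math.sin(theta) * point[1],
            -Math.sin(theta) * point[0] + Math.cos(theta) * point[1] };
}

The example passes negative angles, so the data set is effectively rotated counterclockwise by 2 degrees per outer iteration.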