Usage of com.amazon.randomcutforest.state.RandomCutForestMapper in project random-cut-forest-by-aws (by aws).
Class ThresholdedRandomCutForestMapper, method toState:
@Override
public ThresholdedRandomCutForestState toState(ThresholdedRandomCutForest model) {
    // Configure the delegate forest mapper to capture the complete forest
    // (tree state, coordinator state, executor context) in compressed form,
    // so the wrapped RandomCutForest can be reconstructed exactly.
    RandomCutForestMapper forestMapper = new RandomCutForestMapper();
    forestMapper.setPartialTreeStateEnabled(true);
    forestMapper.setSaveTreeStateEnabled(true);
    forestMapper.setCompressionEnabled(true);
    forestMapper.setSaveCoordinatorStateEnabled(true);
    forestMapper.setSaveExecutorContextEnabled(true);

    ThresholdedRandomCutForestState state = new ThresholdedRandomCutForestState();

    // Component states: the forest itself, the thresholder, and the (single)
    // preprocessor, each serialized through its dedicated mapper.
    state.setForestState(forestMapper.toState(model.getForest()));
    state.setThresholderState(new BasicThresholderMapper().toState(model.getThresholder()));
    state.setPreprocessorStates(new PreprocessorState[] {
            new PreprocessorMapper().toState((Preprocessor) model.getPreprocessor()) });

    // Predictor-corrector tuning parameters.
    state.setTriggerFactor(model.getPredictorCorrector().getTriggerFactor());
    state.setIgnoreSimilar(model.getPredictorCorrector().isIgnoreSimilar());
    state.setIgnoreSimilarFactor(model.getPredictorCorrector().getIgnoreSimilarFactor());
    state.setNumberOfAttributors(model.getPredictorCorrector().getNumberOfAttributors());

    // Enums are persisted by name for forward-compatible deserialization.
    state.setForestMode(model.getForestMode().name());
    state.setTransformMethod(model.getTransformMethod().name());

    // Snapshot of the most recent anomaly so streaming context survives a
    // serialize/deserialize round trip.
    IRCFComputeDescriptor lastDescriptor = model.getLastAnomalyDescriptor();
    state.setLastAnomalyTimeStamp(lastDescriptor.getInternalTimeStamp());
    state.setLastAnomalyScore(lastDescriptor.getRCFScore());
    state.setLastAnomalyAttribution(new DiVectorMapper().toState(lastDescriptor.getAttribution()));
    state.setLastAnomalyPoint(lastDescriptor.getRCFPoint());
    state.setLastExpectedPoint(lastDescriptor.getExpectedRCFPoint());
    state.setLastRelativeIndex(lastDescriptor.getRelativeIndex());

    return state;
}
Usage of com.amazon.randomcutforest.state.RandomCutForestMapper in project random-cut-forest-by-aws (by aws).
Class ThresholdedRandomCutForestMapperTest, method testConversions:
@Test
public void testConversions() {
    int dimensions = 10;
    for (int trials = 0; trials < 10; trials++) {
        long seed = new Random().nextLong();
        RandomCutForest forest = RandomCutForest.builder().compact(true).dimensions(dimensions)
                .precision(Precision.FLOAT_32).internalShinglingEnabled(false).randomSeed(seed).build();
        // note shingleSize == 1
        ThresholdedRandomCutForest first = new ThresholdedRandomCutForest.Builder<>().compact(true)
                .dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed)
                .internalShinglingEnabled(false).anomalyRate(0.01).build();
        Random r = new Random();
        // BUG FIX: the loop bound was previously written as
        // "i < new Random().nextInt(1000)", which re-drew the bound from a fresh
        // Random on EVERY iteration, making both the iteration count and loop
        // termination probabilistic. Draw each bound exactly once, from the
        // existing generator.
        int warmupRounds = r.nextInt(1000);
        for (int i = 0; i < warmupRounds; i++) {
            double[] point = r.ints(dimensions, 0, 50).asDoubleStream().toArray();
            first.process(point, 0L);
            forest.update(point);
        }
        RandomCutForestMapper mapper = new RandomCutForestMapper();
        mapper.setSaveExecutorContextEnabled(true);
        mapper.setSaveTreeStateEnabled(true);
        mapper.setPartialTreeStateEnabled(true);
        RandomCutForest copyForest = mapper.toModel(mapper.toState(forest));
        ThresholdedRandomCutForest second = new ThresholdedRandomCutForest(copyForest, 0.01, null);
        // verify the copied forest scores identically to the original
        int comparisonRounds = r.nextInt(1000);
        for (int i = 0; i < comparisonRounds; i++) {
            double[] point = r.ints(dimensions, 0, 50).asDoubleStream().toArray();
            AnomalyDescriptor firstResult = first.process(point, 0L);
            AnomalyDescriptor secondResult = second.process(point, 0L);
            assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
            assertEquals(firstResult.getRCFScore(), forest.getAnomalyScore(point), 1e-10);
            forest.update(point);
        }
        // serialize + deserialize
        ThresholdedRandomCutForestMapper newMapper = new ThresholdedRandomCutForestMapper();
        ThresholdedRandomCutForest third = newMapper.toModel(newMapper.toState(second));
        // update re-instantiated forest
        for (int i = 0; i < 100; i++) {
            double[] point = r.ints(dimensions, 0, 50).asDoubleStream().toArray();
            AnomalyDescriptor firstResult = first.process(point, 0L);
            AnomalyDescriptor secondResult = second.process(point, 0L);
            AnomalyDescriptor thirdResult = third.process(point, 0L);
            double score = forest.getAnomalyScore(point);
            assertEquals(score, firstResult.getRCFScore(), 1e-10);
            assertEquals(score, secondResult.getRCFScore(), 1e-10);
            assertEquals(score, thirdResult.getRCFScore(), 1e-10);
            assertEquals(firstResult.getDataConfidence(), thirdResult.getDataConfidence(), 1e-10);
            forest.update(point);
        }
    }
}
Usage of com.amazon.randomcutforest.state.RandomCutForestMapper in project random-cut-forest-by-aws (by aws).
Class ProtostuffExampleWithDynamicLambda, method run:
@Override
public void run() throws Exception {
// Demonstrates protostuff round-tripping of a RandomCutForest, then verifies
// that a timeDecay (lambda) change applied to BOTH forests after restore
// leaves their sampler states and anomaly scores in agreement.
// Create and populate a random cut forest
int dimensions = 4;
int numberOfTrees = 50;
int sampleSize = 256;
Precision precision = Precision.FLOAT_64;
RandomCutForest forest = RandomCutForest.builder().compact(true).dimensions(dimensions).numberOfTrees(numberOfTrees).sampleSize(sampleSize).precision(precision).build();
// feed 4x the sample size so every sampler is full before serialization
int dataSize = 4 * sampleSize;
NormalMixtureTestData testData = new NormalMixtureTestData();
for (double[] point : testData.generateTestData(dataSize, dimensions)) {
forest.update(point);
}
// Convert to an array of bytes and print the size
RandomCutForestMapper mapper = new RandomCutForestMapper();
// executor context is required for toModel() to rebuild a runnable forest
mapper.setSaveExecutorContextEnabled(true);
Schema<RandomCutForestState> schema = RuntimeSchema.getSchema(RandomCutForestState.class);
LinkedBuffer buffer = LinkedBuffer.allocate(512);
byte[] bytes;
try {
RandomCutForestState state = mapper.toState(forest);
bytes = ProtostuffIOUtil.toByteArray(state, schema, buffer);
} finally {
// protostuff buffers must be cleared before reuse; do it even on failure
buffer.clear();
}
System.out.printf("dimensions = %d, numberOfTrees = %d, sampleSize = %d, precision = %s%n", dimensions, numberOfTrees, sampleSize, precision);
System.out.printf("protostuff size = %d bytes%n", bytes.length);
// Restore from protostuff and compare anomaly scores produced by the two
// forests
RandomCutForestState state2 = schema.newMessage();
ProtostuffIOUtil.mergeFrom(bytes, state2, schema);
RandomCutForest forest2 = mapper.toModel(state2);
// change timeDecay identically on both forests; the restored forest should
// track the original exactly (saveLambda appears unused — kept as in source)
double saveLambda = forest.getTimeDecay();
forest.setTimeDecay(10 * forest.getTimeDecay());
forest2.setTimeDecay(10 * forest2.getTimeDecay());
// sampler bookkeeping (sequence indexes, most recent decay update) must match
// component-by-component between the original and restored forests
for (int i = 0; i < numberOfTrees; i++) {
CompactSampler sampler = (CompactSampler) ((SamplerPlusTree) forest.getComponents().get(i)).getSampler();
CompactSampler sampler2 = (CompactSampler) ((SamplerPlusTree) forest2.getComponents().get(i)).getSampler();
if (sampler.getMaxSequenceIndex() != sampler2.getMaxSequenceIndex()) {
throw new IllegalStateException("Incorrect sampler state");
}
if (sampler.getMostRecentTimeDecayUpdate() != sampler2.getMostRecentTimeDecayUpdate()) {
throw new IllegalStateException("Incorrect sampler state");
}
// the last decay update should coincide with the final training point
if (sampler2.getMostRecentTimeDecayUpdate() != dataSize - 1) {
throw new IllegalStateException("Incorrect sampler state");
}
}
int testSize = 100;
// score tolerance scales with log2(sampleSize)
double delta = Math.log(sampleSize) / Math.log(2) * 0.05;
int differences = 0;
int anomalies = 0;
for (double[] point : testData.generateTestData(testSize, dimensions)) {
double score = forest.getAnomalyScore(point);
double score2 = forest2.getAnomalyScore(point);
// count disagreements only on points either forest flags as anomalous;
// also scored as an anomaly by the other forest
if (score > 1 || score2 > 1) {
anomalies++;
if (Math.abs(score - score2) > delta) {
differences++;
}
}
forest.update(point);
forest2.update(point);
}
// first validate that this was a nontrivial test
if (anomalies == 0) {
throw new IllegalStateException("test data did not produce any anomalies");
}
// validate that the two forests agree on anomaly scores
if (differences >= 0.01 * testSize) {
throw new IllegalStateException("restored forest does not agree with original forest");
}
System.out.println("Looks good!");
}
Usage of com.amazon.randomcutforest.state.RandomCutForestMapper in project random-cut-forest-by-aws (by aws).
Class JsonExample, method run:
@Override
public void run() throws Exception {
    // Build a forest and train it on mixture data so the samplers are full.
    int dimensions = 4;
    int numberOfTrees = 50;
    int sampleSize = 256;
    Precision precision = Precision.FLOAT_64;
    RandomCutForest forest = RandomCutForest.builder().compact(true).dimensions(dimensions)
            .numberOfTrees(numberOfTrees).sampleSize(sampleSize).precision(precision).build();

    int dataSize = 4 * sampleSize;
    NormalMixtureTestData testData = new NormalMixtureTestData();
    for (double[] trainingPoint : testData.generateTestData(dataSize, dimensions)) {
        forest.update(trainingPoint);
    }

    // Serialize the forest to JSON and report its size.
    RandomCutForestMapper stateMapper = new RandomCutForestMapper();
    stateMapper.setSaveExecutorContextEnabled(true);
    ObjectMapper objectMapper = new ObjectMapper();
    RandomCutForestState forestState = stateMapper.toState(forest);
    String json = objectMapper.writeValueAsString(forestState);
    System.out.printf("dimensions = %d, numberOfTrees = %d, sampleSize = %d, precision = %s%n",
            dimensions, numberOfTrees, sampleSize, precision);
    System.out.printf("JSON size = %d bytes%n", json.getBytes().length);

    // Deserialize into a second forest and compare anomaly scores.
    RandomCutForestState restoredState = objectMapper.readValue(json, RandomCutForestState.class);
    RandomCutForest restoredForest = stateMapper.toModel(restoredState);

    int testSize = 100;
    // Tolerance scales with log2(sampleSize).
    double delta = Math.log(sampleSize) / Math.log(2) * 0.05;
    int differences = 0;
    int anomalies = 0;
    for (double[] testPoint : testData.generateTestData(testSize, dimensions)) {
        double originalScore = forest.getAnomalyScore(testPoint);
        double restoredScore = restoredForest.getAnomalyScore(testPoint);
        // Compare only points that at least one forest flags as anomalous;
        // also scored as an anomaly by the other forest
        boolean flagged = originalScore > 1 || restoredScore > 1;
        if (flagged) {
            anomalies++;
            if (Math.abs(originalScore - restoredScore) > delta) {
                differences++;
            }
        }
        forest.update(testPoint);
        restoredForest.update(testPoint);
    }

    // first validate that this was a nontrivial test
    if (anomalies == 0) {
        throw new IllegalStateException("test data did not produce any anomalies");
    }
    // validate that the two forests agree on anomaly scores
    if (differences >= 0.01 * testSize) {
        throw new IllegalStateException("restored forest does not agree with original forest");
    }
    System.out.println("Looks good!");
}
Usage of com.amazon.randomcutforest.state.RandomCutForestMapper in project random-cut-forest-by-aws (by aws).
Class ProtostuffExample, method run:
@Override
public void run() throws Exception {
    // Build and train a forest on mixture data.
    int dimensions = 10;
    int numberOfTrees = 50;
    int sampleSize = 256;
    Precision precision = Precision.FLOAT_32;
    RandomCutForest forest = RandomCutForest.builder().compact(true).dimensions(dimensions)
            .numberOfTrees(numberOfTrees).sampleSize(sampleSize).precision(precision).build();

    int dataSize = 1000 * sampleSize;
    NormalMixtureTestData testData = new NormalMixtureTestData();
    for (double[] trainingPoint : testData.generateTestData(dataSize, dimensions)) {
        forest.update(trainingPoint);
    }

    // Serialize with protostuff and report the payload size.
    RandomCutForestMapper stateMapper = new RandomCutForestMapper();
    // Executor context is needed for toModel() to rebuild a runnable forest.
    stateMapper.setSaveExecutorContextEnabled(true);
    Schema<RandomCutForestState> schema = RuntimeSchema.getSchema(RandomCutForestState.class);
    LinkedBuffer buffer = LinkedBuffer.allocate(512);
    byte[] bytes;
    try {
        bytes = ProtostuffIOUtil.toByteArray(stateMapper.toState(forest), schema, buffer);
    } finally {
        // Protostuff buffers must be cleared before reuse, even on failure.
        buffer.clear();
    }
    System.out.printf("dimensions = %d, numberOfTrees = %d, sampleSize = %d, precision = %s%n",
            dimensions, numberOfTrees, sampleSize, precision);
    System.out.printf("protostuff size = %d bytes%n", bytes.length);

    // Restore from protostuff and compare anomaly scores produced by the two
    // forests
    RandomCutForestState restoredState = schema.newMessage();
    ProtostuffIOUtil.mergeFrom(bytes, restoredState, schema);
    RandomCutForest restoredForest = stateMapper.toModel(restoredState);

    int testSize = 100;
    // Tolerance scales with log2(sampleSize).
    double delta = Math.log(sampleSize) / Math.log(2) * 0.05;
    int differences = 0;
    int anomalies = 0;
    for (double[] testPoint : testData.generateTestData(testSize, dimensions)) {
        double originalScore = forest.getAnomalyScore(testPoint);
        double restoredScore = restoredForest.getAnomalyScore(testPoint);
        // Compare only points that at least one forest flags as anomalous;
        // also scored as an anomaly by the other forest
        if (originalScore > 1 || restoredScore > 1) {
            anomalies++;
            if (Math.abs(originalScore - restoredScore) > delta) {
                differences++;
            }
        }
        forest.update(testPoint);
        restoredForest.update(testPoint);
    }

    // first validate that this was a nontrivial test
    if (anomalies == 0) {
        throw new IllegalStateException("test data did not produce any anomalies");
    }
    // validate that the two forests agree on anomaly scores
    if (differences >= 0.01 * testSize) {
        throw new IllegalStateException("restored forest does not agree with original forest");
    }
    System.out.println("Looks good!");
}
Aggregations