Use of com.amazon.randomcutforest.config.Precision in project random-cut-forest-by-aws by aws.
The class ProtostuffExampleWithShingles, method run().
@Override
public void run() throws Exception {
// Create and populate a random cut forest
int dimensions = 10;
int numberOfTrees = 50;
int sampleSize = 256;
Precision precision = Precision.FLOAT_64;
RandomCutForest forest = RandomCutForest.builder().compact(true).dimensions(dimensions)
        .numberOfTrees(numberOfTrees).sampleSize(sampleSize).precision(precision)
        .shingleSize(dimensions).build();
int dataSize = 1000 * sampleSize;
for (double[] point : generateShingledData(dataSize, dimensions, 0)) {
forest.update(point);
}
// Convert to an array of bytes and print the size
RandomCutForestMapper mapper = new RandomCutForestMapper();
// save the execution context (parallelism and thread pool settings) with the state
mapper.setSaveExecutorContextEnabled(true);
// skip per-tree state; trees are rebuilt from the sampled points on restore
mapper.setSaveTreeStateEnabled(false);
Schema<RandomCutForestState> schema = RuntimeSchema.getSchema(RandomCutForestState.class);
LinkedBuffer buffer = LinkedBuffer.allocate(512);
byte[] bytes;
try {
RandomCutForestState state = mapper.toState(forest);
bytes = ProtostuffIOUtil.toByteArray(state, schema, buffer);
} finally {
buffer.clear();
}
System.out.printf("dimensions = %d, numberOfTrees = %d, sampleSize = %d, precision = %s%n", dimensions, numberOfTrees, sampleSize, precision);
System.out.printf("protostuff size = %d bytes%n", bytes.length);
// Restore from protostuff and compare anomaly scores produced by the two
// forests
RandomCutForestState state2 = schema.newMessage();
ProtostuffIOUtil.mergeFrom(bytes, state2, schema);
RandomCutForest forest2 = mapper.toModel(state2);
int testSize = 10000;
// allow scores to differ by up to 5% of log_2(sampleSize)
double delta = Math.log(sampleSize) / Math.log(2) * 0.05;
int differences = 0;
int anomalies = 0;
for (double[] point : generateShingledData(testSize, dimensions, 2)) {
double score = forest.getAnomalyScore(point);
double score2 = forest2.getAnomalyScore(point);
// count the points flagged as anomalies (score above 1) by either forest, and
// check that such points are scored consistently by the other forest
if (score > 1 || score2 > 1) {
anomalies++;
if (Math.abs(score - score2) > delta) {
differences++;
}
}
forest.update(point);
forest2.update(point);
}
// validate that the two forests disagree on at most 1% of the test points
if (differences >= 0.01 * testSize) {
throw new IllegalStateException("restored forest does not agree with original forest");
}
System.out.println("Looks good!");
}
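The example relies on a generateShingledData helper defined elsewhere in the examples module. A minimal sketch of such a helper follows; the sine-plus-noise signal and its constants are illustrative assumptions rather than the repository's implementation (java.util.Random and java.util.Arrays assumed imported).
// Sketch of a shingling helper: slide a window of length shingleSize over a toy
// one-dimensional signal and emit a copy of the window at each complete position.
double[][] generateShingledData(int size, int shingleSize, long seed) {
    double[][] shingled = new double[size][];
    double[] window = new double[shingleSize];
    Random rng = new Random(seed);
    for (int i = 0; i < size + shingleSize - 1; i++) {
        // toy signal: a sine wave with Gaussian noise
        double value = 100 * Math.sin(i / 10.0) + 10 * rng.nextGaussian();
        // shift the window left by one and append the new value
        System.arraycopy(window, 1, window, 0, shingleSize - 1);
        window[shingleSize - 1] = value;
        if (i >= shingleSize - 1) {
            shingled[i - shingleSize + 1] = Arrays.copyOf(window, shingleSize);
        }
    }
    return shingled;
}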
Use of com.amazon.randomcutforest.config.Precision in project random-cut-forest-by-aws by aws.
The class DynamicThroughput, method run().
@Override
public void run() throws Exception {
// Create and populate a random cut forest
int dimensions = 4;
int numberOfTrees = 50;
int sampleSize = 256;
Precision precision = Precision.FLOAT_64;
int dataSize = 10 * sampleSize;
NormalMixtureTestData testData = new NormalMixtureTestData();
// generate data once to eliminate caching issues
testData.generateTestData(dataSize, dimensions);
testData.generateTestData(sampleSize, dimensions);
for (int i = 0; i < 5; i++) {
RandomCutForest forest = RandomCutForest.builder().compact(true).dimensions(dimensions)
        .randomSeed(0).numberOfTrees(numberOfTrees).sampleSize(sampleSize)
        .precision(precision).build();
RandomCutForest forest2 = RandomCutForest.builder().compact(true).dimensions(dimensions)
        .randomSeed(0).numberOfTrees(numberOfTrees).sampleSize(sampleSize)
        .precision(precision).build();
forest2.setBoundingBoxCacheFraction(i * 0.25);
int mismatches = 0;
for (double[] point : testData.generateTestData(dataSize, dimensions)) {
double score = forest.getAnomalyScore(point);
double score2 = forest2.getAnomalyScore(point);
if (Math.abs(score - score2) > 1e-10) {
mismatches++;
}
forest.update(point);
forest2.update(point);
}
Instant start = Instant.now();
for (double[] point : testData.generateTestData(sampleSize, dimensions)) {
double score = forest.getAnomalyScore(point);
double score2 = forest2.getAnomalyScore(point);
if (Math.abs(score - score2) > 1e-10) {
mismatches++;
}
forest.update(point);
forest2.update(point);
}
Instant finish = Instant.now();
// the scores must match exactly; the cache fraction trades memory for speed
// without changing results
if (mismatches > 0) {
throw new IllegalStateException("score mismatch");
}
System.out.println("So far so good! Caching fraction = " + (i * 0.25) + ", Time =" + Duration.between(start, finish).toMillis() + " ms (note only one forest is changing)");
}
}
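A small variation (a sketch, not part of the repository example): timing updates in isolation, with scoring left out, can help separate the effect of the bounding-box cache on the update path. It uses only calls that already appear above.
// Time updates alone for a fully cached forest.
RandomCutForest timed = RandomCutForest.builder().compact(true).dimensions(4).randomSeed(0)
        .numberOfTrees(50).sampleSize(256).precision(Precision.FLOAT_64).build();
timed.setBoundingBoxCacheFraction(1.0);
NormalMixtureTestData testData = new NormalMixtureTestData();
Instant start = Instant.now();
for (double[] point : testData.generateTestData(10 * 256, 4)) {
    timed.update(point);
}
System.out.println("updates took " + Duration.between(start, Instant.now()).toMillis() + " ms");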
Use of com.amazon.randomcutforest.config.Precision in project random-cut-forest-by-aws by aws.
The class ThresholdedInternalShinglingExample, method run().
@Override
public void run() throws Exception {
// Create and populate a random cut forest
int shingleSize = 4;
int numberOfTrees = 50;
int sampleSize = 256;
Precision precision = Precision.FLOAT_32;
int dataSize = 4 * sampleSize;
// change baseDimensions to try a different number of attributes;
// this parameter is not expected to be larger than 5 for this example
int baseDimensions = 1;
long count = 0;
int dimensions = baseDimensions * shingleSize;
TransformMethod transformMethod = TransformMethod.NORMALIZE_DIFFERENCE;
ThresholdedRandomCutForest forest = ThresholdedRandomCutForest.builder().compact(true)
        .dimensions(dimensions).randomSeed(0).numberOfTrees(numberOfTrees)
        .shingleSize(shingleSize).sampleSize(sampleSize).internalShinglingEnabled(true)
        .precision(precision).anomalyRate(0.01).forestMode(ForestMode.STANDARD)
        .weightTime(0).transformMethod(transformMethod).normalizeTime(true)
        .outputAfter(32).initialAcceptFraction(0.125).build();
ThresholdedRandomCutForest second = ThresholdedRandomCutForest.builder().compact(true)
        .dimensions(dimensions).randomSeed(0).numberOfTrees(numberOfTrees)
        .shingleSize(shingleSize).sampleSize(sampleSize).internalShinglingEnabled(true)
        .precision(precision).anomalyRate(0.01).forestMode(ForestMode.TIME_AUGMENTED)
        .weightTime(0).transformMethod(transformMethod).normalizeTime(true)
        .outputAfter(32).initialAcceptFraction(0.125).build();
// ensure that the parameters of the two forests are the same; otherwise the
// grades/scores cannot match (in particular, weightTime has to be 0)
forest.setLowerThreshold(1.1);
second.setLowerThreshold(1.1);
forest.setHorizon(0.75);
second.setHorizon(0.75);
long seed = new Random().nextLong();
Random noise = new Random(0);
System.out.println("seed = " + seed);
// change the seed argument for a different run
MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(dataSize + shingleSize - 1, 50, 100, 5, seed, baseDimensions);
int keyCounter = 0;
for (double[] point : dataWithKeys.data) {
// the arrival times are expected to be roughly 100 apart (say, in seconds),
// and the noise corresponds to jitter; one can also try TIME_AUGMENTED with
// .normalizeTime(true)
long timestamp = 100 * count + noise.nextInt(10) - 5;
AnomalyDescriptor result = forest.process(point, timestamp);
AnomalyDescriptor test = second.process(point, timestamp);
checkArgument(Math.abs(result.getRCFScore() - test.getRCFScore()) < 1e-10, "score mismatch");
checkArgument(Math.abs(result.getAnomalyGrade() - test.getAnomalyGrade()) < 1e-10, "grade mismatch");
if (keyCounter < dataWithKeys.changeIndices.length && count == dataWithKeys.changeIndices[keyCounter]) {
System.out.println("timestamp " + count + " CHANGE " + Arrays.toString(dataWithKeys.changes[keyCounter]));
++keyCounter;
}
if (result.getAnomalyGrade() != 0) {
System.out.print("timestamp " + count + " RESULT value " + result.getInternalTimeStamp() + " ");
for (int i = 0; i < baseDimensions; i++) {
System.out.print(result.getCurrentInput()[i] + ", ");
}
System.out.print("score " + result.getRCFScore() + ", grade " + result.getAnomalyGrade() + ", ");
if (result.getRelativeIndex() != 0 && result.isStartOfAnomaly()) {
System.out.print(-result.getRelativeIndex() + " steps ago, ");
}
if (result.isExpectedValuesPresent()) {
if (result.getRelativeIndex() != 0 && result.isStartOfAnomaly()) {
System.out.print("instead of ");
for (int i = 0; i < baseDimensions; i++) {
System.out.print(result.getPastValues()[i] + ", ");
}
System.out.print("expected ");
for (int i = 0; i < baseDimensions; i++) {
System.out.print(result.getExpectedValuesList()[0][i] + ", ");
if (result.getPastValues()[i] != result.getExpectedValuesList()[0][i]) {
System.out.print("( " + (result.getPastValues()[i] - result.getExpectedValuesList()[0][i]) + " ) ");
}
}
} else {
System.out.print("expected ");
for (int i = 0; i < baseDimensions; i++) {
System.out.print(result.getExpectedValuesList()[0][i] + ", ");
if (result.getCurrentInput()[i] != result.getExpectedValuesList()[0][i]) {
System.out.print("( " + (result.getCurrentInput()[i] - result.getExpectedValuesList()[0][i]) + " ) ");
}
}
}
} else {
System.out.print("insufficient data to provide expected values");
}
System.out.println();
}
++count;
}
}
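To persist a ThresholdedRandomCutForest such as the ones above, the repository provides ThresholdedRandomCutForestMapper, the same mapper the ConsistencyTest below round-trips through. A minimal sketch, reusing the forest variable from the example:
// Round-trip the forest through its serializable state; the restored model
// should process subsequent points exactly like the original.
ThresholdedRandomCutForestMapper mapper = new ThresholdedRandomCutForestMapper();
ThresholdedRandomCutForest restored = mapper.toModel(mapper.toState(forest));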
Use of com.amazon.randomcutforest.config.Precision in project random-cut-forest-by-aws by aws.
The class V1JsonToV3StateConverter, method convert().
/**
 * The function merges a collection of RCF-1.0 models with the same model
 * parameters and fixes the number of trees in the new model (which has to be
 * less than or equal to the total number of trees in the old models). The
 * conversion uses the execution context of the first forest; it can be
 * adjusted subsequently via setters.
 *
 * @param serializedForests a non-empty list of forests (together having at
 *                          least numberOfTrees trees)
 * @param numberOfTrees     the number of trees in the new forest
 * @param precision         the precision of the new forest
 * @return a merged RCF with the first numberOfTrees trees
 */
public RandomCutForestState convert(List<V1SerializedRandomCutForest> serializedForests, int numberOfTrees, Precision precision) {
checkArgument(serializedForests.size() > 0, "incorrect usage of convert");
checkArgument(numberOfTrees > 0, "incorrect parameter");
int sum = 0;
for (int i = 0; i < serializedForests.size(); i++) {
sum += serializedForests.get(i).getNumberOfTrees();
}
checkArgument(sum >= numberOfTrees, "incorrect parameters");
RandomCutForestState state = new RandomCutForestState();
state.setNumberOfTrees(numberOfTrees);
state.setDimensions(serializedForests.get(0).getDimensions());
state.setTimeDecay(serializedForests.get(0).getLambda());
state.setSampleSize(serializedForests.get(0).getSampleSize());
state.setShingleSize(1);
state.setCenterOfMassEnabled(serializedForests.get(0).isCenterOfMassEnabled());
state.setOutputAfter(serializedForests.get(0).getOutputAfter());
state.setStoreSequenceIndexesEnabled(serializedForests.get(0).isStoreSequenceIndexesEnabled());
state.setTotalUpdates(serializedForests.get(0).getExecutor().getExecutor().getTotalUpdates());
state.setCompact(true);
state.setInternalShinglingEnabled(false);
state.setBoundingBoxCacheFraction(1.0);
state.setSaveSamplerStateEnabled(true);
state.setSaveTreeStateEnabled(false);
state.setSaveCoordinatorStateEnabled(true);
state.setPrecision(precision.name());
state.setCompressed(false);
state.setPartialTreeState(false);
ExecutionContext executionContext = new ExecutionContext();
executionContext.setParallelExecutionEnabled(serializedForests.get(0).isParallelExecutionEnabled());
executionContext.setThreadPoolSize(serializedForests.get(0).getThreadPoolSize());
state.setExecutionContext(executionContext);
SamplerConverter samplerConverter = new SamplerConverter(state.getDimensions(),
        state.getNumberOfTrees() * state.getSampleSize() + 1, precision, numberOfTrees);
serializedForests.stream()
        .flatMap(f -> Arrays.stream(f.getExecutor().getExecutor().getTreeUpdaters()))
        .limit(numberOfTrees)
        .map(V1SerializedRandomCutForest.TreeUpdater::getSampler)
        .forEach(samplerConverter::addSampler);
state.setPointStoreState(samplerConverter.getPointStoreState(precision));
state.setCompactSamplerStates(samplerConverter.compactSamplerStates);
return state;
}
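A brief usage sketch of convert (not from the repository): forestA and forestB are hypothetical V1SerializedRandomCutForest instances already deserialized from RCF-1.0 JSON (deserialization not shown), and the merged state is rebuilt with the RandomCutForestMapper from the first example.
// Merge two previously deserialized v1 forests (hypothetical placeholders) into a
// single 100-tree model; together they must contain at least 100 trees and share
// the same model parameters.
RandomCutForestState merged = new V1JsonToV3StateConverter()
        .convert(Arrays.asList(forestA, forestB), 100, Precision.FLOAT_32);
RandomCutForest forest = new RandomCutForestMapper().toModel(merged);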
Use of com.amazon.randomcutforest.config.Precision in project random-cut-forest-by-aws by aws.
The class ConsistencyTest, method ImputeTest().
@ParameterizedTest
@EnumSource(TransformMethod.class)
public void ImputeTest(TransformMethod transformMethod) {
int sampleSize = 256;
int baseDimensions = 1;
int shingleSize = 4;
int dimensions = baseDimensions * shingleSize;
// the test checks near-exact equality, so reduce the number of trials
int numTrials = 1;
// and use fewer trees to speed the test up
int numberOfTrees = 30;
int length = 10 * sampleSize;
int dataSize = 2 * length;
for (int i = 0; i < numTrials; i++) {
Precision precision = Precision.FLOAT_32;
long seed = new Random().nextLong();
System.out.println("seed = " + seed);
double[] weights = new double[] { 1.7, 4.2 };
ThresholdedRandomCutForest first = ThresholdedRandomCutForest.builder().compact(true)
        .dimensions(dimensions).randomSeed(0).numberOfTrees(numberOfTrees)
        .shingleSize(shingleSize).sampleSize(sampleSize).internalShinglingEnabled(true)
        .precision(precision).anomalyRate(0.01).forestMode(ForestMode.STANDARD)
        .weightTime(0).transformMethod(transformMethod).normalizeTime(true)
        .outputAfter(32).initialAcceptFraction(0.125).weights(weights).build();
ThresholdedRandomCutForest second = ThresholdedRandomCutForest.builder().compact(true)
        .dimensions(dimensions).randomSeed(0).numberOfTrees(numberOfTrees)
        .shingleSize(shingleSize).sampleSize(sampleSize).internalShinglingEnabled(true)
        .precision(precision).anomalyRate(0.01).forestMode(ForestMode.STREAMING_IMPUTE)
        .weightTime(0).transformMethod(transformMethod).normalizeTime(true)
        .outputAfter(32).initialAcceptFraction(0.125).weights(weights).build();
// ensure that the parameters of the two forests are the same; otherwise the
// grades/scores cannot match (in particular, weightTime has to be 0 above)
first.setLowerThreshold(1.1);
second.setLowerThreshold(1.1);
first.setHorizon(0.75);
second.setHorizon(0.75);
Random noise = new Random(0);
// change the seed argument for a different run
MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(dataSize + shingleSize - 1, 50, 100, 5, seed, baseDimensions);
for (int j = 0; j < length; j++) {
// the gap between timestamps has to be asymptotically the same
long timestamp = 100 * j + noise.nextInt(10) - 5;
AnomalyDescriptor result = first.process(dataWithKeys.data[j], 0L);
AnomalyDescriptor test = second.process(dataWithKeys.data[j], timestamp);
assertEquals(result.getRCFScore(), test.getRCFScore(), 1e-6);
assertEquals(result.getAnomalyGrade(), test.getAnomalyGrade(), 1e-6);
}
ThresholdedRandomCutForestMapper mapper = new ThresholdedRandomCutForestMapper();
ThresholdedRandomCutForest third = mapper.toModel(mapper.toState(second));
for (int j = length; j < 2 * length; j++) {
// the timestamp gap has to remain the same
long timestamp = 100 * j + noise.nextInt(10) - 5;
AnomalyDescriptor firstResult = first.process(dataWithKeys.data[j], 0L);
AnomalyDescriptor thirdResult = third.process(dataWithKeys.data[j], timestamp);
assertEquals(firstResult.getRCFScore(), thirdResult.getRCFScore(), 1e-6);
assertEquals(firstResult.getAnomalyGrade(), thirdResult.getAnomalyGrade(), 1e-6);
}
}
}
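A sketch of the STREAMING_IMPUTE behavior this test exercises, using only builder options and the process(point, timestamp) call shown above; the stream and its constants are illustrative. When a timestamp gap is larger than the typical spacing, the forest treats the gap as missing input and imputes it.
// Feed a toy one-dimensional stream; every hundredth point is "dropped" by letting
// the timestamp advance two steps, which STREAMING_IMPUTE mode fills in.
ThresholdedRandomCutForest imputing = ThresholdedRandomCutForest.builder().compact(true)
        .dimensions(4).shingleSize(4).sampleSize(256).randomSeed(0)
        .internalShinglingEnabled(true).forestMode(ForestMode.STREAMING_IMPUTE)
        .transformMethod(TransformMethod.NORMALIZE_DIFFERENCE).build();
Random rng = new Random(0);
long timestamp = 0;
for (int j = 0; j < 1024; j++) {
    timestamp += (j % 100 == 99) ? 200 : 100;
    imputing.process(new double[] { Math.sin(j / 10.0) + 0.1 * rng.nextGaussian() }, timestamp);
}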