use of com.amazon.randomcutforest.testutils.MultiDimDataWithKey in project random-cut-forest-by-aws by aws.
the class ThresholdedRandomCutForestMapperTest method testRoundTripTimeAugmented.
/**
 * Round-trip test for a forest in {@code ForestMode.TIME_AUGMENTED}.
 *
 * <p>
 * Two forests are built with the same seed and configuration and fed the same
 * (point, timestamp) stream; their scores must agree. One of them is then
 * converted to a state object and back through
 * {@code ThresholdedRandomCutForestMapper}, and the restored forest must keep
 * producing the same scores and grades as the originals on further input.
 *
 * @param method the transform method applied by the forests under test
 */
@ParameterizedTest
@EnumSource(value = TransformMethod.class)
public void testRoundTripTimeAugmented(TransformMethod method) {
    int sampleSize = 256;
    int baseDimensions = 1;
    int shingleSize = 8;
    int dimensions = baseDimensions * shingleSize;
    long seed = new Random().nextLong();
    // lower threshold drawn from [1.0, 1.25)
    double value = 1.0 + 0.25 * new Random().nextDouble();

    ThresholdedRandomCutForest first = new ThresholdedRandomCutForest.Builder<>()
            .compact(true)
            .dimensions(dimensions)
            .precision(Precision.FLOAT_32)
            .randomSeed(seed)
            .forestMode(ForestMode.TIME_AUGMENTED)
            .internalShinglingEnabled(true)
            .shingleSize(shingleSize)
            .transformMethod(method)
            .anomalyRate(0.01)
            .adjustThreshold(true)
            .weights(new double[] { 1.0 })
            .build();
    ThresholdedRandomCutForest second = new ThresholdedRandomCutForest.Builder<>()
            .compact(true)
            .dimensions(dimensions)
            .precision(Precision.FLOAT_32)
            .randomSeed(seed)
            .forestMode(ForestMode.TIME_AUGMENTED)
            .internalShinglingEnabled(true)
            .shingleSize(shingleSize)
            .transformMethod(method)
            .anomalyRate(0.01)
            .adjustThreshold(true)
            .weights(new double[] { 1.0 })
            .build();
    first.setLowerThreshold(value);
    second.setLowerThreshold(value);

    Random r = new Random();
    long count = 0;
    MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(10 * sampleSize, 50, 100, 5,
            seed, baseDimensions);

    // phase 1: identically configured forests must agree on every input
    for (double[] point : dataWithKeys.data) {
        // timestamps are roughly 100 apart, with a jitter in [-5, 4]
        long stamp = 100 * count + r.nextInt(10) - 5;
        AnomalyDescriptor firstResult = first.process(point, stamp);
        AnomalyDescriptor secondResult = second.process(point, stamp);
        ++count;
        assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
        if (firstResult.getAnomalyGrade() > 0) {
            assertEquals(secondResult.getAnomalyGrade(), firstResult.getAnomalyGrade(), 1e-10);
            // plain Java assert: only evaluated when assertions are enabled (-ea)
            assert (firstResult.getRCFScore() >= value);
        }
    }

    // serialize + deserialize
    ThresholdedRandomCutForestMapper mapper = new ThresholdedRandomCutForestMapper();
    ThresholdedRandomCutForest third = mapper.toModel(mapper.toState(second));

    MultiDimDataWithKey testData = ShingledMultiDimDataWithKeys.getMultiDimData(100, 50, 100, 5, seed,
            baseDimensions);

    // phase 2: update re-instantiated forest alongside the originals
    for (double[] point : testData.data) {
        // continue the same jittered time series; all three forests receive the
        // computed stamp rather than a constant, so the time-augmented input
        // keeps advancing after the round trip
        long stamp = 100 * count + r.nextInt(10) - 5;
        AnomalyDescriptor firstResult = first.process(point, stamp);
        AnomalyDescriptor secondResult = second.process(point, stamp);
        AnomalyDescriptor thirdResult = third.process(point, stamp);
        assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
        assertEquals(firstResult.getRCFScore(), thirdResult.getRCFScore(), 1e-10);
        assertEquals(firstResult.getAnomalyGrade(), thirdResult.getAnomalyGrade(), 1e-10);
        ++count;
    }
}
use of com.amazon.randomcutforest.testutils.MultiDimDataWithKey in project random-cut-forest-by-aws by aws.
the class ThresholdedRandomCutForestMapperTest method testRoundTripStandard.
/**
 * Round-trip test for a forest in the builder's default forest mode.
 *
 * <p>
 * Two forests are built with the same seed and fed the same points; their
 * scores must agree. One of them is then converted to a state object and back
 * through {@code ThresholdedRandomCutForestMapper}, and the restored forest
 * must keep producing the same scores as the originals on further input.
 *
 * @param method the transform method applied by the forests under test
 */
@ParameterizedTest
@EnumSource(value = TransformMethod.class)
public void testRoundTripStandard(TransformMethod method) {
    int sampleSize = 256;
    int baseDimensions = 1;
    int shingleSize = 8;
    int dimensions = baseDimensions * shingleSize;
    // fixed seed: forests and generated data are the same on every run
    long seed = 0;

    // first sets boundingBoxCacheFraction(0) while second leaves it unset; the
    // scores of the two forests are nevertheless asserted to be equal below
    ThresholdedRandomCutForest first = new ThresholdedRandomCutForest.Builder<>()
            .compact(true)
            .dimensions(dimensions)
            .precision(Precision.FLOAT_32)
            .randomSeed(seed)
            .internalShinglingEnabled(true)
            .shingleSize(shingleSize)
            .anomalyRate(0.01)
            .transformMethod(method)
            .adjustThreshold(true)
            .boundingBoxCacheFraction(0)
            .weights(new double[] { 1.0 })
            .build();
    ThresholdedRandomCutForest second = new ThresholdedRandomCutForest.Builder<>()
            .compact(true)
            .dimensions(dimensions)
            .precision(Precision.FLOAT_32)
            .randomSeed(seed)
            .internalShinglingEnabled(true)
            .shingleSize(shingleSize)
            .anomalyRate(0.01)
            .transformMethod(method)
            .adjustThreshold(true)
            .weights(new double[] { 1.0 })
            .build();

    // lower threshold drawn from [0.75, 1.25)
    double value = 0.75 + 0.5 * new Random().nextDouble();
    first.setLowerThreshold(value);
    second.setLowerThreshold(value);

    MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(10 * sampleSize, 50, 100, 5,
            seed, baseDimensions);

    // phase 1: both forests must agree on every input
    for (double[] point : dataWithKeys.data) {
        AnomalyDescriptor firstResult = first.process(point, 0L);
        AnomalyDescriptor secondResult = second.process(point, 0L);
        assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
        if (firstResult.getAnomalyGrade() > 0) {
            assertEquals(secondResult.getAnomalyGrade(), firstResult.getAnomalyGrade(), 1e-10);
            // plain Java assert: only evaluated when assertions are enabled (-ea)
            assert (firstResult.getRCFScore() >= value);
        }
    }

    // serialize + deserialize
    ThresholdedRandomCutForestMapper mapper = new ThresholdedRandomCutForestMapper();
    ThresholdedRandomCutForest third = mapper.toModel(mapper.toState(second));

    MultiDimDataWithKey testData = ShingledMultiDimDataWithKeys.getMultiDimData(100, 50, 100, 5, seed,
            baseDimensions);

    // phase 2: update re-instantiated forest alongside the originals
    for (double[] point : testData.data) {
        AnomalyDescriptor firstResult = first.process(point, 0L);
        AnomalyDescriptor secondResult = second.process(point, 0L);
        AnomalyDescriptor thirdResult = third.process(point, 0L);
        assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
        assertEquals(firstResult.getRCFScore(), thirdResult.getRCFScore(), 1e-10);
    }
}
use of com.amazon.randomcutforest.testutils.MultiDimDataWithKey in project random-cut-forest-by-aws by aws.
the class ThresholdedTime method run.
/**
 * Example: feeds a constant value to a TIME_AUGMENTED forest while the
 * timestamps are derived from generated mixture data, then prints every
 * distribution change point and every result with a non-zero anomaly grade.
 *
 * @throws Exception declared by the overridden method
 */
@Override
public void run() throws Exception {
    // Create and populate a random cut forest
    int shingleSize = 4;
    int numberOfTrees = 50;
    int sampleSize = 256;
    Precision precision = Precision.FLOAT_32;
    int dataSize = 4 * sampleSize;

    // change this to try different number of attributes,
    // this parameter is not expected to be larger than 5 for this example
    int baseDimensions = 1;
    int dimensions = baseDimensions * shingleSize;

    ThresholdedRandomCutForest forest = new ThresholdedRandomCutForest.Builder<>()
            .compact(true)
            .dimensions(dimensions)
            .randomSeed(0)
            .numberOfTrees(numberOfTrees)
            .shingleSize(shingleSize)
            .sampleSize(sampleSize)
            .internalShinglingEnabled(true)
            .precision(precision)
            .anomalyRate(0.01)
            .forestMode(ForestMode.TIME_AUGMENTED)
            .normalizeTime(true)
            .build();

    // the seed is printed only; the data below is generated with a seed argument of 0
    long seed = new Random().nextLong();
    System.out.println("seed = " + seed);

    // the value handed to the forest never changes; only the timestamp varies
    double[] constantInput = new double[] { 1.0 };

    MultiDimDataWithKey dataWithKeys = new NormalMixtureTestData(10, 50).generateTestDataWithKey(dataSize, 1, 0);
    double[][] points = dataWithKeys.data;

    // the anomalies will move from normal -> anomalous -> normal;
    // the stream starts from normal
    boolean inAnomalousRegime = false;
    int nextChange = 0;

    for (int sequence = 0; sequence < points.length; sequence++) {
        // timestamps are 1000 apart, offset by the floor of 10x the generated value
        double offset = Math.floor(10 * points[sequence][0]);
        long time = (long) (1000L * sequence + offset);
        AnomalyDescriptor result = forest.process(constantInput, time);

        boolean atChangePoint = nextChange < dataWithKeys.changeIndices.length
                && sequence == dataWithKeys.changeIndices[nextChange];
        if (atChangePoint) {
            System.out.print("Sequence " + sequence + " stamp " + (result.getInternalTimeStamp()) + " CHANGE ");
            System.out.println(inAnomalousRegime ? " to Distribution 0 " : " to Distribution 1 ");
            inAnomalousRegime = !inAnomalousRegime;
            ++nextChange;
        }

        // nothing more to report for a zero grade
        if (result.getAnomalyGrade() == 0) {
            continue;
        }

        System.out.print("Sequence " + sequence + " stamp " + (result.getInternalTimeStamp()) + " RESULT ");
        System.out.print("score " + result.getRCFScore() + ", grade " + result.getAnomalyGrade() + ", ");

        if (result.isExpectedValuesPresent()) {
            boolean anomalyStartedEarlier = result.getRelativeIndex() != 0 && result.isStartOfAnomaly();
            if (!anomalyStartedEarlier) {
                // compare the expected timestamp against the current one
                System.out.print("expected " + result.getExpectedTimeStamp() + " ( "
                        + (result.getInternalTimeStamp() - result.getExpectedTimeStamp() + ")"));
            } else {
                // compare the expected timestamp against the past one
                System.out.print(-result.getRelativeIndex() + " steps ago, instead of stamp "
                        + result.getPastTimeStamp());
                System.out.print(", expected timestamp " + result.getExpectedTimeStamp() + " ( "
                        + (result.getPastTimeStamp() - result.getExpectedTimeStamp() + ")"));
            }
        }
        System.out.println();
    }
}
use of com.amazon.randomcutforest.testutils.MultiDimDataWithKey in project random-cut-forest-by-aws by aws.
the class ThresholdedInternalShinglingExample method run.
/**
 * Example: runs a STANDARD and a TIME_AUGMENTED forest (both with
 * {@code weightTime(0)}) side by side on the same jittered stream, checks
 * that their scores and grades agree, and prints every change point and every
 * result with a non-zero anomaly grade.
 *
 * @throws Exception declared by the overridden method
 */
@Override
public void run() throws Exception {
    // Create and populate a random cut forest
    int shingleSize = 4;
    int numberOfTrees = 50;
    int sampleSize = 256;
    Precision precision = Precision.FLOAT_32;
    int dataSize = 4 * sampleSize;

    // change this to try different number of attributes,
    // this parameter is not expected to be larger than 5 for this example
    int baseDimensions = 1;
    int dimensions = baseDimensions * shingleSize;
    TransformMethod transformMethod = TransformMethod.NORMALIZE_DIFFERENCE;

    ThresholdedRandomCutForest forest = ThresholdedRandomCutForest.builder()
            .compact(true)
            .dimensions(dimensions)
            .randomSeed(0)
            .numberOfTrees(numberOfTrees)
            .shingleSize(shingleSize)
            .sampleSize(sampleSize)
            .internalShinglingEnabled(true)
            .precision(precision)
            .anomalyRate(0.01)
            .forestMode(ForestMode.STANDARD)
            .weightTime(0)
            .transformMethod(transformMethod)
            .normalizeTime(true)
            .outputAfter(32)
            .initialAcceptFraction(0.125)
            .build();
    ThresholdedRandomCutForest second = ThresholdedRandomCutForest.builder()
            .compact(true)
            .dimensions(dimensions)
            .randomSeed(0)
            .numberOfTrees(numberOfTrees)
            .shingleSize(shingleSize)
            .sampleSize(sampleSize)
            .internalShinglingEnabled(true)
            .precision(precision)
            .anomalyRate(0.01)
            .forestMode(ForestMode.TIME_AUGMENTED)
            .weightTime(0)
            .transformMethod(transformMethod)
            .normalizeTime(true)
            .outputAfter(32)
            .initialAcceptFraction(0.125)
            .build();

    // ensuring that the parameters are the same; otherwise the grades/scores
    // cannot be the same
    // weighTime has to be 0
    forest.setLowerThreshold(1.1);
    forest.setHorizon(0.75);
    second.setLowerThreshold(1.1);
    second.setHorizon(0.75);

    long seed = new Random().nextLong();
    Random noise = new Random(0);
    System.out.println("seed = " + seed);

    // change the last argument seed for a different run
    MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(dataSize + shingleSize - 1, 50,
            100, 5, seed, baseDimensions);

    long count = 0;
    int keyCounter = 0;
    for (double[] point : dataWithKeys.data) {
        // position of this point in the stream; the counter advances up front
        // so that the early exit below cannot skip it
        final long sequence = count++;

        // idea is that we expect the arrival order to be roughly 100 apart (say
        // seconds)
        // then the noise corresponds to a jitter; one can try TIME_AUGMENTED and
        // .normalizeTime(true)
        long jitter = noise.nextInt(10) - 5;
        long timestamp = 100 * sequence + jitter;

        AnomalyDescriptor result = forest.process(point, timestamp);
        AnomalyDescriptor test = second.process(point, timestamp);
        checkArgument(Math.abs(result.getRCFScore() - test.getRCFScore()) < 1e-10, " error");
        checkArgument(Math.abs(result.getAnomalyGrade() - test.getAnomalyGrade()) < 1e-10, " error");

        boolean atChangePoint = keyCounter < dataWithKeys.changeIndices.length
                && sequence == dataWithKeys.changeIndices[keyCounter];
        if (atChangePoint) {
            System.out.println(
                    "timestamp " + sequence + " CHANGE " + Arrays.toString(dataWithKeys.changes[keyCounter]));
            ++keyCounter;
        }

        // nothing more to report for a zero grade
        if (result.getAnomalyGrade() == 0) {
            continue;
        }

        System.out.print("timestamp " + sequence + " RESULT value " + result.getInternalTimeStamp() + " ");
        for (int d = 0; d < baseDimensions; d++) {
            System.out.print(result.getCurrentInput()[d] + ", ");
        }
        System.out.print("score " + result.getRCFScore() + ", grade " + result.getAnomalyGrade() + ", ");

        // true when the result is the start of an anomaly with a non-zero relative index
        boolean anomalyStartedEarlier = result.getRelativeIndex() != 0 && result.isStartOfAnomaly();
        if (anomalyStartedEarlier) {
            System.out.print(-result.getRelativeIndex() + " steps ago, ");
        }

        if (!result.isExpectedValuesPresent()) {
            System.out.print("insufficient data to provide expected values");
        } else {
            if (anomalyStartedEarlier) {
                System.out.print("instead of ");
                for (int d = 0; d < baseDimensions; d++) {
                    System.out.print(result.getPastValues()[d] + ", ");
                }
            }
            System.out.print("expected ");
            for (int d = 0; d < baseDimensions; d++) {
                System.out.print(result.getExpectedValuesList()[0][d] + ", ");
                // the deviation is measured against the past values when the
                // anomaly started earlier, otherwise against the current input
                if (anomalyStartedEarlier) {
                    if (result.getPastValues()[d] != result.getExpectedValuesList()[0][d]) {
                        System.out.print("( "
                                + (result.getPastValues()[d] - result.getExpectedValuesList()[0][d]) + " ) ");
                    }
                } else {
                    if (result.getCurrentInput()[d] != result.getExpectedValuesList()[0][d]) {
                        System.out.print("( "
                                + (result.getCurrentInput()[d] - result.getExpectedValuesList()[0][d]) + " ) ");
                    }
                }
            }
        }
        System.out.println();
    }
}
use of com.amazon.randomcutforest.testutils.MultiDimDataWithKey in project random-cut-forest-by-aws by aws.
the class ConsistencyTest method ImputeTest.
/**
 * Checks that a STREAMING_IMPUTE forest fed evenly spaced (jittered)
 * timestamps produces the same scores and grades as a STANDARD forest, both
 * before and after the STREAMING_IMPUTE forest is passed through
 * {@code ThresholdedRandomCutForestMapper}.
 *
 * @param transformMethod the transform method applied by the forests under test
 */
@ParameterizedTest
@EnumSource(TransformMethod.class)
public void ImputeTest(TransformMethod transformMethod) {
    int sampleSize = 256;
    int baseDimensions = 1;
    int shingleSize = 4;
    int dimensions = baseDimensions * shingleSize;

    // test is exact equality, reducing the number of trials
    int numTrials = 1;
    // and using fewer trees to speed up test
    int numberOfTrees = 30;

    int length = 10 * sampleSize;
    int dataSize = 2 * length;
    Precision precision = Precision.FLOAT_32;

    for (int trial = 0; trial < numTrials; trial++) {
        long seed = new Random().nextLong();
        System.out.println("seed = " + seed);

        // NOTE(review): two weights are supplied while baseDimensions is 1 —
        // confirm the builder expects an array of this length
        double[] weights = new double[] { 1.7, 4.2 };

        ThresholdedRandomCutForest first = ThresholdedRandomCutForest.builder()
                .compact(true)
                .dimensions(dimensions)
                .randomSeed(0)
                .numberOfTrees(numberOfTrees)
                .shingleSize(shingleSize)
                .sampleSize(sampleSize)
                .internalShinglingEnabled(true)
                .precision(precision)
                .anomalyRate(0.01)
                .forestMode(ForestMode.STANDARD)
                .weightTime(0)
                .transformMethod(transformMethod)
                .normalizeTime(true)
                .outputAfter(32)
                .initialAcceptFraction(0.125)
                .weights(weights)
                .build();
        ThresholdedRandomCutForest second = ThresholdedRandomCutForest.builder()
                .compact(true)
                .dimensions(dimensions)
                .randomSeed(0)
                .numberOfTrees(numberOfTrees)
                .shingleSize(shingleSize)
                .sampleSize(sampleSize)
                .internalShinglingEnabled(true)
                .precision(precision)
                .anomalyRate(0.01)
                .forestMode(ForestMode.STREAMING_IMPUTE)
                .weightTime(0)
                .transformMethod(transformMethod)
                .normalizeTime(true)
                .outputAfter(32)
                .initialAcceptFraction(0.125)
                .weights(weights)
                .build();

        // ensuring that the parameters are the same; otherwise the grades/scores
        // cannot be the same
        // weighTime has to be 0 in the above
        first.setLowerThreshold(1.1);
        first.setHorizon(0.75);
        second.setLowerThreshold(1.1);
        second.setHorizon(0.75);

        Random noise = new Random(0);

        // change the last argument seed for a different run
        MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(dataSize + shingleSize - 1,
                50, 100, 5, seed, baseDimensions);
        double[][] points = dataWithKeys.data;

        // first half of the data: STANDARD versus STREAMING_IMPUTE
        for (int step = 0; step < length; step++) {
            // gap has to be asymptotically same
            int jitter = noise.nextInt(10) - 5;
            long timestamp = 100 * step + jitter;
            AnomalyDescriptor standardResult = first.process(points[step], 0L);
            AnomalyDescriptor imputeResult = second.process(points[step], timestamp);
            assertEquals(standardResult.getRCFScore(), imputeResult.getRCFScore(), 1e-6);
            assertEquals(standardResult.getAnomalyGrade(), imputeResult.getAnomalyGrade(), 1e-6);
        }

        // rebuild the STREAMING_IMPUTE forest from its mapped state
        ThresholdedRandomCutForestMapper mapper = new ThresholdedRandomCutForestMapper();
        ThresholdedRandomCutForest third = mapper.toModel(mapper.toState(second));

        // second half of the data: STANDARD versus the rebuilt forest
        for (int step = length; step < dataSize; step++) {
            // has to be the same gap
            int jitter = noise.nextInt(10) - 5;
            long timestamp = 100 * step + jitter;
            AnomalyDescriptor standardResult = first.process(points[step], 0L);
            AnomalyDescriptor restoredResult = third.process(points[step], timestamp);
            assertEquals(standardResult.getRCFScore(), restoredResult.getRCFScore(), 1e-6);
            assertEquals(standardResult.getAnomalyGrade(), restoredResult.getAnomalyGrade(), 1e-6);
        }
    }
}
Aggregations