Usage of com.amazon.randomcutforest.config.TransformMethod in the project random-cut-forest-by-aws (by AWS):
the run method of the class ThresholdedInternalShinglingExample.
@Override
public void run() throws Exception {
// Example goal: show that a TIME_AUGMENTED forest with weightTime == 0 produces
// the same scores and grades as a STANDARD forest on the same stream (asserted
// below via checkArgument), while printing the anomalies that are detected.
// Create and populate a random cut forest
int shingleSize = 4;
int numberOfTrees = 50;
int sampleSize = 256;
Precision precision = Precision.FLOAT_32;
int dataSize = 4 * sampleSize;
// change this to try different number of attributes,
// this parameter is not expected to be larger than 5 for this example
int baseDimensions = 1;
long count = 0;
// internal shingling: the forest consumes baseDimensions values per update and
// maintains the shingle itself, so the model dimension is baseDimensions * shingleSize
int dimensions = baseDimensions * shingleSize;
TransformMethod transformMethod = TransformMethod.NORMALIZE_DIFFERENCE;
// Two forests with identical parameters (including randomSeed(0)) except forestMode:
// STANDARD vs TIME_AUGMENTED. With weightTime(0) the time dimension carries no
// weight, so both should behave identically.
ThresholdedRandomCutForest forest = ThresholdedRandomCutForest.builder().compact(true).dimensions(dimensions).randomSeed(0).numberOfTrees(numberOfTrees).shingleSize(shingleSize).sampleSize(sampleSize).internalShinglingEnabled(true).precision(precision).anomalyRate(0.01).forestMode(ForestMode.STANDARD).weightTime(0).transformMethod(transformMethod).normalizeTime(true).outputAfter(32).initialAcceptFraction(0.125).build();
ThresholdedRandomCutForest second = ThresholdedRandomCutForest.builder().compact(true).dimensions(dimensions).randomSeed(0).numberOfTrees(numberOfTrees).shingleSize(shingleSize).sampleSize(sampleSize).internalShinglingEnabled(true).precision(precision).anomalyRate(0.01).forestMode(ForestMode.TIME_AUGMENTED).weightTime(0).transformMethod(transformMethod).normalizeTime(true).outputAfter(32).initialAcceptFraction(0.125).build();
// ensuring that the parameters are the same; otherwise the grades/scores cannot
// be the same
// weightTime has to be 0
forest.setLowerThreshold(1.1);
second.setLowerThreshold(1.1);
forest.setHorizon(0.75);
second.setHorizon(0.75);
// data seed is random so each execution sees a different stream; print it so a
// run can be reproduced by hard-coding the seed below
long seed = new Random().nextLong();
Random noise = new Random(0);
System.out.println("seed = " + seed);
// change the last argument seed for a different run
// extra shingleSize - 1 points so that dataSize full shingles are produced
MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(dataSize + shingleSize - 1, 50, 100, 5, seed, baseDimensions);
int keyCounter = 0;
for (double[] point : dataWithKeys.data) {
// idea is that we expect the arrival order to be roughly 100 apart (say
// seconds)
// then the noise corresponds to a jitter; one can try TIME_AUGMENTED and
// .normalizeTime(true)
long timestamp = 100 * count + noise.nextInt(10) - 5;
AnomalyDescriptor result = forest.process(point, timestamp);
AnomalyDescriptor test = second.process(point, timestamp);
// the two forests must agree (up to floating-point tolerance) on every point
checkArgument(Math.abs(result.getRCFScore() - test.getRCFScore()) < 1e-10, " error");
checkArgument(Math.abs(result.getAnomalyGrade() - test.getAnomalyGrade()) < 1e-10, " error");
// report the injected distribution changes as they are passed in the stream
if (keyCounter < dataWithKeys.changeIndices.length && count == dataWithKeys.changeIndices[keyCounter]) {
System.out.println("timestamp " + count + " CHANGE " + Arrays.toString(dataWithKeys.changes[keyCounter]));
++keyCounter;
}
// nonzero grade indicates a detected anomaly; print the details
if (result.getAnomalyGrade() != 0) {
System.out.print("timestamp " + count + " RESULT value " + result.getInternalTimeStamp() + " ");
for (int i = 0; i < baseDimensions; i++) {
System.out.print(result.getCurrentInput()[i] + ", ");
}
System.out.print("score " + result.getRCFScore() + ", grade " + result.getAnomalyGrade() + ", ");
// relative index != 0 means the anomaly is attributed to an earlier
// position in the shingle, not the current input
if (result.getRelativeIndex() != 0 && result.isStartOfAnomaly()) {
System.out.print(-result.getRelativeIndex() + " steps ago, ");
}
if (result.isExpectedValuesPresent()) {
if (result.getRelativeIndex() != 0 && result.isStartOfAnomaly()) {
// anomaly is in the past: show the actual past values and what was expected
System.out.print("instead of ");
for (int i = 0; i < baseDimensions; i++) {
System.out.print(result.getPastValues()[i] + ", ");
}
System.out.print("expected ");
for (int i = 0; i < baseDimensions; i++) {
System.out.print(result.getExpectedValuesList()[0][i] + ", ");
if (result.getPastValues()[i] != result.getExpectedValuesList()[0][i]) {
System.out.print("( " + (result.getPastValues()[i] - result.getExpectedValuesList()[0][i]) + " ) ");
}
}
} else {
// anomaly is the current input: show expected values and the deviation
System.out.print("expected ");
for (int i = 0; i < baseDimensions; i++) {
System.out.print(result.getExpectedValuesList()[0][i] + ", ");
if (result.getCurrentInput()[i] != result.getExpectedValuesList()[0][i]) {
System.out.print("( " + (result.getCurrentInput()[i] - result.getExpectedValuesList()[0][i]) + " ) ");
}
}
}
} else {
System.out.print("insufficient data to provide expected values");
}
System.out.println();
}
++count;
}
}
Usage of com.amazon.randomcutforest.config.TransformMethod in the project random-cut-forest-by-aws (by AWS):
the toModel method of the class ThresholdedRandomCutForestMapper.
/**
 * Reconstructs a {@link ThresholdedRandomCutForest} from its serialized state.
 *
 * @param state the serialized state to rehydrate
 * @param seed  unused here; present to satisfy the mapper interface
 * @return a fully assembled ThresholdedRandomCutForest
 */
@Override
public ThresholdedRandomCutForest toModel(ThresholdedRandomCutForestState state, long seed) {
    // Rehydrate the constituent components from their serialized sub-states.
    RandomCutForest forest = new RandomCutForestMapper().toModel(state.getForestState());
    BasicThresholder thresholder = new BasicThresholderMapper().toModel(state.getThresholderState());
    Preprocessor preprocessor = new PreprocessorMapper().toModel(state.getPreprocessorStates()[0]);

    // Enum-valued configuration is persisted as strings.
    ForestMode mode = ForestMode.valueOf(state.getForestMode());
    TransformMethod method = TransformMethod.valueOf(state.getTransformMethod());

    // Rebuild the descriptor of the last observed anomaly.
    RCFComputeDescriptor lastDescriptor = new RCFComputeDescriptor(null, 0L);
    lastDescriptor.setRCFScore(state.getLastAnomalyScore());
    lastDescriptor.setInternalTimeStamp(state.getLastAnomalyTimeStamp());
    lastDescriptor.setAttribution(new DiVectorMapper().toModel(state.getLastAnomalyAttribution()));
    lastDescriptor.setRCFPoint(state.getLastAnomalyPoint());
    lastDescriptor.setExpectedRCFPoint(state.getLastExpectedPoint());
    lastDescriptor.setRelativeIndex(state.getLastRelativeIndex());
    lastDescriptor.setForestMode(mode);
    lastDescriptor.setTransformMethod(method);
    lastDescriptor.setImputationMethod(ImputationMethod.valueOf(state.getPreprocessorStates()[0].getImputationMethod()));

    // Restore the predictor-corrector and its tuning parameters.
    PredictorCorrector corrector = new PredictorCorrector(thresholder);
    corrector.setIgnoreSimilar(state.isIgnoreSimilar());
    corrector.setIgnoreSimilarFactor(state.getIgnoreSimilarFactor());
    corrector.setTriggerFactor(state.getTriggerFactor());
    corrector.setNumberOfAttributors(state.getNumberOfAttributors());

    return new ThresholdedRandomCutForest(mode, method, forest, corrector, preprocessor, lastDescriptor);
}
Usage of com.amazon.randomcutforest.config.TransformMethod in the project random-cut-forest-by-aws (by AWS):
the applyBasicCorrector method of the class PredictorCorrector.
/**
 * A first-stage corrector that attempts to undo the after-effects of a previous
 * anomaly which may still be inside the shingle, or just preceding the shingle.
 * The input point is not mutated; a corrected copy is returned.
 *
 * @param point                 the current (transformed) point under evaluation
 * @param gap                   the relative position of the previous anomaly being
 *                              corrected (0 .. shingleSize, smaller = more recent)
 * @param shingleSize           size of the shingle
 * @param baseDimensions        number of input dimensions per shingle entry
 *                              (so point.length is presumably shingleSize * baseDimensions — TODO confirm)
 * @param lastAnomalyDescriptor descriptor of the most recent anomaly, supplying its
 *                              RCF point, expected point, relative index, transform
 *                              method and forest mode
 * @return the corrected copy of {@code point}
 */
double[] applyBasicCorrector(double[] point, int gap, int shingleSize, int baseDimensions, IRCFComputeDescriptor lastAnomalyDescriptor) {
checkArgument(gap >= 0 && gap <= shingleSize, "incorrect invocation");
// work on a copy; callers keep the original point intact
double[] correctedPoint = Arrays.copyOf(point, point.length);
// NOTE(review): lastExpectedPoint is dereferenced below whenever gap < shingleSize —
// assumes callers only invoke this when an expected point was recorded; confirm.
double[] lastExpectedPoint = lastAnomalyDescriptor.getExpectedRCFPoint();
double[] lastAnomalyPoint = lastAnomalyDescriptor.getRCFPoint();
int lastRelativeIndex = lastAnomalyDescriptor.getRelativeIndex();
if (gap < shingleSize) {
// the previous anomaly still overlaps the current shingle: replace the overlapping
// prefix of the shingle with the values that were expected instead of the anomaly
System.arraycopy(lastExpectedPoint, gap * baseDimensions, correctedPoint, 0, point.length - gap * baseDimensions);
}
if (lastRelativeIndex == 0) {
// it is possible to fix other cases, but it is more complicated
TransformMethod transformMethod = lastAnomalyDescriptor.getTransformMethod();
if (transformMethod == TransformMethod.DIFFERENCE || transformMethod == TransformMethod.NORMALIZE_DIFFERENCE) {
// differenced transforms: the first entry after the overwritten region was
// differenced against the anomalous value, so shift it by (anomaly - expected)
for (int y = 0; y < baseDimensions; y++) {
correctedPoint[point.length - gap * baseDimensions + y] += lastAnomalyPoint[point.length - baseDimensions + y] - lastExpectedPoint[point.length - baseDimensions + y];
}
} else if (lastAnomalyDescriptor.getForestMode() == ForestMode.TIME_AUGMENTED) {
// definitely correct the time dimension which is always differenced
// this applies to the non-differenced cases
correctedPoint[point.length - (gap - 1) * baseDimensions - 1] += lastAnomalyPoint[point.length - 1] - lastExpectedPoint[point.length - 1];
}
}
return correctedPoint;
}
Aggregations