Search in sources :

Example 6 with ThresholdedRandomCutForest

use of com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest in project random-cut-forest-by-aws by aws.

The following is from the class ThresholdedRandomCutForestMapperTest, method testRoundTripStandard:

/**
 * Round-trip test: two forests built with identical parameters (differing only
 * in bounding-box cache fraction, which must not change results) are fed the
 * same stream and must agree on scores/grades; a third forest reconstructed via
 * serialize + deserialize must then track them exactly.
 *
 * @param method the input transformation method under test
 */
@ParameterizedTest
@EnumSource(value = TransformMethod.class)
public void testRoundTripStandard(TransformMethod method) {
    int sampleSize = 256;
    int baseDimensions = 1;
    int shingleSize = 8;
    int dimensions = baseDimensions * shingleSize;
    // BUG FIX: the original set seed = 0 and discarded the result of
    // new Random().nextLong() on the following line; assign the random value so
    // each run exercises a fresh stream (all forests still share one seed).
    long seed = new Random().nextLong();
    // first forest disables the bounding-box cache (fraction 0)
    ThresholdedRandomCutForest first = new ThresholdedRandomCutForest.Builder<>().compact(true).dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed).internalShinglingEnabled(true).shingleSize(shingleSize).anomalyRate(0.01).transformMethod(method).adjustThreshold(true).boundingBoxCacheFraction(0).weights(new double[] { 1.0 }).build();
    // second forest is identical except it keeps the default cache fraction
    ThresholdedRandomCutForest second = new ThresholdedRandomCutForest.Builder<>().compact(true).dimensions(dimensions).precision(Precision.FLOAT_32).randomSeed(seed).internalShinglingEnabled(true).shingleSize(shingleSize).anomalyRate(0.01).transformMethod(method).adjustThreshold(true).weights(new double[] { 1.0 }).build();
    // common lower threshold in [0.75, 1.25); any reported anomaly must score at least this
    double value = 0.75 + 0.5 * new Random().nextDouble();
    first.setLowerThreshold(value);
    second.setLowerThreshold(value);
    // (dropped an unused local `Random r = new Random();` from the original)
    MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(10 * sampleSize, 50, 100, 5, seed, baseDimensions);
    for (double[] point : dataWithKeys.data) {
        AnomalyDescriptor firstResult = first.process(point, 0L);
        AnomalyDescriptor secondResult = second.process(point, 0L);
        assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
        if (firstResult.getAnomalyGrade() > 0) {
            assertEquals(secondResult.getAnomalyGrade(), firstResult.getAnomalyGrade(), 1e-10);
            assert (firstResult.getRCFScore() >= value);
        }
    }
    // serialize + deserialize
    ThresholdedRandomCutForestMapper mapper = new ThresholdedRandomCutForestMapper();
    ThresholdedRandomCutForest third = mapper.toModel(mapper.toState(second));
    MultiDimDataWithKey testData = ShingledMultiDimDataWithKeys.getMultiDimData(100, 50, 100, 5, seed, baseDimensions);
    // update re-instantiated forest: all three must stay in lockstep on fresh data
    for (double[] point : testData.data) {
        AnomalyDescriptor firstResult = first.process(point, 0L);
        AnomalyDescriptor secondResult = second.process(point, 0L);
        AnomalyDescriptor thirdResult = third.process(point, 0L);
        assertEquals(firstResult.getRCFScore(), secondResult.getRCFScore(), 1e-10);
        assertEquals(firstResult.getRCFScore(), thirdResult.getRCFScore(), 1e-10);
    }
}
Also used : Random(java.util.Random) AnomalyDescriptor(com.amazon.randomcutforest.parkservices.AnomalyDescriptor) MultiDimDataWithKey(com.amazon.randomcutforest.testutils.MultiDimDataWithKey) ThresholdedRandomCutForest(com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest) EnumSource(org.junit.jupiter.params.provider.EnumSource) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 7 with ThresholdedRandomCutForest

use of com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest in project random-cut-forest-by-aws by aws.

The following is from the class ThresholdedTime, method run:

@Override
public void run() throws Exception {
    // Create and populate a random cut forest
    int shingleSize = 4;
    int numberOfTrees = 50;
    int sampleSize = 256;
    Precision precision = Precision.FLOAT_32;
    int dataSize = 4 * sampleSize;
    // change this to try different number of attributes,
    // this parameter is not expected to be larger than 5 for this example
    int baseDimensions = 1;
    int count = 0;
    int dimensions = baseDimensions * shingleSize;
    // TIME_AUGMENTED mode: the timestamp passed to process() is appended
    // (normalized, per normalizeTime(true)) as an extra dimension of each point
    ThresholdedRandomCutForest forest = new ThresholdedRandomCutForest.Builder<>().compact(true).dimensions(dimensions).randomSeed(0).numberOfTrees(numberOfTrees).shingleSize(shingleSize).sampleSize(sampleSize).internalShinglingEnabled(true).precision(precision).anomalyRate(0.01).forestMode(ForestMode.TIME_AUGMENTED).normalizeTime(true).build();
    long seed = new Random().nextLong();
    // the input value is held constant; only the timestamp varies between calls
    // to process(), so any anomaly reported below comes from the time dimension
    double[] data = new double[] { 1.0 };
    System.out.println("seed = " + seed);
    // NOTE(review): the printed seed is never passed to the generator below —
    // generateTestDataWithKey receives a literal 0; confirm whether the
    // generator was meant to be seeded with it
    NormalMixtureTestData normalMixtureTestData = new NormalMixtureTestData(10, 50);
    MultiDimDataWithKey dataWithKeys = normalMixtureTestData.generateTestDataWithKey(dataSize, 1, 0);
    /**
     * the anomalies will move from normal -> anomalous -> normal starts from normal
     */
    boolean anomalyState = false;
    int keyCounter = 0;
    for (double[] point : dataWithKeys.data) {
        // arrival times are ~1000 apart, jittered by the generated point value
        long time = (long) (1000L * count + Math.floor(10 * point[0]));
        AnomalyDescriptor result = forest.process(data, time);
        // announce each distribution change at the generator's change indices
        if (keyCounter < dataWithKeys.changeIndices.length && count == dataWithKeys.changeIndices[keyCounter]) {
            System.out.print("Sequence " + count + " stamp " + (result.getInternalTimeStamp()) + " CHANGE ");
            if (!anomalyState) {
                System.out.println(" to Distribution 1 ");
            } else {
                System.out.println(" to Distribution 0 ");
            }
            anomalyState = !anomalyState;
            ++keyCounter;
        }
        // report score/grade and, where available, the expected timestamp
        if (result.getAnomalyGrade() != 0) {
            System.out.print("Sequence " + count + " stamp " + (result.getInternalTimeStamp()) + " RESULT ");
            System.out.print("score " + result.getRCFScore() + ", grade " + result.getAnomalyGrade() + ", ");
            if (result.isExpectedValuesPresent()) {
                // a nonzero relative index at the start of an anomaly means the
                // anomaly actually began some steps in the past
                if (result.getRelativeIndex() != 0 && result.isStartOfAnomaly()) {
                    System.out.print(-result.getRelativeIndex() + " steps ago, instead of stamp " + result.getPastTimeStamp());
                    System.out.print(", expected timestamp " + result.getExpectedTimeStamp() + " ( " + (result.getPastTimeStamp() - result.getExpectedTimeStamp() + ")"));
                } else {
                    System.out.print("expected " + result.getExpectedTimeStamp() + " ( " + (result.getInternalTimeStamp() - result.getExpectedTimeStamp() + ")"));
                }
            }
            System.out.println();
        }
        ++count;
    }
}
Also used : Random(java.util.Random) Precision(com.amazon.randomcutforest.config.Precision) AnomalyDescriptor(com.amazon.randomcutforest.parkservices.AnomalyDescriptor) NormalMixtureTestData(com.amazon.randomcutforest.testutils.NormalMixtureTestData) MultiDimDataWithKey(com.amazon.randomcutforest.testutils.MultiDimDataWithKey) ThresholdedRandomCutForest(com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest)

Example 8 with ThresholdedRandomCutForest

use of com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest in project random-cut-forest-by-aws by aws.

The following is from the class ThresholdedInternalShinglingExample, method run:

@Override
public void run() throws Exception {
    // Create and populate a random cut forest
    int shingleSize = 4;
    int numberOfTrees = 50;
    int sampleSize = 256;
    Precision precision = Precision.FLOAT_32;
    int dataSize = 4 * sampleSize;
    // change this to try different number of attributes,
    // this parameter is not expected to be larger than 5 for this example
    int baseDimensions = 1;
    long count = 0;
    int dimensions = baseDimensions * shingleSize;
    TransformMethod transformMethod = TransformMethod.NORMALIZE_DIFFERENCE;
    // two forests with matching parameters: STANDARD mode vs TIME_AUGMENTED
    // mode with weightTime(0); with zero time weight the augmented forest must
    // produce the same scores/grades as the standard one
    ThresholdedRandomCutForest forest = ThresholdedRandomCutForest.builder().compact(true).dimensions(dimensions).randomSeed(0).numberOfTrees(numberOfTrees).shingleSize(shingleSize).sampleSize(sampleSize).internalShinglingEnabled(true).precision(precision).anomalyRate(0.01).forestMode(ForestMode.STANDARD).weightTime(0).transformMethod(transformMethod).normalizeTime(true).outputAfter(32).initialAcceptFraction(0.125).build();
    ThresholdedRandomCutForest second = ThresholdedRandomCutForest.builder().compact(true).dimensions(dimensions).randomSeed(0).numberOfTrees(numberOfTrees).shingleSize(shingleSize).sampleSize(sampleSize).internalShinglingEnabled(true).precision(precision).anomalyRate(0.01).forestMode(ForestMode.TIME_AUGMENTED).weightTime(0).transformMethod(transformMethod).normalizeTime(true).outputAfter(32).initialAcceptFraction(0.125).build();
    // ensuring that the parameters are the same; otherwise the grades/scores cannot
    // be the same
    // weighTime has to be 0
    forest.setLowerThreshold(1.1);
    second.setLowerThreshold(1.1);
    forest.setHorizon(0.75);
    second.setHorizon(0.75);
    long seed = new Random().nextLong();
    Random noise = new Random(0);
    System.out.println("seed = " + seed);
    // change the last argument seed for a different run
    MultiDimDataWithKey dataWithKeys = ShingledMultiDimDataWithKeys.getMultiDimData(dataSize + shingleSize - 1, 50, 100, 5, seed, baseDimensions);
    int keyCounter = 0;
    for (double[] point : dataWithKeys.data) {
        // idea is that we expect the arrival order to be roughly 100 apart (say
        // seconds)
        // then the noise corresponds to a jitter; one can try TIME_AUGMENTED and
        // .normalizeTime(true)
        long timestamp = 100 * count + noise.nextInt(10) - 5;
        AnomalyDescriptor result = forest.process(point, timestamp);
        AnomalyDescriptor test = second.process(point, timestamp);
        // the two forests must agree to within floating-point tolerance
        checkArgument(Math.abs(result.getRCFScore() - test.getRCFScore()) < 1e-10, " error");
        checkArgument(Math.abs(result.getAnomalyGrade() - test.getAnomalyGrade()) < 1e-10, " error");
        // announce each distribution change at the generator's change indices
        if (keyCounter < dataWithKeys.changeIndices.length && count == dataWithKeys.changeIndices[keyCounter]) {
            System.out.println("timestamp " + count + " CHANGE " + Arrays.toString(dataWithKeys.changes[keyCounter]));
            ++keyCounter;
        }
        // report each anomaly: inputs, score/grade, and expected values if any
        if (result.getAnomalyGrade() != 0) {
            System.out.print("timestamp " + count + " RESULT value " + result.getInternalTimeStamp() + " ");
            for (int i = 0; i < baseDimensions; i++) {
                System.out.print(result.getCurrentInput()[i] + ", ");
            }
            System.out.print("score " + result.getRCFScore() + ", grade " + result.getAnomalyGrade() + ", ");
            // a nonzero relative index at the start of an anomaly means the
            // anomaly actually began some steps in the past
            if (result.getRelativeIndex() != 0 && result.isStartOfAnomaly()) {
                System.out.print(-result.getRelativeIndex() + " steps ago, ");
            }
            if (result.isExpectedValuesPresent()) {
                if (result.getRelativeIndex() != 0 && result.isStartOfAnomaly()) {
                    // anomaly began in the past: compare past values against expectations
                    System.out.print("instead of ");
                    for (int i = 0; i < baseDimensions; i++) {
                        System.out.print(result.getPastValues()[i] + ", ");
                    }
                    System.out.print("expected ");
                    for (int i = 0; i < baseDimensions; i++) {
                        System.out.print(result.getExpectedValuesList()[0][i] + ", ");
                        if (result.getPastValues()[i] != result.getExpectedValuesList()[0][i]) {
                            System.out.print("( " + (result.getPastValues()[i] - result.getExpectedValuesList()[0][i]) + " ) ");
                        }
                    }
                } else {
                    // anomaly is current: compare the current input against expectations
                    System.out.print("expected ");
                    for (int i = 0; i < baseDimensions; i++) {
                        System.out.print(result.getExpectedValuesList()[0][i] + ", ");
                        if (result.getCurrentInput()[i] != result.getExpectedValuesList()[0][i]) {
                            System.out.print("( " + (result.getCurrentInput()[i] - result.getExpectedValuesList()[0][i]) + " ) ");
                        }
                    }
                }
            } else {
                System.out.print("insufficient data to provide expected values");
            }
            System.out.println();
        }
        ++count;
    }
}
Also used : Random(java.util.Random) Precision(com.amazon.randomcutforest.config.Precision) AnomalyDescriptor(com.amazon.randomcutforest.parkservices.AnomalyDescriptor) MultiDimDataWithKey(com.amazon.randomcutforest.testutils.MultiDimDataWithKey) TransformMethod(com.amazon.randomcutforest.config.TransformMethod) ThresholdedRandomCutForest(com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest)

Example 9 with ThresholdedRandomCutForest

use of com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest in project ml-commons by opensearch-project.

The following is from the class FixedInTimeRandomCutForest, method predict:

/**
 * Scores a data frame with a previously trained thresholded random cut forest.
 * The forest is rehydrated from the serialized bytes carried by {@code model},
 * every row is processed, and the per-row results are packaged as prediction
 * output.
 *
 * @param dataFrame rows to score
 * @param model serialized FIT RCF model; must not be null
 * @return prediction output holding one result row per input row
 * @throws IllegalArgumentException if {@code model} is null
 */
@Override
public MLOutput predict(DataFrame dataFrame, Model model) {
    if (model == null) {
        throw new IllegalArgumentException("No model found for FIT RCF prediction.");
    }
    // rebuild the forest from the persisted state
    Object deserialized = ModelSerDeSer.deserialize(model.getContent());
    ThresholdedRandomCutForestState restoredState = (ThresholdedRandomCutForestState) deserialized;
    ThresholdedRandomCutForest restoredForest = trcfMapper.toModel(restoredState);
    // score every row, then wrap the rows into the ML prediction output
    List<Map<String, Object>> scoredRows = process(dataFrame, restoredForest);
    return MLPredictionOutput.builder()
        .predictionResult(DataFrameBuilder.load(scoredRows))
        .build();
}
Also used : ThresholdedRandomCutForestState(com.amazon.randomcutforest.parkservices.state.ThresholdedRandomCutForestState) HashMap(java.util.HashMap) Map(java.util.Map) ThresholdedRandomCutForest(com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest)

Example 10 with ThresholdedRandomCutForest

use of com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest in project ml-commons by opensearch-project.

The following is from the class FixedInTimeRandomCutForest, method process:

/**
 * Runs every row of the data frame through the forest and collects the anomaly
 * score and grade per row. The column named {@code timeField} supplies the
 * timestamp (LONG epoch millis, or a STRING parsed with
 * {@code simpleDateFormat}); all other columns become the feature vector.
 *
 * @param dataFrame input rows, assumed already sorted by time
 * @param forest the thresholded forest to feed
 * @return one map per row with the timestamp, "score" and "anomaly_grade"
 * @throws MLValidationException on an unparsable or wrongly typed time column
 */
private List<Map<String, Object>> process(DataFrame dataFrame, ThresholdedRandomCutForest forest) {
    ColumnMeta[] metas = dataFrame.columnMetas();
    List<Map<String, Object>> scored = new ArrayList<>();
    List<Double> features = new ArrayList<>();
    for (int rowIndex = 0; rowIndex < dataFrame.size(); rowIndex++) {
        Row row = dataFrame.getRow(rowIndex);
        // -1 means "no timestamp seen" (e.g. when timeField is null)
        long timestamp = -1;
        for (int col = 0; col < metas.length; col++) {
            ColumnMeta meta = metas[col];
            ColumnValue cell = row.getValue(col);
            // TODO: sort dataframe by time field with asc order. Currently consider the date already sorted by time.
            if (timeField == null || !timeField.equals(meta.getName())) {
                // ordinary feature column
                features.add(cell.doubleValue());
                continue;
            }
            // this is the time column; accept LONG directly or parse a STRING
            ColumnType columnType = meta.getColumnType();
            if (columnType == ColumnType.LONG) {
                timestamp = cell.longValue();
            } else if (columnType == ColumnType.STRING) {
                try {
                    timestamp = simpleDateFormat.parse(cell.stringValue()).getTime();
                } catch (ParseException e) {
                    log.error("Failed to parse timestamp " + cell.stringValue(), e);
                    throw new MLValidationException("Failed to parse timestamp " + cell.stringValue());
                }
            } else {
                throw new MLValidationException("Wrong data type of time field. Should use LONG or STRING, but got " + columnType);
            }
        }
        // unbox the accumulated features, then reset the buffer for the next row
        double[] point = new double[features.size()];
        for (int i = 0; i < point.length; i++) {
            point[i] = features.get(i);
        }
        features.clear();
        AnomalyDescriptor descriptor = forest.process(point, timestamp);
        Map<String, Object> rowResult = new HashMap<>();
        rowResult.put(timeField, timestamp);
        rowResult.put("score", descriptor.getRCFScore());
        rowResult.put("anomaly_grade", descriptor.getAnomalyGrade());
        scored.add(rowResult);
    }
    return scored;
}
Also used : MLOutput(org.opensearch.ml.common.parameter.MLOutput) Precision(com.amazon.randomcutforest.config.Precision) ThresholdedRandomCutForestMapper(com.amazon.randomcutforest.parkservices.state.ThresholdedRandomCutForestMapper) SimpleDateFormat(java.text.SimpleDateFormat) MLValidationException(org.opensearch.ml.common.exception.MLValidationException) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) FunctionName(org.opensearch.ml.common.parameter.FunctionName) Map(java.util.Map) MLAlgoParams(org.opensearch.ml.common.parameter.MLAlgoParams) FitRCFParams(org.opensearch.ml.common.parameter.FitRCFParams) DataFrameBuilder(org.opensearch.ml.common.dataframe.DataFrameBuilder) ParseException(java.text.ParseException) DateFormat(java.text.DateFormat) Row(org.opensearch.ml.common.dataframe.Row) ColumnValue(org.opensearch.ml.common.dataframe.ColumnValue) TimeZone(java.util.TimeZone) MLPredictionOutput(org.opensearch.ml.common.parameter.MLPredictionOutput) DataFrame(org.opensearch.ml.common.dataframe.DataFrame) Function(org.opensearch.ml.engine.annotation.Function) ThresholdedRandomCutForestState(com.amazon.randomcutforest.parkservices.state.ThresholdedRandomCutForestState) List(java.util.List) ColumnType(org.opensearch.ml.common.dataframe.ColumnType) Model(org.opensearch.ml.common.parameter.Model) ThresholdedRandomCutForest(com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest) ModelSerDeSer(org.opensearch.ml.engine.utils.ModelSerDeSer) Log4j2(lombok.extern.log4j.Log4j2) Optional(java.util.Optional) ForestMode(com.amazon.randomcutforest.config.ForestMode) TrainAndPredictable(org.opensearch.ml.engine.TrainAndPredictable) ColumnMeta(org.opensearch.ml.common.dataframe.ColumnMeta) AnomalyDescriptor(com.amazon.randomcutforest.parkservices.AnomalyDescriptor) ColumnType(org.opensearch.ml.common.dataframe.ColumnType) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnMeta(org.opensearch.ml.common.dataframe.ColumnMeta) 
MLValidationException(org.opensearch.ml.common.exception.MLValidationException) AnomalyDescriptor(com.amazon.randomcutforest.parkservices.AnomalyDescriptor) ColumnValue(org.opensearch.ml.common.dataframe.ColumnValue) Row(org.opensearch.ml.common.dataframe.Row) ParseException(java.text.ParseException) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

ThresholdedRandomCutForest (com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest)20 AnomalyDescriptor (com.amazon.randomcutforest.parkservices.AnomalyDescriptor)16 Random (java.util.Random)15 MultiDimDataWithKey (com.amazon.randomcutforest.testutils.MultiDimDataWithKey)13 ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)11 RandomCutForest (com.amazon.randomcutforest.RandomCutForest)5 Precision (com.amazon.randomcutforest.config.Precision)5 EnumSource (org.junit.jupiter.params.provider.EnumSource)5 Test (org.junit.jupiter.api.Test)4 ThresholdedRandomCutForestState (com.amazon.randomcutforest.parkservices.state.ThresholdedRandomCutForestState)3 HashMap (java.util.HashMap)3 Map (java.util.Map)3 ForestMode (com.amazon.randomcutforest.config.ForestMode)2 TransformMethod (com.amazon.randomcutforest.config.TransformMethod)2 RandomCutForestMapper (com.amazon.randomcutforest.state.RandomCutForestMapper)2 NormalMixtureTestData (com.amazon.randomcutforest.testutils.NormalMixtureTestData)2 MethodSource (org.junit.jupiter.params.provider.MethodSource)2 Model (org.opensearch.ml.common.parameter.Model)2 IRCFComputeDescriptor (com.amazon.randomcutforest.parkservices.IRCFComputeDescriptor)1 PredictorCorrector (com.amazon.randomcutforest.parkservices.PredictorCorrector)1