Search in sources :

Example 6 with Row

use of org.opensearch.ml.common.dataframe.Row in project ml-commons by opensearch-project.

In class AnomalyDetectionLibSVMTest, method predict:

/**
 * Trains the anomaly detector on {@code trainDataFrame}, predicts on
 * {@code predictionDataFrame}, and checks the precision and recall of the
 * "ANOMALOUS" predictions against {@code predictionLabels}.
 */
@Test
public void predict() {
    Model trainedModel = anomalyDetection.train(trainDataFrame);
    MLPredictionOutput predictionOutput = (MLPredictionOutput) anomalyDetection.predict(predictionDataFrame, trainedModel);
    DataFrame predicted = predictionOutput.getPredictionResult();

    int correctlyFlagged = 0;
    int wronglyFlagged = 0;
    int expectedAnomalies = 0;
    int labelIndex = 0;
    for (Row predictedRow : predicted) {
        // The value at index 1 of each prediction row is compared with "ANOMALOUS".
        boolean flagged = "ANOMALOUS".equals(predictedRow.getValue(1).stringValue());
        // Labels are matched to prediction rows purely by position.
        boolean expectedAnomalous = predictionLabels.get(labelIndex) == Event.EventType.ANOMALOUS;
        labelIndex++;

        if (expectedAnomalous) {
            expectedAnomalies++;
        }
        if (flagged) {
            if (expectedAnomalous) {
                correctlyFlagged++;
            } else {
                wronglyFlagged++;
            }
        }
    }

    // precision = TP / (TP + FP); recall = TP / (all expected anomalies)
    float precision = (float) correctlyFlagged / (correctlyFlagged + wronglyFlagged);
    float recall = (float) correctlyFlagged / expectedAnomalies;
    Assert.assertEquals(0.7, precision, 0.01);
    Assert.assertEquals(1.0, recall, 0.01);
}
Also used : Model(org.opensearch.ml.common.parameter.Model) MLPredictionOutput(org.opensearch.ml.common.parameter.MLPredictionOutput) DataFrame(org.opensearch.ml.common.dataframe.DataFrame) DefaultDataFrame(org.opensearch.ml.common.dataframe.DefaultDataFrame) Row(org.opensearch.ml.common.dataframe.Row) Test(org.junit.Test)

Example 7 with Row

use of org.opensearch.ml.common.dataframe.Row in project ml-commons by opensearch-project.

In class BatchRandomCutForest, method process:

/**
 * Scores every row of {@code dataFrame} against {@code forest}, treating each row as one point
 * whose coordinates are the row's column values read as doubles.
 *
 * <p>For each row the anomaly score is computed first; the point is then passed to
 * {@code forest.update} only when {@code actualTrainingDataSize} is {@code null} or the row index
 * is below it.
 *
 * @param dataFrame              input data; one point is built per row, one coordinate per column
 * @param forest                 forest used for scoring and (conditionally) updated with each point
 * @param actualTrainingDataSize number of leading rows that are also fed to {@code forest.update};
 *                               {@code null} means every row is fed
 * @return one map per row, in row order, with keys {@code "score"} (the anomaly score) and
 *         {@code "anomalous"} (whether the score exceeds {@code anomalyScoreThreshold})
 */
private List<Map<String, Object>> process(DataFrame dataFrame, RandomCutForest forest, Integer actualTrainingDataSize) {
    ColumnMeta[] columnMetas = dataFrame.columnMetas();
    int dimensions = columnMetas.length;
    List<Map<String, Object>> predictResult = new ArrayList<>();
    for (int rowNum = 0; rowNum < dataFrame.size(); rowNum++) {
        // Fetch the row once per row instead of once per column.
        Row row = dataFrame.getRow(rowNum);
        // A fresh primitive array per row: no boxing, and the forest never sees a reused buffer.
        double[] point = new double[dimensions];
        for (int i = 0; i < dimensions; i++) {
            point[i] = row.getValue(i).doubleValue();
        }
        // Score before updating so the point does not influence its own score.
        double anomalyScore = forest.getAnomalyScore(point);
        if (actualTrainingDataSize == null || rowNum < actualTrainingDataSize) {
            forest.update(point);
        }
        Map<String, Object> result = new HashMap<>();
        result.put("score", anomalyScore);
        result.put("anomalous", anomalyScore > anomalyScoreThreshold);
        predictResult.add(result);
    }
    return predictResult;
}
Also used : MLOutput(org.opensearch.ml.common.parameter.MLOutput) RandomCutForestState(com.amazon.randomcutforest.state.RandomCutForestState) Row(org.opensearch.ml.common.dataframe.Row) ColumnValue(org.opensearch.ml.common.dataframe.ColumnValue) MLPredictionOutput(org.opensearch.ml.common.parameter.MLPredictionOutput) DataFrame(org.opensearch.ml.common.dataframe.DataFrame) Function(org.opensearch.ml.engine.annotation.Function) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) RandomCutForest(com.amazon.randomcutforest.RandomCutForest) List(java.util.List) Model(org.opensearch.ml.common.parameter.Model) FunctionName(org.opensearch.ml.common.parameter.FunctionName) ModelSerDeSer(org.opensearch.ml.engine.utils.ModelSerDeSer) Map(java.util.Map) Log4j2(lombok.extern.log4j.Log4j2) MLAlgoParams(org.opensearch.ml.common.parameter.MLAlgoParams) Optional(java.util.Optional) TrainAndPredictable(org.opensearch.ml.engine.TrainAndPredictable) DataFrameBuilder(org.opensearch.ml.common.dataframe.DataFrameBuilder) RandomCutForestMapper(com.amazon.randomcutforest.state.RandomCutForestMapper) ColumnMeta(org.opensearch.ml.common.dataframe.ColumnMeta) BatchRCFParams(org.opensearch.ml.common.parameter.BatchRCFParams) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnMeta(org.opensearch.ml.common.dataframe.ColumnMeta) ColumnValue(org.opensearch.ml.common.dataframe.ColumnValue) Row(org.opensearch.ml.common.dataframe.Row) HashMap(java.util.HashMap) Map(java.util.Map)

Example 8 with Row

use of org.opensearch.ml.common.dataframe.Row in project ml-commons by opensearch-project.

In class BatchRandomCutForestTest, method verifyPredictionResult:

/**
 * Verifies that the prediction output holds {@code dataSize} rows and that, among the rows at
 * indices divisible by 100, more than one has a first-column value above 0.01.
 *
 * @param output prediction output whose result data frame is inspected
 */
private void verifyPredictionResult(MLPredictionOutput output) {
    DataFrame predicted = output.getPredictionResult();
    Assert.assertEquals(dataSize, predicted.size());

    int detected = 0;
    int index = 0;
    while (index < dataSize) {
        Row current = predicted.getRow(index);
        // Only every 100th row is sampled.
        boolean sampled = index % 100 == 0;
        if (sampled && current.getValue(0).doubleValue() > 0.01) {
            detected++;
        }
        index++;
    }

    // Original note: total anomalies 5. The check only requires that more than one is found.
    Assert.assertTrue("Fewer anomaly detected: " + detected, detected > 1);
}
Also used : DataFrame(org.opensearch.ml.common.dataframe.DataFrame) DefaultDataFrame(org.opensearch.ml.common.dataframe.DefaultDataFrame) Row(org.opensearch.ml.common.dataframe.Row)

Aggregations

Row (org.opensearch.ml.common.dataframe.Row): 8, DataFrame (org.opensearch.ml.common.dataframe.DataFrame): 6, ColumnValue (org.opensearch.ml.common.dataframe.ColumnValue): 5, ArrayList (java.util.ArrayList): 4, ColumnMeta (org.opensearch.ml.common.dataframe.ColumnMeta): 4, DefaultDataFrame (org.opensearch.ml.common.dataframe.DefaultDataFrame): 4, MLPredictionOutput (org.opensearch.ml.common.parameter.MLPredictionOutput): 3, Model (org.opensearch.ml.common.parameter.Model): 3, HashMap (java.util.HashMap): 2, List (java.util.List): 2, Map (java.util.Map): 2, Optional (java.util.Optional): 2, Log4j2 (lombok.extern.log4j.Log4j2): 2, Before (org.junit.Before): 2, DataFrameBuilder (org.opensearch.ml.common.dataframe.DataFrameBuilder): 2, FunctionName (org.opensearch.ml.common.parameter.FunctionName): 2, MLAlgoParams (org.opensearch.ml.common.parameter.MLAlgoParams): 2, MLOutput (org.opensearch.ml.common.parameter.MLOutput): 2, TrainAndPredictable (org.opensearch.ml.engine.TrainAndPredictable): 2, Function (org.opensearch.ml.engine.annotation.Function): 2