Search in sources :

Example 6 with ColumnMeta

use of org.opensearch.ml.common.dataframe.ColumnMeta in project ml-commons by opensearch-project.

the class BatchRandomCutForest method process.

/**
 * Scores every row of {@code dataFrame} against {@code forest} and returns one result map per row.
 *
 * <p>Each row is converted to a {@code double[]} point (one entry per column, in column order) and
 * scored with {@link RandomCutForest#getAnomalyScore}. The score is taken before the point is fed
 * back into the forest, so a row never influences its own score.
 *
 * @param dataFrame              rows to score; every column value is read via {@code doubleValue()}
 * @param forest                 forest used for scoring; it is updated in place with the rows
 *                               selected by {@code actualTrainingDataSize}
 * @param actualTrainingDataSize number of leading rows that are also used to update the forest;
 *                               {@code null} means every row updates the forest
 * @return one map per row, in row order, with keys {@code "score"} (the anomaly score) and
 *         {@code "anomalous"} (whether the score exceeds {@code anomalyScoreThreshold})
 */
private List<Map<String, Object>> process(DataFrame dataFrame, RandomCutForest forest, Integer actualTrainingDataSize) {
    int dimensions = dataFrame.columnMetas().length;
    int rowCount = dataFrame.size();
    List<Map<String, Object>> predictResult = new ArrayList<>(rowCount);
    for (int rowNum = 0; rowNum < rowCount; rowNum++) {
        // Fetch the row once per iteration instead of once per column.
        Row row = dataFrame.getRow(rowNum);
        // A fresh array per row: the forest receives this array and must not see later overwrites.
        double[] point = new double[dimensions];
        for (int i = 0; i < dimensions; i++) {
            ColumnValue value = row.getValue(i);
            point[i] = value.doubleValue();
        }
        // Score first, then (optionally) learn from the point.
        double anomalyScore = forest.getAnomalyScore(point);
        if (actualTrainingDataSize == null || rowNum < actualTrainingDataSize) {
            forest.update(point);
        }
        Map<String, Object> result = new HashMap<>();
        result.put("score", anomalyScore);
        result.put("anomalous", anomalyScore > anomalyScoreThreshold);
        predictResult.add(result);
    }
    return predictResult;
}
Also used : MLOutput(org.opensearch.ml.common.parameter.MLOutput) RandomCutForestState(com.amazon.randomcutforest.state.RandomCutForestState) Row(org.opensearch.ml.common.dataframe.Row) ColumnValue(org.opensearch.ml.common.dataframe.ColumnValue) MLPredictionOutput(org.opensearch.ml.common.parameter.MLPredictionOutput) DataFrame(org.opensearch.ml.common.dataframe.DataFrame) Function(org.opensearch.ml.engine.annotation.Function) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) RandomCutForest(com.amazon.randomcutforest.RandomCutForest) List(java.util.List) Model(org.opensearch.ml.common.parameter.Model) FunctionName(org.opensearch.ml.common.parameter.FunctionName) ModelSerDeSer(org.opensearch.ml.engine.utils.ModelSerDeSer) Map(java.util.Map) Log4j2(lombok.extern.log4j.Log4j2) MLAlgoParams(org.opensearch.ml.common.parameter.MLAlgoParams) Optional(java.util.Optional) TrainAndPredictable(org.opensearch.ml.engine.TrainAndPredictable) DataFrameBuilder(org.opensearch.ml.common.dataframe.DataFrameBuilder) RandomCutForestMapper(com.amazon.randomcutforest.state.RandomCutForestMapper) ColumnMeta(org.opensearch.ml.common.dataframe.ColumnMeta) BatchRCFParams(org.opensearch.ml.common.parameter.BatchRCFParams)

Example 7 with ColumnMeta

use of org.opensearch.ml.common.dataframe.ColumnMeta in project ml-commons by opensearch-project.

the class BatchRandomCutForestTest method constructRCFDataFrame.

/**
 * Builds a single-column integer data frame with {@code dataSize} random rows.
 *
 * <p>Rows normally hold a value in {@code [1, 10)}. When {@code predict} is {@code true}, every
 * row whose index is a multiple of 100 instead holds a value in {@code [100, 1000)}.
 *
 * @param predict whether to replace every 100th row with a much larger value
 * @return the populated data frame with one INTEGER column named {@code "value"}
 */
private DataFrame constructRCFDataFrame(boolean predict) {
    ColumnMeta valueColumn = new ColumnMeta("value", ColumnType.INTEGER);
    DataFrame frame = new DefaultDataFrame(new ColumnMeta[] { valueColumn });
    int rowIndex = 0;
    while (rowIndex < dataSize) {
        boolean largeValueRow = predict && rowIndex % 100 == 0;
        int value = largeValueRow
            ? ThreadLocalRandom.current().nextInt(100, 1000)
            : ThreadLocalRandom.current().nextInt(1, 10);
        frame.appendRow(new Object[] { value });
        rowIndex++;
    }
    return frame;
}
Also used : ColumnMeta(org.opensearch.ml.common.dataframe.ColumnMeta) DataFrame(org.opensearch.ml.common.dataframe.DataFrame) DefaultDataFrame(org.opensearch.ml.common.dataframe.DefaultDataFrame)

Example 8 with ColumnMeta

use of org.opensearch.ml.common.dataframe.ColumnMeta in project ml-commons by opensearch-project.

the class SampleAlgoTest method constructDataFrame.

/**
 * Builds a single-column integer data frame whose rows hold {@code 0, 1, ..., dataSize - 1}.
 *
 * @param dataSize number of rows to append; a non-positive value yields an empty data frame
 * @return the populated data frame with one INTEGER column named {@code "value"}
 */
private DataFrame constructDataFrame(int dataSize) {
    ColumnMeta valueColumn = new ColumnMeta("value", ColumnType.INTEGER);
    DataFrame frame = new DefaultDataFrame(new ColumnMeta[] { valueColumn });
    int next = 0;
    while (next < dataSize) {
        // Each row's value is its own index.
        frame.appendRow(new Object[] { next });
        next++;
    }
    return frame;
}
Also used : ColumnMeta(org.opensearch.ml.common.dataframe.ColumnMeta) DataFrame(org.opensearch.ml.common.dataframe.DataFrame) DefaultDataFrame(org.opensearch.ml.common.dataframe.DefaultDataFrame)

Example 9 with ColumnMeta

use of org.opensearch.ml.common.dataframe.ColumnMeta in project ml-commons by opensearch-project.

the class KMeansHelper method constructKMeansDataFrame.

/**
 * Builds a two-column ({@code f1}, {@code f2}) data frame of {@code size} points drawn from two
 * bivariate normal distributions, one centred at (0, 0) and one at (10, 10), sharing the same
 * covariance matrix.
 *
 * <p>All randomness derives from a single {@link Random} seeded with 1: it seeds both
 * distributions and also decides which distribution each row is drawn from, so repeated calls
 * with the same {@code size} produce the same data.
 *
 * @param size number of rows to generate; a non-positive value yields an empty data frame
 * @return the populated data frame with two DOUBLE columns
 */
public static DataFrame constructKMeansDataFrame(int size) {
    ColumnMeta[] columnMetas = new ColumnMeta[] { new ColumnMeta("f1", ColumnType.DOUBLE), new ColumnMeta("f2", ColumnType.DOUBLE) };
    DataFrame dataFrame = DataFrameBuilder.emptyDataFrame(columnMetas);
    Random random = new Random(1);
    double[][] covariance = new double[][] { { 2.0, 1.0 }, { 1.0, 2.0 } };
    MultivariateNormalDistribution g1 = new MultivariateNormalDistribution(new JDKRandomGenerator(random.nextInt()), new double[] { 0.0, 0.0 }, covariance);
    MultivariateNormalDistribution g2 = new MultivariateNormalDistribution(new JDKRandomGenerator(random.nextInt()), new double[] { 10.0, 10.0 }, covariance);
    MultivariateNormalDistribution[] normalDistributions = new MultivariateNormalDistribution[] { g1, g2 };
    for (int i = 0; i < size; ++i) {
        // Pick the cluster with the seeded generator (not Math.random()) so the data is reproducible.
        int id = random.nextDouble() < 0.5 ? 1 : 0;
        double[] sample = normalDistributions[id].sample();
        dataFrame.appendRow(Arrays.stream(sample).boxed().toArray(Double[]::new));
    }
    return dataFrame;
}
Also used : ColumnMeta(org.opensearch.ml.common.dataframe.ColumnMeta) Random(java.util.Random) DataFrame(org.opensearch.ml.common.dataframe.DataFrame) JDKRandomGenerator(org.apache.commons.math3.random.JDKRandomGenerator) MultivariateNormalDistribution(org.apache.commons.math3.distribution.MultivariateNormalDistribution)

Aggregations

ColumnMeta (org.opensearch.ml.common.dataframe.ColumnMeta)9 DataFrame (org.opensearch.ml.common.dataframe.DataFrame)9 DefaultDataFrame (org.opensearch.ml.common.dataframe.DefaultDataFrame)6 ArrayList (java.util.ArrayList)5 ColumnValue (org.opensearch.ml.common.dataframe.ColumnValue)5 Row (org.opensearch.ml.common.dataframe.Row)4 HashMap (java.util.HashMap)2 List (java.util.List)2 Map (java.util.Map)2 Optional (java.util.Optional)2 Log4j2 (lombok.extern.log4j.Log4j2)2 Before (org.junit.Before)2 DataFrameBuilder (org.opensearch.ml.common.dataframe.DataFrameBuilder)2 DoubleValue (org.opensearch.ml.common.dataframe.DoubleValue)2 FunctionName (org.opensearch.ml.common.parameter.FunctionName)2 MLAlgoParams (org.opensearch.ml.common.parameter.MLAlgoParams)2 MLOutput (org.opensearch.ml.common.parameter.MLOutput)2 MLPredictionOutput (org.opensearch.ml.common.parameter.MLPredictionOutput)2 Model (org.opensearch.ml.common.parameter.Model)2 TrainAndPredictable (org.opensearch.ml.engine.TrainAndPredictable)2