Search in sources:

Example 1 with ColumnMeta

use of org.opensearch.ml.common.dataframe.ColumnMeta in project ml-commons by opensearch-project.

From the class MLPredictionOutputTest, method setUp.

/**
 * Prepares the {@code output} fixture: a prediction output whose result is a
 * data frame with one INTEGER column named "test" and two rows (values 1 and 2).
 */
@Before
public void setUp() {
    ColumnMeta testColumn = new ColumnMeta("test", ColumnType.INTEGER);

    // One single-cell row per sample value.
    List<Row> rows = new ArrayList<>();
    for (int cell = 1; cell <= 2; cell++) {
        rows.add(new Row(new ColumnValue[] { new IntValue(cell) }));
    }

    DataFrame predictionResult = new DefaultDataFrame(new ColumnMeta[] { testColumn }, rows);
    output = MLPredictionOutput.builder()
        .taskId("test_task_id")
        .status("test_status")
        .predictionResult(predictionResult)
        .build();
}
Also used : ColumnMeta(org.opensearch.ml.common.dataframe.ColumnMeta) ArrayList(java.util.ArrayList) ColumnValue(org.opensearch.ml.common.dataframe.ColumnValue) Row(org.opensearch.ml.common.dataframe.Row) DataFrame(org.opensearch.ml.common.dataframe.DataFrame) DefaultDataFrame(org.opensearch.ml.common.dataframe.DefaultDataFrame) IntValue(org.opensearch.ml.common.dataframe.IntValue) DefaultDataFrame(org.opensearch.ml.common.dataframe.DefaultDataFrame) Before(org.junit.Before)

Example 2 with ColumnMeta

use of org.opensearch.ml.common.dataframe.ColumnMeta in project ml-commons by opensearch-project.

From the class FixedInTimeRandomCutForestTest, method constructRCFDataFrame.

/**
 * Builds a two-column data frame ("timestamp" as LONG, "value" as INTEGER) with one
 * row for every index below {@code dataSize}, spaced one minute apart.
 *
 * <p>Values are normally drawn from [1, 10). When {@code predict} is true, every
 * 100th row (index 0, 100, 200, ...) instead gets a value drawn from [100, 1000).
 *
 * @param predict whether to inject the larger values on every 100th row
 * @return the populated data frame
 */
private DataFrame constructRCFDataFrame(boolean predict) {
    ColumnMeta[] columnMetas = new ColumnMeta[] { new ColumnMeta("timestamp", ColumnType.LONG), new ColumnMeta("value", ColumnType.INTEGER) };
    DataFrame dataFrame = new DefaultDataFrame(columnMetas);
    long startTime = 1643677200000L;
    // Keep the interval as a long so the offset is computed in 64-bit arithmetic;
    // an int product (i * 1000 * 60) would wrap for large row indices.
    long intervalMillis = 60_000L;
    for (int i = 0; i < dataSize; i++) {
        long time = startTime + i * intervalMillis;
        boolean largeValue = predict && i % 100 == 0;
        int value = largeValue
            ? ThreadLocalRandom.current().nextInt(100, 1000)
            : ThreadLocalRandom.current().nextInt(1, 10);
        dataFrame.appendRow(new Object[] { time, value });
    }
    return dataFrame;
}
Also used : ColumnMeta(org.opensearch.ml.common.dataframe.ColumnMeta) DefaultDataFrame(org.opensearch.ml.common.dataframe.DefaultDataFrame) DataFrame(org.opensearch.ml.common.dataframe.DataFrame) DefaultDataFrame(org.opensearch.ml.common.dataframe.DefaultDataFrame)

Example 3 with ColumnMeta

use of org.opensearch.ml.common.dataframe.ColumnMeta in project ml-commons by opensearch-project.

From the class FixedInTimeRandomCutForest, method process.

/**
 * Runs every row of the data frame through the forest and returns one result map per row.
 *
 * <p>For each row, the column whose name equals {@code timeField} supplies the timestamp
 * (read directly for LONG columns, parsed with {@code simpleDateFormat} for STRING columns);
 * every other column contributes its double value to the point passed to the forest.
 * If no column matches, the timestamp stays at -1.
 *
 * @param dataFrame rows to process
 * @param forest    forest that processes each point
 * @return per-row maps holding the timestamp (keyed by {@code timeField}), "score" and "anomaly_grade"
 * @throws MLValidationException if the time column is neither LONG nor STRING, or a STRING timestamp cannot be parsed
 */
private List<Map<String, Object>> process(DataFrame dataFrame, ThresholdedRandomCutForest forest) {
    ColumnMeta[] metas = dataFrame.columnMetas();
    List<Map<String, Object>> results = new ArrayList<>();

    // TODO: sort the data frame by the time field in ascending order. For now the rows are assumed to be sorted by time already.
    for (int rowIndex = 0; rowIndex < dataFrame.size(); rowIndex++) {
        Row currentRow = dataFrame.getRow(rowIndex);
        List<Double> features = new ArrayList<>();
        long timestamp = -1;

        for (int col = 0; col < metas.length; col++) {
            ColumnMeta meta = metas[col];
            ColumnValue cell = currentRow.getValue(col);

            boolean isTimeColumn = timeField != null && timeField.equals(meta.getName());
            if (!isTimeColumn) {
                features.add(cell.doubleValue());
                continue;
            }

            ColumnType type = meta.getColumnType();
            if (type == ColumnType.LONG) {
                timestamp = cell.longValue();
                continue;
            }
            if (type != ColumnType.STRING) {
                throw new MLValidationException("Wrong data type of time field. Should use LONG or STRING, but got " + type);
            }
            try {
                timestamp = simpleDateFormat.parse(cell.stringValue()).getTime();
            } catch (ParseException e) {
                log.error("Failed to parse timestamp " + cell.stringValue(), e);
                throw new MLValidationException("Failed to parse timestamp " + cell.stringValue());
            }
        }

        double[] point = features.stream().mapToDouble(Double::doubleValue).toArray();
        AnomalyDescriptor descriptor = forest.process(point, timestamp);

        Map<String, Object> rowResult = new HashMap<>();
        rowResult.put(timeField, timestamp);
        rowResult.put("score", descriptor.getRCFScore());
        rowResult.put("anomaly_grade", descriptor.getAnomalyGrade());
        results.add(rowResult);
    }
    return results;
}
Also used : MLOutput(org.opensearch.ml.common.parameter.MLOutput) Precision(com.amazon.randomcutforest.config.Precision) ThresholdedRandomCutForestMapper(com.amazon.randomcutforest.parkservices.state.ThresholdedRandomCutForestMapper) SimpleDateFormat(java.text.SimpleDateFormat) MLValidationException(org.opensearch.ml.common.exception.MLValidationException) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) FunctionName(org.opensearch.ml.common.parameter.FunctionName) Map(java.util.Map) MLAlgoParams(org.opensearch.ml.common.parameter.MLAlgoParams) FitRCFParams(org.opensearch.ml.common.parameter.FitRCFParams) DataFrameBuilder(org.opensearch.ml.common.dataframe.DataFrameBuilder) ParseException(java.text.ParseException) DateFormat(java.text.DateFormat) Row(org.opensearch.ml.common.dataframe.Row) ColumnValue(org.opensearch.ml.common.dataframe.ColumnValue) TimeZone(java.util.TimeZone) MLPredictionOutput(org.opensearch.ml.common.parameter.MLPredictionOutput) DataFrame(org.opensearch.ml.common.dataframe.DataFrame) Function(org.opensearch.ml.engine.annotation.Function) ThresholdedRandomCutForestState(com.amazon.randomcutforest.parkservices.state.ThresholdedRandomCutForestState) List(java.util.List) ColumnType(org.opensearch.ml.common.dataframe.ColumnType) Model(org.opensearch.ml.common.parameter.Model) ThresholdedRandomCutForest(com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest) ModelSerDeSer(org.opensearch.ml.engine.utils.ModelSerDeSer) Log4j2(lombok.extern.log4j.Log4j2) Optional(java.util.Optional) ForestMode(com.amazon.randomcutforest.config.ForestMode) TrainAndPredictable(org.opensearch.ml.engine.TrainAndPredictable) ColumnMeta(org.opensearch.ml.common.dataframe.ColumnMeta) AnomalyDescriptor(com.amazon.randomcutforest.parkservices.AnomalyDescriptor) ColumnType(org.opensearch.ml.common.dataframe.ColumnType) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnMeta(org.opensearch.ml.common.dataframe.ColumnMeta) 
MLValidationException(org.opensearch.ml.common.exception.MLValidationException) AnomalyDescriptor(com.amazon.randomcutforest.parkservices.AnomalyDescriptor) ColumnValue(org.opensearch.ml.common.dataframe.ColumnValue) Row(org.opensearch.ml.common.dataframe.Row) ParseException(java.text.ParseException) HashMap(java.util.HashMap) Map(java.util.Map)

Example 4 with ColumnMeta

use of org.opensearch.ml.common.dataframe.ColumnMeta in project ml-commons by opensearch-project.

From the class AnomalyDetectionLibSVMTest, method constructDataFrame.

/**
 * Converts a dataset into a data frame with one DOUBLE column per feature.
 *
 * <p>The column layout is taken from the feature names of the first example; each
 * example's feature values are then handed to {@code addRow} together with the frame.
 *
 * @param data     examples to convert
 * @param training forwarded unchanged to {@code addRow}
 * @param labels   forwarded unchanged to {@code addRow}
 * @return the data frame, or {@code null} when the dataset yields no examples
 */
private DataFrame constructDataFrame(Dataset<Event> data, boolean training, List<Event.EventType> labels) {
    DataFrame frame = null;
    List<ColumnMeta> columns = new ArrayList<>();

    for (Iterator<Example<Event>> it = data.iterator(); it.hasNext();) {
        Example<Event> example = it.next();
        // The frame does not exist until the first example has defined the columns.
        boolean firstExample = frame == null;

        List<ColumnValue> values = new ArrayList<>();
        for (Feature feature : example) {
            if (firstExample) {
                columns.add(new ColumnMeta(feature.getName(), ColumnType.DOUBLE));
            }
            values.add(new DoubleValue(feature.getValue()));
        }

        if (firstExample) {
            frame = new DefaultDataFrame(columns.toArray(new ColumnMeta[columns.size()]));
        }
        addRow(values, training, example, frame, labels);
    }
    return frame;
}
Also used : ArrayList(java.util.ArrayList) DataFrame(org.opensearch.ml.common.dataframe.DataFrame) DefaultDataFrame(org.opensearch.ml.common.dataframe.DefaultDataFrame) Feature(org.tribuo.Feature) DefaultDataFrame(org.opensearch.ml.common.dataframe.DefaultDataFrame) ColumnMeta(org.opensearch.ml.common.dataframe.ColumnMeta) DoubleValue(org.opensearch.ml.common.dataframe.DoubleValue) Example(org.tribuo.Example) Event(org.tribuo.anomaly.Event) ColumnValue(org.opensearch.ml.common.dataframe.ColumnValue)

Example 5 with ColumnMeta

use of org.opensearch.ml.common.dataframe.ColumnMeta in project ml-commons by opensearch-project.

From the class MLInputTest, method setUp.

/**
 * Prepares the {@code input} fixture: an ML input for {@code algorithm} with linear
 * regression parameters (learning rate 0.1) and a data frame holding one DOUBLE
 * column named "test" with the rows 1.0, 2.0 and 3.0.
 */
@Before
public void setUp() throws Exception {
    final ColumnMeta[] columnMetas = new ColumnMeta[] { new ColumnMeta("test", ColumnType.DOUBLE) };

    // One single-cell row per sample value.
    List<Row> rows = new ArrayList<>();
    for (double cell : new double[] { 1.0, 2.0, 3.0 }) {
        rows.add(new Row(new ColumnValue[] { new DoubleValue(cell) }));
    }
    DataFrame dataFrame = new DefaultDataFrame(columnMetas, rows);

    input = MLInput.builder()
        .algorithm(algorithm)
        .parameters(LinearRegressionParams.builder().learningRate(0.1).build())
        .inputDataset(DataFrameInputDataset.builder().dataFrame(dataFrame).build())
        .build();
}
Also used : ColumnMeta(org.opensearch.ml.common.dataframe.ColumnMeta) DoubleValue(org.opensearch.ml.common.dataframe.DoubleValue) ArrayList(java.util.ArrayList) ColumnValue(org.opensearch.ml.common.dataframe.ColumnValue) Row(org.opensearch.ml.common.dataframe.Row) DataFrame(org.opensearch.ml.common.dataframe.DataFrame) DefaultDataFrame(org.opensearch.ml.common.dataframe.DefaultDataFrame) DefaultDataFrame(org.opensearch.ml.common.dataframe.DefaultDataFrame) Before(org.junit.Before)

Aggregations

ColumnMeta (org.opensearch.ml.common.dataframe.ColumnMeta)9 DataFrame (org.opensearch.ml.common.dataframe.DataFrame)9 DefaultDataFrame (org.opensearch.ml.common.dataframe.DefaultDataFrame)6 ArrayList (java.util.ArrayList)5 ColumnValue (org.opensearch.ml.common.dataframe.ColumnValue)5 Row (org.opensearch.ml.common.dataframe.Row)4 HashMap (java.util.HashMap)2 List (java.util.List)2 Map (java.util.Map)2 Optional (java.util.Optional)2 Log4j2 (lombok.extern.log4j.Log4j2)2 Before (org.junit.Before)2 DataFrameBuilder (org.opensearch.ml.common.dataframe.DataFrameBuilder)2 DoubleValue (org.opensearch.ml.common.dataframe.DoubleValue)2 FunctionName (org.opensearch.ml.common.parameter.FunctionName)2 MLAlgoParams (org.opensearch.ml.common.parameter.MLAlgoParams)2 MLOutput (org.opensearch.ml.common.parameter.MLOutput)2 MLPredictionOutput (org.opensearch.ml.common.parameter.MLPredictionOutput)2 Model (org.opensearch.ml.common.parameter.Model)2 TrainAndPredictable (org.opensearch.ml.engine.TrainAndPredictable)2