Search in sources :

Example 11 with DataFrame

use of org.opensearch.ml.common.dataframe.DataFrame in project ml-commons by opensearch-project.

the class FixedInTimeRandomCutForest method process.

/**
 * Feeds every row of the data frame to the given forest and collects one result map per row.
 *
 * <p>For each row, the column whose name equals {@code timeField} supplies the timestamp
 * (a LONG value is used directly, a STRING value is parsed with {@code simpleDateFormat});
 * every other column is read as a double and becomes one coordinate of the point passed to
 * the forest. If no column matches {@code timeField}, the timestamp stays at {@code -1}.
 *
 * @param dataFrame rows to score; assumed to be already ordered by time (see TODO below)
 * @param forest forest that processes each point together with its timestamp
 * @return one map per row holding the timestamp (keyed by {@code timeField}), the
 *         {@code "score"} and the {@code "anomaly_grade"} reported by the forest
 * @throws MLValidationException if the time column is neither LONG nor STRING, or a STRING
 *         timestamp cannot be parsed
 */
private List<Map<String, Object>> process(DataFrame dataFrame, ThresholdedRandomCutForest forest) {
    ColumnMeta[] metas = dataFrame.columnMetas();
    List<Map<String, Object>> scored = new ArrayList<>();
    // Scratch buffer for the non-time values of the current row; emptied after each row.
    List<Double> features = new ArrayList<>();
    for (int rowIndex = 0; rowIndex < dataFrame.size(); rowIndex++) {
        Row current = dataFrame.getRow(rowIndex);
        long rowTime = -1;
        for (int col = 0; col < metas.length; col++) {
            ColumnMeta meta = metas[col];
            ColumnValue cell = current.getValue(col);
            // TODO: sort dataframe by time field with asc order. Currently the data is considered already sorted by time.
            boolean isTimeColumn = timeField != null && timeField.equals(meta.getName());
            if (!isTimeColumn) {
                features.add(cell.doubleValue());
                continue;
            }
            ColumnType type = meta.getColumnType();
            if (type == ColumnType.LONG) {
                rowTime = cell.longValue();
            } else if (type == ColumnType.STRING) {
                try {
                    rowTime = simpleDateFormat.parse(cell.stringValue()).getTime();
                } catch (ParseException e) {
                    log.error("Failed to parse timestamp " + cell.stringValue(), e);
                    throw new MLValidationException("Failed to parse timestamp " + cell.stringValue());
                }
            } else {
                throw new MLValidationException("Wrong data type of time field. Should use LONG or STRING, but got " + type);
            }
        }
        double[] point = features.stream().mapToDouble(Double::doubleValue).toArray();
        features.clear();
        AnomalyDescriptor descriptor = forest.process(point, rowTime);
        Map<String, Object> entry = new HashMap<>();
        entry.put(timeField, rowTime);
        entry.put("score", descriptor.getRCFScore());
        entry.put("anomaly_grade", descriptor.getAnomalyGrade());
        scored.add(entry);
    }
    return scored;
}
Also used : MLOutput(org.opensearch.ml.common.parameter.MLOutput) Precision(com.amazon.randomcutforest.config.Precision) ThresholdedRandomCutForestMapper(com.amazon.randomcutforest.parkservices.state.ThresholdedRandomCutForestMapper) SimpleDateFormat(java.text.SimpleDateFormat) MLValidationException(org.opensearch.ml.common.exception.MLValidationException) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) FunctionName(org.opensearch.ml.common.parameter.FunctionName) Map(java.util.Map) MLAlgoParams(org.opensearch.ml.common.parameter.MLAlgoParams) FitRCFParams(org.opensearch.ml.common.parameter.FitRCFParams) DataFrameBuilder(org.opensearch.ml.common.dataframe.DataFrameBuilder) ParseException(java.text.ParseException) DateFormat(java.text.DateFormat) Row(org.opensearch.ml.common.dataframe.Row) ColumnValue(org.opensearch.ml.common.dataframe.ColumnValue) TimeZone(java.util.TimeZone) MLPredictionOutput(org.opensearch.ml.common.parameter.MLPredictionOutput) DataFrame(org.opensearch.ml.common.dataframe.DataFrame) Function(org.opensearch.ml.engine.annotation.Function) ThresholdedRandomCutForestState(com.amazon.randomcutforest.parkservices.state.ThresholdedRandomCutForestState) List(java.util.List) ColumnType(org.opensearch.ml.common.dataframe.ColumnType) Model(org.opensearch.ml.common.parameter.Model) ThresholdedRandomCutForest(com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest) ModelSerDeSer(org.opensearch.ml.engine.utils.ModelSerDeSer) Log4j2(lombok.extern.log4j.Log4j2) Optional(java.util.Optional) ForestMode(com.amazon.randomcutforest.config.ForestMode) TrainAndPredictable(org.opensearch.ml.engine.TrainAndPredictable) ColumnMeta(org.opensearch.ml.common.dataframe.ColumnMeta) AnomalyDescriptor(com.amazon.randomcutforest.parkservices.AnomalyDescriptor) ColumnType(org.opensearch.ml.common.dataframe.ColumnType) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnMeta(org.opensearch.ml.common.dataframe.ColumnMeta) 
MLValidationException(org.opensearch.ml.common.exception.MLValidationException) AnomalyDescriptor(com.amazon.randomcutforest.parkservices.AnomalyDescriptor) ColumnValue(org.opensearch.ml.common.dataframe.ColumnValue) Row(org.opensearch.ml.common.dataframe.Row) ParseException(java.text.ParseException) HashMap(java.util.HashMap) Map(java.util.Map)

Example 12 with DataFrame

use of org.opensearch.ml.common.dataframe.DataFrame in project ml-commons by opensearch-project.

the class MLEngineTest method trainKMeansModel.

/**
 * Trains a KMEANS model on a data frame built by {@code constructKMeansDataFrame(100)},
 * using 2 centroids, 10 iterations and the EUCLIDEAN distance type.
 *
 * @return the model returned by {@code MLEngine.train}
 */
private Model trainKMeansModel() {
    DataFrame trainingFrame = constructKMeansDataFrame(100);
    KMeansParams kMeansParams = KMeansParams.builder()
        .centroids(2)
        .iterations(10)
        .distanceType(KMeansParams.DistanceType.EUCLIDEAN)
        .build();
    MLInputDataset dataset = DataFrameInputDataset.builder()
        .dataFrame(trainingFrame)
        .build();
    Input trainingInput = MLInput.builder()
        .algorithm(FunctionName.KMEANS)
        .parameters(kMeansParams)
        .inputDataset(dataset)
        .build();
    return MLEngine.train(trainingInput);
}
Also used : MLInput(org.opensearch.ml.common.parameter.MLInput) Input(org.opensearch.ml.common.parameter.Input) LocalSampleCalculatorInput(org.opensearch.ml.common.parameter.LocalSampleCalculatorInput) KMeansParams(org.opensearch.ml.common.parameter.KMeansParams) MLInputDataset(org.opensearch.ml.common.dataset.MLInputDataset) LinearRegressionHelper.constructLinearRegressionPredictionDataFrame(org.opensearch.ml.engine.helper.LinearRegressionHelper.constructLinearRegressionPredictionDataFrame) KMeansHelper.constructKMeansDataFrame(org.opensearch.ml.engine.helper.KMeansHelper.constructKMeansDataFrame) LinearRegressionHelper.constructLinearRegressionTrainDataFrame(org.opensearch.ml.engine.helper.LinearRegressionHelper.constructLinearRegressionTrainDataFrame) DataFrame(org.opensearch.ml.common.dataframe.DataFrame)

Example 13 with DataFrame

use of org.opensearch.ml.common.dataframe.DataFrame in project ml-commons by opensearch-project.

the class AnomalyDetectionLibSVMTest method constructDataFrame.

/**
 * Converts a dataset of examples into a data frame of DOUBLE columns.
 *
 * <p>The first example defines the columns: one column per feature, named after the feature.
 * Every example (including the first) contributes one row of feature values, which is
 * appended through {@code addRow} together with the {@code training} flag, the example
 * itself and {@code labels}.
 *
 * @param data examples to convert
 * @param training forwarded unchanged to {@code addRow}
 * @param labels forwarded unchanged to {@code addRow}
 * @return the populated data frame, or {@code null} when {@code data} yields no examples
 */
private DataFrame constructDataFrame(Dataset<Event> data, boolean training, List<Event.EventType> labels) {
    DataFrame frame = null;
    List<ColumnMeta> header = null;
    for (Iterator<Example<Event>> cursor = data.iterator(); cursor.hasNext(); ) {
        Example<Event> sample = cursor.next();
        List<ColumnValue> cells = new ArrayList<>();
        if (header != null) {
            // Columns are already known: only the values are needed.
            for (Feature feature : sample) {
                cells.add(new DoubleValue(feature.getValue()));
            }
        } else {
            // First example: derive the column metadata alongside the first row of values.
            header = new ArrayList<>();
            for (Feature feature : sample) {
                header.add(new ColumnMeta(feature.getName(), ColumnType.DOUBLE));
                cells.add(new DoubleValue(feature.getValue()));
            }
            ColumnMeta[] headerArray = header.toArray(new ColumnMeta[header.size()]);
            frame = new DefaultDataFrame(headerArray);
        }
        addRow(cells, training, sample, frame, labels);
    }
    return frame;
}
Also used : ArrayList(java.util.ArrayList) DataFrame(org.opensearch.ml.common.dataframe.DataFrame) DefaultDataFrame(org.opensearch.ml.common.dataframe.DefaultDataFrame) Feature(org.tribuo.Feature) DefaultDataFrame(org.opensearch.ml.common.dataframe.DefaultDataFrame) ColumnMeta(org.opensearch.ml.common.dataframe.ColumnMeta) DoubleValue(org.opensearch.ml.common.dataframe.DoubleValue) Example(org.tribuo.Example) Event(org.tribuo.anomaly.Event) ColumnValue(org.opensearch.ml.common.dataframe.ColumnValue)

Example 14 with DataFrame

use of org.opensearch.ml.common.dataframe.DataFrame in project ml-commons by opensearch-project.

the class MLInputDatasetHandler method parseSearchQueryInput.

/**
 * Create DataFrame based on given search query.
 *
 * <p>Builds a search request from the dataset's search source and indices, executes it
 * through {@code client}, and converts the source map of every returned hit into one
 * input record for {@code DataFrameBuilder.load}. The outcome is reported through
 * {@code listener}: the data frame on success, an {@link IllegalArgumentException}
 * ("No document found") when the response has no hits, or the search failure itself.
 *
 * @param mlInputDataset MLInputDataset; must be of type SEARCH_QUERY
 * @param listener ActionListener notified with the resulting DataFrame or the failure
 * @throws IllegalArgumentException if the dataset is not of SEARCH_QUERY type (thrown
 *         directly to the caller, not delivered through the listener)
 */
public void parseSearchQueryInput(MLInputDataset mlInputDataset, ActionListener<DataFrame> listener) {
    if (!mlInputDataset.getInputDataType().equals(MLInputDataType.SEARCH_QUERY)) {
        throw new IllegalArgumentException("Input dataset is not SEARCH_QUERY type.");
    }
    SearchQueryInputDataset inputDataset = (SearchQueryInputDataset) mlInputDataset;
    SearchRequest searchRequest = new SearchRequest();
    searchRequest.source(inputDataset.getSearchSourceBuilder());
    List<String> indicesList = inputDataset.getIndices();
    searchRequest.indices(indicesList.toArray(new String[0]));
    client.search(searchRequest, ActionListener.wrap(r -> {
        if (r == null || r.getHits() == null || r.getHits().getTotalHits() == null || r.getHits().getTotalHits().value == 0) {
            listener.onFailure(new IllegalArgumentException("No document found"));
            return;
        }
        SearchHits hits = r.getHits();
        List<Map<String, Object>> input = new ArrayList<>();
        for (SearchHit hit : hits.getHits()) {
            input.add(hit.getSourceAsMap());
        }
        DataFrame dataFrame = DataFrameBuilder.load(input);
        listener.onResponse(dataFrame);
    }, e -> {
        // Pass the exception as the throwable argument so the stack trace is logged,
        // instead of concatenating its toString() into the message.
        log.error("Failed to search", e);
        listener.onFailure(e);
    }));
}
Also used : FieldDefaults(lombok.experimental.FieldDefaults) Client(org.opensearch.client.Client) SearchHit(org.opensearch.search.SearchHit) RequiredArgsConstructor(lombok.RequiredArgsConstructor) DataFrame(org.opensearch.ml.common.dataframe.DataFrame) SearchHits(org.opensearch.search.SearchHits) ArrayList(java.util.ArrayList) List(java.util.List) AccessLevel(lombok.AccessLevel) Map(java.util.Map) Log4j2(lombok.extern.log4j.Log4j2) SearchRequest(org.opensearch.action.search.SearchRequest) ActionListener(org.opensearch.action.ActionListener) DataFrameBuilder(org.opensearch.ml.common.dataframe.DataFrameBuilder) MLInputDataset(org.opensearch.ml.common.dataset.MLInputDataset) SearchQueryInputDataset(org.opensearch.ml.common.dataset.SearchQueryInputDataset) DataFrameInputDataset(org.opensearch.ml.common.dataset.DataFrameInputDataset) MLInputDataType(org.opensearch.ml.common.dataset.MLInputDataType) SearchRequest(org.opensearch.action.search.SearchRequest) SearchQueryInputDataset(org.opensearch.ml.common.dataset.SearchQueryInputDataset) SearchHit(org.opensearch.search.SearchHit) DataFrame(org.opensearch.ml.common.dataframe.DataFrame) ArrayList(java.util.ArrayList) List(java.util.List) SearchHits(org.opensearch.search.SearchHits)

Example 15 with DataFrame

use of org.opensearch.ml.common.dataframe.DataFrame in project ml-commons by opensearch-project.

the class MLInput method parse.

/**
 * Parses an {@code MLInput} from the object the parser is currently positioned on.
 *
 * <p>The algorithm name is upper-cased with {@link Locale#ROOT} and resolved through
 * {@code FunctionName.from}. The parser must be on a START_OBJECT token; each field of
 * that object is then handled by name: algorithm parameters, the array of source index
 * names, the search query, and the inline data frame. Children of unrecognized fields
 * are skipped.
 *
 * @param parser parser positioned on the START_OBJECT token of the input
 * @param inputAlgoName algorithm name in any letter case
 * @return the MLInput assembled from the parsed fields; fields that were absent remain
 *         {@code null} (or an empty list for the source indices)
 * @throws IOException if reading from the parser fails
 */
public static MLInput parse(XContentParser parser, String inputAlgoName) throws IOException {
    String algorithmName = inputAlgoName.toUpperCase(Locale.ROOT);
    FunctionName algorithm = FunctionName.from(algorithmName);
    MLAlgoParams mlParameters = null;
    SearchSourceBuilder searchSourceBuilder = null;
    List<String> sourceIndices = new ArrayList<>();
    DataFrame dataFrame = null;
    ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.currentToken(), parser);
    while (parser.nextToken() != XContentParser.Token.END_OBJECT) {
        String fieldName = parser.currentName();
        // Advance from the field name to the field's value token.
        parser.nextToken();
        switch (fieldName) {
            case ML_PARAMETERS_FIELD:
                mlParameters = parser.namedObject(MLAlgoParams.class, algorithmName, null);
                break;
            case INPUT_INDEX_FIELD:
                ensureExpectedToken(XContentParser.Token.START_ARRAY, parser.currentToken(), parser);
                while (parser.nextToken() != XContentParser.Token.END_ARRAY) {
                    sourceIndices.add(parser.text());
                }
                break;
            case INPUT_QUERY_FIELD:
                ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.currentToken(), parser);
                searchSourceBuilder = SearchSourceBuilder.fromXContent(parser, false);
                break;
            case INPUT_DATA_FIELD:
                dataFrame = DefaultDataFrame.parse(parser);
                // Explicit break: the data-frame branch must not fall through into the
                // skip logic reserved for unknown fields.
                break;
            default:
                parser.skipChildren();
                break;
        }
    }
    // NOTE(review): the meaning of the trailing null constructor argument is not visible here — confirm against the MLInput constructor.
    return new MLInput(algorithm, mlParameters, searchSourceBuilder, sourceIndices, dataFrame, null);
}
Also used : ArrayList(java.util.ArrayList) DataFrame(org.opensearch.ml.common.dataframe.DataFrame) DefaultDataFrame(org.opensearch.ml.common.dataframe.DefaultDataFrame) SearchSourceBuilder(org.opensearch.search.builder.SearchSourceBuilder)

Aggregations

DataFrame (org.opensearch.ml.common.dataframe.DataFrame)34 ColumnMeta (org.opensearch.ml.common.dataframe.ColumnMeta)10 DefaultDataFrame (org.opensearch.ml.common.dataframe.DefaultDataFrame)10 MLPredictionOutput (org.opensearch.ml.common.parameter.MLPredictionOutput)10 MLInput (org.opensearch.ml.common.parameter.MLInput)9 ArrayList (java.util.ArrayList)8 Test (org.junit.Test)8 Model (org.opensearch.ml.common.parameter.Model)8 Row (org.opensearch.ml.common.dataframe.Row)7 DataFrameInputDataset (org.opensearch.ml.common.dataset.DataFrameInputDataset)7 MLInputDataset (org.opensearch.ml.common.dataset.MLInputDataset)7 KMeansHelper.constructKMeansDataFrame (org.opensearch.ml.engine.helper.KMeansHelper.constructKMeansDataFrame)7 HashMap (java.util.HashMap)6 ColumnValue (org.opensearch.ml.common.dataframe.ColumnValue)6 LinearRegressionHelper.constructLinearRegressionPredictionDataFrame (org.opensearch.ml.engine.helper.LinearRegressionHelper.constructLinearRegressionPredictionDataFrame)5 LinearRegressionHelper.constructLinearRegressionTrainDataFrame (org.opensearch.ml.engine.helper.LinearRegressionHelper.constructLinearRegressionTrainDataFrame)5 List (java.util.List)4 Before (org.junit.Before)4 Input (org.opensearch.ml.common.parameter.Input)4 LocalSampleCalculatorInput (org.opensearch.ml.common.parameter.LocalSampleCalculatorInput)4