use of org.opensearch.ml.common.dataset.DataFrameInputDataset in project ml-commons by opensearch-project.
the class MLTrainAndPredictTaskRunner method trainAndPredict.
/**
 * Runs a combined train-and-predict task on the supplied data frame and reports the outcome.
 *
 * <p>Increments the executing, total and per-algorithm request counters, registers the task with
 * {@code mlTaskManager}, marks it RUNNING, and calls {@code MLEngine.trainAndPredict} with the
 * request's {@code MLInput} whose input dataset is replaced by {@code inputDataFrame}.
 *
 * @param mlTask         task being executed; its task id, function name and async flag are read
 * @param inputDataFrame data frame handed to the engine as the input dataset
 * @param request        originating request; only its {@code MLInput} is used here
 * @param listener       notified with the {@code MLTaskResponse} on success or the exception on failure
 */
private void trainAndPredict(MLTask mlTask, DataFrame inputDataFrame, MLTrainingTaskRequest request, ActionListener<MLTaskResponse> listener) {
    // NOTE(review): wrappedCleanupListener presumably performs per-task cleanup after the
    // caller's listener is notified — confirm against its definition.
    ActionListener<MLTaskResponse> internalListener = wrappedCleanupListener(listener, mlTask.getTaskId());
    // track ML task count and add ML task into cache
    mlStats.getStat(ML_EXECUTING_TASK_COUNT).increment();
    mlStats.getStat(ML_TOTAL_REQUEST_COUNT).increment();
    mlStats.createCounterStatIfAbsent(requestCountStat(mlTask.getFunctionName(), ActionName.TRAIN_PREDICT)).increment();
    mlTaskManager.add(mlTask);
    MLInput mlInput = request.getMlInput();
    // run train and predict
    try {
        mlTaskManager.updateTaskState(mlTask.getTaskId(), MLTaskState.RUNNING, mlTask.isAsync());
        MLOutput output = MLEngine.trainAndPredict(mlInput.toBuilder().inputDataset(new DataFrameInputDataset(inputDataFrame)).build());
        handleAsyncMLTaskComplete(mlTask);
        if (output instanceof MLPredictionOutput) {
            ((MLPredictionOutput) output).setStatus(MLTaskState.COMPLETED.name());
        }
        MLTaskResponse response = MLTaskResponse.builder().output(output).build();
        log.info("Train and predict task done for algorithm: {}, task id: {}", mlTask.getFunctionName(), mlTask.getTaskId());
        internalListener.onResponse(response);
    } catch (Exception e) {
        // todo need to specify what exception
        log.error("Failed to train and predict " + mlInput.getAlgorithm(), e);
        // Report the failure through the wrapped listener (not the raw one) so the failure
        // path goes through the same wrapper as the success path.
        handlePredictFailure(mlTask, internalListener, e, true);
    }
}
use of org.opensearch.ml.common.dataset.DataFrameInputDataset in project ml-commons by opensearch-project.
the class MLInputDatasetHandlerTests method testSearchQueryInputDatasetWrongType.
/**
 * Verifies that {@code parseSearchQueryInput} rejects a data-frame dataset with an
 * {@link IllegalArgumentException} carrying the "not SEARCH_QUERY type" message.
 */
public void testSearchQueryInputDatasetWrongType() {
    expectedEx.expect(IllegalArgumentException.class);
    expectedEx.expectMessage("Input dataset is not SEARCH_QUERY type.");
    // Plain HashMap instead of double-brace initialization, which would create an
    // anonymous HashMap subclass just to hold one entry.
    HashMap<String, Object> row = new HashMap<>();
    row.put("key1", 2.0D);
    DataFrame testDataFrame = DataFrameBuilder.load(Collections.singletonList(row));
    DataFrameInputDataset dataFrameInputDataset = DataFrameInputDataset.builder().dataFrame(testDataFrame).build();
    mlInputDatasetHandler.parseSearchQueryInput(dataFrameInputDataset, listener);
}
use of org.opensearch.ml.common.dataset.DataFrameInputDataset in project ml-commons by opensearch-project.
the class MLInputDatasetHandlerTests method testDataFrameInputDataset.
/**
 * Verifies that {@code parseDataFrameInput} returns a data frame equal to the one wrapped by
 * the {@code DataFrameInputDataset} it is given.
 */
public void testDataFrameInputDataset() {
    // Plain HashMap instead of double-brace initialization, which would create an
    // anonymous HashMap subclass just to hold one entry.
    HashMap<String, Object> row = new HashMap<>();
    row.put("key1", 2.0D);
    DataFrame testDataFrame = DataFrameBuilder.load(Collections.singletonList(row));
    DataFrameInputDataset dataFrameInputDataset = DataFrameInputDataset.builder().dataFrame(testDataFrame).build();
    DataFrame result = mlInputDatasetHandler.parseDataFrameInput(dataFrameInputDataset);
    Assert.assertEquals(testDataFrame, result);
}
use of org.opensearch.ml.common.dataset.DataFrameInputDataset in project ml-commons by opensearch-project.
the class MLInputTests method testParseKmeansInputDataFrame.
/**
 * Parses a KMEANS request whose {@code input_data} is an inline two-column data frame and
 * checks the parsed column metadata and the values of the first row.
 */
public void testParseKmeansInputDataFrame() throws IOException {
    // The request body is assembled from its two logical parts; the concatenated JSON is
    // {"input_data":{"column_metas":[...],"rows":[...]}}.
    String columnMetasJson = "\"column_metas\":[{\"name\":\"total_sum\",\"column_type\":\"DOUBLE\"},"
        + "{\"name\":\"is_error\",\"column_type\":\"BOOLEAN\"}]";
    String rowsJson = "\"rows\":[{\"values\":[{\"column_type\":\"DOUBLE\",\"value\":15},"
        + "{\"column_type\":\"BOOLEAN\",\"value\":false}]},"
        + "{\"values\":[{\"column_type\":\"DOUBLE\",\"value\":100},"
        + "{\"column_type\":\"BOOLEAN\",\"value\":true}]}]";
    String query = "{\"input_data\":{" + columnMetasJson + "," + rowsJson + "}}";

    MLInput mlInput = MLInput.parse(parser(query), FunctionName.KMEANS.name());
    DataFrame dataFrame = ((DataFrameInputDataset) mlInput.getInputDataset()).getDataFrame();

    // column metadata: count, then type and name of each column
    assertEquals(2, dataFrame.columnMetas().length);
    assertEquals(ColumnType.DOUBLE, dataFrame.columnMetas()[0].getColumnType());
    assertEquals(ColumnType.BOOLEAN, dataFrame.columnMetas()[1].getColumnType());
    assertEquals("total_sum", dataFrame.columnMetas()[0].getName());
    assertEquals("is_error", dataFrame.columnMetas()[1].getName());

    // first row: cell types, then cell values
    assertEquals(ColumnType.DOUBLE, dataFrame.getRow(0).getValue(0).columnType());
    assertEquals(ColumnType.BOOLEAN, dataFrame.getRow(0).getValue(1).columnType());
    assertEquals(15.0, dataFrame.getRow(0).getValue(0).getValue());
    assertEquals(false, dataFrame.getRow(0).getValue(1).getValue());
}
use of org.opensearch.ml.common.dataset.DataFrameInputDataset in project ml-commons by opensearch-project.
the class MLPredictTaskRunner method predict.
/**
 * Runs a prediction with a stored model against the supplied data frame and reports the outcome.
 *
 * <p>Increments the executing, total and per-algorithm request counters and registers the task
 * with {@code mlTaskManager}. When the request carries a model id, the model document is fetched
 * from {@code ML_MODEL_INDEX}, the caller's permissions are checked against the model's stored
 * user, the Base64 model content is decoded, and {@code MLEngine.predict} is invoked. Every
 * failure branch is reported through {@code handlePredictFailure}.
 *
 * @param mlTask         task being executed; its task id, model id, function name and async flag are read
 * @param inputDataFrame data frame handed to the engine as the input dataset
 * @param request        originating request; its model id and {@code MLInput} are used
 * @param listener       notified with the {@code MLTaskResponse} on success or the exception on failure
 */
private void predict(MLTask mlTask, DataFrame inputDataFrame, MLPredictionTaskRequest request, ActionListener<MLTaskResponse> listener) {
    // NOTE(review): wrappedCleanupListener presumably performs per-task cleanup after the
    // caller's listener is notified — confirm against its definition.
    ActionListener<MLTaskResponse> internalListener = wrappedCleanupListener(listener, mlTask.getTaskId());
    // track ML task count and add ML task into cache
    mlStats.getStat(ML_EXECUTING_TASK_COUNT).increment();
    mlStats.getStat(ML_TOTAL_REQUEST_COUNT).increment();
    mlStats.createCounterStatIfAbsent(requestCountStat(mlTask.getFunctionName(), ActionName.PREDICT)).increment();
    mlTaskManager.add(mlTask);
    // run predict
    if (request.getModelId() != null) {
        // search model by model id.
        try (ThreadContext.StoredContext context = threadPool.getThreadContext().stashContext()) {
            MLInput mlInput = request.getMlInput();
            ActionListener<GetResponse> getResponseListener = ActionListener.wrap(r -> {
                if (r == null || !r.isExists()) {
                    // Go through handlePredictFailure like every other failure branch, so the
                    // task's failure handling is not skipped when the model is missing. Not
                    // tracked as a failure, matching the other client-error branches.
                    ResourceNotFoundException notFound = new ResourceNotFoundException("No model found, please check the modelId.");
                    handlePredictFailure(mlTask, internalListener, notFound, false);
                    return;
                }
                Map<String, Object> source = r.getSourceAsMap();
                User requestUser = getUserContext(client);
                User resourceUser = User.parse((String) source.get(USER));
                if (!checkUserPermissions(requestUser, resourceUser, request.getModelId())) {
                    // The backend roles of request user and resource user doesn't have intersection
                    OpenSearchException e = new OpenSearchException("User: " + requestUser.getName() + " does not have permissions to run predict by model: " + request.getModelId());
                    handlePredictFailure(mlTask, internalListener, e, false);
                    return;
                }
                // rebuild the engine model from the stored document fields
                Model model = new Model();
                model.setName((String) source.get(MLModel.MODEL_NAME));
                model.setVersion((Integer) source.get(MLModel.MODEL_VERSION));
                byte[] decoded = Base64.getDecoder().decode((String) source.get(MLModel.MODEL_CONTENT));
                model.setContent(decoded);
                // run predict
                mlTaskManager.updateTaskState(mlTask.getTaskId(), MLTaskState.RUNNING, mlTask.isAsync());
                MLOutput output = MLEngine.predict(mlInput.toBuilder().inputDataset(new DataFrameInputDataset(inputDataFrame)).build(), model);
                if (output instanceof MLPredictionOutput) {
                    ((MLPredictionOutput) output).setStatus(MLTaskState.COMPLETED.name());
                }
                // Once prediction complete, reduce ML_EXECUTING_TASK_COUNT and update task state
                handleAsyncMLTaskComplete(mlTask);
                MLTaskResponse response = MLTaskResponse.builder().output(output).build();
                internalListener.onResponse(response);
            }, e -> {
                log.error("Failed to predict " + mlInput.getAlgorithm() + ", modelId: " + mlTask.getModelId(), e);
                handlePredictFailure(mlTask, internalListener, e, true);
            });
            GetRequest getRequest = new GetRequest(ML_MODEL_INDEX, mlTask.getModelId());
            // restore the stashed thread context before the get-response listener runs
            client.get(getRequest, ActionListener.runBefore(getResponseListener, () -> context.restore()));
        } catch (Exception e) {
            log.error("Failed to get model " + mlTask.getModelId(), e);
            handlePredictFailure(mlTask, internalListener, e, true);
        }
    } else {
        IllegalArgumentException e = new IllegalArgumentException("ModelId is invalid");
        log.error("ModelId is invalid", e);
        handlePredictFailure(mlTask, internalListener, e, false);
    }
}
Aggregations