Search in sources :

Example 1 with ML_TOTAL_MODEL_COUNT

Use of org.opensearch.ml.stats.StatNames.ML_TOTAL_MODEL_COUNT in project ml-commons by opensearch-project.

The method train of the class MLTrainingTaskRunner.

/**
 * Trains a model from the given input, indexes it into the ML model index and reports the
 * outcome to {@code actionListener}.
 *
 * <p>Every failure routed through the internal listener increments the per-function TRAIN
 * failure counter and the total failure counter before it is forwarded. After the model
 * document has been indexed, the total and per-function model counters are incremented and an
 * {@link MLTrainingOutput} carrying the indexed document id is returned.
 *
 * @param mlTask         task being executed; its task id, function name and async flag are read
 * @param mlInput        training input handed to {@code MLEngine.train}; its algorithm is stored
 *                       with the model
 * @param actionListener receives the training response, or the failure after stats are updated
 */
private void train(MLTask mlTask, MLInput mlInput, ActionListener<MLTaskResponse> actionListener) {
    // Responses pass through untouched; failures are counted before reaching the caller.
    ActionListener<MLTaskResponse> listener = ActionListener.wrap(r -> actionListener.onResponse(r), e -> {
        mlStats.createCounterStatIfAbsent(failureCountStat(mlTask.getFunctionName(), ActionName.TRAIN)).increment();
        mlStats.getStat(ML_TOTAL_FAILURE_COUNT).increment();
        actionListener.onFailure(e);
    });
    try {
        // run training
        mlTaskManager.updateTaskState(mlTask.getTaskId(), MLTaskState.RUNNING, mlTask.isAsync());
        Model model = MLEngine.train(mlInput);
        mlIndicesHandler.initModelIndexIfAbsent(ActionListener.wrap(indexCreated -> {
            if (!indexCreated) {
                // This step creates the model index (not the task index), so the message names it.
                listener.onFailure(new RuntimeException("No response to create ML model index"));
                return;
            }
            // TODO: put the user into model for backend role based access control.
            MLModel mlModel = new MLModel(mlInput.getAlgorithm(), model);
            // The thread context is stashed for the index call and restored before the index
            // listener runs. NOTE(review): presumably so the write does not run with the
            // caller's context — confirm.
            try (ThreadContext.StoredContext context = client.threadPool().getThreadContext().stashContext()) {
                ActionListener<IndexResponse> indexResponseListener = ActionListener.wrap(r -> {
                    log.info("Model data indexing done, result:{}, model id: {}", r.getResult(), r.getId());
                    mlStats.getStat(ML_TOTAL_MODEL_COUNT).increment();
                    mlStats.createCounterStatIfAbsent(modelCountStat(mlTask.getFunctionName())).increment();
                    // The task id is only returned for async tasks.
                    String returnedTaskId = mlTask.isAsync() ? mlTask.getTaskId() : null;
                    MLTrainingOutput output = new MLTrainingOutput(r.getId(), returnedTaskId, MLTaskState.COMPLETED.name());
                    listener.onResponse(MLTaskResponse.builder().output(output).build());
                }, listener::onFailure);
                IndexRequest indexRequest = new IndexRequest(ML_MODEL_INDEX);
                indexRequest.source(mlModel.toXContent(XContentBuilder.builder(XContentType.JSON.xContent()), ToXContent.EMPTY_PARAMS));
                indexRequest.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
                client.index(indexRequest, ActionListener.runBefore(indexResponseListener, () -> context.restore()));
            } catch (Exception e) {
                log.error("Failed to save ML model", e);
                listener.onFailure(e);
            }
        }, e -> {
            log.error("Failed to init ML model index", e);
            listener.onFailure(e);
        }));
    } catch (Exception e) {
        // todo need to specify what exception
        // Parameterized form; the trailing exception is still logged as the throwable.
        log.error("Failed to train {}", mlInput.getAlgorithm(), e);
        listener.onFailure(e);
    }
}
Also used : IndexResponse(org.opensearch.action.index.IndexResponse) ToXContent(org.opensearch.common.xcontent.ToXContent) ThreadPool(org.opensearch.threadpool.ThreadPool) StatNames.modelCountStat(org.opensearch.ml.stats.StatNames.modelCountStat) MLTaskState(org.opensearch.ml.common.parameter.MLTaskState) MLInput(org.opensearch.ml.common.parameter.MLInput) MLInputDatasetHandler(org.opensearch.ml.indices.MLInputDatasetHandler) ThreadedActionListener(org.opensearch.action.support.ThreadedActionListener) ThreadContext(org.opensearch.common.util.concurrent.ThreadContext) MLTask(org.opensearch.ml.common.parameter.MLTask) ML_EXECUTING_TASK_COUNT(org.opensearch.ml.stats.StatNames.ML_EXECUTING_TASK_COUNT) WriteRequest(org.opensearch.action.support.WriteRequest) ActionListener(org.opensearch.action.ActionListener) MLModel(org.opensearch.ml.common.parameter.MLModel) MLIndicesHandler(org.opensearch.ml.indices.MLIndicesHandler) ML_TOTAL_MODEL_COUNT(org.opensearch.ml.stats.StatNames.ML_TOTAL_MODEL_COUNT) MLTaskResponse(org.opensearch.ml.common.transport.MLTaskResponse) MLInputDataType(org.opensearch.ml.common.dataset.MLInputDataType) Client(org.opensearch.client.Client) MLStats(org.opensearch.ml.stats.MLStats) ActionName(org.opensearch.ml.stats.ActionName) DataFrame(org.opensearch.ml.common.dataframe.DataFrame) UUID(java.util.UUID) Instant(java.time.Instant) TransportService(org.opensearch.transport.TransportService) StatNames.requestCountStat(org.opensearch.ml.stats.StatNames.requestCountStat) MLTrainingTaskAction(org.opensearch.ml.common.transport.training.MLTrainingTaskAction) XContentBuilder(org.opensearch.common.xcontent.XContentBuilder) MLEngine(org.opensearch.ml.engine.MLEngine) StatNames.failureCountStat(org.opensearch.ml.stats.StatNames.failureCountStat) MLTrainingOutput(org.opensearch.ml.common.parameter.MLTrainingOutput) ML_MODEL_INDEX(org.opensearch.ml.indices.MLIndicesHandler.ML_MODEL_INDEX) Model(org.opensearch.ml.common.parameter.Model) 
ML_TOTAL_FAILURE_COUNT(org.opensearch.ml.stats.StatNames.ML_TOTAL_FAILURE_COUNT) MLTaskType(org.opensearch.ml.common.parameter.MLTaskType) MLCircuitBreakerService(org.opensearch.ml.common.breaker.MLCircuitBreakerService) Log4j2(lombok.extern.log4j.Log4j2) ActionListenerResponseHandler(org.opensearch.action.ActionListenerResponseHandler) ClusterService(org.opensearch.cluster.service.ClusterService) TASK_THREAD_POOL(org.opensearch.ml.plugin.MachineLearningPlugin.TASK_THREAD_POOL) ML_TOTAL_REQUEST_COUNT(org.opensearch.ml.stats.StatNames.ML_TOTAL_REQUEST_COUNT) XContentType(org.opensearch.common.xcontent.XContentType) IndexRequest(org.opensearch.action.index.IndexRequest) DataFrameInputDataset(org.opensearch.ml.common.dataset.DataFrameInputDataset) MLTrainingTaskRequest(org.opensearch.ml.common.transport.training.MLTrainingTaskRequest) MLTaskResponse(org.opensearch.ml.common.transport.MLTaskResponse) MLTrainingOutput(org.opensearch.ml.common.parameter.MLTrainingOutput) ThreadedActionListener(org.opensearch.action.support.ThreadedActionListener) ActionListener(org.opensearch.action.ActionListener) MLModel(org.opensearch.ml.common.parameter.MLModel) Model(org.opensearch.ml.common.parameter.Model) MLModel(org.opensearch.ml.common.parameter.MLModel) IndexRequest(org.opensearch.action.index.IndexRequest)

Aggregations

Instant (java.time.Instant)1 UUID (java.util.UUID)1 Log4j2 (lombok.extern.log4j.Log4j2)1 ActionListener (org.opensearch.action.ActionListener)1 ActionListenerResponseHandler (org.opensearch.action.ActionListenerResponseHandler)1 IndexRequest (org.opensearch.action.index.IndexRequest)1 IndexResponse (org.opensearch.action.index.IndexResponse)1 ThreadedActionListener (org.opensearch.action.support.ThreadedActionListener)1 WriteRequest (org.opensearch.action.support.WriteRequest)1 Client (org.opensearch.client.Client)1 ClusterService (org.opensearch.cluster.service.ClusterService)1 ThreadContext (org.opensearch.common.util.concurrent.ThreadContext)1 ToXContent (org.opensearch.common.xcontent.ToXContent)1 XContentBuilder (org.opensearch.common.xcontent.XContentBuilder)1 XContentType (org.opensearch.common.xcontent.XContentType)1 MLCircuitBreakerService (org.opensearch.ml.common.breaker.MLCircuitBreakerService)1 DataFrame (org.opensearch.ml.common.dataframe.DataFrame)1 DataFrameInputDataset (org.opensearch.ml.common.dataset.DataFrameInputDataset)1 MLInputDataType (org.opensearch.ml.common.dataset.MLInputDataType)1 MLInput (org.opensearch.ml.common.parameter.MLInput)1