Search in sources :

Example 1 with ERROR_FIELD

use of org.opensearch.ad.model.ADTask.ERROR_FIELD in project anomaly-detection by opensearch-project.

the class ADTaskManager method setHCDetectorTaskDone.

/**
 * Set state for HC detector level task when all entities done.
 *
 * The state could be FINISHED,FAILED or STOPPED.
 * 1. If input task state is FINISHED, will check FINISHED entity task count. If
 * there is no FINISHED entity task, will set HC detector level task as FAILED; otherwise
 * set as FINISHED.
 * 2. If input task state is not FINISHED, will set HC detector level task's state as the same.
 *
 * @param adTask AD task
 * @param state AD task state
 * @param listener action listener
 */
public void setHCDetectorTaskDone(ADTask adTask, ADTaskState state, ActionListener<AnomalyDetectorJobResponse> listener) {
    String detectorId = adTask.getDetectorId();
    String taskId = adTask.isEntityTask() ? adTask.getParentTaskId() : adTask.getTaskId();
    String detectorTaskId = adTask.getDetectorLevelTaskId();
    ActionListener<UpdateResponse> wrappedListener = ActionListener.wrap(response -> {
        logger.info("Historical HC detector done with state: {}. Remove from cache, detector id:{}", state.name(), detectorId);
        adTaskCacheManager.removeHistoricalTaskCache(detectorId);
    }, e -> {
        // Will reset task state when get detector with task or maintain tasks in hourly cron.
        if (e instanceof LimitExceededException && e.getMessage().contains(HC_DETECTOR_TASK_IS_UPDATING)) {
            logger.warn("HC task is updating, skip this update for task: " + taskId);
        } else {
            logger.error("Failed to update task: " + taskId, e);
        }
        adTaskCacheManager.removeHistoricalTaskCache(detectorId);
    });
    // wait for 2 seconds to acquire updating HC detector task semaphore
    long timeoutInMillis = 2000;
    if (state == ADTaskState.FINISHED) {
        this.countEntityTasksByState(detectorTaskId, ImmutableList.of(ADTaskState.FINISHED), ActionListener.wrap(r -> {
            logger.info("number of finished entity tasks: {}, for detector {}", r, adTask.getDetectorId());
            // Set task as FAILED if no finished entity task; otherwise set as FINISHED
            ADTaskState hcDetectorTaskState = r == 0 ? ADTaskState.FAILED : ADTaskState.FINISHED;
            // execute in AD batch task thread pool in case waiting for semaphore waste any shared OpenSearch thread pool
            threadPool.executor(AD_BATCH_TASK_THREAD_POOL_NAME).execute(() -> {
                updateADHCDetectorTask(detectorId, taskId, ImmutableMap.of(STATE_FIELD, hcDetectorTaskState.name(), TASK_PROGRESS_FIELD, 1.0, EXECUTION_END_TIME_FIELD, Instant.now().toEpochMilli()), timeoutInMillis, wrappedListener);
            });
        }, e -> {
            logger.error("Failed to get finished entity tasks", e);
            String errorMessage = getErrorMessage(e);
            threadPool.executor(AD_BATCH_TASK_THREAD_POOL_NAME).execute(() -> {
                updateADHCDetectorTask(detectorId, taskId, ImmutableMap.of(STATE_FIELD, // set as FAILED if fail to get finished entity tasks.
                ADTaskState.FAILED.name(), TASK_PROGRESS_FIELD, 1.0, ERROR_FIELD, errorMessage, EXECUTION_END_TIME_FIELD, Instant.now().toEpochMilli()), timeoutInMillis, wrappedListener);
            });
        }));
    } else {
        threadPool.executor(AD_BATCH_TASK_THREAD_POOL_NAME).execute(() -> {
            updateADHCDetectorTask(detectorId, taskId, ImmutableMap.of(STATE_FIELD, state.name(), ERROR_FIELD, adTask.getError(), EXECUTION_END_TIME_FIELD, Instant.now().toEpochMilli()), timeoutInMillis, wrappedListener);
        });
    }
    listener.onResponse(new AnomalyDetectorJobResponse(taskId, 0, 0, 0, RestStatus.OK));
}
Also used : PARENT_TASK_ID_FIELD(org.opensearch.ad.model.ADTask.PARENT_TASK_ID_FIELD) IndexResponse(org.opensearch.action.index.IndexResponse) HashRing(org.opensearch.ad.cluster.HashRing) Version(org.opensearch.Version) FAIL_TO_FIND_DETECTOR_MSG(org.opensearch.ad.constant.CommonErrorMessages.FAIL_TO_FIND_DETECTOR_MSG) BulkAction(org.opensearch.action.bulk.BulkAction) LimitExceededException(org.opensearch.ad.common.exception.LimitExceededException) ExceptionUtil.getShardsFailure(org.opensearch.ad.util.ExceptionUtil.getShardsFailure) ADTaskState(org.opensearch.ad.model.ADTaskState) AnomalyDetectorFunction(org.opensearch.ad.rest.handler.AnomalyDetectorFunction) STOPPED_BY_FIELD(org.opensearch.ad.model.ADTask.STOPPED_BY_FIELD) WriteRequest(org.opensearch.action.support.WriteRequest) ANOMALY_DETECTOR_JOB_INDEX(org.opensearch.ad.model.AnomalyDetectorJob.ANOMALY_DETECTOR_JOB_INDEX) MAX_OLD_AD_TASK_DOCS_PER_DETECTOR(org.opensearch.ad.settings.AnomalyDetectorSettings.MAX_OLD_AD_TASK_DOCS_PER_DETECTOR) Map(java.util.Map) ForwardADTaskAction(org.opensearch.ad.transport.ForwardADTaskAction) ActionListener(org.opensearch.action.ActionListener) DETECTOR_IS_RUNNING(org.opensearch.ad.constant.CommonErrorMessages.DETECTOR_IS_RUNNING) ADStatsNodeResponse(org.opensearch.ad.transport.ADStatsNodeResponse) DeleteRequest(org.opensearch.action.delete.DeleteRequest) MAX_RUNNING_ENTITIES_PER_DETECTOR_FOR_HISTORICAL_ANALYSIS(org.opensearch.ad.settings.AnomalyDetectorSettings.MAX_RUNNING_ENTITIES_PER_DETECTOR_FOR_HISTORICAL_ANALYSIS) Client(org.opensearch.client.Client) UpdateByQueryAction(org.opensearch.index.reindex.UpdateByQueryAction) AD_BATCH_TASK_THREAD_POOL_NAME(org.opensearch.ad.AnomalyDetectorPlugin.AD_BATCH_TASK_THREAD_POOL_NAME) ADTaskProfileRequest(org.opensearch.ad.transport.ADTaskProfileRequest) TimeValue(org.opensearch.common.unit.TimeValue) RestHandlerUtils.createXContentParserFromRegistry(org.opensearch.ad.util.RestHandlerUtils.createXContentParserFromRegistry) SearchHit(org.opensearch.search.SearchHit) TransportRequestOptions(org.opensearch.transport.TransportRequestOptions) Set(java.util.Set) ExceptionsHelper(org.opensearch.ExceptionsHelper) Settings(org.opensearch.common.settings.Settings) NO_ELIGIBLE_NODE_TO_RUN_DETECTOR(org.opensearch.ad.constant.CommonErrorMessages.NO_ELIGIBLE_NODE_TO_RUN_DETECTOR) IS_LATEST_FIELD(org.opensearch.ad.model.ADTask.IS_LATEST_FIELD) DETECTION_STATE_INDEX(org.opensearch.ad.constant.CommonName.DETECTION_STATE_INDEX) ScoreMode(org.apache.lucene.search.join.ScoreMode) TransportService(org.opensearch.transport.TransportService) Logger(org.apache.logging.log4j.Logger) CAN_NOT_FIND_LATEST_TASK(org.opensearch.ad.constant.CommonErrorMessages.CAN_NOT_FIND_LATEST_TASK) ADEntityTaskProfile(org.opensearch.ad.model.ADEntityTaskProfile) ActionListenerResponseHandler(org.opensearch.action.ActionListenerResponseHandler) TermsQueryBuilder(org.opensearch.index.query.TermsQueryBuilder) EXECUTION_END_TIME_FIELD(org.opensearch.ad.model.ADTask.EXECUTION_END_TIME_FIELD) UpdateRequest(org.opensearch.action.update.UpdateRequest) XContentType(org.opensearch.common.xcontent.XContentType) BoolQueryBuilder(org.opensearch.index.query.BoolQueryBuilder) BATCH_TASK_PIECE_INTERVAL_SECONDS(org.opensearch.ad.settings.AnomalyDetectorSettings.BATCH_TASK_PIECE_INTERVAL_SECONDS) MAX_BATCH_TASK_PER_NODE(org.opensearch.ad.settings.AnomalyDetectorSettings.MAX_BATCH_TASK_PER_NODE) UpdateResponse(org.opensearch.action.update.UpdateResponse) ThreadPool(org.opensearch.threadpool.ThreadPool) ADTaskType.taskTypeToString(org.opensearch.ad.model.ADTaskType.taskTypeToString) ADTaskProfile(org.opensearch.ad.model.ADTaskProfile) ArrayList(java.util.ArrayList) XCONTENT_WITH_TYPE(org.opensearch.ad.util.RestHandlerUtils.XCONTENT_WITH_TYPE) HC_DETECTOR_TASK_IS_UPDATING(org.opensearch.ad.constant.CommonErrorMessages.HC_DETECTOR_TASK_IS_UPDATING) AnomalyDetector(org.opensearch.ad.model.AnomalyDetector) ADCancelTaskAction(org.opensearch.ad.transport.ADCancelTaskAction) BiConsumer(java.util.function.BiConsumer) DeleteResponse(org.opensearch.action.delete.DeleteResponse) SearchRequest(org.opensearch.action.search.SearchRequest) INIT_PROGRESS_FIELD(org.opensearch.ad.model.ADTask.INIT_PROGRESS_FIELD) QueryBuilders(org.opensearch.index.query.QueryBuilders) ADStatsNodesAction(org.opensearch.ad.transport.ADStatsNodesAction) IOException(java.io.IOException) TotalHits(org.apache.lucene.search.TotalHits) XContentBuilder(org.opensearch.common.xcontent.XContentBuilder) ChronoUnit(java.time.temporal.ChronoUnit) NamedXContentRegistry(org.opensearch.common.xcontent.NamedXContentRegistry) ClusterService(org.opensearch.cluster.service.ClusterService) ExceptionUtil.getErrorMessage(org.opensearch.ad.util.ExceptionUtil.getErrorMessage) ADTaskAction(org.opensearch.ad.model.ADTaskAction) DeleteByQueryRequest(org.opensearch.index.reindex.DeleteByQueryRequest) AD_DETECTOR_ASSIGNED_BATCH_TASK_SLOT_COUNT(org.opensearch.ad.stats.InternalStatNames.AD_DETECTOR_ASSIGNED_BATCH_TASK_SLOT_COUNT) DeleteByQueryAction(org.opensearch.index.reindex.DeleteByQueryAction) COORDINATING_NODE_FIELD(org.opensearch.ad.model.ADTask.COORDINATING_NODE_FIELD) DELETE_AD_RESULT_WHEN_DELETE_DETECTOR(org.opensearch.ad.settings.AnomalyDetectorSettings.DELETE_AD_RESULT_WHEN_DELETE_DETECTOR) ResourceNotFoundException(org.opensearch.ad.common.exception.ResourceNotFoundException) MAX_OLD_AD_TASK_DOCS(org.opensearch.ad.settings.AnomalyDetectorSettings.MAX_OLD_AD_TASK_DOCS) ADBatchAnomalyResultAction(org.opensearch.ad.transport.ADBatchAnomalyResultAction) ToXContent(org.opensearch.common.xcontent.ToXContent) OpenSearchStatusException(org.opensearch.OpenSearchStatusException) BulkRequest(org.opensearch.action.bulk.BulkRequest) ERROR_FIELD(org.opensearch.ad.model.ADTask.ERROR_FIELD) EXECUTION_START_TIME_FIELD(org.opensearch.ad.model.ADTask.EXECUTION_START_TIME_FIELD) ParseUtils.isNullOrEmpty(org.opensearch.ad.util.ParseUtils.isNullOrEmpty) ADTaskCancelledException(org.opensearch.ad.common.exception.ADTaskCancelledException) ADStatsRequest(org.opensearch.ad.transport.ADStatsRequest) ThreadContext(org.opensearch.common.util.concurrent.ThreadContext) AnomalyDetectionException(org.opensearch.ad.common.exception.AnomalyDetectionException) XContentParser(org.opensearch.common.xcontent.XContentParser) DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) IndexAnomalyDetectorJobActionHandler(org.opensearch.ad.rest.handler.IndexAnomalyDetectorJobActionHandler) DETECTOR_ID_FIELD(org.opensearch.ad.model.ADTask.DETECTOR_ID_FIELD) Locale(java.util.Locale) BulkItemResponse(org.opensearch.action.bulk.BulkItemResponse) XContentFactory(org.opensearch.common.xcontent.XContentFactory) REALTIME_TASK_TYPES(org.opensearch.ad.model.ADTaskType.REALTIME_TASK_TYPES) NestedQueryBuilder(org.opensearch.index.query.NestedQueryBuilder) TASK_PROGRESS_FIELD(org.opensearch.ad.model.ADTask.TASK_PROGRESS_FIELD) DetectorProfile(org.opensearch.ad.model.DetectorProfile) ImmutableSet(com.google.common.collect.ImmutableSet) ESTIMATED_MINUTES_LEFT_FIELD(org.opensearch.ad.model.ADTask.ESTIMATED_MINUTES_LEFT_FIELD) NOT_ENDED_STATES(org.opensearch.ad.model.ADTaskState.NOT_ENDED_STATES) ImmutableMap(com.google.common.collect.ImmutableMap) Script(org.opensearch.script.Script) LoggingDeprecationHandler(org.opensearch.common.xcontent.LoggingDeprecationHandler) ADTaskType(org.opensearch.ad.model.ADTaskType) XContentParserUtils.ensureExpectedToken(org.opensearch.common.xcontent.XContentParserUtils.ensureExpectedToken) ADTaskProfileNodeResponse(org.opensearch.ad.transport.ADTaskProfileNodeResponse) LAST_UPDATE_TIME_FIELD(org.opensearch.ad.model.ADTask.LAST_UPDATE_TIME_FIELD) Instant(java.time.Instant) RestStatus(org.opensearch.rest.RestStatus) Collectors(java.util.stream.Collectors) TASK_TYPE_FIELD(org.opensearch.ad.model.ADTask.TASK_TYPE_FIELD) Objects(java.util.Objects) AnomalyDetectorJobResponse(org.opensearch.ad.transport.AnomalyDetectorJobResponse) List(java.util.List) ForwardADTaskRequest(org.opensearch.ad.transport.ForwardADTaskRequest) DuplicateTaskException(org.opensearch.ad.common.exception.DuplicateTaskException) SearchSourceBuilder(org.opensearch.search.builder.SearchSourceBuilder) CREATED(org.opensearch.action.DocWriteResponse.Result.CREATED) DetectionDateRange(org.opensearch.ad.model.DetectionDateRange) NUM_MIN_SAMPLES(org.opensearch.ad.settings.AnomalyDetectorSettings.NUM_MIN_SAMPLES) Optional(java.util.Optional) ResourceAlreadyExistsException(org.opensearch.ResourceAlreadyExistsException) ALL_HISTORICAL_TASK_TYPES(org.opensearch.ad.model.ADTaskType.ALL_HISTORICAL_TASK_TYPES) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) RestHandlerUtils(org.opensearch.ad.util.RestHandlerUtils) TASK_ID_FIELD(org.opensearch.ad.model.AnomalyResult.TASK_ID_FIELD) REQUEST_TIMEOUT(org.opensearch.ad.settings.AnomalyDetectorSettings.REQUEST_TIMEOUT) DiscoveryNodeFilterer(org.opensearch.ad.util.DiscoveryNodeFilterer) BytesReference(org.opensearch.common.bytes.BytesReference) ADTask(org.opensearch.ad.model.ADTask) HashMap(java.util.HashMap) SortOrder(org.opensearch.search.sort.SortOrder) ImmutableList(com.google.common.collect.ImmutableList) EXCEED_HISTORICAL_ANALYSIS_LIMIT(org.opensearch.ad.constant.CommonErrorMessages.EXCEED_HISTORICAL_ANALYSIS_LIMIT) ADCancelTaskRequest(org.opensearch.ad.transport.ADCancelTaskRequest) SearchResponse(org.opensearch.action.search.SearchResponse) EndRunException(org.opensearch.ad.common.exception.EndRunException) ADBatchAnomalyResultRequest(org.opensearch.ad.transport.ADBatchAnomalyResultRequest) HISTORICAL_DETECTOR_TASK_TYPES(org.opensearch.ad.model.ADTaskType.HISTORICAL_DETECTOR_TASK_TYPES) Iterator(java.util.Iterator) ALL_AD_RESULTS_INDEX_PATTERN(org.opensearch.ad.indices.AnomalyDetectionIndices.ALL_AD_RESULTS_INDEX_PATTERN) Semaphore(java.util.concurrent.Semaphore) IndexNotFoundException(org.opensearch.index.IndexNotFoundException) ANOMALY_DETECTORS_INDEX(org.opensearch.ad.model.AnomalyDetector.ANOMALY_DETECTORS_INDEX) GetRequest(org.opensearch.action.get.GetRequest) TermQueryBuilder(org.opensearch.index.query.TermQueryBuilder) AnomalyDetectionIndices(org.opensearch.ad.indices.AnomalyDetectionIndices) STATE_FIELD(org.opensearch.ad.model.ADTask.STATE_FIELD) CREATE_INDEX_NOT_ACKNOWLEDGED(org.opensearch.ad.constant.CommonErrorMessages.CREATE_INDEX_NOT_ACKNOWLEDGED) Consumer(java.util.function.Consumer) UpdateByQueryRequest(org.opensearch.index.reindex.UpdateByQueryRequest) AD_USED_BATCH_TASK_SLOT_COUNT(org.opensearch.ad.stats.InternalStatNames.AD_USED_BATCH_TASK_SLOT_COUNT) Entity(org.opensearch.ad.model.Entity) User(org.opensearch.commons.authuser.User) AnomalyDetectorJob(org.opensearch.ad.model.AnomalyDetectorJob) IndexRequest(org.opensearch.action.index.IndexRequest) LogManager(org.apache.logging.log4j.LogManager) ADTaskProfileAction(org.opensearch.ad.transport.ADTaskProfileAction) UpdateResponse(org.opensearch.action.update.UpdateResponse) AnomalyDetectorJobResponse(org.opensearch.ad.transport.AnomalyDetectorJobResponse) LimitExceededException(org.opensearch.ad.common.exception.LimitExceededException) ADTaskType.taskTypeToString(org.opensearch.ad.model.ADTaskType.taskTypeToString) ADTaskState(org.opensearch.ad.model.ADTaskState)

Example 2 with ERROR_FIELD

use of org.opensearch.ad.model.ADTask.ERROR_FIELD in project anomaly-detection by opensearch-project.

the class ADTaskManager method updateLatestRealtimeTaskOnCoordinatingNode.

/**
 * Update realtime task cache on realtime detector's coordinating node.
 *
 * @param detectorId detector id
 * @param state new state
 * @param rcfTotalUpdates rcf total updates
 * @param detectorIntervalInMinutes detector interval in minutes
 * @param error error
 * @param listener action listener
 */
public void updateLatestRealtimeTaskOnCoordinatingNode(String detectorId, String state, Long rcfTotalUpdates, Long detectorIntervalInMinutes, String error, ActionListener<UpdateResponse> listener) {
    Float initProgress = null;
    String newState = null;
    // calculate init progress and task state with RCF total updates
    if (detectorIntervalInMinutes != null && rcfTotalUpdates != null) {
        newState = ADTaskState.INIT.name();
        if (rcfTotalUpdates < NUM_MIN_SAMPLES) {
            initProgress = (float) rcfTotalUpdates / NUM_MIN_SAMPLES;
        } else {
            newState = ADTaskState.RUNNING.name();
            initProgress = 1.0f;
        }
    }
    // Check if new state is not null and override state calculated with rcf total updates
    if (state != null) {
        newState = state;
    }
    error = Optional.ofNullable(error).orElse("");
    if (!adTaskCacheManager.isRealtimeTaskChanged(detectorId, newState, initProgress, error)) {
        // If task not changed, no need to update, just return
        listener.onResponse(null);
        return;
    }
    Map<String, Object> updatedFields = new HashMap<>();
    updatedFields.put(COORDINATING_NODE_FIELD, clusterService.localNode().getId());
    if (initProgress != null) {
        updatedFields.put(INIT_PROGRESS_FIELD, initProgress);
        updatedFields.put(ESTIMATED_MINUTES_LEFT_FIELD, Math.max(0, NUM_MIN_SAMPLES - rcfTotalUpdates) * detectorIntervalInMinutes);
    }
    if (newState != null) {
        updatedFields.put(STATE_FIELD, newState);
    }
    if (error != null) {
        updatedFields.put(ERROR_FIELD, error);
    }
    Float finalInitProgress = initProgress;
    // Variable used in lambda expression should be final or effectively final
    String finalError = error;
    String finalNewState = newState;
    updateLatestADTask(detectorId, ADTaskType.REALTIME_TASK_TYPES, updatedFields, ActionListener.wrap(r -> {
        logger.debug("Updated latest realtime AD task successfully for detector {}", detectorId);
        adTaskCacheManager.updateRealtimeTaskCache(detectorId, finalNewState, finalInitProgress, finalError);
        listener.onResponse(r);
    }, e -> {
        logger.error("Failed to update realtime task for detector " + detectorId, e);
        listener.onFailure(e);
    }));
}
Also used : PARENT_TASK_ID_FIELD(org.opensearch.ad.model.ADTask.PARENT_TASK_ID_FIELD) IndexResponse(org.opensearch.action.index.IndexResponse) HashRing(org.opensearch.ad.cluster.HashRing) Version(org.opensearch.Version) FAIL_TO_FIND_DETECTOR_MSG(org.opensearch.ad.constant.CommonErrorMessages.FAIL_TO_FIND_DETECTOR_MSG) BulkAction(org.opensearch.action.bulk.BulkAction) LimitExceededException(org.opensearch.ad.common.exception.LimitExceededException) ExceptionUtil.getShardsFailure(org.opensearch.ad.util.ExceptionUtil.getShardsFailure) ADTaskState(org.opensearch.ad.model.ADTaskState) AnomalyDetectorFunction(org.opensearch.ad.rest.handler.AnomalyDetectorFunction) STOPPED_BY_FIELD(org.opensearch.ad.model.ADTask.STOPPED_BY_FIELD) WriteRequest(org.opensearch.action.support.WriteRequest) ANOMALY_DETECTOR_JOB_INDEX(org.opensearch.ad.model.AnomalyDetectorJob.ANOMALY_DETECTOR_JOB_INDEX) MAX_OLD_AD_TASK_DOCS_PER_DETECTOR(org.opensearch.ad.settings.AnomalyDetectorSettings.MAX_OLD_AD_TASK_DOCS_PER_DETECTOR) Map(java.util.Map) ForwardADTaskAction(org.opensearch.ad.transport.ForwardADTaskAction) ActionListener(org.opensearch.action.ActionListener) DETECTOR_IS_RUNNING(org.opensearch.ad.constant.CommonErrorMessages.DETECTOR_IS_RUNNING) ADStatsNodeResponse(org.opensearch.ad.transport.ADStatsNodeResponse) DeleteRequest(org.opensearch.action.delete.DeleteRequest) MAX_RUNNING_ENTITIES_PER_DETECTOR_FOR_HISTORICAL_ANALYSIS(org.opensearch.ad.settings.AnomalyDetectorSettings.MAX_RUNNING_ENTITIES_PER_DETECTOR_FOR_HISTORICAL_ANALYSIS) Client(org.opensearch.client.Client) UpdateByQueryAction(org.opensearch.index.reindex.UpdateByQueryAction) AD_BATCH_TASK_THREAD_POOL_NAME(org.opensearch.ad.AnomalyDetectorPlugin.AD_BATCH_TASK_THREAD_POOL_NAME) ADTaskProfileRequest(org.opensearch.ad.transport.ADTaskProfileRequest) TimeValue(org.opensearch.common.unit.TimeValue) RestHandlerUtils.createXContentParserFromRegistry(org.opensearch.ad.util.RestHandlerUtils.createXContentParserFromRegistry) SearchHit(org.opensearch.search.SearchHit) TransportRequestOptions(org.opensearch.transport.TransportRequestOptions) Set(java.util.Set) ExceptionsHelper(org.opensearch.ExceptionsHelper) Settings(org.opensearch.common.settings.Settings) NO_ELIGIBLE_NODE_TO_RUN_DETECTOR(org.opensearch.ad.constant.CommonErrorMessages.NO_ELIGIBLE_NODE_TO_RUN_DETECTOR) IS_LATEST_FIELD(org.opensearch.ad.model.ADTask.IS_LATEST_FIELD) DETECTION_STATE_INDEX(org.opensearch.ad.constant.CommonName.DETECTION_STATE_INDEX) ScoreMode(org.apache.lucene.search.join.ScoreMode) TransportService(org.opensearch.transport.TransportService) Logger(org.apache.logging.log4j.Logger) CAN_NOT_FIND_LATEST_TASK(org.opensearch.ad.constant.CommonErrorMessages.CAN_NOT_FIND_LATEST_TASK) ADEntityTaskProfile(org.opensearch.ad.model.ADEntityTaskProfile) ActionListenerResponseHandler(org.opensearch.action.ActionListenerResponseHandler) TermsQueryBuilder(org.opensearch.index.query.TermsQueryBuilder) EXECUTION_END_TIME_FIELD(org.opensearch.ad.model.ADTask.EXECUTION_END_TIME_FIELD) UpdateRequest(org.opensearch.action.update.UpdateRequest) XContentType(org.opensearch.common.xcontent.XContentType) BoolQueryBuilder(org.opensearch.index.query.BoolQueryBuilder) BATCH_TASK_PIECE_INTERVAL_SECONDS(org.opensearch.ad.settings.AnomalyDetectorSettings.BATCH_TASK_PIECE_INTERVAL_SECONDS) MAX_BATCH_TASK_PER_NODE(org.opensearch.ad.settings.AnomalyDetectorSettings.MAX_BATCH_TASK_PER_NODE) UpdateResponse(org.opensearch.action.update.UpdateResponse) ThreadPool(org.opensearch.threadpool.ThreadPool) ADTaskType.taskTypeToString(org.opensearch.ad.model.ADTaskType.taskTypeToString) ADTaskProfile(org.opensearch.ad.model.ADTaskProfile) ArrayList(java.util.ArrayList) XCONTENT_WITH_TYPE(org.opensearch.ad.util.RestHandlerUtils.XCONTENT_WITH_TYPE) HC_DETECTOR_TASK_IS_UPDATING(org.opensearch.ad.constant.CommonErrorMessages.HC_DETECTOR_TASK_IS_UPDATING) AnomalyDetector(org.opensearch.ad.model.AnomalyDetector) ADCancelTaskAction(org.opensearch.ad.transport.ADCancelTaskAction) BiConsumer(java.util.function.BiConsumer) DeleteResponse(org.opensearch.action.delete.DeleteResponse) SearchRequest(org.opensearch.action.search.SearchRequest) INIT_PROGRESS_FIELD(org.opensearch.ad.model.ADTask.INIT_PROGRESS_FIELD) QueryBuilders(org.opensearch.index.query.QueryBuilders) ADStatsNodesAction(org.opensearch.ad.transport.ADStatsNodesAction) IOException(java.io.IOException) TotalHits(org.apache.lucene.search.TotalHits) XContentBuilder(org.opensearch.common.xcontent.XContentBuilder) ChronoUnit(java.time.temporal.ChronoUnit) NamedXContentRegistry(org.opensearch.common.xcontent.NamedXContentRegistry) ClusterService(org.opensearch.cluster.service.ClusterService) ExceptionUtil.getErrorMessage(org.opensearch.ad.util.ExceptionUtil.getErrorMessage) ADTaskAction(org.opensearch.ad.model.ADTaskAction) DeleteByQueryRequest(org.opensearch.index.reindex.DeleteByQueryRequest) AD_DETECTOR_ASSIGNED_BATCH_TASK_SLOT_COUNT(org.opensearch.ad.stats.InternalStatNames.AD_DETECTOR_ASSIGNED_BATCH_TASK_SLOT_COUNT) DeleteByQueryAction(org.opensearch.index.reindex.DeleteByQueryAction) COORDINATING_NODE_FIELD(org.opensearch.ad.model.ADTask.COORDINATING_NODE_FIELD) DELETE_AD_RESULT_WHEN_DELETE_DETECTOR(org.opensearch.ad.settings.AnomalyDetectorSettings.DELETE_AD_RESULT_WHEN_DELETE_DETECTOR) ResourceNotFoundException(org.opensearch.ad.common.exception.ResourceNotFoundException) MAX_OLD_AD_TASK_DOCS(org.opensearch.ad.settings.AnomalyDetectorSettings.MAX_OLD_AD_TASK_DOCS) ADBatchAnomalyResultAction(org.opensearch.ad.transport.ADBatchAnomalyResultAction) ToXContent(org.opensearch.common.xcontent.ToXContent) OpenSearchStatusException(org.opensearch.OpenSearchStatusException) BulkRequest(org.opensearch.action.bulk.BulkRequest) ERROR_FIELD(org.opensearch.ad.model.ADTask.ERROR_FIELD) EXECUTION_START_TIME_FIELD(org.opensearch.ad.model.ADTask.EXECUTION_START_TIME_FIELD) ParseUtils.isNullOrEmpty(org.opensearch.ad.util.ParseUtils.isNullOrEmpty) ADTaskCancelledException(org.opensearch.ad.common.exception.ADTaskCancelledException) ADStatsRequest(org.opensearch.ad.transport.ADStatsRequest) ThreadContext(org.opensearch.common.util.concurrent.ThreadContext) AnomalyDetectionException(org.opensearch.ad.common.exception.AnomalyDetectionException) XContentParser(org.opensearch.common.xcontent.XContentParser) DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) IndexAnomalyDetectorJobActionHandler(org.opensearch.ad.rest.handler.IndexAnomalyDetectorJobActionHandler) DETECTOR_ID_FIELD(org.opensearch.ad.model.ADTask.DETECTOR_ID_FIELD) Locale(java.util.Locale) BulkItemResponse(org.opensearch.action.bulk.BulkItemResponse) XContentFactory(org.opensearch.common.xcontent.XContentFactory) REALTIME_TASK_TYPES(org.opensearch.ad.model.ADTaskType.REALTIME_TASK_TYPES) NestedQueryBuilder(org.opensearch.index.query.NestedQueryBuilder) TASK_PROGRESS_FIELD(org.opensearch.ad.model.ADTask.TASK_PROGRESS_FIELD) DetectorProfile(org.opensearch.ad.model.DetectorProfile) ImmutableSet(com.google.common.collect.ImmutableSet) ESTIMATED_MINUTES_LEFT_FIELD(org.opensearch.ad.model.ADTask.ESTIMATED_MINUTES_LEFT_FIELD) NOT_ENDED_STATES(org.opensearch.ad.model.ADTaskState.NOT_ENDED_STATES) ImmutableMap(com.google.common.collect.ImmutableMap) Script(org.opensearch.script.Script) LoggingDeprecationHandler(org.opensearch.common.xcontent.LoggingDeprecationHandler) ADTaskType(org.opensearch.ad.model.ADTaskType) XContentParserUtils.ensureExpectedToken(org.opensearch.common.xcontent.XContentParserUtils.ensureExpectedToken) ADTaskProfileNodeResponse(org.opensearch.ad.transport.ADTaskProfileNodeResponse) LAST_UPDATE_TIME_FIELD(org.opensearch.ad.model.ADTask.LAST_UPDATE_TIME_FIELD) Instant(java.time.Instant) RestStatus(org.opensearch.rest.RestStatus) Collectors(java.util.stream.Collectors) TASK_TYPE_FIELD(org.opensearch.ad.model.ADTask.TASK_TYPE_FIELD) Objects(java.util.Objects) AnomalyDetectorJobResponse(org.opensearch.ad.transport.AnomalyDetectorJobResponse) List(java.util.List) ForwardADTaskRequest(org.opensearch.ad.transport.ForwardADTaskRequest) DuplicateTaskException(org.opensearch.ad.common.exception.DuplicateTaskException) SearchSourceBuilder(org.opensearch.search.builder.SearchSourceBuilder) CREATED(org.opensearch.action.DocWriteResponse.Result.CREATED) DetectionDateRange(org.opensearch.ad.model.DetectionDateRange) NUM_MIN_SAMPLES(org.opensearch.ad.settings.AnomalyDetectorSettings.NUM_MIN_SAMPLES) Optional(java.util.Optional) ResourceAlreadyExistsException(org.opensearch.ResourceAlreadyExistsException) ALL_HISTORICAL_TASK_TYPES(org.opensearch.ad.model.ADTaskType.ALL_HISTORICAL_TASK_TYPES) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) RestHandlerUtils(org.opensearch.ad.util.RestHandlerUtils) TASK_ID_FIELD(org.opensearch.ad.model.AnomalyResult.TASK_ID_FIELD) REQUEST_TIMEOUT(org.opensearch.ad.settings.AnomalyDetectorSettings.REQUEST_TIMEOUT) DiscoveryNodeFilterer(org.opensearch.ad.util.DiscoveryNodeFilterer) BytesReference(org.opensearch.common.bytes.BytesReference) ADTask(org.opensearch.ad.model.ADTask) HashMap(java.util.HashMap) SortOrder(org.opensearch.search.sort.SortOrder) ImmutableList(com.google.common.collect.ImmutableList) EXCEED_HISTORICAL_ANALYSIS_LIMIT(org.opensearch.ad.constant.CommonErrorMessages.EXCEED_HISTORICAL_ANALYSIS_LIMIT) ADCancelTaskRequest(org.opensearch.ad.transport.ADCancelTaskRequest) SearchResponse(org.opensearch.action.search.SearchResponse) EndRunException(org.opensearch.ad.common.exception.EndRunException) ADBatchAnomalyResultRequest(org.opensearch.ad.transport.ADBatchAnomalyResultRequest) HISTORICAL_DETECTOR_TASK_TYPES(org.opensearch.ad.model.ADTaskType.HISTORICAL_DETECTOR_TASK_TYPES) Iterator(java.util.Iterator) ALL_AD_RESULTS_INDEX_PATTERN(org.opensearch.ad.indices.AnomalyDetectionIndices.ALL_AD_RESULTS_INDEX_PATTERN) Semaphore(java.util.concurrent.Semaphore) IndexNotFoundException(org.opensearch.index.IndexNotFoundException) ANOMALY_DETECTORS_INDEX(org.opensearch.ad.model.AnomalyDetector.ANOMALY_DETECTORS_INDEX) GetRequest(org.opensearch.action.get.GetRequest) TermQueryBuilder(org.opensearch.index.query.TermQueryBuilder) AnomalyDetectionIndices(org.opensearch.ad.indices.AnomalyDetectionIndices) STATE_FIELD(org.opensearch.ad.model.ADTask.STATE_FIELD) CREATE_INDEX_NOT_ACKNOWLEDGED(org.opensearch.ad.constant.CommonErrorMessages.CREATE_INDEX_NOT_ACKNOWLEDGED) Consumer(java.util.function.Consumer) UpdateByQueryRequest(org.opensearch.index.reindex.UpdateByQueryRequest) AD_USED_BATCH_TASK_SLOT_COUNT(org.opensearch.ad.stats.InternalStatNames.AD_USED_BATCH_TASK_SLOT_COUNT) Entity(org.opensearch.ad.model.Entity) User(org.opensearch.commons.authuser.User) AnomalyDetectorJob(org.opensearch.ad.model.AnomalyDetectorJob) IndexRequest(org.opensearch.action.index.IndexRequest) LogManager(org.apache.logging.log4j.LogManager) ADTaskProfileAction(org.opensearch.ad.transport.ADTaskProfileAction) HashMap(java.util.HashMap) ADTaskType.taskTypeToString(org.opensearch.ad.model.ADTaskType.taskTypeToString)

Example 3 with ERROR_FIELD

use of org.opensearch.ad.model.ADTask.ERROR_FIELD in project anomaly-detection by opensearch-project.

the class ForwardADTaskTransportAction method doExecute.

@Override
protected void doExecute(Task task, ForwardADTaskRequest request, ActionListener<AnomalyDetectorJobResponse> listener) {
    ADTaskAction adTaskAction = request.getAdTaskAction();
    AnomalyDetector detector = request.getDetector();
    DetectionDateRange detectionDateRange = request.getDetectionDateRange();
    String detectorId = detector.getDetectorId();
    ADTask adTask = request.getAdTask();
    User user = request.getUser();
    Integer availableTaskSlots = request.getAvailableTaskSLots();
    String entityValue = adTaskManager.convertEntityToString(adTask);
    switch(adTaskAction) {
        case APPLY_FOR_TASK_SLOTS:
            logger.debug("Received APPLY_FOR_TASK_SLOTS action for detector {}", detectorId);
            adTaskManager.checkTaskSlots(adTask, detector, detectionDateRange, user, ADTaskAction.START, transportService, listener);
            break;
        case CHECK_AVAILABLE_TASK_SLOTS:
            logger.debug("Received CHECK_AVAILABLE_TASK_SLOTS action for detector {}", detectorId);
            adTaskManager.checkTaskSlots(adTask, detector, detectionDateRange, user, ADTaskAction.SCALE_ENTITY_TASK_SLOTS, transportService, listener);
            break;
        case START:
            // Start historical analysis for detector
            logger.debug("Received START action for detector {}", detectorId);
            adTaskManager.startDetector(detector, detectionDateRange, user, transportService, ActionListener.wrap(r -> {
                adTaskCacheManager.setDetectorTaskSlots(detector.getDetectorId(), availableTaskSlots);
                listener.onResponse(r);
            }, e -> listener.onFailure(e)));
            break;
        case NEXT_ENTITY:
            logger.debug("Received NEXT_ENTITY action for detector {}, task {}", detectorId, adTask.getTaskId());
            // Run next entity for HC detector historical analysis.
            if (detector.isMultientityDetector()) {
                // AD task could be HC detector level task or entity task
                adTaskCacheManager.removeRunningEntity(detectorId, entityValue);
                if (!adTaskCacheManager.hasEntity(detectorId)) {
                    adTaskCacheManager.setDetectorTaskSlots(detectorId, 0);
                    logger.info("Historical HC detector done, will remove from cache, detector id:{}", detectorId);
                    listener.onResponse(new AnomalyDetectorJobResponse(detectorId, 0, 0, 0, RestStatus.OK));
                    ADTaskState state = !adTask.isEntityTask() && adTask.getError() != null ? ADTaskState.FAILED : ADTaskState.FINISHED;
                    adTaskManager.setHCDetectorTaskDone(adTask, state, listener);
                } else {
                    logger.debug("Run next entity for detector " + detectorId);
                    adTaskManager.runNextEntityForHCADHistorical(adTask, transportService, listener);
                    adTaskManager.updateADHCDetectorTask(detectorId, adTask.getParentTaskId(), ImmutableMap.of(STATE_FIELD, ADTaskState.RUNNING.name(), TASK_PROGRESS_FIELD, adTaskManager.hcDetectorProgress(detectorId), ERROR_FIELD, adTask.getError() != null ? adTask.getError() : ""));
                }
            } else {
                logger.warn("Can only handle HC entity task for NEXT_ENTITY action, taskId:{} , taskType:{}", adTask.getTaskId(), adTask.getTaskType());
                listener.onFailure(new IllegalArgumentException("Unsupported task"));
            }
            break;
        case PUSH_BACK_ENTITY:
            logger.debug("Received PUSH_BACK_ENTITY action for detector {}, task {}", detectorId, adTask.getTaskId());
            // Push back entity to pending entities queue and run next entity.
            if (adTask.isEntityTask()) {
                // AD task must be entity level task.
                adTaskCacheManager.removeRunningEntity(detectorId, entityValue);
                if (adTaskManager.isRetryableError(adTask.getError()) && !adTaskCacheManager.exceedRetryLimit(adTask.getDetectorId(), adTask.getTaskId())) {
                    // If retryable exception happens when run entity task, will push back entity to the end
                    // of pending entities queue, then we can retry it later.
                    adTaskCacheManager.pushBackEntity(adTask.getTaskId(), adTask.getDetectorId(), entityValue);
                } else {
                    // If exception is not retryable or exceeds retry limit, will remove this entity.
                    adTaskCacheManager.removeEntity(adTask.getDetectorId(), entityValue);
                    logger.warn("Entity task failed, task id: {}, entity: {}", adTask.getTaskId(), adTask.getEntity().toString());
                }
                if (!adTaskCacheManager.hasEntity(detectorId)) {
                    adTaskCacheManager.setDetectorTaskSlots(detectorId, 0);
                    adTaskManager.setHCDetectorTaskDone(adTask, ADTaskState.FINISHED, listener);
                } else {
                    logger.debug("scale task slots for PUSH_BACK_ENTITY, detector {} task {}", detectorId, adTask.getTaskId());
                    int taskSlots = adTaskCacheManager.scaleDownHCDetectorTaskSlots(detectorId, 1);
                    if (taskSlots == 1) {
                        logger.debug("After scale down, only 1 task slot reserved for detector {}, run next entity", detectorId);
                        adTaskManager.runNextEntityForHCADHistorical(adTask, transportService, listener);
                    }
                    listener.onResponse(new AnomalyDetectorJobResponse(adTask.getTaskId(), 0, 0, 0, RestStatus.ACCEPTED));
                }
            } else {
                logger.warn("Can only push back entity task");
                listener.onFailure(new IllegalArgumentException("Can only push back entity task"));
            }
            break;
        case SCALE_ENTITY_TASK_SLOTS:
            logger.debug("Received SCALE_ENTITY_TASK_LANE action for detector {}", detectorId);
            // Check current available task slots and scale entity task lane.
            if (availableTaskSlots != null && availableTaskSlots > 0) {
                int newSlots = Math.min(availableTaskSlots, adTaskManager.detectorTaskSlotScaleDelta(detectorId));
                if (newSlots > 0) {
                    adTaskCacheManager.setAllowedRunningEntities(detectorId, newSlots);
                    adTaskCacheManager.scaleUpDetectorTaskSlots(detectorId, newSlots);
                }
            }
            listener.onResponse(new AnomalyDetectorJobResponse(detector.getDetectorId(), 0, 0, 0, RestStatus.OK));
            break;
        case CANCEL:
            logger.debug("Received CANCEL action for detector {}", detectorId);
            // on worker node.
            if (detector.isMultientityDetector()) {
                adTaskCacheManager.clearPendingEntities(detectorId);
                adTaskCacheManager.removeRunningEntity(detectorId, entityValue);
                if (!adTaskCacheManager.hasEntity(detectorId) || !adTask.isEntityTask()) {
                    adTaskManager.setHCDetectorTaskDone(adTask, ADTaskState.STOPPED, listener);
                }
                listener.onResponse(new AnomalyDetectorJobResponse(adTask.getTaskId(), 0, 0, 0, RestStatus.OK));
            } else {
                listener.onFailure(new IllegalArgumentException("Only support cancel HC now"));
            }
            break;
        case CLEAN_STALE_RUNNING_ENTITIES:
            logger.debug("Received CLEAN_STALE_RUNNING_ENTITIES action for detector {}", detectorId);
            // Clean stale running entities of HC detector. For example, some worker node crashed or failed to send
            // entity task done message to coordinating node, then coordinating node can't remove running entity
            // from cache. We will check task profile when get task. If some entities exist in coordinating cache but
            // doesn't exist in worker node's cache, we will clean up these stale running entities on coordinating node.
            List<String> staleRunningEntities = request.getStaleRunningEntities();
            logger.debug("Clean stale running entities of task {}, staleRunningEntities: {}", adTask.getTaskId(), Arrays.toString(staleRunningEntities.toArray(new String[0])));
            for (String entity : staleRunningEntities) {
                adTaskManager.removeStaleRunningEntity(adTask, entity, transportService, listener);
            }
            listener.onResponse(new AnomalyDetectorJobResponse(adTask.getTaskId(), 0, 0, 0, RestStatus.OK));
            break;
        case CLEAN_CACHE:
            boolean historicalTask = adTask.isHistoricalTask();
            logger.debug("Received CLEAN_CACHE action for detector {}, taskId: {}, historical: {}", detectorId, adTask.getTaskId(), historicalTask);
            if (historicalTask) {
                // Don't clear task cache if still has running entity. CLEAN_STALE_RUNNING_ENTITIES will clean
                // stale running entity.
                adTaskCacheManager.removeHistoricalTaskCacheIfNoRunningEntity(detectorId);
            } else {
                adTaskCacheManager.removeRealtimeTaskCache(detectorId);
                // If hash ring changed like new node added when scale out, the realtime job coordinating node may
                // change, then we should clean up cache on old coordinating node.
                stateManager.clear(detectorId);
                featureManager.clear(detectorId);
            }
            listener.onResponse(new AnomalyDetectorJobResponse(detector.getDetectorId(), 0, 0, 0, RestStatus.OK));
            break;
        default:
            listener.onFailure(new OpenSearchStatusException("Unsupported AD task action " + adTaskAction, RestStatus.BAD_REQUEST));
            break;
    }
}
Also used : ADTaskCacheManager(org.opensearch.ad.task.ADTaskCacheManager) Arrays(java.util.Arrays) HandledTransportAction(org.opensearch.action.support.HandledTransportAction) ADTask(org.opensearch.ad.model.ADTask) OpenSearchStatusException(org.opensearch.OpenSearchStatusException) ERROR_FIELD(org.opensearch.ad.model.ADTask.ERROR_FIELD) ADTaskState(org.opensearch.ad.model.ADTaskState) AnomalyDetector(org.opensearch.ad.model.AnomalyDetector) Inject(org.opensearch.common.inject.Inject) ActionListener(org.opensearch.action.ActionListener) TASK_PROGRESS_FIELD(org.opensearch.ad.model.ADTask.TASK_PROGRESS_FIELD) FeatureManager(org.opensearch.ad.feature.FeatureManager) ImmutableMap(com.google.common.collect.ImmutableMap) ADTaskManager(org.opensearch.ad.task.ADTaskManager) STATE_FIELD(org.opensearch.ad.model.ADTask.STATE_FIELD) Task(org.opensearch.tasks.Task) RestStatus(org.opensearch.rest.RestStatus) TransportService(org.opensearch.transport.TransportService) ActionFilters(org.opensearch.action.support.ActionFilters) List(java.util.List) Logger(org.apache.logging.log4j.Logger) NodeStateManager(org.opensearch.ad.NodeStateManager) User(org.opensearch.commons.authuser.User) DetectionDateRange(org.opensearch.ad.model.DetectionDateRange) ADTaskAction(org.opensearch.ad.model.ADTaskAction) LogManager(org.apache.logging.log4j.LogManager) User(org.opensearch.commons.authuser.User) ADTaskAction(org.opensearch.ad.model.ADTaskAction) ADTaskState(org.opensearch.ad.model.ADTaskState) AnomalyDetector(org.opensearch.ad.model.AnomalyDetector) DetectionDateRange(org.opensearch.ad.model.DetectionDateRange) ADTask(org.opensearch.ad.model.ADTask) OpenSearchStatusException(org.opensearch.OpenSearchStatusException)

Aggregations

ImmutableMap (com.google.common.collect.ImmutableMap)3 List (java.util.List)3 LogManager (org.apache.logging.log4j.LogManager)3 Logger (org.apache.logging.log4j.Logger)3 OpenSearchStatusException (org.opensearch.OpenSearchStatusException)3 ActionListener (org.opensearch.action.ActionListener)3 ImmutableList (com.google.common.collect.ImmutableList)2 ImmutableSet (com.google.common.collect.ImmutableSet)2 IOException (java.io.IOException)2 Instant (java.time.Instant)2 ChronoUnit (java.time.temporal.ChronoUnit)2 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)2 Iterator (java.util.Iterator)2 Locale (java.util.Locale)2 Map (java.util.Map)2 Objects (java.util.Objects)2 Optional (java.util.Optional)2 Set (java.util.Set)2 ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue)2