Search in sources :

Example 1 with ADBatchAnomalyResultResponse

use of org.opensearch.ad.transport.ADBatchAnomalyResultResponse in project anomaly-detection by opensearch-project.

In the class ADBatchTaskRunner, the method startADBatchTaskOnWorkerNode:

/**
 * Submits an AD task to the dedicated batch task thread pool on this worker node and
 * acknowledges the caller as soon as the submission has been made.
 *
 * <p>The delegated listener is answered right after the job is handed to the executor; it
 * does not wait for the batch task itself. Exceptions thrown while the job runs are routed
 * to the internal batch task listener instead.
 *
 * @param adTask ad task
 * @param runTaskRemotely run task remotely or not; passed through to the response
 * @param transportService transport service
 * @param delegatedListener action listener notified of the submission result
 */
public void startADBatchTaskOnWorkerNode(
    ADTask adTask,
    boolean runTaskRemotely,
    TransportService transportService,
    ActionListener<ADBatchAnomalyResultResponse> delegatedListener
) {
    try {
        // Throws when the cluster is currently not eligible to run AD
        // (for example, the circuit breaker is open).
        checkClusterState(adTask);

        final Runnable batchJob = () -> {
            final ActionListener<String> taskListener = internalBatchTaskListener(adTask, transportService);
            try {
                executeADBatchTaskOnWorkerNode(adTask, taskListener);
            } catch (Exception taskError) {
                taskListener.onFailure(taskError);
            }
        };
        threadPool.executor(AD_BATCH_TASK_THREAD_POOL_NAME).execute(batchJob);

        final String localNodeId = clusterService.localNode().getId();
        delegatedListener.onResponse(new ADBatchAnomalyResultResponse(localNodeId, runTaskRemotely));
    } catch (Exception startError) {
        logger.error("Fail to start AD batch task " + adTask.getTaskId(), startError);
        delegatedListener.onFailure(startError);
    }
}
Also used : ADBatchAnomalyResultResponse(org.opensearch.ad.transport.ADBatchAnomalyResultResponse) LimitExceededException(org.opensearch.ad.common.exception.LimitExceededException) ResourceNotFoundException(org.opensearch.ad.common.exception.ResourceNotFoundException) ADTaskCancelledException(org.opensearch.ad.common.exception.ADTaskCancelledException) AnomalyDetectionException(org.opensearch.ad.common.exception.AnomalyDetectionException) EndRunException(org.opensearch.ad.common.exception.EndRunException)

Example 2 with ADBatchAnomalyResultResponse

use of org.opensearch.ad.transport.ADBatchAnomalyResultResponse in project anomaly-detection by opensearch-project.

In the class ADBatchTaskRunner, the method forwardOrExecuteADTask:

/**
 * Forward AD task to worker node.
 * 1. For HC detector, return directly if no more pending entity. Otherwise check if
 *    there is AD task created for this entity. If yes, just forward the entity task
 *    to worker node; otherwise, create entity task first, then forward.
 * 2. For single entity detector, set task as INIT state and forward task to worker
 *    node.
 *
 * <p>Any exception thrown synchronously by this method is logged and reported through
 * {@code listener.onFailure}.
 *
 * @param adTask AD task
 * @param transportService transport service
 * @param listener action listener
 */
public void forwardOrExecuteADTask(ADTask adTask, TransportService transportService, ActionListener<ADBatchAnomalyResultResponse> listener) {
    try {
        checkIfADTaskCancelledAndCleanupCache(adTask);
        String detectorId = adTask.getDetectorId();
        AnomalyDetector detector = adTask.getDetector();
        boolean isHCDetector = detector.isMultientityDetector();
        if (isHCDetector) {
            String entityString = adTaskCacheManager.pollEntity(detectorId);
            logger.debug("Start to run entity: {} of detector {}", entityString, detectorId);
            if (entityString == null) {
                // No more pending entity: nothing to forward, acknowledge and stop.
                listener.onResponse(new ADBatchAnomalyResultResponse(clusterService.localNode().getId(), false));
                return;
            }
            // NOTE(review): the failure branch below re-queues the entity but does not notify
            // `listener`; confirm callers do not wait on it for this path.
            ActionListener<Object> wrappedListener = ActionListener.wrap(r -> logger.debug("Entity task created successfully"), e -> {
                // Pass the exception as the trailing argument so the cause and stack trace are logged.
                logger.error("Failed to start entity task for detector: {}, entity: {}", detectorId, entityString, e);
                // If fail, move the entity into pending task queue
                adTaskCacheManager.addPendingEntity(detectorId, entityString);
            });
            // This is to handle retry case. To retry entity, we need to get the old entity task created before.
            Entity entity = adTaskManager.parseEntityFromString(entityString, adTask);
            // For HISTORICAL_HC_ENTITY task, use its parent task id.
            // For HISTORICAL_HC_DETECTOR task, its own task id is the parent task id.
            String parentTaskId = adTask.getTaskType().equals(ADTaskType.HISTORICAL_HC_ENTITY.name())
                ? adTask.getParentTaskId()
                : adTask.getTaskId();
            adTaskManager.getAndExecuteOnLatestADTask(detectorId, parentTaskId, entity, ImmutableList.of(ADTaskType.HISTORICAL_HC_ENTITY), existingEntityTask -> {
                if (existingEntityTask.isPresent()) {
                    // retry failed entity caused by limit exceed exception
                    // TODO: if task failed due to limit exceed exception in half way, resume from the break point or just
                    // clear the old AD tasks and rerun it? Currently we just support rerunning task failed due to limit
                    // exceed exception before starting.
                    ADTask adEntityTask = existingEntityTask.get();
                    logger.debug("Rerun entity task for task id: {}, error of last run: {}", adEntityTask.getTaskId(), adEntityTask.getError());
                    ActionListener<ADBatchAnomalyResultResponse> workerNodeResponseListener = workerNodeResponseListener(adEntityTask, transportService, listener);
                    forwardOrExecuteEntityTask(adEntityTask, transportService, workerNodeResponseListener);
                } else {
                    logger.info("Create entity task for entity:{}", entityString);
                    Instant now = Instant.now();
                    ADTask adEntityTask = new ADTask.Builder()
                        .detectorId(adTask.getDetectorId())
                        .detector(detector)
                        .isLatest(true)
                        .taskType(ADTaskType.HISTORICAL_HC_ENTITY.name())
                        .executionStartTime(now)
                        .taskProgress(0.0f)
                        .initProgress(0.0f)
                        .state(ADTaskState.INIT.name())
                        .lastUpdateTime(now)
                        .startedBy(adTask.getStartedBy())
                        .coordinatingNode(clusterService.localNode().getId())
                        .detectionDateRange(adTask.getDetectionDateRange())
                        .user(adTask.getUser())
                        .entity(entity)
                        .parentTaskId(parentTaskId)
                        .build();
                    adTaskManager.createADTaskDirectly(adEntityTask, r -> {
                        // The task id is only known once the task document has been created.
                        adEntityTask.setTaskId(r.getId());
                        ActionListener<ADBatchAnomalyResultResponse> workerNodeResponseListener = workerNodeResponseListener(adEntityTask, transportService, listener);
                        forwardOrExecuteEntityTask(adEntityTask, transportService, workerNodeResponseListener);
                    }, wrappedListener);
                }
            }, transportService, false, wrappedListener);
        } else {
            // Single entity detector: reset the task to INIT state before forwarding it.
            Map<String, Object> updatedFields = new HashMap<>();
            updatedFields.put(STATE_FIELD, ADTaskState.INIT.name());
            updatedFields.put(INIT_PROGRESS_FIELD, 0.0f);
            ActionListener<ADBatchAnomalyResultResponse> workerNodeResponseListener = workerNodeResponseListener(adTask, transportService, listener);
            adTaskManager.updateADTask(
                adTask.getTaskId(),
                updatedFields,
                ActionListener.wrap(
                    r -> forwardOrExecuteEntityTask(adTask, transportService, workerNodeResponseListener),
                    workerNodeResponseListener::onFailure
                )
            );
        }
    } catch (Exception e) {
        logger.error("Failed to forward or execute AD task " + adTask.getTaskId(), e);
        listener.onFailure(e);
    }
}
Also used : AnomalyResultBulkIndexHandler(org.opensearch.ad.transport.handler.AnomalyResultBulkIndexHandler) ModelManager(org.opensearch.ad.ml.ModelManager) HashRing(org.opensearch.ad.cluster.HashRing) LimitExceededException(org.opensearch.ad.common.exception.LimitExceededException) ADTaskState(org.opensearch.ad.model.ADTaskState) AnomalyDetectorFunction(org.opensearch.ad.rest.handler.AnomalyDetectorFunction) ADIndex(org.opensearch.ad.indices.ADIndex) Map(java.util.Map) ActionListener(org.opensearch.action.ActionListener) BATCH_TASK_PIECE_SIZE(org.opensearch.ad.settings.AnomalyDetectorSettings.BATCH_TASK_PIECE_SIZE) ADStatsNodeResponse(org.opensearch.ad.transport.ADStatsNodeResponse) MAX_RUNNING_ENTITIES_PER_DETECTOR_FOR_HISTORICAL_ANALYSIS(org.opensearch.ad.settings.AnomalyDetectorSettings.MAX_RUNNING_ENTITIES_PER_DETECTOR_FOR_HISTORICAL_ANALYSIS) Client(org.opensearch.client.Client) AD_BATCH_TASK_THREAD_POOL_NAME(org.opensearch.ad.AnomalyDetectorPlugin.AD_BATCH_TASK_THREAD_POOL_NAME) TimeValue(org.opensearch.common.unit.TimeValue) TransportRequestOptions(org.opensearch.transport.TransportRequestOptions) Settings(org.opensearch.common.settings.Settings) NO_ELIGIBLE_NODE_TO_RUN_DETECTOR(org.opensearch.ad.constant.CommonErrorMessages.NO_ELIGIBLE_NODE_TO_RUN_DETECTOR) TransportService(org.opensearch.transport.TransportService) RandomCutForest(com.amazon.randomcutforest.RandomCutForest) Logger(org.apache.logging.log4j.Logger) PriorityTracker(org.opensearch.ad.caching.PriorityTracker) ExceptionUtil(org.opensearch.ad.util.ExceptionUtil) ActionListenerResponseHandler(org.opensearch.action.ActionListenerResponseHandler) SearchFeatureDao(org.opensearch.ad.feature.SearchFeatureDao) CheckedRunnable(org.opensearch.common.CheckedRunnable) EXECUTION_END_TIME_FIELD(org.opensearch.ad.model.ADTask.EXECUTION_END_TIME_FIELD) InjectSecurity(org.opensearch.commons.InjectSecurity) BoolQueryBuilder(org.opensearch.index.query.BoolQueryBuilder) 
BATCH_TASK_PIECE_INTERVAL_SECONDS(org.opensearch.ad.settings.AnomalyDetectorSettings.BATCH_TASK_PIECE_INTERVAL_SECONDS) ADCircuitBreakerService(org.opensearch.ad.breaker.ADCircuitBreakerService) MAX_BATCH_TASK_PER_NODE(org.opensearch.ad.settings.AnomalyDetectorSettings.MAX_BATCH_TASK_PER_NODE) StringTerms(org.opensearch.search.aggregations.bucket.terms.StringTerms) ThreadPool(org.opensearch.threadpool.ThreadPool) EnabledSetting(org.opensearch.ad.settings.EnabledSetting) AnomalyDetectorSettings(org.opensearch.ad.settings.AnomalyDetectorSettings) ArrayList(java.util.ArrayList) AnomalyDetector(org.opensearch.ad.model.AnomalyDetector) BiConsumer(java.util.function.BiConsumer) SearchRequest(org.opensearch.action.search.SearchRequest) INIT_PROGRESS_FIELD(org.opensearch.ad.model.ADTask.INIT_PROGRESS_FIELD) SinglePointFeatures(org.opensearch.ad.feature.SinglePointFeatures) FeatureManager(org.opensearch.ad.feature.FeatureManager) MAX_TOP_ENTITIES_FOR_HISTORICAL_ANALYSIS(org.opensearch.ad.settings.AnomalyDetectorSettings.MAX_TOP_ENTITIES_FOR_HISTORICAL_ANALYSIS) ADStatsNodesAction(org.opensearch.ad.transport.ADStatsNodesAction) AggregationBuilders(org.opensearch.search.aggregations.AggregationBuilders) CommonErrorMessages(org.opensearch.ad.constant.CommonErrorMessages) ClusterService(org.opensearch.cluster.service.ClusterService) StatNames(org.opensearch.ad.stats.StatNames) ParseUtils(org.opensearch.ad.util.ParseUtils) InternalMin(org.opensearch.search.aggregations.metrics.InternalMin) AD_EXECUTING_BATCH_TASK_COUNT(org.opensearch.ad.stats.StatNames.AD_EXECUTING_BATCH_TASK_COUNT) ResourceNotFoundException(org.opensearch.ad.common.exception.ResourceNotFoundException) ADStats(org.opensearch.ad.stats.ADStats) ParseUtils.isNullOrEmpty(org.opensearch.ad.util.ParseUtils.isNullOrEmpty) ADTaskCancelledException(org.opensearch.ad.common.exception.ADTaskCancelledException) ADStatsRequest(org.opensearch.ad.transport.ADStatsRequest) 
AnomalyDetectionException(org.opensearch.ad.common.exception.AnomalyDetectionException) DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) AggregationBuilder(org.opensearch.search.aggregations.AggregationBuilder) InternalMax(org.opensearch.search.aggregations.metrics.InternalMax) TASK_PROGRESS_FIELD(org.opensearch.ad.model.ADTask.TASK_PROGRESS_FIELD) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) ADBatchTaskRemoteExecutionAction(org.opensearch.ad.transport.ADBatchTaskRemoteExecutionAction) ADTaskType(org.opensearch.ad.model.ADTaskType) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) Objects(java.util.Objects) List(java.util.List) AnomalyResult(org.opensearch.ad.model.AnomalyResult) SearchSourceBuilder(org.opensearch.search.builder.SearchSourceBuilder) DEFAULT_JVM_HEAP_USAGE_THRESHOLD(org.opensearch.ad.breaker.MemoryCircuitBreaker.DEFAULT_JVM_HEAP_USAGE_THRESHOLD) DetectionDateRange(org.opensearch.ad.model.DetectionDateRange) NUM_MIN_SAMPLES(org.opensearch.ad.settings.AnomalyDetectorSettings.NUM_MIN_SAMPLES) Optional(java.util.Optional) AnomalyDescriptor(com.amazon.randomcutforest.parkservices.AnomalyDescriptor) ADTask(org.opensearch.ad.model.ADTask) FeatureData(org.opensearch.ad.model.FeatureData) HashMap(java.util.HashMap) Deque(java.util.Deque) ThreadedActionListener(org.opensearch.action.support.ThreadedActionListener) CURRENT_PIECE_FIELD(org.opensearch.ad.model.ADTask.CURRENT_PIECE_FIELD) ImmutableList(com.google.common.collect.ImmutableList) ADBatchAnomalyResultResponse(org.opensearch.ad.transport.ADBatchAnomalyResultResponse) JVM_HEAP_USAGE(org.opensearch.ad.stats.InternalStatNames.JVM_HEAP_USAGE) EndRunException(org.opensearch.ad.common.exception.EndRunException) ADBatchAnomalyResultRequest(org.opensearch.ad.transport.ADBatchAnomalyResultRequest) AGG_NAME_MAX_TIME(org.opensearch.ad.constant.CommonName.AGG_NAME_MAX_TIME) 
RangeQueryBuilder(org.opensearch.index.query.RangeQueryBuilder) MAX_TOP_ENTITIES_LIMIT_FOR_HISTORICAL_ANALYSIS(org.opensearch.ad.settings.AnomalyDetectorSettings.MAX_TOP_ENTITIES_LIMIT_FOR_HISTORICAL_ANALYSIS) TermQueryBuilder(org.opensearch.index.query.TermQueryBuilder) AnomalyDetectionIndices(org.opensearch.ad.indices.AnomalyDetectionIndices) STATE_FIELD(org.opensearch.ad.model.ADTask.STATE_FIELD) WORKER_NODE_FIELD(org.opensearch.ad.model.ADTask.WORKER_NODE_FIELD) Entity(org.opensearch.ad.model.Entity) TermsAggregationBuilder(org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder) ThresholdedRandomCutForest(com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest) Clock(java.time.Clock) IntervalTimeConfiguration(org.opensearch.ad.model.IntervalTimeConfiguration) LogManager(org.apache.logging.log4j.LogManager) AGG_NAME_MIN_TIME(org.opensearch.ad.constant.CommonName.AGG_NAME_MIN_TIME) Entity(org.opensearch.ad.model.Entity) HashMap(java.util.HashMap) Instant(java.time.Instant) AnomalyDetector(org.opensearch.ad.model.AnomalyDetector) LimitExceededException(org.opensearch.ad.common.exception.LimitExceededException) ResourceNotFoundException(org.opensearch.ad.common.exception.ResourceNotFoundException) ADTaskCancelledException(org.opensearch.ad.common.exception.ADTaskCancelledException) AnomalyDetectionException(org.opensearch.ad.common.exception.AnomalyDetectionException) EndRunException(org.opensearch.ad.common.exception.EndRunException) ADBatchAnomalyResultResponse(org.opensearch.ad.transport.ADBatchAnomalyResultResponse) ADTask(org.opensearch.ad.model.ADTask)

Example 3 with ADBatchAnomalyResultResponse

use of org.opensearch.ad.transport.ADBatchAnomalyResultResponse in project anomaly-detection by opensearch-project.

In the class ADBatchTaskRunner, the method run:

/**
 * Entry point for running an AD task.
 *
 * <ul>
 *   <li>HC detector whose top entities are not initialized yet: submit the top-entity
 *       lookup to the batch task thread pool and acknowledge the caller right away.</li>
 *   <li>Single entity detector, or HC detector with top entities already initialized:
 *       hand the task to {@code forwardOrExecuteADTask}.</li>
 * </ul>
 *
 * @param adTask single entity or HC detector task
 * @param transportService transport service
 * @param listener action listener
 */
public void run(ADTask adTask, TransportService transportService, ActionListener<ADBatchAnomalyResultResponse> listener) {
    // The cache is only consulted for HC detectors (short-circuit evaluation).
    final boolean needsTopEntityInit = adTask.getDetector().isMultientityDetector()
        && !adTaskCacheManager.topEntityInited(adTask.getDetectorId());

    if (!needsTopEntityInit) {
        // Execute AD task for single entity detector or HC detector which top entities initialized
        forwardOrExecuteADTask(adTask, transportService, listener);
        return;
    }

    // Initialize top entities for HC detector
    final Runnable initTopEntitiesJob = () -> {
        final ActionListener<ADBatchAnomalyResultResponse> hcListener = getInternalHCDelegatedListener(adTask);
        final ActionListener<String> entitiesListener = getTopEntitiesListener(adTask, transportService, hcListener);
        try {
            getTopEntities(adTask, entitiesListener);
        } catch (Exception initError) {
            entitiesListener.onFailure(initError);
        }
    };
    threadPool.executor(AD_BATCH_TASK_THREAD_POOL_NAME).execute(initTopEntitiesJob);

    final String localNodeId = clusterService.localNode().getId();
    listener.onResponse(new ADBatchAnomalyResultResponse(localNodeId, false));
}
Also used : ADBatchAnomalyResultResponse(org.opensearch.ad.transport.ADBatchAnomalyResultResponse) LimitExceededException(org.opensearch.ad.common.exception.LimitExceededException) ResourceNotFoundException(org.opensearch.ad.common.exception.ResourceNotFoundException) ADTaskCancelledException(org.opensearch.ad.common.exception.ADTaskCancelledException) AnomalyDetectionException(org.opensearch.ad.common.exception.AnomalyDetectionException) EndRunException(org.opensearch.ad.common.exception.EndRunException)

Aggregations

ADTaskCancelledException (org.opensearch.ad.common.exception.ADTaskCancelledException)3 AnomalyDetectionException (org.opensearch.ad.common.exception.AnomalyDetectionException)3 EndRunException (org.opensearch.ad.common.exception.EndRunException)3 LimitExceededException (org.opensearch.ad.common.exception.LimitExceededException)3 ResourceNotFoundException (org.opensearch.ad.common.exception.ResourceNotFoundException)3 ADBatchAnomalyResultResponse (org.opensearch.ad.transport.ADBatchAnomalyResultResponse)3 RandomCutForest (com.amazon.randomcutforest.RandomCutForest)1 AnomalyDescriptor (com.amazon.randomcutforest.parkservices.AnomalyDescriptor)1 ThresholdedRandomCutForest (com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest)1 ImmutableList (com.google.common.collect.ImmutableList)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 ImmutableSet (com.google.common.collect.ImmutableSet)1 Clock (java.time.Clock)1 Instant (java.time.Instant)1 ArrayList (java.util.ArrayList)1 Deque (java.util.Deque)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Map (java.util.Map)1 Objects (java.util.Objects)1