Search in sources :

Example 1 with ADStatsNodeResponse

use of org.opensearch.ad.transport.ADStatsNodeResponse in project anomaly-detection by opensearch-project.

the class ADTaskManagerTests method setupTaskSlots.

private void setupTaskSlots(int node1UsedTaskSlots, int node1AssignedTaskSLots, int node2UsedTaskSlots, int node2AssignedTaskSLots) {
    doAnswer(invocation -> {
        ActionListener<ADStatsNodesResponse> listener = invocation.getArgument(2);
        listener.onResponse(new ADStatsNodesResponse(new ClusterName(randomAlphaOfLength(5)), ImmutableList.of(new ADStatsNodeResponse(node1, ImmutableMap.of(InternalStatNames.AD_USED_BATCH_TASK_SLOT_COUNT.getName(), node1UsedTaskSlots, InternalStatNames.AD_DETECTOR_ASSIGNED_BATCH_TASK_SLOT_COUNT.getName(), node1AssignedTaskSLots)), new ADStatsNodeResponse(node2, ImmutableMap.of(InternalStatNames.AD_USED_BATCH_TASK_SLOT_COUNT.getName(), node2UsedTaskSlots, InternalStatNames.AD_DETECTOR_ASSIGNED_BATCH_TASK_SLOT_COUNT.getName(), node2AssignedTaskSLots))), ImmutableList.of()));
        return null;
    }).when(client).execute(any(), any(), any());
}
Also used : ADStatsNodesResponse(org.opensearch.ad.transport.ADStatsNodesResponse) ADStatsNodeResponse(org.opensearch.ad.transport.ADStatsNodeResponse) ClusterName(org.opensearch.cluster.ClusterName)

Example 2 with ADStatsNodeResponse

use of org.opensearch.ad.transport.ADStatsNodeResponse in project anomaly-detection by opensearch-project.

the class ADStatsResponseTests method testToXContent.

@Test
public void testToXContent() throws IOException {
    ADStatsResponse adStatsResponse = new ADStatsResponse();
    Map<String, Object> testClusterStats = new HashMap<>();
    testClusterStats.put("test_stat", 1);
    adStatsResponse.setClusterStats(testClusterStats);
    List<ADStatsNodeResponse> responses = Collections.emptyList();
    List<FailedNodeException> failures = Collections.emptyList();
    ADStatsNodesResponse adStatsNodesResponse = new ADStatsNodesResponse(ClusterName.DEFAULT, responses, failures);
    adStatsResponse.setADStatsNodesResponse(adStatsNodesResponse);
    XContentBuilder builder = XContentFactory.jsonBuilder();
    adStatsResponse.toXContent(builder);
    XContentParser parser = createParser(builder);
    assertEquals(1, parser.map().get("test_stat"));
}
Also used : ADStatsNodesResponse(org.opensearch.ad.transport.ADStatsNodesResponse) HashMap(java.util.HashMap) ADStatsNodeResponse(org.opensearch.ad.transport.ADStatsNodeResponse) FailedNodeException(org.opensearch.action.FailedNodeException) XContentBuilder(org.opensearch.common.xcontent.XContentBuilder) XContentParser(org.opensearch.common.xcontent.XContentParser) Test(org.junit.Test)

Example 3 with ADStatsNodeResponse

use of org.opensearch.ad.transport.ADStatsNodeResponse in project anomaly-detection by opensearch-project.

the class ADStatsResponseTests method testGetAndSetADStatsNodesResponse.

@Test
public void testGetAndSetADStatsNodesResponse() {
    ADStatsResponse adStatsResponse = new ADStatsResponse();
    List<ADStatsNodeResponse> responses = Collections.emptyList();
    List<FailedNodeException> failures = Collections.emptyList();
    ADStatsNodesResponse adStatsNodesResponse = new ADStatsNodesResponse(ClusterName.DEFAULT, responses, failures);
    adStatsResponse.setADStatsNodesResponse(adStatsNodesResponse);
    assertEquals(adStatsNodesResponse, adStatsResponse.getADStatsNodesResponse());
}
Also used : ADStatsNodesResponse(org.opensearch.ad.transport.ADStatsNodesResponse) ADStatsNodeResponse(org.opensearch.ad.transport.ADStatsNodeResponse) FailedNodeException(org.opensearch.action.FailedNodeException) Test(org.junit.Test)

Example 4 with ADStatsNodeResponse

use of org.opensearch.ad.transport.ADStatsNodeResponse in project anomaly-detection by opensearch-project.

the class ADTaskManager method checkTaskSlots.

/**
 * Check available task slots before start historical analysis and scale task lane.
 * This check will be done on lead node which will gather detector task slots of all
 * data nodes and calculate how many task slots available.
 *
 * @param adTask AD task
 * @param detector detector
 * @param detectionDateRange detection date range
 * @param user user
 * @param afterCheckAction target task action to run after task slot checking
 * @param transportService transport service
 * @param listener action listener
 */
public void checkTaskSlots(ADTask adTask, AnomalyDetector detector, DetectionDateRange detectionDateRange, User user, ADTaskAction afterCheckAction, TransportService transportService, ActionListener<AnomalyDetectorJobResponse> listener) {
    String detectorId = detector.getDetectorId();
    logger.debug("Start checking task slots for detector: {}, task action: {}", detectorId, afterCheckAction);
    if (!checkingTaskSlot.tryAcquire()) {
        logger.info("Can't acquire checking task slot semaphore for detector {}", detectorId);
        listener.onFailure(new OpenSearchStatusException("Too many historical analysis requests in short time. Please retry later.", RestStatus.FORBIDDEN));
        return;
    }
    ActionListener<AnomalyDetectorJobResponse> wrappedActionListener = ActionListener.runAfter(listener, () -> {
        checkingTaskSlot.release(1);
        logger.debug("Release checking task slot semaphore on lead node for detector {}", detectorId);
    });
    hashRing.getNodesWithSameLocalAdVersion(nodes -> {
        int maxAdTaskSlots = nodes.length * maxAdBatchTaskPerNode;
        ADStatsRequest adStatsRequest = new ADStatsRequest(nodes);
        adStatsRequest.addAll(ImmutableSet.of(AD_USED_BATCH_TASK_SLOT_COUNT.getName(), AD_DETECTOR_ASSIGNED_BATCH_TASK_SLOT_COUNT.getName()));
        client.execute(ADStatsNodesAction.INSTANCE, adStatsRequest, ActionListener.wrap(adStatsResponse -> {
            // Total entity tasks running on worker nodes
            int totalUsedTaskSlots = 0;
            // Total assigned task slots on coordinating nodes
            int totalAssignedTaskSlots = 0;
            for (ADStatsNodeResponse response : adStatsResponse.getNodes()) {
                totalUsedTaskSlots += (int) response.getStatsMap().get(AD_USED_BATCH_TASK_SLOT_COUNT.getName());
                totalAssignedTaskSlots += (int) response.getStatsMap().get(AD_DETECTOR_ASSIGNED_BATCH_TASK_SLOT_COUNT.getName());
            }
            logger.info("Current total used task slots is {}, total detector assigned task slots is {} when start historical " + "analysis for detector {}", totalUsedTaskSlots, totalAssignedTaskSlots, detectorId);
            // In happy case, totalAssignedTaskSlots >= totalUsedTaskSlots. If some coordinating node left, then we can't
            // get detector task slots cached on it, so it's possible that totalAssignedTaskSlots < totalUsedTaskSlots.
            int currentUsedTaskSlots = Math.max(totalUsedTaskSlots, totalAssignedTaskSlots);
            if (currentUsedTaskSlots >= maxAdTaskSlots) {
                wrappedActionListener.onFailure(new OpenSearchStatusException("No available task slot", RestStatus.BAD_REQUEST));
                return;
            }
            int availableAdTaskSlots = maxAdTaskSlots - currentUsedTaskSlots;
            logger.info("Current available task slots is {} for historical analysis of detector {}", availableAdTaskSlots, detectorId);
            if (ADTaskAction.SCALE_ENTITY_TASK_SLOTS == afterCheckAction) {
                forwardToCoordinatingNode(adTask, detector, detectionDateRange, user, afterCheckAction, transportService, wrappedActionListener, availableAdTaskSlots);
                return;
            }
            // It takes long time to check top entities especially for multi-category HC. Tested with
            // 1.8 billion docs for multi-category HC, it took more than 20 seconds and caused timeout.
            // By removing top entity check, it took about 200ms to return. So just remove it to make
            // sure REST API can return quickly.
            // We may assign more task slots. For example, cluster has 4 data nodes, each node can run 2
            // batch tasks, so the available task slot number is 8. If max running entities per HC is 4,
            // then we will assign 4 tasks slots to this HC detector (4 is less than 8). The data index
            // only has 2 entities. So we assign 2 more task slots than actual need. But it's ok as we
            // will auto tune task slot when historical analysis task starts.
            int approvedTaskSlots = detector.isMultientityDetector() ? Math.min(maxRunningEntitiesPerDetector, availableAdTaskSlots) : 1;
            forwardToCoordinatingNode(adTask, detector, detectionDateRange, user, afterCheckAction, transportService, wrappedActionListener, approvedTaskSlots);
        }, exception -> {
            logger.error("Failed to get node's task stats for detector " + detectorId, exception);
            wrappedActionListener.onFailure(exception);
        }));
    }, wrappedActionListener);
}
Also used : PARENT_TASK_ID_FIELD(org.opensearch.ad.model.ADTask.PARENT_TASK_ID_FIELD) IndexResponse(org.opensearch.action.index.IndexResponse) HashRing(org.opensearch.ad.cluster.HashRing) Version(org.opensearch.Version) FAIL_TO_FIND_DETECTOR_MSG(org.opensearch.ad.constant.CommonErrorMessages.FAIL_TO_FIND_DETECTOR_MSG) BulkAction(org.opensearch.action.bulk.BulkAction) LimitExceededException(org.opensearch.ad.common.exception.LimitExceededException) ExceptionUtil.getShardsFailure(org.opensearch.ad.util.ExceptionUtil.getShardsFailure) ADTaskState(org.opensearch.ad.model.ADTaskState) AnomalyDetectorFunction(org.opensearch.ad.rest.handler.AnomalyDetectorFunction) STOPPED_BY_FIELD(org.opensearch.ad.model.ADTask.STOPPED_BY_FIELD) WriteRequest(org.opensearch.action.support.WriteRequest) ANOMALY_DETECTOR_JOB_INDEX(org.opensearch.ad.model.AnomalyDetectorJob.ANOMALY_DETECTOR_JOB_INDEX) MAX_OLD_AD_TASK_DOCS_PER_DETECTOR(org.opensearch.ad.settings.AnomalyDetectorSettings.MAX_OLD_AD_TASK_DOCS_PER_DETECTOR) Map(java.util.Map) ForwardADTaskAction(org.opensearch.ad.transport.ForwardADTaskAction) ActionListener(org.opensearch.action.ActionListener) DETECTOR_IS_RUNNING(org.opensearch.ad.constant.CommonErrorMessages.DETECTOR_IS_RUNNING) ADStatsNodeResponse(org.opensearch.ad.transport.ADStatsNodeResponse) DeleteRequest(org.opensearch.action.delete.DeleteRequest) MAX_RUNNING_ENTITIES_PER_DETECTOR_FOR_HISTORICAL_ANALYSIS(org.opensearch.ad.settings.AnomalyDetectorSettings.MAX_RUNNING_ENTITIES_PER_DETECTOR_FOR_HISTORICAL_ANALYSIS) Client(org.opensearch.client.Client) UpdateByQueryAction(org.opensearch.index.reindex.UpdateByQueryAction) AD_BATCH_TASK_THREAD_POOL_NAME(org.opensearch.ad.AnomalyDetectorPlugin.AD_BATCH_TASK_THREAD_POOL_NAME) ADTaskProfileRequest(org.opensearch.ad.transport.ADTaskProfileRequest) TimeValue(org.opensearch.common.unit.TimeValue) RestHandlerUtils.createXContentParserFromRegistry(org.opensearch.ad.util.RestHandlerUtils.createXContentParserFromRegistry) SearchHit(org.opensearch.search.SearchHit) TransportRequestOptions(org.opensearch.transport.TransportRequestOptions) Set(java.util.Set) ExceptionsHelper(org.opensearch.ExceptionsHelper) Settings(org.opensearch.common.settings.Settings) NO_ELIGIBLE_NODE_TO_RUN_DETECTOR(org.opensearch.ad.constant.CommonErrorMessages.NO_ELIGIBLE_NODE_TO_RUN_DETECTOR) IS_LATEST_FIELD(org.opensearch.ad.model.ADTask.IS_LATEST_FIELD) DETECTION_STATE_INDEX(org.opensearch.ad.constant.CommonName.DETECTION_STATE_INDEX) ScoreMode(org.apache.lucene.search.join.ScoreMode) TransportService(org.opensearch.transport.TransportService) Logger(org.apache.logging.log4j.Logger) CAN_NOT_FIND_LATEST_TASK(org.opensearch.ad.constant.CommonErrorMessages.CAN_NOT_FIND_LATEST_TASK) ADEntityTaskProfile(org.opensearch.ad.model.ADEntityTaskProfile) ActionListenerResponseHandler(org.opensearch.action.ActionListenerResponseHandler) TermsQueryBuilder(org.opensearch.index.query.TermsQueryBuilder) EXECUTION_END_TIME_FIELD(org.opensearch.ad.model.ADTask.EXECUTION_END_TIME_FIELD) UpdateRequest(org.opensearch.action.update.UpdateRequest) XContentType(org.opensearch.common.xcontent.XContentType) BoolQueryBuilder(org.opensearch.index.query.BoolQueryBuilder) BATCH_TASK_PIECE_INTERVAL_SECONDS(org.opensearch.ad.settings.AnomalyDetectorSettings.BATCH_TASK_PIECE_INTERVAL_SECONDS) MAX_BATCH_TASK_PER_NODE(org.opensearch.ad.settings.AnomalyDetectorSettings.MAX_BATCH_TASK_PER_NODE) UpdateResponse(org.opensearch.action.update.UpdateResponse) ThreadPool(org.opensearch.threadpool.ThreadPool) ADTaskType.taskTypeToString(org.opensearch.ad.model.ADTaskType.taskTypeToString) ADTaskProfile(org.opensearch.ad.model.ADTaskProfile) ArrayList(java.util.ArrayList) XCONTENT_WITH_TYPE(org.opensearch.ad.util.RestHandlerUtils.XCONTENT_WITH_TYPE) HC_DETECTOR_TASK_IS_UPDATING(org.opensearch.ad.constant.CommonErrorMessages.HC_DETECTOR_TASK_IS_UPDATING) AnomalyDetector(org.opensearch.ad.model.AnomalyDetector) ADCancelTaskAction(org.opensearch.ad.transport.ADCancelTaskAction) BiConsumer(java.util.function.BiConsumer) DeleteResponse(org.opensearch.action.delete.DeleteResponse) SearchRequest(org.opensearch.action.search.SearchRequest) INIT_PROGRESS_FIELD(org.opensearch.ad.model.ADTask.INIT_PROGRESS_FIELD) QueryBuilders(org.opensearch.index.query.QueryBuilders) ADStatsNodesAction(org.opensearch.ad.transport.ADStatsNodesAction) IOException(java.io.IOException) TotalHits(org.apache.lucene.search.TotalHits) XContentBuilder(org.opensearch.common.xcontent.XContentBuilder) ChronoUnit(java.time.temporal.ChronoUnit) NamedXContentRegistry(org.opensearch.common.xcontent.NamedXContentRegistry) ClusterService(org.opensearch.cluster.service.ClusterService) ExceptionUtil.getErrorMessage(org.opensearch.ad.util.ExceptionUtil.getErrorMessage) ADTaskAction(org.opensearch.ad.model.ADTaskAction) DeleteByQueryRequest(org.opensearch.index.reindex.DeleteByQueryRequest) AD_DETECTOR_ASSIGNED_BATCH_TASK_SLOT_COUNT(org.opensearch.ad.stats.InternalStatNames.AD_DETECTOR_ASSIGNED_BATCH_TASK_SLOT_COUNT) DeleteByQueryAction(org.opensearch.index.reindex.DeleteByQueryAction) COORDINATING_NODE_FIELD(org.opensearch.ad.model.ADTask.COORDINATING_NODE_FIELD) DELETE_AD_RESULT_WHEN_DELETE_DETECTOR(org.opensearch.ad.settings.AnomalyDetectorSettings.DELETE_AD_RESULT_WHEN_DELETE_DETECTOR) ResourceNotFoundException(org.opensearch.ad.common.exception.ResourceNotFoundException) MAX_OLD_AD_TASK_DOCS(org.opensearch.ad.settings.AnomalyDetectorSettings.MAX_OLD_AD_TASK_DOCS) ADBatchAnomalyResultAction(org.opensearch.ad.transport.ADBatchAnomalyResultAction) ToXContent(org.opensearch.common.xcontent.ToXContent) OpenSearchStatusException(org.opensearch.OpenSearchStatusException) BulkRequest(org.opensearch.action.bulk.BulkRequest) ERROR_FIELD(org.opensearch.ad.model.ADTask.ERROR_FIELD) EXECUTION_START_TIME_FIELD(org.opensearch.ad.model.ADTask.EXECUTION_START_TIME_FIELD) ParseUtils.isNullOrEmpty(org.opensearch.ad.util.ParseUtils.isNullOrEmpty) ADTaskCancelledException(org.opensearch.ad.common.exception.ADTaskCancelledException) ADStatsRequest(org.opensearch.ad.transport.ADStatsRequest) ThreadContext(org.opensearch.common.util.concurrent.ThreadContext) AnomalyDetectionException(org.opensearch.ad.common.exception.AnomalyDetectionException) XContentParser(org.opensearch.common.xcontent.XContentParser) DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) IndexAnomalyDetectorJobActionHandler(org.opensearch.ad.rest.handler.IndexAnomalyDetectorJobActionHandler) DETECTOR_ID_FIELD(org.opensearch.ad.model.ADTask.DETECTOR_ID_FIELD) Locale(java.util.Locale) BulkItemResponse(org.opensearch.action.bulk.BulkItemResponse) XContentFactory(org.opensearch.common.xcontent.XContentFactory) REALTIME_TASK_TYPES(org.opensearch.ad.model.ADTaskType.REALTIME_TASK_TYPES) NestedQueryBuilder(org.opensearch.index.query.NestedQueryBuilder) TASK_PROGRESS_FIELD(org.opensearch.ad.model.ADTask.TASK_PROGRESS_FIELD) DetectorProfile(org.opensearch.ad.model.DetectorProfile) ImmutableSet(com.google.common.collect.ImmutableSet) ESTIMATED_MINUTES_LEFT_FIELD(org.opensearch.ad.model.ADTask.ESTIMATED_MINUTES_LEFT_FIELD) NOT_ENDED_STATES(org.opensearch.ad.model.ADTaskState.NOT_ENDED_STATES) ImmutableMap(com.google.common.collect.ImmutableMap) Script(org.opensearch.script.Script) LoggingDeprecationHandler(org.opensearch.common.xcontent.LoggingDeprecationHandler) ADTaskType(org.opensearch.ad.model.ADTaskType) XContentParserUtils.ensureExpectedToken(org.opensearch.common.xcontent.XContentParserUtils.ensureExpectedToken) ADTaskProfileNodeResponse(org.opensearch.ad.transport.ADTaskProfileNodeResponse) LAST_UPDATE_TIME_FIELD(org.opensearch.ad.model.ADTask.LAST_UPDATE_TIME_FIELD) Instant(java.time.Instant) RestStatus(org.opensearch.rest.RestStatus) Collectors(java.util.stream.Collectors) TASK_TYPE_FIELD(org.opensearch.ad.model.ADTask.TASK_TYPE_FIELD) Objects(java.util.Objects) AnomalyDetectorJobResponse(org.opensearch.ad.transport.AnomalyDetectorJobResponse) List(java.util.List) ForwardADTaskRequest(org.opensearch.ad.transport.ForwardADTaskRequest) DuplicateTaskException(org.opensearch.ad.common.exception.DuplicateTaskException) SearchSourceBuilder(org.opensearch.search.builder.SearchSourceBuilder) CREATED(org.opensearch.action.DocWriteResponse.Result.CREATED) DetectionDateRange(org.opensearch.ad.model.DetectionDateRange) NUM_MIN_SAMPLES(org.opensearch.ad.settings.AnomalyDetectorSettings.NUM_MIN_SAMPLES) Optional(java.util.Optional) ResourceAlreadyExistsException(org.opensearch.ResourceAlreadyExistsException) ALL_HISTORICAL_TASK_TYPES(org.opensearch.ad.model.ADTaskType.ALL_HISTORICAL_TASK_TYPES) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) RestHandlerUtils(org.opensearch.ad.util.RestHandlerUtils) TASK_ID_FIELD(org.opensearch.ad.model.AnomalyResult.TASK_ID_FIELD) REQUEST_TIMEOUT(org.opensearch.ad.settings.AnomalyDetectorSettings.REQUEST_TIMEOUT) DiscoveryNodeFilterer(org.opensearch.ad.util.DiscoveryNodeFilterer) BytesReference(org.opensearch.common.bytes.BytesReference) ADTask(org.opensearch.ad.model.ADTask) HashMap(java.util.HashMap) SortOrder(org.opensearch.search.sort.SortOrder) ImmutableList(com.google.common.collect.ImmutableList) EXCEED_HISTORICAL_ANALYSIS_LIMIT(org.opensearch.ad.constant.CommonErrorMessages.EXCEED_HISTORICAL_ANALYSIS_LIMIT) ADCancelTaskRequest(org.opensearch.ad.transport.ADCancelTaskRequest) SearchResponse(org.opensearch.action.search.SearchResponse) EndRunException(org.opensearch.ad.common.exception.EndRunException) ADBatchAnomalyResultRequest(org.opensearch.ad.transport.ADBatchAnomalyResultRequest) HISTORICAL_DETECTOR_TASK_TYPES(org.opensearch.ad.model.ADTaskType.HISTORICAL_DETECTOR_TASK_TYPES) Iterator(java.util.Iterator) ALL_AD_RESULTS_INDEX_PATTERN(org.opensearch.ad.indices.AnomalyDetectionIndices.ALL_AD_RESULTS_INDEX_PATTERN) Semaphore(java.util.concurrent.Semaphore) IndexNotFoundException(org.opensearch.index.IndexNotFoundException) ANOMALY_DETECTORS_INDEX(org.opensearch.ad.model.AnomalyDetector.ANOMALY_DETECTORS_INDEX) GetRequest(org.opensearch.action.get.GetRequest) TermQueryBuilder(org.opensearch.index.query.TermQueryBuilder) AnomalyDetectionIndices(org.opensearch.ad.indices.AnomalyDetectionIndices) STATE_FIELD(org.opensearch.ad.model.ADTask.STATE_FIELD) CREATE_INDEX_NOT_ACKNOWLEDGED(org.opensearch.ad.constant.CommonErrorMessages.CREATE_INDEX_NOT_ACKNOWLEDGED) Consumer(java.util.function.Consumer) UpdateByQueryRequest(org.opensearch.index.reindex.UpdateByQueryRequest) AD_USED_BATCH_TASK_SLOT_COUNT(org.opensearch.ad.stats.InternalStatNames.AD_USED_BATCH_TASK_SLOT_COUNT) Entity(org.opensearch.ad.model.Entity) User(org.opensearch.commons.authuser.User) AnomalyDetectorJob(org.opensearch.ad.model.AnomalyDetectorJob) IndexRequest(org.opensearch.action.index.IndexRequest) LogManager(org.apache.logging.log4j.LogManager) ADTaskProfileAction(org.opensearch.ad.transport.ADTaskProfileAction) ADStatsNodeResponse(org.opensearch.ad.transport.ADStatsNodeResponse) AnomalyDetectorJobResponse(org.opensearch.ad.transport.AnomalyDetectorJobResponse) ADTaskType.taskTypeToString(org.opensearch.ad.model.ADTaskType.taskTypeToString) OpenSearchStatusException(org.opensearch.OpenSearchStatusException) ADStatsRequest(org.opensearch.ad.transport.ADStatsRequest)

Example 5 with ADStatsNodeResponse

use of org.opensearch.ad.transport.ADStatsNodeResponse in project anomaly-detection by opensearch-project.

the class ADBatchTaskRunner method dispatchTask.

private void dispatchTask(ADTask adTask, ActionListener<DiscoveryNode> listener) {
    hashRing.getNodesWithSameLocalAdVersion(dataNodes -> {
        ADStatsRequest adStatsRequest = new ADStatsRequest(dataNodes);
        adStatsRequest.addAll(ImmutableSet.of(AD_EXECUTING_BATCH_TASK_COUNT.getName(), JVM_HEAP_USAGE.getName()));
        client.execute(ADStatsNodesAction.INSTANCE, adStatsRequest, ActionListener.wrap(adStatsResponse -> {
            List<ADStatsNodeResponse> candidateNodeResponse = adStatsResponse.getNodes().stream().filter(stat -> (long) stat.getStatsMap().get(JVM_HEAP_USAGE.getName()) < DEFAULT_JVM_HEAP_USAGE_THRESHOLD).collect(Collectors.toList());
            if (candidateNodeResponse.size() == 0) {
                StringBuilder errorMessageBuilder = new StringBuilder("All nodes' memory usage exceeds limitation ").append(DEFAULT_JVM_HEAP_USAGE_THRESHOLD).append("%. ").append(NO_ELIGIBLE_NODE_TO_RUN_DETECTOR).append(adTask.getDetectorId());
                String errorMessage = errorMessageBuilder.toString();
                logger.warn(errorMessage + ", task id " + adTask.getTaskId() + ", " + adTask.getTaskType());
                listener.onFailure(new LimitExceededException(adTask.getDetectorId(), errorMessage));
                return;
            }
            candidateNodeResponse = candidateNodeResponse.stream().filter(stat -> (Long) stat.getStatsMap().get(AD_EXECUTING_BATCH_TASK_COUNT.getName()) < maxAdBatchTaskPerNode).collect(Collectors.toList());
            if (candidateNodeResponse.size() == 0) {
                StringBuilder errorMessageBuilder = new StringBuilder("All nodes' executing batch tasks exceeds limitation ").append(NO_ELIGIBLE_NODE_TO_RUN_DETECTOR).append(adTask.getDetectorId());
                String errorMessage = errorMessageBuilder.toString();
                logger.warn(errorMessage + ", task id " + adTask.getTaskId() + ", " + adTask.getTaskType());
                listener.onFailure(new LimitExceededException(adTask.getDetectorId(), errorMessage));
                return;
            }
            Optional<ADStatsNodeResponse> targetNode = candidateNodeResponse.stream().sorted((ADStatsNodeResponse r1, ADStatsNodeResponse r2) -> {
                int result = ((Long) r1.getStatsMap().get(AD_EXECUTING_BATCH_TASK_COUNT.getName())).compareTo((Long) r2.getStatsMap().get(AD_EXECUTING_BATCH_TASK_COUNT.getName()));
                if (result == 0) {
                    // JVM heap usage.
                    return ((Long) r1.getStatsMap().get(JVM_HEAP_USAGE.getName())).compareTo((Long) r2.getStatsMap().get(JVM_HEAP_USAGE.getName()));
                }
                return result;
            }).findFirst();
            listener.onResponse(targetNode.get().getNode());
        }, exception -> {
            logger.error("Failed to get node's task stats", exception);
            listener.onFailure(exception);
        }));
    }, listener);
}
Also used : AnomalyResultBulkIndexHandler(org.opensearch.ad.transport.handler.AnomalyResultBulkIndexHandler) ModelManager(org.opensearch.ad.ml.ModelManager) HashRing(org.opensearch.ad.cluster.HashRing) LimitExceededException(org.opensearch.ad.common.exception.LimitExceededException) ADTaskState(org.opensearch.ad.model.ADTaskState) AnomalyDetectorFunction(org.opensearch.ad.rest.handler.AnomalyDetectorFunction) ADIndex(org.opensearch.ad.indices.ADIndex) Map(java.util.Map) ActionListener(org.opensearch.action.ActionListener) BATCH_TASK_PIECE_SIZE(org.opensearch.ad.settings.AnomalyDetectorSettings.BATCH_TASK_PIECE_SIZE) ADStatsNodeResponse(org.opensearch.ad.transport.ADStatsNodeResponse) MAX_RUNNING_ENTITIES_PER_DETECTOR_FOR_HISTORICAL_ANALYSIS(org.opensearch.ad.settings.AnomalyDetectorSettings.MAX_RUNNING_ENTITIES_PER_DETECTOR_FOR_HISTORICAL_ANALYSIS) Client(org.opensearch.client.Client) AD_BATCH_TASK_THREAD_POOL_NAME(org.opensearch.ad.AnomalyDetectorPlugin.AD_BATCH_TASK_THREAD_POOL_NAME) TimeValue(org.opensearch.common.unit.TimeValue) TransportRequestOptions(org.opensearch.transport.TransportRequestOptions) Settings(org.opensearch.common.settings.Settings) NO_ELIGIBLE_NODE_TO_RUN_DETECTOR(org.opensearch.ad.constant.CommonErrorMessages.NO_ELIGIBLE_NODE_TO_RUN_DETECTOR) TransportService(org.opensearch.transport.TransportService) RandomCutForest(com.amazon.randomcutforest.RandomCutForest) Logger(org.apache.logging.log4j.Logger) PriorityTracker(org.opensearch.ad.caching.PriorityTracker) ExceptionUtil(org.opensearch.ad.util.ExceptionUtil) ActionListenerResponseHandler(org.opensearch.action.ActionListenerResponseHandler) SearchFeatureDao(org.opensearch.ad.feature.SearchFeatureDao) CheckedRunnable(org.opensearch.common.CheckedRunnable) EXECUTION_END_TIME_FIELD(org.opensearch.ad.model.ADTask.EXECUTION_END_TIME_FIELD) InjectSecurity(org.opensearch.commons.InjectSecurity) BoolQueryBuilder(org.opensearch.index.query.BoolQueryBuilder) BATCH_TASK_PIECE_INTERVAL_SECONDS(org.opensearch.ad.settings.AnomalyDetectorSettings.BATCH_TASK_PIECE_INTERVAL_SECONDS) ADCircuitBreakerService(org.opensearch.ad.breaker.ADCircuitBreakerService) MAX_BATCH_TASK_PER_NODE(org.opensearch.ad.settings.AnomalyDetectorSettings.MAX_BATCH_TASK_PER_NODE) StringTerms(org.opensearch.search.aggregations.bucket.terms.StringTerms) ThreadPool(org.opensearch.threadpool.ThreadPool) EnabledSetting(org.opensearch.ad.settings.EnabledSetting) AnomalyDetectorSettings(org.opensearch.ad.settings.AnomalyDetectorSettings) ArrayList(java.util.ArrayList) AnomalyDetector(org.opensearch.ad.model.AnomalyDetector) BiConsumer(java.util.function.BiConsumer) SearchRequest(org.opensearch.action.search.SearchRequest) INIT_PROGRESS_FIELD(org.opensearch.ad.model.ADTask.INIT_PROGRESS_FIELD) SinglePointFeatures(org.opensearch.ad.feature.SinglePointFeatures) FeatureManager(org.opensearch.ad.feature.FeatureManager) MAX_TOP_ENTITIES_FOR_HISTORICAL_ANALYSIS(org.opensearch.ad.settings.AnomalyDetectorSettings.MAX_TOP_ENTITIES_FOR_HISTORICAL_ANALYSIS) ADStatsNodesAction(org.opensearch.ad.transport.ADStatsNodesAction) AggregationBuilders(org.opensearch.search.aggregations.AggregationBuilders) CommonErrorMessages(org.opensearch.ad.constant.CommonErrorMessages) ClusterService(org.opensearch.cluster.service.ClusterService) StatNames(org.opensearch.ad.stats.StatNames) ParseUtils(org.opensearch.ad.util.ParseUtils) InternalMin(org.opensearch.search.aggregations.metrics.InternalMin) AD_EXECUTING_BATCH_TASK_COUNT(org.opensearch.ad.stats.StatNames.AD_EXECUTING_BATCH_TASK_COUNT) ResourceNotFoundException(org.opensearch.ad.common.exception.ResourceNotFoundException) ADStats(org.opensearch.ad.stats.ADStats) ParseUtils.isNullOrEmpty(org.opensearch.ad.util.ParseUtils.isNullOrEmpty) ADTaskCancelledException(org.opensearch.ad.common.exception.ADTaskCancelledException) ADStatsRequest(org.opensearch.ad.transport.ADStatsRequest) AnomalyDetectionException(org.opensearch.ad.common.exception.AnomalyDetectionException) DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) AggregationBuilder(org.opensearch.search.aggregations.AggregationBuilder) InternalMax(org.opensearch.search.aggregations.metrics.InternalMax) TASK_PROGRESS_FIELD(org.opensearch.ad.model.ADTask.TASK_PROGRESS_FIELD) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) ADBatchTaskRemoteExecutionAction(org.opensearch.ad.transport.ADBatchTaskRemoteExecutionAction) ADTaskType(org.opensearch.ad.model.ADTaskType) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) Objects(java.util.Objects) List(java.util.List) AnomalyResult(org.opensearch.ad.model.AnomalyResult) SearchSourceBuilder(org.opensearch.search.builder.SearchSourceBuilder) DEFAULT_JVM_HEAP_USAGE_THRESHOLD(org.opensearch.ad.breaker.MemoryCircuitBreaker.DEFAULT_JVM_HEAP_USAGE_THRESHOLD) DetectionDateRange(org.opensearch.ad.model.DetectionDateRange) NUM_MIN_SAMPLES(org.opensearch.ad.settings.AnomalyDetectorSettings.NUM_MIN_SAMPLES) Optional(java.util.Optional) AnomalyDescriptor(com.amazon.randomcutforest.parkservices.AnomalyDescriptor) ADTask(org.opensearch.ad.model.ADTask) FeatureData(org.opensearch.ad.model.FeatureData) HashMap(java.util.HashMap) Deque(java.util.Deque) ThreadedActionListener(org.opensearch.action.support.ThreadedActionListener) CURRENT_PIECE_FIELD(org.opensearch.ad.model.ADTask.CURRENT_PIECE_FIELD) ImmutableList(com.google.common.collect.ImmutableList) ADBatchAnomalyResultResponse(org.opensearch.ad.transport.ADBatchAnomalyResultResponse) JVM_HEAP_USAGE(org.opensearch.ad.stats.InternalStatNames.JVM_HEAP_USAGE) EndRunException(org.opensearch.ad.common.exception.EndRunException) ADBatchAnomalyResultRequest(org.opensearch.ad.transport.ADBatchAnomalyResultRequest) AGG_NAME_MAX_TIME(org.opensearch.ad.constant.CommonName.AGG_NAME_MAX_TIME) RangeQueryBuilder(org.opensearch.index.query.RangeQueryBuilder) MAX_TOP_ENTITIES_LIMIT_FOR_HISTORICAL_ANALYSIS(org.opensearch.ad.settings.AnomalyDetectorSettings.MAX_TOP_ENTITIES_LIMIT_FOR_HISTORICAL_ANALYSIS) TermQueryBuilder(org.opensearch.index.query.TermQueryBuilder) AnomalyDetectionIndices(org.opensearch.ad.indices.AnomalyDetectionIndices) STATE_FIELD(org.opensearch.ad.model.ADTask.STATE_FIELD) WORKER_NODE_FIELD(org.opensearch.ad.model.ADTask.WORKER_NODE_FIELD) Entity(org.opensearch.ad.model.Entity) TermsAggregationBuilder(org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder) ThresholdedRandomCutForest(com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest) Clock(java.time.Clock) IntervalTimeConfiguration(org.opensearch.ad.model.IntervalTimeConfiguration) LogManager(org.apache.logging.log4j.LogManager) AGG_NAME_MIN_TIME(org.opensearch.ad.constant.CommonName.AGG_NAME_MIN_TIME) Optional(java.util.Optional) ADStatsNodeResponse(org.opensearch.ad.transport.ADStatsNodeResponse) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) LimitExceededException(org.opensearch.ad.common.exception.LimitExceededException) ADStatsRequest(org.opensearch.ad.transport.ADStatsRequest)

Aggregations

ADStatsNodeResponse (org.opensearch.ad.transport.ADStatsNodeResponse)6 HashMap (java.util.HashMap)4 ADStatsNodesResponse (org.opensearch.ad.transport.ADStatsNodesResponse)4 Test (org.junit.Test)3 FailedNodeException (org.opensearch.action.FailedNodeException)3 ImmutableList (com.google.common.collect.ImmutableList)2 ImmutableMap (com.google.common.collect.ImmutableMap)2 ImmutableSet (com.google.common.collect.ImmutableSet)2 Instant (java.time.Instant)2 ArrayList (java.util.ArrayList)2 List (java.util.List)2 Map (java.util.Map)2 Objects (java.util.Objects)2 Optional (java.util.Optional)2 BiConsumer (java.util.function.BiConsumer)2 Collectors (java.util.stream.Collectors)2 LogManager (org.apache.logging.log4j.LogManager)2 Logger (org.apache.logging.log4j.Logger)2 ActionListener (org.opensearch.action.ActionListener)2 ActionListenerResponseHandler (org.opensearch.action.ActionListenerResponseHandler)2