Search in sources :

Example 1 with PriorityTracker

use of org.opensearch.ad.caching.PriorityTracker in project anomaly-detection by opensearch-project.

the class ADBatchTaskRunner method getTopEntities.

/**
 * Get top entities for HC detector. Will use similar logic of realtime detector,
 * but split the whole historical detection date range into limited number of
 * buckets (1000 buckets by default). Will get top entities for each bucket, then
 * put each bucket's top entities into {@link PriorityTracker} to track top
 * entities dynamically. Once all buckets done, we can get finalized top entities
 * in {@link PriorityTracker}.
 *
 * @param adTask AD task
 * @param internalHCListener internal HC listener
 */
public void getTopEntities(ADTask adTask, ActionListener<String> internalHCListener) {
    getDateRangeOfSourceData(adTask, (dataStartTime, dataEndTime) -> {
        PriorityTracker priorityTracker = new PriorityTracker(Clock.systemUTC(), adTask.getDetector().getDetectorIntervalInSeconds(), adTask.getDetectionDateRange().getStartTime().toEpochMilli(), MAX_TOP_ENTITIES_LIMIT_FOR_HISTORICAL_ANALYSIS);
        long detectorInterval = adTask.getDetector().getDetectorIntervalInMilliseconds();
        logger.debug("start to search top entities at {}, data start time: {}, data end time: {}, interval: {}", System.currentTimeMillis(), dataStartTime, dataEndTime, detectorInterval);
        if (adTask.getDetector().isMultiCategoryDetector()) {
            searchTopEntitiesForMultiCategoryHC(adTask, priorityTracker, dataEndTime, Math.max((dataEndTime - dataStartTime) / MAX_TOP_ENTITY_SEARCH_BUCKETS, detectorInterval), dataStartTime, dataStartTime + detectorInterval, internalHCListener);
        } else {
            searchTopEntitiesForSingleCategoryHC(adTask, priorityTracker, dataEndTime, Math.max((dataEndTime - dataStartTime) / MAX_TOP_ENTITY_SEARCH_BUCKETS, detectorInterval), dataStartTime, dataStartTime + detectorInterval, internalHCListener);
        }
    }, internalHCListener);
}
Also used : PriorityTracker(org.opensearch.ad.caching.PriorityTracker)

Example 2 with PriorityTracker

use of org.opensearch.ad.caching.PriorityTracker in project anomaly-detection by opensearch-project.

the class ADBatchTaskRunner method searchTopEntitiesForSingleCategoryHC.

private void searchTopEntitiesForSingleCategoryHC(ADTask adTask, PriorityTracker priorityTracker, long detectionEndTime, long interval, long dataStartTime, long dataEndTime, ActionListener<String> internalHCListener) {
    checkIfADTaskCancelledAndCleanupCache(adTask);
    SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
    BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder();
    RangeQueryBuilder rangeQueryBuilder = new RangeQueryBuilder(adTask.getDetector().getTimeField()).gte(dataStartTime).lte(dataEndTime).format("epoch_millis");
    boolQueryBuilder.filter(rangeQueryBuilder);
    boolQueryBuilder.filter(adTask.getDetector().getFilterQuery());
    sourceBuilder.query(boolQueryBuilder);
    String topEntitiesAgg = "topEntities";
    AggregationBuilder aggregation = new TermsAggregationBuilder(topEntitiesAgg).field(adTask.getDetector().getCategoryField().get(0)).size(MAX_TOP_ENTITIES_LIMIT_FOR_HISTORICAL_ANALYSIS);
    sourceBuilder.aggregation(aggregation).size(0);
    SearchRequest searchRequest = new SearchRequest();
    searchRequest.source(sourceBuilder);
    searchRequest.indices(adTask.getDetector().getIndices().toArray(new String[0]));
    client.search(searchRequest, ActionListener.wrap(r -> {
        StringTerms stringTerms = r.getAggregations().get(topEntitiesAgg);
        List<StringTerms.Bucket> buckets = stringTerms.getBuckets();
        List<String> topEntities = new ArrayList<>();
        for (StringTerms.Bucket bucket : buckets) {
            String key = bucket.getKeyAsString();
            topEntities.add(key);
        }
        topEntities.forEach(e -> priorityTracker.updatePriority(e));
        if (dataEndTime < detectionEndTime) {
            searchTopEntitiesForSingleCategoryHC(adTask, priorityTracker, detectionEndTime, interval, dataEndTime, dataEndTime + interval, internalHCListener);
        } else {
            logger.debug("finish searching top entities at " + System.currentTimeMillis());
            List<String> topNEntities = priorityTracker.getTopNEntities(maxTopEntitiesPerHcDetector);
            if (topNEntities.size() == 0) {
                logger.error("There is no entity found for detector " + adTask.getDetectorId());
                internalHCListener.onFailure(new ResourceNotFoundException(adTask.getDetectorId(), "No entity found"));
                return;
            }
            adTaskCacheManager.addPendingEntities(adTask.getDetectorId(), topNEntities);
            adTaskCacheManager.setTopEntityCount(adTask.getDetectorId(), topNEntities.size());
            internalHCListener.onResponse("Get top entities done");
        }
    }, e -> {
        logger.error("Failed to get top entities for detector " + adTask.getDetectorId(), e);
        internalHCListener.onFailure(e);
    }));
}
Also used : AnomalyResultBulkIndexHandler(org.opensearch.ad.transport.handler.AnomalyResultBulkIndexHandler) ModelManager(org.opensearch.ad.ml.ModelManager) HashRing(org.opensearch.ad.cluster.HashRing) LimitExceededException(org.opensearch.ad.common.exception.LimitExceededException) ADTaskState(org.opensearch.ad.model.ADTaskState) AnomalyDetectorFunction(org.opensearch.ad.rest.handler.AnomalyDetectorFunction) ADIndex(org.opensearch.ad.indices.ADIndex) Map(java.util.Map) ActionListener(org.opensearch.action.ActionListener) BATCH_TASK_PIECE_SIZE(org.opensearch.ad.settings.AnomalyDetectorSettings.BATCH_TASK_PIECE_SIZE) ADStatsNodeResponse(org.opensearch.ad.transport.ADStatsNodeResponse) MAX_RUNNING_ENTITIES_PER_DETECTOR_FOR_HISTORICAL_ANALYSIS(org.opensearch.ad.settings.AnomalyDetectorSettings.MAX_RUNNING_ENTITIES_PER_DETECTOR_FOR_HISTORICAL_ANALYSIS) Client(org.opensearch.client.Client) AD_BATCH_TASK_THREAD_POOL_NAME(org.opensearch.ad.AnomalyDetectorPlugin.AD_BATCH_TASK_THREAD_POOL_NAME) TimeValue(org.opensearch.common.unit.TimeValue) TransportRequestOptions(org.opensearch.transport.TransportRequestOptions) Settings(org.opensearch.common.settings.Settings) NO_ELIGIBLE_NODE_TO_RUN_DETECTOR(org.opensearch.ad.constant.CommonErrorMessages.NO_ELIGIBLE_NODE_TO_RUN_DETECTOR) TransportService(org.opensearch.transport.TransportService) RandomCutForest(com.amazon.randomcutforest.RandomCutForest) Logger(org.apache.logging.log4j.Logger) PriorityTracker(org.opensearch.ad.caching.PriorityTracker) ExceptionUtil(org.opensearch.ad.util.ExceptionUtil) ActionListenerResponseHandler(org.opensearch.action.ActionListenerResponseHandler) SearchFeatureDao(org.opensearch.ad.feature.SearchFeatureDao) CheckedRunnable(org.opensearch.common.CheckedRunnable) EXECUTION_END_TIME_FIELD(org.opensearch.ad.model.ADTask.EXECUTION_END_TIME_FIELD) InjectSecurity(org.opensearch.commons.InjectSecurity) BoolQueryBuilder(org.opensearch.index.query.BoolQueryBuilder) BATCH_TASK_PIECE_INTERVAL_SECONDS(org.opensearch.ad.settings.AnomalyDetectorSettings.BATCH_TASK_PIECE_INTERVAL_SECONDS) ADCircuitBreakerService(org.opensearch.ad.breaker.ADCircuitBreakerService) MAX_BATCH_TASK_PER_NODE(org.opensearch.ad.settings.AnomalyDetectorSettings.MAX_BATCH_TASK_PER_NODE) StringTerms(org.opensearch.search.aggregations.bucket.terms.StringTerms) ThreadPool(org.opensearch.threadpool.ThreadPool) EnabledSetting(org.opensearch.ad.settings.EnabledSetting) AnomalyDetectorSettings(org.opensearch.ad.settings.AnomalyDetectorSettings) ArrayList(java.util.ArrayList) AnomalyDetector(org.opensearch.ad.model.AnomalyDetector) BiConsumer(java.util.function.BiConsumer) SearchRequest(org.opensearch.action.search.SearchRequest) INIT_PROGRESS_FIELD(org.opensearch.ad.model.ADTask.INIT_PROGRESS_FIELD) SinglePointFeatures(org.opensearch.ad.feature.SinglePointFeatures) FeatureManager(org.opensearch.ad.feature.FeatureManager) MAX_TOP_ENTITIES_FOR_HISTORICAL_ANALYSIS(org.opensearch.ad.settings.AnomalyDetectorSettings.MAX_TOP_ENTITIES_FOR_HISTORICAL_ANALYSIS) ADStatsNodesAction(org.opensearch.ad.transport.ADStatsNodesAction) AggregationBuilders(org.opensearch.search.aggregations.AggregationBuilders) CommonErrorMessages(org.opensearch.ad.constant.CommonErrorMessages) ClusterService(org.opensearch.cluster.service.ClusterService) StatNames(org.opensearch.ad.stats.StatNames) ParseUtils(org.opensearch.ad.util.ParseUtils) InternalMin(org.opensearch.search.aggregations.metrics.InternalMin) AD_EXECUTING_BATCH_TASK_COUNT(org.opensearch.ad.stats.StatNames.AD_EXECUTING_BATCH_TASK_COUNT) ResourceNotFoundException(org.opensearch.ad.common.exception.ResourceNotFoundException) ADStats(org.opensearch.ad.stats.ADStats) ParseUtils.isNullOrEmpty(org.opensearch.ad.util.ParseUtils.isNullOrEmpty) ADTaskCancelledException(org.opensearch.ad.common.exception.ADTaskCancelledException) ADStatsRequest(org.opensearch.ad.transport.ADStatsRequest) AnomalyDetectionException(org.opensearch.ad.common.exception.AnomalyDetectionException) DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) AggregationBuilder(org.opensearch.search.aggregations.AggregationBuilder) InternalMax(org.opensearch.search.aggregations.metrics.InternalMax) TASK_PROGRESS_FIELD(org.opensearch.ad.model.ADTask.TASK_PROGRESS_FIELD) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) ADBatchTaskRemoteExecutionAction(org.opensearch.ad.transport.ADBatchTaskRemoteExecutionAction) ADTaskType(org.opensearch.ad.model.ADTaskType) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) Objects(java.util.Objects) List(java.util.List) AnomalyResult(org.opensearch.ad.model.AnomalyResult) SearchSourceBuilder(org.opensearch.search.builder.SearchSourceBuilder) DEFAULT_JVM_HEAP_USAGE_THRESHOLD(org.opensearch.ad.breaker.MemoryCircuitBreaker.DEFAULT_JVM_HEAP_USAGE_THRESHOLD) DetectionDateRange(org.opensearch.ad.model.DetectionDateRange) NUM_MIN_SAMPLES(org.opensearch.ad.settings.AnomalyDetectorSettings.NUM_MIN_SAMPLES) Optional(java.util.Optional) AnomalyDescriptor(com.amazon.randomcutforest.parkservices.AnomalyDescriptor) ADTask(org.opensearch.ad.model.ADTask) FeatureData(org.opensearch.ad.model.FeatureData) HashMap(java.util.HashMap) Deque(java.util.Deque) ThreadedActionListener(org.opensearch.action.support.ThreadedActionListener) CURRENT_PIECE_FIELD(org.opensearch.ad.model.ADTask.CURRENT_PIECE_FIELD) ImmutableList(com.google.common.collect.ImmutableList) ADBatchAnomalyResultResponse(org.opensearch.ad.transport.ADBatchAnomalyResultResponse) JVM_HEAP_USAGE(org.opensearch.ad.stats.InternalStatNames.JVM_HEAP_USAGE) EndRunException(org.opensearch.ad.common.exception.EndRunException) ADBatchAnomalyResultRequest(org.opensearch.ad.transport.ADBatchAnomalyResultRequest) AGG_NAME_MAX_TIME(org.opensearch.ad.constant.CommonName.AGG_NAME_MAX_TIME) RangeQueryBuilder(org.opensearch.index.query.RangeQueryBuilder) MAX_TOP_ENTITIES_LIMIT_FOR_HISTORICAL_ANALYSIS(org.opensearch.ad.settings.AnomalyDetectorSettings.MAX_TOP_ENTITIES_LIMIT_FOR_HISTORICAL_ANALYSIS) TermQueryBuilder(org.opensearch.index.query.TermQueryBuilder) AnomalyDetectionIndices(org.opensearch.ad.indices.AnomalyDetectionIndices) STATE_FIELD(org.opensearch.ad.model.ADTask.STATE_FIELD) WORKER_NODE_FIELD(org.opensearch.ad.model.ADTask.WORKER_NODE_FIELD) Entity(org.opensearch.ad.model.Entity) TermsAggregationBuilder(org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder) ThresholdedRandomCutForest(com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest) Clock(java.time.Clock) IntervalTimeConfiguration(org.opensearch.ad.model.IntervalTimeConfiguration) LogManager(org.apache.logging.log4j.LogManager) AGG_NAME_MIN_TIME(org.opensearch.ad.constant.CommonName.AGG_NAME_MIN_TIME) SearchRequest(org.opensearch.action.search.SearchRequest) AggregationBuilder(org.opensearch.search.aggregations.AggregationBuilder) TermsAggregationBuilder(org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder) RangeQueryBuilder(org.opensearch.index.query.RangeQueryBuilder) SearchSourceBuilder(org.opensearch.search.builder.SearchSourceBuilder) TermsAggregationBuilder(org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder) StringTerms(org.opensearch.search.aggregations.bucket.terms.StringTerms) BoolQueryBuilder(org.opensearch.index.query.BoolQueryBuilder) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) ResourceNotFoundException(org.opensearch.ad.common.exception.ResourceNotFoundException)

Aggregations

PriorityTracker (org.opensearch.ad.caching.PriorityTracker)2 RandomCutForest (com.amazon.randomcutforest.RandomCutForest)1 AnomalyDescriptor (com.amazon.randomcutforest.parkservices.AnomalyDescriptor)1 ThresholdedRandomCutForest (com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest)1 ImmutableList (com.google.common.collect.ImmutableList)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 ImmutableSet (com.google.common.collect.ImmutableSet)1 Clock (java.time.Clock)1 Instant (java.time.Instant)1 ArrayList (java.util.ArrayList)1 Deque (java.util.Deque)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Map (java.util.Map)1 Objects (java.util.Objects)1 Optional (java.util.Optional)1 BiConsumer (java.util.function.BiConsumer)1 Collectors (java.util.stream.Collectors)1 LogManager (org.apache.logging.log4j.LogManager)1 Logger (org.apache.logging.log4j.Logger)1