Search in sources :

Example 1 with AGG_NAME_MIN_TIME

use of org.opensearch.ad.constant.CommonName.AGG_NAME_MIN_TIME in project anomaly-detection by opensearch-project.

the class ADBatchTaskRunner method getDateRangeOfSourceData.

private void getDateRangeOfSourceData(ADTask adTask, BiConsumer<Long, Long> consumer, ActionListener<String> internalListener) {
    String taskId = adTask.getTaskId();
    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().aggregation(AggregationBuilders.min(AGG_NAME_MIN_TIME).field(adTask.getDetector().getTimeField())).aggregation(AggregationBuilders.max(AGG_NAME_MAX_TIME).field(adTask.getDetector().getTimeField())).size(0);
    if (adTask.getEntity() != null && adTask.getEntity().getAttributes().size() > 0) {
        BoolQueryBuilder query = new BoolQueryBuilder();
        adTask.getEntity().getAttributes().entrySet().forEach(entity -> query.filter(new TermQueryBuilder(entity.getKey(), entity.getValue())));
        searchSourceBuilder.query(query);
    }
    SearchRequest request = new SearchRequest().indices(adTask.getDetector().getIndices().toArray(new String[0])).source(searchSourceBuilder);
    client.search(request, ActionListener.wrap(r -> {
        InternalMin minAgg = r.getAggregations().get(AGG_NAME_MIN_TIME);
        InternalMax maxAgg = r.getAggregations().get(AGG_NAME_MAX_TIME);
        double minValue = minAgg.getValue();
        double maxValue = maxAgg.getValue();
        // If time field not exist or there is no value, will return infinity value
        if (minValue == Double.POSITIVE_INFINITY) {
            internalListener.onFailure(new ResourceNotFoundException(adTask.getDetectorId(), "There is no data in the time field"));
            return;
        }
        long interval = ((IntervalTimeConfiguration) adTask.getDetector().getDetectionInterval()).toDuration().toMillis();
        DetectionDateRange detectionDateRange = adTask.getDetectionDateRange();
        long dataStartTime = detectionDateRange.getStartTime().toEpochMilli();
        long dataEndTime = detectionDateRange.getEndTime().toEpochMilli();
        long minDate = (long) minValue;
        long maxDate = (long) maxValue;
        if (minDate >= dataEndTime || maxDate <= dataStartTime) {
            internalListener.onFailure(new ResourceNotFoundException(adTask.getDetectorId(), "There is no data in the detection date range"));
            return;
        }
        if (minDate > dataStartTime) {
            dataStartTime = minDate;
        }
        if (maxDate < dataEndTime) {
            dataEndTime = maxDate;
        }
        // normalize start/end time to make it consistent with feature data agg result
        dataStartTime = dataStartTime - dataStartTime % interval;
        dataEndTime = dataEndTime - dataEndTime % interval;
        logger.debug("adjusted date range: start: {}, end: {}, taskId: {}", dataStartTime, dataEndTime, taskId);
        if ((dataEndTime - dataStartTime) < NUM_MIN_SAMPLES * interval) {
            internalListener.onFailure(new AnomalyDetectionException("There is not enough data to train model").countedInStats(false));
            return;
        }
        consumer.accept(dataStartTime, dataEndTime);
    }, e -> {
        internalListener.onFailure(e);
    }));
}
Also used : AnomalyResultBulkIndexHandler(org.opensearch.ad.transport.handler.AnomalyResultBulkIndexHandler) ModelManager(org.opensearch.ad.ml.ModelManager) HashRing(org.opensearch.ad.cluster.HashRing) LimitExceededException(org.opensearch.ad.common.exception.LimitExceededException) ADTaskState(org.opensearch.ad.model.ADTaskState) AnomalyDetectorFunction(org.opensearch.ad.rest.handler.AnomalyDetectorFunction) ADIndex(org.opensearch.ad.indices.ADIndex) Map(java.util.Map) ActionListener(org.opensearch.action.ActionListener) BATCH_TASK_PIECE_SIZE(org.opensearch.ad.settings.AnomalyDetectorSettings.BATCH_TASK_PIECE_SIZE) ADStatsNodeResponse(org.opensearch.ad.transport.ADStatsNodeResponse) MAX_RUNNING_ENTITIES_PER_DETECTOR_FOR_HISTORICAL_ANALYSIS(org.opensearch.ad.settings.AnomalyDetectorSettings.MAX_RUNNING_ENTITIES_PER_DETECTOR_FOR_HISTORICAL_ANALYSIS) Client(org.opensearch.client.Client) AD_BATCH_TASK_THREAD_POOL_NAME(org.opensearch.ad.AnomalyDetectorPlugin.AD_BATCH_TASK_THREAD_POOL_NAME) TimeValue(org.opensearch.common.unit.TimeValue) TransportRequestOptions(org.opensearch.transport.TransportRequestOptions) Settings(org.opensearch.common.settings.Settings) NO_ELIGIBLE_NODE_TO_RUN_DETECTOR(org.opensearch.ad.constant.CommonErrorMessages.NO_ELIGIBLE_NODE_TO_RUN_DETECTOR) TransportService(org.opensearch.transport.TransportService) RandomCutForest(com.amazon.randomcutforest.RandomCutForest) Logger(org.apache.logging.log4j.Logger) PriorityTracker(org.opensearch.ad.caching.PriorityTracker) ExceptionUtil(org.opensearch.ad.util.ExceptionUtil) ActionListenerResponseHandler(org.opensearch.action.ActionListenerResponseHandler) SearchFeatureDao(org.opensearch.ad.feature.SearchFeatureDao) CheckedRunnable(org.opensearch.common.CheckedRunnable) EXECUTION_END_TIME_FIELD(org.opensearch.ad.model.ADTask.EXECUTION_END_TIME_FIELD) InjectSecurity(org.opensearch.commons.InjectSecurity) BoolQueryBuilder(org.opensearch.index.query.BoolQueryBuilder) BATCH_TASK_PIECE_INTERVAL_SECONDS(org.opensearch.ad.settings.AnomalyDetectorSettings.BATCH_TASK_PIECE_INTERVAL_SECONDS) ADCircuitBreakerService(org.opensearch.ad.breaker.ADCircuitBreakerService) MAX_BATCH_TASK_PER_NODE(org.opensearch.ad.settings.AnomalyDetectorSettings.MAX_BATCH_TASK_PER_NODE) StringTerms(org.opensearch.search.aggregations.bucket.terms.StringTerms) ThreadPool(org.opensearch.threadpool.ThreadPool) EnabledSetting(org.opensearch.ad.settings.EnabledSetting) AnomalyDetectorSettings(org.opensearch.ad.settings.AnomalyDetectorSettings) ArrayList(java.util.ArrayList) AnomalyDetector(org.opensearch.ad.model.AnomalyDetector) BiConsumer(java.util.function.BiConsumer) SearchRequest(org.opensearch.action.search.SearchRequest) INIT_PROGRESS_FIELD(org.opensearch.ad.model.ADTask.INIT_PROGRESS_FIELD) SinglePointFeatures(org.opensearch.ad.feature.SinglePointFeatures) FeatureManager(org.opensearch.ad.feature.FeatureManager) MAX_TOP_ENTITIES_FOR_HISTORICAL_ANALYSIS(org.opensearch.ad.settings.AnomalyDetectorSettings.MAX_TOP_ENTITIES_FOR_HISTORICAL_ANALYSIS) ADStatsNodesAction(org.opensearch.ad.transport.ADStatsNodesAction) AggregationBuilders(org.opensearch.search.aggregations.AggregationBuilders) CommonErrorMessages(org.opensearch.ad.constant.CommonErrorMessages) ClusterService(org.opensearch.cluster.service.ClusterService) StatNames(org.opensearch.ad.stats.StatNames) ParseUtils(org.opensearch.ad.util.ParseUtils) InternalMin(org.opensearch.search.aggregations.metrics.InternalMin) AD_EXECUTING_BATCH_TASK_COUNT(org.opensearch.ad.stats.StatNames.AD_EXECUTING_BATCH_TASK_COUNT) ResourceNotFoundException(org.opensearch.ad.common.exception.ResourceNotFoundException) ADStats(org.opensearch.ad.stats.ADStats) ParseUtils.isNullOrEmpty(org.opensearch.ad.util.ParseUtils.isNullOrEmpty) ADTaskCancelledException(org.opensearch.ad.common.exception.ADTaskCancelledException) ADStatsRequest(org.opensearch.ad.transport.ADStatsRequest) AnomalyDetectionException(org.opensearch.ad.common.exception.AnomalyDetectionException) DiscoveryNode(org.opensearch.cluster.node.DiscoveryNode) AggregationBuilder(org.opensearch.search.aggregations.AggregationBuilder) InternalMax(org.opensearch.search.aggregations.metrics.InternalMax) TASK_PROGRESS_FIELD(org.opensearch.ad.model.ADTask.TASK_PROGRESS_FIELD) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) ADBatchTaskRemoteExecutionAction(org.opensearch.ad.transport.ADBatchTaskRemoteExecutionAction) ADTaskType(org.opensearch.ad.model.ADTaskType) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) Objects(java.util.Objects) List(java.util.List) AnomalyResult(org.opensearch.ad.model.AnomalyResult) SearchSourceBuilder(org.opensearch.search.builder.SearchSourceBuilder) DEFAULT_JVM_HEAP_USAGE_THRESHOLD(org.opensearch.ad.breaker.MemoryCircuitBreaker.DEFAULT_JVM_HEAP_USAGE_THRESHOLD) DetectionDateRange(org.opensearch.ad.model.DetectionDateRange) NUM_MIN_SAMPLES(org.opensearch.ad.settings.AnomalyDetectorSettings.NUM_MIN_SAMPLES) Optional(java.util.Optional) AnomalyDescriptor(com.amazon.randomcutforest.parkservices.AnomalyDescriptor) ADTask(org.opensearch.ad.model.ADTask) FeatureData(org.opensearch.ad.model.FeatureData) HashMap(java.util.HashMap) Deque(java.util.Deque) ThreadedActionListener(org.opensearch.action.support.ThreadedActionListener) CURRENT_PIECE_FIELD(org.opensearch.ad.model.ADTask.CURRENT_PIECE_FIELD) ImmutableList(com.google.common.collect.ImmutableList) ADBatchAnomalyResultResponse(org.opensearch.ad.transport.ADBatchAnomalyResultResponse) JVM_HEAP_USAGE(org.opensearch.ad.stats.InternalStatNames.JVM_HEAP_USAGE) EndRunException(org.opensearch.ad.common.exception.EndRunException) ADBatchAnomalyResultRequest(org.opensearch.ad.transport.ADBatchAnomalyResultRequest) AGG_NAME_MAX_TIME(org.opensearch.ad.constant.CommonName.AGG_NAME_MAX_TIME) RangeQueryBuilder(org.opensearch.index.query.RangeQueryBuilder) MAX_TOP_ENTITIES_LIMIT_FOR_HISTORICAL_ANALYSIS(org.opensearch.ad.settings.AnomalyDetectorSettings.MAX_TOP_ENTITIES_LIMIT_FOR_HISTORICAL_ANALYSIS) TermQueryBuilder(org.opensearch.index.query.TermQueryBuilder) AnomalyDetectionIndices(org.opensearch.ad.indices.AnomalyDetectionIndices) STATE_FIELD(org.opensearch.ad.model.ADTask.STATE_FIELD) WORKER_NODE_FIELD(org.opensearch.ad.model.ADTask.WORKER_NODE_FIELD) Entity(org.opensearch.ad.model.Entity) TermsAggregationBuilder(org.opensearch.search.aggregations.bucket.terms.TermsAggregationBuilder) ThresholdedRandomCutForest(com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest) Clock(java.time.Clock) IntervalTimeConfiguration(org.opensearch.ad.model.IntervalTimeConfiguration) LogManager(org.apache.logging.log4j.LogManager) AGG_NAME_MIN_TIME(org.opensearch.ad.constant.CommonName.AGG_NAME_MIN_TIME) AnomalyDetectionException(org.opensearch.ad.common.exception.AnomalyDetectionException) SearchRequest(org.opensearch.action.search.SearchRequest) InternalMax(org.opensearch.search.aggregations.metrics.InternalMax) BoolQueryBuilder(org.opensearch.index.query.BoolQueryBuilder) InternalMin(org.opensearch.search.aggregations.metrics.InternalMin) IntervalTimeConfiguration(org.opensearch.ad.model.IntervalTimeConfiguration) TermQueryBuilder(org.opensearch.index.query.TermQueryBuilder) ResourceNotFoundException(org.opensearch.ad.common.exception.ResourceNotFoundException) DetectionDateRange(org.opensearch.ad.model.DetectionDateRange) SearchSourceBuilder(org.opensearch.search.builder.SearchSourceBuilder)

Aggregations

RandomCutForest (com.amazon.randomcutforest.RandomCutForest)1 AnomalyDescriptor (com.amazon.randomcutforest.parkservices.AnomalyDescriptor)1 ThresholdedRandomCutForest (com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest)1 ImmutableList (com.google.common.collect.ImmutableList)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 ImmutableSet (com.google.common.collect.ImmutableSet)1 Clock (java.time.Clock)1 Instant (java.time.Instant)1 ArrayList (java.util.ArrayList)1 Deque (java.util.Deque)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Map (java.util.Map)1 Objects (java.util.Objects)1 Optional (java.util.Optional)1 BiConsumer (java.util.function.BiConsumer)1 Collectors (java.util.stream.Collectors)1 LogManager (org.apache.logging.log4j.LogManager)1 Logger (org.apache.logging.log4j.Logger)1 ActionListener (org.opensearch.action.ActionListener)1