Search in sources:

Example 1 with EndRunException

use of org.opensearch.ad.common.exception.EndRunException in project anomaly-detection by opensearch-project.

the class EntityColdStarter method getEntityColdStartData.

/**
 * Asynchronously collects cold start (training) data for a single entity.
 *
 * As originally described: the earliest and latest timestamps are determined,
 * a bounded number of samples is taken between them at a fixed stride, missing
 * values are interpolated only between neighboring samples that are present,
 * and the result is turned into shingles; only full shingles are used for cold
 * start. In this method the stride and the number of samples come from
 * {@code selectRangeParam(detector)}, and the sampling itself is delegated to
 * {@code getFeatures}.
 *
 * The flow is a chain of callbacks: load the detector, look up the entity's
 * earliest data time, load the detector job, then start fetching features at
 * round 0. Any failure along the chain is forwarded to {@code listener}.
 *
 * @param detectorId detector Id
 * @param entity the entity's information
 * @param listener listener to return training data; receives an empty Optional
 *                 when the entity has no earliest data time
 */
private void getEntityColdStartData(String detectorId, Entity entity, ActionListener<Optional<List<double[][]>>> listener) {
    ActionListener<Optional<AnomalyDetector>> getDetectorListener = ActionListener.wrap(maybeDetector -> {
        // Without a detector definition there is nothing to train against.
        if (!maybeDetector.isPresent()) {
            listener.onFailure(new EndRunException(detectorId, "AnomalyDetector is not available.", false));
            return;
        }

        List<double[][]> coldStartData = new ArrayList<>();
        AnomalyDetector detector = maybeDetector.get();

        ActionListener<Optional<Long>> minTimeListener = ActionListener.wrap(earliestDataTime -> {
            // No earliest timestamp: report "no training data" instead of failing.
            if (!earliestDataTime.isPresent()) {
                listener.onResponse(Optional.empty());
                return;
            }

            long startTimeMs = earliestDataTime.get();

            nodeStateManager.getAnomalyDetectorJob(detectorId, ActionListener.wrap(maybeJob -> {
                if (!maybeJob.isPresent()) {
                    listener.onFailure(new EndRunException(detectorId, "AnomalyDetector job is not available.", false));
                    return;
                }

                // The end time is in milliseconds because the start time is assumed to be in
                // milliseconds. Opensearch uses a set of preconfigured formats to recognize and
                // parse these strings into a long value representing milliseconds-since-the-epoch
                // in UTC. More on https://tinyurl.com/wub4fk92
                //
                // Existing samples either predate or coincide with cold start data. In either
                // case, combining them without reordering based on time stamps is not ok: we
                // might introduce anomalies in the process.
                // An ideal solution would record time stamps of data points, combine existing
                // samples with cold start samples, and interpolate afterwards. Recording time
                // stamps requires changes across the board, like bwc in checkpoints. A pragmatic
                // solution is to use the job enabled time as the end of the cold start period,
                // because that makes it easy to combine existing samples with cold start data:
                // we just need to append existing samples after cold start data, as existing
                // samples all happen after the job enabled time. There might be some gaps between
                // the last cold start sample and the first accumulated sample. We accept that
                // precision loss in the current solution.
                long endTimeMs = maybeJob.get().getEnabledTime().toEpochMilli();

                Pair<Integer, Integer> rangeParams = selectRangeParam(detector);
                int stride = rangeParams.getLeft();
                int numberOfSamples = rangeParams.getRight();

                // Sampling starts with round 0.
                getFeatures(listener, 0, coldStartData, detector, entity, stride, numberOfSamples, startTimeMs, endTimeMs);
            }, listener::onFailure));
        }, listener::onFailure);

        searchFeatureDao
            .getEntityMinDataTime(
                detector,
                entity,
                new ThreadedActionListener<>(logger, threadPool, AnomalyDetectorPlugin.AD_THREAD_POOL_NAME, minTimeListener, false)
            );
    }, listener::onFailure);

    nodeStateManager
        .getAnomalyDetector(
            detectorId,
            new ThreadedActionListener<>(logger, threadPool, AnomalyDetectorPlugin.AD_THREAD_POOL_NAME, getDetectorListener, false)
        );
}
Also used : Arrays(java.util.Arrays) Precision(com.amazon.randomcutforest.config.Precision) RequestPriority(org.opensearch.ad.ratelimit.RequestPriority) ThreadPool(org.opensearch.threadpool.ThreadPool) SimpleImmutableEntry(java.util.AbstractMap.SimpleImmutableEntry) ThreadedActionListener(org.opensearch.action.support.ThreadedActionListener) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) AnomalyDetectorSettings(org.opensearch.ad.settings.AnomalyDetectorSettings) ArrayList(java.util.ArrayList) AnomalyDetectionException(org.opensearch.ad.common.exception.AnomalyDetectionException) Throwables(org.apache.logging.log4j.core.util.Throwables) DoorKeeper(org.opensearch.ad.caching.DoorKeeper) Pair(org.apache.commons.lang3.tuple.Pair) Duration(java.time.Duration) Map(java.util.Map) AnomalyDetector(org.opensearch.ad.model.AnomalyDetector) ActionListener(org.opensearch.action.ActionListener) EndRunException(org.opensearch.ad.common.exception.EndRunException) Interpolator(org.opensearch.ad.dataprocessor.Interpolator) COOLDOWN_MINUTES(org.opensearch.ad.settings.AnomalyDetectorSettings.COOLDOWN_MINUTES) FeatureManager(org.opensearch.ad.feature.FeatureManager) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Settings(org.opensearch.common.settings.Settings) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) MaintenanceState(org.opensearch.ad.MaintenanceState) List(java.util.List) Stream(java.util.stream.Stream) Logger(org.apache.logging.log4j.Logger) ExceptionUtil(org.opensearch.ad.util.ExceptionUtil) Entity(org.opensearch.ad.model.Entity) NodeStateManager(org.opensearch.ad.NodeStateManager) AnomalyDetectorJob(org.opensearch.ad.model.AnomalyDetectorJob) ThresholdedRandomCutForest(com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest) Entry(java.util.Map.Entry) SearchFeatureDao(org.opensearch.ad.feature.SearchFeatureDao) Clock(java.time.Clock) Optional(java.util.Optional) 
AnomalyDetectorPlugin(org.opensearch.ad.AnomalyDetectorPlugin) Queue(java.util.Queue) ArrayDeque(java.util.ArrayDeque) IntervalTimeConfiguration(org.opensearch.ad.model.IntervalTimeConfiguration) LogManager(org.apache.logging.log4j.LogManager) CheckpointWriteWorker(org.opensearch.ad.ratelimit.CheckpointWriteWorker) EndRunException(org.opensearch.ad.common.exception.EndRunException) Optional(java.util.Optional) ArrayList(java.util.ArrayList) AnomalyDetectorJob(org.opensearch.ad.model.AnomalyDetectorJob) AnomalyDetector(org.opensearch.ad.model.AnomalyDetector) Pair(org.apache.commons.lang3.tuple.Pair)

Example 2 with EndRunException

use of org.opensearch.ad.common.exception.EndRunException in project anomaly-detection by opensearch-project.

the class NodeStateManager method setException.

/**
 * Records an exception for a detector, keeping at most one exception per detector.
 *
 * For a single-stream detector there is one exception per interval; when an
 * interval starts, it fetches and clears the exception. For HCAD there can be
 * one exception per entity, so to avoid bloating memory only a single exception
 * is retained. An exception serves three purposes:
 * 1) stop the detector if nothing else works;
 * 2) increment error stats to ticket about a high-error domain;
 * 3) debugging.
 *
 * For HCAD, all entities' exceptions are recorded in anomaly results, which
 * covers 3). Keeping any one exception covers 2). What remains is making sure an
 * EndRunException is not lost, so when an exception would override a stored one,
 * the choice is delegated to
 * {@code ExceptionUtil.selectHigherPriorityException}: the new exception is
 * stored only if that method returns it.
 *
 * The call is a no-op when {@code e} is null or {@code detectorId} is empty.
 *
 * @param detectorId Detector Id
 * @param e Exception to set
 */
public void setException(String detectorId, Exception e) {
    boolean hasInput = e != null && !Strings.isEmpty(detectorId);
    if (!hasInput) {
        return;
    }

    NodeState nodeState = states.computeIfAbsent(detectorId, id -> new NodeState(id, clock));
    Optional<Exception> stored = nodeState.getException();

    // The incoming exception wins when nothing is stored yet, or when the
    // priority selection hands back the incoming instance itself.
    boolean incomingWins = !stored.isPresent() || ExceptionUtil.selectHigherPriorityException(e, stored.get()) == e;
    if (incomingWins) {
        nodeState.setException(e);
    }
}
Also used : EndRunException(org.opensearch.ad.common.exception.EndRunException)

Example 3 with EndRunException

use of org.opensearch.ad.common.exception.EndRunException in project anomaly-detection by opensearch-project.

the class AbstractRetriever method parseAggregation.

/**
 * Extracts a single numeric feature value from an aggregation.
 *
 * A single-bucket aggregation is unwrapped to its only sub-aggregation when it
 * has exactly one; otherwise it is examined as is. A single-value metric yields
 * its value, and a t-digest percentiles aggregation yields the value of the
 * first percentile its iterator returns.
 *
 * @param aggregationToParse aggregation to extract the value from
 * @return the parsed value
 * @throws EndRunException if no value can be extracted from the aggregation
 */
protected double parseAggregation(Aggregation aggregationToParse) {
    Aggregation candidate = aggregationToParse;

    if (candidate instanceof InternalSingleBucketAggregation) {
        InternalAggregations nested = ((InternalSingleBucketAggregation) candidate).getAggregations();
        if (nested != null) {
            List<Aggregation> children = nested.asList();
            // we only accept a single value as feature
            if (children.size() == 1) {
                candidate = children.get(0);
            }
        }
    }

    if (candidate instanceof SingleValue) {
        return ((SingleValue) candidate).value();
    }

    if (candidate instanceof InternalTDigestPercentiles) {
        Iterator<Percentile> percentiles = ((InternalTDigestPercentiles) candidate).iterator();
        if (percentiles.hasNext()) {
            return percentiles.next().getValue();
        }
    }

    // Nothing usable was found; the message names the (possibly unwrapped) aggregation.
    throw new EndRunException("Failed to parse aggregation " + candidate, true).countedInStats(false);
}
Also used : Aggregation(org.opensearch.search.aggregations.Aggregation) InternalSingleBucketAggregation(org.opensearch.search.aggregations.bucket.InternalSingleBucketAggregation) MultiBucketsAggregation(org.opensearch.search.aggregations.bucket.MultiBucketsAggregation) InternalAggregations(org.opensearch.search.aggregations.InternalAggregations) SingleValue(org.opensearch.search.aggregations.metrics.NumericMetricsAggregation.SingleValue) EndRunException(org.opensearch.ad.common.exception.EndRunException) InternalTDigestPercentiles(org.opensearch.search.aggregations.metrics.InternalTDigestPercentiles) Iterator(java.util.Iterator) InternalSingleBucketAggregation(org.opensearch.search.aggregations.bucket.InternalSingleBucketAggregation)

Example 4 with EndRunException

use of org.opensearch.ad.common.exception.EndRunException in project anomaly-detection by opensearch-project.

the class NodeStateManager method onGetDetectorResponse.

/**
 * Builds the listener that handles the get-response for a detector document.
 *
 * On a missing response or document, the wrapped listener receives an empty
 * Optional. Otherwise the source is parsed into an {@code AnomalyDetector}:
 * a detector with no enabled feature fails the listener with an
 * {@code EndRunException}; any other detector is stored in the node state for
 * {@code adID} and returned. If parsing (or anything else inside the try block)
 * throws, the error is logged and the listener receives an empty Optional.
 *
 * @param adID detector Id
 * @param listener listener to notify with the parsed detector, if any
 * @return a listener for the get-response; failures are forwarded to {@code listener}
 */
private ActionListener<GetResponse> onGetDetectorResponse(String adID, ActionListener<Optional<AnomalyDetector>> listener) {
    return ActionListener.wrap(getResponse -> {
        boolean documentFound = getResponse != null && getResponse.isExists();
        if (!documentFound) {
            listener.onResponse(Optional.empty());
            return;
        }

        String source = getResponse.getSourceAsString();
        LOG.debug("Fetched anomaly detector: {}", source);

        try (
            XContentParser contentParser = XContentType.JSON
                .xContent()
                .createParser(xContentRegistry, LoggingDeprecationHandler.INSTANCE, source)
        ) {
            ensureExpectedToken(XContentParser.Token.START_OBJECT, contentParser.nextToken(), contentParser);
            AnomalyDetector parsedDetector = AnomalyDetector.parse(contentParser, getResponse.getId());

            // end execution if all features are disabled
            if (parsedDetector.getEnabledFeatureIds().isEmpty()) {
                listener
                    .onFailure(
                        new EndRunException(adID, CommonErrorMessages.ALL_FEATURES_DISABLED_ERR_MSG, true).countedInStats(false)
                    );
                return;
            }

            // Store the detector definition in this detector's node state before handing it back.
            NodeState nodeState = states.computeIfAbsent(adID, id -> new NodeState(id, clock));
            nodeState.setDetectorDef(parsedDetector);
            listener.onResponse(Optional.of(parsedDetector));
        } catch (Exception parseFailure) {
            LOG.error("Fail to parse detector {}", adID);
            LOG.error("Stack trace:", parseFailure);
            listener.onResponse(Optional.empty());
        }
    }, listener::onFailure);
}
Also used : EndRunException(org.opensearch.ad.common.exception.EndRunException) XContentParser(org.opensearch.common.xcontent.XContentParser) AnomalyDetector(org.opensearch.ad.model.AnomalyDetector)

Example 5 with EndRunException

use of org.opensearch.ad.common.exception.EndRunException in project anomaly-detection by opensearch-project.

the class AnomalyDetectorJobRunner method indexAnomalyResult.

/**
 * Handles a successful anomaly result response for one detector run.
 *
 * The detector's entry in {@code detectorEndRunExceptionCount} is removed first.
 * When the response has no usable score (non-positive or NaN) and no error, only
 * the realtime task is updated. Otherwise an {@code AnomalyResult} is built for
 * the data window (detection/execution start times shifted back by the job's
 * window delay), indexed, and the realtime task is updated. The lock is released
 * in all cases.
 *
 * @param jobParameter job configuration of the detector
 * @param lockService service used to release the lock
 * @param lock lock held for this run
 * @param detectionStartTime start of the detection interval
 * @param executionStartTime time the execution started
 * @param response the anomaly result response to persist
 */
private void indexAnomalyResult(AnomalyDetectorJob jobParameter, LockService lockService, LockModel lock, Instant detectionStartTime, Instant executionStartTime, AnomalyResultResponse response) {
    String detectorId = jobParameter.getName();
    detectorEndRunExceptionCount.remove(detectorId);
    try {
        // We return 0 or Double.NaN rcf score if there is no error.
        double score = response.getAnomalyScore();
        boolean scoreMissing = score <= 0 || Double.isNaN(score);

        if (scoreMissing && response.getError() == null) {
            updateRealtimeTask(response, detectorId);
        } else {
            IntervalTimeConfiguration delay = (IntervalTimeConfiguration) jobParameter.getWindowDelay();
            Instant dataStartTime = detectionStartTime.minus(delay.getInterval(), delay.getUnit());
            Instant dataEndTime = executionStartTime.minus(delay.getInterval(), delay.getUnit());
            User user = jobParameter.getUser();

            if (response.getError() != null) {
                log.info("Anomaly result action run successfully for {} with error {}", detectorId, response.getError());
            }

            AnomalyResult resultToIndex = response
                .toAnomalyResult(
                    detectorId,
                    dataStartTime,
                    dataEndTime,
                    executionStartTime,
                    Instant.now(),
                    anomalyDetectionIndices.getSchemaVersion(ADIndex.RESULT),
                    user,
                    response.getError()
                );
            anomalyResultHandler.index(resultToIndex, detectorId, jobParameter.getResultIndex());
            updateRealtimeTask(response, detectorId);
        }
    } catch (EndRunException endRun) {
        // EndRunException gets dedicated handling; everything else is only logged.
        handleAdException(jobParameter, lockService, lock, detectionStartTime, executionStartTime, endRun);
    } catch (Exception failure) {
        log.error("Failed to index anomaly result for " + detectorId, failure);
    } finally {
        releaseLock(jobParameter, lockService, lock);
    }
}
Also used : User(org.opensearch.commons.authuser.User) EndRunException(org.opensearch.ad.common.exception.EndRunException) Instant(java.time.Instant) IntervalTimeConfiguration(org.opensearch.ad.model.IntervalTimeConfiguration) AnomalyResult(org.opensearch.ad.model.AnomalyResult) ResourceNotFoundException(org.opensearch.ad.common.exception.ResourceNotFoundException) AnomalyDetectionException(org.opensearch.ad.common.exception.AnomalyDetectionException) EndRunException(org.opensearch.ad.common.exception.EndRunException) IOException(java.io.IOException)

Aggregations

EndRunException (org.opensearch.ad.common.exception.EndRunException)32 IndexNotFoundException (org.opensearch.index.IndexNotFoundException)12 OpenSearchTimeoutException (org.opensearch.OpenSearchTimeoutException)10 PlainActionFuture (org.opensearch.action.support.PlainActionFuture)10 LimitExceededException (org.opensearch.ad.common.exception.LimitExceededException)10 ActionListener (org.opensearch.action.ActionListener)9 SearchPhaseExecutionException (org.opensearch.action.search.SearchPhaseExecutionException)9 AnomalyDetectionException (org.opensearch.ad.common.exception.AnomalyDetectionException)9 AnomalyDetector (org.opensearch.ad.model.AnomalyDetector)9 IOException (java.io.IOException)8 NoSuchElementException (java.util.NoSuchElementException)8 Optional (java.util.Optional)8 ActionFilters (org.opensearch.action.support.ActionFilters)7 NodeStateManager (org.opensearch.ad.NodeStateManager)7 ResourceNotFoundException (org.opensearch.ad.common.exception.ResourceNotFoundException)7 ArrayList (java.util.ArrayList)6 ParameterizedMessage (org.apache.logging.log4j.message.ParameterizedMessage)6 InternalFailure (org.opensearch.ad.common.exception.InternalFailure)6 ConnectException (java.net.ConnectException)5 List (java.util.List)5