use of org.opensearch.ad.common.exception.EndRunException in project anomaly-detection by opensearch-project.
the class EntityColdStarter method getEntityColdStartData.
/**
 * Get training data for an entity.
 *
 * We first note the maximum and minimum timestamps, and sample at most 24 points
 * (with neighboring samples 60 points apart) between those minimum and maximum
 * timestamps. Samples can be missing. We only interpolate points between present
 * neighboring samples. We then transform the samples and interpolated points into
 * shingles. Finally, the full shingles are used for cold start.
 *
 * @param detectorId detector Id
 * @param entity the entity's information
 * @param listener listener to return training data
 */
private void getEntityColdStartData(String detectorId, Entity entity, ActionListener<Optional<List<double[][]>>> listener) {
    ActionListener<Optional<AnomalyDetector>> getDetectorListener = ActionListener.wrap(detectorOp -> {
        if (!detectorOp.isPresent()) {
            listener.onFailure(new EndRunException(detectorId, "AnomalyDetector is not available.", false));
            return;
        }
        List<double[][]> coldStartData = new ArrayList<>();
        AnomalyDetector detector = detectorOp.get();
        ActionListener<Optional<Long>> minTimeListener = ActionListener.wrap(earliest -> {
            if (earliest.isPresent()) {
                long startTimeMs = earliest.get().longValue();
                nodeStateManager.getAnomalyDetectorJob(detectorId, ActionListener.wrap(jobOp -> {
                    if (!jobOp.isPresent()) {
                        listener.onFailure(new EndRunException(detectorId, "AnomalyDetector job is not available.", false));
                        return;
                    }
                    AnomalyDetectorJob job = jobOp.get();
                    // End time uses milliseconds, as the start time is assumed to be in milliseconds.
                    // OpenSearch uses a set of preconfigured formats to recognize and parse these
                    // strings into a long value representing milliseconds-since-the-epoch in UTC.
                    // More on https://tinyurl.com/wub4fk92
                    // Existing samples either predate or coincide with cold start data. In either
                    // case, combining them without reordering based on timestamps is not okay: we
                    // might introduce anomalies in the process.
                    // An ideal solution would be to record timestamps of data points, combine
                    // existing samples and cold start samples, and do interpolation afterwards.
                    // Recording timestamps requires changes across the board, like backward
                    // compatibility (bwc) in checkpoints. A pragmatic solution is to use the job
                    // enabled time as the end time of the cold start period, since that makes it
                    // easier to combine existing samples with cold start data: we just need to
                    // append existing samples after cold start data, as existing samples all
                    // happen after the job enabled time. There might be some gap between the last
                    // cold start sample and the first accumulated sample. We accept that precision
                    // loss in the current solution.
                    long endTimeMs = job.getEnabledTime().toEpochMilli();
                    Pair<Integer, Integer> params = selectRangeParam(detector);
                    int stride = params.getLeft();
                    int numberOfSamples = params.getRight();
                    // we start with round 0
                    getFeatures(listener, 0, coldStartData, detector, entity, stride, numberOfSamples, startTimeMs, endTimeMs);
                }, listener::onFailure));
            } else {
                listener.onResponse(Optional.empty());
            }
        }, listener::onFailure);
        searchFeatureDao
            .getEntityMinDataTime(
                detector,
                entity,
                new ThreadedActionListener<>(logger, threadPool, AnomalyDetectorPlugin.AD_THREAD_POOL_NAME, minTimeListener, false)
            );
    }, listener::onFailure);
    nodeStateManager
        .getAnomalyDetector(
            detectorId,
            new ThreadedActionListener<>(logger, threadPool, AnomalyDetectorPlugin.AD_THREAD_POOL_NAME, getDetectorListener, false)
        );
}
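The interpolation step in the Javadoc is the non-obvious part of this flow. Below is a minimal, self-contained sketch of linear interpolation between two present neighboring samples; the helper is made up for illustration and is not the project's interpolator or its getFeatures(...) round-trip machinery:

// Hypothetical helper: linearly interpolate totalPoints multi-dimensional points
// between two present samples, inclusive of both endpoints. Missing samples in the
// cold start range would be filled this way only when both neighbors are present.
static double[][] interpolateBetween(double[] left, double[] right, int totalPoints) {
    double[][] points = new double[totalPoints][left.length];
    for (int i = 0; i < totalPoints; i++) {
        double ratio = totalPoints == 1 ? 0 : (double) i / (totalPoints - 1);
        for (int d = 0; d < left.length; d++) {
            points[i][d] = left[d] + ratio * (right[d] - left[d]);
        }
    }
    return points;
}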
use of org.opensearch.ad.common.exception.EndRunException in project anomaly-detection by opensearch-project.
the class NodeStateManager method setException.
/**
 * For a single-stream detector, we have one exception per interval. When
 * an interval starts, it fetches and clears the exception.
 * For HCAD, there can be one exception per entity. To avoid bloating memory
 * with exceptions, we keep only one exception. An exception has 3 purposes:
 * 1) stop the detector if nothing else works;
 * 2) increment error stats so we can ticket high-error domains;
 * 3) debugging.
 *
 * For HCAD, we record all entities' exceptions in anomaly results, so 3)
 * is covered. As long as we keep one exception among all exceptions, 2)
 * is covered. So the only thing we have to pay attention to is keeping
 * EndRunException: when overriding an exception, EndRunException has priority.
 * @param detectorId Detector Id
 * @param e Exception to set
 */
public void setException(String detectorId, Exception e) {
    if (e == null || Strings.isEmpty(detectorId)) {
        return;
    }
    NodeState state = states.computeIfAbsent(detectorId, d -> new NodeState(detectorId, clock));
    Optional<Exception> exception = state.getException();
    if (exception.isPresent()) {
        Exception higherPriorityException = ExceptionUtil.selectHigherPriorityException(e, exception.get());
        if (higherPriorityException != e) {
            return;
        }
    }
    state.setException(e);
}
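The overriding rule can be sketched in isolation. The helper below is a simplified stand-in for ExceptionUtil.selectHigherPriorityException (assumed behavior, not the project's actual implementation); it encodes only the guarantee stated in the Javadoc, that an EndRunException is never displaced:

// Simplified sketch of the priority rule: keep an existing EndRunException
// unless the incoming exception is itself an EndRunException.
static Exception selectHigherPriority(Exception incoming, Exception existing) {
    if (existing instanceof EndRunException && !(incoming instanceof EndRunException)) {
        return existing;
    }
    return incoming; // otherwise prefer the more recent exception
}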
use of org.opensearch.ad.common.exception.EndRunException in project anomaly-detection by opensearch-project.
the class AbstractRetriever method parseAggregation.
protected double parseAggregation(Aggregation aggregationToParse) {
    Double result = null;
    if (aggregationToParse instanceof InternalSingleBucketAggregation) {
        InternalAggregations bucket = ((InternalSingleBucketAggregation) aggregationToParse).getAggregations();
        if (bucket != null) {
            List<Aggregation> aggrs = bucket.asList();
            if (aggrs.size() == 1) {
                // we only accept a single value as feature
                aggregationToParse = aggrs.get(0);
            }
        }
    }
    final Aggregation aggregation = aggregationToParse;
    if (aggregation instanceof SingleValue) {
        result = ((SingleValue) aggregation).value();
    } else if (aggregation instanceof InternalTDigestPercentiles) {
        Iterator<Percentile> percentile = ((InternalTDigestPercentiles) aggregation).iterator();
        if (percentile.hasNext()) {
            result = percentile.next().getValue();
        }
    }
    return Optional
        .ofNullable(result)
        .orElseThrow(() -> new EndRunException("Failed to parse aggregation " + aggregation, true).countedInStats(false));
}
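Note the contract this enforces: a feature aggregation must reduce to exactly one value, either a SingleValue aggregation (possibly nested inside one single-bucket aggregation) or a TDigest percentiles aggregation yielding one percentile. Anything else throws an EndRunException whose boolean argument (true) asks the framework to end the detector run rather than retry. A hedged caller-side sketch, where the aggregation name "feature_0" is made up:

// Hypothetical caller: parse one feature value out of a search response.
// A multi-valued or unexpected aggregation ends the run instead of retrying.
double feature = parseAggregation(searchResponse.getAggregations().get("feature_0"));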
use of org.opensearch.ad.common.exception.EndRunException in project anomaly-detection by opensearch-project.
the class NodeStateManager method onGetDetectorResponse.
private ActionListener<GetResponse> onGetDetectorResponse(String adID, ActionListener<Optional<AnomalyDetector>> listener) {
    return ActionListener.wrap(response -> {
        if (response == null || !response.isExists()) {
            listener.onResponse(Optional.empty());
            return;
        }
        String xc = response.getSourceAsString();
        LOG.debug("Fetched anomaly detector: {}", xc);
        try (XContentParser parser = XContentType.JSON.xContent().createParser(xContentRegistry, LoggingDeprecationHandler.INSTANCE, xc)) {
            ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.nextToken(), parser);
            AnomalyDetector detector = AnomalyDetector.parse(parser, response.getId());
            // end execution if all features are disabled
            if (detector.getEnabledFeatureIds().isEmpty()) {
                listener.onFailure(new EndRunException(adID, CommonErrorMessages.ALL_FEATURES_DISABLED_ERR_MSG, true).countedInStats(false));
                return;
            }
            NodeState state = states.computeIfAbsent(adID, id -> new NodeState(id, clock));
            state.setDetectorDef(detector);
            listener.onResponse(Optional.of(detector));
        } catch (Exception t) {
            LOG.error("Fail to parse detector {}", adID);
            LOG.error("Stack trace:", t);
            listener.onResponse(Optional.empty());
        }
    }, listener::onFailure);
}
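The parsed detector is memoized in NodeState (via states.computeIfAbsent and setDetectorDef), so repeated lookups on the same node can avoid re-reading the config index. A hedged usage sketch of the surrounding accessor; the listener wiring is assumed, not copied from the project:

// Hypothetical caller: fetch the detector definition by id. An empty Optional
// covers both "document missing" and "document failed to parse".
nodeStateManager.getAnomalyDetector(adID, ActionListener.wrap(detectorOp -> {
    if (!detectorOp.isPresent()) {
        return; // treat as absent rather than fatal
    }
    AnomalyDetector detector = detectorOp.get();
    // ... run detection against detector ...
}, exception -> LOG.error("Failed to get detector " + adID, exception)));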
use of org.opensearch.ad.common.exception.EndRunException in project anomaly-detection by opensearch-project.
the class AnomalyDetectorJobRunner method indexAnomalyResult.
private void indexAnomalyResult(
    AnomalyDetectorJob jobParameter,
    LockService lockService,
    LockModel lock,
    Instant detectionStartTime,
    Instant executionStartTime,
    AnomalyResultResponse response
) {
    String detectorId = jobParameter.getName();
    detectorEndRunExceptionCount.remove(detectorId);
    try {
        // We return a 0 or Double.NaN RCF score when there is no error.
        if ((response.getAnomalyScore() <= 0 || Double.isNaN(response.getAnomalyScore())) && response.getError() == null) {
            updateRealtimeTask(response, detectorId);
            return;
        }
        IntervalTimeConfiguration windowDelay = (IntervalTimeConfiguration) jobParameter.getWindowDelay();
        Instant dataStartTime = detectionStartTime.minus(windowDelay.getInterval(), windowDelay.getUnit());
        Instant dataEndTime = executionStartTime.minus(windowDelay.getInterval(), windowDelay.getUnit());
        User user = jobParameter.getUser();
        if (response.getError() != null) {
            log.info("Anomaly result action run successfully for {} with error {}", detectorId, response.getError());
        }
        AnomalyResult anomalyResult = response
            .toAnomalyResult(
                detectorId,
                dataStartTime,
                dataEndTime,
                executionStartTime,
                Instant.now(),
                anomalyDetectionIndices.getSchemaVersion(ADIndex.RESULT),
                user,
                response.getError()
            );
        String resultIndex = jobParameter.getResultIndex();
        anomalyResultHandler.index(anomalyResult, detectorId, resultIndex);
        updateRealtimeTask(response, detectorId);
    } catch (EndRunException e) {
        handleAdException(jobParameter, lockService, lock, detectionStartTime, executionStartTime, e);
    } catch (Exception e) {
        log.error("Failed to index anomaly result for " + detectorId, e);
    } finally {
        releaseLock(jobParameter, lockService, lock);
    }
}
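The window-delay arithmetic above is worth spelling out: both data bounds are shifted back by the same delay, so the scored data window lags detection by exactly the configured amount. A minimal, self-contained sketch with made-up interval values:

import java.time.Instant;
import java.time.temporal.ChronoUnit;

public class WindowDelayDemo {
    public static void main(String[] args) {
        // Assume a 1-minute detection window with a 2-minute window delay.
        Instant detectionStartTime = Instant.parse("2023-01-01T00:10:00Z");
        Instant executionStartTime = Instant.parse("2023-01-01T00:11:00Z");
        long delay = 2;                        // stands in for windowDelay.getInterval()
        ChronoUnit unit = ChronoUnit.MINUTES;  // stands in for windowDelay.getUnit()

        // Same shift as in indexAnomalyResult: both bounds move back by the delay.
        Instant dataStartTime = detectionStartTime.minus(delay, unit);
        Instant dataEndTime = executionStartTime.minus(delay, unit);
        System.out.println(dataStartTime + " .. " + dataEndTime); // 00:08 .. 00:09
    }
}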