Use of org.opensearch.ad.common.exception.ClientException in project anomaly-detection by opensearch-project.
The class AnomalyResultTransportAction, method doExecute.
/**
 * Handles one anomaly result request: wraps the caller's listener so that failure
 * statistics and the {@code hcDetectors} set are maintained, rejects the request when
 * the AD plugin is disabled or the memory circuit breaker is open, and otherwise asks
 * {@code stateManager} for the detector and continues in {@code onGetDetector}.
 *
 * All exceptions thrown by AD are subclasses of AnomalyDetectionException.
 * ClientException is a subclass of AnomalyDetectionException. All exceptions visible to
 * the client are under ClientVisible. Two classes directly extend ClientException:
 * - InternalFailure for "root cause unknown failure. Maybe transient." We can keep the
 *   detector running.
 * - EndRunException for "failures that might impact the customer." The method endNow()
 *   indicates whether the client should immediately terminate running a detector.
 *   + endNow() returns true for "unrecoverable issue". We want to terminate the detector
 *     run immediately.
 *   + endNow() returns false for "maybe unrecoverable issue but worth retrying a few more
 *     times." We want to wait for a few more attempts on different requests before
 *     terminating the detector run.
 *
 * AD may not be able to get an anomaly grade but can still find a feature vector.
 * Consider the case when the shingle is not ready: AD then puts NaN as the anomaly grade
 * and returns the feature vector. If AD cannot even find a feature vector, AD throws
 * EndRunException if there is an issue, or otherwise returns an empty response (all the
 * numeric fields are Double.NaN and the feature array is empty, so that the customer can
 * write a painless script).
 *
 * Known causes of EndRunException with endNow returning false:
 * + training data for cold start not available
 * + cold start cannot succeed
 * + unknown prediction error
 * + memory circuit breaker tripped
 * + invalid search query
 *
 * Known causes of EndRunException with endNow returning true:
 * + a model partition's memory size reached limit
 * + models' total memory size reached limit
 * + having trouble querying feature data due to
 *   * index does not exist
 *   * all features have been disabled
 * + anomaly detector is not available
 * + AD plugin is disabled
 * + training data is invalid due to serious internal bug(s)
 *
 * Known causes of InternalFailure:
 * + threshold model node is not available
 * + cluster read/write is blocked
 * + cold start hasn't been finished
 * + fail to get all of rcf model nodes' responses
 * + fail to get threshold model node's response
 * + RCF/Threshold model node failing to get checkpoint to restore model before timeout
 * + detection is throttled because the previous detection query is still running
 *
 * @param task the transport task (not used by the code visible in this method)
 * @param actionRequest the incoming request, converted with
 *        {@code AnomalyResultRequest.fromActionRequest}
 * @param listener receives the anomaly result or the failure
 */
@Override
protected void doExecute(Task task, ActionRequest actionRequest, ActionListener<AnomalyResultResponse> listener) {
    // The stashed thread context is closed by try-with-resources when this method exits.
    try (ThreadContext.StoredContext context = client.threadPool().getThreadContext().stashContext()) {
        AnomalyResultRequest request = AnomalyResultRequest.fromActionRequest(actionRequest);
        String adID = request.getAdID();

        // Keep a handle on the caller's listener; from here on `listener` is the wrapper
        // that does the bookkeeping before delegating.
        ActionListener<AnomalyResultResponse> delegate = listener;
        listener = ActionListener.wrap(response -> {
            hcDetectors.remove(adID);
            delegate.onResponse(response);
        }, failure -> {
            // An AnomalyDetectionException that opts out via isCountedInStats() == false
            // is not counted in the failure stats; every other exception is.
            boolean excludedFromStats = failure instanceof AnomalyDetectionException
                && !((AnomalyDetectionException) failure).isCountedInStats();
            if (!excludedFromStats) {
                adStats.getStat(StatNames.AD_EXECUTE_FAIL_COUNT.getName()).increment();
                if (hcDetectors.contains(adID)) {
                    adStats.getStat(StatNames.AD_HC_EXECUTE_FAIL_COUNT.getName()).increment();
                }
            }
            hcDetectors.remove(adID);
            delegate.onFailure(failure);
        });

        // Thrown rather than passed to the listener directly; the outer catch below
        // forwards it to the wrapped listener, which skips the failure stats for it.
        if (!EnabledSetting.isADPluginEnabled()) {
            throw new EndRunException(adID, CommonErrorMessages.DISABLED_ERR_MSG, true).countedInStats(false);
        }

        adStats.getStat(StatNames.AD_EXECUTE_REQUEST_COUNT.getName()).increment();

        if (adCircuitBreakerService.isOpen()) {
            listener.onFailure(new LimitExceededException(adID, CommonErrorMessages.MEMORY_CIRCUIT_BROKEN_ERR_MSG, false));
        } else {
            try {
                stateManager.getAnomalyDetector(adID, onGetDetector(listener, adID, request));
            } catch (Exception ex) {
                handleExecuteException(ex, listener, adID);
            }
        }
    } catch (Exception e) {
        // `listener` is the wrapper if the failure happened after wrapping, otherwise
        // it is still the caller's original listener.
        LOG.error(e);
        listener.onFailure(e);
    }
}
Use of org.opensearch.ad.common.exception.ClientException in project anomaly-detection by opensearch-project.
The class PreviewAnomalyDetectorTransportAction, method previewExecute.
/**
 * Runs a detector preview, guarded by the memory circuit breaker and by {@code lock},
 * which limits how many previews may run at once.
 *
 * Flow:
 * - If the circuit breaker is open, fail immediately with LimitExceededException.
 * - If no permit can be acquired from {@code lock}, fail with a throttling ClientException.
 * - If the request carries a detector definition, validate it and run it through
 *   {@code anomalyDetectorRunner}; otherwise delegate to {@code previewAnomalyDetector}
 *   with the detector id.
 *
 * The permit is released exactly once per path taken here: by the run-after hook on the
 * listener handed to the asynchronous work, or explicitly on the validation-failure and
 * synchronous-exception paths.
 *
 * @param request preview request supplying the detector (may be null), detector id and
 *        the start/end time of the preview window
 * @param context stored thread context, passed through to the preview execution
 * @param listener notified with the preview response or with the failure
 */
void previewExecute(
    PreviewAnomalyDetectorRequest request,
    ThreadContext.StoredContext context,
    ActionListener<PreviewAnomalyDetectorResponse> listener
) {
    if (adCircuitBreakerService.isOpen()) {
        listener.onFailure(new LimitExceededException(request.getDetectorId(), CommonErrorMessages.MEMORY_CIRCUIT_BROKEN_ERR_MSG, false));
        return;
    }
    try {
        if (!lock.tryAcquire()) {
            listener.onFailure(new ClientException(request.getDetectorId(), CommonErrorMessages.REQUEST_THROTTLED_MSG));
            return;
        }
        try {
            AnomalyDetector detector = request.getDetector();
            String detectorId = request.getDetectorId();
            Instant startTime = request.getStartTime();
            Instant endTime = request.getEndTime();
            // Releases the permit after the caller's listener has been notified.
            ActionListener<PreviewAnomalyDetectorResponse> releaseListener = ActionListener.runAfter(listener, () -> lock.release());
            if (detector != null) {
                String error = validateDetector(detector);
                if (StringUtils.isNotBlank(error)) {
                    // The raw listener is used here, so the permit is released by hand.
                    listener.onFailure(new OpenSearchStatusException(error, RestStatus.BAD_REQUEST));
                    lock.release();
                    return;
                }
                anomalyDetectorRunner.executeDetector(detector, startTime, endTime, context, getPreviewDetectorActionListener(releaseListener, detector));
            } else {
                // detector is null on this branch; the preview is driven by detectorId.
                previewAnomalyDetector(releaseListener, detectorId, detector, startTime, endTime, context);
            }
        } catch (Exception e) {
            logger.error("Fail to preview", e);
            // Release first so the permit is returned even if the listener itself throws.
            lock.release();
            // Previously the exception was only logged, leaving the caller without any
            // response. NOTE(review): assumes the callees above do not both notify the
            // listener and then throw synchronously - confirm.
            listener.onFailure(e);
        }
    } catch (Exception e) {
        logger.error(e);
        listener.onFailure(e);
    }
}
Aggregations