use of org.opensearch.ad.common.exception.ResourceNotFoundException in project anomaly-detection by opensearch-project.
the class ADDataMigrator method checkIfRealtimeTaskExistsAndBackfill.
/**
 * Checks whether a realtime task already exists for the given detector job and
 * either moves on to the next job or creates the missing realtime task.
 *
 * The search targets the detection state index and filters on the detector id
 * (the job name) and the realtime task types. The "is latest" filter is only
 * added when the job is enabled. At most one hit is requested because only the
 * existence of a task matters.
 *
 * @param job detector job whose realtime task is looked up
 * @param createRealtimeTaskFunction executed when no realtime task is found
 * @param detectorJobs remaining detector jobs, passed on to backfillRealtimeTask
 * @param migrateAll passed on unchanged to backfillRealtimeTask
 */
private void checkIfRealtimeTaskExistsAndBackfill(AnomalyDetectorJob job, AnomalyDetectorFunction createRealtimeTaskFunction, ConcurrentLinkedQueue<AnomalyDetectorJob> detectorJobs, boolean migrateAll) {
    String jobId = job.getName();
    BoolQueryBuilder query = new BoolQueryBuilder();
    query.filter(new TermQueryBuilder(DETECTOR_ID_FIELD, jobId));
    if (job.isEnabled()) {
        query.filter(new TermQueryBuilder(IS_LATEST_FIELD, true));
    }
    query.filter(new TermsQueryBuilder(TASK_TYPE_FIELD, taskTypeToString(ADTaskType.REALTIME_TASK_TYPES)));
    SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder().query(query).size(1);
    SearchRequest searchRequest = new SearchRequest(DETECTION_STATE_INDEX).source(searchSourceBuilder);
    client.search(searchRequest, ActionListener.wrap(r -> {
        if (r != null && r.getHits().getTotalHits().value > 0) {
            // A realtime task already exists: backfill next realtime job
            backfillRealtimeTask(detectorJobs, migrateAll);
            return;
        }
        createRealtimeTaskFunction.execute();
    }, e -> {
        if (e instanceof ResourceNotFoundException) {
            // Nothing to search means no task exists yet; create it. This path is a
            // successful recovery, so it must not fall through to the error log below.
            createRealtimeTaskFunction.execute();
            return;
        }
        // Keep the exception in the log so the root cause is not lost.
        // NOTE(review): the remaining jobs are not backfilled on this path, same as before — confirm this is intended.
        logger.error("Failed to search tasks of detector " + jobId, e);
    }));
}
use of org.opensearch.ad.common.exception.ResourceNotFoundException in project anomaly-detection by opensearch-project.
the class AnomalyResultTransportAction method coldStartIfNoModel.
/**
 * Inspects the failure recorded for the rcf or threshold models and triggers a
 * cold start when that failure shows that no model exists.
 *
 * Outcomes:
 * 1. No failure recorded: returns null, no cold start.
 * 2. Failure is not a ResourceNotFoundException (for example a
 *    LimitExceededException): the failure is returned unchanged, no cold start.
 * 3. Failure is a ResourceNotFoundException and the previous cold start of this
 *    detector left an EndRunException with endNow equal to true: that
 *    exception is returned, no cold start.
 * 4. Otherwise a cold start is triggered and the previous cold start exception
 *    is returned if there is one, else a new InternalFailure carrying
 *    NO_MODEL_ERR_MSG.
 *
 * Exceptions the caller may receive, as recorded in the original notes for this method:
 * 1. Exception from cold start:
 *   1). InternalFailure due to
 *     a. OpenSearchTimeoutException thrown by putModelCheckpoint during cold start
 *   2). EndRunException with endNow equal to false
 *     a. training data not available
 *     b. cold start cannot succeed
 *     c. invalid training data
 *   3) EndRunException with endNow equal to true
 *     a. invalid search query
 * 2. LimitExceededException from one of RCF model node when the total size of the models
 *    is more than X% of heap memory.
 * 3. InternalFailure wrapping OpenSearchTimeoutException inside caused by
 *    RCF/Threshold model node failing to get checkpoint to restore model before timeout.
 *
 * @param failure object that may contain exceptions thrown
 * @param detector detector object
 * @return null when no failure was recorded, otherwise the exception to hand back to the caller
 * @throws Exception declared by the signature; the visible body only returns exceptions
 */
private Exception coldStartIfNoModel(AtomicReference<Exception> failure, AnomalyDetector detector) throws Exception {
    final Exception recorded = failure.get();
    // Covers both "nothing recorded" (null is never an instance of anything) and
    // failures unrelated to a missing model, such as LimitExceededException.
    if (!(recorded instanceof ResourceNotFoundException)) {
        return recorded;
    }

    // The model is missing: look at what the previous cold start left behind.
    final String detectorId = detector.getDetectorId();
    final Optional<Exception> lastColdStartError = stateManager.fetchExceptionAndClear(detectorId);
    if (lastColdStartError.isPresent()) {
        final Exception lastError = lastColdStartError.get();
        LOG.error("Previous exception of {}: {}", () -> detectorId, () -> lastError);
        boolean mustEndNow = lastError instanceof EndRunException && ((EndRunException) lastError).isEndNow();
        if (mustEndNow) {
            // Report the previous error without starting another cold start.
            return lastError;
        }
    }

    LOG.info("Trigger cold start for {}", detector.getDetectorId());
    coldStart(detector);
    return lastColdStartError.orElse(new InternalFailure(detectorId, NO_MODEL_ERR_MSG));
}
use of org.opensearch.ad.common.exception.ResourceNotFoundException in project anomaly-detection by opensearch-project.
the class AnomalyDetectorProfileRunnerTests method setUpClientExecuteRCFPollingAction.
/**
 * Stubs client.execute for RCFPollingAction so that the listener passed as the
 * third argument is answered according to the requested polling status.
 *
 * INIT_NOT_EXIT / REMOTE_INIT_NOT_EXIT fail with a ResourceNotFoundException and
 * INDEX_NOT_FOUND / REMOTE_INDEX_NOT_FOUND fail with an IndexNotFoundException,
 * in both cases wrapped in an AnomalyDetectionException. The REMOTE_* variants
 * are additionally wrapped in a RemoteTransportException.
 * INIT_DONE, INITTING and EMPTY answer with an RCFPollingResponse.
 * EXCEPTION fails with a plain RuntimeException.
 *
 * @param inittedEverResultStatus the polling outcome the mocked client should produce
 */
@SuppressWarnings("unchecked")
private void setUpClientExecuteRCFPollingAction(RCFPollingStatus inittedEverResultStatus) {
    doAnswer(invocation -> {
        Object[] args = invocation.getArguments();
        // client.execute(action, request, listener): the listener is the third argument
        ActionListener<RCFPollingResponse> listener = (ActionListener<RCFPollingResponse>) args[2];
        Exception cause = null;
        String detectorId = "123";
        if (inittedEverResultStatus == RCFPollingStatus.INIT_NOT_EXIT || inittedEverResultStatus == RCFPollingStatus.REMOTE_INIT_NOT_EXIT || inittedEverResultStatus == RCFPollingStatus.INDEX_NOT_FOUND || inittedEverResultStatus == RCFPollingStatus.REMOTE_INDEX_NOT_FOUND) {
            switch (inittedEverResultStatus) {
                case INIT_NOT_EXIT:
                case REMOTE_INIT_NOT_EXIT:
                    cause = new ResourceNotFoundException(detectorId, messaingExceptionError);
                    break;
                case INDEX_NOT_FOUND:
                case REMOTE_INDEX_NOT_FOUND:
                    cause = new IndexNotFoundException(detectorId, CommonName.CHECKPOINT_INDEX_NAME);
                    break;
                default:
                    assertTrue("should not reach here", false);
                    break;
            }
            cause = new AnomalyDetectionException(detectorId, cause);
            if (inittedEverResultStatus == RCFPollingStatus.REMOTE_INIT_NOT_EXIT || inittedEverResultStatus == RCFPollingStatus.REMOTE_INDEX_NOT_FOUND) {
                // Wrap the failure the way the REMOTE_* statuses require
                cause = new RemoteTransportException(RCFPollingAction.NAME, new NotSerializableExceptionWrapper(cause));
            }
            listener.onFailure(cause);
        } else {
            RCFPollingResponse result = null;
            switch (inittedEverResultStatus) {
                case INIT_DONE:
                    result = new RCFPollingResponse(requiredSamples + 1);
                    break;
                case INITTING:
                    result = new RCFPollingResponse(requiredSamples - neededSamples);
                    break;
                case EMPTY:
                    result = new RCFPollingResponse(0);
                    break;
                case EXCEPTION:
                    listener.onFailure(new RuntimeException());
                    // The listener has already been completed with a failure; return here so
                    // it is not notified a second time with onResponse(null) below.
                    return null;
                default:
                    assertTrue("should not reach here", false);
                    break;
            }
            listener.onResponse(result);
        }
        return null;
    }).when(client).execute(any(RCFPollingAction.class), any(), any());
}
use of org.opensearch.ad.common.exception.ResourceNotFoundException in project anomaly-detection by opensearch-project.
the class ADTaskManager method cleanDetectorCache.
/**
 * Asks the task's coordinating node to clean its detector cache.
 *
 * A CLEAN_CACHE action is forwarded to the coordinating node of the task. When
 * the forward succeeds, {@code function} is executed. When forwarding throws a
 * ResourceNotFoundException, a warning is logged saying the coordinating node
 * left the cluster and {@code function} is executed anyway. Any other failure,
 * whether reported asynchronously or thrown while forwarding, is logged and
 * passed to {@code listener}.
 *
 * [Important!] Make sure listener returns in function
 *
 * @param adTask AD task
 * @param transportService transport service
 * @param function executed after a successful forward or when the coordinating node is gone
 * @param listener action listener notified on failure
 * @param <T> response type of listener
 */
public <T> void cleanDetectorCache(ADTask adTask, TransportService transportService, AnomalyDetectorFunction function, ActionListener<T> listener) {
    final String coordinatingNode = adTask.getCoordinatingNode();
    final String detectorId = adTask.getDetectorId();
    final String taskId = adTask.getTaskId();
    try {
        forwardADTaskToCoordinatingNode(
            adTask,
            ADTaskAction.CLEAN_CACHE,
            transportService,
            ActionListener.wrap(
                // Forward succeeded: continue with the caller's function
                response -> function.execute(),
                // Forward failed asynchronously
                failure -> {
                    logger.error("Failed to clear detector cache on coordinating node " + coordinatingNode, failure);
                    listener.onFailure(failure);
                }
            )
        );
    } catch (ResourceNotFoundException nodeGone) {
        // Nothing to forward to; carry on as if the cache had been cleaned.
        logger.warn("Task coordinating node left cluster, taskId: {}, detectorId: {}, coordinatingNode: {}", taskId, detectorId, coordinatingNode);
        function.execute();
    } catch (Exception forwardError) {
        logger.error("Failed to forward clean cache event for detector " + detectorId + ", task " + taskId, forwardError);
        listener.onFailure(forwardError);
    }
}
use of org.opensearch.ad.common.exception.ResourceNotFoundException in project anomaly-detection by opensearch-project.
the class ADTaskManager method getCoordinatingNode.
/**
 * Finds the eligible data node whose id equals the coordinating node id
 * recorded on the task.
 *
 * @param adTask AD task carrying the coordinating node id
 * @return the first eligible data node with a matching id
 * @throws ResourceNotFoundException if no eligible data node has that id
 */
private DiscoveryNode getCoordinatingNode(ADTask adTask) {
    final String wantedNodeId = adTask.getCoordinatingNode();
    for (DiscoveryNode candidate : nodeFilter.getEligibleDataNodes()) {
        if (candidate.getId().equals(wantedNodeId)) {
            return candidate;
        }
    }
    // No eligible data node matched the recorded id
    throw new ResourceNotFoundException(adTask.getDetectorId(), "AD task coordinating node not found");
}
Aggregations