Example use of org.opensearch.ad.rest.handler.AnomalyDetectorFunction in the project anomaly-detection by opensearch-project.
Taken from the class ADTaskManager, method resetHistoricalDetectorTaskState.
/**
 * Resets the state of a stale historical task, if needed, before running {@code function}.
 *
 * <p>Only the first task of {@code runningHistoricalTasks} is inspected. {@code function} is
 * executed directly when the list is null or empty, when the task's last update time has not
 * expired, when the task profile shows the task is still running, or when the task profile cannot
 * be fetched. When the profile shows the task has stopped, control is handed to
 * {@code resetTaskStateAsStopped}, which receives both {@code function} and {@code listener}.
 *
 * <p>For a still-running historical HC detector task, running entities that the coordinating node
 * cache knows about but no worker node reports are forwarded to the coordinating node for cleanup.
 * That cleanup is best effort: failures are logged and never reported through {@code listener}.
 *
 * @param runningHistoricalTasks running historical tasks; only the first element is checked
 * @param function function to execute once the task state has been checked (and reset if needed)
 * @param transportService transport service
 * @param listener action listener, notified on failure to re-stop a detector
 * @param <T> response type of listener
 */
private <T> void resetHistoricalDetectorTaskState(
    List<ADTask> runningHistoricalTasks,
    AnomalyDetectorFunction function,
    TransportService transportService,
    ActionListener<T> listener
) {
    if (isNullOrEmpty(runningHistoricalTasks)) {
        function.execute();
        return;
    }
    ADTask adTask = runningHistoricalTasks.get(0);
    // Only probe the task profile when the task's last update time has expired; otherwise the
    // task is treated as running normally and its stored state is left untouched.
    if (!lastUpdateTimeOfHistoricalTaskExpired(adTask)) {
        function.execute();
        return;
    }
    String taskId = adTask.getTaskId();
    AnomalyDetector detector = adTask.getDetector();
    getADTaskProfile(adTask, ActionListener.wrap(taskProfile -> {
        boolean taskStopped = isTaskStopped(taskId, detector, taskProfile);
        if (taskStopped) {
            logger.debug("Reset task state as stopped, task id: {}", adTask.getTaskId());
            // A null task id in the profile means coordinating node doesn't have HC detector cache.
            if (taskProfile.getTaskId() == null
                && detector.isMultientityDetector()
                && !isNullOrEmpty(taskProfile.getEntityTaskProfiles())) {
                // If coordinating node restarted, HC detector cache on it will be gone. But worker node still
                // runs entity tasks, we'd better stop these entity tasks to clean up resource earlier.
                stopHistoricalAnalysis(adTask.getDetectorId(), Optional.of(adTask), null, ActionListener.wrap(r -> {
                    logger.debug("Restop detector successfully");
                    resetTaskStateAsStopped(adTask, function, transportService, listener);
                }, e -> {
                    logger.error("Failed to restop detector ", e);
                    listener.onFailure(e);
                }));
            } else {
                resetTaskStateAsStopped(adTask, function, transportService, listener);
            }
        } else {
            function.execute();
            // The stale-entity cleanup below is best effort. function has already run at this point,
            // so a synchronous failure must not escape this callback: ActionListener.wrap would route
            // it to the failure handler below, which would execute function a second time.
            try {
                // If still running, check if there is any stale running entities and clean them.
                if (ADTaskType.HISTORICAL_HC_DETECTOR.name().equals(adTask.getTaskType())
                    && !isNullOrEmpty(taskProfile.getRunningEntities())
                    && hcBatchTaskExpired(taskProfile.getLatestHCTaskRunTime())) {
                    List<String> runningTasksInCoordinatingNodeCache = new ArrayList<>(taskProfile.getRunningEntities());
                    List<String> runningTasksOnWorkerNode = new ArrayList<>();
                    if (!isNullOrEmpty(taskProfile.getEntityTaskProfiles())) {
                        taskProfile
                            .getEntityTaskProfiles()
                            .forEach(entryTask -> runningTasksOnWorkerNode.add(convertEntityToString(entryTask.getEntity(), detector)));
                    }
                    // Entities in the coordinating node cache that no worker node reports are stale.
                    // Forward them to the coordinating node for cleanup (presumably it then polls the
                    // next entity from the pending entity queue and runs it; confirm in the handler).
                    if (runningTasksInCoordinatingNodeCache.size() > runningTasksOnWorkerNode.size()) {
                        runningTasksInCoordinatingNodeCache.removeAll(runningTasksOnWorkerNode);
                        forwardStaleRunningEntitiesToCoordinatingNode(
                            adTask,
                            ADTaskAction.CLEAN_STALE_RUNNING_ENTITIES,
                            transportService,
                            runningTasksInCoordinatingNodeCache,
                            ActionListener
                                .wrap(
                                    res -> logger.debug("Forwarded task to clean stale running entity, task id {}", taskId),
                                    ex -> logger.error("Failed to forward clean stale running entity for task " + taskId, ex)
                                )
                        );
                    }
                }
            } catch (Exception cleanupError) {
                logger.error("Failed to clean stale running entities for task " + taskId, cleanupError);
            }
        }
    }, e -> {
        logger.error("Failed to get AD task profile for task " + adTask.getTaskId(), e);
        function.execute();
    }));
}
Example use of org.opensearch.ad.rest.handler.AnomalyDetectorFunction in the project anomaly-detection by opensearch-project.
Taken from the class ADTaskManager, method initRealtimeTaskCacheAndCleanupStaleCache.
/**
 * Initializes the realtime task cache on this node and cleans up the realtime task cache left on
 * the previous coordinating node.
 *
 * <p>Realtime AD relies on job scheduler to pick the node (the job coordinating node) that runs
 * the AD job. Nodes holding a primary or replica shard of the AD job index are the candidates;
 * job scheduler builds a hash ring over them and picks one. When an AD job index shard relocates,
 * for example because a new node joined the cluster, job scheduler rebuilds the hash ring and may
 * pick a different node. The new coordinating node therefore has to init its realtime task cache,
 * and the cache on the old coordinating node has to be cleaned.
 *
 * <p>The listener receives {@code true} when the realtime task cache is inited for the first time
 * on this node, and {@code false} when the cache already exists.
 *
 * @param detectorId detector id
 * @param detector anomaly detector
 * @param transportService transport service
 * @param listener listener
 */
public void initRealtimeTaskCacheAndCleanupStaleCache(
    String detectorId,
    AnomalyDetector detector,
    TransportService transportService,
    ActionListener<Boolean> listener
) {
    try {
        // Cache already present on this node: nothing to init.
        if (adTaskCacheManager.getRealtimeTaskCache(detectorId) != null) {
            listener.onResponse(false);
            return;
        }
        getAndExecuteOnLatestDetectorLevelTask(detectorId, REALTIME_TASK_TYPES, (latestTask) -> {
            if (!latestTask.isPresent()) {
                // No realtime task exists yet: recreate it, then init the cache.
                logger.debug("Can't find realtime task for detector {}, init realtime task cache directly", detectorId);
                AnomalyDetectorFunction recreateTask = () -> createNewADTask(
                    detector,
                    null,
                    detector.getUser(),
                    clusterService.localNode().getId(),
                    ActionListener.wrap(response -> {
                        logger.info("Recreate realtime task successfully for detector {}", detectorId);
                        adTaskCacheManager.initRealtimeTaskCache(detectorId, detector.getDetectorIntervalInMilliseconds());
                        listener.onResponse(true);
                    }, exception -> {
                        logger.error("Failed to recreate realtime task for detector " + detectorId, exception);
                        listener.onFailure(exception);
                    })
                );
                recreateRealtimeTask(recreateTask, listener);
                return;
            }

            ADTask realtimeTask = latestTask.get();
            String thisNodeId = clusterService.localNode().getId();
            String previousCoordinator = realtimeTask.getCoordinatingNode();
            boolean coordinatorChanged = previousCoordinator != null && !thisNodeId.equals(previousCoordinator);

            if (!coordinatorChanged) {
                logger.info("Init realtime task cache for detector {}", detectorId);
                adTaskCacheManager.initRealtimeTaskCache(detectorId, detector.getDetectorIntervalInMilliseconds());
                listener.onResponse(true);
                return;
            }

            // The job moved to this node: clean the cache on the old coordinating node first.
            logger
                .warn(
                    "AD realtime job coordinating node changed from {} to this node {} for detector {}",
                    previousCoordinator,
                    thisNodeId,
                    detectorId
                );
            cleanDetectorCache(realtimeTask, transportService, () -> {
                logger.info("Realtime task cache cleaned on old coordinating node {} for detector {}", previousCoordinator, detectorId);
                adTaskCacheManager.initRealtimeTaskCache(detectorId, detector.getDetectorIntervalInMilliseconds());
                listener.onResponse(true);
            }, listener);
        }, transportService, false, listener);
    } catch (Exception e) {
        logger.error("Failed to init realtime task cache for " + detectorId, e);
        listener.onFailure(e);
    }
}
Example use of org.opensearch.ad.rest.handler.AnomalyDetectorFunction in the project anomaly-detection by opensearch-project.
Taken from the class ADTaskManager, method deleteADTasks.
/**
 * Deletes all AD task docs of a detector from the detection state index, then deletes the
 * detector's AD results and runs {@code function}.
 *
 * <p>{@code function} is also run when the delete fails with {@link IndexNotFoundException}.
 * Bulk failures and any other exception are reported through {@code listener}.
 * [Important!] Make sure listener returns in function
 *
 * @param detectorId detector id
 * @param function AD function
 * @param listener action listener
 */
public void deleteADTasks(String detectorId, AnomalyDetectorFunction function, ActionListener<DeleteResponse> listener) {
    DeleteByQueryRequest deleteRequest = new DeleteByQueryRequest(DETECTION_STATE_INDEX);
    BoolQueryBuilder detectorFilter = new BoolQueryBuilder();
    detectorFilter.filter(new TermQueryBuilder(DETECTOR_ID_FIELD, detectorId));
    deleteRequest.setQuery(detectorFilter);

    client.execute(DeleteByQueryAction.INSTANCE, deleteRequest, ActionListener.wrap(response -> {
        boolean hasBulkFailures = response.getBulkFailures() != null && response.getBulkFailures().size() != 0;
        if (hasBulkFailures) {
            listener.onFailure(new OpenSearchStatusException("Failed to delete all AD tasks", RestStatus.INTERNAL_SERVER_ERROR));
            return;
        }
        logger.info("AD tasks deleted for detector {}", detectorId);
        deleteADResultOfDetector(detectorId);
        function.execute();
    }, exception -> {
        logger.info("Failed to delete AD tasks for " + detectorId, exception);
        if (!(exception instanceof IndexNotFoundException)) {
            listener.onFailure(exception);
            return;
        }
        // The index does not exist, so there are no task docs to delete; continue as on success.
        deleteADResultOfDetector(detectorId);
        function.execute();
    }));
}
Example use of org.opensearch.ad.rest.handler.AnomalyDetectorFunction in the project anomaly-detection by opensearch-project.
Taken from the class ADTaskManager, method cleanDetectorCache.
/**
 * Cleans the detector cache on the task's coordinating node.
 *
 * <p>While the coordinating node is still part of the cluster, a clean-cache request is forwarded
 * to it so that it removes the detector from its cache. When the coordinating node has left the
 * cluster there is nothing to forward, and {@code function} is executed right away.
 * [Important!] Make sure listener returns in function
 *
 * @param adTask AD task
 * @param transportService transport service
 * @param function will execute it when detector cache cleaned successfully or coordinating node left cluster
 * @param listener action listener
 * @param <T> response type of listener
 */
public <T> void cleanDetectorCache(
    ADTask adTask,
    TransportService transportService,
    AnomalyDetectorFunction function,
    ActionListener<T> listener
) {
    String coordinatorNodeId = adTask.getCoordinatingNode();
    String idOfDetector = adTask.getDetectorId();
    String idOfTask = adTask.getTaskId();
    try {
        forwardADTaskToCoordinatingNode(
            adTask,
            ADTaskAction.CLEAN_CACHE,
            transportService,
            ActionListener.wrap(response -> function.execute(), failure -> {
                logger.error("Failed to clear detector cache on coordinating node " + coordinatorNodeId, failure);
                listener.onFailure(failure);
            })
        );
    } catch (ResourceNotFoundException nodeGone) {
        // Treated as "coordinating node left the cluster": no cache to clean, so just continue.
        logger
            .warn(
                "Task coordinating node left cluster, taskId: {}, detectorId: {}, coordinatingNode: {}",
                idOfTask,
                idOfDetector,
                coordinatorNodeId
            );
        function.execute();
    } catch (Exception unexpected) {
        logger.error("Failed to forward clean cache event for detector " + idOfDetector + ", task " + idOfTask, unexpected);
        listener.onFailure(unexpected);
    }
}
Example use of org.opensearch.ad.rest.handler.AnomalyDetectorFunction in the project anomaly-detection by opensearch-project.
Taken from the class ADTaskManager, method stopLatestRealtimeTask.
/**
 * Stops the latest realtime task of a detector by updating its state (and error, if any).
 *
 * <p>If the latest realtime task is missing or already done, the listener fails with an
 * "already stopped" status exception. Otherwise the task doc is updated with the given state and
 * error message. When the task has a coordinating node and {@code transportService} is not null,
 * the detector cache on the coordinating node is cleaned before the update. After a successful
 * update the listener gets an OK response when {@code error} is null, and fails with
 * {@code error} otherwise.
 *
 * @param detectorId detector id
 * @param state task state
 * @param error error
 * @param transportService transport service
 * @param listener action listener
 */
public void stopLatestRealtimeTask(
    String detectorId,
    ADTaskState state,
    Exception error,
    TransportService transportService,
    ActionListener<AnomalyDetectorJobResponse> listener
) {
    getAndExecuteOnLatestDetectorLevelTask(detectorId, REALTIME_TASK_TYPES, (latestTask) -> {
        if (!latestTask.isPresent() || latestTask.get().isDone()) {
            listener.onFailure(new OpenSearchStatusException("Anomaly detector job is already stopped: " + detectorId, RestStatus.OK));
            return;
        }

        Map<String, Object> fieldsToUpdate = new HashMap<>();
        fieldsToUpdate.put(ADTask.STATE_FIELD, state.name());
        if (error != null) {
            fieldsToUpdate.put(ADTask.ERROR_FIELD, error.getMessage());
        }

        AnomalyDetectorFunction updateTask = () -> updateADTask(
            latestTask.get().getTaskId(),
            fieldsToUpdate,
            ActionListener.wrap(response -> {
                if (error != null) {
                    listener.onFailure(error);
                } else {
                    listener.onResponse(new AnomalyDetectorJobResponse(detectorId, 0, 0, 0, RestStatus.OK));
                }
            }, updateFailure -> listener.onFailure(updateFailure))
        );

        String coordinatorNodeId = latestTask.get().getCoordinatingNode();
        boolean canCleanCache = coordinatorNodeId != null && transportService != null;
        if (canCleanCache) {
            cleanDetectorCache(latestTask.get(), transportService, updateTask, listener);
        } else {
            updateTask.execute();
        }
    }, null, false, listener);
}
Aggregations