Use of org.opensearch.ad.model.DetectionDateRange in project anomaly-detection by opensearch-project.
From class ADBatchTaskRunner, method forwardOrExecuteADTask:
/**
 * Forward AD task to worker node.
 * 1. For an HC detector, return directly if there are no more pending entities. Otherwise,
 *    check if an AD task has already been created for this entity. If yes, just forward
 *    the entity task to the worker node; otherwise, create the entity task first, then
 *    forward it.
 * 2. For a single-entity detector, set the task state to INIT and forward the task to
 *    the worker node.
 *
 * @param adTask AD task
 * @param transportService transport service
 * @param listener action listener
 */
public void forwardOrExecuteADTask(
    ADTask adTask,
    TransportService transportService,
    ActionListener<ADBatchAnomalyResultResponse> listener
) {
    try {
        checkIfADTaskCancelledAndCleanupCache(adTask);
        String detectorId = adTask.getDetectorId();
        AnomalyDetector detector = adTask.getDetector();
        boolean isHCDetector = detector.isMultientityDetector();
        if (isHCDetector) {
            String entityString = adTaskCacheManager.pollEntity(detectorId);
            logger.debug("Start to run entity: {} of detector {}", entityString, detectorId);
            if (entityString == null) {
                listener.onResponse(new ADBatchAnomalyResultResponse(clusterService.localNode().getId(), false));
                return;
            }
            ActionListener<Object> wrappedListener = ActionListener.wrap(r -> logger.debug("Entity task created successfully"), e -> {
                logger.error("Failed to start entity task for detector: {}, entity: {}", detectorId, entityString, e);
                // If starting the entity task fails, move the entity back into the pending queue so it can be retried.
                adTaskCacheManager.addPendingEntity(detectorId, entityString);
            });
            // Handle the retry case: to retry an entity, we need the old entity task created before.
            Entity entity = adTaskManager.parseEntityFromString(entityString, adTask);
            String parentTaskId = adTask.getTaskType().equals(ADTaskType.HISTORICAL_HC_ENTITY.name())
                ? adTask.getParentTaskId() // a HISTORICAL_HC_ENTITY task already carries its parent task id
                : adTask.getTaskId(); // a HISTORICAL_HC_DETECTOR task's own task id is the parent task id
            adTaskManager
                .getAndExecuteOnLatestADTask(detectorId, parentTaskId, entity, ImmutableList.of(ADTaskType.HISTORICAL_HC_ENTITY), existingEntityTask -> {
                    if (existingEntityTask.isPresent()) {
                        // Retry an entity that failed with a limit-exceeded exception.
                        // TODO: if the task failed halfway due to a limit-exceeded exception, should we resume from
                        // the break point or clear the old AD tasks and rerun? Currently we only support rerunning
                        // tasks that failed with a limit-exceeded exception before starting.
                        ADTask adEntityTask = existingEntityTask.get();
                        logger.debug("Rerun entity task for task id: {}, error of last run: {}", adEntityTask.getTaskId(), adEntityTask.getError());
                        ActionListener<ADBatchAnomalyResultResponse> workerNodeResponseListener =
                            workerNodeResponseListener(adEntityTask, transportService, listener);
                        forwardOrExecuteEntityTask(adEntityTask, transportService, workerNodeResponseListener);
                    } else {
                        logger.info("Create entity task for entity:{}", entityString);
                        Instant now = Instant.now();
                        ADTask adEntityTask = new ADTask.Builder()
                            .detectorId(adTask.getDetectorId())
                            .detector(detector)
                            .isLatest(true)
                            .taskType(ADTaskType.HISTORICAL_HC_ENTITY.name())
                            .executionStartTime(now)
                            .taskProgress(0.0f)
                            .initProgress(0.0f)
                            .state(ADTaskState.INIT.name())
                            .lastUpdateTime(now)
                            .startedBy(adTask.getStartedBy())
                            .coordinatingNode(clusterService.localNode().getId())
                            .detectionDateRange(adTask.getDetectionDateRange())
                            .user(adTask.getUser())
                            .entity(entity)
                            .parentTaskId(parentTaskId)
                            .build();
                        adTaskManager.createADTaskDirectly(adEntityTask, r -> {
                            adEntityTask.setTaskId(r.getId());
                            ActionListener<ADBatchAnomalyResultResponse> workerNodeResponseListener =
                                workerNodeResponseListener(adEntityTask, transportService, listener);
                            forwardOrExecuteEntityTask(adEntityTask, transportService, workerNodeResponseListener);
                        }, wrappedListener);
                    }
                }, transportService, false, wrappedListener);
        } else {
            Map<String, Object> updatedFields = new HashMap<>();
            updatedFields.put(STATE_FIELD, ADTaskState.INIT.name());
            updatedFields.put(INIT_PROGRESS_FIELD, 0.0f);
            ActionListener<ADBatchAnomalyResultResponse> workerNodeResponseListener =
                workerNodeResponseListener(adTask, transportService, listener);
            adTaskManager.updateADTask(
                adTask.getTaskId(),
                updatedFields,
                ActionListener.wrap(
                    r -> forwardOrExecuteEntityTask(adTask, transportService, workerNodeResponseListener),
                    e -> workerNodeResponseListener.onFailure(e)
                )
            );
        }
    } catch (Exception e) {
        logger.error("Failed to forward or execute AD task " + adTask.getTaskId(), e);
        listener.onFailure(e);
    }
}
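The ternary that picks parentTaskId above encodes a small convention worth calling out: an entity-level task points at the detector-level task that spawned it, while a detector-level task is its own family root. Below is a minimal standalone sketch of that convention, with plain strings standing in for the ADTask fields; the TaskIds class and its inputs are hypothetical, for illustration only.

// Hypothetical standalone illustration of the parent-task-id convention above.
final class TaskIds {
    static String resolveParentTaskId(String taskType, String taskId, String parentTaskId) {
        // A HISTORICAL_HC_ENTITY task already carries a pointer to its
        // HISTORICAL_HC_DETECTOR parent; a detector-level task is itself the root.
        return "HISTORICAL_HC_ENTITY".equals(taskType) ? parentTaskId : taskId;
    }

    public static void main(String[] args) {
        // Entity-level task: resolves to the detector-level task that spawned it.
        System.out.println(resolveParentTaskId("HISTORICAL_HC_ENTITY", "entity-task-1", "detector-task-1"));
        // Detector-level task: resolves to its own id.
        System.out.println(resolveParentTaskId("HISTORICAL_HC_DETECTOR", "detector-task-1", null));
    }
}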
Use of org.opensearch.ad.model.DetectionDateRange in project anomaly-detection by opensearch-project.
From class ADTaskManager, method updateLatestFlagOfOldTasksAndCreateNewTask:
private void updateLatestFlagOfOldTasksAndCreateNewTask(
    AnomalyDetector detector,
    DetectionDateRange detectionDateRange,
    User user,
    ActionListener<AnomalyDetectorJobResponse> listener
) {
    UpdateByQueryRequest updateByQueryRequest = new UpdateByQueryRequest();
    updateByQueryRequest.indices(DETECTION_STATE_INDEX);
    BoolQueryBuilder query = new BoolQueryBuilder();
    query.filter(new TermQueryBuilder(DETECTOR_ID_FIELD, detector.getDetectorId()));
    query.filter(new TermQueryBuilder(IS_LATEST_FIELD, true));
    // Make sure we reset all latest tasks to false when the user switches from a
    // single-entity detector to an HC detector, and vice versa.
    query.filter(new TermsQueryBuilder(TASK_TYPE_FIELD, taskTypeToString(getADTaskTypes(detectionDateRange, true))));
    updateByQueryRequest.setQuery(query);
    updateByQueryRequest.setRefresh(true);
    String script = String.format(Locale.ROOT, "ctx._source.%s=%s;", IS_LATEST_FIELD, false);
    updateByQueryRequest.setScript(new Script(script));
    client.execute(UpdateByQueryAction.INSTANCE, updateByQueryRequest, ActionListener.wrap(r -> {
        List<BulkItemResponse.Failure> bulkFailures = r.getBulkFailures();
        if (bulkFailures.isEmpty()) {
            // The realtime AD coordinating node is chosen by the job scheduler, so we won't know it
            // until the realtime AD job runs. Set it to null here; the AD job runner will reset the
            // correct coordinating node once the realtime job starts.
            // For historical analysis, this method runs on the coordinating node, so we can use the
            // local node.
            String coordinatingNode = detectionDateRange == null ? null : clusterService.localNode().getId();
            createNewADTask(detector, detectionDateRange, user, coordinatingNode, listener);
        } else {
            logger.error("Failed to update old task's state for detector: {}, response: {} ", detector.getDetectorId(), r.toString());
            listener.onFailure(bulkFailures.get(0).getCause());
        }
    }, e -> {
        logger.error("Failed to reset old tasks as not latest for detector " + detector.getDetectorId(), e);
        listener.onFailure(e);
    }));
}
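The pattern here, filter on terms and flip a boolean with a Painless script via update-by-query, is reusable on its own. A minimal sketch that builds (but does not execute) such a request follows; the index and field names are placeholders standing in for the plugin's constants.

import org.opensearch.index.query.BoolQueryBuilder;
import org.opensearch.index.query.TermQueryBuilder;
import org.opensearch.index.reindex.UpdateByQueryRequest;
import org.opensearch.script.Script;

final class ResetLatestFlag {
    // Builds an update-by-query that marks every "latest" task doc of one
    // detector as no longer latest. Index and field names are placeholders.
    static UpdateByQueryRequest buildResetLatestRequest(String detectorId) {
        UpdateByQueryRequest request = new UpdateByQueryRequest();
        request.indices(".opendistro-anomaly-detection-state");
        BoolQueryBuilder query = new BoolQueryBuilder()
            .filter(new TermQueryBuilder("detector_id", detectorId))
            .filter(new TermQueryBuilder("is_latest", true));
        request.setQuery(query);
        request.setRefresh(true); // make the flipped flag visible to searches immediately
        request.setScript(new Script("ctx._source.is_latest=false;"));
        return request;
    }
}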
Use of org.opensearch.ad.model.DetectionDateRange in project anomaly-detection by opensearch-project.
From class ADRestTestUtils, method createAnomalyDetector:
public static Response createAnomalyDetector(
    RestClient client,
    String indexName,
    String timeField,
    int detectionIntervalInMinutes,
    int windowDelayIntervalInMinutes,
    String valueField,
    String aggregationMethod,
    String filterQuery,
    List<String> categoryFields,
    boolean historical
) throws Exception {
    Instant now = Instant.now();
    // TODO: check why a duplicate detector name error is thrown with randomAlphaOfLength(20) in twoThirdsUpgradedClusterTask
    AnomalyDetector detector = new AnomalyDetector(
        randomAlphaOfLength(10),
        randomLong(),
        randomAlphaOfLength(20) + now.toEpochMilli(),
        randomAlphaOfLength(30),
        timeField,
        ImmutableList.of(indexName),
        ImmutableList.of(TestHelpers.randomFeature(randomAlphaOfLength(5), valueField, aggregationMethod, true)),
        filterQuery == null ? TestHelpers.randomQuery("{\"match_all\":{\"boost\":1}}") : TestHelpers.randomQuery(filterQuery),
        new IntervalTimeConfiguration(detectionIntervalInMinutes, ChronoUnit.MINUTES),
        new IntervalTimeConfiguration(windowDelayIntervalInMinutes, ChronoUnit.MINUTES),
        randomIntBetween(1, 20),
        null,
        randomInt(),
        now,
        categoryFields,
        TestHelpers.randomUser(),
        null
    );
    if (historical) {
        detector.setDetectionDateRange(new DetectionDateRange(now.minus(30, ChronoUnit.DAYS), now));
    }
    return TestHelpers.makeRequest(client, "POST", TestHelpers.LEGACY_OPENDISTRO_AD_BASE_DETECTORS_URI, ImmutableMap.of(), TestHelpers.toHttpEntity(detector), null);
}
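A typical call from a test might look like the following; the index name, field names, and status assertion are illustrative, not taken from the plugin's suite.

// Hypothetical usage from a REST test case; names are illustrative.
Response response = ADRestTestUtils.createAnomalyDetector(
    client(),                 // RestClient supplied by the REST test base class
    "test-data-index",        // source index
    "timestamp",              // time field
    1,                        // detection interval in minutes
    1,                        // window delay in minutes
    "value",                  // field to aggregate
    "avg",                    // aggregation method
    null,                     // null filter query falls back to match_all
    ImmutableList.of("host"), // a category field makes this an HC detector
    true                      // historical: attaches a 30-day DetectionDateRange
);
assertEquals(RestStatus.CREATED.getStatus(), response.getStatusLine().getStatusCode());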
Use of org.opensearch.ad.model.DetectionDateRange in project anomaly-detection by opensearch-project.
From class MockAnomalyDetectorJobTransportActionWithUser, method doExecute:
@Override
protected void doExecute(Task task, AnomalyDetectorJobRequest request, ActionListener<AnomalyDetectorJobResponse> listener) {
    String detectorId = request.getDetectorID();
    DetectionDateRange detectionDateRange = request.getDetectionDateRange();
    boolean historical = request.isHistorical();
    long seqNo = request.getSeqNo();
    long primaryTerm = request.getPrimaryTerm();
    String rawPath = request.getRawPath();
    TimeValue requestTimeout = REQUEST_TIMEOUT.get(settings);
    String userStr = "user_name|backendrole1,backendrole2|roles1,role2";
    // By the time the request reaches here, the user's permissions have already been validated by the Security plugin.
    User user = User.parse(userStr);
    try (ThreadContext.StoredContext context = client.threadPool().getThreadContext().stashContext()) {
        resolveUserAndExecute(
            user,
            detectorId,
            filterByEnabled,
            listener,
            (anomalyDetector) -> executeDetector(listener, detectorId, seqNo, primaryTerm, rawPath, requestTimeout, user, detectionDateRange, historical),
            client,
            clusterService,
            xContentRegistry
        );
    } catch (Exception e) {
        logger.error(e);
        listener.onFailure(e);
    }
}
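The try-with-resources around stashContext() is the usual stash-and-restore pattern: everything inside the block runs in a fresh thread context, and the caller's context is restored automatically when the StoredContext closes. A minimal sketch of the pattern in isolation; the transient key name is hypothetical, as the plugin's helpers manage the real ones.

// Sketch of the stash-and-restore pattern, assuming a client with a thread pool.
ThreadContext threadContext = client.threadPool().getThreadContext();
try (ThreadContext.StoredContext ignored = threadContext.stashContext()) {
    // Fresh, empty context: inject an identity, then do privileged work.
    // "injected_user" is a hypothetical key used only for illustration.
    threadContext.putTransient("injected_user", "user_name|backendrole1,backendrole2|roles1,role2");
    // ... execute the action as the injected user ...
} // the caller's original context is restored here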
Use of org.opensearch.ad.model.DetectionDateRange in project anomaly-detection by opensearch-project.
From class ADTaskManagerTests, method setUp:
@Override
public void setUp() throws Exception {
    super.setUp();
    Instant now = Instant.now();
    Instant startTime = now.minus(10, ChronoUnit.DAYS);
    Instant endTime = now.minus(1, ChronoUnit.DAYS);
    detectionDateRange = new DetectionDateRange(startTime, endTime);
    settings = Settings
        .builder()
        .put(MAX_OLD_AD_TASK_DOCS_PER_DETECTOR.getKey(), 2)
        .put(BATCH_TASK_PIECE_INTERVAL_SECONDS.getKey(), 1)
        .put(REQUEST_TIMEOUT.getKey(), TimeValue.timeValueSeconds(10))
        .build();
    clusterSettings = clusterSetting(
        settings,
        MAX_OLD_AD_TASK_DOCS_PER_DETECTOR,
        BATCH_TASK_PIECE_INTERVAL_SECONDS,
        REQUEST_TIMEOUT,
        DELETE_AD_RESULT_WHEN_DELETE_DETECTOR,
        MAX_BATCH_TASK_PER_NODE,
        MAX_RUNNING_ENTITIES_PER_DETECTOR_FOR_HISTORICAL_ANALYSIS
    );
    maxBatchTaskPerNode = MAX_BATCH_TASK_PER_NODE.get(settings);
    clusterService = spy(new ClusterService(settings, clusterSettings, null));
    client = mock(Client.class);
    nodeFilter = mock(DiscoveryNodeFilterer.class);
    detectionIndices = mock(AnomalyDetectionIndices.class);
    adTaskCacheManager = mock(ADTaskCacheManager.class);
    hashRing = mock(HashRing.class);
    transportService = mock(TransportService.class);
    threadPool = mock(ThreadPool.class);
    threadContext = new ThreadContext(settings);
    when(threadPool.getThreadContext()).thenReturn(threadContext);
    when(client.threadPool()).thenReturn(threadPool);
    indexAnomalyDetectorJobActionHandler = mock(IndexAnomalyDetectorJobActionHandler.class);
    adTaskManager = spy(
        new ADTaskManager(settings, clusterService, client, TestHelpers.xContentRegistry(), detectionIndices, nodeFilter, hashRing, adTaskCacheManager, threadPool)
    );
    listener = spy(new ActionListener<AnomalyDetectorJobResponse>() {
        @Override
        public void onResponse(AnomalyDetectorJobResponse bulkItemResponses) {}

        @Override
        public void onFailure(Exception e) {}
    });
    node1 = new DiscoveryNode("nodeName1", "node1", new TransportAddress(TransportAddress.META_ADDRESS, 9300), emptyMap(), emptySet(), Version.CURRENT);
    node2 = new DiscoveryNode("nodeName2", "node2", new TransportAddress(TransportAddress.META_ADDRESS, 9300), emptyMap(), emptySet(), Version.CURRENT);
    maxRunningEntities = MAX_RUNNING_ENTITIES_PER_DETECTOR_FOR_HISTORICAL_ANALYSIS.get(settings).intValue();
    // A separate local ThreadContext (shadowing the field above) used only to produce the stored context.
    ThreadContext threadContext = new ThreadContext(settings);
    context = threadContext.stashContext();
}
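With these fixtures in place, simple properties can be checked without touching a cluster. A hypothetical example follows; the test name and assertions are illustrative, not from the plugin's suite.

// Hypothetical test built on the setUp fixtures above.
public void testDetectionDateRangeSpansNineDays() {
    Instant start = detectionDateRange.getStartTime();
    Instant end = detectionDateRange.getEndTime();
    // setUp chose now-10d .. now-1d, i.e. a nine-day historical window.
    assertTrue(start.isBefore(end));
    assertEquals(9, ChronoUnit.DAYS.between(start, end));
}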