use of org.opensearch.ad.ml.EntityModel in project anomaly-detection by opensearch-project.
the class CheckpointReadWorkerTests method setUp.
@Override
public void setUp() throws Exception {
    super.setUp();

    clusterService = mock(ClusterService.class);
    clusterSettings = new ClusterSettings(
        Settings.EMPTY,
        Collections.unmodifiableSet(
            new HashSet<>(
                Arrays.asList(
                    AnomalyDetectorSettings.CHECKPOINT_READ_QUEUE_MAX_HEAP_PERCENT,
                    AnomalyDetectorSettings.CHECKPOINT_READ_QUEUE_CONCURRENCY,
                    AnomalyDetectorSettings.CHECKPOINT_READ_QUEUE_BATCH_SIZE
                )
            )
        )
    );
    when(clusterService.getClusterSettings()).thenReturn(clusterSettings);

    state = MLUtil.randomModelState(new RandomModelStateConfig.Builder().fullModel(true).build());

    checkpoint = mock(CheckpointDao.class);
    Map.Entry<EntityModel, Instant> entry = new SimpleImmutableEntry<EntityModel, Instant>(state.getModel(), Instant.now());
    when(checkpoint.processGetResponse(any(), anyString())).thenReturn(Optional.of(entry));

    checkpointWriteQueue = mock(CheckpointWriteWorker.class);

    modelManager = mock(ModelManager.class);
    when(modelManager.processEntityCheckpoint(any(), any(), anyString(), anyString(), anyInt())).thenReturn(state);
    when(modelManager.score(any(), anyString(), any())).thenReturn(new ThresholdingResult(0, 1, 0.7));

    coldstartQueue = mock(EntityColdStartWorker.class);
    resultWriteQueue = mock(ResultWriteWorker.class);
    anomalyDetectionIndices = mock(AnomalyDetectionIndices.class);

    cacheProvider = mock(CacheProvider.class);
    entityCache = mock(EntityCache.class);
    when(cacheProvider.get()).thenReturn(entityCache);
    when(entityCache.hostIfPossible(any(), any())).thenReturn(true);

    // Integer.MAX_VALUE makes a huge heap
    worker = new CheckpointReadWorker(
        Integer.MAX_VALUE,
        AnomalyDetectorSettings.ENTITY_FEATURE_REQUEST_SIZE_IN_BYTES,
        AnomalyDetectorSettings.CHECKPOINT_READ_QUEUE_MAX_HEAP_PERCENT,
        clusterService,
        new Random(42),
        mock(ADCircuitBreakerService.class),
        threadPool,
        Settings.EMPTY,
        AnomalyDetectorSettings.MAX_QUEUED_TASKS_RATIO,
        clock,
        AnomalyDetectorSettings.MEDIUM_SEGMENT_PRUNE_RATIO,
        AnomalyDetectorSettings.LOW_SEGMENT_PRUNE_RATIO,
        AnomalyDetectorSettings.MAINTENANCE_FREQ_CONSTANT,
        AnomalyDetectorSettings.QUEUE_MAINTENANCE,
        modelManager,
        checkpoint,
        coldstartQueue,
        resultWriteQueue,
        nodeStateManager,
        anomalyDetectionIndices,
        cacheProvider,
        AnomalyDetectorSettings.HOURLY_MAINTENANCE,
        checkpointWriteQueue
    );

    request = new EntityFeatureRequest(Integer.MAX_VALUE, detectorId, RequestPriority.MEDIUM, entity, new double[] { 0 }, 0);
    request2 = new EntityFeatureRequest(Integer.MAX_VALUE, detectorId, RequestPriority.MEDIUM, entity2, new double[] { 0 }, 0);
    request3 = new EntityFeatureRequest(Integer.MAX_VALUE, detectorId, RequestPriority.MEDIUM, entity3, new double[] { 0 }, 0);
}
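With these stubs in place, a typical read-path test only needs to enqueue requests and assert on the cache interaction. A minimal sketch, assuming the worker exposes a putAll entry point and that the client's multi-get call is stubbed elsewhere in the test class:

// sketch only: putAll and the multi-get stubbing are assumptions not shown in this setUp
List<EntityFeatureRequest> requests = new ArrayList<>();
requests.add(request);
worker.putAll(requests); // triggers a checkpoint read for the entity
// the model rebuilt from the checkpoint should be offered to the cache
verify(entityCache, times(1)).hostIfPossible(any(), any());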
use of org.opensearch.ad.ml.EntityModel in project anomaly-detection by opensearch-project.
the class CheckpointWriteWorkerTests method testTriggerAutoFlush.
/**
 * Test that when more requests arrive than the configured concurrency allows,
 * queued requests are auto-flushed given enough time.
 * @throws InterruptedException if Thread.sleep is interrupted
 */
public void testTriggerAutoFlush() throws InterruptedException {
    final CountDownLatch processingLatch = new CountDownLatch(1);

    ExecutorService executorService = mock(ExecutorService.class);
    ThreadPool mockThreadPool = mock(ThreadPool.class);
    when(mockThreadPool.executor(AnomalyDetectorPlugin.AD_THREAD_POOL_NAME)).thenReturn(executorService);
    doAnswer(invocation -> {
        Runnable runnable = () -> {
            try {
                processingLatch.await(100, TimeUnit.SECONDS);
            } catch (InterruptedException e) {
                LOG.error(e);
                fail("Unexpected exception");
            }
            Runnable toInvoke = invocation.getArgument(0);
            toInvoke.run();
        };
        // start a new thread so it won't block the main test thread's execution
        new Thread(runnable).start();
        return null;
    }).when(executorService).execute(any(Runnable.class));

    // make sure permits are released and the next request probe starts
    doAnswer(invocation -> {
        ActionListener<BulkResponse> listener = invocation.getArgument(1);
        listener.onResponse(null);
        return null;
    }).when(checkpoint).batchWrite(any(), any());

    // Integer.MAX_VALUE makes a huge heap
    // create a worker that uses mockThreadPool
    worker = new CheckpointWriteWorker(
        Integer.MAX_VALUE,
        AnomalyDetectorSettings.CHECKPOINT_WRITE_QUEUE_SIZE_IN_BYTES,
        AnomalyDetectorSettings.CHECKPOINT_WRITE_QUEUE_MAX_HEAP_PERCENT,
        clusterService,
        new Random(42),
        mock(ADCircuitBreakerService.class),
        mockThreadPool,
        Settings.EMPTY,
        AnomalyDetectorSettings.MAX_QUEUED_TASKS_RATIO,
        clock,
        AnomalyDetectorSettings.MEDIUM_SEGMENT_PRUNE_RATIO,
        AnomalyDetectorSettings.LOW_SEGMENT_PRUNE_RATIO,
        AnomalyDetectorSettings.MAINTENANCE_FREQ_CONSTANT,
        AnomalyDetectorSettings.QUEUE_MAINTENANCE,
        checkpoint,
        CommonName.CHECKPOINT_INDEX_NAME,
        AnomalyDetectorSettings.HOURLY_MAINTENANCE,
        nodeStateManager,
        AnomalyDetectorSettings.HOURLY_MAINTENANCE
    );
    // Concurrency is 2, so the first two requests kick off two concurrent batch
    // runs; once unblocked, each run drains up to CHECKPOINT_WRITE_QUEUE_BATCH_SIZE
    // requests (the largest batch size) in one checkpoint.batchWrite call. The one
    // remaining request stays in the queue until those runs finish and is then
    // auto-flushed in a third checkpoint.batchWrite call.
    int numberOfRequests = 2 * CHECKPOINT_WRITE_QUEUE_BATCH_SIZE.getDefault(Settings.EMPTY) + 1;
    for (int i = 0; i < numberOfRequests; i++) {
        ModelState<EntityModel> state = MLUtil.randomModelState(new RandomModelStateConfig.Builder().build());
        worker.write(state, true, RequestPriority.MEDIUM);
    }

    // allow the first two pull-batch-from-queue operations to proceed
    processingLatch.countDown();

    // wait until the queue gets emptied
    int waitIntervals = 20;
    while (!worker.isQueueEmpty() && waitIntervals-- >= 0) {
        Thread.sleep(500);
    }

    assertTrue(worker.isQueueEmpty());
    // two full batches plus the auto-flushed remainder: three batchWrite calls in total
    verify(checkpoint, times(3)).batchWrite(any(), any());
}
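The sleep-and-poll loop above is a pattern this test class repeats, and it can be factored into a helper built only on the worker's isQueueEmpty accessor. A minimal sketch (the helper name and timeout handling are mine, not part of the test class):

// hypothetical helper: poll until the queue drains or the timeout elapses
private static boolean waitForQueueEmpty(CheckpointWriteWorker worker, java.time.Duration timeout) throws InterruptedException {
    long deadlineNanos = System.nanoTime() + timeout.toNanos();
    while (!worker.isQueueEmpty()) {
        if (System.nanoTime() >= deadlineNanos) {
            return false; // timed out with requests still queued
        }
        Thread.sleep(500);
    }
    return true;
}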
use of org.opensearch.ad.ml.EntityModel in project anomaly-detection by opensearch-project.
the class CheckpointWriteWorkerTests method testEmptyModelId.
@SuppressWarnings("unchecked")
public void testEmptyModelId() {
    ModelState<EntityModel> state = mock(ModelState.class);
    when(state.getLastCheckpointTime()).thenReturn(Instant.now());
    EntityModel model = mock(EntityModel.class);
    when(state.getModel()).thenReturn(model);
    when(state.getDetectorId()).thenReturn("1");
    when(state.getModelId()).thenReturn(null);
    worker.write(state, true, RequestPriority.MEDIUM);
    verify(checkpoint, never()).batchWrite(any(), any());
}
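A companion case covers the other empty-field branch: a state with a model id but no detector id should be skipped the same way. A sketch in the same style (not necessarily verbatim from the test class):

@SuppressWarnings("unchecked")
public void testEmptyDetectorId() {
    ModelState<EntityModel> state = mock(ModelState.class);
    when(state.getLastCheckpointTime()).thenReturn(Instant.now());
    when(state.getModel()).thenReturn(mock(EntityModel.class));
    // this time the detector id is missing and the model id is present
    when(state.getDetectorId()).thenReturn(null);
    when(state.getModelId()).thenReturn("a");
    worker.write(state, true, RequestPriority.MEDIUM);
    verify(checkpoint, never()).batchWrite(any(), any());
}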
use of org.opensearch.ad.ml.EntityModel in project anomaly-detection by opensearch-project.
the class CheckpointReadWorker method onGetDetector.
private ActionListener<Optional<AnomalyDetector>> onGetDetector(
    EntityFeatureRequest origRequest,
    int index,
    String detectorId,
    List<EntityFeatureRequest> toProcess,
    Map<String, MultiGetItemResponse> successfulRequests,
    Set<String> retryableRequests,
    Optional<Entry<EntityModel, Instant>> checkpoint,
    Entity entity,
    String modelId
) {
    return ActionListener.wrap(detectorOptional -> {
        if (false == detectorOptional.isPresent()) {
            LOG.warn(new ParameterizedMessage("AnomalyDetector [{}] is not available.", detectorId));
            processCheckpointIteration(index + 1, toProcess, successfulRequests, retryableRequests);
            return;
        }

        AnomalyDetector detector = detectorOptional.get();
        ModelState<EntityModel> modelState = modelManager
            .processEntityCheckpoint(checkpoint, entity, modelId, detectorId, detector.getShingleSize());
        EntityModel entityModel = modelState.getModel();

        ThresholdingResult result = null;
        if (entityModel.getTrcf().isPresent()) {
            result = modelManager.score(origRequest.getCurrentFeature(), modelId, modelState);
        } else {
            entityModel.addSample(origRequest.getCurrentFeature());
        }

        if (result != null && result.getRcfScore() > 0) {
            AnomalyResult resultToSave = result
                .toAnomalyResult(
                    detector,
                    Instant.ofEpochMilli(origRequest.getDataStartTimeMillis()),
                    Instant.ofEpochMilli(origRequest.getDataStartTimeMillis() + detector.getDetectorIntervalInMilliseconds()),
                    Instant.now(),
                    Instant.now(),
                    ParseUtils.getFeatureData(origRequest.getCurrentFeature(), detector),
                    entity,
                    indexUtil.getSchemaVersion(ADIndex.RESULT),
                    modelId,
                    null,
                    null
                );
            resultWriteQueue
                .put(
                    new ResultWriteRequest(
                        origRequest.getExpirationEpochMs(),
                        detectorId,
                        result.getGrade() > 0 ? RequestPriority.HIGH : RequestPriority.MEDIUM,
                        resultToSave,
                        detector.getResultIndex()
                    )
                );
        }

        // try to load to cache
        boolean loaded = cacheProvider.get().hostIfPossible(detector, modelState);
        if (false == loaded) {
            // not in memory. Maybe cold entities or some other entities
            // have filled the slot while waiting for loading checkpoints.
            checkpointWriteQueue.write(modelState, true, RequestPriority.LOW);
        }

        processCheckpointIteration(index + 1, toProcess, successfulRequests, retryableRequests);
    }, exception -> {
        LOG.error(new ParameterizedMessage("fail to get checkpoint [{}]", modelId), exception);
        nodeStateManager.setException(detectorId, exception);
        processCheckpointIteration(index + 1, toProcess, successfulRequests, retryableRequests);
    });
}
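The saved result's window is derived from the request: it starts at the request's data start time and spans exactly one detector interval. A worked sketch with assumed numbers (a 1-minute interval and an arbitrary epoch timestamp):

// hypothetical values standing in for origRequest and detector
long dataStartMillis = 1_600_000_000_000L;  // origRequest.getDataStartTimeMillis()
long intervalMillis = 60_000L;              // detector.getDetectorIntervalInMilliseconds()
Instant windowStart = Instant.ofEpochMilli(dataStartMillis);                 // 2020-09-13T12:26:40Z
Instant windowEnd = Instant.ofEpochMilli(dataStartMillis + intervalMillis);  // 2020-09-13T12:27:40Z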
use of org.opensearch.ad.ml.EntityModel in project anomaly-detection by opensearch-project.
the class CheckpointWriteWorker method writeAll.
public void writeAll(List<ModelState<EntityModel>> modelStates, String detectorId, boolean forceWrite, RequestPriority priority) {
    ActionListener<Optional<AnomalyDetector>> onGetForAll = ActionListener.wrap(detectorOptional -> {
        if (false == detectorOptional.isPresent()) {
            LOG.warn(new ParameterizedMessage("AnomalyDetector [{}] is not available.", detectorId));
            return;
        }

        AnomalyDetector detector = detectorOptional.get();
        try {
            List<CheckpointWriteRequest> allRequests = new ArrayList<>();
            for (ModelState<EntityModel> state : modelStates) {
                Instant instant = state.getLastCheckpointTime();
                if (!shouldSave(instant, forceWrite)) {
                    continue;
                }

                Map<String, Object> source = checkpoint.toIndexSource(state);
                String modelId = state.getModelId();
                // the model state is bloated or empty (empty samples and models); skip it
                if (source == null || source.isEmpty() || Strings.isEmpty(modelId)) {
                    continue;
                }

                state.setLastCheckpointTime(clock.instant());
                allRequests
                    .add(
                        new CheckpointWriteRequest(
                            System.currentTimeMillis() + detector.getDetectorIntervalInMilliseconds(),
                            detectorId,
                            priority,
                            // If the document exists, update fields in the map
                            new UpdateRequest(indexName, modelId).docAsUpsert(true).doc(source)
                        )
                    );
            }
            putAll(allRequests);
        } catch (Exception e) {
            // Example exception: ConcurrentModificationException when calling
            // toCheckpoint and updating the RCF model at the same time. Preventing
            // this would require a deep copy of the models or a lock, and both
            // options are costly. Since serialization is retried when the entity
            // is evicted from the cache or during the next maintenance window,
            // do nothing when the exception happens.
            LOG.info(new ParameterizedMessage("Exception while serializing models for [{}]", detectorId), e);
        }
    }, exception -> {
        LOG.error(new ParameterizedMessage("fail to get detector [{}]", detectorId), exception);
    });

    nodeStateManager.getAnomalyDetector(detectorId, onGetForAll);
}
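A natural caller is cache maintenance, which periodically persists every model a detector currently hosts. A minimal sketch, assuming the cache exposes an accessor for hosted model states (getAllModels here is an assumed name for illustration):

// sketch: persist all hosted models for one detector at medium priority
List<ModelState<EntityModel>> hosted = cacheProvider.get().getAllModels(detectorId); // hypothetical accessor
// forceWrite = false defers to shouldSave's checkpoint-interval check
checkpointWriteQueue.writeAll(hosted, detectorId, false, RequestPriority.MEDIUM);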