Use of org.opensearch.ad.model.Entity in the anomaly-detection project by opensearch-project.
Class CheckpointDao, method toIndexSource.
/**
 * Prepare for index request using the contents of the given model state
 * @param modelState an entity model state
 * @return serialized JSON map or empty map if the state is too bloated
 * @throws IOException when serialization fails
 */
public Map<String, Object> toIndexSource(ModelState<EntityModel> modelState) throws IOException {
    Map<String, Object> source = new HashMap<>();
    EntityModel model = modelState.getModel();
    Optional<String> serializedModel = toCheckpoint(model, modelState.getModelId());
    if (!serializedModel.isPresent() || serializedModel.get().length() > maxCheckpointBytes) {
        logger
            .warn(
                new ParameterizedMessage(
                    "[{}]'s model is empty or too large: [{}] bytes",
                    modelState.getModelId(),
                    serializedModel.isPresent() ? serializedModel.get().length() : 0
                )
            );
        return source;
    }
    String detectorId = modelState.getDetectorId();
    source.put(DETECTOR_ID, detectorId);
    // we cannot pass Optional as OpenSearch does not know how to serialize an Optional value
    source.put(FIELD_MODELV2, serializedModel.get());
    source.put(TIMESTAMP, ZonedDateTime.now(ZoneOffset.UTC));
    source.put(CommonName.SCHEMA_VERSION_FIELD, indexUtil.getSchemaVersion(ADIndex.CHECKPOINT));
    Optional<Entity> entity = model.getEntity();
    if (entity.isPresent()) {
        source.put(CommonName.ENTITY_KEY, entity.get());
    }
    return source;
}
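For context, a minimal caller sketch follows. It assumes checkpointDao, modelState, client, and logger are already wired up elsewhere in the plugin; the index name is a placeholder, and the error handling is illustrative rather than the plugin's actual flow.

Map<String, Object> source = checkpointDao.toIndexSource(modelState);
if (!source.isEmpty()) {
    // toIndexSource returns an empty map when the model is missing or exceeds
    // maxCheckpointBytes, so only index when there is something to persist
    IndexRequest indexRequest = new IndexRequest("example-checkpoint-index") // placeholder index name
        .id(modelState.getModelId())
        .source(source);
    client.index(
        indexRequest,
        ActionListener.wrap(
            response -> logger.debug("saved checkpoint for model {}", modelState.getModelId()),
            exception -> logger.error("failed to save checkpoint", exception)
        )
    );
}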
Use of org.opensearch.ad.model.Entity in the anomaly-detection project by opensearch-project.
Class EntityColdStarter, method getEntityColdStartData.
/**
 * Get training data for an entity.
 *
 * We first note the maximum and minimum timestamp, and sample at most 24 points
 * (with neighboring samples 60 points apart) between those minimum and maximum
 * timestamps. Samples can be missing. We only interpolate points between present
 * neighboring samples. We then transform samples and interpolated points into
 * shingles. Finally, full shingles will be used for cold start.
 *
 * @param detectorId detector Id
 * @param entity the entity's information
 * @param listener listener to return training data
 */
private void getEntityColdStartData(String detectorId, Entity entity, ActionListener<Optional<List<double[][]>>> listener) {
    ActionListener<Optional<AnomalyDetector>> getDetectorListener = ActionListener.wrap(detectorOp -> {
        if (!detectorOp.isPresent()) {
            listener.onFailure(new EndRunException(detectorId, "AnomalyDetector is not available.", false));
            return;
        }
        List<double[][]> coldStartData = new ArrayList<>();
        AnomalyDetector detector = detectorOp.get();
        ActionListener<Optional<Long>> minTimeListener = ActionListener.wrap(earliest -> {
            if (earliest.isPresent()) {
                long startTimeMs = earliest.get().longValue();
                nodeStateManager.getAnomalyDetectorJob(detectorId, ActionListener.wrap(jobOp -> {
                    if (!jobOp.isPresent()) {
                        listener.onFailure(new EndRunException(detectorId, "AnomalyDetector job is not available.", false));
                        return;
                    }
                    AnomalyDetectorJob job = jobOp.get();
                    // End time uses milliseconds as start time is assumed to be in milliseconds.
                    // OpenSearch uses a set of preconfigured formats to recognize and parse these strings
                    // into a long value representing milliseconds-since-the-epoch in UTC.
                    // More on https://tinyurl.com/wub4fk92
                    // Existing samples either predate or coincide with cold start data. In either case,
                    // combining them without reordering based on timestamps is not OK, as we might introduce
                    // anomalies in the process.
                    // An ideal solution would be to record timestamps of data points, combine existing
                    // samples and cold start samples, and do interpolation afterwards. Recording timestamps
                    // requires changes across the board, like bwc in checkpoints. A pragmatic solution is to
                    // use the job enabled time as the end time of the cold start period, since that makes it
                    // easier to combine existing samples with cold start data: we just need to append existing
                    // samples after cold start data, as existing samples all happen after the job enabled time.
                    // There might be some gaps between the last cold start sample and the first accumulated
                    // sample. We will need to accept that precision loss in the current solution.
                    long endTimeMs = job.getEnabledTime().toEpochMilli();
                    Pair<Integer, Integer> params = selectRangeParam(detector);
                    int stride = params.getLeft();
                    int numberOfSamples = params.getRight();
                    // we start with round 0
                    getFeatures(listener, 0, coldStartData, detector, entity, stride, numberOfSamples, startTimeMs, endTimeMs);
                }, listener::onFailure));
            } else {
                listener.onResponse(Optional.empty());
            }
        }, listener::onFailure);
        searchFeatureDao
            .getEntityMinDataTime(
                detector,
                entity,
                new ThreadedActionListener<>(logger, threadPool, AnomalyDetectorPlugin.AD_THREAD_POOL_NAME, minTimeListener, false)
            );
    }, listener::onFailure);
    nodeStateManager
        .getAnomalyDetector(
            detectorId,
            new ThreadedActionListener<>(logger, threadPool, AnomalyDetectorPlugin.AD_THREAD_POOL_NAME, getDetectorListener, false)
        );
}
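Because getEntityColdStartData is private and reports its result through a listener, here is a hedged sketch of the kind of listener a caller inside EntityColdStarter might supply; the variable names and logging are assumptions, and in the real class the result would feed model training rather than be logged.

ActionListener<Optional<List<double[][]>>> coldStartListener = ActionListener.wrap(trainingData -> {
    if (trainingData.isPresent() && !trainingData.get().isEmpty()) {
        // each double[][] is one contiguous segment of shingled training points
        for (double[][] segment : trainingData.get()) {
            logger.debug("received a training segment with {} points", segment.length);
        }
    } else {
        // no data was found; cold start can be retried on a later interval
        logger.debug("no training data available for this entity yet");
    }
}, exception -> logger.error("cold start data collection failed", exception));
getEntityColdStartData(detectorId, entity, coldStartListener);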
Use of org.opensearch.ad.model.Entity in the anomaly-detection project by opensearch-project.
Class PriorityCache, method getModelProfile.
/**
 * Gets an entity's model profile
 *
 * @param detectorId detector id
 * @param entityModelId entity model id
 * @return the model profile, or an empty Optional if the entity's model is not in the active cache
 */
@Override
public Optional<ModelProfile> getModelProfile(String detectorId, String entityModelId) {
    CacheBuffer cacheBuffer = activeEnities.get(detectorId);
    if (cacheBuffer != null && cacheBuffer.getModel(entityModelId).isPresent()) {
        EntityModel model = cacheBuffer.getModel(entityModelId).get();
        Entity entity = null;
        if (model != null && model.getEntity().isPresent()) {
            entity = model.getEntity().get();
        }
        return Optional.of(new ModelProfile(entityModelId, entity, cacheBuffer.getMemoryConsumptionPerEntity()));
    }
    return Optional.empty();
}
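A hedged usage sketch, assuming cache is a PriorityCache instance and the ids are placeholders; the returned profile wraps the model id, its entity (possibly null), and the cache buffer's per-entity memory estimate, as seen in the constructor call above.

Optional<ModelProfile> profile = cache.getModelProfile("example-detector-id", "example-entity-model-id");
if (profile.isPresent()) {
    // a present value means the entity's model is currently hosted in the active cache
    logger.info("found an active model profile for entity model example-entity-model-id");
} else {
    // an empty Optional means the model is not in this node's active cache
    logger.info("entity model example-entity-model-id is not active on this node");
}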
Use of org.opensearch.ad.model.Entity in the anomaly-detection project by opensearch-project.
Class PriorityCache, method selectUpdateCandidate.
@Override
public Pair<List<Entity>, List<Entity>> selectUpdateCandidate(Collection<Entity> cacheMissEntities, String detectorId, AnomalyDetector detector) {
    List<Entity> hotEntities = new ArrayList<>();
    List<Entity> coldEntities = new ArrayList<>();
    CacheBuffer buffer = activeEnities.get(detectorId);
    if (buffer == null) {
        // Since this method is public, we need to handle this case in case of misuse.
        return Pair.of(hotEntities, coldEntities);
    }
    Iterator<Entity> cacheMissEntitiesIter = cacheMissEntities.iterator();
    // current buffer's dedicated cache has free slots
    while (cacheMissEntitiesIter.hasNext() && buffer.dedicatedCacheAvailable()) {
        addEntity(hotEntities, cacheMissEntitiesIter.next(), detectorId);
    }
    while (cacheMissEntitiesIter.hasNext() && memoryTracker.canAllocate(buffer.getMemoryConsumptionPerEntity())) {
        // we can allocate in the shared cache
        // Race conditions can happen when multiple threads evaluate this condition. That is
        // a problem when our AD memory usage is close to full and we put in more than we
        // planned. One model in HCAD is small, so it is fine if we exceed the limit a little.
        // We have regular maintenance to remove extra memory usage.
        addEntity(hotEntities, cacheMissEntitiesIter.next(), detectorId);
    }
    // check if we can replace anything in the dedicated or shared cache
    // keep a copy since we need to do the iteration twice: once for the
    // dedicated cache and once for the shared cache
    List<Entity> otherBufferReplaceCandidates = new ArrayList<>();
    while (cacheMissEntitiesIter.hasNext()) {
        // we can replace an entity in the same CacheBuffer living in the reserved
        // or shared cache
        // thread safe as each detector has at most one thread at a time and only
        // that thread can access its buffer.
        Entity entity = cacheMissEntitiesIter.next();
        Optional<String> modelId = entity.getModelId(detectorId);
        if (false == modelId.isPresent()) {
            continue;
        }
        Optional<ModelState<EntityModel>> state = getStateFromInactiveEntiiyCache(modelId.get());
        if (false == state.isPresent()) {
            // not even recorded in inActiveEntities yet because of the doorKeeper
            continue;
        }
        ModelState<EntityModel> modelState = state.get();
        float priority = modelState.getPriority();
        if (buffer.canReplaceWithinDetector(priority)) {
            addEntity(hotEntities, entity, detectorId);
        } else {
            // re-evaluate the replacement condition in other buffers
            otherBufferReplaceCandidates.add(entity);
        }
    }
    // record the current minimum priority among all detectors to save redundant
    // scanning of all CacheBuffers
    CacheBuffer bufferToRemove = null;
    float minPriority = Float.MIN_VALUE;
    // check if we can replace in another CacheBuffer
    cacheMissEntitiesIter = otherBufferReplaceCandidates.iterator();
    while (cacheMissEntitiesIter.hasNext()) {
        // If two threads try to remove the same entity and add their own state, the 2nd remove
        // returns null and only the first one succeeds.
        Entity entity = cacheMissEntitiesIter.next();
        Optional<String> modelId = entity.getModelId(detectorId);
        if (false == modelId.isPresent()) {
            continue;
        }
        Optional<ModelState<EntityModel>> inactiveState = getStateFromInactiveEntiiyCache(modelId.get());
        if (false == inactiveState.isPresent()) {
            // an empty state should not stand a chance to replace others
            continue;
        }
        ModelState<EntityModel> state = inactiveState.get();
        float priority = state.getPriority();
        float scaledPriority = buffer.getPriorityTracker().getScaledPriority(priority);
        if (scaledPriority <= minPriority) {
            // not larger than minPriority, so we can put this entity into coldEntities
            addEntity(coldEntities, entity, detectorId);
            continue;
        }
        // Float.MIN_VALUE means we need to re-iterate through all CacheBuffers
        if (minPriority == Float.MIN_VALUE) {
            Triple<CacheBuffer, String, Float> bufferToRemoveEntity = canReplaceInSharedCache(buffer, scaledPriority);
            bufferToRemove = bufferToRemoveEntity.getLeft();
            minPriority = bufferToRemoveEntity.getRight();
        }
        if (bufferToRemove != null) {
            addEntity(hotEntities, entity, detectorId);
            // reset minPriority after the replacement so that we will iterate through all
            // CacheBuffers again
            minPriority = Float.MIN_VALUE;
        } else {
            // after trying everything, we can now safely put this entity into the cold entities list
            addEntity(coldEntities, entity, detectorId);
        }
    }
    return Pair.of(hotEntities, coldEntities);
}
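A hedged caller sketch for selectUpdateCandidate follows. It assumes cache, cacheMissEntities, detectorId, and detector already exist; how the hot and cold lists are consumed below is illustrative, not the plugin's actual flow.

Pair<List<Entity>, List<Entity>> candidates = cache.selectUpdateCandidate(cacheMissEntities, detectorId, detector);
List<Entity> hotEntities = candidates.getLeft();
List<Entity> coldEntities = candidates.getRight();
// hot entities stand a chance of getting a cache slot, so they are worth an
// immediate checkpoint read; cold entities can be deferred or processed lazily
for (Entity hot : hotEntities) {
    logger.debug("cache-miss entity selected for update: {}", hot);
}
logger.debug("{} cache-miss entities deferred as cold", coldEntities.size());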
Use of org.opensearch.ad.model.Entity in the anomaly-detection project by opensearch-project.
Class SearchFeatureDao, method getFeaturesForPeriodByBatch.
public void getFeaturesForPeriodByBatch(AnomalyDetector detector, Entity entity, long startTime, long endTime, ActionListener<Map<Long, Optional<double[]>>> listener) throws IOException {
    SearchSourceBuilder searchSourceBuilder = batchFeatureQuery(detector, entity, startTime, endTime, xContent);
    logger.debug("Batch query for detector {}: {} ", detector.getDetectorId(), searchSourceBuilder);
    SearchRequest searchRequest = new SearchRequest(detector.getIndices().toArray(new String[0])).source(searchSourceBuilder);
    client.search(searchRequest, ActionListener.wrap(response -> {
        listener.onResponse(parseBucketAggregationResponse(response, detector.getEnabledFeatureIds()));
    }, listener::onFailure));
}
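A hedged usage sketch, assuming searchFeatureDao, detector, entity, and logger are available; the one-hour range is illustrative, and the IOException thrown while building the query is left to the surrounding method.

long endTime = System.currentTimeMillis();
long startTime = endTime - Duration.ofHours(1).toMillis(); // requires java.time.Duration
searchFeatureDao.getFeaturesForPeriodByBatch(detector, entity, startTime, endTime, ActionListener.wrap(features -> {
    // the map is keyed by bucket start time in epoch milliseconds; a bucket
    // with no documents comes back as Optional.empty()
    features.forEach((timestamp, feature) -> logger.debug("bucket {} has data: {}", timestamp, feature.isPresent()));
}, exception -> logger.error("batch feature query failed", exception)));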