
Example 1 with Entity

use of org.opensearch.ad.model.Entity in project anomaly-detection by opensearch-project.

From the class CheckpointDao, method toIndexSource:

/**
 * Prepare for index request using the contents of the given model state
 * @param modelState an entity model state
 * @return serialized JSON map or empty map if the state is too bloated
 * @throws IOException  when serialization fails
 */
public Map<String, Object> toIndexSource(ModelState<EntityModel> modelState) throws IOException {
    Map<String, Object> source = new HashMap<>();
    EntityModel model = modelState.getModel();
    Optional<String> serializedModel = toCheckpoint(model, modelState.getModelId());
    if (!serializedModel.isPresent() || serializedModel.get().length() > maxCheckpointBytes) {
        logger.warn(new ParameterizedMessage("[{}]'s model is empty or too large: [{}] bytes", modelState.getModelId(), serializedModel.isPresent() ? serializedModel.get().length() : 0));
        return source;
    }
    String detectorId = modelState.getDetectorId();
    source.put(DETECTOR_ID, detectorId);
    // we cannot pass Optional as OpenSearch does not know how to serialize an Optional value
    source.put(FIELD_MODELV2, serializedModel.get());
    source.put(TIMESTAMP, ZonedDateTime.now(ZoneOffset.UTC));
    source.put(CommonName.SCHEMA_VERSION_FIELD, indexUtil.getSchemaVersion(ADIndex.CHECKPOINT));
    Optional<Entity> entity = model.getEntity();
    if (entity.isPresent()) {
        source.put(CommonName.ENTITY_KEY, entity.get());
    }
    return source;
}
Also used : Entity(org.opensearch.ad.model.Entity) HashMap(java.util.HashMap) JsonObject(com.google.gson.JsonObject) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage)
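
For context, the map returned by toIndexSource is what gets written back as a checkpoint document. The snippet below is a minimal sketch of that write path rather than the plugin's actual code; the saveCheckpoint helper, the checkpointIndex name, and the surrounding variables are assumptions for illustration.

import java.io.IOException;
import java.util.Map;

import org.opensearch.action.ActionListener;
import org.opensearch.action.index.IndexRequest;
import org.opensearch.action.index.IndexResponse;
import org.opensearch.client.Client;

// Hypothetical helper: persist one entity model state as a checkpoint document.
public void saveCheckpoint(Client client, CheckpointDao checkpointDao, ModelState<EntityModel> modelState, String checkpointIndex) throws IOException {
    Map<String, Object> source = checkpointDao.toIndexSource(modelState);
    if (source.isEmpty()) {
        // toIndexSource returns an empty map when the model is missing or too large; skip the write
        return;
    }
    IndexRequest request = new IndexRequest(checkpointIndex)
        // one checkpoint document per model id
        .id(modelState.getModelId())
        // serialized model, detector id, timestamp, schema version, and (if present) entity
        .source(source);
    client.index(request, ActionListener.wrap((IndexResponse r) -> {
        // success: nothing more to do in this sketch
    }, e -> {
        // failure handling elided
    }));
}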

Example 2 with Entity

use of org.opensearch.ad.model.Entity in project anomaly-detection by opensearch-project.

From the class EntityColdStarter, method getEntityColdStartData:

/**
 * Get training data for an entity.
 *
 * We first note the maximum and minimum timestamp, and sample at most 24 points
 * (with 60 points apart between two neighboring samples) between those minimum
 * and maximum timestamps.  Samples can be missing.  We only interpolate points
 * between present neighboring samples. We then transform samples and interpolate
 * points to shingles. Finally, full shingles will be used for cold start.
 *
 * @param detectorId detector Id
 * @param entity the entity's information
 * @param listener listener to return training data
 */
private void getEntityColdStartData(String detectorId, Entity entity, ActionListener<Optional<List<double[][]>>> listener) {
    ActionListener<Optional<AnomalyDetector>> getDetectorListener = ActionListener.wrap(detectorOp -> {
        if (!detectorOp.isPresent()) {
            listener.onFailure(new EndRunException(detectorId, "AnomalyDetector is not available.", false));
            return;
        }
        List<double[][]> coldStartData = new ArrayList<>();
        AnomalyDetector detector = detectorOp.get();
        ActionListener<Optional<Long>> minTimeListener = ActionListener.wrap(earliest -> {
            if (earliest.isPresent()) {
                long startTimeMs = earliest.get().longValue();
                nodeStateManager.getAnomalyDetectorJob(detectorId, ActionListener.wrap(jobOp -> {
                    if (!jobOp.isPresent()) {
                        listener.onFailure(new EndRunException(detectorId, "AnomalyDetector job is not available.", false));
                        return;
                    }
                    AnomalyDetectorJob job = jobOp.get();
                    // End time uses milliseconds, as the start time is assumed to be in milliseconds.
                    // OpenSearch uses a set of preconfigured formats to recognize and parse these strings into a long value
                    // representing milliseconds-since-the-epoch in UTC.
                    // More on https://tinyurl.com/wub4fk92
                    // Existing samples either predate or coincide with cold start data. In either case,
                    // combining them without reordering based on timestamps is not safe: we might introduce
                    // anomalies in the process.
                    // An ideal solution would be to record the timestamps of data points, combine existing
                    // samples with cold start samples, and interpolate afterwards. Recording timestamps
                    // requires changes across the board, like bwc in checkpoints. A pragmatic solution is to use
                    // the job enabled time as the end time of the cold start period, as that makes it easier to
                    // combine existing samples with cold start data: we just need to append existing samples after
                    // the cold start data, since existing samples all happen after the job enabled time. There might
                    // be some gaps between the last cold start sample and the first accumulated sample.
                    // We will need to accept that precision loss in the current solution.
                    long endTimeMs = job.getEnabledTime().toEpochMilli();
                    Pair<Integer, Integer> params = selectRangeParam(detector);
                    int stride = params.getLeft();
                    int numberOfSamples = params.getRight();
                    // we start with round 0
                    getFeatures(listener, 0, coldStartData, detector, entity, stride, numberOfSamples, startTimeMs, endTimeMs);
                }, listener::onFailure));
            } else {
                listener.onResponse(Optional.empty());
            }
        }, listener::onFailure);
        searchFeatureDao.getEntityMinDataTime(detector, entity, new ThreadedActionListener<>(logger, threadPool, AnomalyDetectorPlugin.AD_THREAD_POOL_NAME, minTimeListener, false));
    }, listener::onFailure);
    nodeStateManager.getAnomalyDetector(detectorId, new ThreadedActionListener<>(logger, threadPool, AnomalyDetectorPlugin.AD_THREAD_POOL_NAME, getDetectorListener, false));
}
Also used : Arrays(java.util.Arrays) Precision(com.amazon.randomcutforest.config.Precision) RequestPriority(org.opensearch.ad.ratelimit.RequestPriority) ThreadPool(org.opensearch.threadpool.ThreadPool) SimpleImmutableEntry(java.util.AbstractMap.SimpleImmutableEntry) ThreadedActionListener(org.opensearch.action.support.ThreadedActionListener) ParameterizedMessage(org.apache.logging.log4j.message.ParameterizedMessage) AnomalyDetectorSettings(org.opensearch.ad.settings.AnomalyDetectorSettings) ArrayList(java.util.ArrayList) AnomalyDetectionException(org.opensearch.ad.common.exception.AnomalyDetectionException) Throwables(org.apache.logging.log4j.core.util.Throwables) DoorKeeper(org.opensearch.ad.caching.DoorKeeper) Pair(org.apache.commons.lang3.tuple.Pair) Duration(java.time.Duration) Map(java.util.Map) AnomalyDetector(org.opensearch.ad.model.AnomalyDetector) ActionListener(org.opensearch.action.ActionListener) EndRunException(org.opensearch.ad.common.exception.EndRunException) Interpolator(org.opensearch.ad.dataprocessor.Interpolator) COOLDOWN_MINUTES(org.opensearch.ad.settings.AnomalyDetectorSettings.COOLDOWN_MINUTES) FeatureManager(org.opensearch.ad.feature.FeatureManager) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Settings(org.opensearch.common.settings.Settings) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) MaintenanceState(org.opensearch.ad.MaintenanceState) List(java.util.List) Stream(java.util.stream.Stream) Logger(org.apache.logging.log4j.Logger) ExceptionUtil(org.opensearch.ad.util.ExceptionUtil) Entity(org.opensearch.ad.model.Entity) NodeStateManager(org.opensearch.ad.NodeStateManager) AnomalyDetectorJob(org.opensearch.ad.model.AnomalyDetectorJob) ThresholdedRandomCutForest(com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest) Entry(java.util.Map.Entry) SearchFeatureDao(org.opensearch.ad.feature.SearchFeatureDao) Clock(java.time.Clock) Optional(java.util.Optional) AnomalyDetectorPlugin(org.opensearch.ad.AnomalyDetectorPlugin) Queue(java.util.Queue) ArrayDeque(java.util.ArrayDeque) IntervalTimeConfiguration(org.opensearch.ad.model.IntervalTimeConfiguration) LogManager(org.apache.logging.log4j.LogManager) CheckpointWriteWorker(org.opensearch.ad.ratelimit.CheckpointWriteWorker) EndRunException(org.opensearch.ad.common.exception.EndRunException) Optional(java.util.Optional) ArrayList(java.util.ArrayList) AnomalyDetectorJob(org.opensearch.ad.model.AnomalyDetectorJob) AnomalyDetector(org.opensearch.ad.model.AnomalyDetector) Pair(org.apache.commons.lang3.tuple.Pair)
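
The javadoc above fixes the sampling scheme: at most 24 samples, spaced up to 60 detector intervals apart, between the earliest data time and the job enabled time. The snippet below is a simplified, hypothetical version of that stride/sample-count choice; it is not the plugin's selectRangeParam, only an illustration of the idea assuming the detector interval is known in milliseconds.

import org.apache.commons.lang3.tuple.Pair;

// Illustrative only: pick a stride (in detector intervals) and a sample count so that
// at most 24 samples, up to 60 intervals apart, fit between startTimeMs and endTimeMs.
public static Pair<Integer, Integer> chooseStrideAndSamples(long startTimeMs, long endTimeMs, long detectorIntervalMs) {
    int maxSamples = 24;
    int maxStride = 60;
    long availableIntervals = Math.max(0, (endTimeMs - startTimeMs) / detectorIntervalMs);
    // halve the stride until the requested number of samples fits into the available range
    int stride = maxStride;
    while (stride > 1 && (long) stride * maxSamples > availableIntervals) {
        stride /= 2;
    }
    int numberOfSamples = (int) Math.min(maxSamples, Math.max(1, availableIntervals / stride));
    return Pair.of(stride, numberOfSamples);
}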

Example 3 with Entity

use of org.opensearch.ad.model.Entity in project anomaly-detection by opensearch-project.

From the class PriorityCache, method getModelProfile:

/**
 * Gets an entity's model state
 *
 * @param detectorId detector id
 * @param entityModelId  entity model id
 * @return the model state
 */
@Override
public Optional<ModelProfile> getModelProfile(String detectorId, String entityModelId) {
    CacheBuffer cacheBuffer = activeEnities.get(detectorId);
    if (cacheBuffer != null && cacheBuffer.getModel(entityModelId).isPresent()) {
        EntityModel model = cacheBuffer.getModel(entityModelId).get();
        Entity entity = null;
        if (model != null && model.getEntity().isPresent()) {
            entity = model.getEntity().get();
        }
        return Optional.of(new ModelProfile(entityModelId, entity, cacheBuffer.getMemoryConsumptionPerEntity()));
    }
    return Optional.empty();
}
Also used : Entity(org.opensearch.ad.model.Entity) EntityModel(org.opensearch.ad.ml.EntityModel) ModelProfile(org.opensearch.ad.model.ModelProfile)
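
A small caller-side sketch, assuming the cache is reached through the interface PriorityCache implements (written here as EntityCache) and that the caller already knows which entity model ids it cares about; the collectProfiles helper is hypothetical.

import java.util.ArrayList;
import java.util.List;
import java.util.Optional;

import org.opensearch.ad.caching.EntityCache;
import org.opensearch.ad.model.ModelProfile;

// Collect profiles for the given model ids; ids without an active model are simply skipped.
public static List<ModelProfile> collectProfiles(EntityCache cache, String detectorId, List<String> entityModelIds) {
    List<ModelProfile> profiles = new ArrayList<>();
    for (String modelId : entityModelIds) {
        Optional<ModelProfile> profile = cache.getModelProfile(detectorId, modelId);
        profile.ifPresent(profiles::add);
    }
    return profiles;
}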

Example 4 with Entity

use of org.opensearch.ad.model.Entity in project anomaly-detection by opensearch-project.

From the class PriorityCache, method selectUpdateCandidate:

@Override
public Pair<List<Entity>, List<Entity>> selectUpdateCandidate(Collection<Entity> cacheMissEntities, String detectorId, AnomalyDetector detector) {
    List<Entity> hotEntities = new ArrayList<>();
    List<Entity> coldEntities = new ArrayList<>();
    CacheBuffer buffer = activeEnities.get(detectorId);
    if (buffer == null) {
        // Since this method is public, we need to handle this case to guard against misuse.
        return Pair.of(hotEntities, coldEntities);
    }
    Iterator<Entity> cacheMissEntitiesIter = cacheMissEntities.iterator();
    // current buffer's dedicated cache has free slots
    while (cacheMissEntitiesIter.hasNext() && buffer.dedicatedCacheAvailable()) {
        addEntity(hotEntities, cacheMissEntitiesIter.next(), detectorId);
    }
    while (cacheMissEntitiesIter.hasNext() && memoryTracker.canAllocate(buffer.getMemoryConsumptionPerEntity())) {
        // can allocate in shared cache
        // Race conditions can happen when multiple threads are evaluating this condition.
        // That is only a problem when AD memory usage is close to full and we end up putting
        // in more than we planned. One model in HCAD is small, so exceeding the limit a little
        // is fine. We have regular maintenance to remove extra memory usage.
        addEntity(hotEntities, cacheMissEntitiesIter.next(), detectorId);
    }
    // check if we can replace anything in dedicated or shared cache
    // have a copy since we need to do the iteration twice: one for
    // dedicated cache and one for shared cache
    List<Entity> otherBufferReplaceCandidates = new ArrayList<>();
    while (cacheMissEntitiesIter.hasNext()) {
        // We can replace an entity in the same CacheBuffer living in the reserved
        // or shared cache. This is thread safe because each detector has at most one
        // thread running at a time, and only that thread can access its buffer.
        Entity entity = cacheMissEntitiesIter.next();
        Optional<String> modelId = entity.getModelId(detectorId);
        if (false == modelId.isPresent()) {
            continue;
        }
        Optional<ModelState<EntityModel>> state = getStateFromInactiveEntiiyCache(modelId.get());
        if (false == state.isPresent()) {
            // not even recorded in inActiveEntities yet because of doorKeeper
            continue;
        }
        ModelState<EntityModel> modelState = state.get();
        float priority = modelState.getPriority();
        if (buffer.canReplaceWithinDetector(priority)) {
            addEntity(hotEntities, entity, detectorId);
        } else {
            // re-evaluate replacement condition in other buffers
            otherBufferReplaceCandidates.add(entity);
        }
    }
    // record current minimum priority among all detectors to save redundant
    // scanning of all CacheBuffers
    CacheBuffer bufferToRemove = null;
    float minPriority = Float.MIN_VALUE;
    // check if we can replace in other CacheBuffer
    cacheMissEntitiesIter = otherBufferReplaceCandidates.iterator();
    while (cacheMissEntitiesIter.hasNext()) {
        // If two threads try to remove the same entity and add their own state, the 2nd remove
        // returns null and only the first one succeeds.
        Entity entity = cacheMissEntitiesIter.next();
        Optional<String> modelId = entity.getModelId(detectorId);
        if (false == modelId.isPresent()) {
            continue;
        }
        Optional<ModelState<EntityModel>> inactiveState = getStateFromInactiveEntiiyCache(modelId.get());
        if (false == inactiveState.isPresent()) {
            // an empty state should not get a chance to replace others
            continue;
        }
        ModelState<EntityModel> state = inactiveState.get();
        float priority = state.getPriority();
        float scaledPriority = buffer.getPriorityTracker().getScaledPriority(priority);
        if (scaledPriority <= minPriority) {
            // not larger than minPriority, so we can put this into coldEntities
            addEntity(coldEntities, entity, detectorId);
            continue;
        }
        // Float.MIN_VALUE means we need to re-iterate through all CacheBuffers
        if (minPriority == Float.MIN_VALUE) {
            Triple<CacheBuffer, String, Float> bufferToRemoveEntity = canReplaceInSharedCache(buffer, scaledPriority);
            bufferToRemove = bufferToRemoveEntity.getLeft();
            minPriority = bufferToRemoveEntity.getRight();
        }
        if (bufferToRemove != null) {
            addEntity(hotEntities, entity, detectorId);
            // reset minPriority after the replacement so that we iterate through all CacheBuffers again
            minPriority = Float.MIN_VALUE;
        } else {
            // after trying everything, we can now safely put this into the cold entities list
            addEntity(coldEntities, entity, detectorId);
        }
    }
    return Pair.of(hotEntities, coldEntities);
}
Also used : Entity(org.opensearch.ad.model.Entity) ArrayList(java.util.ArrayList) EntityModel(org.opensearch.ad.ml.EntityModel) ModelState(org.opensearch.ad.ml.ModelState)
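
A hedged caller-side sketch: for a batch of cache misses, split the entities into hot and cold candidates and hand each group off. The CacheMissHandler class and its loadModels/enqueueColdStart stubs are placeholders, roughly standing in for restoring checkpoints for hot entities and queuing cold entities for cold start.

import java.util.Collection;
import java.util.List;

import org.apache.commons.lang3.tuple.Pair;
import org.opensearch.ad.caching.PriorityCache;
import org.opensearch.ad.model.AnomalyDetector;
import org.opensearch.ad.model.Entity;

public class CacheMissHandler {

    private final PriorityCache cache;

    public CacheMissHandler(PriorityCache cache) {
        this.cache = cache;
    }

    // Split cache misses into hot entities (worth loading a model for now)
    // and cold entities (deferred), then hand each group to placeholder handlers.
    public void handleCacheMisses(Collection<Entity> cacheMissEntities, String detectorId, AnomalyDetector detector) {
        Pair<List<Entity>, List<Entity>> candidates = cache.selectUpdateCandidate(cacheMissEntities, detectorId, detector);
        List<Entity> hotEntities = candidates.getLeft();
        List<Entity> coldEntities = candidates.getRight();
        loadModels(hotEntities, detectorId);
        enqueueColdStart(coldEntities, detectorId);
    }

    private void loadModels(List<Entity> hotEntities, String detectorId) {
        // placeholder: restore checkpoints and score queued data points for these entities
    }

    private void enqueueColdStart(List<Entity> coldEntities, String detectorId) {
        // placeholder: push these entities to a rate-limited cold start queue
    }
}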

Example 5 with Entity

use of org.opensearch.ad.model.Entity in project anomaly-detection by opensearch-project.

From the class SearchFeatureDao, method getFeaturesForPeriodByBatch:

public void getFeaturesForPeriodByBatch(AnomalyDetector detector, Entity entity, long startTime, long endTime, ActionListener<Map<Long, Optional<double[]>>> listener) throws IOException {
    SearchSourceBuilder searchSourceBuilder = batchFeatureQuery(detector, entity, startTime, endTime, xContent);
    logger.debug("Batch query for detector {}: {} ", detector.getDetectorId(), searchSourceBuilder);
    SearchRequest searchRequest = new SearchRequest(detector.getIndices().toArray(new String[0])).source(searchSourceBuilder);
    client.search(searchRequest, ActionListener.wrap(response -> {
        listener.onResponse(parseBucketAggregationResponse(response, detector.getEnabledFeatureIds()));
    }, listener::onFailure));
}
Also used : Arrays(java.util.Arrays) Max(org.opensearch.search.aggregations.metrics.Max) Aggregation(org.opensearch.search.aggregations.Aggregation) ZonedDateTime(java.time.ZonedDateTime) AnomalyDetectionException(org.opensearch.ad.common.exception.AnomalyDetectionException) AggregationBuilder(org.opensearch.search.aggregations.AggregationBuilder) FieldSortBuilder(org.opensearch.search.sort.FieldSortBuilder) Locale(java.util.Locale) Map(java.util.Map) ParseUtils.batchFeatureQuery(org.opensearch.ad.util.ParseUtils.batchFeatureQuery) ActionListener(org.opensearch.action.ActionListener) Interpolator(org.opensearch.ad.dataprocessor.Interpolator) Client(org.opensearch.client.Client) Settings(org.opensearch.common.settings.Settings) MultiBucketsAggregation(org.opensearch.search.aggregations.bucket.MultiBucketsAggregation) Terms(org.opensearch.search.aggregations.bucket.terms.Terms) Collectors(java.util.stream.Collectors) List(java.util.List) Logger(org.apache.logging.log4j.Logger) PREVIEW_TIMEOUT_IN_MILLIS(org.opensearch.ad.settings.AnomalyDetectorSettings.PREVIEW_TIMEOUT_IN_MILLIS) SearchSourceBuilder(org.opensearch.search.builder.SearchSourceBuilder) Entry(java.util.Map.Entry) DATE_HISTOGRAM(org.opensearch.ad.constant.CommonName.DATE_HISTOGRAM) Optional(java.util.Optional) Bucket(org.opensearch.search.aggregations.bucket.range.InternalDateRange.Bucket) TermsValuesSourceBuilder(org.opensearch.search.aggregations.bucket.composite.TermsValuesSourceBuilder) BoolQueryBuilder(org.opensearch.index.query.BoolQueryBuilder) InternalDateRange(org.opensearch.search.aggregations.bucket.range.InternalDateRange) MatrixUtils.createRealMatrix(org.apache.commons.math3.linear.MatrixUtils.createRealMatrix) HashMap(java.util.HashMap) Aggregations(org.opensearch.search.aggregations.Aggregations) ArrayList(java.util.ArrayList) SortOrder(org.opensearch.search.sort.SortOrder) PAGE_SIZE(org.opensearch.ad.settings.AnomalyDetectorSettings.PAGE_SIZE) AnomalyDetector(org.opensearch.ad.model.AnomalyDetector) SearchRequest(org.opensearch.action.search.SearchRequest) SearchResponse(org.opensearch.action.search.SearchResponse) SimpleEntry(java.util.AbstractMap.SimpleEntry) MAX_ENTITIES_FOR_PREVIEW(org.opensearch.ad.settings.AnomalyDetectorSettings.MAX_ENTITIES_FOR_PREVIEW) QueryBuilders(org.opensearch.index.query.QueryBuilders) CommonName(org.opensearch.ad.constant.CommonName) ClientUtil(org.opensearch.ad.util.ClientUtil) RangeQueryBuilder(org.opensearch.index.query.RangeQueryBuilder) InternalComposite(org.opensearch.search.aggregations.bucket.composite.InternalComposite) TermQueryBuilder(org.opensearch.index.query.TermQueryBuilder) IOException(java.io.IOException) PipelineAggregatorBuilders(org.opensearch.search.aggregations.PipelineAggregatorBuilders) Min(org.opensearch.search.aggregations.metrics.Min) CompositeAggregation(org.opensearch.search.aggregations.bucket.composite.CompositeAggregation) AggregationBuilders(org.opensearch.search.aggregations.AggregationBuilders) Entity(org.opensearch.ad.model.Entity) NamedXContentRegistry(org.opensearch.common.xcontent.NamedXContentRegistry) ClusterService(org.opensearch.cluster.service.ClusterService) Clock(java.time.Clock) ArrayDeque(java.util.ArrayDeque) Comparator(java.util.Comparator) IntervalTimeConfiguration(org.opensearch.ad.model.IntervalTimeConfiguration) Collections(java.util.Collections) LogManager(org.apache.logging.log4j.LogManager) ParseUtils(org.opensearch.ad.util.ParseUtils) SearchRequest(org.opensearch.action.search.SearchRequest) 
SearchSourceBuilder(org.opensearch.search.builder.SearchSourceBuilder)
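
A usage sketch, assuming the caller already holds a SearchFeatureDao, a detector, and an entity; the fetchBatchFeatures helper and the bucket counting are purely illustrative.

import java.io.IOException;
import java.util.Optional;

import org.opensearch.action.ActionListener;
import org.opensearch.ad.feature.SearchFeatureDao;
import org.opensearch.ad.model.AnomalyDetector;
import org.opensearch.ad.model.Entity;

// Fetch one entity's features for [startMs, endMs) and count how many buckets returned data.
public void fetchBatchFeatures(SearchFeatureDao searchFeatureDao, AnomalyDetector detector, Entity entity, long startMs, long endMs) throws IOException {
    searchFeatureDao.getFeaturesForPeriodByBatch(detector, entity, startMs, endMs, ActionListener.wrap(features -> {
        long bucketsWithData = features.values().stream().filter(Optional::isPresent).count();
        // buckets without data are missing samples; downstream code may interpolate between present neighbors
    }, e -> {
        // failure handling elided
    }));
}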

Aggregations

Entity (org.opensearch.ad.model.Entity): 66 usages
ActionListener (org.opensearch.action.ActionListener): 37 usages
ArrayList (java.util.ArrayList): 36 usages
List (java.util.List): 35 usages
AnomalyDetector (org.opensearch.ad.model.AnomalyDetector): 34 usages
Client (org.opensearch.client.Client): 31 usages
Optional (java.util.Optional): 30 usages
SearchRequest (org.opensearch.action.search.SearchRequest): 30 usages
HashMap (java.util.HashMap): 29 usages
Map (java.util.Map): 28 usages
IOException (java.io.IOException): 26 usages
LogManager (org.apache.logging.log4j.LogManager): 26 usages
Logger (org.apache.logging.log4j.Logger): 26 usages
SearchResponse (org.opensearch.action.search.SearchResponse): 25 usages
Settings (org.opensearch.common.settings.Settings): 23 usages
Set (java.util.Set): 22 usages
GetRequest (org.opensearch.action.get.GetRequest): 22 usages
ANOMALY_DETECTORS_INDEX (org.opensearch.ad.model.AnomalyDetector.ANOMALY_DETECTORS_INDEX): 22 usages
AnomalyDetectorJob (org.opensearch.ad.model.AnomalyDetectorJob): 22 usages
AnomalyDetectionException (org.opensearch.ad.common.exception.AnomalyDetectionException): 21 usages