Search in sources :

Example 1 with RawAnomalyResultDTO

use of com.linkedin.thirdeye.datalayer.dto.RawAnomalyResultDTO in project pinot by linkedin.

The class AnomalyMergeExecutor defines the method updateMergedScoreAndPersist.

/**
 * Recomputes the score, weight and message of the given merged anomaly from its raw
 * child anomalies, then persists the merged result and the (now flagged) raw results.
 *
 * @param mergedResult the merged anomaly to update and persist
 * @param mergeConfig  merge configuration passed to the function-specific recomputation
 */
private void updateMergedScoreAndPersist(MergedAnomalyResultDTO mergedResult, AnomalyMergeConfig mergeConfig) {
    // Duration-weighted averages act as the default score/weight; they remain in place
    // if the function-specific recomputation below fails.
    double weightedScoreSum = 0.0;
    double weightedWeightSum = 0.0;
    double totalBucketSize = 0.0;
    // to prevent from double overflow
    double normalizationFactor = 1000;
    String anomalyMessage = "";
    for (RawAnomalyResultDTO anomalyResult : mergedResult.getAnomalyResults()) {
        anomalyResult.setMerged(true);
        // Floating-point division: the previous integer division truncated sub-second
        // durations to 0 and dropped fractional seconds from every bucket.
        double bucketSizeSeconds = (anomalyResult.getEndTime() - anomalyResult.getStartTime()) / 1000.0;
        weightedScoreSum += (anomalyResult.getScore() / normalizationFactor) * bucketSizeSeconds;
        weightedWeightSum += (anomalyResult.getWeight() / normalizationFactor) * bucketSizeSeconds;
        totalBucketSize += bucketSizeSeconds;
        // The merged message ends up being that of the last raw anomaly in iteration order.
        anomalyMessage = anomalyResult.getMessage();
    }
    if (totalBucketSize != 0) {
        mergedResult.setScore((weightedScoreSum / totalBucketSize) * normalizationFactor);
        mergedResult.setWeight((weightedWeightSum / totalBucketSize) * normalizationFactor);
    }
    mergedResult.setMessage(anomalyMessage);
    if (mergedResult.getAnomalyResults().size() > 1) {
        // recompute weight using anomaly function specific method
        try {
            updateMergedAnomalyWeight(mergedResult, mergeConfig);
        } catch (Exception e) {
            AnomalyFunctionDTO function = mergedResult.getFunction();
            // No placeholder for the exception: SLF4J then treats the trailing Throwable
            // as the log throwable and prints the full stack trace, not just e.toString().
            LOG.warn("Unable to compute merged weight and the average weight of raw anomalies is used. Dataset: {}, Topic Metric: {}, Function: {}, Time:{} - {}", function.getCollection(), function.getTopicMetric(), function.getFunctionName(), new DateTime(mergedResult.getStartTime()), new DateTime(mergedResult.getEndTime()), e);
        }
    }
    try {
        // persist the merged result, then each raw anomaly with its merged flag set
        mergedResultDAO.update(mergedResult);
        for (RawAnomalyResultDTO rawAnomalyResultDTO : mergedResult.getAnomalyResults()) {
            anomalyResultDAO.update(rawAnomalyResultDTO);
        }
    } catch (Exception e) {
        LOG.error("Could not persist merged result : [" + mergedResult.toString() + "]", e);
    }
}
Also used : RawAnomalyResultDTO(com.linkedin.thirdeye.datalayer.dto.RawAnomalyResultDTO) AnomalyFunctionDTO(com.linkedin.thirdeye.datalayer.dto.AnomalyFunctionDTO) DateTime(org.joda.time.DateTime)

Example 2 with RawAnomalyResultDTO

use of com.linkedin.thirdeye.datalayer.dto.RawAnomalyResultDTO in project pinot by linkedin.

The class AnomalyTimeBasedSummarizer defines the method mergeAnomalies.

/**
   * Merges a sorted list of raw anomalies into one or more merged anomalies, optionally
   * continuing from the last merged anomaly and splitting on time gaps or maximum duration.
   *
   * @param mergedAnomaly : last merged anomaly
   * @param anomalies     : list of raw anomalies to be merged with last mergedAnomaly
   * @param maxMergedDurationMillis   : length of a merged anomaly
   * @param sequentialAllowedGap : allowed gap between two raw anomalies in order to merge
   * @return the list of merged anomalies produced from the raw anomalies
   */
public static List<MergedAnomalyResultDTO> mergeAnomalies(MergedAnomalyResultDTO mergedAnomaly, List<RawAnomalyResultDTO> anomalies, long maxMergedDurationMillis, long sequentialAllowedGap) {
    // Sort anomalies in natural order of start time. Long.compare avoids the previous
    // subtract-then-cast comparator, which treated start times within the same second
    // as equal and could overflow the int cast for far-apart timestamps.
    Collections.sort(anomalies, (o1, o2) -> Long.compare(o1.getStartTime(), o2.getStartTime()));
    boolean applySequentialGapBasedSplit = false;
    boolean applyMaxDurationBasedSplit = false;
    if (maxMergedDurationMillis > 0) {
        applyMaxDurationBasedSplit = true;
    }
    if (sequentialAllowedGap > 0) {
        applySequentialGapBasedSplit = true;
    }
    List<MergedAnomalyResultDTO> mergedAnomalies = new ArrayList<>();
    for (int i = 0; i < anomalies.size(); i++) {
        RawAnomalyResultDTO currentResult = anomalies.get(i);
        // Start a fresh merged anomaly when there is none, or when the current raw
        // anomaly ends before the carried-over merged anomaly even begins.
        if (mergedAnomaly == null || currentResult.getEndTime() < mergedAnomaly.getStartTime()) {
            mergedAnomaly = new MergedAnomalyResultDTO();
            populateMergedResult(mergedAnomaly, currentResult);
        } else {
            // compare current with merged and decide whether to merge the current result or create a new one
            if (applySequentialGapBasedSplit && (currentResult.getStartTime() - mergedAnomaly.getEndTime()) > sequentialAllowedGap) {
                // Split here
                // add previous merged result
                mergedAnomalies.add(mergedAnomaly);
                //set current raw result
                mergedAnomaly = new MergedAnomalyResultDTO();
                populateMergedResult(mergedAnomaly, currentResult);
            } else {
                // add the current raw result into mergedResult, expanding its window
                if (currentResult.getStartTime() < mergedAnomaly.getStartTime()) {
                    mergedAnomaly.setStartTime(currentResult.getStartTime());
                }
                if (currentResult.getEndTime() > mergedAnomaly.getEndTime()) {
                    mergedAnomaly.setEndTime(currentResult.getEndTime());
                }
                if (!mergedAnomaly.getAnomalyResults().contains(currentResult)) {
                    mergedAnomaly.getAnomalyResults().add(currentResult);
                    currentResult.setMerged(true);
                }
            }
        }
        // till this point merged result contains current raw result
        if (applyMaxDurationBasedSplit && // check if Max Duration for merged has passed, if so, create new one
        mergedAnomaly.getEndTime() - mergedAnomaly.getStartTime() >= maxMergedDurationMillis) {
            // check if next anomaly has same start time as current one, that should be merged with current one too
            if (i < (anomalies.size() - 1) && anomalies.get(i + 1).getStartTime().equals(currentResult.getStartTime())) {
            // no need to split as we want to include the next raw anomaly into the current one
            } else {
                // Split here
                mergedAnomalies.add(mergedAnomaly);
                mergedAnomaly = null;
            }
        }
        // flush the in-progress merged anomaly after the last raw anomaly
        if (i == (anomalies.size() - 1) && mergedAnomaly != null) {
            mergedAnomalies.add(mergedAnomaly);
        }
    }
    LOG.info("merging [{}] raw anomalies", anomalies.size());
    return mergedAnomalies;
}
Also used : RawAnomalyResultDTO(com.linkedin.thirdeye.datalayer.dto.RawAnomalyResultDTO) MergedAnomalyResultDTO(com.linkedin.thirdeye.datalayer.dto.MergedAnomalyResultDTO) ArrayList(java.util.ArrayList)

Example 3 with RawAnomalyResultDTO

use of com.linkedin.thirdeye.datalayer.dto.RawAnomalyResultDTO in project pinot by linkedin.

The class TimeBasedAnomalyMerger defines the method updateMergedAnomalyInfo.

/**
 * Updates score, weight, current/baseline averages and message of a merged anomaly from
 * its raw child anomalies, then attempts a function-specific recomputation.
 *
 * @param mergedResult the merged anomaly whose summary fields are updated in place
 * @param mergeConfig  merge configuration passed to the function-specific recomputation
 */
private void updateMergedAnomalyInfo(MergedAnomalyResultDTO mergedResult, AnomalyMergeConfig mergeConfig) {
    List<RawAnomalyResultDTO> rawAnomalies = mergedResult.getAnomalyResults();
    if (CollectionUtils.isEmpty(rawAnomalies)) {
        LOG.warn("Skip updating anomaly (id={}) because its does not have any children anomalies.", mergedResult.getId());
        return;
    }
    // Update the info of merged anomalies
    if (rawAnomalies.size() == 1) {
        // Single child: copy its fields directly, no averaging needed.
        RawAnomalyResultDTO rawAnomaly = rawAnomalies.get(0);
        mergedResult.setScore(rawAnomaly.getScore());
        mergedResult.setWeight(rawAnomaly.getWeight());
        mergedResult.setAvgCurrentVal(rawAnomaly.getAvgCurrentVal());
        mergedResult.setAvgBaselineVal(rawAnomaly.getAvgBaselineVal());
        mergedResult.setMessage(rawAnomaly.getMessage());
    } else {
        // Calculate default score and weight in case of any failure (e.g., DB exception) during the update
        double weightedScoreSum = 0.0;
        double weightedWeightSum = 0.0;
        double totalBucketSize = 0.0;
        double avgCurrent = 0.0;
        double avgBaseline = 0.0;
        String anomalyMessage = "";
        for (RawAnomalyResultDTO anomalyResult : rawAnomalies) {
            anomalyResult.setMerged(true);
            // Floating-point division: the previous integer division truncated sub-second
            // durations to 0 and dropped fractional seconds from every bucket.
            double bucketSizeSeconds = (anomalyResult.getEndTime() - anomalyResult.getStartTime()) / 1000.0;
            double normalizedBucketSize = getNormalizedBucketSize(bucketSizeSeconds);
            totalBucketSize += bucketSizeSeconds;
            weightedScoreSum += anomalyResult.getScore() * normalizedBucketSize;
            weightedWeightSum += anomalyResult.getWeight() * normalizedBucketSize;
            avgCurrent += anomalyResult.getAvgCurrentVal() * normalizedBucketSize;
            avgBaseline += anomalyResult.getAvgBaselineVal() * normalizedBucketSize;
            // The merged message ends up being that of the last raw anomaly in iteration order.
            anomalyMessage = anomalyResult.getMessage();
        }
        if (totalBucketSize != 0) {
            double normalizedTotalBucketSize = getNormalizedBucketSize(totalBucketSize);
            mergedResult.setScore(weightedScoreSum / normalizedTotalBucketSize);
            mergedResult.setWeight(weightedWeightSum / normalizedTotalBucketSize);
            mergedResult.setAvgCurrentVal(avgCurrent / normalizedTotalBucketSize);
            mergedResult.setAvgBaselineVal(avgBaseline / normalizedTotalBucketSize);
        }
        mergedResult.setMessage(anomalyMessage);
        // recompute weight using anomaly function specific method
        try {
            computeMergedAnomalyInfo(mergedResult, mergeConfig);
        } catch (Exception e) {
            AnomalyFunctionDTO function = mergedResult.getFunction();
            // No placeholder for the exception: SLF4J then treats the trailing Throwable
            // as the log throwable and prints the full stack trace, not just e.toString().
            LOG.warn("Unable to compute merged weight and the average weight of raw anomalies is used. Dataset: {}, Topic Metric: {}, Function: {}, Time:{} - {}", function.getCollection(), function.getTopicMetric(), function.getFunctionName(), new DateTime(mergedResult.getStartTime()), new DateTime(mergedResult.getEndTime()), e);
        }
    }
}
Also used : RawAnomalyResultDTO(com.linkedin.thirdeye.datalayer.dto.RawAnomalyResultDTO) AnomalyFunctionDTO(com.linkedin.thirdeye.datalayer.dto.AnomalyFunctionDTO) DateTime(org.joda.time.DateTime)

Example 4 with RawAnomalyResultDTO

use of com.linkedin.thirdeye.datalayer.dto.RawAnomalyResultDTO in project pinot by linkedin.

The class TimeBasedAnomalyMerger defines the method dimensionalShuffleAndUnifyMerge.

/**
 * Merges raw anomalies into time-based merged anomalies, one dimension combination at a
 * time, and returns the merged anomalies keyed by their dimensions.
 *
 * @param function            the anomaly function that produced the raw anomalies
 * @param mergeConfig         merge settings (max duration, allowed sequential gap)
 * @param dimensionsResultMap raw anomalies grouped by dimension combination
 * @return merged anomalies grouped by the same dimension combinations
 */
private ListMultimap<DimensionMap, MergedAnomalyResultDTO> dimensionalShuffleAndUnifyMerge(AnomalyFunctionDTO function, AnomalyMergeConfig mergeConfig, ListMultimap<DimensionMap, RawAnomalyResultDTO> dimensionsResultMap) {
    ListMultimap<DimensionMap, MergedAnomalyResultDTO> result = ArrayListMultimap.create();
    for (DimensionMap dimensions : dimensionsResultMap.keySet()) {
        List<RawAnomalyResultDTO> rawResults = dimensionsResultMap.get(dimensions);
        // Determine the overall time window spanned by this dimension's raw anomalies.
        long windowStart = Long.MAX_VALUE;
        long windowEnd = Long.MIN_VALUE;
        for (RawAnomalyResultDTO raw : rawResults) {
            windowStart = Math.min(windowStart, raw.getStartTime());
            windowEnd = Math.max(windowEnd, raw.getEndTime());
        }
        // NOTE: We get "latest overlapped (Conflict)" merged anomaly instead of "recent" merged anomaly in order to
        // prevent the merge results of current (online) detection interfere the merge results of back-fill (offline)
        // detection.
        // Moreover, the window start is modified by mergeConfig.getSequentialAllowedGap() in order to allow a gap between
        // anomalies to be merged.
        MergedAnomalyResultDTO latestOverlapped = mergedResultDAO.findLatestConflictByFunctionIdDimensions(function.getId(), dimensions.toString(), windowStart - mergeConfig.getSequentialAllowedGap(), windowEnd);
        List<MergedAnomalyResultDTO> merged = AnomalyTimeBasedSummarizer.mergeAnomalies(latestOverlapped, rawResults, mergeConfig.getMaxMergeDurationLength(), mergeConfig.getSequentialAllowedGap());
        // Stamp every merged anomaly with its originating function and dimensions.
        for (MergedAnomalyResultDTO mergedAnomaly : merged) {
            mergedAnomaly.setFunction(function);
            mergedAnomaly.setDimensions(dimensions);
        }
        LOG.info("Merging [{}] raw anomalies into [{}] merged anomalies for function id : [{}] and dimensions : [{}]", rawResults.size(), merged.size(), function.getId(), dimensions);
        result.putAll(dimensions, merged);
    }
    return result;
}
Also used : RawAnomalyResultDTO(com.linkedin.thirdeye.datalayer.dto.RawAnomalyResultDTO) MergedAnomalyResultDTO(com.linkedin.thirdeye.datalayer.dto.MergedAnomalyResultDTO) DimensionMap(com.linkedin.thirdeye.api.DimensionMap)

Example 5 with RawAnomalyResultDTO

use of com.linkedin.thirdeye.datalayer.dto.RawAnomalyResultDTO in project pinot by linkedin.

The class DetectionTaskRunner defines the method runTask.

/**
 * Runs anomaly detection over the given window: fetches input data, detects raw anomalies
 * per dimension, merges them time-wise, and stores both raw and merged results.
 *
 * @param windowStart start of the detection window (inclusive)
 * @param windowEnd   end of the detection window
 * @throws JobExecutionException if the detection job fails
 * @throws ExecutionException    if an underlying asynchronous fetch fails
 */
private void runTask(DateTime windowStart, DateTime windowEnd) throws JobExecutionException, ExecutionException {
    LOG.info("Running anomaly detection for time range {} to  {}", windowStart, windowEnd);
    // TODO: Change to DataFetchers/DataSources
    AnomalyDetectionInputContext inputContext = fetchData(windowStart, windowEnd);
    ListMultimap<DimensionMap, RawAnomalyResultDTO> rawAnomalies = dimensionalShuffleAndUnifyAnalyze(windowStart, windowEnd, inputContext);
    detectionTaskSuccessCounter.inc();
    // A backfill (adhoc) detection job is marked so the merged anomalies do not
    // induce alerts and emails.
    String jobName = DAO_REGISTRY.getJobDAO().getJobNameByJobId(jobExecutionId);
    boolean isBackfill = jobName != null && jobName.toLowerCase().startsWith(BACKFILL_PREFIX);
    // Update merged anomalies
    TimeBasedAnomalyMerger merger = new TimeBasedAnomalyMerger(anomalyFunctionFactory);
    ListMultimap<DimensionMap, MergedAnomalyResultDTO> mergedAnomalies = merger.mergeAnomalies(anomalyFunctionSpec, rawAnomalies, isBackfill);
    detectionTaskSuccessCounter.inc();
    // TODO: Change to DataSink
    AnomalyDetectionOutputContext outputContext = new AnomalyDetectionOutputContext();
    outputContext.setRawAnomalies(rawAnomalies);
    outputContext.setMergedAnomalies(mergedAnomalies);
    storeData(outputContext);
}
Also used : RawAnomalyResultDTO(com.linkedin.thirdeye.datalayer.dto.RawAnomalyResultDTO) TimeBasedAnomalyMerger(com.linkedin.thirdeye.anomaly.merge.TimeBasedAnomalyMerger) MergedAnomalyResultDTO(com.linkedin.thirdeye.datalayer.dto.MergedAnomalyResultDTO) DimensionMap(com.linkedin.thirdeye.api.DimensionMap)

Aggregations

RawAnomalyResultDTO (com.linkedin.thirdeye.datalayer.dto.RawAnomalyResultDTO)48 ArrayList (java.util.ArrayList)22 AnomalyFunctionDTO (com.linkedin.thirdeye.datalayer.dto.AnomalyFunctionDTO)19 MergedAnomalyResultDTO (com.linkedin.thirdeye.datalayer.dto.MergedAnomalyResultDTO)17 Test (org.testng.annotations.Test)11 DimensionMap (com.linkedin.thirdeye.api.DimensionMap)9 Interval (org.joda.time.Interval)9 DateTime (org.joda.time.DateTime)8 AnomalyDetectionContext (com.linkedin.thirdeye.anomalydetection.context.AnomalyDetectionContext)7 TimeSeries (com.linkedin.thirdeye.anomalydetection.context.TimeSeries)7 AnomalyFeedbackDTO (com.linkedin.thirdeye.datalayer.dto.AnomalyFeedbackDTO)5 HashMap (java.util.HashMap)5 Path (javax.ws.rs.Path)5 MetricTimeSeries (com.linkedin.thirdeye.api.MetricTimeSeries)3 RawAnomalyResultBean (com.linkedin.thirdeye.datalayer.pojo.RawAnomalyResultBean)3 POST (javax.ws.rs.POST)3 Pair (com.linkedin.pinot.pql.parsers.utils.Pair)2 ExpectedTimeSeriesPredictionModel (com.linkedin.thirdeye.anomalydetection.model.prediction.ExpectedTimeSeriesPredictionModel)2 PredictionModel (com.linkedin.thirdeye.anomalydetection.model.prediction.PredictionModel)2 DimensionKey (com.linkedin.thirdeye.api.DimensionKey)2