
Example 6 with DimensionMap

Use of com.linkedin.thirdeye.api.DimensionMap in project pinot by linkedin.

From the class DataReportHelper, method convertToStringKeyBasedMap:

/**
   * Convert a map of "dimension map to merged anomalies" to a map of "human readable dimension string to merged
   * anomalies".
   *
   * The dimension map is converted as follows. Assume that we have a dimension map (shown as a JSON-like string)
   * {"country"="US","page_name"="front_page"}; it is converted to this String: "country=US, page_name=front_page".
   *
   * @param groupedResults a map of dimensionMap to a group of merged anomaly results
   * @return a map of "human readable dimension string to merged anomalies"
   */
public static Map<String, List<MergedAnomalyResultDTO>> convertToStringKeyBasedMap(Map<DimensionMap, List<MergedAnomalyResultDTO>> groupedResults) {
    // Sorted by dimension name and value pairs
    Map<String, List<MergedAnomalyResultDTO>> freemarkerGroupedResults = new TreeMap<>();
    if (MapUtils.isNotEmpty(groupedResults)) {
        for (Map.Entry<DimensionMap, List<MergedAnomalyResultDTO>> entry : groupedResults.entrySet()) {
            DimensionMap dimensionMap = entry.getKey();
            String dimensionMapString;
            if (MapUtils.isNotEmpty(dimensionMap)) {
                StringBuilder sb = new StringBuilder();
                String dimensionValueSeparator = "";
                for (Map.Entry<String, String> dimensionMapEntry : dimensionMap.entrySet()) {
                    sb.append(dimensionValueSeparator).append(dimensionMapEntry.getKey());
                    sb.append(EQUALS).append(dimensionMapEntry.getValue());
                    dimensionValueSeparator = DIMENSION_VALUE_SEPARATOR;
                }
                dimensionMapString = sb.toString();
            } else {
                dimensionMapString = "ALL";
            }
            freemarkerGroupedResults.put(dimensionMapString, entry.getValue());
        }
    }
    return freemarkerGroupedResults;
}
Also used : ArrayList(java.util.ArrayList) List(java.util.List) DimensionMap(com.linkedin.thirdeye.api.DimensionMap) TreeMap(java.util.TreeMap) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) Map(java.util.Map)
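
A minimal usage sketch (not from the source) may help illustrate the key format produced above. It assumes DimensionMap behaves like a sorted Map<String, String> with a no-argument constructor, and that EQUALS is "=" and DIMENSION_VALUE_SEPARATOR is ", "; under those assumptions the single key below would read "country=US, page_name=front_page". The import of DataReportHelper itself is omitted.

import com.linkedin.thirdeye.api.DimensionMap;
import com.linkedin.thirdeye.datalayer.dto.MergedAnomalyResultDTO;
import java.util.Collections;
import java.util.List;
import java.util.Map;

public class ConvertToStringKeyExample {

    public static void main(String[] args) {
        // Hypothetical input: one dimension map grouping an (empty) list of merged anomalies.
        DimensionMap dimensions = new DimensionMap();
        dimensions.put("country", "US");
        dimensions.put("page_name", "front_page");

        Map<DimensionMap, List<MergedAnomalyResultDTO>> grouped =
            Collections.singletonMap(dimensions, Collections.<MergedAnomalyResultDTO>emptyList());

        Map<String, List<MergedAnomalyResultDTO>> readable =
            DataReportHelper.convertToStringKeyBasedMap(grouped);

        // Expected single key under the assumptions above: "country=US, page_name=front_page"
        System.out.println(readable.keySet());
    }
}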

Example 7 with DimensionMap

Use of com.linkedin.thirdeye.api.DimensionMap in project pinot by linkedin.

From the class DetectionTaskRunner, method runTask:

private void runTask(DateTime windowStart, DateTime windowEnd) throws JobExecutionException, ExecutionException {
    LOG.info("Running anomaly detection for time range {} to  {}", windowStart, windowEnd);
    // TODO: Change to DataFetchers/DataSources
    AnomalyDetectionInputContext adContext = fetchData(windowStart, windowEnd);
    ListMultimap<DimensionMap, RawAnomalyResultDTO> resultRawAnomalies = dimensionalShuffleAndUnifyAnalyze(windowStart, windowEnd, adContext);
    detectionTaskSuccessCounter.inc();
    boolean isBackfill = false;
    // If the current job is a backfill (adhoc) detection job, set notified flag to true so the merged anomalies do not
    // induce alerts and emails.
    String jobName = DAO_REGISTRY.getJobDAO().getJobNameByJobId(jobExecutionId);
    if (jobName != null && jobName.toLowerCase().startsWith(BACKFILL_PREFIX)) {
        isBackfill = true;
    }
    // Update merged anomalies
    TimeBasedAnomalyMerger timeBasedAnomalyMerger = new TimeBasedAnomalyMerger(anomalyFunctionFactory);
    ListMultimap<DimensionMap, MergedAnomalyResultDTO> resultMergedAnomalies = timeBasedAnomalyMerger.mergeAnomalies(anomalyFunctionSpec, resultRawAnomalies, isBackfill);
    detectionTaskSuccessCounter.inc();
    // TODO: Change to DataSink
    AnomalyDetectionOutputContext adOutputContext = new AnomalyDetectionOutputContext();
    adOutputContext.setRawAnomalies(resultRawAnomalies);
    adOutputContext.setMergedAnomalies(resultMergedAnomalies);
    storeData(adOutputContext);
}
Also used : RawAnomalyResultDTO(com.linkedin.thirdeye.datalayer.dto.RawAnomalyResultDTO) TimeBasedAnomalyMerger(com.linkedin.thirdeye.anomaly.merge.TimeBasedAnomalyMerger) MergedAnomalyResultDTO(com.linkedin.thirdeye.datalayer.dto.MergedAnomalyResultDTO) DimensionMap(com.linkedin.thirdeye.api.DimensionMap)
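
The backfill branch above hinges only on the job-name prefix. The sketch below (not from the source) isolates that guard, using a hypothetical BACKFILL_PREFIX value of "adhoc_" to show which job names would be treated as backfill runs; the real constant and job-naming scheme are defined elsewhere in the project.

public class BackfillCheckSketch {

    // Hypothetical prefix value; the actual constant lives in the detection framework.
    private static final String BACKFILL_PREFIX = "adhoc_";

    // Mirrors the guard in runTask: null-safe, case-insensitive prefix match on the job name.
    static boolean isBackfillJob(String jobName) {
        return jobName != null && jobName.toLowerCase().startsWith(BACKFILL_PREFIX);
    }

    public static void main(String[] args) {
        System.out.println(isBackfillJob("ADHOC_myFunction_1500000000000")); // true
        System.out.println(isBackfillJob("myFunction_1500000000000"));       // false
        System.out.println(isBackfillJob(null));                             // false
    }
}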

Example 8 with DimensionMap

Use of com.linkedin.thirdeye.api.DimensionMap in project pinot by linkedin.

From the class MinMaxThresholdDetectionModel, method detect:

@Override
public List<RawAnomalyResultDTO> detect(String metricName, AnomalyDetectionContext anomalyDetectionContext) {
    List<RawAnomalyResultDTO> anomalyResults = new ArrayList<>();
    // Get min / max props
    Double min = null;
    if (properties.containsKey(MIN_VAL)) {
        min = Double.valueOf(properties.getProperty(MIN_VAL));
    }
    Double max = null;
    if (properties.containsKey(MAX_VAL)) {
        max = Double.valueOf(properties.getProperty(MAX_VAL));
    }
    TimeSeries timeSeries = anomalyDetectionContext.getTransformedCurrent(metricName);
    // Compute the weight of this time series (average across whole)
    double averageValue = 0;
    for (long time : timeSeries.timestampSet()) {
        averageValue += timeSeries.get(time);
    }
    // Compute the bucket size, so we can iterate in those steps
    long bucketMillis = anomalyDetectionContext.getBucketSizeInMS();
    Interval timeSeriesInterval = timeSeries.getTimeSeriesInterval();
    long numBuckets = Math.abs(timeSeriesInterval.getEndMillis() - timeSeriesInterval.getStartMillis()) / bucketMillis;
    // avg value of this time series
    averageValue /= numBuckets;
    DimensionMap dimensionMap = anomalyDetectionContext.getTimeSeriesKey().getDimensionMap();
    for (long timeBucket : timeSeries.timestampSet()) {
        double value = timeSeries.get(timeBucket);
        double deviationFromThreshold = getDeviationFromThreshold(value, min, max);
        if (deviationFromThreshold != 0) {
            RawAnomalyResultDTO anomalyResult = new RawAnomalyResultDTO();
            anomalyResult.setProperties(properties.toString());
            anomalyResult.setStartTime(timeBucket);
            // point-in-time
            anomalyResult.setEndTime(timeBucket + bucketMillis);
            anomalyResult.setDimensions(dimensionMap);
            anomalyResult.setScore(averageValue);
// the higher the change, the higher the severity
            anomalyResult.setWeight(deviationFromThreshold);
            anomalyResult.setAvgCurrentVal(value);
            String message = String.format(DEFAULT_MESSAGE_TEMPLATE, deviationFromThreshold, value, min, max);
            anomalyResult.setMessage(message);
            if (value == 0.0) {
                anomalyResult.setDataMissing(true);
            }
            anomalyResults.add(anomalyResult);
        }
    }
    return anomalyResults;
}
Also used : RawAnomalyResultDTO(com.linkedin.thirdeye.datalayer.dto.RawAnomalyResultDTO) TimeSeries(com.linkedin.thirdeye.anomalydetection.context.TimeSeries) ArrayList(java.util.ArrayList) DimensionMap(com.linkedin.thirdeye.api.DimensionMap) Interval(org.joda.time.Interval)
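
getDeviationFromThreshold is called above but not listed on this page. A plausible sketch, assuming a null bound means "not configured" and the deviation is the signed relative distance from the violated bound, is shown below; the actual helper in MinMaxThresholdDetectionModel may differ.

public class MinMaxDeviationSketch {

    // Hypothetical stand-in for getDeviationFromThreshold: returns 0 when the value lies
    // within [min, max]; otherwise the signed relative distance from the violated bound.
    // A null bound is treated as "not configured" and can never be violated.
    static double getDeviationFromThreshold(double value, Double min, Double max) {
        if (min != null && value < min && min != 0d) {
            return (value - min) / min;   // negative: value fell below the floor
        }
        if (max != null && value > max && max != 0d) {
            return (value - max) / max;   // positive: value rose above the ceiling
        }
        return 0d;
    }

    public static void main(String[] args) {
        System.out.println(getDeviationFromThreshold(50, 100.0, null));   // -0.5 (below min)
        System.out.println(getDeviationFromThreshold(150, null, 100.0));  // 0.5 (above max)
        System.out.println(getDeviationFromThreshold(75, 50.0, 100.0));   // 0.0 (in range)
    }
}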

Example 9 with DimensionMap

Use of com.linkedin.thirdeye.api.DimensionMap in project pinot by linkedin.

From the class SimpleThresholdDetectionModel, method detect:

@Override
public List<RawAnomalyResultDTO> detect(String metricName, AnomalyDetectionContext anomalyDetectionContext) {
    List<RawAnomalyResultDTO> anomalyResults = new ArrayList<>();
    // Get thresholds
    double changeThreshold = Double.valueOf(getProperties().getProperty(CHANGE_THRESHOLD));
    double volumeThreshold = 0d;
    if (getProperties().containsKey(AVERAGE_VOLUME_THRESHOLD)) {
        volumeThreshold = Double.valueOf(getProperties().getProperty(AVERAGE_VOLUME_THRESHOLD));
    }
    long bucketSizeInMillis = anomalyDetectionContext.getBucketSizeInMS();
    // Compute the weight of this time series (average across whole)
    TimeSeries currentTimeSeries = anomalyDetectionContext.getTransformedCurrent(metricName);
    double averageValue = 0;
    for (long time : currentTimeSeries.timestampSet()) {
        averageValue += currentTimeSeries.get(time);
    }
    Interval currentInterval = currentTimeSeries.getTimeSeriesInterval();
    long currentStart = currentInterval.getStartMillis();
    long currentEnd = currentInterval.getEndMillis();
    long numBuckets = (currentEnd - currentStart) / bucketSizeInMillis;
    if (numBuckets != 0) {
        averageValue /= numBuckets;
    }
    // Check if this time series even meets our volume threshold
    DimensionMap dimensionMap = anomalyDetectionContext.getTimeSeriesKey().getDimensionMap();
    if (averageValue < volumeThreshold) {
        LOGGER.info("{} does not meet volume threshold {}: {}", dimensionMap, volumeThreshold, averageValue);
        // empty list
        return anomalyResults;
    }
    PredictionModel predictionModel = anomalyDetectionContext.getTrainedPredictionModel(metricName);
    if (!(predictionModel instanceof ExpectedTimeSeriesPredictionModel)) {
        LOGGER.info("SimpleThresholdDetectionModel detection model expects an ExpectedTimeSeriesPredictionModel but the trained prediction model in anomaly detection context is not.");
        // empty list
        return anomalyResults;
    }
    ExpectedTimeSeriesPredictionModel expectedTimeSeriesPredictionModel = (ExpectedTimeSeriesPredictionModel) predictionModel;
    TimeSeries expectedTimeSeries = expectedTimeSeriesPredictionModel.getExpectedTimeSeries();
    Interval expectedTSInterval = expectedTimeSeries.getTimeSeriesInterval();
    long expectedStart = expectedTSInterval.getStartMillis();
    long seasonalOffset = currentStart - expectedStart;
    for (long currentTimestamp : currentTimeSeries.timestampSet()) {
        long expectedTimestamp = currentTimestamp - seasonalOffset;
        if (!expectedTimeSeries.hasTimestamp(expectedTimestamp)) {
            continue;
        }
        double baselineValue = expectedTimeSeries.get(expectedTimestamp);
        double currentValue = currentTimeSeries.get(currentTimestamp);
        if (isAnomaly(currentValue, baselineValue, changeThreshold)) {
            RawAnomalyResultDTO anomalyResult = new RawAnomalyResultDTO();
            anomalyResult.setDimensions(dimensionMap);
            anomalyResult.setProperties(getProperties().toString());
            anomalyResult.setStartTime(currentTimestamp);
            // point-in-time
            anomalyResult.setEndTime(currentTimestamp + bucketSizeInMillis);
            anomalyResult.setScore(averageValue);
            anomalyResult.setWeight(calculateChange(currentValue, baselineValue));
            anomalyResult.setAvgCurrentVal(currentValue);
            anomalyResult.setAvgBaselineVal(baselineValue);
            String message = getAnomalyResultMessage(changeThreshold, currentValue, baselineValue);
            anomalyResult.setMessage(message);
            anomalyResults.add(anomalyResult);
            if (currentValue == 0.0 || baselineValue == 0.0) {
                anomalyResult.setDataMissing(true);
            }
        }
    }
    return anomalyResults;
}
Also used : RawAnomalyResultDTO(com.linkedin.thirdeye.datalayer.dto.RawAnomalyResultDTO) TimeSeries(com.linkedin.thirdeye.anomalydetection.context.TimeSeries) ExpectedTimeSeriesPredictionModel(com.linkedin.thirdeye.anomalydetection.model.prediction.ExpectedTimeSeriesPredictionModel) ArrayList(java.util.ArrayList) DimensionMap(com.linkedin.thirdeye.api.DimensionMap) PredictionModel(com.linkedin.thirdeye.anomalydetection.model.prediction.PredictionModel) Interval(org.joda.time.Interval)
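
calculateChange and isAnomaly are likewise not shown here. One reasonable sketch, assuming the change is the relative difference against the baseline and the sign of changeThreshold selects the direction being watched, follows; the helpers in the actual SimpleThresholdDetectionModel may be implemented differently.

public class ChangeThresholdSketch {

    // Hypothetical stand-in: relative change of the current value against the baseline.
    static double calculateChange(double currentValue, double baselineValue) {
        if (baselineValue == 0d) {
            return 0d; // avoid division by zero when the baseline is missing
        }
        return (currentValue - baselineValue) / baselineValue;
    }

    // Hypothetical stand-in: a positive threshold flags increases, a negative threshold flags drops.
    static boolean isAnomaly(double currentValue, double baselineValue, double changeThreshold) {
        double change = calculateChange(currentValue, baselineValue);
        if (changeThreshold > 0) {
            return change >= changeThreshold;
        }
        return change <= changeThreshold;
    }

    public static void main(String[] args) {
        System.out.println(isAnomaly(120, 100, 0.1));  // true: +20% exceeds +10%
        System.out.println(isAnomaly(95, 100, -0.1));  // false: -5% does not reach -10%
    }
}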

Example 10 with DimensionMap

Use of com.linkedin.thirdeye.api.DimensionMap in project pinot by linkedin.

From the class MovingAverageSmoothingFunction, method transform:

/**
   * Smooths the given time series using moving average.
   *
   * If the input time series is shorter than the moving average window size, then this method
   * does not apply smoothing on the time series, i.e., it returns the original time series.
   *
   * The transformed time series is shortened, relative to the original time series, by one less than
   * the moving average window size. For instance, if a time series has 10 consecutive data points
   * and the moving average window size is 2, then the transformed time series contains only 9
   * consecutive data points; the first data point has no earlier data point to average with and
   * thus is discarded.
   *
   * @param timeSeries the time series that provides the data points to be transformed.
   * @param anomalyDetectionContext the anomaly detection context that could provide additional
   *                                information for the transformation.
   * @return a time series that is smoothed using moving average.
   */
@Override
public TimeSeries transform(TimeSeries timeSeries, AnomalyDetectionContext anomalyDetectionContext) {
    Interval timeSeriesInterval = timeSeries.getTimeSeriesInterval();
    long startTime = timeSeriesInterval.getStartMillis();
    long endTime = timeSeriesInterval.getEndMillis();
    long bucketSizeInMillis = anomalyDetectionContext.getBucketSizeInMS();
    int movingAverageWindowSize = Integer.valueOf(getProperties().getProperty(MOVING_AVERAGE_SMOOTHING_WINDOW_SIZE));
    // Check if the moving average window size is larger than the time series itself
    long transformedStartTime = startTime + bucketSizeInMillis * (movingAverageWindowSize - 1);
    if (transformedStartTime > endTime) {
        String metricName = anomalyDetectionContext.getAnomalyDetectionFunction().getSpec().getTopicMetric();
        DimensionMap dimensionMap = anomalyDetectionContext.getTimeSeriesKey().getDimensionMap();
        LOGGER.warn("Input time series (Metric:{}, Dimension:{}) is shorter than the moving average " + "smoothing window; therefore, smoothing is not applied on this time series.", metricName, dimensionMap);
        return timeSeries;
    }
    TimeSeries transformedTimeSeries = new TimeSeries();
    Interval transformedInterval = new Interval(transformedStartTime, endTime);
    transformedTimeSeries.setTimeSeriesInterval(transformedInterval);
    for (long timeKeyToTransform : timeSeries.timestampSet()) {
        if (!transformedInterval.contains(timeKeyToTransform)) {
            continue;
        }
        double sum = 0d;
        int count = 0;
        for (int i = 0; i < movingAverageWindowSize; ++i) {
            long timeKey = timeKeyToTransform - bucketSizeInMillis * i;
            if (timeSeries.hasTimestamp(timeKey)) {
                sum += timeSeries.get(timeKey);
                ++count;
            }
        }
        // count is at least one due to the existence of timeKeyToTransform
        double average = sum / count;
        transformedTimeSeries.set(timeKeyToTransform, average);
    }
    return transformedTimeSeries;
}
Also used : TimeSeries(com.linkedin.thirdeye.anomalydetection.context.TimeSeries) DimensionMap(com.linkedin.thirdeye.api.DimensionMap) Interval(org.joda.time.Interval)
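
To make the windowing loop concrete, the standalone sketch below reproduces the same arithmetic with a plain java.util.TreeMap in place of the ThirdEye TimeSeries class, assuming 1-minute buckets and a window size of 2: the first point is dropped, and each remaining point becomes the average of itself and its predecessor.

import java.util.Map;
import java.util.TreeMap;

public class MovingAverageSketch {

    public static void main(String[] args) {
        long bucketMillis = 60_000L; // assumed 1-minute buckets
        int windowSize = 2;          // assumed smoothing window

        // Raw series: five 1-minute buckets with values 10, 20, 30, 40, 50.
        TreeMap<Long, Double> raw = new TreeMap<>();
        for (int i = 0; i < 5; i++) {
            raw.put(i * bucketMillis, (i + 1) * 10.0);
        }

        // The smoothed series starts one bucket later, mirroring transformedStartTime above.
        long transformedStart = raw.firstKey() + bucketMillis * (windowSize - 1);
        TreeMap<Long, Double> smoothed = new TreeMap<>();
        for (Map.Entry<Long, Double> point : raw.entrySet()) {
            long t = point.getKey();
            if (t < transformedStart) {
                continue; // the first data point has no full window and is discarded
            }
            double sum = 0d;
            int count = 0;
            for (int i = 0; i < windowSize; i++) {
                Double v = raw.get(t - bucketMillis * i);
                if (v != null) {
                    sum += v;
                    count++;
                }
            }
            smoothed.put(t, sum / count);
        }

        // Prints {60000=15.0, 120000=25.0, 180000=35.0, 240000=45.0}
        System.out.println(smoothed);
    }
}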

Aggregations

DimensionMap (com.linkedin.thirdeye.api.DimensionMap): 20
MergedAnomalyResultDTO (com.linkedin.thirdeye.datalayer.dto.MergedAnomalyResultDTO): 11
RawAnomalyResultDTO (com.linkedin.thirdeye.datalayer.dto.RawAnomalyResultDTO): 9
ArrayList (java.util.ArrayList): 9
MetricTimeSeries (com.linkedin.thirdeye.api.MetricTimeSeries): 6
TimeSeries (com.linkedin.thirdeye.anomalydetection.context.TimeSeries): 5
ScalingFactor (com.linkedin.thirdeye.detector.metric.transfer.ScalingFactor): 5
HashMap (java.util.HashMap): 5
Properties (java.util.Properties): 5
Interval (org.joda.time.Interval): 5
AnomalyDetectionInputContext (com.linkedin.thirdeye.anomaly.detection.AnomalyDetectionInputContext): 4
AnomalyFunctionDTO (com.linkedin.thirdeye.datalayer.dto.AnomalyFunctionDTO): 4
BaseAnomalyFunction (com.linkedin.thirdeye.detector.function.BaseAnomalyFunction): 4
Pair (com.linkedin.pinot.pql.parsers.utils.Pair): 3
TimeGranularity (com.linkedin.thirdeye.api.TimeGranularity): 3
IOException (java.io.IOException): 3
List (java.util.List): 3
Map (java.util.Map): 3
Path (javax.ws.rs.Path): 3
DateTime (org.joda.time.DateTime): 3