
Example 6 with ScalingFactor

use of com.linkedin.thirdeye.detector.metric.transfer.ScalingFactor in project pinot by linkedin.

the class AnomalyResource method getAnomalyMergedResultTimeSeries.

/**
   * Returns the time series for the given anomaly.
   *
   * If viewWindowStartTime and/or viewWindowEndTime is not given, then a window is padded automatically. The padding
   * window is half of the anomaly window size. For instance, if the anomaly lasts for 4 hours, then the padding
   * window size is 2 hours. The maximum padding size is 1 day.
   *
   * @param anomalyResultId the id of the given anomaly
   * @param viewWindowStartTime start time of the time series, inclusive
   * @param viewWindowEndTime end time of the time series, inclusive
   * @return the time series of the given anomaly
   * @throws Exception when it fails to retrieve collection (i.e., dataset) information
   */
@GET
@Path("/anomaly-merged-result/timeseries/{anomaly_merged_result_id}")
public AnomalyTimelinesView getAnomalyMergedResultTimeSeries(@NotNull @PathParam("anomaly_merged_result_id") long anomalyResultId, @NotNull @QueryParam("aggTimeGranularity") String aggTimeGranularity, @QueryParam("start") long viewWindowStartTime, @QueryParam("end") long viewWindowEndTime) throws Exception {
    boolean loadRawAnomalies = false;
    MergedAnomalyResultDTO anomalyResult = anomalyMergedResultDAO.findById(anomalyResultId, loadRawAnomalies);
    DimensionMap dimensions = anomalyResult.getDimensions();
    AnomalyFunctionDTO anomalyFunctionSpec = anomalyResult.getFunction();
    BaseAnomalyFunction anomalyFunction = anomalyFunctionFactory.fromSpec(anomalyFunctionSpec);
    // By default, the padding window size is half of the anomaly window.
    if (viewWindowStartTime == 0 || viewWindowEndTime == 0) {
        long anomalyWindowStartTime = anomalyResult.getStartTime();
        long anomalyWindowEndTime = anomalyResult.getEndTime();
        long bucketMillis = TimeUnit.MILLISECONDS.convert(anomalyFunctionSpec.getBucketSize(), anomalyFunctionSpec.getBucketUnit());
        long bucketCount = (anomalyWindowEndTime - anomalyWindowStartTime) / bucketMillis;
        long paddingMillis = Math.max(1, (bucketCount / 2)) * bucketMillis;
        if (paddingMillis > TimeUnit.DAYS.toMillis(1)) {
            paddingMillis = TimeUnit.DAYS.toMillis(1);
        }
        if (viewWindowStartTime == 0) {
            viewWindowStartTime = anomalyWindowStartTime - paddingMillis;
        }
        if (viewWindowEndTime == 0) {
            viewWindowEndTime = anomalyWindowEndTime + paddingMillis;
        }
    }
    TimeGranularity timeGranularity = Utils.getAggregationTimeGranularity(aggTimeGranularity, anomalyFunctionSpec.getCollection());
    long bucketMillis = timeGranularity.toMillis();
    // ThirdEye backend is end time exclusive, so one more bucket is appended to make end time inclusive for frontend.
    viewWindowEndTime += bucketMillis;
    long maxDataTime = collectionMaxDataTimeCache.get(anomalyResult.getCollection());
    if (viewWindowEndTime > maxDataTime) {
        viewWindowEndTime = Math.max(anomalyResult.getEndTime(), maxDataTime);
    }
    AnomalyDetectionInputContext adInputContext = TimeBasedAnomalyMerger.fetchDataByDimension(viewWindowStartTime, viewWindowEndTime, dimensions, anomalyFunction, anomalyMergedResultDAO, overrideConfigDAO, false);
    MetricTimeSeries metricTimeSeries = adInputContext.getDimensionKeyMetricTimeSeriesMap().get(dimensions);
    if (metricTimeSeries == null) {
        // No time series found for the given anomaly; return an empty view
        return new AnomalyTimelinesView();
    }
    // Transform time series with scaling factor
    List<ScalingFactor> scalingFactors = adInputContext.getScalingFactors();
    if (CollectionUtils.isNotEmpty(scalingFactors)) {
        Properties properties = anomalyFunction.getProperties();
        MetricTransfer.rescaleMetric(metricTimeSeries, viewWindowStartTime, scalingFactors, anomalyFunctionSpec.getTopicMetric(), properties);
    }
    List<MergedAnomalyResultDTO> knownAnomalies = adInputContext.getKnownMergedAnomalies().get(dimensions);
    // Only the known anomalies already fetched for this window are passed in; nothing more is retrieved because
    // (1) it reduces users' waiting time and (2) presentation data does not need to be as accurate as the data
    // used for detecting anomalies.
    AnomalyTimelinesView anomalyTimelinesView = anomalyFunction.getTimeSeriesView(metricTimeSeries, bucketMillis, anomalyFunctionSpec.getTopicMetric(), viewWindowStartTime, viewWindowEndTime, knownAnomalies);
    // Generate summary for frontend
    List<TimeBucket> timeBuckets = anomalyTimelinesView.getTimeBuckets();
    if (!timeBuckets.isEmpty()) {
        TimeBucket firstBucket = timeBuckets.get(0);
        anomalyTimelinesView.addSummary("currentStart", Long.toString(firstBucket.getCurrentStart()));
        anomalyTimelinesView.addSummary("baselineStart", Long.toString(firstBucket.getBaselineStart()));
        TimeBucket lastBucket = timeBuckets.get(timeBuckets.size() - 1);
        anomalyTimelinesView.addSummary("currentEnd", Long.toString(lastBucket.getCurrentStart()));
        anomalyTimelinesView.addSummary("baselineEnd", Long.toString(lastBucket.getBaselineEnd()));
    }
    return anomalyTimelinesView;
}
Also used : BaseAnomalyFunction(com.linkedin.thirdeye.detector.function.BaseAnomalyFunction) TimeBucket(com.linkedin.thirdeye.dashboard.views.TimeBucket) MetricTimeSeries(com.linkedin.thirdeye.api.MetricTimeSeries) ScalingFactor(com.linkedin.thirdeye.detector.metric.transfer.ScalingFactor) AnomalyTimelinesView(com.linkedin.thirdeye.anomaly.views.AnomalyTimelinesView) Properties(java.util.Properties) AnomalyDetectionInputContext(com.linkedin.thirdeye.anomaly.detection.AnomalyDetectionInputContext) MergedAnomalyResultDTO(com.linkedin.thirdeye.datalayer.dto.MergedAnomalyResultDTO) TimeGranularity(com.linkedin.thirdeye.api.TimeGranularity) DimensionMap(com.linkedin.thirdeye.api.DimensionMap) AnomalyFunctionDTO(com.linkedin.thirdeye.datalayer.dto.AnomalyFunctionDTO) Path(javax.ws.rs.Path) GET(javax.ws.rs.GET)
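
The padding rule described in the Javadoc above is easy to verify in isolation. Below is a minimal, self-contained sketch of that arithmetic; the helper name computePaddingMillis is hypothetical, but the computation mirrors the block inside getAnomalyMergedResultTimeSeries.

import java.util.concurrent.TimeUnit;

public class PaddingSketch {

    // Padding is half the anomaly window, rounded down to whole buckets,
    // at least one bucket, and capped at one day (hypothetical helper).
    static long computePaddingMillis(long anomalyStart, long anomalyEnd, long bucketMillis) {
        long bucketCount = (anomalyEnd - anomalyStart) / bucketMillis;
        long paddingMillis = Math.max(1, bucketCount / 2) * bucketMillis;
        return Math.min(paddingMillis, TimeUnit.DAYS.toMillis(1));
    }

    public static void main(String[] args) {
        long hour = TimeUnit.HOURS.toMillis(1);
        // A 4-hour anomaly with 1-hour buckets yields 2 hours of padding.
        System.out.println(computePaddingMillis(0, 4 * hour, hour) == 2 * hour);
        // A 3-day anomaly is capped at 1 day of padding.
        System.out.println(computePaddingMillis(0, 72 * hour, hour) == TimeUnit.DAYS.toMillis(1));
    }
}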

Example 7 with ScalingFactor

use of com.linkedin.thirdeye.detector.metric.transfer.ScalingFactor in project pinot by linkedin.

the class AnomaliesResource method getAnomalyDetails.

/**
   * Generates anomaly details for a single merged anomaly.
   *
   * @param mergedAnomaly the merged anomaly to describe
   * @param datasetConfig configuration of the dataset that the anomaly belongs to
   * @param timeSeriesDateFormatter formatter for the timestamps of the time series
   * @param startEndDateFormatterHours formatter for start/end dates at hourly granularity
   * @param startEndDateFormatterDays formatter for start/end dates at daily granularity
   * @param externalUrl external URL to attach to the anomaly details
   * @return the anomaly details, or null if construction fails
   */
private AnomalyDetails getAnomalyDetails(MergedAnomalyResultDTO mergedAnomaly, DatasetConfigDTO datasetConfig, DateTimeFormatter timeSeriesDateFormatter, DateTimeFormatter startEndDateFormatterHours, DateTimeFormatter startEndDateFormatterDays, String externalUrl) throws Exception {
    String dataset = datasetConfig.getDataset();
    String metricName = mergedAnomaly.getMetric();
    AnomalyFunctionDTO anomalyFunctionSpec = anomalyFunctionDAO.findById(mergedAnomaly.getFunctionId());
    BaseAnomalyFunction anomalyFunction = anomalyFunctionFactory.fromSpec(anomalyFunctionSpec);
    String aggGranularity = constructAggGranularity(datasetConfig);
    long anomalyStartTime = mergedAnomaly.getStartTime();
    long anomalyEndTime = mergedAnomaly.getEndTime();
    TimeRange range = getTimeseriesOffsetedTimes(anomalyStartTime, anomalyEndTime, datasetConfig);
    long currentStartTime = range.getStart();
    long currentEndTime = range.getEnd();
    DimensionMap dimensions = mergedAnomaly.getDimensions();
    TimeGranularity timeGranularity = Utils.getAggregationTimeGranularity(aggGranularity, anomalyFunctionSpec.getCollection());
    long bucketMillis = timeGranularity.toMillis();
    AnomalyDetails anomalyDetails = null;
    try {
        AnomalyDetectionInputContext adInputContext = TimeBasedAnomalyMerger.fetchDataByDimension(currentStartTime, currentEndTime, dimensions, anomalyFunction, mergedAnomalyResultDAO, overrideConfigDAO, true);
        MetricTimeSeries metricTimeSeries = adInputContext.getDimensionKeyMetricTimeSeriesMap().get(dimensions);
        // Transform time series with scaling factor
        List<ScalingFactor> scalingFactors = adInputContext.getScalingFactors();
        if (CollectionUtils.isNotEmpty(scalingFactors)) {
            Properties properties = anomalyFunction.getProperties();
            MetricTransfer.rescaleMetric(metricTimeSeries, currentStartTime, scalingFactors, anomalyFunctionSpec.getTopicMetric(), properties);
        }
        List<MergedAnomalyResultDTO> knownAnomalies = adInputContext.getKnownMergedAnomalies().get(dimensions);
        // Only the known anomalies already fetched for this window are passed in; nothing more is retrieved because
        // (1) it reduces users' waiting time and (2) presentation data does not need to be as accurate as the data
        // used for detecting anomalies.
        AnomalyTimelinesView anomalyTimelinesView = anomalyFunction.getTimeSeriesView(metricTimeSeries, bucketMillis, anomalyFunctionSpec.getTopicMetric(), currentStartTime, currentEndTime, knownAnomalies);
        anomalyDetails = constructAnomalyDetails(metricName, dataset, datasetConfig, mergedAnomaly, anomalyFunctionSpec, currentStartTime, currentEndTime, anomalyTimelinesView, timeSeriesDateFormatter, startEndDateFormatterHours, startEndDateFormatterDays, externalUrl);
    } catch (Exception e) {
        LOG.error("Exception in constructing anomaly wrapper for anomaly {}", mergedAnomaly.getId(), e);
    }
    return anomalyDetails;
}
Also used : BaseAnomalyFunction(com.linkedin.thirdeye.detector.function.BaseAnomalyFunction) AnomalyDetails(com.linkedin.thirdeye.dashboard.resources.v2.pojo.AnomalyDetails) MetricTimeSeries(com.linkedin.thirdeye.api.MetricTimeSeries) ScalingFactor(com.linkedin.thirdeye.detector.metric.transfer.ScalingFactor) AnomalyTimelinesView(com.linkedin.thirdeye.anomaly.views.AnomalyTimelinesView) Properties(java.util.Properties) TimeoutException(java.util.concurrent.TimeoutException) JSONException(org.json.JSONException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) TimeRange(com.linkedin.thirdeye.api.TimeRange) AnomalyDetectionInputContext(com.linkedin.thirdeye.anomaly.detection.AnomalyDetectionInputContext) MergedAnomalyResultDTO(com.linkedin.thirdeye.datalayer.dto.MergedAnomalyResultDTO) TimeGranularity(com.linkedin.thirdeye.api.TimeGranularity) DimensionMap(com.linkedin.thirdeye.api.DimensionMap) AnomalyFunctionDTO(com.linkedin.thirdeye.datalayer.dto.AnomalyFunctionDTO)
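
Examples 6 and 7 apply the same guard before rescaling: MetricTransfer.rescaleMetric is called only when scaling factors are actually configured. A minimal sketch of that shared step extracted into a helper follows; the method name applyScalingFactors is hypothetical, and it assumes MetricTransfer lives in the same package as ScalingFactor and that CollectionUtils is the Apache Commons Collections class used in the examples above.

import java.util.List;
import java.util.Properties;
import org.apache.commons.collections.CollectionUtils;
import com.linkedin.thirdeye.api.MetricTimeSeries;
import com.linkedin.thirdeye.detector.function.BaseAnomalyFunction;
import com.linkedin.thirdeye.detector.metric.transfer.MetricTransfer;
import com.linkedin.thirdeye.detector.metric.transfer.ScalingFactor;

public class ScalingSketch {

    // Rescale the time series in place only when scaling factors are configured;
    // otherwise leave the series untouched (hypothetical helper).
    static void applyScalingFactors(MetricTimeSeries metricTimeSeries, long windowStartTime,
            List<ScalingFactor> scalingFactors, String topicMetric, BaseAnomalyFunction anomalyFunction) {
        if (CollectionUtils.isNotEmpty(scalingFactors)) {
            Properties properties = anomalyFunction.getProperties();
            MetricTransfer.rescaleMetric(metricTimeSeries, windowStartTime, scalingFactors, topicMetric, properties);
        }
    }
}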

Example 8 with ScalingFactor

use of com.linkedin.thirdeye.detector.metric.transfer.ScalingFactor in project pinot by linkedin.

the class DetectionTaskRunner method fetchData.

private AnomalyDetectionInputContext fetchData(DateTime windowStart, DateTime windowEnd) throws JobExecutionException, ExecutionException {
    AnomalyDetectionInputContext adContext = new AnomalyDetectionInputContext();
    // Get Time Series
    List<Pair<Long, Long>> startEndTimeRanges = anomalyFunction.getDataRangeIntervals(windowStart.getMillis(), windowEnd.getMillis());
    Map<DimensionKey, MetricTimeSeries> dimensionKeyMetricTimeSeriesMap = TimeSeriesUtil.getTimeSeriesForAnomalyDetection(anomalyFunctionSpec, startEndTimeRanges);
    Map<DimensionMap, MetricTimeSeries> dimensionMapMetricTimeSeriesMap = new HashMap<>();
    for (Map.Entry<DimensionKey, MetricTimeSeries> entry : dimensionKeyMetricTimeSeriesMap.entrySet()) {
        DimensionKey dimensionKey = entry.getKey();
        // If the current time series belongs to the OTHER dimension, which consists of time series whose
        // sum of values is below 1% of the sum of all time series values, then its anomalies are
        // meaningless and hence we don't want to detect anomalies on it.
        String[] dimensionValues = dimensionKey.getDimensionValues();
        boolean isOTHERDimension = false;
        for (String dimensionValue : dimensionValues) {
            if (dimensionValue.equalsIgnoreCase(ResponseParserUtils.OTHER) || dimensionValue.equalsIgnoreCase(ResponseParserUtils.UNKNOWN)) {
                isOTHERDimension = true;
                break;
            }
        }
        if (isOTHERDimension) {
            continue;
        }
        DimensionMap dimensionMap = DimensionMap.fromDimensionKey(dimensionKey, collectionDimensions);
        // Skip series with no data points so they are not handed to the detection function.
        if (entry.getValue().getTimeWindowSet().isEmpty()) {
            LOG.warn("Insufficient data for {} to run anomaly detection function", dimensionMap);
            continue;
        }
        dimensionMapMetricTimeSeriesMap.put(dimensionMap, entry.getValue());
    }
    adContext.setDimensionKeyMetricTimeSeriesMap(dimensionMapMetricTimeSeriesMap);
    // Get existing anomalies for this time range and this function id for all combinations of dimensions
    List<MergedAnomalyResultDTO> knownMergedAnomalies;
    if (anomalyFunction.useHistoryAnomaly()) {
        // if this anomaly function uses historical data, then we fetch anomalies from all relevant time ranges
        knownMergedAnomalies = getKnownMergedAnomalies(anomalyFunctionSpec.getId(), anomalyFunction.getDataRangeIntervals(windowStart.getMillis(), windowEnd.getMillis()));
    } else {
        // otherwise, we only get the merged anomalies for the current window in order to remove duplicate raw anomalies
        List<Pair<Long, Long>> currentTimeRange = new ArrayList<>();
        currentTimeRange.add(new Pair<>(windowStart.getMillis(), windowEnd.getMillis()));
        knownMergedAnomalies = getKnownMergedAnomalies(anomalyFunctionSpec.getId(), currentTimeRange);
    }
    // Group the known merged anomalies by their dimensions (raw anomalies are grouped the same way below)
    ArrayListMultimap<DimensionMap, MergedAnomalyResultDTO> dimensionMapToKnownMergedAnomalies = ArrayListMultimap.create();
    for (MergedAnomalyResultDTO knownMergedAnomaly : knownMergedAnomalies) {
        dimensionMapToKnownMergedAnomalies.put(knownMergedAnomaly.getDimensions(), knownMergedAnomaly);
    }
    adContext.setKnownMergedAnomalies(dimensionMapToKnownMergedAnomalies);
    // We always fetch existing raw anomalies to prevent duplicate raw anomalies from being generated
    List<RawAnomalyResultDTO> existingRawAnomalies = getExistingRawAnomalies(anomalyFunctionSpec.getId(), windowStart.getMillis(), windowEnd.getMillis());
    ArrayListMultimap<DimensionMap, RawAnomalyResultDTO> dimensionNamesToKnownRawAnomalies = ArrayListMultimap.create();
    for (RawAnomalyResultDTO existingRawAnomaly : existingRawAnomalies) {
        dimensionNamesToKnownRawAnomalies.put(existingRawAnomaly.getDimensions(), existingRawAnomaly);
    }
    adContext.setExistingRawAnomalies(dimensionNamesToKnownRawAnomalies);
    List<ScalingFactor> scalingFactors = OverrideConfigHelper.getTimeSeriesScalingFactors(DAO_REGISTRY.getOverrideConfigDAO(), anomalyFunctionSpec.getCollection(), anomalyFunctionSpec.getMetric(), anomalyFunctionSpec.getId(), anomalyFunction.getDataRangeIntervals(windowStart.getMillis(), windowEnd.getMillis()));
    adContext.setScalingFactors(scalingFactors);
    return adContext;
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) MetricTimeSeries(com.linkedin.thirdeye.api.MetricTimeSeries) ScalingFactor(com.linkedin.thirdeye.detector.metric.transfer.ScalingFactor) RawAnomalyResultDTO(com.linkedin.thirdeye.datalayer.dto.RawAnomalyResultDTO) DimensionKey(com.linkedin.thirdeye.api.DimensionKey) MergedAnomalyResultDTO(com.linkedin.thirdeye.datalayer.dto.MergedAnomalyResultDTO) DimensionMap(com.linkedin.thirdeye.api.DimensionMap) HashMap(java.util.HashMap) Map(java.util.Map) DimensionMap(com.linkedin.thirdeye.api.DimensionMap) Pair(com.linkedin.pinot.pql.parsers.utils.Pair)
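
Example 8 groups known anomalies by their dimensions with Guava's ArrayListMultimap, which keeps every anomaly that shares a DimensionMap under one key. A minimal standalone sketch of that grouping pattern follows; the generic groupBy helper is hypothetical, and plain Strings stand in for DimensionMap and the anomaly DTOs to keep it self-contained.

import java.util.Arrays;
import java.util.List;
import java.util.function.Function;
import com.google.common.collect.ArrayListMultimap;

public class GroupingSketch {

    // Group values under keys derived from each value, as fetchData does for
    // anomalies keyed by DimensionMap (hypothetical helper).
    static <K, V> ArrayListMultimap<K, V> groupBy(List<V> values, Function<V, K> keyFn) {
        ArrayListMultimap<K, V> grouped = ArrayListMultimap.create();
        for (V value : values) {
            grouped.put(keyFn.apply(value), value);
        }
        return grouped;
    }

    public static void main(String[] args) {
        List<String> anomalies = Arrays.asList("country=US", "country=IN", "country=US");
        // Keying each entry by itself simulates grouping by dimension.
        ArrayListMultimap<String, String> byDimension = groupBy(anomalies, a -> a);
        System.out.println(byDimension.get("country=US").size()); // prints 2
    }
}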

Aggregations

ScalingFactor (com.linkedin.thirdeye.detector.metric.transfer.ScalingFactor): 8 usages
MetricTimeSeries (com.linkedin.thirdeye.api.MetricTimeSeries): 7 usages
MergedAnomalyResultDTO (com.linkedin.thirdeye.datalayer.dto.MergedAnomalyResultDTO): 7 usages
DimensionMap (com.linkedin.thirdeye.api.DimensionMap): 5 usages
AnomalyFunctionDTO (com.linkedin.thirdeye.datalayer.dto.AnomalyFunctionDTO): 5 usages
Properties (java.util.Properties): 5 usages
AnomalyDetectionInputContext (com.linkedin.thirdeye.anomaly.detection.AnomalyDetectionInputContext): 4 usages
TimeGranularity (com.linkedin.thirdeye.api.TimeGranularity): 4 usages
BaseAnomalyFunction (com.linkedin.thirdeye.detector.function.BaseAnomalyFunction): 4 usages
Pair (com.linkedin.pinot.pql.parsers.utils.Pair): 3 usages
AnomalyTimelinesView (com.linkedin.thirdeye.anomaly.views.AnomalyTimelinesView): 2 usages
RawAnomalyResultDTO (com.linkedin.thirdeye.datalayer.dto.RawAnomalyResultDTO): 2 usages
ArrayList (java.util.ArrayList): 2 usages
HashMap (java.util.HashMap): 2 usages
ExecutionException (java.util.concurrent.ExecutionException): 2 usages
DateTime (org.joda.time.DateTime): 2 usages
DimensionKey (com.linkedin.thirdeye.api.DimensionKey): 1 usage
TimeRange (com.linkedin.thirdeye.api.TimeRange): 1 usage
AnomalyDetails (com.linkedin.thirdeye.dashboard.resources.v2.pojo.AnomalyDetails): 1 usage
TimeBucket (com.linkedin.thirdeye.dashboard.views.TimeBucket): 1 usage