
Example 6 with Pair

Use of com.linkedin.pinot.pql.parsers.utils.Pair in project pinot by linkedin.

The class DetectionTaskRunner defines the method fetchData:

private AnomalyDetectionInputContext fetchData(DateTime windowStart, DateTime windowEnd) throws JobExecutionException, ExecutionException {
    AnomalyDetectionInputContext adContext = new AnomalyDetectionInputContext();
    // Get Time Series
    List<Pair<Long, Long>> startEndTimeRanges = anomalyFunction.getDataRangeIntervals(windowStart.getMillis(), windowEnd.getMillis());
    Map<DimensionKey, MetricTimeSeries> dimensionKeyMetricTimeSeriesMap = TimeSeriesUtil.getTimeSeriesForAnomalyDetection(anomalyFunctionSpec, startEndTimeRanges);
    Map<DimensionMap, MetricTimeSeries> dimensionMapMetricTimeSeriesMap = new HashMap<>();
    for (Map.Entry<DimensionKey, MetricTimeSeries> entry : dimensionKeyMetricTimeSeriesMap.entrySet()) {
        DimensionKey dimensionKey = entry.getKey();
        // If the current time series belongs to the OTHER dimension, which consists of time series whose
        // sum of values is below 1% of the sum of all time series values, then any anomaly detected on it is
        // meaningless, so we do not want to detect anomalies on it.
        String[] dimensionValues = dimensionKey.getDimensionValues();
        boolean isOTHERDimension = false;
        for (String dimensionValue : dimensionValues) {
            if (dimensionValue.equalsIgnoreCase(ResponseParserUtils.OTHER) || dimensionValue.equalsIgnoreCase(ResponseParserUtils.UNKNOWN)) {
                isOTHERDimension = true;
                break;
            }
        }
        if (isOTHERDimension) {
            continue;
        }
        DimensionMap dimensionMap = DimensionMap.fromDimensionKey(dimensionKey, collectionDimensions);
        // Skip time series that have no data points in the detection window
        if (entry.getValue().getTimeWindowSet().size() < 1) {
            LOG.warn("Insufficient data for {} to run anomaly detection function", dimensionMap);
            continue;
        }
        dimensionMapMetricTimeSeriesMap.put(dimensionMap, entry.getValue());
    }
    adContext.setDimensionKeyMetricTimeSeriesMap(dimensionMapMetricTimeSeriesMap);
    // Get existing anomalies for this time range and this function id for all combinations of dimensions
    List<MergedAnomalyResultDTO> knownMergedAnomalies;
    if (anomalyFunction.useHistoryAnomaly()) {
        // if this anomaly function uses historical data, then we fetch known anomalies over all of its data range intervals
        knownMergedAnomalies = getKnownMergedAnomalies(anomalyFunctionSpec.getId(), anomalyFunction.getDataRangeIntervals(windowStart.getMillis(), windowEnd.getMillis()));
    } else {
        // otherwise, we only get the merged anomalies for the current window in order to remove duplicate raw anomalies
        List<Pair<Long, Long>> currentTimeRange = new ArrayList<>();
        currentTimeRange.add(new Pair<>(windowStart.getMillis(), windowEnd.getMillis()));
        knownMergedAnomalies = getKnownMergedAnomalies(anomalyFunctionSpec.getId(), currentTimeRange);
    }
    // Group the known merged anomalies by their dimensions
    ArrayListMultimap<DimensionMap, MergedAnomalyResultDTO> dimensionMapToKnownMergedAnomalies = ArrayListMultimap.create();
    for (MergedAnomalyResultDTO knownMergedAnomaly : knownMergedAnomalies) {
        dimensionMapToKnownMergedAnomalies.put(knownMergedAnomaly.getDimensions(), knownMergedAnomaly);
    }
    adContext.setKnownMergedAnomalies(dimensionMapToKnownMergedAnomalies);
    // We always fetch existing raw anomalies to prevent duplicate raw anomalies from being generated
    List<RawAnomalyResultDTO> existingRawAnomalies = getExistingRawAnomalies(anomalyFunctionSpec.getId(), windowStart.getMillis(), windowEnd.getMillis());
    ArrayListMultimap<DimensionMap, RawAnomalyResultDTO> dimensionNamesToKnownRawAnomalies = ArrayListMultimap.create();
    for (RawAnomalyResultDTO existingRawAnomaly : existingRawAnomalies) {
        dimensionNamesToKnownRawAnomalies.put(existingRawAnomaly.getDimensions(), existingRawAnomaly);
    }
    adContext.setExistingRawAnomalies(dimensionNamesToKnownRawAnomalies);
    List<ScalingFactor> scalingFactors = OverrideConfigHelper.getTimeSeriesScalingFactors(DAO_REGISTRY.getOverrideConfigDAO(), anomalyFunctionSpec.getCollection(), anomalyFunctionSpec.getMetric(), anomalyFunctionSpec.getId(), anomalyFunction.getDataRangeIntervals(windowStart.getMillis(), windowEnd.getMillis()));
    adContext.setScalingFactors(scalingFactors);
    return adContext;
}
Also used: HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), Map (java.util.Map), MetricTimeSeries (com.linkedin.thirdeye.api.MetricTimeSeries), DimensionKey (com.linkedin.thirdeye.api.DimensionKey), DimensionMap (com.linkedin.thirdeye.api.DimensionMap), ScalingFactor (com.linkedin.thirdeye.detector.metric.transfer.ScalingFactor), RawAnomalyResultDTO (com.linkedin.thirdeye.datalayer.dto.RawAnomalyResultDTO), MergedAnomalyResultDTO (com.linkedin.thirdeye.datalayer.dto.MergedAnomalyResultDTO), Pair (com.linkedin.pinot.pql.parsers.utils.Pair)
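
For context, every time range handled in fetchData is just a Pair<Long, Long> of start and end epoch millis, and the same list returned by getDataRangeIntervals is reused for fetching the time series, the known merged anomalies, and the scaling factors. Below is a minimal sketch of building and reading such a list; the one-window lookback and the getFirst()/getSecond() accessor names are assumptions made for illustration, not the project's actual getDataRangeIntervals logic or a confirmed Pair API.

import java.util.ArrayList;
import java.util.List;

import com.linkedin.pinot.pql.parsers.utils.Pair;

public class TimeRangeSketch {

    // Builds the monitoring window plus one equally sized lookback window.
    // The lookback window is illustrative only; the real intervals come from
    // the anomaly function's getDataRangeIntervals.
    static List<Pair<Long, Long>> buildRanges(long windowStartMillis, long windowEndMillis) {
        List<Pair<Long, Long>> ranges = new ArrayList<>();
        // current monitoring window, exactly as fetchData builds currentTimeRange
        ranges.add(new Pair<>(windowStartMillis, windowEndMillis));
        // one preceding window of the same size (assumed lookback for illustration)
        long windowSize = windowEndMillis - windowStartMillis;
        ranges.add(new Pair<>(windowStartMillis - windowSize, windowStartMillis));
        return ranges;
    }

    public static void main(String[] args) {
        for (Pair<Long, Long> range : buildRanges(1500000000000L, 1500003600000L)) {
            // getFirst()/getSecond() are assumed accessor names for this Pair class
            System.out.println(range.getFirst() + " -> " + range.getSecond());
        }
    }
}

Keeping the ranges as plain Pairs rather than a dedicated interval type is what lets fetchData pass one list to TimeSeriesUtil, getKnownMergedAnomalies, and OverrideConfigHelper without conversion.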

Aggregations

Pair (com.linkedin.pinot.pql.parsers.utils.Pair) 6
MetricTimeSeries (com.linkedin.thirdeye.api.MetricTimeSeries) 5
MergedAnomalyResultDTO (com.linkedin.thirdeye.datalayer.dto.MergedAnomalyResultDTO) 4
DimensionMap (com.linkedin.thirdeye.api.DimensionMap) 3
TimeGranularity (com.linkedin.thirdeye.api.TimeGranularity) 3
ScalingFactor (com.linkedin.thirdeye.detector.metric.transfer.ScalingFactor) 3
ArrayList (java.util.ArrayList) 3
HashMap (java.util.HashMap) 3
DimensionKey (com.linkedin.thirdeye.api.DimensionKey) 2
AnomalyFunctionDTO (com.linkedin.thirdeye.datalayer.dto.AnomalyFunctionDTO) 2
RawAnomalyResultDTO (com.linkedin.thirdeye.datalayer.dto.RawAnomalyResultDTO) 2
BaseAnomalyFunction (com.linkedin.thirdeye.detector.function.BaseAnomalyFunction) 2
IOException (java.io.IOException) 2
Map (java.util.Map) 2
Path (javax.ws.rs.Path) 2
DateTime (org.joda.time.DateTime) 2
AnomalyDetectionInputContext (com.linkedin.thirdeye.anomaly.detection.AnomalyDetectionInputContext) 1
TimeSeriesResponse (com.linkedin.thirdeye.client.timeseries.TimeSeriesResponse) 1
AnomalyDataCompare (com.linkedin.thirdeye.dashboard.resources.v2.pojo.AnomalyDataCompare) 1
DatasetConfigDTO (com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO) 1