Use of com.linkedin.thirdeye.api.DimensionMap in project pinot by LinkedIn.
The class DataReportHelper, method convertToStringKeyBasedMap.
/**
 * Convert a map of "dimension map to merged anomalies" to a map of "human readable dimension string to merged
 * anomalies".
 *
 * The dimension map is converted as follows. Assume that we have a dimension map (as a JSON string):
 * {"country":"US","page_name":"front_page"}, then it is converted to this string: "country=US, page_name=front_page".
 *
 * @param groupedResults a map of dimension maps to groups of merged anomaly results
 * @return a map of "human readable dimension string to merged anomalies"
 */
public static Map<String, List<MergedAnomalyResultDTO>> convertToStringKeyBasedMap(
    Map<DimensionMap, List<MergedAnomalyResultDTO>> groupedResults) {
  // Sorted by dimension name and value pairs
  Map<String, List<MergedAnomalyResultDTO>> freemarkerGroupedResults = new TreeMap<>();
  if (MapUtils.isNotEmpty(groupedResults)) {
    for (Map.Entry<DimensionMap, List<MergedAnomalyResultDTO>> entry : groupedResults.entrySet()) {
      DimensionMap dimensionMap = entry.getKey();
      String dimensionMapString;
      if (MapUtils.isNotEmpty(dimensionMap)) {
        StringBuilder sb = new StringBuilder();
        String dimensionValueSeparator = "";
        for (Map.Entry<String, String> dimensionMapEntry : dimensionMap.entrySet()) {
          sb.append(dimensionValueSeparator).append(dimensionMapEntry.getKey());
          sb.append(EQUALS).append(dimensionMapEntry.getValue());
          dimensionValueSeparator = DIMENSION_VALUE_SEPARATOR;
        }
        dimensionMapString = sb.toString();
      } else {
        dimensionMapString = "ALL";
      }
      freemarkerGroupedResults.put(dimensionMapString, entry.getValue());
    }
  }
  return freemarkerGroupedResults;
}
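For illustration, a minimal usage sketch of this helper. It assumes DimensionMap behaves like a sorted Map<String, String> with a put method, that EQUALS and DIMENSION_VALUE_SEPARATOR resolve to "=" and ", ", and that the usual imports (java.util, the ThirdEye DTOs) are in scope; the anomaly lists themselves are elided.

// Hypothetical usage sketch; the DimensionMap API details are assumptions.
DimensionMap dimensions = new DimensionMap();
dimensions.put("country", "US");
dimensions.put("page_name", "front_page");

Map<DimensionMap, List<MergedAnomalyResultDTO>> groupedResults = new HashMap<>();
groupedResults.put(dimensions, Collections.<MergedAnomalyResultDTO>emptyList());
// An empty dimension map represents the top-level (all-dimensions) series.
groupedResults.put(new DimensionMap(), Collections.<MergedAnomalyResultDTO>emptyList());

Map<String, List<MergedAnomalyResultDTO>> byLabel =
    DataReportHelper.convertToStringKeyBasedMap(groupedResults);
// Expected keys, sorted by the TreeMap: "ALL" and "country=US, page_name=front_page"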
Use of com.linkedin.thirdeye.api.DimensionMap in project pinot by LinkedIn.
The class DetectionTaskRunner, method runTask.
private void runTask(DateTime windowStart, DateTime windowEnd) throws JobExecutionException, ExecutionException {
  LOG.info("Running anomaly detection for time range {} to {}", windowStart, windowEnd);
  // TODO: Change to DataFetchers/DataSources
  AnomalyDetectionInputContext adContext = fetchData(windowStart, windowEnd);
  ListMultimap<DimensionMap, RawAnomalyResultDTO> resultRawAnomalies =
      dimensionalShuffleAndUnifyAnalyze(windowStart, windowEnd, adContext);
  detectionTaskSuccessCounter.inc();
  boolean isBackfill = false;
  // If the current job is a backfill (ad-hoc) detection job, set the notified flag to true so that the merged
  // anomalies do not trigger alerts and emails.
  String jobName = DAO_REGISTRY.getJobDAO().getJobNameByJobId(jobExecutionId);
  if (jobName != null && jobName.toLowerCase().startsWith(BACKFILL_PREFIX)) {
    isBackfill = true;
  }
  // Update merged anomalies
  TimeBasedAnomalyMerger timeBasedAnomalyMerger = new TimeBasedAnomalyMerger(anomalyFunctionFactory);
  ListMultimap<DimensionMap, MergedAnomalyResultDTO> resultMergedAnomalies =
      timeBasedAnomalyMerger.mergeAnomalies(anomalyFunctionSpec, resultRawAnomalies, isBackfill);
  detectionTaskSuccessCounter.inc();
  // TODO: Change to DataSink
  AnomalyDetectionOutputContext adOutputContext = new AnomalyDetectionOutputContext();
  adOutputContext.setRawAnomalies(resultRawAnomalies);
  adOutputContext.setMergedAnomalies(resultMergedAnomalies);
  storeData(adOutputContext);
}
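As a rough sketch of the per-dimension grouping this task passes to the merger: each DimensionMap key accumulates its own list of raw anomalies. The body of dimensionalShuffleAndUnifyAnalyze is not shown here; Guava's ArrayListMultimap is assumed as the concrete ListMultimap, and the DimensionMap put method is an assumption.

// Hypothetical illustration of the grouping consumed by TimeBasedAnomalyMerger.
ListMultimap<DimensionMap, RawAnomalyResultDTO> resultRawAnomalies = ArrayListMultimap.create();

DimensionMap usFrontPage = new DimensionMap();
usFrontPage.put("country", "US");
usFrontPage.put("page_name", "front_page");

// Anomalies detected for the same dimension combination land in the same list.
resultRawAnomalies.put(usFrontPage, new RawAnomalyResultDTO());
resultRawAnomalies.put(usFrontPage, new RawAnomalyResultDTO());

// Retrieve all raw anomalies for one dimension combination:
List<RawAnomalyResultDTO> anomaliesForUsFrontPage = resultRawAnomalies.get(usFrontPage);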
Use of com.linkedin.thirdeye.api.DimensionMap in project pinot by LinkedIn.
The class MinMaxThresholdDetectionModel, method detect.
@Override
public List<RawAnomalyResultDTO> detect(String metricName, AnomalyDetectionContext anomalyDetectionContext) {
  List<RawAnomalyResultDTO> anomalyResults = new ArrayList<>();
  // Get min / max props
  Double min = null;
  if (properties.containsKey(MIN_VAL)) {
    min = Double.valueOf(properties.getProperty(MIN_VAL));
  }
  Double max = null;
  if (properties.containsKey(MAX_VAL)) {
    max = Double.valueOf(properties.getProperty(MAX_VAL));
  }
  TimeSeries timeSeries = anomalyDetectionContext.getTransformedCurrent(metricName);
  // Compute the weight of this time series (average across the whole series)
  double averageValue = 0;
  for (long time : timeSeries.timestampSet()) {
    averageValue += timeSeries.get(time);
  }
  // Compute the bucket size, so we can iterate in those steps
  long bucketMillis = anomalyDetectionContext.getBucketSizeInMS();
  Interval timeSeriesInterval = timeSeries.getTimeSeriesInterval();
  long numBuckets = Math.abs(timeSeriesInterval.getEndMillis() - timeSeriesInterval.getStartMillis()) / bucketMillis;
  // Average value of this time series
  averageValue /= numBuckets;
  DimensionMap dimensionMap = anomalyDetectionContext.getTimeSeriesKey().getDimensionMap();
  for (long timeBucket : timeSeries.timestampSet()) {
    double value = timeSeries.get(timeBucket);
    double deviationFromThreshold = getDeviationFromThreshold(value, min, max);
    if (deviationFromThreshold != 0) {
      RawAnomalyResultDTO anomalyResult = new RawAnomalyResultDTO();
      anomalyResult.setProperties(properties.toString());
      anomalyResult.setStartTime(timeBucket);
      // point-in-time
      anomalyResult.setEndTime(timeBucket + bucketMillis);
      anomalyResult.setDimensions(dimensionMap);
      anomalyResult.setScore(averageValue);
      // The higher the change, the higher the severity
      anomalyResult.setWeight(deviationFromThreshold);
      anomalyResult.setAvgCurrentVal(value);
      String message = String.format(DEFAULT_MESSAGE_TEMPLATE, deviationFromThreshold, value, min, max);
      anomalyResult.setMessage(message);
      if (value == 0.0) {
        anomalyResult.setDataMissing(true);
      }
      anomalyResults.add(anomalyResult);
    }
  }
  return anomalyResults;
}
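The helper getDeviationFromThreshold is not shown in this snippet. A plausible sketch consistent with how detect uses it (zero when the value is within bounds, otherwise a signed relative change against the violated bound) could look like the following; the exact formula is an assumption, not ThirdEye's actual code.

// Hypothetical sketch of the helper used above; the real implementation may differ.
private static double getDeviationFromThreshold(double currentValue, Double min, Double max) {
  if (min != null && currentValue < min && min != 0d) {
    // Negative deviation: how far below the minimum, relative to the minimum
    return (currentValue - min) / Math.abs(min);
  } else if (max != null && currentValue > max && max != 0d) {
    // Positive deviation: how far above the maximum, relative to the maximum
    return (currentValue - max) / Math.abs(max);
  }
  return 0;
}

A non-zero return value both triggers the anomaly branch in detect and doubles as the anomaly's weight, so its sign indicates whether the value breached the lower or the upper bound.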
Use of com.linkedin.thirdeye.api.DimensionMap in project pinot by LinkedIn.
The class SimpleThresholdDetectionModel, method detect.
@Override
public List<RawAnomalyResultDTO> detect(String metricName, AnomalyDetectionContext anomalyDetectionContext) {
  List<RawAnomalyResultDTO> anomalyResults = new ArrayList<>();
  // Get thresholds
  double changeThreshold = Double.valueOf(getProperties().getProperty(CHANGE_THRESHOLD));
  double volumeThreshold = 0d;
  if (getProperties().containsKey(AVERAGE_VOLUME_THRESHOLD)) {
    volumeThreshold = Double.valueOf(getProperties().getProperty(AVERAGE_VOLUME_THRESHOLD));
  }
  long bucketSizeInMillis = anomalyDetectionContext.getBucketSizeInMS();
  // Compute the weight of this time series (average across the whole series)
  TimeSeries currentTimeSeries = anomalyDetectionContext.getTransformedCurrent(metricName);
  double averageValue = 0;
  for (long time : currentTimeSeries.timestampSet()) {
    averageValue += currentTimeSeries.get(time);
  }
  Interval currentInterval = currentTimeSeries.getTimeSeriesInterval();
  long currentStart = currentInterval.getStartMillis();
  long currentEnd = currentInterval.getEndMillis();
  long numBuckets = (currentEnd - currentStart) / bucketSizeInMillis;
  if (numBuckets != 0) {
    averageValue /= numBuckets;
  }
  // Check if this time series even meets our volume threshold
  DimensionMap dimensionMap = anomalyDetectionContext.getTimeSeriesKey().getDimensionMap();
  if (averageValue < volumeThreshold) {
    LOGGER.info("{} does not meet volume threshold {}: {}", dimensionMap, volumeThreshold, averageValue);
    // empty list
    return anomalyResults;
  }
  PredictionModel predictionModel = anomalyDetectionContext.getTrainedPredictionModel(metricName);
  if (!(predictionModel instanceof ExpectedTimeSeriesPredictionModel)) {
    LOGGER.info("SimpleThresholdDetectionModel expects an ExpectedTimeSeriesPredictionModel, but the trained prediction model in the anomaly detection context is not one.");
    // empty list
    return anomalyResults;
  }
  ExpectedTimeSeriesPredictionModel expectedTimeSeriesPredictionModel = (ExpectedTimeSeriesPredictionModel) predictionModel;
  TimeSeries expectedTimeSeries = expectedTimeSeriesPredictionModel.getExpectedTimeSeries();
  Interval expectedTSInterval = expectedTimeSeries.getTimeSeriesInterval();
  long expectedStart = expectedTSInterval.getStartMillis();
  long seasonalOffset = currentStart - expectedStart;
  for (long currentTimestamp : currentTimeSeries.timestampSet()) {
    long expectedTimestamp = currentTimestamp - seasonalOffset;
    if (!expectedTimeSeries.hasTimestamp(expectedTimestamp)) {
      continue;
    }
    double baselineValue = expectedTimeSeries.get(expectedTimestamp);
    double currentValue = currentTimeSeries.get(currentTimestamp);
    if (isAnomaly(currentValue, baselineValue, changeThreshold)) {
      RawAnomalyResultDTO anomalyResult = new RawAnomalyResultDTO();
      anomalyResult.setDimensions(dimensionMap);
      anomalyResult.setProperties(getProperties().toString());
      anomalyResult.setStartTime(currentTimestamp);
      // point-in-time
      anomalyResult.setEndTime(currentTimestamp + bucketSizeInMillis);
      anomalyResult.setScore(averageValue);
      anomalyResult.setWeight(calculateChange(currentValue, baselineValue));
      anomalyResult.setAvgCurrentVal(currentValue);
      anomalyResult.setAvgBaselineVal(baselineValue);
      String message = getAnomalyResultMessage(changeThreshold, currentValue, baselineValue);
      anomalyResult.setMessage(message);
      anomalyResults.add(anomalyResult);
      if (currentValue == 0.0 || baselineValue == 0.0) {
        anomalyResult.setDataMissing(true);
      }
    }
  }
  return anomalyResults;
}
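The helpers calculateChange and isAnomaly are not shown in this snippet. Plausible sketches consistent with their use here (relative change against the baseline, and a signed threshold so that a negative changeThreshold flags drops while a positive one flags spikes) might look like the following; the exact semantics are assumptions.

// Hypothetical sketches of the helpers used above; the real implementations may differ.
private double calculateChange(double currentValue, double baselineValue) {
  // Relative change of the current value against the baseline
  return (currentValue - baselineValue) / baselineValue;
}

private boolean isAnomaly(double currentValue, double baselineValue, double changeThreshold) {
  if (baselineValue > 0) {
    double percentChange = calculateChange(currentValue, baselineValue);
    // A positive threshold flags increases; a negative threshold flags decreases.
    if ((changeThreshold > 0 && percentChange > changeThreshold)
        || (changeThreshold < 0 && percentChange < changeThreshold)) {
      return true;
    }
  }
  return false;
}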
Use of com.linkedin.thirdeye.api.DimensionMap in project pinot by LinkedIn.
The class MovingAverageSmoothingFunction, method transform.
/**
 * Smooths the given time series using a moving average.
 *
 * If the input time series is shorter than the moving average window size, then this method
 * does not apply smoothing on the time series, i.e., it returns the original time series.
 *
 * The transformed time series is shortened by the size of the moving average window minus one
 * in comparison to the original time series. For instance, if a time series has 10 consecutive
 * data points and the window size for the moving average is 2, then the transformed time series
 * contains only 9 consecutive data points; the first data point has no preceding data point to
 * average with and is thus discarded.
 *
 * @param timeSeries the time series that provides the data points to be transformed.
 * @param anomalyDetectionContext the anomaly detection context that could provide additional
 *                                information for the transformation.
 * @return a time series that is smoothed using a moving average.
 */
@Override
public TimeSeries transform(TimeSeries timeSeries, AnomalyDetectionContext anomalyDetectionContext) {
  Interval timeSeriesInterval = timeSeries.getTimeSeriesInterval();
  long startTime = timeSeriesInterval.getStartMillis();
  long endTime = timeSeriesInterval.getEndMillis();
  long bucketSizeInMillis = anomalyDetectionContext.getBucketSizeInMS();
  int movingAverageWindowSize = Integer.valueOf(getProperties().getProperty(MOVING_AVERAGE_SMOOTHING_WINDOW_SIZE));
  // Check if the moving average window size is larger than the time series itself
  long transformedStartTime = startTime + bucketSizeInMillis * (movingAverageWindowSize - 1);
  if (transformedStartTime > endTime) {
    String metricName = anomalyDetectionContext.getAnomalyDetectionFunction().getSpec().getTopicMetric();
    DimensionMap dimensionMap = anomalyDetectionContext.getTimeSeriesKey().getDimensionMap();
    LOGGER.warn("Input time series (Metric:{}, Dimension:{}) is shorter than the moving average "
        + "smoothing window; therefore, smoothing is not applied on this time series.", metricName, dimensionMap);
    return timeSeries;
  }
  TimeSeries transformedTimeSeries = new TimeSeries();
  Interval transformedInterval = new Interval(transformedStartTime, endTime);
  transformedTimeSeries.setTimeSeriesInterval(transformedInterval);
  for (long timeKeyToTransform : timeSeries.timestampSet()) {
    if (!transformedInterval.contains(timeKeyToTransform)) {
      continue;
    }
    double sum = 0d;
    int count = 0;
    for (int i = 0; i < movingAverageWindowSize; ++i) {
      long timeKey = timeKeyToTransform - bucketSizeInMillis * i;
      if (timeSeries.hasTimestamp(timeKey)) {
        sum += timeSeries.get(timeKey);
        ++count;
      }
    }
    // count is at least one due to the existence of timeKeyToTransform
    double average = sum / count;
    transformedTimeSeries.set(timeKeyToTransform, average);
  }
  return transformedTimeSeries;
}
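As a usage sketch of the window arithmetic: with a window size of 3, the transformed interval starts two buckets after the original start, and each retained point is the average of itself and its two predecessors. The construction below uses only the TimeSeries accessors seen in transform; the hourly bucket size, the five-point series, and a window of 3 are illustrative assumptions.

// Hypothetical illustration of the window arithmetic; values and sizes are made up.
long bucketMillis = 3600_000L; // one hour
long start = 0L;
TimeSeries input = new TimeSeries();
input.setTimeSeriesInterval(new Interval(start, start + 5 * bucketMillis));
for (int i = 0; i < 5; ++i) {
  input.set(start + i * bucketMillis, i + 1); // values 1, 2, 3, 4, 5
}
// With movingAverageWindowSize = 3, the transformed series starts at start + 2 * bucketMillis;
// e.g., the point at bucket 2 becomes (1 + 2 + 3) / 3 = 2.0, and bucket 3 becomes (2 + 3 + 4) / 3 = 3.0.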