use of com.linkedin.thirdeye.api.MetricTimeSeries in project pinot by linkedin.
the class AnomalyResource method getAnomalyMergedResultTimeSeries.
/**
* Returns the time series for the given anomaly.
*
* If viewWindowStartTime and/or viewWindowEndTime is not given, then the window is padded automatically. The padded
* window is half of the anomaly window size. For instance, if the anomaly lasts for 4 hours, then the padding window
* size is 2 hours. The maximum padding size is 1 day.
*
* @param anomalyResultId the id of the given anomaly
* @param viewWindowStartTime start time of the time series, inclusive
* @param viewWindowEndTime end time of the time series, inclusive
* @return the time series of the given anomaly
* @throws Exception when it fails to retrieve the collection (i.e., dataset) information
*/
@GET
@Path("/anomaly-merged-result/timeseries/{anomaly_merged_result_id}")
public AnomalyTimelinesView getAnomalyMergedResultTimeSeries(@NotNull @PathParam("anomaly_merged_result_id") long anomalyResultId, @NotNull @QueryParam("aggTimeGranularity") String aggTimeGranularity, @QueryParam("start") long viewWindowStartTime, @QueryParam("end") long viewWindowEndTime) throws Exception {
boolean loadRawAnomalies = false;
MergedAnomalyResultDTO anomalyResult = anomalyMergedResultDAO.findById(anomalyResultId, loadRawAnomalies);
DimensionMap dimensions = anomalyResult.getDimensions();
AnomalyFunctionDTO anomalyFunctionSpec = anomalyResult.getFunction();
BaseAnomalyFunction anomalyFunction = anomalyFunctionFactory.fromSpec(anomalyFunctionSpec);
// By default, the padding window size is half of the anomaly window.
if (viewWindowStartTime == 0 || viewWindowEndTime == 0) {
long anomalyWindowStartTime = anomalyResult.getStartTime();
long anomalyWindowEndTime = anomalyResult.getEndTime();
long bucketMillis = TimeUnit.MILLISECONDS.convert(anomalyFunctionSpec.getBucketSize(), anomalyFunctionSpec.getBucketUnit());
long bucketCount = (anomalyWindowEndTime - anomalyWindowStartTime) / bucketMillis;
long paddingMillis = Math.max(1, (bucketCount / 2)) * bucketMillis;
if (paddingMillis > TimeUnit.DAYS.toMillis(1)) {
paddingMillis = TimeUnit.DAYS.toMillis(1);
}
if (viewWindowStartTime == 0) {
viewWindowStartTime = anomalyWindowStartTime - paddingMillis;
}
if (viewWindowEndTime == 0) {
viewWindowEndTime = anomalyWindowEndTime + paddingMillis;
}
}
TimeGranularity timeGranularity = Utils.getAggregationTimeGranularity(aggTimeGranularity, anomalyFunctionSpec.getCollection());
long bucketMillis = timeGranularity.toMillis();
// ThirdEye backend is end time exclusive, so one more bucket is appended to make end time inclusive for frontend.
viewWindowEndTime += bucketMillis;
long maxDataTime = collectionMaxDataTimeCache.get(anomalyResult.getCollection());
if (viewWindowEndTime > maxDataTime) {
viewWindowEndTime = (anomalyResult.getEndTime() > maxDataTime) ? anomalyResult.getEndTime() : maxDataTime;
}
AnomalyDetectionInputContext adInputContext = TimeBasedAnomalyMerger.fetchDataByDimension(viewWindowStartTime, viewWindowEndTime, dimensions, anomalyFunction, anomalyMergedResultDAO, overrideConfigDAO, false);
MetricTimeSeries metricTimeSeries = adInputContext.getDimensionKeyMetricTimeSeriesMap().get(dimensions);
if (metricTimeSeries == null) {
// No time series found for the given anomaly; return an empty view
return new AnomalyTimelinesView();
}
// Transform time series with scaling factor
List<ScalingFactor> scalingFactors = adInputContext.getScalingFactors();
if (CollectionUtils.isNotEmpty(scalingFactors)) {
Properties properties = anomalyFunction.getProperties();
MetricTransfer.rescaleMetric(metricTimeSeries, viewWindowStartTime, scalingFactors, anomalyFunctionSpec.getTopicMetric(), properties);
}
List<MergedAnomalyResultDTO> knownAnomalies = adInputContext.getKnownMergedAnomalies().get(dimensions);
// Known anomalies could be ignored (i.e., passed as null) because 1. that reduces users' waiting time and 2. presentation
// data does not need to be as accurate as the data used for detecting anomalies
AnomalyTimelinesView anomalyTimelinesView = anomalyFunction.getTimeSeriesView(metricTimeSeries, bucketMillis, anomalyFunctionSpec.getTopicMetric(), viewWindowStartTime, viewWindowEndTime, knownAnomalies);
// Generate summary for frontend
List<TimeBucket> timeBuckets = anomalyTimelinesView.getTimeBuckets();
if (timeBuckets.size() > 0) {
TimeBucket firstBucket = timeBuckets.get(0);
anomalyTimelinesView.addSummary("currentStart", Long.toString(firstBucket.getCurrentStart()));
anomalyTimelinesView.addSummary("baselineStart", Long.toString(firstBucket.getBaselineStart()));
TimeBucket lastBucket = timeBuckets.get(timeBuckets.size() - 1);
anomalyTimelinesView.addSummary("currentEnd", Long.toString(lastBucket.getCurrentStart()));
anomalyTimelinesView.addSummary("baselineEnd", Long.toString(lastBucket.getBaselineEnd()));
}
return anomalyTimelinesView;
}
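A minimal standalone sketch of the default-padding arithmetic described in the Javadoc above: the view window is padded by half of the anomaly window (in whole buckets), capped at one day. The class name, timestamps, and bucket size below are hypothetical; only the arithmetic mirrors the endpoint's padding branch.

import java.util.concurrent.TimeUnit;

public class ViewWindowPaddingSketch {

    private static final long MAX_PADDING_MILLIS = TimeUnit.DAYS.toMillis(1);

    // Returns {paddedStart, paddedEnd}: pad by half of the anomaly window (in whole buckets), capped at one day.
    static long[] padViewWindow(long anomalyStart, long anomalyEnd, long bucketMillis) {
        long bucketCount = (anomalyEnd - anomalyStart) / bucketMillis;
        long paddingMillis = Math.max(1, bucketCount / 2) * bucketMillis;
        if (paddingMillis > MAX_PADDING_MILLIS) {
            paddingMillis = MAX_PADDING_MILLIS;
        }
        return new long[] { anomalyStart - paddingMillis, anomalyEnd + paddingMillis };
    }

    public static void main(String[] args) {
        // A 4-hour anomaly with 1-hour buckets: the padding is 2 hours on each side.
        long anomalyStart = 0L;
        long anomalyEnd = TimeUnit.HOURS.toMillis(4);
        long[] padded = padViewWindow(anomalyStart, anomalyEnd, TimeUnit.HOURS.toMillis(1));
        System.out.println("padded start = " + padded[0] + ", padded end = " + padded[1]);
    }
}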
use of com.linkedin.thirdeye.api.MetricTimeSeries in project pinot by linkedin.
the class AnomaliesResource method getAnomalyDataCompareResults.
@GET
@Path("/{anomalyId}")
public AnomalyDataCompare.Response getAnomalyDataCompareResults(@PathParam("anomalyId") Long anomalyId) {
MergedAnomalyResultDTO anomaly = mergedAnomalyResultDAO.findById(anomalyId);
if (anomaly == null) {
LOG.error("Anomaly not found with id " + anomalyId);
throw new IllegalArgumentException("Anomaly not found with id " + anomalyId);
}
AnomalyDataCompare.Response response = new AnomalyDataCompare.Response();
response.setCurrentStart(anomaly.getStartTime());
response.setCurrenEnd(anomaly.getEndTime());
try {
DatasetConfigDTO dataset = datasetConfigDAO.findByDataset(anomaly.getCollection());
TimeGranularity granularity = new TimeGranularity(dataset.getTimeDuration(), dataset.getTimeUnit());
// Compute the current time range
Pair<Long, Long> currentTimeRange = new Pair<>(anomaly.getStartTime(), anomaly.getEndTime());
MetricTimeSeries ts = TimeSeriesUtil.getTimeSeriesByDimension(anomaly.getFunction(), Arrays.asList(currentTimeRange), anomaly.getDimensions(), granularity, false);
double currentVal = getTotalFromTimeSeries(ts, dataset.isAdditive());
response.setCurrentVal(currentVal);
for (AlertConfigBean.COMPARE_MODE compareMode : AlertConfigBean.COMPARE_MODE.values()) {
long baselineOffset = EmailHelper.getBaselineOffset(compareMode);
Pair<Long, Long> baselineTimeRange = new Pair<>(anomaly.getStartTime() - baselineOffset, anomaly.getEndTime() - baselineOffset);
MetricTimeSeries baselineTs = TimeSeriesUtil.getTimeSeriesByDimension(anomaly.getFunction(), Arrays.asList(baselineTimeRange), anomaly.getDimensions(), granularity, false);
AnomalyDataCompare.CompareResult cr = new AnomalyDataCompare.CompareResult();
double baselineVal = getTotalFromTimeSeries(baselineTs, dataset.isAdditive());
cr.setBaselineValue(baselineVal);
cr.setCompareMode(compareMode);
cr.setChange(calculateChange(currentVal, baselineVal));
response.getCompareResults().add(cr);
}
} catch (Exception e) {
LOG.error("Error fetching the timeseries data from pinot", e);
throw new RuntimeException(e);
}
return response;
}
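The calculateChange helper is not shown in this snippet. A hedged sketch of what such a comparison commonly computes, the relative change of the current total against the baseline total, is below; how the real method handles a zero baseline is not shown here.

// Hypothetical sketch only; not the method used in AnomaliesResource.
static double calculateChange(double currentVal, double baselineVal) {
    if (baselineVal == 0d) {
        // How the real implementation treats a zero baseline is not shown in this snippet.
        return Double.NaN;
    }
    return (currentVal - baselineVal) / baselineVal;
}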
use of com.linkedin.thirdeye.api.MetricTimeSeries in project pinot by linkedin.
the class testMetricTransfer method transfer.
@Test
public void transfer() {
// create a mock MetricTimeSeries
List<String> names = new ArrayList<>(1);
String mName = "metric0";
names.add(0, mName);
List<MetricType> types = Collections.nCopies(names.size(), MetricType.DOUBLE);
MetricSchema metricSchema = new MetricSchema(names, types);
MetricTimeSeries metrics = new MetricTimeSeries(metricSchema);
// the last three values are current values; the remaining values are baseline values
double[] m0 = { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 };
for (long i = 0L; i <= 5L; i++) {
metrics.set(i, mName, 1.0);
}
// create a list of mock scaling factors
ScalingFactor sf0 = new ScalingFactor(2L, 6L, 0.8);
List<ScalingFactor> sfList0 = new ArrayList<>();
sfList0.add(sf0);
Properties properties = new Properties();
properties.put(MetricTransfer.SEASONAL_SIZE, "3");
properties.put(MetricTransfer.SEASONAL_UNIT, TimeUnit.MILLISECONDS.toString());
// deliberately set to an incorrect value of 2 for this test
properties.put(MetricTransfer.BASELINE_SEASONAL_PERIOD, "2");
MetricTransfer.rescaleMetric(metrics, 3, sfList0, mName, properties);
double[] m1_expected = { 0.8, 0.8, 0.0, 1.0, 1.0, 1.0 };
double[] m_actual = new double[6];
for (int i = 0; i <= 5; i++) {
m_actual[i] = metrics.get(i, mName).doubleValue();
}
Assert.assertEquals(m_actual, m1_expected);
// // revert to the original cases
// ScalingFactor _sf0 = new ScalingFactor(2l, 4l, 1.25);
// // no points in time range and no change
// sfList0.remove(0);
// Assert.assertEquals(sfList0.size(), 0);
// sfList0.add(_sf0);
// MetricTransfer.rescaleMetric(metrics, , sfList0, mName);
// for (int i=0; i<=5; i++) {
// m_actual[i]= metrics.get(i, mName).doubleValue();
// }
// Assert.assertEquals(m_actual, m0);
// A scaling factor entirely outside the series' time range should not affect the values
sfList0.remove(0);
ScalingFactor sf1 = new ScalingFactor(12L, 14L, 0.8);
sfList0.add(sf1);
MetricTransfer.rescaleMetric(metrics, 3, sfList0, mName, properties);
for (int i = 0; i <= 5; i++) {
m_actual[i] = metrics.get(i, mName).doubleValue();
}
Assert.assertEquals(m_actual, m1_expected);
}
use of com.linkedin.thirdeye.api.MetricTimeSeries in project pinot by linkedin.
the class TimeSeriesUtil method getTimeSeriesForAnomalyDetection.
/**
* Returns the set of metric time series that are needed by the given anomaly function for detecting anomalies.
*
* The time granularity is that of the function's collection, i.e., the buckets are not aggregated,
* in order to increase the accuracy of anomaly detection.
*
* @param anomalyFunctionSpec spec of the anomaly function
* @param startEndTimeRanges the time ranges to retrieve the data for constructing the time series
*
* @return the data that is needed by the anomaly function for detecting anomalies.
* @throws JobExecutionException
* @throws ExecutionException
*/
public static Map<DimensionKey, MetricTimeSeries> getTimeSeriesForAnomalyDetection(AnomalyFunctionDTO anomalyFunctionSpec, List<Pair<Long, Long>> startEndTimeRanges) throws JobExecutionException, ExecutionException {
String filterString = anomalyFunctionSpec.getFilters();
Multimap<String, String> filters;
if (StringUtils.isNotBlank(filterString)) {
filters = ThirdEyeUtils.getFilterSet(filterString);
} else {
filters = HashMultimap.create();
}
List<String> groupByDimensions;
String exploreDimensionString = anomalyFunctionSpec.getExploreDimensions();
if (StringUtils.isNotBlank(exploreDimensionString)) {
groupByDimensions = Arrays.asList(exploreDimensionString.trim().split(","));
} else {
groupByDimensions = Collections.emptyList();
}
TimeGranularity timeGranularity = new TimeGranularity(anomalyFunctionSpec.getBucketSize(), anomalyFunctionSpec.getBucketUnit());
TimeSeriesResponse timeSeriesResponse = getTimeSeriesResponseImpl(anomalyFunctionSpec, startEndTimeRanges, timeGranularity, filters, groupByDimensions, false);
try {
Map<DimensionKey, MetricTimeSeries> dimensionKeyMetricTimeSeriesMap = TimeSeriesResponseConverter.toMap(timeSeriesResponse, Utils.getSchemaDimensionNames(anomalyFunctionSpec.getCollection()));
return dimensionKeyMetricTimeSeriesMap;
} catch (Exception e) {
LOG.info("Failed to get schema dimensions for constructing dimension keys:", e.toString());
return Collections.emptyMap();
}
}
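A hypothetical caller sketch, assuming an anomaly function spec has already been loaded (the DAO lookup and the one-day window below are made up, and the checked exceptions declared by the method are omitted for brevity). It only illustrates the shape of the input ranges and of the returned dimension-to-time-series map.

// Hypothetical values; only the call shape matters.
AnomalyFunctionDTO spec = anomalyFunctionDAO.findById(42L); // assumed lookup, not part of this class
long windowEnd = System.currentTimeMillis();
long windowStart = windowEnd - TimeUnit.DAYS.toMillis(1);
List<Pair<Long, Long>> ranges = Collections.singletonList(new Pair<>(windowStart, windowEnd));
// JobExecutionException and ExecutionException would have to be handled by the caller.
Map<DimensionKey, MetricTimeSeries> seriesByDimension =
    TimeSeriesUtil.getTimeSeriesForAnomalyDetection(spec, ranges);
for (Map.Entry<DimensionKey, MetricTimeSeries> entry : seriesByDimension.entrySet()) {
    System.out.println(entry.getKey() + " -> " + entry.getValue().getTimeWindowSet().size() + " buckets");
}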
use of com.linkedin.thirdeye.api.MetricTimeSeries in project pinot by linkedin.
the class DetectionTaskRunner method fetchData.
private AnomalyDetectionInputContext fetchData(DateTime windowStart, DateTime windowEnd) throws JobExecutionException, ExecutionException {
AnomalyDetectionInputContext adContext = new AnomalyDetectionInputContext();
// Get Time Series
List<Pair<Long, Long>> startEndTimeRanges = anomalyFunction.getDataRangeIntervals(windowStart.getMillis(), windowEnd.getMillis());
Map<DimensionKey, MetricTimeSeries> dimensionKeyMetricTimeSeriesMap = TimeSeriesUtil.getTimeSeriesForAnomalyDetection(anomalyFunctionSpec, startEndTimeRanges);
Map<DimensionMap, MetricTimeSeries> dimensionMapMetricTimeSeriesMap = new HashMap<>();
for (Map.Entry<DimensionKey, MetricTimeSeries> entry : dimensionKeyMetricTimeSeriesMap.entrySet()) {
DimensionKey dimensionKey = entry.getKey();
// If the current time series belongs to the OTHER dimension, which consists of time series whose
// sum of values is below 1% of the sum of all time series values, then its anomalies are
// meaningless and hence we don't want to detect anomalies on it.
String[] dimensionValues = dimensionKey.getDimensionValues();
boolean isOTHERDimension = false;
for (String dimensionValue : dimensionValues) {
if (dimensionValue.equalsIgnoreCase(ResponseParserUtils.OTHER) || dimensionValue.equalsIgnoreCase(ResponseParserUtils.UNKNOWN)) {
isOTHERDimension = true;
break;
}
}
if (isOTHERDimension) {
continue;
}
DimensionMap dimensionMap = DimensionMap.fromDimensionKey(dimensionKey, collectionDimensions);
// Skip series without any data points; anomaly detection cannot run on them.
if (entry.getValue().getTimeWindowSet().size() < 1) {
LOG.warn("Insufficient data for {} to run anomaly detection function", dimensionMap);
continue;
}
dimensionMapMetricTimeSeriesMap.put(dimensionMap, entry.getValue());
}
adContext.setDimensionKeyMetricTimeSeriesMap(dimensionMapMetricTimeSeriesMap);
// Get existing anomalies for this time range and this function id for all combinations of dimensions
List<MergedAnomalyResultDTO> knownMergedAnomalies;
if (anomalyFunction.useHistoryAnomaly()) {
// if this anomaly function uses historical data, then we get all time ranges
knownMergedAnomalies = getKnownMergedAnomalies(anomalyFunctionSpec.getId(), anomalyFunction.getDataRangeIntervals(windowStart.getMillis(), windowEnd.getMillis()));
} else {
// otherwise, we only get the merged anomalies for the current window in order to remove duplicate raw anomalies
List<Pair<Long, Long>> currentTimeRange = new ArrayList<>();
currentTimeRange.add(new Pair<>(windowStart.getMillis(), windowEnd.getMillis()));
knownMergedAnomalies = getKnownMergedAnomalies(anomalyFunctionSpec.getId(), currentTimeRange);
}
// Group the known merged anomalies by their dimensions
ArrayListMultimap<DimensionMap, MergedAnomalyResultDTO> dimensionMapToKnownMergedAnomalies = ArrayListMultimap.create();
for (MergedAnomalyResultDTO knownMergedAnomaly : knownMergedAnomalies) {
dimensionMapToKnownMergedAnomalies.put(knownMergedAnomaly.getDimensions(), knownMergedAnomaly);
}
adContext.setKnownMergedAnomalies(dimensionMapToKnownMergedAnomalies);
// We always look up existing raw anomalies to prevent duplicate raw anomalies from being generated
List<RawAnomalyResultDTO> existingRawAnomalies = getExistingRawAnomalies(anomalyFunctionSpec.getId(), windowStart.getMillis(), windowEnd.getMillis());
ArrayListMultimap<DimensionMap, RawAnomalyResultDTO> dimensionNamesToKnownRawAnomalies = ArrayListMultimap.create();
for (RawAnomalyResultDTO existingRawAnomaly : existingRawAnomalies) {
dimensionNamesToKnownRawAnomalies.put(existingRawAnomaly.getDimensions(), existingRawAnomaly);
}
adContext.setExistingRawAnomalies(dimensionNamesToKnownRawAnomalies);
List<ScalingFactor> scalingFactors = OverrideConfigHelper.getTimeSeriesScalingFactors(DAO_REGISTRY.getOverrideConfigDAO(), anomalyFunctionSpec.getCollection(), anomalyFunctionSpec.getMetric(), anomalyFunctionSpec.getId(), anomalyFunction.getDataRangeIntervals(windowStart.getMillis(), windowEnd.getMillis()));
adContext.setScalingFactors(scalingFactors);
return adContext;
}
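The OTHER/UNKNOWN check in the loop above could be extracted into a small helper so the intent reads directly at the call site; a sketch (not part of the original class) is below. It relies only on DimensionKey.getDimensionValues() and the ResponseParserUtils constants already used in fetchData.

// Sketch of a helper mirroring the check in fetchData: true if any dimension value is the
// OTHER or UNKNOWN rollup, in which case anomaly detection on the series is skipped.
private static boolean belongsToRollupDimension(DimensionKey dimensionKey) {
    for (String dimensionValue : dimensionKey.getDimensionValues()) {
        if (dimensionValue.equalsIgnoreCase(ResponseParserUtils.OTHER)
            || dimensionValue.equalsIgnoreCase(ResponseParserUtils.UNKNOWN)) {
            return true;
        }
    }
    return false;
}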