Use of com.linkedin.thirdeye.api.DimensionKey in project pinot by linkedin:
class TimeSeriesUtil, method getTimeSeriesByDimension.
/**
 * Returns the metric time series that were given to the anomaly function for anomaly detection. If the dimension to
 * retrieve is OTHER, this method retrieves all combinations of dimensions and calculates the metric time series for
 * the OTHER dimension on-the-fly.
 *
 * @param anomalyFunctionSpec spec of the anomaly function
 * @param startEndTimeRanges the time ranges to retrieve the data for constructing the time series
 * @param dimensionMap a dimension map that is used to construct the filter for retrieving the corresponding data
 * that was used to detect the anomaly
 * @param timeGranularity time granularity of the time series
 * @param endTimeInclusive set to true if the end time should be inclusive; mainly used by the query for UI
 * @return the time series in the same format as those used by the given anomaly function for anomaly detection,
 * or null if the schema dimension names cannot be retrieved
 *
 * @throws JobExecutionException
 * @throws ExecutionException
 */
public static MetricTimeSeries getTimeSeriesByDimension(AnomalyFunctionDTO anomalyFunctionSpec, List<Pair<Long, Long>> startEndTimeRanges, DimensionMap dimensionMap, TimeGranularity timeGranularity, boolean endTimeInclusive) throws JobExecutionException, ExecutionException {
  // Get the original filter
  Multimap<String, String> filters;
  String filterString = anomalyFunctionSpec.getFilters();
  if (StringUtils.isNotBlank(filterString)) {
    filters = ThirdEyeUtils.getFilterSet(filterString);
  } else {
    filters = HashMultimap.create();
  }
  // Decorate filters according to dimensionMap
  filters = ThirdEyeUtils.getFilterSetFromDimensionMap(dimensionMap, filters);

  // Detect whether any dimension of the anomaly was rolled up into the synthetic OTHER value
  boolean hasOTHERDimensionName = false;
  for (String dimensionValue : dimensionMap.values()) {
    if (dimensionValue.equalsIgnoreCase(ResponseParserUtils.OTHER)) {
      hasOTHERDimensionName = true;
      break;
    }
  }

  // groupByDimensions (i.e., exploreDimensions) is empty by default because the query for getting the time series
  // will have the decorated filters according to anomalies' explore dimensions.
  // However, if there exists any dimension with value "OTHER", then we need to honor the original groupBy in order
  // to construct the data for OTHER.
  List<String> groupByDimensions = Collections.emptyList();
  // Check isNotBlank BEFORE calling trim(): the previous code dereferenced getExploreDimensions()
  // unconditionally and threw NPE when the spec had no explore dimensions configured.
  String exploreDimensions = anomalyFunctionSpec.getExploreDimensions();
  if (hasOTHERDimensionName && StringUtils.isNotBlank(exploreDimensions)) {
    groupByDimensions = Arrays.asList(exploreDimensions.trim().split(","));
  }

  TimeSeriesResponse response = getTimeSeriesResponseImpl(anomalyFunctionSpec, startEndTimeRanges, timeGranularity, filters, groupByDimensions, endTimeInclusive);

  try {
    Map<DimensionKey, MetricTimeSeries> metricTimeSeriesMap = TimeSeriesResponseConverter.toMap(response, Utils.getSchemaDimensionNames(anomalyFunctionSpec.getCollection()));
    return extractMetricTimeSeriesByDimension(metricTimeSeriesMap);
  } catch (Exception e) {
    // Pass the exception itself so SLF4J logs the stack trace, not just the message text.
    LOG.warn("Unable to get schema dimension name for retrieving metric time series", e);
    return null;
  }
}
Use of com.linkedin.thirdeye.api.DimensionKey in project pinot by linkedin:
class TimeSeriesUtil, method extractMetricTimeSeriesByDimension.
/**
 * Extract current and baseline values from the parsed Pinot results. There are two possible time series for presenting
 * the time series after anomaly detection: 1. the time series with a specific dimension and 2. the time series for
 * OTHER dimension.
 *
 * For case 1, the input map should contain only one time series and hence we can just return it. For case 2, the
 * input map would contain all combinations of explored dimensions and hence we need to filter out the one for OTHER
 * dimension.
 *
 * @param metricTimeSeriesMap time series keyed by their dimension combination
 *
 * @return the time series when the anomaly is detected, or null if none matches
 */
private static MetricTimeSeries extractMetricTimeSeriesByDimension(Map<DimensionKey, MetricTimeSeries> metricTimeSeriesMap) {
  if (MapUtils.isEmpty(metricTimeSeriesMap)) {
    return null;
  }
  // Case 1: a single entry means the query was already narrowed to one specific dimension combination.
  if (metricTimeSeriesMap.size() == 1) {
    return metricTimeSeriesMap.values().iterator().next();
  }
  // Case 2: multiple entries — return the time series whose key contains the OTHER dimension value.
  for (Map.Entry<DimensionKey, MetricTimeSeries> entry : metricTimeSeriesMap.entrySet()) {
    for (String dimensionValue : entry.getKey().getDimensionValues()) {
      if (dimensionValue.equalsIgnoreCase(ResponseParserUtils.OTHER)) {
        return entry.getValue();
      }
    }
  }
  return null;
}
Use of com.linkedin.thirdeye.api.DimensionKey in project pinot by linkedin:
class AnomalyFunctionResource, method analyze.
/**
 * Runs the given anomaly function over the [startTime, endTime) window and returns the raw anomalies
 * found, excluding results flagged as having missing data.
 */
@POST
@Path("/analyze")
@Consumes(MediaType.APPLICATION_JSON)
public Response analyze(AnomalyFunctionDTO anomalyFunctionSpec, @QueryParam("startTime") Long startTime, @QueryParam("endTime") Long endTime) throws Exception {
  // TODO: replace this with Job/Task framework and job tracker page
  BaseAnomalyFunction anomalyFunction = anomalyFunctionFactory.fromSpec(anomalyFunctionSpec);
  List<Pair<Long, Long>> startEndTimeRanges = anomalyFunction.getDataRangeIntervals(startTime, endTime);
  Map<DimensionKey, MetricTimeSeries> timeSeriesByDimension = TimeSeriesUtil.getTimeSeriesForAnomalyDetection(anomalyFunctionSpec, startEndTimeRanges);
  List<String> collectionDimensions = DAO_REGISTRY.getDatasetConfigDAO().findByDataset(anomalyFunctionSpec.getCollection()).getDimensions();

  // Run the detection function once per dimension combination and collect all raw results.
  List<RawAnomalyResultDTO> rawResults = new ArrayList<>();
  for (Map.Entry<DimensionKey, MetricTimeSeries> entry : timeSeriesByDimension.entrySet()) {
    DimensionMap dimensionMap = DimensionMap.fromDimensionKey(entry.getKey(), collectionDimensions);
    MetricTimeSeries metricTimeSeries = entry.getValue();
    // The function needs at least two time buckets to have anything to compare.
    if (metricTimeSeries.getTimeWindowSet().size() < 2) {
      LOG.warn("Insufficient data for {} to run anomaly detection function", dimensionMap);
      continue;
    }
    try {
      LOG.info("Analyzing anomaly function with dimensionKey: {}, windowStart: {}, windowEnd: {}", dimensionMap, startTime, endTime);
      List<RawAnomalyResultDTO> dimensionResults = anomalyFunction.analyze(dimensionMap, metricTimeSeries, new DateTime(startTime), new DateTime(endTime), new ArrayList<>());
      rawResults.addAll(dimensionResults);
      LOG.info("{} has {} anomalies in window {} to {}", dimensionMap, dimensionResults.size(), new DateTime(startTime), new DateTime(endTime));
    } catch (Exception e) {
      LOG.error("Could not compute for {}", dimensionMap, e);
    }
  }

  // Keep only anomalies whose underlying data is present.
  List<RawAnomalyResultDTO> anomalyResults = new ArrayList<>();
  for (RawAnomalyResultDTO anomaly : rawResults) {
    if (!anomaly.isDataMissing()) {
      LOG.info("Found anomaly, sev [{}] start [{}] end [{}]", anomaly.getWeight(), new DateTime(anomaly.getStartTime()), new DateTime(anomaly.getEndTime()));
      anomalyResults.add(anomaly);
    }
  }
  return Response.ok(anomalyResults).build();
}
Use of com.linkedin.thirdeye.api.DimensionKey in project pinot by linkedin:
class TimeSeriesResponseConverter, method toMap.
/**
 * Convert the response to a Map&lt;DimensionKey, MetricTimeSeries&gt;. DimensionKey is generated based
 * off of schemaDimensions, while the MetricTimeSeries objects are generated based on the rows
 * within the response input. The metrics returned in the MetricTimeSeries instances correspond to
 * the metric names as opposed to the full metric function (eg __COUNT instead of SUM(__COUNT))
 */
public static Map<DimensionKey, MetricTimeSeries> toMap(TimeSeriesResponse response, List<String> schemaDimensions) {
  DimensionKeyGenerator dimensionKeyGenerator = new DimensionKeyGenerator(schemaDimensions);

  List<String> metricNames = new ArrayList<>(response.getMetrics());
  Set<String> requestedMetrics = new HashSet<>(metricNames);
  // All metrics are modeled as DOUBLE in the resulting schema.
  MetricSchema metricSchema = new MetricSchema(metricNames, Collections.nCopies(metricNames.size(), MetricType.DOUBLE));

  // Bucket the response rows by their dimension key.
  SetMultimap<DimensionKey, TimeSeriesRow> rowsByKey = HashMultimap.create();
  for (int i = 0; i < response.getNumRows(); i++) {
    TimeSeriesRow row = response.getRow(i);
    rowsByKey.put(dimensionKeyGenerator.get(row.getDimensionNames(), row.getDimensionValues()), row);
  }

  // Fold each key's rows into a single MetricTimeSeries.
  Map<DimensionKey, MetricTimeSeries> result = new HashMap<>();
  for (Entry<DimensionKey, Collection<TimeSeriesRow>> entry : rowsByKey.asMap().entrySet()) {
    MetricTimeSeries metricTimeSeries = new MetricTimeSeries(metricSchema);
    result.put(entry.getKey(), metricTimeSeries);
    for (TimeSeriesRow row : entry.getValue()) {
      long timestamp = row.getStart();
      for (TimeSeriesMetric metric : row.getMetrics()) {
        // Rows may contain additional info, eg the raw metrics required for calculating derived ones;
        // only accumulate the metrics that were actually requested.
        if (requestedMetrics.contains(metric.getMetricName())) {
          metricTimeSeries.increment(timestamp, metric.getMetricName(), metric.getValue());
        }
      }
    }
  }
  return result;
}
Use of com.linkedin.thirdeye.api.DimensionKey in project pinot by linkedin:
class TimeSeriesUtil, method getTimeSeriesForAnomalyDetection.
/**
 * Returns the set of metric time series that are needed by the given anomaly function for detecting anomalies.
 *
 * The time granularity is the granularity of the function's collection, i.e., the buckets are not aggregated,
 * in order to increase the accuracy for detecting anomalies.
 *
 * @param anomalyFunctionSpec spec of the anomaly function
 * @param startEndTimeRanges the time ranges to retrieve the data for constructing the time series
 *
 * @return the data that is needed by the anomaly function for detecting anomalies, keyed by dimension combination;
 * an empty map if the schema dimension names cannot be retrieved
 * @throws JobExecutionException
 * @throws ExecutionException
 */
public static Map<DimensionKey, MetricTimeSeries> getTimeSeriesForAnomalyDetection(AnomalyFunctionDTO anomalyFunctionSpec, List<Pair<Long, Long>> startEndTimeRanges) throws JobExecutionException, ExecutionException {
  // Parse the function's configured filters, falling back to an empty multimap.
  String filterString = anomalyFunctionSpec.getFilters();
  Multimap<String, String> filters;
  if (StringUtils.isNotBlank(filterString)) {
    filters = ThirdEyeUtils.getFilterSet(filterString);
  } else {
    filters = HashMultimap.create();
  }

  // Explore dimensions become the query's groupBy columns (comma-separated in the spec).
  List<String> groupByDimensions;
  String exploreDimensionString = anomalyFunctionSpec.getExploreDimensions();
  if (StringUtils.isNotBlank(exploreDimensionString)) {
    groupByDimensions = Arrays.asList(exploreDimensionString.trim().split(","));
  } else {
    groupByDimensions = Collections.emptyList();
  }

  // Use the function's own bucket size/unit so buckets are not aggregated.
  TimeGranularity timeGranularity = new TimeGranularity(anomalyFunctionSpec.getBucketSize(), anomalyFunctionSpec.getBucketUnit());

  TimeSeriesResponse timeSeriesResponse = getTimeSeriesResponseImpl(anomalyFunctionSpec, startEndTimeRanges, timeGranularity, filters, groupByDimensions, false);

  try {
    Map<DimensionKey, MetricTimeSeries> dimensionKeyMetricTimeSeriesMap = TimeSeriesResponseConverter.toMap(timeSeriesResponse, Utils.getSchemaDimensionNames(anomalyFunctionSpec.getCollection()));
    return dimensionKeyMetricTimeSeriesMap;
  } catch (Exception e) {
    // WARN with the exception attached: the previous call passed e.toString() to a format string with
    // no "{}" placeholder, so SLF4J silently discarded the failure cause.
    LOG.warn("Failed to get schema dimensions for constructing dimension keys", e);
    return Collections.emptyMap();
  }
}
Aggregations