Use of com.linkedin.thirdeye.datalayer.dto.RawAnomalyResultDTO in project pinot by linkedin.
The class AnomalyMergeExecutor, method updateMergedScoreAndPersist.
private void updateMergedScoreAndPersist(MergedAnomalyResultDTO mergedResult, AnomalyMergeConfig mergeConfig) {
  // Calculate a default score and weight first, in case the anomaly-function-specific
  // update below fails; the duration-weighted averages of the raw anomalies are used.
  double weightedScoreSum = 0.0;
  double weightedWeightSum = 0.0;
  double totalBucketSize = 0.0;
  // scale scores and weights down by this factor to prevent double overflow
  double normalizationFactor = 1000;
  String anomalyMessage = "";
  for (RawAnomalyResultDTO anomalyResult : mergedResult.getAnomalyResults()) {
    anomalyResult.setMerged(true);
    // floating-point division so that sub-second durations are not truncated
    double bucketSizeSeconds = (anomalyResult.getEndTime() - anomalyResult.getStartTime()) / 1000.0;
    weightedScoreSum += (anomalyResult.getScore() / normalizationFactor) * bucketSizeSeconds;
    weightedWeightSum += (anomalyResult.getWeight() / normalizationFactor) * bucketSizeSeconds;
    totalBucketSize += bucketSizeSeconds;
    anomalyMessage = anomalyResult.getMessage();
  }
  if (totalBucketSize != 0) {
    mergedResult.setScore((weightedScoreSum / totalBucketSize) * normalizationFactor);
    mergedResult.setWeight((weightedWeightSum / totalBucketSize) * normalizationFactor);
  }
  mergedResult.setMessage(anomalyMessage);
  if (mergedResult.getAnomalyResults().size() > 1) {
    // recompute the weight using the anomaly-function-specific method
    try {
      updateMergedAnomalyWeight(mergedResult, mergeConfig);
    } catch (Exception e) {
      AnomalyFunctionDTO function = mergedResult.getFunction();
      LOG.warn("Unable to compute merged weight; the average weight of the raw anomalies is used instead. "
              + "Dataset: {}, Topic Metric: {}, Function: {}, Time: {} - {}, Exception: {}",
          function.getCollection(), function.getTopicMetric(), function.getFunctionName(),
          new DateTime(mergedResult.getStartTime()), new DateTime(mergedResult.getEndTime()), e);
    }
  }
  try {
    // persist the merged result and the updated raw anomalies
    mergedResultDAO.update(mergedResult);
    for (RawAnomalyResultDTO rawAnomalyResultDTO : mergedResult.getAnomalyResults()) {
      anomalyResultDAO.update(rawAnomalyResultDTO);
    }
  } catch (Exception e) {
    LOG.error("Could not persist merged result : [" + mergedResult.toString() + "]", e);
  }
}
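Taken in isolation, the default scoring above is a duration-weighted average with a scale-down guard against overflow. The following standalone sketch is hypothetical (not ThirdEye code) and reproduces just that computation with hard-coded windows and scores:

public class WeightedScoreSketch {
  public static void main(String[] args) {
    long[][] windowsMillis = { {0, 60_000}, {60_000, 300_000} }; // two raw anomaly windows
    double[] scores = { 0.9, 0.4 };
    double normalizationFactor = 1000; // guard against double overflow, as above
    double weightedScoreSum = 0.0;
    double totalBucketSize = 0.0;
    for (int i = 0; i < scores.length; i++) {
      double bucketSizeSeconds = (windowsMillis[i][1] - windowsMillis[i][0]) / 1000.0;
      weightedScoreSum += (scores[i] / normalizationFactor) * bucketSizeSeconds;
      totalBucketSize += bucketSizeSeconds;
    }
    // (0.9 * 60 + 0.4 * 240) / 300 ≈ 0.5
    System.out.println((weightedScoreSum / totalBucketSize) * normalizationFactor);
  }
}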
Use of com.linkedin.thirdeye.datalayer.dto.RawAnomalyResultDTO in project pinot by linkedin.
The class AnomalyTimeBasedSummarizer, method mergeAnomalies.
/**
 * @param mergedAnomaly : last merged anomaly, or null to start a new one
 * @param anomalies : list of raw anomalies to be merged with the last merged anomaly
 * @param maxMergedDurationMillis : maximum length of a merged anomaly, in milliseconds
 * @param sequentialAllowedGap : maximum allowed gap between two raw anomalies for them to be merged
 * @return the list of merged anomalies
 */
public static List<MergedAnomalyResultDTO> mergeAnomalies(MergedAnomalyResultDTO mergedAnomaly,
    List<RawAnomalyResultDTO> anomalies, long maxMergedDurationMillis, long sequentialAllowedGap) {
  // sort anomalies in natural order of start time (overflow-safe long comparison)
  Collections.sort(anomalies, (o1, o2) -> Long.compare(o1.getStartTime(), o2.getStartTime()));
  boolean applySequentialGapBasedSplit = false;
  boolean applyMaxDurationBasedSplit = false;
  if (maxMergedDurationMillis > 0) {
    applyMaxDurationBasedSplit = true;
  }
  if (sequentialAllowedGap > 0) {
    applySequentialGapBasedSplit = true;
  }
  List<MergedAnomalyResultDTO> mergedAnomalies = new ArrayList<>();
  for (int i = 0; i < anomalies.size(); i++) {
    RawAnomalyResultDTO currentResult = anomalies.get(i);
    if (mergedAnomaly == null || currentResult.getEndTime() < mergedAnomaly.getStartTime()) {
      mergedAnomaly = new MergedAnomalyResultDTO();
      populateMergedResult(mergedAnomaly, currentResult);
    } else {
      // compare current with merged and decide whether to merge the current result or create a new one
      if (applySequentialGapBasedSplit
          && (currentResult.getStartTime() - mergedAnomaly.getEndTime()) > sequentialAllowedGap) {
        // the gap is too large: close out the previous merged result ...
        mergedAnomalies.add(mergedAnomaly);
        // ... and start a new merged result from the current raw result
        mergedAnomaly = new MergedAnomalyResultDTO();
        populateMergedResult(mergedAnomaly, currentResult);
      } else {
        // fold the current raw result into mergedAnomaly, extending its window as needed
        if (currentResult.getStartTime() < mergedAnomaly.getStartTime()) {
          mergedAnomaly.setStartTime(currentResult.getStartTime());
        }
        if (currentResult.getEndTime() > mergedAnomaly.getEndTime()) {
          mergedAnomaly.setEndTime(currentResult.getEndTime());
        }
        if (!mergedAnomaly.getAnomalyResults().contains(currentResult)) {
          mergedAnomaly.getAnomalyResults().add(currentResult);
          currentResult.setMerged(true);
        }
      }
    }
    // at this point the merged result contains the current raw result;
    // check whether the max duration for the merged anomaly has passed, and if so create a new one
    if (applyMaxDurationBasedSplit
        && mergedAnomaly.getEndTime() - mergedAnomaly.getStartTime() >= maxMergedDurationMillis) {
      if (i < (anomalies.size() - 1) && anomalies.get(i + 1).getStartTime().equals(currentResult.getStartTime())) {
        // the next anomaly has the same start time as the current one and should be merged
        // into the current result too, so no need to split yet
      } else {
        // split here
        mergedAnomalies.add(mergedAnomaly);
        mergedAnomaly = null;
      }
    }
    if (i == (anomalies.size() - 1) && mergedAnomaly != null) {
      mergedAnomalies.add(mergedAnomaly);
    }
  }
  LOG.info("merging [{}] raw anomalies", anomalies.size());
  return mergedAnomalies;
}
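A minimal caller sketch may help; it is hypothetical, assuming bean-style setters on the DTOs and the usual java.util imports (Arrays, List, and java.util.concurrent.TimeUnit). Passing null as the last merged anomaly makes merging start fresh:

RawAnomalyResultDTO first = new RawAnomalyResultDTO();
first.setStartTime(0L);
first.setEndTime(60_000L);
RawAnomalyResultDTO second = new RawAnomalyResultDTO();
second.setStartTime(90_000L); // starts 30s after `first` ends
second.setEndTime(120_000L);
// allow merged anomalies up to 2 hours long and gaps up to 30s between raw anomalies
List<MergedAnomalyResultDTO> merged = AnomalyTimeBasedSummarizer.mergeAnomalies(
    null, Arrays.asList(first, second), TimeUnit.HOURS.toMillis(2), 30_000L);
// the 30s gap does not exceed sequentialAllowedGap, so a single merged anomaly is expected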
Use of com.linkedin.thirdeye.datalayer.dto.RawAnomalyResultDTO in project pinot by linkedin.
The class TimeBasedAnomalyMerger, method updateMergedAnomalyInfo.
private void updateMergedAnomalyInfo(MergedAnomalyResultDTO mergedResult, AnomalyMergeConfig mergeConfig) {
  List<RawAnomalyResultDTO> rawAnomalies = mergedResult.getAnomalyResults();
  if (CollectionUtils.isEmpty(rawAnomalies)) {
    LOG.warn("Skip updating anomaly (id={}) because it does not have any child anomalies.", mergedResult.getId());
    return;
  }
  // Update the info of the merged anomaly
  if (rawAnomalies.size() == 1) {
    // only one child anomaly: copy its fields directly
    RawAnomalyResultDTO rawAnomaly = rawAnomalies.get(0);
    mergedResult.setScore(rawAnomaly.getScore());
    mergedResult.setWeight(rawAnomaly.getWeight());
    mergedResult.setAvgCurrentVal(rawAnomaly.getAvgCurrentVal());
    mergedResult.setAvgBaselineVal(rawAnomaly.getAvgBaselineVal());
    mergedResult.setMessage(rawAnomaly.getMessage());
  } else {
    // Calculate default score and weight in case of any failure (e.g., DB exception) during the update
    double weightedScoreSum = 0.0;
    double weightedWeightSum = 0.0;
    double totalBucketSize = 0.0;
    double avgCurrent = 0.0;
    double avgBaseline = 0.0;
    String anomalyMessage = "";
    for (RawAnomalyResultDTO anomalyResult : rawAnomalies) {
      anomalyResult.setMerged(true);
      // floating-point division so that sub-second durations are not truncated
      double bucketSizeSeconds = (anomalyResult.getEndTime() - anomalyResult.getStartTime()) / 1000.0;
      double normalizedBucketSize = getNormalizedBucketSize(bucketSizeSeconds);
      totalBucketSize += bucketSizeSeconds;
      weightedScoreSum += anomalyResult.getScore() * normalizedBucketSize;
      weightedWeightSum += anomalyResult.getWeight() * normalizedBucketSize;
      avgCurrent += anomalyResult.getAvgCurrentVal() * normalizedBucketSize;
      avgBaseline += anomalyResult.getAvgBaselineVal() * normalizedBucketSize;
      anomalyMessage = anomalyResult.getMessage();
    }
    if (totalBucketSize != 0) {
      double normalizedTotalBucketSize = getNormalizedBucketSize(totalBucketSize);
      mergedResult.setScore(weightedScoreSum / normalizedTotalBucketSize);
      mergedResult.setWeight(weightedWeightSum / normalizedTotalBucketSize);
      mergedResult.setAvgCurrentVal(avgCurrent / normalizedTotalBucketSize);
      mergedResult.setAvgBaselineVal(avgBaseline / normalizedTotalBucketSize);
    }
    mergedResult.setMessage(anomalyMessage);
    // recompute the weight using the anomaly-function-specific method
    try {
      computeMergedAnomalyInfo(mergedResult, mergeConfig);
    } catch (Exception e) {
      AnomalyFunctionDTO function = mergedResult.getFunction();
      LOG.warn("Unable to compute merged weight; the average weight of the raw anomalies is used instead. "
              + "Dataset: {}, Topic Metric: {}, Function: {}, Time: {} - {}, Exception: {}",
          function.getCollection(), function.getTopicMetric(), function.getFunctionName(),
          new DateTime(mergedResult.getStartTime()), new DateTime(mergedResult.getEndTime()), e);
    }
  }
}
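getNormalizedBucketSize is not shown in this excerpt. By analogy with the normalizationFactor used in AnomalyMergeExecutor above, a plausible sketch (an assumption, not the actual ThirdEye helper) is a simple scale-down so the weighted sums cannot overflow a double; because both the numerator and the denominator are scaled by the same factor, the weighted averages are unaffected:

// Assumed helper (not shown in the excerpt): scales a bucket size down by a
// constant factor so that the weighted sums above cannot overflow a double.
private static double getNormalizedBucketSize(double bucketSizeSeconds) {
  final double NORMALIZATION_FACTOR = 1000; // same guard as in AnomalyMergeExecutor
  return bucketSizeSeconds / NORMALIZATION_FACTOR;
}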
Use of com.linkedin.thirdeye.datalayer.dto.RawAnomalyResultDTO in project pinot by linkedin.
The class TimeBasedAnomalyMerger, method dimensionalShuffleAndUnifyMerge.
private ListMultimap<DimensionMap, MergedAnomalyResultDTO> dimensionalShuffleAndUnifyMerge(AnomalyFunctionDTO function,
    AnomalyMergeConfig mergeConfig, ListMultimap<DimensionMap, RawAnomalyResultDTO> dimensionsResultMap) {
  ListMultimap<DimensionMap, MergedAnomalyResultDTO> mergedAnomalies = ArrayListMultimap.create();
  for (DimensionMap dimensionMap : dimensionsResultMap.keySet()) {
    List<RawAnomalyResultDTO> unmergedResultsByDimensions = dimensionsResultMap.get(dimensionMap);
    // compute the overall window spanned by the raw anomalies of this dimension combination
    long anomalyWindowStart = Long.MAX_VALUE;
    long anomalyWindowEnd = Long.MIN_VALUE;
    for (RawAnomalyResultDTO unmergedResultByDimension : unmergedResultsByDimensions) {
      anomalyWindowStart = Math.min(anomalyWindowStart, unmergedResultByDimension.getStartTime());
      anomalyWindowEnd = Math.max(anomalyWindowEnd, unmergedResultByDimension.getEndTime());
    }
    // NOTE: We fetch the "latest overlapped (conflict)" merged anomaly instead of the most recent merged anomaly,
    // so that the merge results of the current (online) detection do not interfere with the merge results of
    // back-fill (offline) detection. Moreover, the window start is moved back by
    // mergeConfig.getSequentialAllowedGap() in order to allow a gap between anomalies to be merged.
    MergedAnomalyResultDTO latestOverlappedMergedResult = mergedResultDAO.findLatestConflictByFunctionIdDimensions(
        function.getId(), dimensionMap.toString(),
        anomalyWindowStart - mergeConfig.getSequentialAllowedGap(), anomalyWindowEnd);
    List<MergedAnomalyResultDTO> mergedResults = AnomalyTimeBasedSummarizer.mergeAnomalies(
        latestOverlappedMergedResult, unmergedResultsByDimensions,
        mergeConfig.getMaxMergeDurationLength(), mergeConfig.getSequentialAllowedGap());
    for (MergedAnomalyResultDTO mergedResult : mergedResults) {
      mergedResult.setFunction(function);
      mergedResult.setDimensions(dimensionMap);
    }
    LOG.info("Merging [{}] raw anomalies into [{}] merged anomalies for function id : [{}] and dimensions : [{}]",
        unmergedResultsByDimensions.size(), mergedResults.size(), function.getId(), dimensionMap);
    mergedAnomalies.putAll(dimensionMap, mergedResults);
  }
  return mergedAnomalies;
}
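The input multimap groups raw anomalies by their dimension combination, so each key is merged independently. A hypothetical construction, assuming DimensionMap exposes a map-style put of dimension name to value and that rawAnomalyForUS is a previously built RawAnomalyResultDTO:

// Guava multimap keyed by dimension combination; one merge pass runs per key
ListMultimap<DimensionMap, RawAnomalyResultDTO> dimensionsResultMap = ArrayListMultimap.create();
DimensionMap usDimension = new DimensionMap();
usDimension.put("country", "US"); // assumed map-style API on DimensionMap
dimensionsResultMap.put(usDimension, rawAnomalyForUS); // hypothetical raw anomaly for this dimension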
Use of com.linkedin.thirdeye.datalayer.dto.RawAnomalyResultDTO in project pinot by linkedin.
The class DetectionTaskRunner, method runTask.
private void runTask(DateTime windowStart, DateTime windowEnd) throws JobExecutionException, ExecutionException {
  LOG.info("Running anomaly detection for time range {} to {}", windowStart, windowEnd);
  // TODO: Change to DataFetchers/DataSources
  AnomalyDetectionInputContext adContext = fetchData(windowStart, windowEnd);
  ListMultimap<DimensionMap, RawAnomalyResultDTO> resultRawAnomalies =
      dimensionalShuffleAndUnifyAnalyze(windowStart, windowEnd, adContext);
  detectionTaskSuccessCounter.inc();
  boolean isBackfill = false;
  // If the current job is a backfill (ad hoc) detection job, set the notified flag to true so that the
  // merged anomalies do not trigger alerts and emails.
  String jobName = DAO_REGISTRY.getJobDAO().getJobNameByJobId(jobExecutionId);
  if (jobName != null && jobName.toLowerCase().startsWith(BACKFILL_PREFIX)) {
    isBackfill = true;
  }
  // Update merged anomalies
  TimeBasedAnomalyMerger timeBasedAnomalyMerger = new TimeBasedAnomalyMerger(anomalyFunctionFactory);
  ListMultimap<DimensionMap, MergedAnomalyResultDTO> resultMergedAnomalies =
      timeBasedAnomalyMerger.mergeAnomalies(anomalyFunctionSpec, resultRawAnomalies, isBackfill);
  detectionTaskSuccessCounter.inc();
  // TODO: Change to DataSink
  AnomalyDetectionOutputContext adOutputContext = new AnomalyDetectionOutputContext();
  adOutputContext.setRawAnomalies(resultRawAnomalies);
  adOutputContext.setMergedAnomalies(resultMergedAnomalies);
  storeData(adOutputContext);
}
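The backfill detection reduces to a prefix test on the job name. A hypothetical standalone version (the real BACKFILL_PREFIX constant is defined elsewhere in DetectionTaskRunner and its value is not shown here) would look like:

// Ad hoc backfill jobs are identified purely by their job-name prefix.
static boolean isBackfillJob(String jobName, String backfillPrefix) {
  return jobName != null && jobName.toLowerCase().startsWith(backfillPrefix);
}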