use of com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO in project pinot by linkedin.
the class CollectionMaxDataTimeCacheLoader method load.
@Override
public Long load(String collection) throws Exception {
  LOGGER.info("Loading maxDataTime cache {}", collection);
  long maxTime = 0;
  try {
    DatasetConfigDTO datasetConfig = datasetConfigDAO.findByDataset(collection);
    // By default, query only offline, unless the dataset has been marked as realtime
    String tableName = ThirdEyeUtils.computeTableName(collection);
    TimeSpec timeSpec = ThirdEyeUtils.getTimeSpecFromDatasetConfig(datasetConfig);
    long prevMaxDataTime = getPrevMaxDataTime(collection, timeSpec);
    String maxTimePql = String.format(COLLECTION_MAX_TIME_QUERY_TEMPLATE, timeSpec.getColumnName(),
        tableName, timeSpec.getColumnName(), prevMaxDataTime);
    PinotQuery maxTimePinotQuery = new PinotQuery(maxTimePql, tableName);
    resultSetGroupCache.refresh(maxTimePinotQuery);
    ResultSetGroup resultSetGroup = resultSetGroupCache.get(maxTimePinotQuery);
    if (resultSetGroup.getResultSetCount() == 0 || resultSetGroup.getResultSet(0).getRowCount() == 0) {
      LOGGER.info("Empty resultSetGroup for collection {}: {}", tableName, resultSetGroup);
      this.collectionToPrevMaxDataTimeMap.remove(collection);
    } else {
      long endTime = (long) resultSetGroup.getResultSet(0).getDouble(0);
      this.collectionToPrevMaxDataTimeMap.put(collection, endTime);
      // endTime + 1 to make sure we cover the full time range of that time value.
      String timeFormat = timeSpec.getFormat();
      if (StringUtils.isBlank(timeFormat) || TimeSpec.SINCE_EPOCH_FORMAT.equals(timeFormat)) {
        maxTime = timeSpec.getDataGranularity().toMillis(endTime + 1) - 1;
      } else {
        DateTimeFormatter inputDataDateTimeFormatter =
            DateTimeFormat.forPattern(timeFormat).withZone(Utils.getDataTimeZone(collection));
        maxTime = DateTime.parse(String.valueOf(endTime), inputDataDateTimeFormatter).getMillis();
      }
    }
  } catch (Exception e) {
    LOGGER.warn("Exception getting maxTime from collection: {}", collection, e);
    this.collectionToPrevMaxDataTimeMap.remove(collection);
  }
  if (maxTime <= 0) {
    maxTime = System.currentTimeMillis();
  }
  return maxTime;
}
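A loader like this typically backs a Guava LoadingCache keyed by collection name, so callers just hit the cache and let Guava refresh the max data time in the background. A minimal sketch of that wiring; the refresh interval, dataset name, and stub loader body are illustrative assumptions, not taken from the ThirdEye codebase:

import java.util.concurrent.TimeUnit;

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;

public class MaxDataTimeCacheSketch {
  public static void main(String[] args) throws Exception {
    // Stand-in for CollectionMaxDataTimeCacheLoader: a real loader would issue the
    // max-time PQL query shown above and fall back to "now" on failure.
    CacheLoader<String, Long> loader = new CacheLoader<String, Long>() {
      @Override
      public Long load(String collection) {
        return System.currentTimeMillis();
      }
    };
    LoadingCache<String, Long> collectionMaxDataTimeCache = CacheBuilder.newBuilder()
        .refreshAfterWrite(5, TimeUnit.MINUTES) // assumed refresh cadence
        .build(loader);
    System.out.println("maxDataTime = " + collectionMaxDataTimeCache.get("someDataset"));
  }
}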
use of com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO in project pinot by linkedin.
the class EmailHelper method writeTimeSeriesChart.
public static String writeTimeSeriesChart(final EmailConfigurationDTO config,
    TimeOnTimeComparisonHandler timeOnTimeComparisonHandler, final DateTime now, final DateTime then,
    final String collection, final Map<RawAnomalyResultDTO, String> anomaliesWithLabels)
    throws JobExecutionException {
  try {
    int windowSize = config.getWindowSize();
    TimeUnit windowUnit = config.getWindowUnit();
    long windowMillis = windowUnit.toMillis(windowSize);
    // TODO provide a way for email reports to specify desired graph granularity.
    DatasetConfigManager datasetConfigDAO = DAO_REGISTRY.getDatasetConfigDAO();
    DatasetConfigDTO datasetConfig = datasetConfigDAO.findByDataset(collection);
    TimeSpec timespec = ThirdEyeUtils.getTimeSpecFromDatasetConfig(datasetConfig);
    TimeGranularity dataGranularity = timespec.getDataGranularity();
    TimeOnTimeComparisonResponse chartData =
        getData(timeOnTimeComparisonHandler, config, then, now, WEEK_MILLIS, dataGranularity);
    AnomalyGraphGenerator anomalyGraphGenerator = AnomalyGraphGenerator.getInstance();
    JFreeChart chart =
        anomalyGraphGenerator.createChart(chartData, dataGranularity, windowMillis, anomaliesWithLabels);
    String chartFilePath = EMAIL_REPORT_CHART_PREFIX + config.getId() + PNG;
    LOG.info("Writing chart to {}", chartFilePath);
    anomalyGraphGenerator.writeChartToFile(chart, chartFilePath);
    return chartFilePath;
  } catch (Exception e) {
    throw new JobExecutionException(e);
  }
}
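The heavy lifting here is delegated to AnomalyGraphGenerator, but the final render-to-PNG step can be reproduced with JFreeChart alone. A minimal, self-contained sketch assuming JFreeChart 1.0.x (where ChartUtilities provides saveChartAsPNG); the series data, chart title, and output path are placeholders:

import java.io.File;
import java.util.Date;

import org.jfree.chart.ChartFactory;
import org.jfree.chart.ChartUtilities;
import org.jfree.chart.JFreeChart;
import org.jfree.data.time.Minute;
import org.jfree.data.time.TimeSeries;
import org.jfree.data.time.TimeSeriesCollection;

public class ChartToPngSketch {
  public static void main(String[] args) throws Exception {
    TimeSeries series = new TimeSeries("metric");
    series.add(new Minute(new Date()), 42.0);
    JFreeChart chart = ChartFactory.createTimeSeriesChart(
        "anomaly-report", "time", "value", new TimeSeriesCollection(series), false, false, false);
    // analogous to anomalyGraphGenerator.writeChartToFile(chart, chartFilePath)
    ChartUtilities.saveChartAsPNG(new File("anomaly-chart-1.png"), chart, 800, 400);
  }
}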
use of com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO in project pinot by linkedin.
the class EmailHelper method getContributorDataForDataReport.
public static ContributorViewResponse getContributorDataForDataReport(String collection, String metric,
    List<String> dimensions, AlertConfigBean.COMPARE_MODE compareMode, long offsetDelayMillis,
    boolean intraday) throws Exception {
  long baselineOffset = getBaselineOffset(compareMode);
  ContributorViewRequest request = new ContributorViewRequest();
  request.setCollection(collection);
  List<MetricExpression> metricExpressions =
      Utils.convertToMetricExpressions(metric, MetricAggFunction.SUM, collection);
  request.setMetricExpressions(metricExpressions);
  long currentEnd = System.currentTimeMillis();
  long maxDataTime = collectionMaxDataTimeCache.get(collection);
  if (currentEnd > maxDataTime) {
    currentEnd = maxDataTime;
  }
  // align to the most recent complete hour, then apply the configured delay
  currentEnd = (currentEnd - (currentEnd % HOUR_MILLIS)) - offsetDelayMillis;
  String aggTimeGranularity = "HOURS";
  long currentStart = currentEnd - DAY_MILLIS;
  // intraday option: start the current window at the beginning of the current day
  if (intraday) {
    DateTimeZone timeZone = DateTimeZone.forTimeZone(AlertTaskRunnerV2.DEFAULT_TIME_ZONE);
    DateTime endDate = new DateTime(currentEnd, timeZone);
    DateTime intraDayStartTime = new DateTime(endDate.toString().split("T")[0], timeZone);
    if (intraDayStartTime.getMillis() != currentEnd) {
      currentStart = intraDayStartTime.getMillis();
    }
  }
  DatasetConfigDTO datasetConfigDTO = datasetConfigManager.findByDataset(collection);
  if (datasetConfigDTO != null && TimeUnit.DAYS.equals(datasetConfigDTO.getTimeUnit())) {
    aggTimeGranularity = datasetConfigDTO.getTimeUnit().name();
    currentEnd = currentEnd - (currentEnd % DAY_MILLIS);
    currentStart = currentEnd - WEEK_MILLIS;
  }
  long baselineStart = currentStart - baselineOffset;
  long baselineEnd = currentEnd - baselineOffset;
  // guard against a missing dataset config; DateTimeZone.forID(null) falls back to the default zone
  String timeZone = (datasetConfigDTO != null) ? datasetConfigDTO.getTimezone() : null;
  request.setBaselineStart(new DateTime(baselineStart, DateTimeZone.forID(timeZone)));
  request.setBaselineEnd(new DateTime(baselineEnd, DateTimeZone.forID(timeZone)));
  request.setCurrentStart(new DateTime(currentStart, DateTimeZone.forID(timeZone)));
  request.setCurrentEnd(new DateTime(currentEnd, DateTimeZone.forID(timeZone)));
  request.setTimeGranularity(Utils.getAggregationTimeGranularity(aggTimeGranularity, collection));
  request.setGroupByDimensions(dimensions);
  ContributorViewHandler handler = new ContributorViewHandler(queryCache);
  return handler.process(request);
}
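The window arithmetic above is easy to get wrong, so here is a minimal standalone sketch of the hour-alignment and baseline-offset math; the one-week offset, zero delay, and timezone are illustrative assumptions:

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;

public class WindowAlignmentSketch {
  private static final long HOUR_MILLIS = 60 * 60 * 1000L;
  private static final long DAY_MILLIS = 24 * HOUR_MILLIS;
  private static final long WEEK_MILLIS = 7 * DAY_MILLIS;

  public static void main(String[] args) {
    long offsetDelayMillis = 0L;       // assumed: no extra reporting delay
    long baselineOffset = WEEK_MILLIS; // assumed: week-over-week comparison
    long currentEnd = System.currentTimeMillis();
    // floor to the most recent complete hour, then apply the delay
    currentEnd = (currentEnd - (currentEnd % HOUR_MILLIS)) - offsetDelayMillis;
    long currentStart = currentEnd - DAY_MILLIS;
    long baselineStart = currentStart - baselineOffset;
    long baselineEnd = currentEnd - baselineOffset;
    DateTimeZone tz = DateTimeZone.forID("America/Los_Angeles"); // placeholder zone
    System.out.println("current:  " + new DateTime(currentStart, tz) + " .. " + new DateTime(currentEnd, tz));
    System.out.println("baseline: " + new DateTime(baselineStart, tz) + " .. " + new DateTime(baselineEnd, tz));
  }
}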
use of com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO in project pinot by linkedin.
the class DetectionJobScheduler method run.
/**
 * Reads all active anomaly functions.
 * For each function, finds all time periods for which detection needs to be run,
 * runs the anomaly function for all those periods, and updates the detection status.
 * {@inheritDoc}
 * @see java.lang.Runnable#run()
 */
public void run() {
  // read all anomaly functions
  LOG.info("Reading all anomaly functions");
  List<AnomalyFunctionDTO> anomalyFunctions = DAO_REGISTRY.getAnomalyFunctionDAO().findAllActiveFunctions();
  // for each active anomaly function
  for (AnomalyFunctionDTO anomalyFunction : anomalyFunctions) {
    try {
      LOG.info("Function: {}", anomalyFunction);
      long functionId = anomalyFunction.getId();
      String dataset = anomalyFunction.getCollection();
      DatasetConfigDTO datasetConfig = CACHE_REGISTRY.getDatasetConfigCache().get(dataset);
      DateTimeZone dateTimeZone = Utils.getDataTimeZone(dataset);
      DateTime currentDateTime = new DateTime(dateTimeZone);
      // find the last entry in the detectionStatus table for this function
      DetectionStatusDTO lastEntryForFunction =
          DAO_REGISTRY.getDetectionStatusDAO().findLatestEntryForFunctionId(functionId);
      LOG.info("Function: {} Dataset: {} Last entry is {}", functionId, dataset, lastEntryForFunction);
      // calculate entries from the last entry up to the current time
      Map<String, Long> newEntries = DetectionJobSchedulerUtils.getNewEntries(currentDateTime,
          lastEntryForFunction, anomalyFunction, datasetConfig, dateTimeZone);
      LOG.info("Function: {} Dataset: {} Creating {} new entries {}", functionId, dataset,
          newEntries.size(), newEntries);
      // create these entries
      for (Entry<String, Long> entry : newEntries.entrySet()) {
        DetectionStatusDTO detectionStatus = new DetectionStatusDTO();
        detectionStatus.setDataset(anomalyFunction.getCollection());
        detectionStatus.setFunctionId(functionId);
        detectionStatus.setDateToCheckInSDF(entry.getKey());
        detectionStatus.setDateToCheckInMS(entry.getValue());
        DAO_REGISTRY.getDetectionStatusDAO().save(detectionStatus);
      }
      // find all entries from the past 3 days that still have detectionRun = false
      List<DetectionStatusDTO> entriesInLast3Days =
          DAO_REGISTRY.getDetectionStatusDAO().findAllInTimeRangeForFunctionAndDetectionRun(
              currentDateTime.minusDays(3).getMillis(), currentDateTime.getMillis(), functionId, false);
      Collections.sort(entriesInLast3Days);
      LOG.info("Function: {} Dataset: {} Entries in last 3 days {}", functionId, dataset, entriesInLast3Days);
      // for each entry, collect its startTime and endTime
      List<Long> startTimes = new ArrayList<>();
      List<Long> endTimes = new ArrayList<>();
      List<DetectionStatusDTO> detectionStatusToUpdate = new ArrayList<>();
      for (DetectionStatusDTO detectionStatus : entriesInLast3Days) {
        try {
          LOG.info("Function: {} Dataset: {} Entry : {}", functionId, dataset, detectionStatus);
          long dateToCheck = detectionStatus.getDateToCheckInMS();
          // check data availability for the monitoring window, shifted back by the configured delay
          long endTime = dateToCheck
              - TimeUnit.MILLISECONDS.convert(anomalyFunction.getWindowDelay(), anomalyFunction.getWindowDelayUnit());
          long startTime = endTime
              - TimeUnit.MILLISECONDS.convert(anomalyFunction.getWindowSize(), anomalyFunction.getWindowUnit());
          LOG.info("Function: {} Dataset: {} Checking start:{} {} to end:{} {}", functionId, dataset,
              startTime, new DateTime(startTime, dateTimeZone), endTime, new DateTime(endTime, dateTimeZone));
          boolean pass = checkIfDetectionRunCriteriaMet(startTime, endTime, datasetConfig, anomalyFunction);
          if (pass) {
            startTimes.add(startTime);
            endTimes.add(endTime);
            detectionStatusToUpdate.add(detectionStatus);
          } else {
            LOG.warn("Function: {} Dataset: {} Data incomplete for monitoring window {} ({}) to {} ({}), "
                + "skipping anomaly detection", functionId, dataset, startTime, new DateTime(startTime),
                endTime, new DateTime(endTime));
            // TODO: Send email to owners/dev team
          }
        } catch (Exception e) {
          LOG.error("Function: {} Dataset: {} Exception in preparing entry {}", functionId, dataset,
              detectionStatus, e);
        }
      }
      // run the anomaly function for any time periods that passed the criteria, and update their status
      runAnomalyFunctionAndUpdateDetectionStatus(startTimes, endTimes, anomalyFunction, detectionStatusToUpdate);
    } catch (Exception e) {
      LOG.error("Function: {} Dataset: {} Exception in running anomaly function {}", anomalyFunction.getId(),
          anomalyFunction.getCollection(), anomalyFunction, e);
    }
  }
}
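The per-entry window computation inside the loop boils down to two subtractions: the window ends "delay" before the checkpoint and spans "windowSize" back from there. A tiny standalone sketch; the 1-hour delay and 1-day window are illustrative assumptions:

import java.util.concurrent.TimeUnit;

public class MonitoringWindowSketch {
  public static void main(String[] args) {
    long dateToCheck = System.currentTimeMillis();
    // assumed configuration: a 1-hour ingestion delay and a 1-day detection window
    long windowDelay = 1;
    TimeUnit windowDelayUnit = TimeUnit.HOURS;
    long windowSize = 1;
    TimeUnit windowUnit = TimeUnit.DAYS;
    long endTime = dateToCheck - TimeUnit.MILLISECONDS.convert(windowDelay, windowDelayUnit);
    long startTime = endTime - TimeUnit.MILLISECONDS.convert(windowSize, windowUnit);
    System.out.println("monitoring window: " + startTime + " .. " + endTime);
  }
}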
use of com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO in project pinot by linkedin.
the class DetectionTaskRunner method setupTask.
private void setupTask(TaskInfo taskInfo, TaskContext taskContext) throws Exception {
  DetectionTaskInfo detectionTaskInfo = (DetectionTaskInfo) taskInfo;
  windowStarts = detectionTaskInfo.getWindowStartTime();
  windowEnds = detectionTaskInfo.getWindowEndTime();
  anomalyFunctionSpec = detectionTaskInfo.getAnomalyFunctionSpec();
  jobExecutionId = detectionTaskInfo.getJobExecutionId();
  anomalyFunctionFactory = taskContext.getAnomalyFunctionFactory();
  anomalyFunction = anomalyFunctionFactory.fromSpec(anomalyFunctionSpec);
  String dataset = anomalyFunctionSpec.getCollection();
  DatasetConfigDTO datasetConfig = DAO_REGISTRY.getDatasetConfigDAO().findByDataset(dataset);
  if (datasetConfig == null) {
    LOG.error("Dataset [" + dataset + "] is not found");
    throw new NullArgumentException(
        "Dataset [" + dataset + "] is not found with function: " + anomalyFunctionSpec.toString());
  }
  collectionDimensions = datasetConfig.getDimensions();
  LOG.info("Running anomaly detection job with metricFunction: [{}], topic metric [{}], collection: [{}]",
      anomalyFunctionSpec.getFunctionName(), anomalyFunctionSpec.getTopicMetric(),
      anomalyFunctionSpec.getCollection());
}
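setupTask fails fast when the dataset config is missing, rather than letting a NullPointerException surface later in the detection run. A generic sketch of that guard pattern; the in-memory store and nested config class are hypothetical stand-ins for DatasetConfigManager and DatasetConfigDTO:

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class DatasetLookupSketch {
  // hypothetical stand-in for DatasetConfigDTO
  static class DatasetConfig {
    final List<String> dimensions;
    DatasetConfig(List<String> dimensions) { this.dimensions = dimensions; }
  }

  static DatasetConfig findByDatasetOrThrow(Map<String, DatasetConfig> store, String dataset) {
    DatasetConfig config = store.get(dataset);
    if (config == null) {
      // fail fast, as setupTask does, so a bad task spec surfaces at setup time
      throw new IllegalStateException("Dataset [" + dataset + "] is not found");
    }
    return config;
  }

  public static void main(String[] args) {
    Map<String, DatasetConfig> store = new HashMap<>();
    store.put("myDataset", new DatasetConfig(Arrays.asList("country", "browser")));
    System.out.println(findByDatasetOrThrow(store, "myDataset").dimensions);
  }
}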