use of com.linkedin.thirdeye.api.TimeSpec in project pinot by linkedin.
the class EmailHelper method writeTimeSeriesChart.
public static String writeTimeSeriesChart(final EmailConfigurationDTO config,
    TimeOnTimeComparisonHandler timeOnTimeComparisonHandler, final DateTime now, final DateTime then,
    final String collection, final Map<RawAnomalyResultDTO, String> anomaliesWithLabels)
    throws JobExecutionException {
  try {
    int windowSize = config.getWindowSize();
    TimeUnit windowUnit = config.getWindowUnit();
    long windowMillis = windowUnit.toMillis(windowSize);
    // TODO provide a way for email reports to specify desired graph granularity.
    DatasetConfigManager datasetConfigDAO = DAO_REGISTRY.getDatasetConfigDAO();
    DatasetConfigDTO datasetConfig = datasetConfigDAO.findByDataset(collection);
    TimeSpec timespec = ThirdEyeUtils.getTimeSpecFromDatasetConfig(datasetConfig);
    TimeGranularity dataGranularity = timespec.getDataGranularity();
    TimeOnTimeComparisonResponse chartData =
        getData(timeOnTimeComparisonHandler, config, then, now, WEEK_MILLIS, dataGranularity);
    AnomalyGraphGenerator anomalyGraphGenerator = AnomalyGraphGenerator.getInstance();
    JFreeChart chart =
        anomalyGraphGenerator.createChart(chartData, dataGranularity, windowMillis, anomaliesWithLabels);
    String chartFilePath = EMAIL_REPORT_CHART_PREFIX + config.getId() + PNG;
    LOG.info("Writing chart to {}", chartFilePath);
    anomalyGraphGenerator.writeChartToFile(chart, chartFilePath);
    return chartFilePath;
  } catch (Exception e) {
    throw new JobExecutionException(e);
  }
}
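For context, a caller typically derives the comparison window from the email configuration and supplies the anomalies to annotate on the chart. The sketch below is hypothetical (config, timeOnTimeComparisonHandler, collection, and anomaliesWithLabels are assumed to be already populated) and only illustrates how the returned path is consumed:

// Hypothetical caller; all referenced objects are assumed to exist already.
DateTime now = new DateTime();
DateTime then = now.minus(config.getWindowUnit().toMillis(config.getWindowSize()));
String chartFilePath =
    EmailHelper.writeTimeSeriesChart(config, timeOnTimeComparisonHandler, now, then, collection, anomaliesWithLabels);
// The PNG written at chartFilePath can then be attached or embedded in the outgoing email report.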
use of com.linkedin.thirdeye.api.TimeSpec in project pinot by linkedin.
the class DetectionJobSchedulerUtils method getBucketSizeInMSForDataset.
/**
 * Gets the bucket size in milliseconds, according to the data granularity of the dataset.
 * Bucket sizes are 1 HOUR for hourly and 1 DAY for daily datasets.
 * For MINUTE (or finer) level data, the bucket size is derived from the anomaly function frequency.
 * @param datasetConfig dataset config from which the TimeSpec is derived
 * @param anomalyFunction anomaly function whose frequency drives the bucket size for sub-hourly data
 * @return bucket size in milliseconds
 */
public static long getBucketSizeInMSForDataset(DatasetConfigDTO datasetConfig, AnomalyFunctionDTO anomalyFunction) {
  long bucketMillis = 0;
  TimeSpec timeSpec = ThirdEyeUtils.getTimeSpecFromDatasetConfig(datasetConfig);
  TimeUnit dataUnit = timeSpec.getDataGranularity().getUnit();
  TimeGranularity functionFrequency = anomalyFunction.getFrequency();
  // Calculate time periods according to the function frequency
  if (dataUnit.equals(TimeUnit.MINUTES) || dataUnit.equals(TimeUnit.MILLISECONDS) || dataUnit.equals(TimeUnit.SECONDS)) {
    if (functionFrequency.getUnit().equals(TimeUnit.MINUTES) && (functionFrequency.getSize() <= 30)) {
      bucketMillis = TimeUnit.MILLISECONDS.convert(functionFrequency.getSize(), TimeUnit.MINUTES);
    } else {
      // default to HOURS
      bucketMillis = TimeUnit.MILLISECONDS.convert(1, TimeUnit.HOURS);
    }
  } else {
    bucketMillis = TimeUnit.MILLISECONDS.convert(1, dataUnit);
  }
  return bucketMillis;
}
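To make the branching concrete, here is a hedged sketch of the values one would expect, assuming datasetConfig and anomalyFunction are hypothetical fixtures populated as described in each comment:

// Illustrative expectations (hypothetical inputs):
// HOURLY dataset, any function frequency            -> 3,600,000 ms (1 hour)
// DAILY dataset, any function frequency             -> 86,400,000 ms (1 day)
// 5-MINUTES dataset, function frequency 15 MINUTES  -> 900,000 ms (15 minutes)
// 5-MINUTES dataset, function frequency 45 MINUTES  -> 3,600,000 ms (falls back to 1 hour)
long bucketMillis = DetectionJobSchedulerUtils.getBucketSizeInMSForDataset(datasetConfig, anomalyFunction);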
use of com.linkedin.thirdeye.api.TimeSpec in project pinot by linkedin.
the class DetectionJobSchedulerUtils method getBoundaryAlignedTimeForDataset.
/**
 * Rounds the given time down to the earlier boundary, depending on the granularity of the dataset.
 * e.g. 12:15pm on an HOURLY dataset is treated as 12pm.
 * Any dataset with granularity finer than HOUR is rounded per the anomaly function frequency
 * (assumed to be in MINUTES), so 12:53 on a 5-MINUTES dataset with a function frequency of
 * 15 MINUTES is rounded to 12:45.
 * @param datasetConfig dataset config from which the TimeSpec is derived
 * @param dateTime time to align
 * @param anomalyFunction anomaly function whose frequency drives the alignment for sub-hourly data
 * @return boundary-aligned time in milliseconds
 */
public static long getBoundaryAlignedTimeForDataset(DatasetConfigDTO datasetConfig, DateTime dateTime, AnomalyFunctionDTO anomalyFunction) {
  TimeSpec timeSpec = ThirdEyeUtils.getTimeSpecFromDatasetConfig(datasetConfig);
  TimeUnit dataUnit = timeSpec.getDataGranularity().getUnit();
  TimeGranularity functionFrequency = anomalyFunction.getFrequency();
  // Calculate time periods according to the function frequency
  if (dataUnit.equals(TimeUnit.MINUTES) || dataUnit.equals(TimeUnit.MILLISECONDS) || dataUnit.equals(TimeUnit.SECONDS)) {
    if (functionFrequency.getUnit().equals(TimeUnit.MINUTES) && (functionFrequency.getSize() <= 30)) {
      int minuteBucketSize = functionFrequency.getSize();
      int roundedMinutes = (dateTime.getMinuteOfHour() / minuteBucketSize) * minuteBucketSize;
      dateTime = dateTime.withTime(dateTime.getHourOfDay(), roundedMinutes, 0, 0);
    } else {
      // default to HOURS
      dateTime = getBoundaryAlignedTimeForDataset(dateTime, TimeUnit.HOURS);
    }
  } else {
    dateTime = getBoundaryAlignedTimeForDataset(dateTime, dataUnit);
  }
  return dateTime.getMillis();
}
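A hedged example of the alignment, assuming a dataset at 5-MINUTES granularity and a function frequency of 15 MINUTES (datasetConfig and anomalyFunction are hypothetical fixtures):

DateTime input = new DateTime(2017, 1, 1, 12, 53, 0, 0);
long aligned = DetectionJobSchedulerUtils.getBoundaryAlignedTimeForDataset(datasetConfig, input, anomalyFunction);
// aligned corresponds to 12:45:00.000, since (53 / 15) * 15 = 45.
// On an HOURLY dataset the same input would instead be aligned to 12:00.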
use of com.linkedin.thirdeye.api.TimeSpec in project pinot by linkedin.
the class DataCompletenessTaskRunner method executeCheckerTask.
/**
 * Performs a data completeness check on all datasets for the past LOOKBACK period,
 * and records the results in the database.
 * @param dataCompletenessTaskInfo task info containing the datasets to check and the check window
 */
private void executeCheckerTask(DataCompletenessTaskInfo dataCompletenessTaskInfo) {
  LOG.info("Execute data completeness checker task {}", dataCompletenessTaskInfo);
  try {
    List<String> datasets = dataCompletenessTaskInfo.getDatasetsToCheck();
    LOG.info("Datasets {}", datasets);
    // get start and end time
    long dataCompletenessStartTime = dataCompletenessTaskInfo.getDataCompletenessStartTime();
    long dataCompletenessEndTime = dataCompletenessTaskInfo.getDataCompletenessEndTime();
    LOG.info("StartTime {} i.e. {}", dataCompletenessStartTime, new DateTime(dataCompletenessStartTime));
    LOG.info("EndTime {} i.e. {}", dataCompletenessEndTime, new DateTime(dataCompletenessEndTime));
    for (String dataset : datasets) {
      try {
        DatasetConfigDTO datasetConfig = DAO_REGISTRY.getDatasetConfigDAO().findByDataset(dataset);
        LOG.info("Dataset {} {}", dataset, datasetConfig);
        // TODO: get this from datasetConfig
        // DataCompletenessAlgorithmName algorithmName = datasetConfig.getDataCompletenessAlgorithmName();
        DataCompletenessAlgorithmName algorithmName = DataCompletenessAlgorithmName.WO4W_AVERAGE;
        // TODO: get this from datasetConfig
        // Double expectedCompleteness = datasetConfig.getExpectedCompleteness();
        Double expectedCompleteness = null;
        DataCompletenessAlgorithm dataCompletenessAlgorithm =
            DataCompletenessAlgorithmFactory.getDataCompletenessAlgorithmFromName(algorithmName);
        LOG.info("DataCompletenessAlgorithmClass: {}", algorithmName);
        // get adjusted start time, bucket size and date time formatter, according to dataset granularity
        TimeSpec timeSpec = ThirdEyeUtils.getTimeSpecFromDatasetConfig(datasetConfig);
        DateTimeZone dateTimeZone = Utils.getDataTimeZone(dataset);
        long adjustedStart = DataCompletenessTaskUtils.getAdjustedTimeForDataset(timeSpec, dataCompletenessStartTime, dateTimeZone);
        long adjustedEnd = DataCompletenessTaskUtils.getAdjustedTimeForDataset(timeSpec, dataCompletenessEndTime, dateTimeZone);
        long bucketSize = DataCompletenessTaskUtils.getBucketSizeInMSForDataset(timeSpec);
        DateTimeFormatter dateTimeFormatter = DataCompletenessTaskUtils.getDateTimeFormatterForDataset(timeSpec, dateTimeZone);
        LOG.info("Adjusted start:{} i.e. {} Adjusted end:{} i.e. {} and Bucket size:{}", adjustedStart,
            new DateTime(adjustedStart), adjustedEnd, new DateTime(adjustedEnd), bucketSize);
        // get buckets to process
        Map<String, Long> bucketNameToBucketValueMS =
            getBucketsToProcess(dataset, adjustedStart, adjustedEnd, dataCompletenessAlgorithm, dateTimeFormatter, bucketSize);
        LOG.info("Got {} buckets to process", bucketNameToBucketValueMS.size());
        if (!bucketNameToBucketValueMS.isEmpty()) {
          // create current entries in database if not already present
          int numEntriesCreated = createEntriesInDatabaseIfNotPresent(dataset, bucketNameToBucketValueMS);
          LOG.info("Created {} new entries in database", numEntriesCreated);
          // coldstart: compute and store in db the counts for baseline, if not already present
          LOG.info("Checking for baseline counts in database, or fetching them if not present");
          dataCompletenessAlgorithm.computeBaselineCountsIfNotPresent(dataset, bucketNameToBucketValueMS, dateTimeFormatter, timeSpec, dateTimeZone);
          // get current counts for all current buckets to process
          Map<String, Long> bucketNameToCount =
              DataCompletenessTaskUtils.getCountsForBucketsOfDataset(dataset, timeSpec, bucketNameToBucketValueMS);
          LOG.info("Bucket name to count {}", bucketNameToCount);
          // run completeness check for all buckets
          runCompletenessCheck(dataset, bucketNameToBucketValueMS, bucketNameToCount, dataCompletenessAlgorithm, expectedCompleteness);
        }
      } catch (Exception e) {
        LOG.error("Exception in data completeness checker task for dataset {}.. Continuing with remaining datasets", dataset, e);
      }
    }
  } catch (Exception e) {
    LOG.error("Exception in data completeness checker task", e);
  }
}
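The bucket map used above keys each bucket by its formatted name and maps it to the bucket's start time in milliseconds. A rough sketch of how such a map could be enumerated from the adjusted window (this only mirrors the intent of getBucketsToProcess; the real method may additionally skip buckets already marked complete):

Map<String, Long> bucketNameToBucketValueMS = new LinkedHashMap<>();
for (long bucketStart = adjustedStart; bucketStart < adjustedEnd; bucketStart += bucketSize) {
  bucketNameToBucketValueMS.put(dateTimeFormatter.print(bucketStart), bucketStart);
}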
use of com.linkedin.thirdeye.api.TimeSpec in project pinot by linkedin.
the class AnomalyResource method updateAnomalyFunction.
// Edit anomaly function
@POST
@Path("/anomaly-function/update")
public Response updateAnomalyFunction(@NotNull @QueryParam("id") Long id,
    @NotNull @QueryParam("dataset") String dataset,
    @NotNull @QueryParam("functionName") String functionName,
    @NotNull @QueryParam("metric") String metric,
    @QueryParam("type") String type,
    @NotNull @QueryParam("windowSize") String windowSize,
    @NotNull @QueryParam("windowUnit") String windowUnit,
    @NotNull @QueryParam("windowDelay") String windowDelay,
    @QueryParam("cron") String cron,
    @QueryParam("windowDelayUnit") String windowDelayUnit,
    @QueryParam("exploreDimension") String exploreDimensions,
    @QueryParam("filters") String filters,
    @NotNull @QueryParam("properties") String properties,
    @QueryParam("isActive") boolean isActive) throws Exception {
  if (id == null || StringUtils.isEmpty(dataset) || StringUtils.isEmpty(functionName) || StringUtils.isEmpty(metric)
      || StringUtils.isEmpty(windowSize) || StringUtils.isEmpty(windowUnit) || StringUtils.isEmpty(windowDelay)
      || StringUtils.isEmpty(properties)) {
    throw new UnsupportedOperationException("Received null for one of the mandatory params: "
        + "id " + id + ",dataset " + dataset + ", functionName " + functionName + ", metric " + metric
        + ", windowSize " + windowSize + ", windowUnit " + windowUnit + ", windowDelay " + windowDelay
        + ", properties" + properties);
  }
  AnomalyFunctionDTO anomalyFunctionSpec = anomalyFunctionDAO.findById(id);
  if (anomalyFunctionSpec == null) {
    throw new IllegalStateException("AnomalyFunctionSpec with id " + id + " does not exist");
  }
  DatasetConfigDTO datasetConfig = DAO_REGISTRY.getDatasetConfigDAO().findByDataset(dataset);
  TimeSpec timespec = ThirdEyeUtils.getTimeSpecFromDatasetConfig(datasetConfig);
  TimeGranularity dataGranularity = timespec.getDataGranularity();
  anomalyFunctionSpec.setActive(isActive);
  anomalyFunctionSpec.setCollection(dataset);
  anomalyFunctionSpec.setFunctionName(functionName);
  anomalyFunctionSpec.setTopicMetric(metric);
  anomalyFunctionSpec.setMetrics(Arrays.asList(metric));
  if (StringUtils.isEmpty(type)) {
    type = DEFAULT_FUNCTION_TYPE;
  }
  anomalyFunctionSpec.setType(type);
  anomalyFunctionSpec.setWindowSize(Integer.valueOf(windowSize));
  anomalyFunctionSpec.setWindowUnit(TimeUnit.valueOf(windowUnit));
  // bucket size and unit are defaulted to the collection granularity
  anomalyFunctionSpec.setBucketSize(dataGranularity.getSize());
  anomalyFunctionSpec.setBucketUnit(dataGranularity.getUnit());
  if (!StringUtils.isBlank(filters)) {
    filters = URLDecoder.decode(filters, UTF8);
    String filterString = ThirdEyeUtils.getSortedFiltersFromJson(filters);
    anomalyFunctionSpec.setFilters(filterString);
  }
  anomalyFunctionSpec.setProperties(properties);
  if (StringUtils.isNotEmpty(exploreDimensions)) {
    // Ensure that the explore dimension names are ordered as schema dimension names
    anomalyFunctionSpec.setExploreDimensions(getDimensions(dataset, exploreDimensions));
  }
  if (StringUtils.isEmpty(cron)) {
    cron = DEFAULT_CRON;
  } else {
    // validate cron
    if (!CronExpression.isValidExpression(cron)) {
      throw new IllegalArgumentException("Invalid cron expression for cron : " + cron);
    }
  }
  anomalyFunctionSpec.setCron(cron);
  anomalyFunctionDAO.update(anomalyFunctionSpec);
  return Response.ok(id).build();
}
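Note that the cron expression, when supplied, must be valid Quartz syntax. A small hedged illustration (the expression is an example only):

String exampleCron = "0 0/15 * * * ?"; // every 15 minutes, Quartz syntax
boolean valid = CronExpression.isValidExpression(exampleCron); // true
// An unparsable value such as "every 15 minutes" fails validation,
// and updateAnomalyFunction responds by throwing IllegalArgumentException.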