Use of com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO in project pinot by linkedin.
From class DataCompletenessJobRunner, method run:
@Override
public void run() {
  DateTime now = new DateTime();
  long checkDurationEndTime = now.getMillis();
  long checkDurationStartTime = now.minus(TimeUnit.MILLISECONDS.convert(DataCompletenessConstants.LOOKBACK_TIME_DURATION, DataCompletenessConstants.LOOKBACK_TIMEUNIT)).getMillis();
  String checkerEndTime = dateTimeFormatter.print(checkDurationEndTime);
  String checkerStartTime = dateTimeFormatter.print(checkDurationStartTime);
  String jobName = String.format("%s-%s-%s", TaskType.DATA_COMPLETENESS.toString(), checkerStartTime, checkerEndTime);
  dataCompletenessJobContext.setCheckDurationStartTime(checkDurationStartTime);
  dataCompletenessJobContext.setCheckDurationEndTime(checkDurationEndTime);
  dataCompletenessJobContext.setJobName(jobName);
  Set<String> datasetsToCheck = new HashSet<>();
  for (AnomalyFunctionDTO anomalyFunction : DAO_REGISTRY.getAnomalyFunctionDAO().findAllActiveFunctions()) {
    datasetsToCheck.add(anomalyFunction.getCollection());
  }
  for (DatasetConfigDTO datasetConfig : DAO_REGISTRY.getDatasetConfigDAO().findActiveRequiresCompletenessCheck()) {
    datasetsToCheck.add(datasetConfig.getDataset());
  }
  dataCompletenessJobContext.setDatasetsToCheck(Lists.newArrayList(datasetsToCheck));
  // create data completeness job
  long jobExecutionId = createJob();
  dataCompletenessJobContext.setJobExecutionId(jobExecutionId);
  // create data completeness tasks
  createTasks();
}
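The check window computed at the top of run() is simply the current time minus a fixed lookback, converted to milliseconds. A minimal, self-contained sketch of that window and job-name computation, assuming placeholder values for the lookback constants and the formatter pattern (the real values live in DataCompletenessConstants and the runner's own dateTimeFormatter):

import java.util.concurrent.TimeUnit;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;

public class LookbackWindowSketch {

  // Placeholder values; the real constants come from DataCompletenessConstants.
  private static final int LOOKBACK_TIME_DURATION = 3;
  private static final TimeUnit LOOKBACK_TIMEUNIT = TimeUnit.DAYS;
  // Placeholder pattern; the runner uses its own dateTimeFormatter.
  private static final DateTimeFormatter FORMATTER = DateTimeFormat.forPattern("yyyyMMddHHmm");

  public static void main(String[] args) {
    DateTime now = new DateTime();
    long checkDurationEndTime = now.getMillis();
    // Subtract the lookback, expressed in milliseconds, to get the start of the check window.
    long checkDurationStartTime = now.minus(TimeUnit.MILLISECONDS.convert(LOOKBACK_TIME_DURATION, LOOKBACK_TIMEUNIT)).getMillis();
    String jobName = String.format("%s-%s-%s", "DATA_COMPLETENESS", FORMATTER.print(checkDurationStartTime), FORMATTER.print(checkDurationEndTime));
    System.out.println(jobName);
  }
}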
Use of com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO in project pinot by linkedin.
From class DataCompletenessTaskRunner, method executeCheckerTask:
/**
 * Performs a data completeness check on all datasets for the past LOOKBACK duration, and records the results in the database.
 * @param dataCompletenessTaskInfo
 */
private void executeCheckerTask(DataCompletenessTaskInfo dataCompletenessTaskInfo) {
  LOG.info("Execute data completeness checker task {}", dataCompletenessTaskInfo);
  try {
    List<String> datasets = dataCompletenessTaskInfo.getDatasetsToCheck();
    LOG.info("Datasets {}", datasets);
    // get start and end time
    long dataCompletenessStartTime = dataCompletenessTaskInfo.getDataCompletenessStartTime();
    long dataCompletenessEndTime = dataCompletenessTaskInfo.getDataCompletenessEndTime();
    LOG.info("StartTime {} i.e. {}", dataCompletenessStartTime, new DateTime(dataCompletenessStartTime));
    LOG.info("EndTime {} i.e. {}", dataCompletenessEndTime, new DateTime(dataCompletenessEndTime));
    for (String dataset : datasets) {
      try {
        DatasetConfigDTO datasetConfig = DAO_REGISTRY.getDatasetConfigDAO().findByDataset(dataset);
        LOG.info("Dataset {} {}", dataset, datasetConfig);
        // TODO: get this from datasetConfig
        //DataCompletenessAlgorithmName algorithmName = datasetConfig.getDataCompletenessAlgorithmName();
        DataCompletenessAlgorithmName algorithmName = DataCompletenessAlgorithmName.WO4W_AVERAGE;
        // TODO: get this from datasetConfig
        // Double expectedCompleteness = datasetConfig.getExpectedCompleteness();
        Double expectedCompleteness = null;
        DataCompletenessAlgorithm dataCompletenessAlgorithm = DataCompletenessAlgorithmFactory.getDataCompletenessAlgorithmFromName(algorithmName);
        LOG.info("DataCompletenessAlgorithmClass: {}", algorithmName);
        // get adjusted start time, bucket size and date time formatter, according to dataset granularity
        TimeSpec timeSpec = ThirdEyeUtils.getTimeSpecFromDatasetConfig(datasetConfig);
        DateTimeZone dateTimeZone = Utils.getDataTimeZone(dataset);
        long adjustedStart = DataCompletenessTaskUtils.getAdjustedTimeForDataset(timeSpec, dataCompletenessStartTime, dateTimeZone);
        long adjustedEnd = DataCompletenessTaskUtils.getAdjustedTimeForDataset(timeSpec, dataCompletenessEndTime, dateTimeZone);
        long bucketSize = DataCompletenessTaskUtils.getBucketSizeInMSForDataset(timeSpec);
        DateTimeFormatter dateTimeFormatter = DataCompletenessTaskUtils.getDateTimeFormatterForDataset(timeSpec, dateTimeZone);
        LOG.info("Adjusted start:{} i.e. {} Adjusted end:{} i.e. {} and Bucket size:{}", adjustedStart, new DateTime(adjustedStart), adjustedEnd, new DateTime(adjustedEnd), bucketSize);
        // get buckets to process
        Map<String, Long> bucketNameToBucketValueMS = getBucketsToProcess(dataset, adjustedStart, adjustedEnd, dataCompletenessAlgorithm, dateTimeFormatter, bucketSize);
        LOG.info("Got {} buckets to process", bucketNameToBucketValueMS.size());
        if (!bucketNameToBucketValueMS.isEmpty()) {
          // create current entries in database if not already present
          int numEntriesCreated = createEntriesInDatabaseIfNotPresent(dataset, bucketNameToBucketValueMS);
          LOG.info("Created {} new entries in database", numEntriesCreated);
          // coldstart: compute and store in db the counts for baseline, if not already present
          LOG.info("Checking for baseline counts in database, or fetching them if not present");
          dataCompletenessAlgorithm.computeBaselineCountsIfNotPresent(dataset, bucketNameToBucketValueMS, dateTimeFormatter, timeSpec, dateTimeZone);
          // get current counts for all current buckets to process
          Map<String, Long> bucketNameToCount = DataCompletenessTaskUtils.getCountsForBucketsOfDataset(dataset, timeSpec, bucketNameToBucketValueMS);
          LOG.info("Bucket name to count {}", bucketNameToCount);
          // run completeness check for all buckets
          runCompletenessCheck(dataset, bucketNameToBucketValueMS, bucketNameToCount, dataCompletenessAlgorithm, expectedCompleteness);
        }
      } catch (Exception e) {
        LOG.error("Exception in data completeness checker task for dataset {}. Continuing with remaining datasets", dataset, e);
      }
    }
  } catch (Exception e) {
    LOG.error("Exception in data completeness checker task", e);
  }
}
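The body of runCompletenessCheck is not shown above. Conceptually, the WO4W_AVERAGE algorithm compares each bucket's current count against the average count of the same bucket over the previous four weeks. A hypothetical sketch of that per-bucket comparison, assuming a simple percentage threshold and an assumed 80% default when expectedCompleteness is null; the class, method name, and default are illustrative, not the actual ThirdEye implementation:

import java.util.List;

// Hypothetical sketch of a week-over-4-week (WO4W) completeness check; not the actual ThirdEye code.
public class CompletenessCheckSketch {

  // Assumed default threshold, in percent, used when expectedCompleteness is null.
  private static final double DEFAULT_EXPECTED_COMPLETENESS = 80.0;

  /** Returns true when the current bucket count reaches the expected fraction of the 4-week baseline average. */
  public static boolean isBucketComplete(long currentCount, List<Long> baselineCounts, Double expectedCompleteness) {
    double threshold = (expectedCompleteness == null) ? DEFAULT_EXPECTED_COMPLETENESS : expectedCompleteness;
    double baselineAverage = baselineCounts.stream().mapToLong(Long::longValue).average().orElse(0);
    if (baselineAverage == 0) {
      // Cold start: no baseline counts yet, so the bucket cannot be declared complete.
      return false;
    }
    double percentComplete = 100.0 * currentCount / baselineAverage;
    return percentComplete >= threshold;
  }
}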
Use of com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO in project pinot by linkedin.
From class Utils, method getNonAdditiveTimeGranularity:
public static TimeGranularity getNonAdditiveTimeGranularity(String collection) {
  DatasetConfigDTO datasetConfig;
  try {
    datasetConfig = CACHE_REGISTRY.getDatasetConfigCache().get(collection);
    Integer nonAdditiveBucketSize = datasetConfig.getNonAdditiveBucketSize();
    String nonAdditiveBucketUnit = datasetConfig.getNonAdditiveBucketUnit();
    if (nonAdditiveBucketSize != null && nonAdditiveBucketUnit != null) {
      TimeGranularity timeGranularity = new TimeGranularity(datasetConfig.getNonAdditiveBucketSize(), TimeUnit.valueOf(datasetConfig.getNonAdditiveBucketUnit()));
      return timeGranularity;
    }
  } catch (ExecutionException e) {
    LOG.info("Exception in fetching non additive time granularity");
  }
  return null;
}
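Because getNonAdditiveTimeGranularity returns null for additive datasets (and when the config lookup fails), callers need a fallback granularity. A hypothetical usage fragment; the dataset name and the hourly default are illustrative only:

// Hypothetical caller; "pageViews" and the hourly fallback are illustrative, not real configuration.
TimeGranularity granularity = Utils.getNonAdditiveTimeGranularity("pageViews");
if (granularity == null) {
  // Additive dataset (or lookup failure): fall back to an assumed default bucket size.
  granularity = new TimeGranularity(1, TimeUnit.HOURS);
}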
Use of com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO in project pinot by linkedin.
From class Utils, method getDataTimeZone:
/*
 * This method returns the time zone of the data in this collection
 */
public static DateTimeZone getDataTimeZone(String collection) {
  String timezone = TimeSpec.DEFAULT_TIMEZONE;
  try {
    DatasetConfigDTO datasetConfig = CACHE_REGISTRY.getDatasetConfigCache().get(collection);
    timezone = datasetConfig.getTimezone();
  } catch (ExecutionException e) {
    LOG.error("Exception while getting dataset config for {}", collection);
  }
  return DateTimeZone.forID(timezone);
}
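getDataTimeZone is what the checker task above uses to align bucket boundaries with the dataset's local time rather than the JVM default. A hypothetical usage fragment; the dataset name and epoch timestamp are illustrative only:

// Hypothetical caller; the dataset name and epoch timestamp are illustrative only.
DateTimeZone dataZone = Utils.getDataTimeZone("pageViews");
// Interpret an epoch-millis bucket boundary in the dataset's own timezone.
DateTime bucketStart = new DateTime(1483228800000L, dataZone);
System.out.println(bucketStart);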
Use of com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO in project pinot by linkedin.
From class AnomalyResource, method updateAnomalyFunction:
// Edit anomaly function
@POST
@Path("/anomaly-function/update")
public Response updateAnomalyFunction(@NotNull @QueryParam("id") Long id,
    @NotNull @QueryParam("dataset") String dataset,
    @NotNull @QueryParam("functionName") String functionName,
    @NotNull @QueryParam("metric") String metric,
    @QueryParam("type") String type,
    @NotNull @QueryParam("windowSize") String windowSize,
    @NotNull @QueryParam("windowUnit") String windowUnit,
    @NotNull @QueryParam("windowDelay") String windowDelay,
    @QueryParam("cron") String cron,
    @QueryParam("windowDelayUnit") String windowDelayUnit,
    @QueryParam("exploreDimension") String exploreDimensions,
    @QueryParam("filters") String filters,
    @NotNull @QueryParam("properties") String properties,
    @QueryParam("isActive") boolean isActive) throws Exception {
  if (id == null || StringUtils.isEmpty(dataset) || StringUtils.isEmpty(functionName) || StringUtils.isEmpty(metric) || StringUtils.isEmpty(windowSize) || StringUtils.isEmpty(windowUnit) || StringUtils.isEmpty(windowDelay) || StringUtils.isEmpty(properties)) {
    throw new UnsupportedOperationException("Received null for one of the mandatory params: " + "id " + id + ", dataset " + dataset + ", functionName " + functionName + ", metric " + metric + ", windowSize " + windowSize + ", windowUnit " + windowUnit + ", windowDelay " + windowDelay + ", properties " + properties);
  }
  AnomalyFunctionDTO anomalyFunctionSpec = anomalyFunctionDAO.findById(id);
  if (anomalyFunctionSpec == null) {
    throw new IllegalStateException("AnomalyFunctionSpec with id " + id + " does not exist");
  }
  DatasetConfigDTO datasetConfig = DAO_REGISTRY.getDatasetConfigDAO().findByDataset(dataset);
  TimeSpec timespec = ThirdEyeUtils.getTimeSpecFromDatasetConfig(datasetConfig);
  TimeGranularity dataGranularity = timespec.getDataGranularity();
  anomalyFunctionSpec.setActive(isActive);
  anomalyFunctionSpec.setCollection(dataset);
  anomalyFunctionSpec.setFunctionName(functionName);
  anomalyFunctionSpec.setTopicMetric(metric);
  anomalyFunctionSpec.setMetrics(Arrays.asList(metric));
  if (StringUtils.isEmpty(type)) {
    type = DEFAULT_FUNCTION_TYPE;
  }
  anomalyFunctionSpec.setType(type);
  anomalyFunctionSpec.setWindowSize(Integer.valueOf(windowSize));
  anomalyFunctionSpec.setWindowUnit(TimeUnit.valueOf(windowUnit));
  // bucket size and unit are defaulted to the collection granularity
  anomalyFunctionSpec.setBucketSize(dataGranularity.getSize());
  anomalyFunctionSpec.setBucketUnit(dataGranularity.getUnit());
  if (!StringUtils.isBlank(filters)) {
    filters = URLDecoder.decode(filters, UTF8);
    String filterString = ThirdEyeUtils.getSortedFiltersFromJson(filters);
    anomalyFunctionSpec.setFilters(filterString);
  }
  anomalyFunctionSpec.setProperties(properties);
  if (StringUtils.isNotEmpty(exploreDimensions)) {
    // Ensure that the explore dimension names are ordered as schema dimension names
    anomalyFunctionSpec.setExploreDimensions(getDimensions(dataset, exploreDimensions));
  }
  if (StringUtils.isEmpty(cron)) {
    cron = DEFAULT_CRON;
  } else {
    // validate cron
    if (!CronExpression.isValidExpression(cron)) {
      throw new IllegalArgumentException("Invalid cron expression for cron : " + cron);
    }
  }
  anomalyFunctionSpec.setCron(cron);
  anomalyFunctionDAO.update(anomalyFunctionSpec);
  return Response.ok(id).build();
}
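Since updateAnomalyFunction takes everything as query parameters, a caller only needs to issue a POST with the mandatory parameters set. A hypothetical JAX-RS client sketch; the base URL, resource mount point, and every parameter value below are illustrative assumptions, not the actual ThirdEye deployment settings:

import javax.ws.rs.client.ClientBuilder;
import javax.ws.rs.client.Entity;
import javax.ws.rs.core.Response;

public class UpdateAnomalyFunctionClientSketch {
  public static void main(String[] args) {
    // Base URL, mount path, and parameter values are illustrative assumptions.
    Response response = ClientBuilder.newClient()
        .target("http://localhost:1426/dashboard")
        .path("anomaly-function/update")
        .queryParam("id", 42)
        .queryParam("dataset", "pageViews")
        .queryParam("functionName", "pageViews_weekly_drop")
        .queryParam("metric", "views")
        .queryParam("windowSize", "1")
        .queryParam("windowUnit", "DAYS")
        .queryParam("windowDelay", "0")
        .queryParam("properties", "baseline=w/w;changeThreshold=-0.1")
        .queryParam("isActive", true)
        .request()
        .post(Entity.text(""));
    System.out.println("HTTP status: " + response.getStatus());
  }
}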