Use of com.linkedin.thirdeye.completeness.checker.DataCompletenessConstants.DataCompletenessAlgorithmName in project pinot by linkedin.
The getPercentCompleteness method of the DataCompletenessResource class.
@GET
@Path(value = "/percent-completeness")
@Produces(MediaType.APPLICATION_JSON)
public double getPercentCompleteness(String payload) {
  PercentCompletenessFunctionInput input = PercentCompletenessFunctionInput.fromJson(payload);
  DataCompletenessAlgorithmName algorithm = input.getAlgorithm();
  List<Long> baselineCounts = input.getBaselineCounts();
  Long currentCount = input.getCurrentCount();
  double percentCompleteness = 0;
  double baselineTotalCount = 0;
  if (CollectionUtils.isNotEmpty(baselineCounts)) {
    switch (algorithm) {
      case WO4W_AVERAGE:
      default:
        // WO4W_AVERAGE: the baseline is the mean of the supplied counts
        // (e.g. the same bucket over the preceding weeks)
        for (Long baseline : baselineCounts) {
          baselineTotalCount = baselineTotalCount + baseline;
        }
        baselineTotalCount = baselineTotalCount / baselineCounts.size();
        break;
    }
  }
  if (baselineTotalCount != 0) {
    // multiply by 100.0 to force floating-point division,
    // instead of boxing through the deprecated Double constructor
    percentCompleteness = (currentCount * 100.0) / baselineTotalCount;
  } else if (currentCount != 0) {
    // no baseline data but current data exists: treat the bucket as fully complete
    percentCompleteness = 100;
  }
  return percentCompleteness;
}
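As a worked illustration of the WO4W_AVERAGE branch, the standalone sketch below replays the same arithmetic on sample numbers. The four-weeks-of-baselines reading of WO4W and the sample counts are assumptions for demonstration, not taken from PercentCompletenessFunctionInput's source.
// Standalone illustration of the percent-completeness arithmetic above.
import java.util.Arrays;
import java.util.List;

public class PercentCompletenessSketch {
  public static void main(String[] args) {
    // hypothetical counts for the same bucket over the past four weeks
    List<Long> baselineCounts = Arrays.asList(900L, 1000L, 1100L, 1000L);
    long currentCount = 750;
    // baseline is the mean of the weekly counts: 4000 / 4 = 1000
    double baselineTotalCount = 0;
    for (Long baseline : baselineCounts) {
      baselineTotalCount += baseline;
    }
    baselineTotalCount /= baselineCounts.size();
    // percent completeness = current / baseline * 100 = 750 / 1000 * 100
    double percentCompleteness = (currentCount * 100.0) / baselineTotalCount;
    System.out.println(percentCompleteness); // prints 75.0
  }
}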
Use of com.linkedin.thirdeye.completeness.checker.DataCompletenessConstants.DataCompletenessAlgorithmName in project pinot by linkedin.
The executeCheckerTask method of the DataCompletenessTaskRunner class.
/**
 * Performs a data completeness check on all datasets for the past LOOKBACK window,
 * and records the results in the database.
 * @param dataCompletenessTaskInfo
 */
private void executeCheckerTask(DataCompletenessTaskInfo dataCompletenessTaskInfo) {
  LOG.info("Execute data completeness checker task {}", dataCompletenessTaskInfo);
  try {
    List<String> datasets = dataCompletenessTaskInfo.getDatasetsToCheck();
    LOG.info("Datasets {}", datasets);

    // get start and end time
    long dataCompletenessStartTime = dataCompletenessTaskInfo.getDataCompletenessStartTime();
    long dataCompletenessEndTime = dataCompletenessTaskInfo.getDataCompletenessEndTime();
    LOG.info("StartTime {} i.e. {}", dataCompletenessStartTime, new DateTime(dataCompletenessStartTime));
    LOG.info("EndTime {} i.e. {}", dataCompletenessEndTime, new DateTime(dataCompletenessEndTime));

    for (String dataset : datasets) {
      try {
        DatasetConfigDTO datasetConfig = DAO_REGISTRY.getDatasetConfigDAO().findByDataset(dataset);
        LOG.info("Dataset {} {}", dataset, datasetConfig);

        // TODO: get this from datasetConfig
        // DataCompletenessAlgorithmName algorithmName = datasetConfig.getDataCompletenessAlgorithmName();
        DataCompletenessAlgorithmName algorithmName = DataCompletenessAlgorithmName.WO4W_AVERAGE;
        // TODO: get this from datasetConfig
        // Double expectedCompleteness = datasetConfig.getExpectedCompleteness();
        Double expectedCompleteness = null;
        DataCompletenessAlgorithm dataCompletenessAlgorithm =
            DataCompletenessAlgorithmFactory.getDataCompletenessAlgorithmFromName(algorithmName);
        LOG.info("DataCompletenessAlgorithmClass: {}", algorithmName);

        // get adjusted start time, bucket size and date time formatter, according to dataset granularity
        TimeSpec timeSpec = ThirdEyeUtils.getTimeSpecFromDatasetConfig(datasetConfig);
        DateTimeZone dateTimeZone = Utils.getDataTimeZone(dataset);
        long adjustedStart = DataCompletenessTaskUtils.getAdjustedTimeForDataset(timeSpec, dataCompletenessStartTime, dateTimeZone);
        long adjustedEnd = DataCompletenessTaskUtils.getAdjustedTimeForDataset(timeSpec, dataCompletenessEndTime, dateTimeZone);
        long bucketSize = DataCompletenessTaskUtils.getBucketSizeInMSForDataset(timeSpec);
        DateTimeFormatter dateTimeFormatter = DataCompletenessTaskUtils.getDateTimeFormatterForDataset(timeSpec, dateTimeZone);
        LOG.info("Adjusted start:{} i.e. {} Adjusted end:{} i.e. {} and Bucket size:{}",
            adjustedStart, new DateTime(adjustedStart), adjustedEnd, new DateTime(adjustedEnd), bucketSize);

        // get buckets to process
        Map<String, Long> bucketNameToBucketValueMS =
            getBucketsToProcess(dataset, adjustedStart, adjustedEnd, dataCompletenessAlgorithm, dateTimeFormatter, bucketSize);
        LOG.info("Got {} buckets to process", bucketNameToBucketValueMS.size());

        if (!bucketNameToBucketValueMS.isEmpty()) {
          // create current entries in database if not already present
          int numEntriesCreated = createEntriesInDatabaseIfNotPresent(dataset, bucketNameToBucketValueMS);
          LOG.info("Created {} new entries in database", numEntriesCreated);

          // cold start: compute and store the baseline counts in the database, if not already present
          LOG.info("Checking for baseline counts in database, or fetching them if not present");
          dataCompletenessAlgorithm.computeBaselineCountsIfNotPresent(dataset, bucketNameToBucketValueMS, dateTimeFormatter, timeSpec, dateTimeZone);

          // get current counts for all current buckets to process
          Map<String, Long> bucketNameToCount = DataCompletenessTaskUtils.getCountsForBucketsOfDataset(dataset, timeSpec, bucketNameToBucketValueMS);
          LOG.info("Bucket name to count {}", bucketNameToCount);

          // run completeness check for all buckets
          runCompletenessCheck(dataset, bucketNameToBucketValueMS, bucketNameToCount, dataCompletenessAlgorithm, expectedCompleteness);
        }
      } catch (Exception e) {
        LOG.error("Exception in data completeness checker task for dataset {}. Continuing with remaining datasets", dataset, e);
      }
    }
  } catch (Exception e) {
    LOG.error("Exception in data completeness checker task", e);
  }
}
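In summary, each dataset's check adjusts the window to bucket boundaries, enumerates the buckets, backfills baseline counts on cold start, fetches current counts, and runs the completeness check. The sketch below is a hedged illustration of only the bucket-enumeration step, assuming each fixed-size bucket is named by printing its start instant with the dataset's formatter; bucketsBetween and the yyyy-MM-dd-HH pattern are hypothetical, and the real getBucketsToProcess additionally takes the dataset and the completeness algorithm into account.
// Minimal sketch of enumerating buckets over [adjustedStart, adjustedEnd).
// Illustration only, not ThirdEye's actual getBucketsToProcess.
import java.util.LinkedHashMap;
import java.util.Map;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;

public class BucketSketch {
  static Map<String, Long> bucketsBetween(long adjustedStart, long adjustedEnd,
      long bucketSizeMs, DateTimeFormatter formatter) {
    Map<String, Long> bucketNameToBucketValueMS = new LinkedHashMap<>();
    for (long bucketStart = adjustedStart; bucketStart < adjustedEnd; bucketStart += bucketSizeMs) {
      // key each bucket by its formatted start time, value is the start in millis
      bucketNameToBucketValueMS.put(formatter.print(bucketStart), bucketStart);
    }
    return bucketNameToBucketValueMS;
  }

  public static void main(String[] args) {
    // hourly buckets for a three-hour UTC window; the name pattern is a guess
    DateTimeFormatter formatter = DateTimeFormat.forPattern("yyyy-MM-dd-HH").withZone(DateTimeZone.UTC);
    long hourMs = 3600_000L;
    long start = 1483228800000L; // 2017-01-01T00:00:00Z
    System.out.println(bucketsBetween(start, start + 3 * hourMs, hourMs, formatter));
    // {2017-01-01-00=1483228800000, 2017-01-01-01=1483232400000, 2017-01-01-02=1483236000000}
  }
}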