Use of com.linkedin.thirdeye.datalayer.dto.DataCompletenessConfigDTO in project pinot by LinkedIn.
From the class TestDataCompletenessConfigManager, method testDelete.
@Test(dependsOnMethods = { "testUpdate" })
public void testDelete() {
  // Delete the second config, then confirm a lookup by its id finds nothing.
  dataCompletenessConfigDAO.deleteById(dataCompletenessConfigId2);
  DataCompletenessConfigDTO deleted = dataCompletenessConfigDAO.findById(dataCompletenessConfigId2);
  Assert.assertNull(deleted);
}
Use of com.linkedin.thirdeye.datalayer.dto.DataCompletenessConfigDTO in project pinot by LinkedIn.
From the class DetectionJobScheduler, method checkIfDetectionRunCriteriaMet.
/**
 * Checks whether a time range for a dataset meets the data-completeness criteria required to
 * run anomaly detection.
 *
 * A range passes when either (a) no completeness check is requested, or (b) a check is requested,
 * no buckets in the range are marked incomplete, AND the number of buckets marked complete equals
 * the number of buckets expected for the range.
 *
 * @param startTime start of the range, epoch millis (inclusive)
 * @param endTime end of the range, epoch millis
 * @param datasetConfig dataset-level config; its completeness flag gates the check
 * @param anomalyFunction function-level config; its completeness flag can opt out of the check
 * @return true if data completeness check is requested and passes, or data completeness check is not requested at all
 * false if data completeness check is requested and fails
 */
private boolean checkIfDetectionRunCriteriaMet(Long startTime, Long endTime, DatasetConfigDTO datasetConfig, AnomalyFunctionDTO anomalyFunction) {
boolean pass = false;
String dataset = datasetConfig.getDataset();
/**
 * The check runs only when BOTH flags are set:
 * - dataset-level flag: false by default, so the user enables it as needed.
 * - function-level flag: true by default, so the dataset config's flag normally decides;
 *   the function-level flag is typically unset only for backfill cases.
 */
if (datasetConfig.isRequiresCompletenessCheck() && anomalyFunction.isRequiresCompletenessCheck()) {
LOG.info("Function: {} Dataset: {} Checking for completeness of time range {}({}) to {}({})", anomalyFunction.getId(), dataset, startTime, new DateTime(startTime), endTime, new DateTime(endTime));
// Any bucket flagged incomplete (status=false) in the range fails the check outright.
List<DataCompletenessConfigDTO> incompleteTimePeriods = DAO_REGISTRY.getDataCompletenessConfigDAO().findAllByDatasetAndInTimeRangeAndStatus(dataset, startTime, endTime, false);
LOG.info("Function: {} Dataset: {} Incomplete periods {}", anomalyFunction.getId(), dataset, incompleteTimePeriods);
if (incompleteTimePeriods.size() == 0) {
// nothing incomplete
// find complete buckets (status=true) and compare against the expected count for the range
List<DataCompletenessConfigDTO> completeTimePeriods = DAO_REGISTRY.getDataCompletenessConfigDAO().findAllByDatasetAndInTimeRangeAndStatus(dataset, startTime, endTime, true);
LOG.info("Function: {} Dataset: {} Complete periods {}", anomalyFunction.getId(), dataset, completeTimePeriods);
long expectedCompleteBuckets = DetectionJobSchedulerUtils.getExpectedCompleteBuckets(datasetConfig, startTime, endTime);
LOG.info("Function: {} Dataset: {} Num complete periods: {} Expected num buckets:{}", anomalyFunction.getId(), dataset, completeTimePeriods.size(), expectedCompleteBuckets);
// Pass only when every expected bucket is present and marked complete; fewer complete
// buckets than expected (e.g. rows not yet written) keeps pass=false.
if (completeTimePeriods.size() == expectedCompleteBuckets) {
// complete matches expected
LOG.info("Function: {} Dataset: {} Found complete time range {}({}) to {}({})", anomalyFunction.getId(), dataset, startTime, new DateTime(startTime), endTime, new DateTime(endTime));
pass = true;
}
}
} else {
// no check required
pass = true;
}
return pass;
}
Use of com.linkedin.thirdeye.datalayer.dto.DataCompletenessConfigDTO in project pinot by LinkedIn.
From the class DataCompletenessTaskRunner, method runCompletenessCheck.
/**
 * Checks completeness for each bucket of a dataset and updates the completeness percentage,
 * count, completeness flag, and attempt counter in the database.
 *
 * @param dataset dataset being checked
 * @param bucketNameToBucketValueMS map from bucket name (SDF string) to bucket time in epoch millis
 * @param bucketNameToCount map from bucket name to the current count for that bucket (0 when absent)
 * @param dataCompletenessAlgorithm algorithm providing baseline counts and completeness computation
 * @param expectedCompleteness threshold percentage required for a bucket to count as complete
 */
private void runCompletenessCheck(String dataset, Map<String, Long> bucketNameToBucketValueMS, Map<String, Long> bucketNameToCount, DataCompletenessAlgorithm dataCompletenessAlgorithm, Double expectedCompleteness) {
// Hoisted out of the loop: the format is loop-invariant, no need to rebuild it per bucket.
DecimalFormat percentFormat = new DecimalFormat("##.##");
for (Entry<String, Long> entry : bucketNameToBucketValueMS.entrySet()) {
String bucketName = entry.getKey();
Long bucketValue = entry.getValue();
Long currentCount = bucketNameToCount.getOrDefault(bucketName, 0L);
LOG.info("Bucket name:{} Current count:{}", bucketName, currentCount);
// get baseline counts for this bucket
List<Long> baselineCounts = dataCompletenessAlgorithm.getBaselineCounts(dataset, bucketValue);
LOG.info("Baseline counts:{}", baselineCounts);
// call api with counts, algo, expectation
double percentComplete = dataCompletenessAlgorithm.getPercentCompleteness(baselineCounts, currentCount);
LOG.info("Percent complete:{}", percentComplete);
// calculate if data is complete
boolean dataComplete = dataCompletenessAlgorithm.isDataComplete(percentComplete, expectedCompleteness);
LOG.info("IsDataComplete:{}", dataComplete);
// update count, dataComplete, percentComplete, numAttempts in database
DataCompletenessConfigDTO configToUpdate = DAO_REGISTRY.getDataCompletenessConfigDAO().findByDatasetAndDateSDF(dataset, bucketName);
// The DAO returns null when no row exists for this dataset/bucket (see the same lookup in
// Wo4WAvgDataCompletenessAlgorithm); skip instead of throwing NPE so one missing row
// doesn't abort the remaining buckets.
if (configToUpdate == null) {
LOG.warn("No data completeness config found for dataset:{} bucket:{}, skipping update", dataset, bucketName);
continue;
}
configToUpdate.setCountStar(currentCount);
configToUpdate.setDataComplete(dataComplete);
// Round to 2 decimal places for storage/display.
configToUpdate.setPercentComplete(Double.parseDouble(percentFormat.format(percentComplete)));
configToUpdate.setNumAttempts(configToUpdate.getNumAttempts() + 1);
DAO_REGISTRY.getDataCompletenessConfigDAO().update(configToUpdate);
LOG.info("Updated data completeness config id:{} with count *:{} dataComplete:{} percentComplete:{} " + "and numAttempts:{}", configToUpdate.getId(), configToUpdate.getCountStar(), configToUpdate.isDataComplete(), configToUpdate.getPercentComplete(), configToUpdate.getNumAttempts());
}
}
Use of com.linkedin.thirdeye.datalayer.dto.DataCompletenessConfigDTO in project pinot by LinkedIn.
From the class Wo4WAvgDataCompletenessAlgorithm, method computeBaselineCountsIfNotPresent.
@Override
public void computeBaselineCountsIfNotPresent(String dataset, Map<String, Long> bucketNameToBucketValueMS, DateTimeFormatter dateTimeFormatter, TimeSpec timeSpec, DateTimeZone zone) {
// Week-over-4-week baseline: for each of the previous 4 weekly offsets, make sure every
// bucket has a stored count; fetch and persist any that are missing.
for (int weeksAgo = 1; weeksAgo <= 4; weeksAgo++) {
Period weeklyOffset = new Period(0, 0, 0, 7 * weeksAgo, 0, 0, 0, 0);
LOG.info("Checking for {} week ago for dataset {}", weeksAgo, dataset);
// Collect the shifted buckets that have no row in the database yet.
Map<String, Long> missingBuckets = new HashMap<>();
for (Entry<String, Long> bucket : bucketNameToBucketValueMS.entrySet()) {
Long shiftedMillis = new DateTime(bucket.getValue(), zone).minus(weeklyOffset).getMillis();
String shiftedName = dateTimeFormatter.print(shiftedMillis);
if (dataCompletenessConfigDAO.findByDatasetAndDateSDF(dataset, shiftedName) == null) {
missingBuckets.put(shiftedName, shiftedMillis);
}
}
LOG.info("Missing baseline buckets {} for dataset {}", missingBuckets.keySet(), dataset);
if (!missingBuckets.isEmpty()) {
// Fetch counts for all missing buckets in one call, then persist each as a new config row.
Map<String, Long> fetchedCounts = DataCompletenessTaskUtils.getCountsForBucketsOfDataset(dataset, timeSpec, missingBuckets);
LOG.info("Baseline bucket counts {}", fetchedCounts);
for (Entry<String, Long> counted : fetchedCounts.entrySet()) {
DataCompletenessConfigDTO baselineConfig = new DataCompletenessConfigDTO();
baselineConfig.setDataset(dataset);
baselineConfig.setDateToCheckInSDF(counted.getKey());
baselineConfig.setDateToCheckInMS(missingBuckets.get(counted.getKey()));
baselineConfig.setCountStar(counted.getValue());
dataCompletenessConfigDAO.save(baselineConfig);
}
LOG.info("Saved {} number of baseline counts in database for dataset {}", fetchedCounts.size(), dataset);
}
}
}
Use of com.linkedin.thirdeye.datalayer.dto.DataCompletenessConfigDTO in project pinot by LinkedIn.
From the class DataCompletenessConfigManagerImpl, method findByDatasetAndDateMS.
@Override
public DataCompletenessConfigDTO findByDatasetAndDateMS(String dataset, Long dateToCheckInMS) {
// Look up the config row matching both the dataset name and the exact millisecond timestamp.
Predicate predicate = Predicate.AND(Predicate.EQ("dataset", dataset), Predicate.EQ("dateToCheckInMS", dateToCheckInMS));
List<DataCompletenessConfigBean> beans = genericPojoDao.get(predicate, DataCompletenessConfigBean.class);
if (CollectionUtils.isEmpty(beans)) {
return null;
}
// Multiple matches are not expected; take the first row, as callers want a single config.
return MODEL_MAPPER.map(beans.get(0), DataCompletenessConfigDTO.class);
}
Aggregations