Search in sources :

Example 11 with DataCompletenessConfigDTO

use of com.linkedin.thirdeye.datalayer.dto.DataCompletenessConfigDTO in project pinot by linkedin.

the class TestDataCompletenessConfigManager method testDelete.

/**
   * Deletes the config identified by dataCompletenessConfigId2 and verifies that
   * looking it up by the same id afterwards yields null.
   * Declared to run after testUpdate.
   */
@Test(dependsOnMethods = { "testUpdate" })
public void testDelete() {
    dataCompletenessConfigDAO.deleteById(dataCompletenessConfigId2);
    // the row must no longer be retrievable once deleted
    Assert.assertNull(dataCompletenessConfigDAO.findById(dataCompletenessConfigId2));
}
Also used : DataCompletenessConfigDTO(com.linkedin.thirdeye.datalayer.dto.DataCompletenessConfigDTO) Test(org.testng.annotations.Test)

Example 12 with DataCompletenessConfigDTO

use of com.linkedin.thirdeye.datalayer.dto.DataCompletenessConfigDTO in project pinot by linkedin.

the class DetectionJobScheduler method checkIfDetectionRunCriteriaMet.

/**
   * Decides whether detection may run on a time range of a dataset, based on data completeness.
   * @param startTime start of the time range to check
   * @param endTime end of the time range to check
   * @param datasetConfig config of the dataset; supplies the dataset name and the dataset level completeness flag
   * @param anomalyFunction function about to be run; supplies the function level completeness flag
   * @return true if no completeness check is requested, or if it is requested and the range has no incomplete
   * entries and exactly the expected number of complete entries; false otherwise
   */
private boolean checkIfDetectionRunCriteriaMet(Long startTime, Long endTime, DatasetConfigDTO datasetConfig, AnomalyFunctionDTO anomalyFunction) {
    String dataset = datasetConfig.getDataset();
    /*
     * The check is only performed when both the dataset level flag and the function level flag ask for it.
     * Per the original notes: the dataset flag is false by default and is set by the user as needed, while the
     * function flag is true by default, so the dataset flag decides unless the function flag was changed
     * (typically unset for backfill).
     */
    boolean checkRequested = datasetConfig.isRequiresCompletenessCheck() && anomalyFunction.isRequiresCompletenessCheck();
    if (!checkRequested) {
        // no check required
        return true;
    }

    LOG.info("Function: {} Dataset: {} Checking for completeness of time range {}({}) to {}({})", anomalyFunction.getId(), dataset, startTime, new DateTime(startTime), endTime, new DateTime(endTime));

    // any incomplete entry in the range fails the check
    List<DataCompletenessConfigDTO> incompleteEntries = DAO_REGISTRY.getDataCompletenessConfigDAO().findAllByDatasetAndInTimeRangeAndStatus(dataset, startTime, endTime, false);
    LOG.info("Function: {} Dataset: {} Incomplete periods {}", anomalyFunction.getId(), dataset, incompleteEntries);
    if (!incompleteEntries.isEmpty()) {
        return false;
    }

    // nothing incomplete: the number of complete entries must match the expected bucket count
    List<DataCompletenessConfigDTO> completeEntries = DAO_REGISTRY.getDataCompletenessConfigDAO().findAllByDatasetAndInTimeRangeAndStatus(dataset, startTime, endTime, true);
    LOG.info("Function: {} Dataset: {} Complete periods {}", anomalyFunction.getId(), dataset, completeEntries);
    long expectedBucketCount = DetectionJobSchedulerUtils.getExpectedCompleteBuckets(datasetConfig, startTime, endTime);
    LOG.info("Function: {} Dataset: {} Num complete periods: {} Expected num buckets:{}", anomalyFunction.getId(), dataset, completeEntries.size(), expectedBucketCount);
    if (completeEntries.size() != expectedBucketCount) {
        return false;
    }

    // complete matches expected
    LOG.info("Function: {} Dataset: {}  Found complete time range {}({}) to {}({})", anomalyFunction.getId(), dataset, startTime, new DateTime(startTime), endTime, new DateTime(endTime));
    return true;
}
Also used : DataCompletenessConfigDTO(com.linkedin.thirdeye.datalayer.dto.DataCompletenessConfigDTO) DateTime(org.joda.time.DateTime)

Example 13 with DataCompletenessConfigDTO

use of com.linkedin.thirdeye.datalayer.dto.DataCompletenessConfigDTO in project pinot by linkedin.

the class DataCompletenessTaskRunner method runCompletenessCheck.

/**
   * Checks completeness for each bucket of a dataset and updates the count, completeness flag,
   * completeness percentage and attempt counter of the matching entry in the database.
   * Buckets for which no entry exists in the database are logged and skipped, so one missing
   * row does not abort the remaining buckets.
   * @param dataset name of the dataset being checked
   * @param bucketNameToBucketValueMS bucket name to bucket value; the value is handed to the algorithm to fetch baselines
   * @param bucketNameToCount bucket name to current count; a bucket without an entry is treated as count 0
   * @param dataCompletenessAlgorithm algorithm supplying baseline counts, percent completeness and the completeness verdict
   * @param expectedCompleteness threshold passed to the algorithm's isDataComplete
   */
private void runCompletenessCheck(String dataset, Map<String, Long> bucketNameToBucketValueMS, Map<String, Long> bucketNameToCount, DataCompletenessAlgorithm dataCompletenessAlgorithm, Double expectedCompleteness) {
    for (Entry<String, Long> entry : bucketNameToBucketValueMS.entrySet()) {
        String bucketName = entry.getKey();
        Long bucketValue = entry.getValue();
        Long currentCount = bucketNameToCount.getOrDefault(bucketName, 0L);
        LOG.info("Bucket name:{} Current count:{}", bucketName, currentCount);
        // get baseline counts for this bucket
        List<Long> baselineCounts = dataCompletenessAlgorithm.getBaselineCounts(dataset, bucketValue);
        LOG.info("Baseline counts:{}", baselineCounts);
        // call api with counts, algo, expectation
        double percentComplete = dataCompletenessAlgorithm.getPercentCompleteness(baselineCounts, currentCount);
        LOG.info("Percent complete:{}", percentComplete);
        // calculate if data is complete
        boolean dataComplete = dataCompletenessAlgorithm.isDataComplete(percentComplete, expectedCompleteness);
        LOG.info("IsDataComplete:{}", dataComplete);
        // update count, dataComplete, percentComplete, numAttempts in database
        DataCompletenessConfigDTO configToUpdate = DAO_REGISTRY.getDataCompletenessConfigDAO().findByDatasetAndDateSDF(dataset, bucketName);
        if (configToUpdate == null) {
            // the lookup yields null when no row matches; skip this bucket instead of failing the whole run
            LOG.warn("No data completeness config found for dataset:{} bucket:{}, skipping update", dataset, bucketName);
            continue;
        }
        configToUpdate.setCountStar(currentCount);
        configToUpdate.setDataComplete(dataComplete);
        configToUpdate.setPercentComplete(roundToTwoDecimals(percentComplete));
        configToUpdate.setNumAttempts(configToUpdate.getNumAttempts() + 1);
        DAO_REGISTRY.getDataCompletenessConfigDAO().update(configToUpdate);
        LOG.info("Updated data completeness config id:{} with count *:{} dataComplete:{} percentComplete:{} " + "and numAttempts:{}", configToUpdate.getId(), configToUpdate.getCountStar(), configToUpdate.isDataComplete(), configToUpdate.getPercentComplete(), configToUpdate.getNumAttempts());
    }
}

/**
   * Rounds a value to at most two decimal places using half-even rounding.
   * Works numerically rather than formatting to text and parsing it back, because the text
   * round trip depends on the default locale's decimal separator and fails on infinite values.
   * @param value value to round
   * @return the rounded value; NaN and infinite inputs are returned unchanged
   */
private static double roundToTwoDecimals(double value) {
    if (Double.isNaN(value) || Double.isInfinite(value)) {
        // BigDecimal cannot represent these; NOTE(review): confirm the storage layer accepts non-finite values
        return value;
    }
    return java.math.BigDecimal.valueOf(value).setScale(2, java.math.RoundingMode.HALF_EVEN).doubleValue();
}
Also used : DataCompletenessConfigDTO(com.linkedin.thirdeye.datalayer.dto.DataCompletenessConfigDTO) DecimalFormat(java.text.DecimalFormat)

Example 14 with DataCompletenessConfigDTO

use of com.linkedin.thirdeye.datalayer.dto.DataCompletenessConfigDTO in project pinot by linkedin.

the class Wo4WAvgDataCompletenessAlgorithm method computeBaselineCountsIfNotPresent.

/**
   * For each of the past 4 weeks, finds the baseline buckets of the given buckets that have no entry
   * in the database, fetches their counts and saves a new entry for each of them.
   * @param dataset name of the dataset
   * @param bucketNameToBucketValueMS bucket name to bucket value; only the values are used to derive baseline buckets
   * @param dateTimeFormatter formatter used to build the baseline bucket names
   * @param timeSpec passed through to the count lookup
   * @param zone time zone in which the weekly offset is applied
   */
@Override
public void computeBaselineCountsIfNotPresent(String dataset, Map<String, Long> bucketNameToBucketValueMS, DateTimeFormatter dateTimeFormatter, TimeSpec timeSpec, DateTimeZone zone) {
    // look for the past 4 weeks
    for (int weeksAgo = 1; weeksAgo <= 4; weeksAgo++) {
        // offset expressed in days: 7 days per week
        Period weekOffset = new Period(0, 0, 0, 7 * weeksAgo, 0, 0, 0, 0);
        LOG.info("Checking for {} week ago for dataset {}", weeksAgo, dataset);

        // collect the baseline buckets that are not yet present in database
        Map<String, Long> missingBaselineBuckets = new HashMap<>();
        for (Long bucketValueMS : bucketNameToBucketValueMS.values()) {
            DateTime bucketTime = new DateTime(bucketValueMS, zone);
            Long baselineValueMS = bucketTime.minus(weekOffset).getMillis();
            String baselineName = dateTimeFormatter.print(baselineValueMS);
            if (dataCompletenessConfigDAO.findByDatasetAndDateSDF(dataset, baselineName) == null) {
                missingBaselineBuckets.put(baselineName, baselineValueMS);
            }
        }

        LOG.info("Missing baseline buckets {} for dataset {}", missingBaselineBuckets.keySet(), dataset);
        if (missingBaselineBuckets.isEmpty()) {
            // every baseline of this week is already stored
            continue;
        }

        // fetch counts for the missing baselines and store one entry per returned bucket
        Map<String, Long> fetchedCounts = DataCompletenessTaskUtils.getCountsForBucketsOfDataset(dataset, timeSpec, missingBaselineBuckets);
        LOG.info("Baseline bucket counts {}", fetchedCounts);
        for (Entry<String, Long> fetched : fetchedCounts.entrySet()) {
            String baselineName = fetched.getKey();
            DataCompletenessConfigDTO baselineConfig = new DataCompletenessConfigDTO();
            baselineConfig.setDataset(dataset);
            baselineConfig.setDateToCheckInSDF(baselineName);
            baselineConfig.setDateToCheckInMS(missingBaselineBuckets.get(baselineName));
            baselineConfig.setCountStar(fetched.getValue());
            dataCompletenessConfigDAO.save(baselineConfig);
        }
        LOG.info("Saved {} number of baseline counts in database for dataset {}", fetchedCounts.size(), dataset);
    }
}
Also used : DataCompletenessConfigDTO(com.linkedin.thirdeye.datalayer.dto.DataCompletenessConfigDTO) HashMap(java.util.HashMap) Period(org.joda.time.Period) DateTime(org.joda.time.DateTime)

Example 15 with DataCompletenessConfigDTO

use of com.linkedin.thirdeye.datalayer.dto.DataCompletenessConfigDTO in project pinot by linkedin.

the class DataCompletenessConfigManagerImpl method findByDatasetAndDateMS.

/**
   * Looks up the entry whose "dataset" and "dateToCheckInMS" fields both equal the given values.
   * @param dataset dataset name to match
   * @param dateToCheckInMS date value to match
   * @return the first matching entry mapped to a DTO, or null when nothing matches
   */
@Override
public DataCompletenessConfigDTO findByDatasetAndDateMS(String dataset, Long dateToCheckInMS) {
    Predicate datasetMatches = Predicate.EQ("dataset", dataset);
    Predicate dateMatches = Predicate.EQ("dateToCheckInMS", dateToCheckInMS);
    List<DataCompletenessConfigBean> matches = genericPojoDao.get(Predicate.AND(datasetMatches, dateMatches), DataCompletenessConfigBean.class);
    if (!CollectionUtils.isNotEmpty(matches)) {
        return null;
    }
    // only the first match is used
    return MODEL_MAPPER.map(matches.get(0), DataCompletenessConfigDTO.class);
}
Also used : DataCompletenessConfigDTO(com.linkedin.thirdeye.datalayer.dto.DataCompletenessConfigDTO) DataCompletenessConfigBean(com.linkedin.thirdeye.datalayer.pojo.DataCompletenessConfigBean) Predicate(com.linkedin.thirdeye.datalayer.util.Predicate)

Aggregations

DataCompletenessConfigDTO (com.linkedin.thirdeye.datalayer.dto.DataCompletenessConfigDTO)15 DateTime (org.joda.time.DateTime)4 DataCompletenessConfigBean (com.linkedin.thirdeye.datalayer.pojo.DataCompletenessConfigBean)3 ArrayList (java.util.ArrayList)3 Test (org.testng.annotations.Test)3 Predicate (com.linkedin.thirdeye.datalayer.util.Predicate)2 HashMap (java.util.HashMap)2 DecimalFormat (java.text.DecimalFormat)1 Period (org.joda.time.Period)1