Search in sources :

Example 1 with DataCompletenessAlgorithmName

Usage of com.linkedin.thirdeye.completeness.checker.DataCompletenessConstants.DataCompletenessAlgorithmName in the project pinot by linkedin.

From the class DataCompletenessResource, the method getPercentCompleteness:

/**
 * Computes the percent completeness of the current count relative to a baseline
 * derived from the supplied baseline counts.
 *
 * <p>For {@code WO4W_AVERAGE} (and any unrecognized algorithm, via the
 * {@code default} fall-through) the baseline is the arithmetic mean of
 * {@code baselineCounts}. If there is no usable baseline (empty list or mean of 0)
 * but the current count is non-zero, the data is reported as 100% complete.
 *
 * @param payload JSON string deserialized into a {@link PercentCompletenessFunctionInput}
 * @return percent completeness (0 when both baseline and current count are absent/zero)
 */
@GET
@Path(value = "/percent-completeness")
@Produces(MediaType.APPLICATION_JSON)
public double getPercentCompleteness(String payload) {
    PercentCompletenessFunctionInput input = PercentCompletenessFunctionInput.fromJson(payload);
    DataCompletenessAlgorithmName algorithm = input.getAlgorithm();
    List<Long> baselineCounts = input.getBaselineCounts();
    Long currentCount = input.getCurrentCount();
    double percentCompleteness = 0;
    double baselineTotalCount = 0;
    if (CollectionUtils.isNotEmpty(baselineCounts)) {
        switch (algorithm) {
            case WO4W_AVERAGE:
            default:
                // WO4W_AVERAGE: baseline is the mean of the provided counts.
                for (Long baseline : baselineCounts) {
                    baselineTotalCount = baselineTotalCount + baseline;
                }
                baselineTotalCount = baselineTotalCount / baselineCounts.size();
                break;
        }
    }
    // Guard against a null current count: auto-unboxing a null Long would throw NPE.
    long current = (currentCount == null) ? 0L : currentCount;
    if (baselineTotalCount != 0) {
        // 100.0 forces floating-point arithmetic; avoids the deprecated new Double(...) boxing ctor.
        percentCompleteness = (current * 100.0) / baselineTotalCount;
    } else if (current != 0) {
        // No baseline to compare against but data exists — report fully complete.
        percentCompleteness = 100;
    }
    return percentCompleteness;
}
Also used : PercentCompletenessFunctionInput(com.linkedin.thirdeye.completeness.checker.PercentCompletenessFunctionInput) DataCompletenessAlgorithmName(com.linkedin.thirdeye.completeness.checker.DataCompletenessConstants.DataCompletenessAlgorithmName) Path(javax.ws.rs.Path) Produces(javax.ws.rs.Produces) GET(javax.ws.rs.GET)

Example 2 with DataCompletenessAlgorithmName

use of com.linkedin.thirdeye.completeness.checker.DataCompletenessConstants.DataCompletenessAlgorithmName in project pinot by linkedin.

From the class DataCompletenessTaskRunner, the method executeCheckerTask:

/**
   * Performs data completeness check on all datasets, for past LOOKBACK time, and records the information in database
   * @param dataCompletenessTaskInfo
   */
private void executeCheckerTask(DataCompletenessTaskInfo dataCompletenessTaskInfo) {
    LOG.info("Execute data completeness checker task {}", dataCompletenessTaskInfo);
    try {
        List<String> datasets = dataCompletenessTaskInfo.getDatasetsToCheck();
        LOG.info("Datasets {}", datasets);
        // get start and end time
        long dataCompletenessStartTime = dataCompletenessTaskInfo.getDataCompletenessStartTime();
        long dataCompletenessEndTime = dataCompletenessTaskInfo.getDataCompletenessEndTime();
        LOG.info("StartTime {} i.e. {}", dataCompletenessStartTime, new DateTime(dataCompletenessStartTime));
        LOG.info("EndTime {} i.e. {}", dataCompletenessEndTime, new DateTime(dataCompletenessEndTime));
        // Process each dataset independently; a failure on one dataset must not
        // abort the check for the remaining ones (see the inner catch below).
        for (String dataset : datasets) {
            try {
                DatasetConfigDTO datasetConfig = DAO_REGISTRY.getDatasetConfigDAO().findByDataset(dataset);
                LOG.info("Dataset {} {}", dataset, datasetConfig);
                // TODO: get this from datasetConfig
                //DataCompletenessAlgorithmName algorithmName = datasetConfig.getDataCompletenessAlgorithmName();
                // Algorithm is currently hard-coded to WO4W_AVERAGE until it is
                // configurable per dataset (see TODO above).
                DataCompletenessAlgorithmName algorithmName = DataCompletenessAlgorithmName.WO4W_AVERAGE;
                // TODO: get this from datasetConfig
                // Double expectedCompleteness = datasetConfig.getExpectedCompleteness();
                // null here means "use the algorithm's default threshold" — TODO confirm
                // against runCompletenessCheck's handling of a null expectedCompleteness.
                Double expectedCompleteness = null;
                DataCompletenessAlgorithm dataCompletenessAlgorithm = DataCompletenessAlgorithmFactory.getDataCompletenessAlgorithmFromName(algorithmName);
                LOG.info("DataCompletenessAlgorithmClass: {}", algorithmName);
                // get adjusted start time, bucket size and date time formatter, according to dataset granularity
                TimeSpec timeSpec = ThirdEyeUtils.getTimeSpecFromDatasetConfig(datasetConfig);
                DateTimeZone dateTimeZone = Utils.getDataTimeZone(dataset);
                long adjustedStart = DataCompletenessTaskUtils.getAdjustedTimeForDataset(timeSpec, dataCompletenessStartTime, dateTimeZone);
                long adjustedEnd = DataCompletenessTaskUtils.getAdjustedTimeForDataset(timeSpec, dataCompletenessEndTime, dateTimeZone);
                long bucketSize = DataCompletenessTaskUtils.getBucketSizeInMSForDataset(timeSpec);
                DateTimeFormatter dateTimeFormatter = DataCompletenessTaskUtils.getDateTimeFormatterForDataset(timeSpec, dateTimeZone);
                LOG.info("Adjusted start:{} i.e. {} Adjusted end:{} i.e. {} and Bucket size:{}", adjustedStart, new DateTime(adjustedStart), adjustedEnd, new DateTime(adjustedEnd), bucketSize);
                // get buckets to process
                Map<String, Long> bucketNameToBucketValueMS = getBucketsToProcess(dataset, adjustedStart, adjustedEnd, dataCompletenessAlgorithm, dateTimeFormatter, bucketSize);
                LOG.info("Got {} buckets to process", bucketNameToBucketValueMS.size());
                if (!bucketNameToBucketValueMS.isEmpty()) {
                    // create current entries in database if not already present
                    int numEntriesCreated = createEntriesInDatabaseIfNotPresent(dataset, bucketNameToBucketValueMS);
                    LOG.info("Created {} new entries in database", numEntriesCreated);
                    // coldstart: compute and store in db the counts for baseline, if not already present
                    LOG.info("Checking for baseline counts in database, or fetching them if not present");
                    dataCompletenessAlgorithm.computeBaselineCountsIfNotPresent(dataset, bucketNameToBucketValueMS, dateTimeFormatter, timeSpec, dateTimeZone);
                    // get current counts for all current buckets to process
                    Map<String, Long> bucketNameToCount = DataCompletenessTaskUtils.getCountsForBucketsOfDataset(dataset, timeSpec, bucketNameToBucketValueMS);
                    LOG.info("Bucket name to count {}", bucketNameToCount);
                    // run completeness check for all buckets
                    runCompletenessCheck(dataset, bucketNameToBucketValueMS, bucketNameToCount, dataCompletenessAlgorithm, expectedCompleteness);
                }
            } catch (Exception e) {
                // Per-dataset failures are logged and skipped so the remaining datasets
                // still get checked in this run.
                LOG.error("Exception in data completeness checker task for dataset {}.. Continuing with remaining datasets", dataset, e);
            }
        }
    } catch (Exception e) {
        // Outer guard: a failure before/around the dataset loop fails the whole task,
        // but is logged rather than propagated to the caller.
        LOG.error("Exception in data completeness checker task", e);
    }
}
Also used : DataCompletenessAlgorithmName(com.linkedin.thirdeye.completeness.checker.DataCompletenessConstants.DataCompletenessAlgorithmName) DateTime(org.joda.time.DateTime) DateTimeZone(org.joda.time.DateTimeZone) TimeSpec(com.linkedin.thirdeye.api.TimeSpec) DatasetConfigDTO(com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO) DateTimeFormatter(org.joda.time.format.DateTimeFormatter)

Aggregations

DataCompletenessAlgorithmName (com.linkedin.thirdeye.completeness.checker.DataCompletenessConstants.DataCompletenessAlgorithmName)2 TimeSpec (com.linkedin.thirdeye.api.TimeSpec)1 PercentCompletenessFunctionInput (com.linkedin.thirdeye.completeness.checker.PercentCompletenessFunctionInput)1 DatasetConfigDTO (com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO)1 GET (javax.ws.rs.GET)1 Path (javax.ws.rs.Path)1 Produces (javax.ws.rs.Produces)1 DateTime (org.joda.time.DateTime)1 DateTimeZone (org.joda.time.DateTimeZone)1 DateTimeFormatter (org.joda.time.format.DateTimeFormatter)1