Search in sources :

Example 41 with DatasetConfigDTO

use of com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO in project pinot by linkedin.

the class DataCompletenessJobRunner method run.

/**
 * Prepares the data completeness job context for the current lookback window,
 * then creates the job and its tasks.
 *
 * The window ends at the current time and starts LOOKBACK_TIME_DURATION
 * (expressed in LOOKBACK_TIMEUNIT) earlier. The datasets to check are the union of
 * the collections of all active anomaly functions and the datasets returned by
 * findActiveRequiresCompletenessCheck().
 */
@Override
public void run() {
    // resolve the check window: [now - lookback, now]
    DateTime currentTime = new DateTime();
    long windowEndMillis = currentTime.getMillis();
    long lookbackMillis = TimeUnit.MILLISECONDS.convert(
        DataCompletenessConstants.LOOKBACK_TIME_DURATION, DataCompletenessConstants.LOOKBACK_TIMEUNIT);
    long windowStartMillis = currentTime.minus(lookbackMillis).getMillis();

    // job name is <task type>-<formatted start>-<formatted end>
    String formattedEnd = dateTimeFormatter.print(windowEndMillis);
    String formattedStart = dateTimeFormatter.print(windowStartMillis);
    String name = String.format("%s-%s-%s", TaskType.DATA_COMPLETENESS, formattedStart, formattedEnd);

    dataCompletenessJobContext.setCheckDurationStartTime(windowStartMillis);
    dataCompletenessJobContext.setCheckDurationEndTime(windowEndMillis);
    dataCompletenessJobContext.setJobName(name);

    // a set de-duplicates datasets that appear in both sources
    Set<String> uniqueDatasets = new HashSet<>();
    for (AnomalyFunctionDTO activeFunction : DAO_REGISTRY.getAnomalyFunctionDAO().findAllActiveFunctions()) {
        uniqueDatasets.add(activeFunction.getCollection());
    }
    for (DatasetConfigDTO config : DAO_REGISTRY.getDatasetConfigDAO().findActiveRequiresCompletenessCheck()) {
        uniqueDatasets.add(config.getDataset());
    }
    dataCompletenessJobContext.setDatasetsToCheck(Lists.newArrayList(uniqueDatasets));

    // create data completeness job, and remember its execution id in the context
    long executionId = createJob();
    dataCompletenessJobContext.setJobExecutionId(executionId);

    // create data completeness tasks
    createTasks();
}
Also used : DatasetConfigDTO(com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO) AnomalyFunctionDTO(com.linkedin.thirdeye.datalayer.dto.AnomalyFunctionDTO) DateTime(org.joda.time.DateTime) HashSet(java.util.HashSet)

Example 42 with DatasetConfigDTO

use of com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO in project pinot by linkedin.

the class DataCompletenessTaskRunner method executeCheckerTask.

/**
 * Runs the data completeness check for every dataset listed in the task info,
 * over the start/end window carried by the task info.
 *
 * A failure while processing one dataset is logged and does not stop the
 * remaining datasets; a failure outside the per-dataset loop is logged and
 * ends the task without rethrowing.
 *
 * @param dataCompletenessTaskInfo task description providing the datasets to check and the time window
 */
private void executeCheckerTask(DataCompletenessTaskInfo dataCompletenessTaskInfo) {
    LOG.info("Execute data completeness checker task {}", dataCompletenessTaskInfo);
    try {
        List<String> datasetNames = dataCompletenessTaskInfo.getDatasetsToCheck();
        LOG.info("Datasets {}", datasetNames);

        // window requested by the task, in epoch millis
        long windowStart = dataCompletenessTaskInfo.getDataCompletenessStartTime();
        long windowEnd = dataCompletenessTaskInfo.getDataCompletenessEndTime();
        LOG.info("StartTime {} i.e. {}", windowStart, new DateTime(windowStart));
        LOG.info("EndTime {} i.e. {}", windowEnd, new DateTime(windowEnd));

        for (String dataset : datasetNames) {
            try {
                DatasetConfigDTO config = DAO_REGISTRY.getDatasetConfigDAO().findByDataset(dataset);
                LOG.info("Dataset {} {}", dataset, config);

                // TODO: read the algorithm name from the dataset config (getDataCompletenessAlgorithmName)
                DataCompletenessAlgorithmName algoName = DataCompletenessAlgorithmName.WO4W_AVERAGE;
                // TODO: read the expected completeness from the dataset config (getExpectedCompleteness)
                Double expectedCompleteness = null;
                DataCompletenessAlgorithm algorithm =
                    DataCompletenessAlgorithmFactory.getDataCompletenessAlgorithmFromName(algoName);
                LOG.info("DataCompletenessAlgorithmClass: {}", algoName);

                // adjusted window, bucket size and formatter all follow the dataset's time spec and zone
                TimeSpec spec = ThirdEyeUtils.getTimeSpecFromDatasetConfig(config);
                DateTimeZone zone = Utils.getDataTimeZone(dataset);
                long adjustedStart = DataCompletenessTaskUtils.getAdjustedTimeForDataset(spec, windowStart, zone);
                long adjustedEnd = DataCompletenessTaskUtils.getAdjustedTimeForDataset(spec, windowEnd, zone);
                long bucketSizeMillis = DataCompletenessTaskUtils.getBucketSizeInMSForDataset(spec);
                DateTimeFormatter formatter = DataCompletenessTaskUtils.getDateTimeFormatterForDataset(spec, zone);
                LOG.info("Adjusted start:{} i.e. {} Adjusted end:{} i.e. {} and Bucket size:{}", adjustedStart, new DateTime(adjustedStart), adjustedEnd, new DateTime(adjustedEnd), bucketSizeMillis);

                // buckets that need processing for this dataset
                Map<String, Long> buckets =
                    getBucketsToProcess(dataset, adjustedStart, adjustedEnd, algorithm, formatter, bucketSizeMillis);
                LOG.info("Got {} buckets to process", buckets.size());
                if (buckets.isEmpty()) {
                    // nothing to do for this dataset
                    continue;
                }

                // make sure every current bucket has an entry in the database
                int createdCount = createEntriesInDatabaseIfNotPresent(dataset, buckets);
                LOG.info("Created {} new entries in database", createdCount);

                // coldstart: baseline counts are computed and stored only when missing
                LOG.info("Checking for baseline counts in database, or fetching them if not present");
                algorithm.computeBaselineCountsIfNotPresent(dataset, buckets, formatter, spec, zone);

                // current counts for all buckets being processed
                Map<String, Long> countsByBucket =
                    DataCompletenessTaskUtils.getCountsForBucketsOfDataset(dataset, spec, buckets);
                LOG.info("Bucket name to count {}", countsByBucket);

                // run completeness check for all buckets
                runCompletenessCheck(dataset, buckets, countsByBucket, algorithm, expectedCompleteness);
            } catch (Exception e) {
                LOG.error("Exception in data completeness checker task for dataset {}.. Continuing with remaining datasets", dataset, e);
            }
        }
    } catch (Exception e) {
        LOG.error("Exception in data completeness checker task", e);
    }
}
Also used : DataCompletenessAlgorithmName(com.linkedin.thirdeye.completeness.checker.DataCompletenessConstants.DataCompletenessAlgorithmName) DateTime(org.joda.time.DateTime) DateTimeZone(org.joda.time.DateTimeZone) TimeSpec(com.linkedin.thirdeye.api.TimeSpec) DatasetConfigDTO(com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO) DateTimeFormatter(org.joda.time.format.DateTimeFormatter)

Example 43 with DatasetConfigDTO

use of com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO in project pinot by linkedin.

the class Utils method getNonAdditiveTimeGranularity.

/**
 * Returns the non-additive time granularity configured for the given collection.
 *
 * @param collection name of the collection whose dataset config is looked up in the cache
 * @return the granularity built from the config's non-additive bucket size and unit, or
 *         {@code null} when either value is unset or the dataset config cannot be loaded
 * @throws IllegalArgumentException if the configured bucket unit is not a {@link TimeUnit} constant name
 */
public static TimeGranularity getNonAdditiveTimeGranularity(String collection) {
    try {
        DatasetConfigDTO datasetConfig = CACHE_REGISTRY.getDatasetConfigCache().get(collection);
        Integer nonAdditiveBucketSize = datasetConfig.getNonAdditiveBucketSize();
        String nonAdditiveBucketUnit = datasetConfig.getNonAdditiveBucketUnit();
        if (nonAdditiveBucketSize != null && nonAdditiveBucketUnit != null) {
            return new TimeGranularity(nonAdditiveBucketSize, TimeUnit.valueOf(nonAdditiveBucketUnit));
        }
    } catch (ExecutionException e) {
        // best-effort lookup: callers get null, but keep the collection and cause for diagnosis
        LOG.info("Exception in fetching non additive time granularity for {}", collection, e);
    }
    return null;
}
Also used : DatasetConfigDTO(com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO) TimeGranularity(com.linkedin.thirdeye.api.TimeGranularity) ExecutionException(java.util.concurrent.ExecutionException)

Example 44 with DatasetConfigDTO

use of com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO in project pinot by linkedin.

the class Utils method getDataTimeZone.

/**
 * Returns the time zone of the data in this collection.
 *
 * Falls back to {@link TimeSpec#DEFAULT_TIMEZONE} when the dataset config cannot be
 * loaded or when it carries no timezone.
 *
 * @param collection name of the collection whose dataset config is looked up in the cache
 * @return the zone for the configured timezone id, or for the default id as described above
 */
public static DateTimeZone getDataTimeZone(String collection) {
    String timezone = TimeSpec.DEFAULT_TIMEZONE;
    try {
        DatasetConfigDTO datasetConfig = CACHE_REGISTRY.getDatasetConfigCache().get(collection);
        String configuredTimezone = datasetConfig.getTimezone();
        // DateTimeZone.forID(null) yields the JVM default zone, so never let a missing
        // timezone overwrite the intended fallback
        if (configuredTimezone != null) {
            timezone = configuredTimezone;
        }
    } catch (ExecutionException e) {
        LOG.error("Exception while getting dataset config for {}", collection, e);
    }
    return DateTimeZone.forID(timezone);
}
Also used : DatasetConfigDTO(com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO) ExecutionException(java.util.concurrent.ExecutionException)

Example 45 with DatasetConfigDTO

use of com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO in project pinot by linkedin.

the class AnomalyResource method updateAnomalyFunction.

/**
 * Edits an existing anomaly function and persists the change.
 *
 * Bucket size and unit are always reset to the data granularity of the dataset's time spec.
 * Filters and explore dimensions are only overwritten when a non-blank / non-empty value is
 * supplied; otherwise the stored values are left untouched.
 *
 * NOTE(review): windowDelay is validated as mandatory and windowDelayUnit is accepted, but
 * neither is applied to the function spec in this method - confirm whether that is intended.
 *
 * @param id id of the anomaly function to update (mandatory)
 * @param dataset dataset (collection) the function runs on (mandatory)
 * @param functionName new function name (mandatory)
 * @param metric metric name, used as topic metric and as the single entry of the metrics list (mandatory)
 * @param type function type; DEFAULT_FUNCTION_TYPE is used when empty
 * @param windowSize window size, parsed as an integer (mandatory)
 * @param windowUnit window unit, must be a {@link TimeUnit} constant name (mandatory)
 * @param windowDelay mandatory, see the review note above
 * @param cron cron expression; DEFAULT_CRON is used when empty
 * @param windowDelayUnit optional, see the review note above
 * @param exploreDimensions optional explore dimensions
 * @param filters optional URL-encoded filters JSON
 * @param properties function properties (mandatory)
 * @param isActive whether the function is active
 * @return an OK response carrying the id of the updated function
 * @throws UnsupportedOperationException if a mandatory parameter is null or empty
 * @throws IllegalStateException if no anomaly function exists with the given id
 * @throws IllegalArgumentException if the dataset has no config, or the cron expression is invalid
 */
@POST
@Path("/anomaly-function/update")
public Response updateAnomalyFunction(@NotNull @QueryParam("id") Long id, @NotNull @QueryParam("dataset") String dataset, @NotNull @QueryParam("functionName") String functionName, @NotNull @QueryParam("metric") String metric, @QueryParam("type") String type, @NotNull @QueryParam("windowSize") String windowSize, @NotNull @QueryParam("windowUnit") String windowUnit, @NotNull @QueryParam("windowDelay") String windowDelay, @QueryParam("cron") String cron, @QueryParam("windowDelayUnit") String windowDelayUnit, @QueryParam("exploreDimension") String exploreDimensions, @QueryParam("filters") String filters, @NotNull @QueryParam("properties") String properties, @QueryParam("isActive") boolean isActive) throws Exception {
    if (id == null || StringUtils.isEmpty(dataset) || StringUtils.isEmpty(functionName) || StringUtils.isEmpty(metric) || StringUtils.isEmpty(windowSize) || StringUtils.isEmpty(windowUnit) || StringUtils.isEmpty(windowDelay) || StringUtils.isEmpty(properties)) {
        throw new UnsupportedOperationException("Received null for one of the mandatory params: " + "id " + id + ", dataset " + dataset + ", functionName " + functionName + ", metric " + metric + ", windowSize " + windowSize + ", windowUnit " + windowUnit + ", windowDelay " + windowDelay + ", properties " + properties);
    }
    AnomalyFunctionDTO anomalyFunctionSpec = anomalyFunctionDAO.findById(id);
    if (anomalyFunctionSpec == null) {
        throw new IllegalStateException("AnomalyFunctionSpec with id " + id + " does not exist");
    }
    DatasetConfigDTO datasetConfig = DAO_REGISTRY.getDatasetConfigDAO().findByDataset(dataset);
    if (datasetConfig == null) {
        // fail with a clear message instead of a NullPointerException further down
        throw new IllegalArgumentException("Dataset config for dataset " + dataset + " does not exist");
    }
    TimeSpec timespec = ThirdEyeUtils.getTimeSpecFromDatasetConfig(datasetConfig);
    TimeGranularity dataGranularity = timespec.getDataGranularity();
    anomalyFunctionSpec.setActive(isActive);
    anomalyFunctionSpec.setCollection(dataset);
    anomalyFunctionSpec.setFunctionName(functionName);
    anomalyFunctionSpec.setTopicMetric(metric);
    anomalyFunctionSpec.setMetrics(Arrays.asList(metric));
    if (StringUtils.isEmpty(type)) {
        type = DEFAULT_FUNCTION_TYPE;
    }
    anomalyFunctionSpec.setType(type);
    anomalyFunctionSpec.setWindowSize(Integer.valueOf(windowSize));
    anomalyFunctionSpec.setWindowUnit(TimeUnit.valueOf(windowUnit));
    // bucket size and unit are defaulted to the collection granularity
    anomalyFunctionSpec.setBucketSize(dataGranularity.getSize());
    anomalyFunctionSpec.setBucketUnit(dataGranularity.getUnit());
    if (!StringUtils.isBlank(filters)) {
        // filters arrive URL-encoded; decode before normalizing them into the stored form
        filters = URLDecoder.decode(filters, UTF8);
        String filterString = ThirdEyeUtils.getSortedFiltersFromJson(filters);
        anomalyFunctionSpec.setFilters(filterString);
    }
    anomalyFunctionSpec.setProperties(properties);
    if (StringUtils.isNotEmpty(exploreDimensions)) {
        // Ensure that the explore dimension names are ordered as schema dimension names
        anomalyFunctionSpec.setExploreDimensions(getDimensions(dataset, exploreDimensions));
    }
    if (StringUtils.isEmpty(cron)) {
        cron = DEFAULT_CRON;
    } else if (!CronExpression.isValidExpression(cron)) {
        // reject a bad cron before anything is persisted
        throw new IllegalArgumentException("Invalid cron expression for cron : " + cron);
    }
    anomalyFunctionSpec.setCron(cron);
    anomalyFunctionDAO.update(anomalyFunctionSpec);
    return Response.ok(id).build();
}
Also used : DatasetConfigDTO(com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO) TimeGranularity(com.linkedin.thirdeye.api.TimeGranularity) AnomalyFunctionDTO(com.linkedin.thirdeye.datalayer.dto.AnomalyFunctionDTO) TimeSpec(com.linkedin.thirdeye.api.TimeSpec) Path(javax.ws.rs.Path) POST(javax.ws.rs.POST)

Aggregations

DatasetConfigDTO (com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO)54 TimeSpec (com.linkedin.thirdeye.api.TimeSpec)14 DateTime (org.joda.time.DateTime)14 ArrayList (java.util.ArrayList)13 Path (javax.ws.rs.Path)12 ExecutionException (java.util.concurrent.ExecutionException)11 GET (javax.ws.rs.GET)10 TimeGranularity (com.linkedin.thirdeye.api.TimeGranularity)9 AnomalyFunctionDTO (com.linkedin.thirdeye.datalayer.dto.AnomalyFunctionDTO)9 MetricConfigDTO (com.linkedin.thirdeye.datalayer.dto.MetricConfigDTO)9 DateTimeZone (org.joda.time.DateTimeZone)9 Test (org.testng.annotations.Test)9 DateTimeFormatter (org.joda.time.format.DateTimeFormatter)7 IOException (java.io.IOException)6 MetricExpression (com.linkedin.thirdeye.client.MetricExpression)5 ResultSetGroup (com.linkedin.pinot.client.ResultSetGroup)4 DetectionStatusDTO (com.linkedin.thirdeye.datalayer.dto.DetectionStatusDTO)4 JSONException (org.json.JSONException)4 DashboardConfigDTO (com.linkedin.thirdeye.datalayer.dto.DashboardConfigDTO)3 NullArgumentException (org.apache.commons.lang.NullArgumentException)3