Search in sources :

Example 11 with DetectionStatusDTO

use of com.linkedin.thirdeye.datalayer.dto.DetectionStatusDTO in project pinot by linkedin.

In the class DetectionJobScheduler, the method runBackfill:

/**
   * Sequentially performs anomaly detection for all the monitoring windows that are located between backfillStartTime
   * and backfillEndTime. A lightweight job is performed right after each detection job, and the notified flag is set
   * to false in order to silence the mail alerts.
   *
   * NOTE: We assume that backfill windows for the same function DO NOT overlap. In other words, this function
   * does not guarantee correctness of the detection results if it is invoked twice with the same parameters.
   *
   * @param functionId the id of the anomaly function, which has to be an active function
   * @param backfillStartTime the start time for backfilling
   * @param backfillEndTime the end time for backfilling
   * @param force set to false to resume from previous backfill if there exists any
   * @return the id of the generated detection job, or null if no job was scheduled (unknown/inactive function,
   *         unparseable cron, a concurrent backfill for the same key, or interruption)
   */
public Long runBackfill(long functionId, DateTime backfillStartTime, DateTime backfillEndTime, boolean force) {
    AnomalyFunctionDTO anomalyFunction = DAO_REGISTRY.getAnomalyFunctionDAO().findById(functionId);
    // Guard against an unknown function id: findById returns null when no row matches,
    // which would otherwise NPE on getCollection() below.
    if (anomalyFunction == null) {
        LOG.error("Function: {} not found, skipping backfill", functionId);
        return null;
    }
    Long jobId = null;
    String dataset = anomalyFunction.getCollection();
    boolean isActive = anomalyFunction.getIsActive();
    if (!isActive) {
        LOG.info("Skipping function {}", functionId);
        return null;
    }
    // Claim the (functionId, start, end) slot atomically; only one backfill per key may run.
    BackfillKey backfillKey = new BackfillKey(functionId, backfillStartTime, backfillEndTime);
    Thread returnedThread = existingBackfillJobs.putIfAbsent(backfillKey, Thread.currentThread());
    // If returned thread is not current thread, then a backfill job is already running
    if (returnedThread != null) {
        LOG.info("Function: {} Dataset: {} Aborting... An existing back-fill job is running...", functionId, dataset);
        return null;
    }
    try {
        CronExpression cronExpression = null;
        try {
            cronExpression = new CronExpression(anomalyFunction.getCron());
        } catch (ParseException e) {
            // Pass the exception as the last argument so SLF4J logs the stack trace and the
            // malformed cron string can be diagnosed.
            LOG.error("Function: {} Dataset: {} Failed to parse cron expression", functionId, dataset, e);
            return null;
        }
        long monitoringWindowSize = TimeUnit.MILLISECONDS.convert(anomalyFunction.getWindowSize(), anomalyFunction.getWindowUnit());
        DateTime currentStart;
        if (force) {
            currentStart = backfillStartTime;
        } else {
            // Resume from where a previous (partial) backfill left off.
            currentStart = computeResumeStartTime(functionId, cronExpression, backfillStartTime, backfillEndTime);
        }
        DateTime currentEnd = currentStart.plus(monitoringWindowSize);
        // Make the end time inclusive
        DateTime endBoundary = new DateTime(cronExpression.getNextValidTimeAfter(backfillEndTime.toDate()));
        List<Long> startTimes = new ArrayList<>();
        List<Long> endTimes = new ArrayList<>();
        LOG.info("Function: {} Dataset: {} Begin regenerate anomalies for each monitoring window between {} and {}", functionId, dataset, currentStart, endBoundary);
        // Walk the cron schedule from currentStart up to (but not including) endBoundary,
        // collecting one [start, end) monitoring window per cron fire time.
        while (currentEnd.isBefore(endBoundary)) {
            // Honor cancellation requested via Thread.interrupt() (e.g. shutdown or abort).
            if (Thread.currentThread().isInterrupted()) {
                LOG.info("Function: {} Dataset: {} Terminating adhoc function.", functionId, dataset);
                return null;
            }
            String monitoringWindowStart = ISODateTimeFormat.dateHourMinute().print(currentStart);
            String monitoringWindowEnd = ISODateTimeFormat.dateHourMinute().print(currentEnd);
            LOG.info("Function: {} Dataset: {} Adding adhoc time range {}({}) to {}({})", functionId, dataset, currentStart, monitoringWindowStart, currentEnd, monitoringWindowEnd);
            startTimes.add(currentStart.getMillis());
            endTimes.add(currentEnd.getMillis());
            currentStart = new DateTime(cronExpression.getNextValidTimeAfter(currentStart.toDate()));
            currentEnd = currentStart.plus(monitoringWindowSize);
        }
        // If any time periods found, for which detection needs to be run, run anomaly function update detection status
        List<DetectionStatusDTO> findAllInTimeRange = DAO_REGISTRY.getDetectionStatusDAO().findAllInTimeRangeForFunctionAndDetectionRun(backfillStartTime.getMillis(), currentStart.getMillis(), functionId, false);
        jobId = runAnomalyFunctionAndUpdateDetectionStatus(startTimes, endTimes, anomalyFunction, findAllInTimeRange);
        LOG.info("Function: {} Dataset: {} Generated job for detecting anomalies for each monitoring window " + "whose start is located in range {} -- {}", functionId, dataset, backfillStartTime, currentStart);
    } finally {
        // Release the backfill slot only if this thread still owns it.
        existingBackfillJobs.remove(backfillKey, Thread.currentThread());
    }
    return jobId;
}
Also used : ArrayList(java.util.ArrayList) DateTime(org.joda.time.DateTime) AnomalyFunctionDTO(com.linkedin.thirdeye.datalayer.dto.AnomalyFunctionDTO) CronExpression(org.quartz.CronExpression) ParseException(java.text.ParseException) DetectionStatusDTO(com.linkedin.thirdeye.datalayer.dto.DetectionStatusDTO)

Example 12 with DetectionStatusDTO

use of com.linkedin.thirdeye.datalayer.dto.DetectionStatusDTO in project pinot by linkedin.

In the class DetectionJobScheduler, the method run:

/**
   * Scheduler entry point: loads every active anomaly function, determines which monitoring
   * windows still need detection, and kicks off detection for those windows.
   *
   * For each active function this method (1) creates new detection-status entries covering the
   * gap between the last recorded entry and now, (2) collects the entries from the past 3 days
   * that have not yet run and whose data-availability criteria are met, and (3) triggers anomaly
   * detection for the collected windows, updating their detection status afterwards.
   * A failure on one function is logged and does not stop processing of the remaining functions.
   * {@inheritDoc}
   * @see java.lang.Runnable#run()
   */
public void run() {
    // read all anomaly functions
    LOG.info("Reading all anomaly functions");
    List<AnomalyFunctionDTO> activeFunctions = DAO_REGISTRY.getAnomalyFunctionDAO().findAllActiveFunctions();
    // for each active anomaly function
    for (AnomalyFunctionDTO function : activeFunctions) {
        try {
            LOG.info("Function: {}", function);
            long functionId = function.getId();
            String dataset = function.getCollection();
            DatasetConfigDTO datasetConfig = CACHE_REGISTRY.getDatasetConfigCache().get(dataset);
            DateTimeZone timeZone = Utils.getDataTimeZone(dataset);
            DateTime now = new DateTime(timeZone);
            // find last entry into detectionStatus table, for this function
            DetectionStatusDTO latestEntry = DAO_REGISTRY.getDetectionStatusDAO().findLatestEntryForFunctionId(functionId);
            LOG.info("Function: {} Dataset: {} Last entry is {}", functionId, dataset, latestEntry);
            // calculate entries from last entry to current time
            Map<String, Long> entriesToCreate = DetectionJobSchedulerUtils.getNewEntries(now, latestEntry, function, datasetConfig, timeZone);
            LOG.info("Function: {} Dataset: {} Creating {} new entries {}", functionId, dataset, entriesToCreate.size(), entriesToCreate);
            // create these entries
            for (Entry<String, Long> pending : entriesToCreate.entrySet()) {
                DetectionStatusDTO status = new DetectionStatusDTO();
                status.setDataset(function.getCollection());
                status.setFunctionId(functionId);
                status.setDateToCheckInSDF(pending.getKey());
                status.setDateToCheckInMS(pending.getValue());
                DAO_REGISTRY.getDetectionStatusDAO().save(status);
            }
            // find all entries in the past 3 days, which are still isRun = false
            List<DetectionStatusDTO> recentUnrunEntries = DAO_REGISTRY.getDetectionStatusDAO().findAllInTimeRangeForFunctionAndDetectionRun(now.minusDays(3).getMillis(), now.getMillis(), functionId, false);
            Collections.sort(recentUnrunEntries);
            LOG.info("Function: {} Dataset: {} Entries in last 3 days {}", functionId, dataset, recentUnrunEntries);
            // for each entry, collect startTime and endTime
            List<Long> windowStarts = new ArrayList<>();
            List<Long> windowEnds = new ArrayList<>();
            List<DetectionStatusDTO> statusesToUpdate = new ArrayList<>();
            for (DetectionStatusDTO entry : recentUnrunEntries) {
                try {
                    LOG.info("Function: {} Dataset: {} Entry : {}", functionId, dataset, entry);
                    long checkTimeMs = entry.getDateToCheckInMS();
                    // check availability for monitoring window - delay
                    long windowEnd = checkTimeMs - TimeUnit.MILLISECONDS.convert(function.getWindowDelay(), function.getWindowDelayUnit());
                    long windowStart = windowEnd - TimeUnit.MILLISECONDS.convert(function.getWindowSize(), function.getWindowUnit());
                    LOG.info("Function: {} Dataset: {} Checking start:{} {} to end:{} {}", functionId, dataset, windowStart, new DateTime(windowStart, timeZone), windowEnd, new DateTime(windowEnd, timeZone));
                    if (!checkIfDetectionRunCriteriaMet(windowStart, windowEnd, datasetConfig, function)) {
                        LOG.warn("Function: {} Dataset: {} Data incomplete for monitoring window {} ({}) to {} ({}), skipping anomaly detection", functionId, dataset, windowStart, new DateTime(windowStart), windowEnd, new DateTime(windowEnd));
                    // TODO: Send email to owners/dev team
                    } else {
                        windowStarts.add(windowStart);
                        windowEnds.add(windowEnd);
                        statusesToUpdate.add(entry);
                    }
                } catch (Exception e) {
                    LOG.error("Function: {} Dataset: {} Exception in preparing entry {}", functionId, dataset, entry, e);
                }
            }
            // If any time periods found, for which detection needs to be run
            runAnomalyFunctionAndUpdateDetectionStatus(windowStarts, windowEnds, function, statusesToUpdate);
        } catch (Exception e) {
            LOG.error("Function: {} Dataset: {} Exception in running anomaly function {}", function.getId(), function.getCollection(), function, e);
        }
    }
}
Also used : ArrayList(java.util.ArrayList) DateTimeZone(org.joda.time.DateTimeZone) DateTime(org.joda.time.DateTime) SchedulerException(org.quartz.SchedulerException) ParseException(java.text.ParseException) ExecutionException(java.util.concurrent.ExecutionException) DatasetConfigDTO(com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO) AnomalyFunctionDTO(com.linkedin.thirdeye.datalayer.dto.AnomalyFunctionDTO) DetectionStatusDTO(com.linkedin.thirdeye.datalayer.dto.DetectionStatusDTO)

Aggregations

DetectionStatusDTO (com.linkedin.thirdeye.datalayer.dto.DetectionStatusDTO)12 DateTime (org.joda.time.DateTime)7 AnomalyFunctionDTO (com.linkedin.thirdeye.datalayer.dto.AnomalyFunctionDTO)5 Test (org.testng.annotations.Test)5 DatasetConfigDTO (com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO)4 DateTimeZone (org.joda.time.DateTimeZone)4 ArrayList (java.util.ArrayList)3 DateTimeFormatter (org.joda.time.format.DateTimeFormatter)3 DetectionStatusBean (com.linkedin.thirdeye.datalayer.pojo.DetectionStatusBean)2 Predicate (com.linkedin.thirdeye.datalayer.util.Predicate)2 ParseException (java.text.ParseException)2 TimeGranularity (com.linkedin.thirdeye.api.TimeGranularity)1 ExecutionException (java.util.concurrent.ExecutionException)1 CronExpression (org.quartz.CronExpression)1 SchedulerException (org.quartz.SchedulerException)1