Search in sources :

Example 31 with DatasetConfigDTO

use of com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO in project pinot by linkedin.

the class CollectionMaxDataTimeCacheLoader method load.

/**
 * Loads the max data time, in epoch milliseconds, for the given collection.
 *
 * <p>The method issues a max-time query (built from COLLECTION_MAX_TIME_QUERY_TEMPLATE)
 * through resultSetGroupCache, forcing a refresh of that cache entry first. The raw
 * value returned by the query is remembered in collectionToPrevMaxDataTimeMap; the entry
 * is removed when the query yields no rows or when any exception occurs.
 *
 * <p>Although the signature declares {@code throws Exception}, every exception raised
 * while querying or converting is caught and logged here.
 *
 * @param collection name of the collection (dataset) whose max data time is wanted
 * @return the max data time in epoch milliseconds, or the current system time when no
 *         positive value could be determined
 * @throws Exception declared by the overridden method
 */
@Override
public Long load(String collection) throws Exception {
    LOGGER.info("Loading maxDataTime cache {}", collection);
    long maxTime = 0;
    try {
        DatasetConfigDTO datasetConfig = datasetConfigDAO.findByDataset(collection);
        // By default, query only offline, unless dataset has been marked as realtime
        String tableName = ThirdEyeUtils.computeTableName(collection);
        TimeSpec timeSpec = ThirdEyeUtils.getTimeSpecFromDatasetConfig(datasetConfig);
        long prevMaxDataTime = getPrevMaxDataTime(collection, timeSpec);
        String maxTimePql = String.format(COLLECTION_MAX_TIME_QUERY_TEMPLATE, timeSpec.getColumnName(), tableName, timeSpec.getColumnName(), prevMaxDataTime);
        PinotQuery maxTimePinotQuery = new PinotQuery(maxTimePql, tableName);
        // Refresh before reading so the subsequent get() is not served a stale entry.
        resultSetGroupCache.refresh(maxTimePinotQuery);
        ResultSetGroup resultSetGroup = resultSetGroupCache.get(maxTimePinotQuery);
        if (resultSetGroup.getResultSetCount() == 0 || resultSetGroup.getResultSet(0).getRowCount() == 0) {
            LOGGER.info("resultSetGroup is Empty for collection {} is {}", tableName, resultSetGroup);
            this.collectionToPrevMaxDataTimeMap.remove(collection);
        } else {
            // The query result is read as a double; truncate it to a long with a plain cast
            // instead of going through the deprecated Double(double) boxing constructor.
            long endTime = (long) resultSetGroup.getResultSet(0).getDouble(0);
            this.collectionToPrevMaxDataTimeMap.put(collection, endTime);
            String timeFormat = timeSpec.getFormat();
            if (StringUtils.isBlank(timeFormat) || TimeSpec.SINCE_EPOCH_FORMAT.equals(timeFormat)) {
                // endTime + 1 to make sure we cover the time range of that time value.
                maxTime = timeSpec.getDataGranularity().toMillis(endTime + 1) - 1;
            } else {
                // Formatted time column: parse the numeric value as text using the column's
                // pattern in the collection's data time zone.
                DateTimeFormatter inputDataDateTimeFormatter = DateTimeFormat.forPattern(timeFormat).withZone(Utils.getDataTimeZone(collection));
                maxTime = DateTime.parse(String.valueOf(endTime), inputDataDateTimeFormatter).getMillis();
            }
        }
    } catch (Exception e) {
        // Best effort: log, forget the previous max time, and fall through to the default below.
        LOGGER.warn("Exception getting maxTime from collection: {}", collection, e);
        this.collectionToPrevMaxDataTimeMap.remove(collection);
    }
    // No usable value (empty result, failure, or non-positive time): default to "now".
    if (maxTime <= 0) {
        maxTime = System.currentTimeMillis();
    }
    return maxTime;
}
Also used : DatasetConfigDTO(com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO) PinotQuery(com.linkedin.thirdeye.client.pinot.PinotQuery) DateTimeFormatter(org.joda.time.format.DateTimeFormatter) ResultSetGroup(com.linkedin.pinot.client.ResultSetGroup) TimeSpec(com.linkedin.thirdeye.api.TimeSpec)

Example 32 with DatasetConfigDTO

use of com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO in project pinot by linkedin.

the class EmailHelper method writeTimeSeriesChart.

/**
 * Renders the time-on-time comparison chart for an email report and writes it to a file.
 *
 * <p>The chart granularity is the data granularity of the collection's time spec. Chart
 * data is fetched via {@code getData} for the range {@code then}..{@code now}, passing
 * WEEK_MILLIS; the anomaly window length drawn on the chart comes from the email
 * configuration's window size and unit.
 *
 * @param config email configuration supplying the window size/unit and the id used in the file name
 * @param timeOnTimeComparisonHandler handler passed through to {@code getData}
 * @param now end of the charted range
 * @param then start of the charted range
 * @param collection dataset whose config determines the chart granularity
 * @param anomaliesWithLabels anomalies, with their labels, handed to the chart generator
 * @return path of the written chart file
 * @throws JobExecutionException wrapping any exception raised while building or writing the chart
 */
public static String writeTimeSeriesChart(final EmailConfigurationDTO config, TimeOnTimeComparisonHandler timeOnTimeComparisonHandler, final DateTime now, final DateTime then, final String collection, final Map<RawAnomalyResultDTO, String> anomaliesWithLabels) throws JobExecutionException {
    try {
        final long anomalyWindowMillis = config.getWindowUnit().toMillis(config.getWindowSize());
        // TODO provide a way for email reports to specify desired graph granularity.
        final DatasetConfigDTO datasetConfig = DAO_REGISTRY.getDatasetConfigDAO().findByDataset(collection);
        final TimeGranularity granularity = ThirdEyeUtils.getTimeSpecFromDatasetConfig(datasetConfig).getDataGranularity();

        final TimeOnTimeComparisonResponse comparison = getData(timeOnTimeComparisonHandler, config, then, now, WEEK_MILLIS, granularity);

        final AnomalyGraphGenerator graphGenerator = AnomalyGraphGenerator.getInstance();
        final JFreeChart chart = graphGenerator.createChart(comparison, granularity, anomalyWindowMillis, anomaliesWithLabels);

        final String outputPath = EMAIL_REPORT_CHART_PREFIX + config.getId() + PNG;
        LOG.info("Writing chart to {}", outputPath);
        graphGenerator.writeChartToFile(chart, outputPath);
        return outputPath;
    } catch (Exception e) {
        throw new JobExecutionException(e);
    }
}
Also used : JFreeChart(org.jfree.chart.JFreeChart) MalformedURLException(java.net.MalformedURLException) JobExecutionException(org.quartz.JobExecutionException) EmailException(org.apache.commons.mail.EmailException) TimeSpec(com.linkedin.thirdeye.api.TimeSpec) DatasetConfigDTO(com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO) TimeOnTimeComparisonResponse(com.linkedin.thirdeye.client.comparison.TimeOnTimeComparisonResponse) DatasetConfigManager(com.linkedin.thirdeye.datalayer.bao.DatasetConfigManager) JobExecutionException(org.quartz.JobExecutionException) TimeUnit(java.util.concurrent.TimeUnit) TimeGranularity(com.linkedin.thirdeye.api.TimeGranularity) AnomalyGraphGenerator(com.linkedin.thirdeye.detector.email.AnomalyGraphGenerator)

Example 33 with DatasetConfigDTO

use of com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO in project pinot by linkedin.

the class EmailHelper method getContributorDataForDataReport.

/**
 * Builds a contributor-view request for a data report on one metric and processes it.
 *
 * <p>The current window ends at the earlier of "now" and the collection's cached max data
 * time, truncated to an hour boundary and shifted back by {@code offsetDelayMillis}. By
 * default it spans one day at HOURS granularity. With {@code intraday} it starts at the
 * beginning of the end date in the default alert time zone, unless that instant equals the
 * window end. For datasets whose time unit is DAYS the end is truncated to an epoch-aligned
 * day boundary and the window spans one week at DAYS granularity. The baseline window is the
 * current window shifted back by the offset derived from {@code compareMode}.
 *
 * @param collection dataset to query
 * @param metric metric name, converted to SUM metric expressions
 * @param dimensions dimensions to group by
 * @param compareMode comparison mode from which the baseline offset is derived
 * @param offsetDelayMillis delay, in milliseconds, subtracted from the aligned window end
 * @param intraday whether to start the current window at the start of the end date
 * @return the response produced by the contributor view handler
 * @throws Exception if the max data time lookup or request processing fails
 */
public static ContributorViewResponse getContributorDataForDataReport(String collection, String metric, List<String> dimensions, AlertConfigBean.COMPARE_MODE compareMode, long offsetDelayMillis, boolean intraday) throws Exception {
    long baselineOffset = getBaselineOffset(compareMode);
    ContributorViewRequest request = new ContributorViewRequest();
    request.setCollection(collection);
    List<MetricExpression> metricExpressions = Utils.convertToMetricExpressions(metric, MetricAggFunction.SUM, collection);
    request.setMetricExpressions(metricExpressions);
    long currentEnd = System.currentTimeMillis();
    // Never report past the latest data point known for this collection.
    long maxDataTime = collectionMaxDataTimeCache.get(collection);
    if (currentEnd > maxDataTime) {
        currentEnd = maxDataTime;
    }
    // align to nearest hour
    currentEnd = (currentEnd - (currentEnd % HOUR_MILLIS)) - offsetDelayMillis;
    String aggTimeGranularity = "HOURS";
    long currentStart = currentEnd - DAY_MILLIS;
    // intraday option
    if (intraday) {
        DateTimeZone timeZone = DateTimeZone.forTimeZone(AlertTaskRunnerV2.DEFAULT_TIME_ZONE);
        DateTime endDate = new DateTime(currentEnd, timeZone);
        // Keep only the date part of the ISO string to obtain the start of that day.
        DateTime intraDayStartTime = new DateTime(endDate.toString().split("T")[0], timeZone);
        // If the end already sits on the day start, keep the default one-day window
        // rather than producing an empty range.
        if (intraDayStartTime.getMillis() != currentEnd) {
            currentStart = intraDayStartTime.getMillis();
        }
    }
    DatasetConfigDTO datasetConfigDTO = datasetConfigManager.findByDataset(collection);
    if (datasetConfigDTO != null && TimeUnit.DAYS.equals(datasetConfigDTO.getTimeUnit())) {
        aggTimeGranularity = datasetConfigDTO.getTimeUnit().name();
        currentEnd = currentEnd - (currentEnd % DAY_MILLIS);
        currentStart = currentEnd - WEEK_MILLIS;
    }
    long baselineStart = currentStart - baselineOffset;
    long baselineEnd = currentEnd - baselineOffset;
    // The dataset config is treated as optional above, so it must not be dereferenced
    // unconditionally here. Without a config, fall back to the default alert time zone
    // already used for intraday alignment. The zone is resolved once for all four bounds.
    DateTimeZone dataTimeZone = datasetConfigDTO != null
        ? DateTimeZone.forID(datasetConfigDTO.getTimezone())
        : DateTimeZone.forTimeZone(AlertTaskRunnerV2.DEFAULT_TIME_ZONE);
    request.setBaselineStart(new DateTime(baselineStart, dataTimeZone));
    request.setBaselineEnd(new DateTime(baselineEnd, dataTimeZone));
    request.setCurrentStart(new DateTime(currentStart, dataTimeZone));
    request.setCurrentEnd(new DateTime(currentEnd, dataTimeZone));
    request.setTimeGranularity(Utils.getAggregationTimeGranularity(aggTimeGranularity, collection));
    request.setGroupByDimensions(dimensions);
    ContributorViewHandler handler = new ContributorViewHandler(queryCache);
    return handler.process(request);
}
Also used : DatasetConfigDTO(com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO) ContributorViewHandler(com.linkedin.thirdeye.dashboard.views.contributor.ContributorViewHandler) ContributorViewRequest(com.linkedin.thirdeye.dashboard.views.contributor.ContributorViewRequest) MetricExpression(com.linkedin.thirdeye.client.MetricExpression) DateTimeZone(org.joda.time.DateTimeZone) DateTime(org.joda.time.DateTime)

Example 34 with DatasetConfigDTO

use of com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO in project pinot by linkedin.

the class DetectionJobScheduler method run.

/**
 * Schedules anomaly detection for every active anomaly function.
 *
 * <p>For each active function this method:
 * <ol>
 *   <li>computes the detection-status entries missing between the function's latest entry
 *       and the current time in the dataset's time zone, and saves them;</li>
 *   <li>loads the entries from the past 3 days for which detection has not been run;</li>
 *   <li>derives each entry's monitoring window (end = date to check minus window delay,
 *       start = end minus window size) and keeps the windows that meet the
 *       detection-run criteria;</li>
 *   <li>hands the collected windows to
 *       {@code runAnomalyFunctionAndUpdateDetectionStatus}.</li>
 * </ol>
 *
 * <p>A failure while preparing one entry or processing one function is logged and does
 * not stop the remaining entries or functions.
 * {@inheritDoc}
 * @see java.lang.Runnable#run()
 */
public void run() {
    // read all anomaly functions
    LOG.info("Reading all anomaly functions");
    List<AnomalyFunctionDTO> anomalyFunctions = DAO_REGISTRY.getAnomalyFunctionDAO().findAllActiveFunctions();
    // for each active anomaly function
    for (AnomalyFunctionDTO anomalyFunction : anomalyFunctions) {
        try {
            LOG.info("Function: {}", anomalyFunction);
            long functionId = anomalyFunction.getId();
            String dataset = anomalyFunction.getCollection();
            DatasetConfigDTO datasetConfig = CACHE_REGISTRY.getDatasetConfigCache().get(dataset);
            DateTimeZone dateTimeZone = Utils.getDataTimeZone(dataset);
            DateTime currentDateTime = new DateTime(dateTimeZone);
            // find last entry into detectionStatus table, for this function
            DetectionStatusDTO lastEntryForFunction = DAO_REGISTRY.getDetectionStatusDAO().findLatestEntryForFunctionId(functionId);
            LOG.info("Function: {} Dataset: {} Last entry is {}", functionId, dataset, lastEntryForFunction);
            // calculate entries from last entry to current time
            Map<String, Long> newEntries = DetectionJobSchedulerUtils.getNewEntries(currentDateTime, lastEntryForFunction, anomalyFunction, datasetConfig, dateTimeZone);
            LOG.info("Function: {} Dataset: {} Creating {} new entries {}", functionId, dataset, newEntries.size(), newEntries);
            // create these entries
            for (Entry<String, Long> entry : newEntries.entrySet()) {
                DetectionStatusDTO detectionStatus = new DetectionStatusDTO();
                detectionStatus.setDataset(anomalyFunction.getCollection());
                detectionStatus.setFunctionId(functionId);
                detectionStatus.setDateToCheckInSDF(entry.getKey());
                detectionStatus.setDateToCheckInMS(entry.getValue());
                DAO_REGISTRY.getDetectionStatusDAO().save(detectionStatus);
            }
            // find all entries in the past 3 days, which are still isRun = false
            List<DetectionStatusDTO> entriesInLast3Days = DAO_REGISTRY.getDetectionStatusDAO().findAllInTimeRangeForFunctionAndDetectionRun(currentDateTime.minusDays(3).getMillis(), currentDateTime.getMillis(), functionId, false);
            Collections.sort(entriesInLast3Days);
            LOG.info("Function: {} Dataset: {} Entries in last 3 days {}", functionId, dataset, entriesInLast3Days);
            // for each entry, collect startTime and endTime
            // (the three lists are index-aligned: window i belongs to status i)
            List<Long> startTimes = new ArrayList<>();
            List<Long> endTimes = new ArrayList<>();
            List<DetectionStatusDTO> detectionStatusToUpdate = new ArrayList<>();
            for (DetectionStatusDTO detectionStatus : entriesInLast3Days) {
                try {
                    LOG.info("Function: {} Dataset: {} Entry : {}", functionId, dataset, detectionStatus);
                    long dateToCheck = detectionStatus.getDateToCheckInMS();
                    // check availability for monitoring window - delay
                    long endTime = dateToCheck - TimeUnit.MILLISECONDS.convert(anomalyFunction.getWindowDelay(), anomalyFunction.getWindowDelayUnit());
                    long startTime = endTime - TimeUnit.MILLISECONDS.convert(anomalyFunction.getWindowSize(), anomalyFunction.getWindowUnit());
                    LOG.info("Function: {} Dataset: {} Checking start:{} {} to end:{} {}", functionId, dataset, startTime, new DateTime(startTime, dateTimeZone), endTime, new DateTime(endTime, dateTimeZone));
                    boolean pass = checkIfDetectionRunCriteriaMet(startTime, endTime, datasetConfig, anomalyFunction);
                    if (pass) {
                        startTimes.add(startTime);
                        endTimes.add(endTime);
                        detectionStatusToUpdate.add(detectionStatus);
                    } else {
                        // Render the window in the dataset's time zone, like the "Checking" log
                        // above, so both messages show the same wall-clock times.
                        LOG.warn("Function: {} Dataset: {} Data incomplete for monitoring window {} ({}) to {} ({}), skipping anomaly detection", functionId, dataset, startTime, new DateTime(startTime, dateTimeZone), endTime, new DateTime(endTime, dateTimeZone));
                        // TODO: Send email to owners/dev team
                    }
                } catch (Exception e) {
                    // One bad entry must not prevent the remaining entries from being evaluated.
                    LOG.error("Function: {} Dataset: {} Exception in preparing entry {}", functionId, dataset, detectionStatus, e);
                }
            }
            // If any time periods found, for which detection needs to be run
            runAnomalyFunctionAndUpdateDetectionStatus(startTimes, endTimes, anomalyFunction, detectionStatusToUpdate);
        } catch (Exception e) {
            // One failing function must not prevent the remaining functions from being scheduled.
            LOG.error("Function: {} Dataset: {} Exception in running anomaly function {}", anomalyFunction.getId(), anomalyFunction.getCollection(), anomalyFunction, e);
        }
    }
}
Also used : ArrayList(java.util.ArrayList) DateTimeZone(org.joda.time.DateTimeZone) DateTime(org.joda.time.DateTime) SchedulerException(org.quartz.SchedulerException) ParseException(java.text.ParseException) ExecutionException(java.util.concurrent.ExecutionException) DatasetConfigDTO(com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO) AnomalyFunctionDTO(com.linkedin.thirdeye.datalayer.dto.AnomalyFunctionDTO) DetectionStatusDTO(com.linkedin.thirdeye.datalayer.dto.DetectionStatusDTO)

Example 35 with DatasetConfigDTO

use of com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO in project pinot by linkedin.

the class DetectionTaskRunner method setupTask.

/**
 * Initializes this runner's state from a detection task before execution.
 *
 * <p>Copies the window start/end times, anomaly function spec and job execution id from
 * the task info, builds the anomaly function through the context's factory, and loads the
 * dimensions of the function's dataset.
 *
 * @param taskInfo task description; cast to {@code DetectionTaskInfo}
 * @param taskContext context supplying the anomaly function factory
 * @throws NullArgumentException if no dataset config exists for the function's collection
 * @throws Exception if building the anomaly function or looking up the dataset fails
 */
private void setupTask(TaskInfo taskInfo, TaskContext taskContext) throws Exception {
    final DetectionTaskInfo detectionInfo = (DetectionTaskInfo) taskInfo;

    // Task parameters.
    windowStarts = detectionInfo.getWindowStartTime();
    windowEnds = detectionInfo.getWindowEndTime();
    anomalyFunctionSpec = detectionInfo.getAnomalyFunctionSpec();
    jobExecutionId = detectionInfo.getJobExecutionId();

    // Function instance built from the spec.
    anomalyFunctionFactory = taskContext.getAnomalyFunctionFactory();
    anomalyFunction = anomalyFunctionFactory.fromSpec(anomalyFunctionSpec);

    // Dataset dimensions; a missing dataset config is fatal for the task.
    final String dataset = anomalyFunctionSpec.getCollection();
    final DatasetConfigDTO config = DAO_REGISTRY.getDatasetConfigDAO().findByDataset(dataset);
    if (config == null) {
        final String notFound = "Dataset [" + dataset + "] is not found";
        LOG.error(notFound);
        throw new NullArgumentException(notFound + " with function : " + anomalyFunctionSpec.toString());
    }
    collectionDimensions = config.getDimensions();

    LOG.info("Running anomaly detection job with metricFunction: [{}], topic metric [{}], collection: [{}]",
        anomalyFunctionSpec.getFunctionName(),
        anomalyFunctionSpec.getTopicMetric(),
        anomalyFunctionSpec.getCollection());
}
Also used : DatasetConfigDTO(com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO) NullArgumentException(org.apache.commons.lang.NullArgumentException)

Aggregations

DatasetConfigDTO (com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO)54 TimeSpec (com.linkedin.thirdeye.api.TimeSpec)14 DateTime (org.joda.time.DateTime)14 ArrayList (java.util.ArrayList)13 Path (javax.ws.rs.Path)12 ExecutionException (java.util.concurrent.ExecutionException)11 GET (javax.ws.rs.GET)10 TimeGranularity (com.linkedin.thirdeye.api.TimeGranularity)9 AnomalyFunctionDTO (com.linkedin.thirdeye.datalayer.dto.AnomalyFunctionDTO)9 MetricConfigDTO (com.linkedin.thirdeye.datalayer.dto.MetricConfigDTO)9 DateTimeZone (org.joda.time.DateTimeZone)9 Test (org.testng.annotations.Test)9 DateTimeFormatter (org.joda.time.format.DateTimeFormatter)7 IOException (java.io.IOException)6 MetricExpression (com.linkedin.thirdeye.client.MetricExpression)5 ResultSetGroup (com.linkedin.pinot.client.ResultSetGroup)4 DetectionStatusDTO (com.linkedin.thirdeye.datalayer.dto.DetectionStatusDTO)4 JSONException (org.json.JSONException)4 DashboardConfigDTO (com.linkedin.thirdeye.datalayer.dto.DashboardConfigDTO)3 NullArgumentException (org.apache.commons.lang.NullArgumentException)3