Search in sources:

Example 11 with TimeSpec

use of com.linkedin.thirdeye.api.TimeSpec in project pinot by linkedin.

the class DataCompletenessTaskUtilsTest method testGetBucketSizeForDataset.

/**
 * Exercises {@code DataCompletenessTaskUtils.getBucketSizeInMSForDataset} with epoch-format
 * time specs of several granularities and checks the bucket size (milliseconds) returned for each.
 */
@Test
public void testGetBucketSizeForDataset() throws Exception {
    final String timeColumn = "Date";
    final String epochFormat = TimeSpec.SINCE_EPOCH_FORMAT;

    // 1-DAY granularity: a one-day bucket is expected
    TimeSpec dailySpec = new TimeSpec(timeColumn, new TimeGranularity(1, TimeUnit.DAYS), epochFormat);
    long dailyBucketMs = DataCompletenessTaskUtils.getBucketSizeInMSForDataset(dailySpec);
    Assert.assertEquals(dailyBucketMs, TimeUnit.DAYS.toMillis(1));

    // 1-HOUR granularity: a one-hour bucket is expected
    TimeSpec hourlySpec = new TimeSpec(timeColumn, new TimeGranularity(1, TimeUnit.HOURS), epochFormat);
    long hourlyBucketMs = DataCompletenessTaskUtils.getBucketSizeInMSForDataset(hourlySpec);
    Assert.assertEquals(hourlyBucketMs, TimeUnit.HOURS.toMillis(1));

    // 1-MINUTE granularity: a 30-minute bucket is expected
    TimeSpec minuteSpec = new TimeSpec(timeColumn, new TimeGranularity(1, TimeUnit.MINUTES), epochFormat);
    long minuteBucketMs = DataCompletenessTaskUtils.getBucketSizeInMSForDataset(minuteSpec);
    Assert.assertEquals(minuteBucketMs, TimeUnit.MINUTES.toMillis(30));

    // Any other unit (MILLISECONDS here) takes the default: a one-hour bucket is expected
    TimeSpec millisSpec = new TimeSpec(timeColumn, new TimeGranularity(1, TimeUnit.MILLISECONDS), epochFormat);
    long defaultBucketMs = DataCompletenessTaskUtils.getBucketSizeInMSForDataset(millisSpec);
    Assert.assertEquals(defaultBucketMs, TimeUnit.HOURS.toMillis(1));
}
Also used : TimeGranularity(com.linkedin.thirdeye.api.TimeGranularity) TimeSpec(com.linkedin.thirdeye.api.TimeSpec) Test(org.testng.annotations.Test)

Example 12 with TimeSpec

use of com.linkedin.thirdeye.api.TimeSpec in project pinot by linkedin.

the class DataCompletenessTaskUtilsTest method testGetBucketNameToTimeValuesMap.

/**
 * Checks {@code DataCompletenessTaskUtils.getBucketNameToTimeValuesMap} for three inputs:
 * a SIMPLE_DATE_FORMAT daily spec, an epoch-format hourly spec and an epoch-format
 * 10-minute spec.
 *
 * <p>Every section first asserts that the returned bucket names match the expected ones, so the
 * per-entry comparison that follows cannot pass vacuously when the result is empty or is missing
 * a bucket.
 */
@Test
public void testGetBucketNameToTimeValuesMap() {
    DateTimeZone zone = DateTimeZone.forID("America/Los_Angeles");
    // SDF
    String columnName = "Date";
    TimeGranularity timeGranularity = new TimeGranularity(1, TimeUnit.DAYS);
    String timeFormat = "SIMPLE_DATE_FORMAT:yyyyMMdd";
    TimeSpec timeSpec = new TimeSpec(columnName, timeGranularity, timeFormat);
    // DAYS: bucket name -> bucket start in millis (midnight in the Los Angeles zone)
    Map<String, Long> bucketNameToBucketValue = new HashMap<>();
    bucketNameToBucketValue.put("20170112", new DateTime(2017, 1, 12, 0, 0, zone).getMillis());
    bucketNameToBucketValue.put("20170113", new DateTime(2017, 1, 13, 0, 0, zone).getMillis());
    bucketNameToBucketValue.put("20170114", new DateTime(2017, 1, 14, 0, 0, zone).getMillis());
    // expected time values are the yyyyMMdd dates read as numbers
    Map<String, Long> expectedValues = new HashMap<>();
    expectedValues.put("20170112", 20170112L);
    expectedValues.put("20170113", 20170113L);
    expectedValues.put("20170114", 20170114L);
    ListMultimap<String, Long> bucketNameToTimeValuesMap = DataCompletenessTaskUtils.getBucketNameToTimeValuesMap(timeSpec, bucketNameToBucketValue);
    // guard against a vacuous pass: every expected bucket must be present
    Assert.assertEquals(bucketNameToTimeValuesMap.keySet(), expectedValues.keySet());
    for (Entry<String, Long> entry : bucketNameToTimeValuesMap.entries()) {
        String bucketName = entry.getKey();
        Assert.assertEquals(entry.getValue(), expectedValues.get(bucketName));
    }
    // EPOCH
    zone = DateTimeZone.UTC;
    timeFormat = TimeSpec.SINCE_EPOCH_FORMAT;
    // HOURS
    timeGranularity = new TimeGranularity(1, TimeUnit.HOURS);
    timeSpec = new TimeSpec(columnName, timeGranularity, timeFormat);
    bucketNameToBucketValue = new HashMap<>();
    bucketNameToBucketValue.put("2017011200", new DateTime(2017, 1, 12, 0, 0, zone).getMillis());
    bucketNameToBucketValue.put("2017011201", new DateTime(2017, 1, 12, 1, 0, zone).getMillis());
    bucketNameToBucketValue.put("2017011202", new DateTime(2017, 1, 12, 2, 0, zone).getMillis());
    expectedValues = new HashMap<>();
    // hours since epoch values
    expectedValues.put("2017011200", 412272L);
    expectedValues.put("2017011201", 412273L);
    expectedValues.put("2017011202", 412274L);
    bucketNameToTimeValuesMap = DataCompletenessTaskUtils.getBucketNameToTimeValuesMap(timeSpec, bucketNameToBucketValue);
    Assert.assertEquals(bucketNameToTimeValuesMap.keySet(), expectedValues.keySet());
    for (Entry<String, Long> entry : bucketNameToTimeValuesMap.entries()) {
        String bucketName = entry.getKey();
        Assert.assertEquals(entry.getValue(), expectedValues.get(bucketName));
    }
    // MINUTES: 10-minute data granularity, bucket starts spaced 30 minutes apart
    timeGranularity = new TimeGranularity(10, TimeUnit.MINUTES);
    timeSpec = new TimeSpec(columnName, timeGranularity, timeFormat);
    bucketNameToBucketValue = new HashMap<>();
    bucketNameToBucketValue.put("201701120000", new DateTime(2017, 1, 12, 0, 0, zone).getMillis());
    bucketNameToBucketValue.put("201701120030", new DateTime(2017, 1, 12, 0, 30, zone).getMillis());
    bucketNameToBucketValue.put("201701120100", new DateTime(2017, 1, 12, 1, 0, zone).getMillis());
    bucketNameToBucketValue.put("201701120130", new DateTime(2017, 1, 12, 1, 30, zone).getMillis());
    Map<String, List<Long>> expectedValuesList = new HashMap<>();
    // 10 minutes since epoch values (three per bucket)
    expectedValuesList.put("201701120000", Lists.newArrayList(2473632L, 2473633L, 2473634L));
    expectedValuesList.put("201701120030", Lists.newArrayList(2473635L, 2473636L, 2473637L));
    expectedValuesList.put("201701120100", Lists.newArrayList(2473638L, 2473639L, 2473640L));
    expectedValuesList.put("201701120130", Lists.newArrayList(2473641L, 2473642L, 2473643L));
    bucketNameToTimeValuesMap = DataCompletenessTaskUtils.getBucketNameToTimeValuesMap(timeSpec, bucketNameToBucketValue);
    Assert.assertEquals(bucketNameToTimeValuesMap.keySet(), expectedValuesList.keySet());
    for (String bucketName : bucketNameToTimeValuesMap.keySet()) {
        List<Long> timeValues = bucketNameToTimeValuesMap.get(bucketName);
        // sort so the comparison does not depend on the order the values were produced in
        Collections.sort(timeValues);
        Assert.assertEquals(timeValues, expectedValuesList.get(bucketName));
    }
}
Also used : HashMap(java.util.HashMap) DateTimeZone(org.joda.time.DateTimeZone) DateTime(org.joda.time.DateTime) TimeSpec(com.linkedin.thirdeye.api.TimeSpec) TimeGranularity(com.linkedin.thirdeye.api.TimeGranularity) List(java.util.List) Test(org.testng.annotations.Test)

Example 13 with TimeSpec

use of com.linkedin.thirdeye.api.TimeSpec in project pinot by linkedin.

the class DataCompletenessTaskUtilsTest method testGetAdjustedStartForDataset.

/**
 * Exercises {@code DataCompletenessTaskUtils.getAdjustedTimeForDataset} and checks the adjusted
 * timestamp it returns for a daily SIMPLE_DATE_FORMAT spec and for epoch-format specs with
 * HOURS, MILLISECONDS (default case) and 5-MINUTES granularities.
 */
@Test
public void testGetAdjustedStartForDataset() throws Exception {
    final String timeColumn = "Date";

    // SDF, DAYS: 15:46 Los Angeles time is expected to adjust to midnight of the same day
    DateTimeZone losAngeles = DateTimeZone.forID("America/Los_Angeles");
    long laAfternoonMs = new DateTime(2017, 1, 12, 15, 46, losAngeles).getMillis();
    TimeSpec dailySdfSpec = new TimeSpec(timeColumn, new TimeGranularity(1, TimeUnit.DAYS), "SIMPLE_DATE_FORMAT:yyyyMMdd");
    long adjustedDaily = DataCompletenessTaskUtils.getAdjustedTimeForDataset(dailySdfSpec, laAfternoonMs, losAngeles);
    Assert.assertEquals(adjustedDaily, new DateTime(2017, 1, 12, 0, 0, losAngeles).getMillis());

    // The remaining cases use the epoch format in UTC
    final String epochFormat = TimeSpec.SINCE_EPOCH_FORMAT;
    final DateTimeZone utc = DateTimeZone.UTC;
    final long utcAfternoonMs = new DateTime(2017, 1, 12, 15, 46, utc).getMillis();

    // HOURS: 15:46 is expected to adjust to 15:00
    TimeSpec hourlySpec = new TimeSpec(timeColumn, new TimeGranularity(1, TimeUnit.HOURS), epochFormat);
    long adjustedHourly = DataCompletenessTaskUtils.getAdjustedTimeForDataset(hourlySpec, utcAfternoonMs, utc);
    Assert.assertEquals(adjustedHourly, new DateTime(2017, 1, 12, 15, 0, utc).getMillis());

    // DEFAULT (MILLISECONDS): 15:46 is expected to adjust to 15:00 as well
    TimeSpec millisSpec = new TimeSpec(timeColumn, new TimeGranularity(1, TimeUnit.MILLISECONDS), epochFormat);
    long adjustedDefault = DataCompletenessTaskUtils.getAdjustedTimeForDataset(millisSpec, utcAfternoonMs, utc);
    Assert.assertEquals(adjustedDefault, new DateTime(2017, 1, 12, 15, 0, utc).getMillis());

    // MINUTES: 15:46 is expected to adjust to 15:30
    TimeSpec fiveMinuteSpec = new TimeSpec(timeColumn, new TimeGranularity(5, TimeUnit.MINUTES), epochFormat);
    long adjustedFromQuarterTo = DataCompletenessTaskUtils.getAdjustedTimeForDataset(fiveMinuteSpec, utcAfternoonMs, utc);
    Assert.assertEquals(adjustedFromQuarterTo, new DateTime(2017, 1, 12, 15, 30, utc).getMillis());

    // MINUTES: 15:00 and 15:03 are both expected to adjust to 15:00
    long onTheHourMs = new DateTime(2017, 1, 12, 15, 0, utc).getMillis();
    long threePastMs = new DateTime(2017, 1, 12, 15, 3, utc).getMillis();
    long adjustedOnTheHour = DataCompletenessTaskUtils.getAdjustedTimeForDataset(fiveMinuteSpec, onTheHourMs, utc);
    Assert.assertEquals(adjustedOnTheHour, new DateTime(2017, 1, 12, 15, 0, utc).getMillis());
    long adjustedThreePast = DataCompletenessTaskUtils.getAdjustedTimeForDataset(fiveMinuteSpec, threePastMs, utc);
    Assert.assertEquals(adjustedThreePast, new DateTime(2017, 1, 12, 15, 0, utc).getMillis());
}
Also used : TimeGranularity(com.linkedin.thirdeye.api.TimeGranularity) DateTimeZone(org.joda.time.DateTimeZone) DateTime(org.joda.time.DateTime) TimeSpec(com.linkedin.thirdeye.api.TimeSpec) Test(org.testng.annotations.Test)

Example 14 with TimeSpec

use of com.linkedin.thirdeye.api.TimeSpec in project pinot by linkedin.

the class PinotThirdEyeClient method execute.

/**
 * Executes a ThirdEye request: builds the PQL for it, fetches the result set groups through the
 * result-set-group cache, and wraps the parsed rows in a {@link PinotThirdEyeResponse}.
 *
 * <p>When the dataset is configured as metric-as-dimension, several PQL statements are issued
 * (one cache lookup each); otherwise a single PQL statement is issued.
 *
 * @param request the request to execute; its collection selects the dataset config and table
 * @return the response built from the request, the parsed result rows and the dataset time spec
 * @throws Exception if a cache lookup, query construction or result parsing fails
 */
@Override
public PinotThirdEyeResponse execute(ThirdEyeRequest request) throws Exception {
    DatasetConfigDTO config = CACHE_REGISTRY_INSTANCE.getDatasetConfigCache().get(request.getCollection());
    TimeSpec timeSpec = ThirdEyeUtils.getTimeSpecFromDatasetConfig(config);
    List<MetricFunction> functions = request.getMetricFunctions();
    List<String> dimensions = config.getDimensions();
    List<ResultSetGroup> groups = new ArrayList<>();
    // By default, query only offline, unless dataset has been marked as realtime
    String table = ThirdEyeUtils.computeTableName(request.getCollection());

    if (!config.isMetricAsDimension()) {
        // Regular dataset: a single PQL statement covers the request
        String pql = PqlUtils.getPql(request, timeSpec);
        LOG.debug("PQL: {}", pql);
        ResultSetGroup group = CACHE_REGISTRY_INSTANCE.getResultSetGroupCache().get(new PinotQuery(pql, table));
        groups.add(group);
        // format(...) is only evaluated when debug logging is on
        if (LOG.isDebugEnabled()) {
            LOG.debug("Result for: {} {}", pql, format(group));
        }
    } else {
        // Metric-as-dimension dataset: one cache lookup per generated PQL statement
        for (String pql : PqlUtils.getMetricAsDimensionPqls(request, timeSpec, config)) {
            LOG.debug("PQL isMetricAsDimension : {}", pql);
            groups.add(CACHE_REGISTRY_INSTANCE.getResultSetGroupCache().get(new PinotQuery(pql, table)));
        }
    }

    List<ResultSet> flattened = getResultSets(groups);
    List<String[]> rows = parseResultSets(request, flattened, functions, dimensions, config);
    return new PinotThirdEyeResponse(request, rows, timeSpec);
}
Also used : ArrayList(java.util.ArrayList) ResultSetGroup(com.linkedin.pinot.client.ResultSetGroup) TimeSpec(com.linkedin.thirdeye.api.TimeSpec) DatasetConfigDTO(com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO) MetricFunction(com.linkedin.thirdeye.client.MetricFunction) ResultSet(com.linkedin.pinot.client.ResultSet)

Example 15 with TimeSpec

use of com.linkedin.thirdeye.api.TimeSpec in project pinot by linkedin.

the class CollectionMaxDataTimeCacheLoader method load.

/**
 * Loads the max data time, in epoch milliseconds, for a collection.
 *
 * <p>Builds a max-time query from {@code COLLECTION_MAX_TIME_QUERY_TEMPLATE}, refreshes and reads
 * it through the result-set-group cache, and converts the returned time value to milliseconds
 * according to the dataset's time spec. The previously seen max value is recorded in
 * {@code collectionToPrevMaxDataTimeMap} on success and removed when the result is empty or an
 * exception occurs.
 *
 * @param collection name of the collection (dataset) to look up
 * @return the max data time in milliseconds, or {@code System.currentTimeMillis()} when no
 *         positive max time could be determined
 * @throws Exception declared by the overridden signature; failures inside the lookup itself are
 *         caught and logged rather than propagated
 */
@Override
public Long load(String collection) throws Exception {
    LOGGER.info("Loading maxDataTime cache {}", collection);
    long maxTime = 0;
    try {
        DatasetConfigDTO datasetConfig = datasetConfigDAO.findByDataset(collection);
        // By default, query only offline, unless dataset has been marked as realtime
        String tableName = ThirdEyeUtils.computeTableName(collection);
        TimeSpec timeSpec = ThirdEyeUtils.getTimeSpecFromDatasetConfig(datasetConfig);
        long prevMaxDataTime = getPrevMaxDataTime(collection, timeSpec);
        String maxTimePql = String.format(COLLECTION_MAX_TIME_QUERY_TEMPLATE, timeSpec.getColumnName(), tableName, timeSpec.getColumnName(), prevMaxDataTime);
        PinotQuery maxTimePinotQuery = new PinotQuery(maxTimePql, tableName);
        // refresh first so the subsequent get does not serve a stale cached result
        resultSetGroupCache.refresh(maxTimePinotQuery);
        ResultSetGroup resultSetGroup = resultSetGroupCache.get(maxTimePinotQuery);
        if (resultSetGroup.getResultSetCount() == 0 || resultSetGroup.getResultSet(0).getRowCount() == 0) {
            LOGGER.info("resultSetGroup is Empty for collection {} is {}", tableName, resultSetGroup);
            this.collectionToPrevMaxDataTimeMap.remove(collection);
        } else {
            // Narrow the double result to long with a primitive cast; this matches what
            // Double.longValue() returns without using the deprecated Double constructor.
            double rawMaxTime = resultSetGroup.getResultSet(0).getDouble(0);
            long endTime = (long) rawMaxTime;
            this.collectionToPrevMaxDataTimeMap.put(collection, endTime);
            String timeFormat = timeSpec.getFormat();
            if (StringUtils.isBlank(timeFormat) || TimeSpec.SINCE_EPOCH_FORMAT.equals(timeFormat)) {
                // endTime + 1 to make sure we cover the time range of that time value.
                maxTime = timeSpec.getDataGranularity().toMillis(endTime + 1) - 1;
            } else {
                // Non-epoch format: parse the numeric time value with the spec's pattern in the
                // collection's data time zone.
                DateTimeFormatter inputDataDateTimeFormatter = DateTimeFormat.forPattern(timeFormat).withZone(Utils.getDataTimeZone(collection));
                maxTime = DateTime.parse(String.valueOf(endTime), inputDataDateTimeFormatter).getMillis();
            }
        }
    } catch (Exception e) {
        // Best effort: log, forget the previous max for this collection, and fall through to the
        // current-time fallback below.
        LOGGER.warn("Exception getting maxTime from collection: {}", collection, e);
        this.collectionToPrevMaxDataTimeMap.remove(collection);
    }
    if (maxTime <= 0) {
        maxTime = System.currentTimeMillis();
    }
    return maxTime;
}
Also used : DatasetConfigDTO(com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO) PinotQuery(com.linkedin.thirdeye.client.pinot.PinotQuery) DateTimeFormatter(org.joda.time.format.DateTimeFormatter) ResultSetGroup(com.linkedin.pinot.client.ResultSetGroup) TimeSpec(com.linkedin.thirdeye.api.TimeSpec)

Aggregations

TimeSpec (com.linkedin.thirdeye.api.TimeSpec): 26, DatasetConfigDTO (com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO): 14, TimeGranularity (com.linkedin.thirdeye.api.TimeGranularity): 13, DateTime (org.joda.time.DateTime): 11, DateTimeZone (org.joda.time.DateTimeZone): 8, TimeUnit (java.util.concurrent.TimeUnit): 6, DateTimeFormatter (org.joda.time.format.DateTimeFormatter): 6, ExecutionException (java.util.concurrent.ExecutionException): 5, MetricExpression (com.linkedin.thirdeye.client.MetricExpression): 4, ArrayList (java.util.ArrayList): 4, Path (javax.ws.rs.Path): 4, Test (org.testng.annotations.Test): 4, IOException (java.io.IOException): 3, JSONException (org.json.JSONException): 3, ResultSet (com.linkedin.pinot.client.ResultSet): 2, ResultSetGroup (com.linkedin.pinot.client.ResultSetGroup): 2, TimeOnTimeComparisonRequest (com.linkedin.thirdeye.client.comparison.TimeOnTimeComparisonRequest): 2, TimeSeriesHandler (com.linkedin.thirdeye.client.timeseries.TimeSeriesHandler): 2, TimeSeriesRequest (com.linkedin.thirdeye.client.timeseries.TimeSeriesRequest): 2, TimeSeriesResponse (com.linkedin.thirdeye.client.timeseries.TimeSeriesResponse): 2