Use of com.linkedin.thirdeye.api.TimeSpec in project pinot by linkedin.
From the class DataCompletenessTaskUtilsTest, method testGetBucketSizeForDataset.
@Test
public void testGetBucketSizeForDataset() throws Exception {
  String columnName = "Date";
  // DAYS bucket
  TimeGranularity timeGranularity = new TimeGranularity(1, TimeUnit.DAYS);
  String timeFormat = TimeSpec.SINCE_EPOCH_FORMAT;
  TimeSpec timeSpec = new TimeSpec(columnName, timeGranularity, timeFormat);
  long bucketSize = DataCompletenessTaskUtils.getBucketSizeInMSForDataset(timeSpec);
  Assert.assertEquals(bucketSize, 24 * 60 * 60_000);
  // HOURS bucket
  timeGranularity = new TimeGranularity(1, TimeUnit.HOURS);
  timeSpec = new TimeSpec(columnName, timeGranularity, timeFormat);
  bucketSize = DataCompletenessTaskUtils.getBucketSizeInMSForDataset(timeSpec);
  Assert.assertEquals(bucketSize, 60 * 60_000);
  // MINUTES granularity falls into a 30-minute bucket
  timeGranularity = new TimeGranularity(1, TimeUnit.MINUTES);
  timeSpec = new TimeSpec(columnName, timeGranularity, timeFormat);
  bucketSize = DataCompletenessTaskUtils.getBucketSizeInMSForDataset(timeSpec);
  Assert.assertEquals(bucketSize, 30 * 60_000);
  // DEFAULT bucket (any other unit) is one hour
  timeGranularity = new TimeGranularity(1, TimeUnit.MILLISECONDS);
  timeSpec = new TimeSpec(columnName, timeGranularity, timeFormat);
  bucketSize = DataCompletenessTaskUtils.getBucketSizeInMSForDataset(timeSpec);
  Assert.assertEquals(bucketSize, 60 * 60_000);
}
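The assertions above pin down the mapping: day-level data is checked in one-day buckets, hour-level data in one-hour buckets, minute-level data in 30-minute buckets, and any other unit falls back to one hour. A minimal sketch of that mapping, assuming TimeGranularity exposes its TimeUnit via getUnit(); the actual getBucketSizeInMSForDataset may be structured differently:

static long bucketSizeInMsSketch(TimeSpec timeSpec) {
  switch (timeSpec.getDataGranularity().getUnit()) {
    case DAYS:
      return TimeUnit.DAYS.toMillis(1);      // 86_400_000 ms
    case MINUTES:
      return TimeUnit.MINUTES.toMillis(30);  // minute-level data is grouped into 30-minute buckets
    case HOURS:
    default:
      return TimeUnit.HOURS.toMillis(1);     // 3_600_000 ms, also the fallback
  }
}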
Use of com.linkedin.thirdeye.api.TimeSpec in project pinot by linkedin.
From the class DataCompletenessTaskUtilsTest, method testGetBucketNameToTimeValuesMap.
@Test
public void testGetBucketNameToTimeValuesMap() {
  DateTimeZone zone = DateTimeZone.forID("America/Los_Angeles");
  // SDF
  String columnName = "Date";
  TimeGranularity timeGranularity = new TimeGranularity(1, TimeUnit.DAYS);
  String timeFormat = "SIMPLE_DATE_FORMAT:yyyyMMdd";
  TimeSpec timeSpec = new TimeSpec(columnName, timeGranularity, timeFormat);
  // DAYS
  Map<String, Long> bucketNameToBucketValue = new HashMap<>();
  bucketNameToBucketValue.put("20170112", new DateTime(2017, 01, 12, 0, 0, zone).getMillis());
  bucketNameToBucketValue.put("20170113", new DateTime(2017, 01, 13, 0, 0, zone).getMillis());
  bucketNameToBucketValue.put("20170114", new DateTime(2017, 01, 14, 0, 0, zone).getMillis());
  Map<String, Long> expectedValues = new HashMap<>();
  expectedValues.put("20170112", 20170112L);
  expectedValues.put("20170113", 20170113L);
  expectedValues.put("20170114", 20170114L);
  ListMultimap<String, Long> bucketNameToTimeValuesMap =
      DataCompletenessTaskUtils.getBucketNameToTimeValuesMap(timeSpec, bucketNameToBucketValue);
  for (Entry<String, Long> entry : bucketNameToTimeValuesMap.entries()) {
    String bucketName = entry.getKey();
    Assert.assertEquals(entry.getValue(), expectedValues.get(bucketName));
  }
  // EPOCH
  zone = DateTimeZone.UTC;
  timeFormat = TimeSpec.SINCE_EPOCH_FORMAT;
  timeSpec = new TimeSpec(columnName, timeGranularity, timeFormat);
  // HOURS
  timeGranularity = new TimeGranularity(1, TimeUnit.HOURS);
  timeSpec = new TimeSpec(columnName, timeGranularity, timeFormat);
  bucketNameToBucketValue = new HashMap<>();
  bucketNameToBucketValue.put("2017011200", new DateTime(2017, 01, 12, 0, 0, zone).getMillis());
  bucketNameToBucketValue.put("2017011201", new DateTime(2017, 01, 12, 1, 0, zone).getMillis());
  bucketNameToBucketValue.put("2017011202", new DateTime(2017, 01, 12, 2, 0, zone).getMillis());
  expectedValues = new HashMap<>();
  // hours since epoch values
  expectedValues.put("2017011200", 412272L);
  expectedValues.put("2017011201", 412273L);
  expectedValues.put("2017011202", 412274L);
  bucketNameToTimeValuesMap = DataCompletenessTaskUtils.getBucketNameToTimeValuesMap(timeSpec, bucketNameToBucketValue);
  for (Entry<String, Long> entry : bucketNameToTimeValuesMap.entries()) {
    String bucketName = entry.getKey();
    Assert.assertEquals(entry.getValue(), expectedValues.get(bucketName));
  }
  // MINUTES
  timeGranularity = new TimeGranularity(10, TimeUnit.MINUTES);
  timeSpec = new TimeSpec(columnName, timeGranularity, timeFormat);
  bucketNameToBucketValue = new HashMap<>();
  bucketNameToBucketValue.put("201701120000", new DateTime(2017, 01, 12, 0, 0, zone).getMillis());
  bucketNameToBucketValue.put("201701120030", new DateTime(2017, 01, 12, 0, 30, zone).getMillis());
  bucketNameToBucketValue.put("201701120100", new DateTime(2017, 01, 12, 1, 00, zone).getMillis());
  bucketNameToBucketValue.put("201701120130", new DateTime(2017, 01, 12, 1, 30, zone).getMillis());
  Map<String, List<Long>> expectedValuesList = new HashMap<>();
  // 10 minutes since epoch values
  expectedValuesList.put("201701120000", Lists.newArrayList(2473632L, 2473633L, 2473634L));
  expectedValuesList.put("201701120030", Lists.newArrayList(2473635L, 2473636L, 2473637L));
  expectedValuesList.put("201701120100", Lists.newArrayList(2473638L, 2473639L, 2473640L));
  expectedValuesList.put("201701120130", Lists.newArrayList(2473641L, 2473642L, 2473643L));
  bucketNameToTimeValuesMap = DataCompletenessTaskUtils.getBucketNameToTimeValuesMap(timeSpec, bucketNameToBucketValue);
  for (String bucketName : bucketNameToTimeValuesMap.keySet()) {
    List<Long> timeValues = bucketNameToTimeValuesMap.get(bucketName);
    Collections.sort(timeValues);
    Assert.assertEquals(timeValues, expectedValuesList.get(bucketName));
  }
}
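The expected epoch values are simply the bucket start in UTC divided by the granularity: 2017-01-12T00:00Z lies 17,178 days after the epoch, which is 412,272 hours or 2,473,632 ten-minute intervals; each 30-minute bucket therefore spans three consecutive 10-minute values, matching the three-element lists above. A quick check, reusing the Joda-Time and TimeUnit types the test already depends on:

long millis = new DateTime(2017, 1, 12, 0, 0, DateTimeZone.UTC).getMillis(); // 1_484_179_200_000
System.out.println(TimeUnit.MILLISECONDS.toHours(millis));  // 412272
System.out.println(millis / TimeUnit.MINUTES.toMillis(10)); // 2473632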
Use of com.linkedin.thirdeye.api.TimeSpec in project pinot by linkedin.
From the class DataCompletenessTaskUtilsTest, method testGetAdjustedStartForDataset.
@Test
public void testGetAdjustedStartForDataset() throws Exception {
  DateTimeZone zone = DateTimeZone.forID("America/Los_Angeles");
  DateTime dateTime1 = new DateTime(2017, 01, 12, 15, 46, zone);
  // SDF, DAYS
  long startTime = dateTime1.getMillis();
  String columnName = "Date";
  TimeGranularity timeGranularity = new TimeGranularity(1, TimeUnit.DAYS);
  String timeFormat = "SIMPLE_DATE_FORMAT:yyyyMMdd";
  TimeSpec timeSpec = new TimeSpec(columnName, timeGranularity, timeFormat);
  long adjustedStartTime = DataCompletenessTaskUtils.getAdjustedTimeForDataset(timeSpec, startTime, zone);
  Assert.assertEquals(adjustedStartTime, new DateTime(2017, 01, 12, 0, 0, zone).getMillis());
  // EPOCH
  timeFormat = TimeSpec.SINCE_EPOCH_FORMAT;
  // HOURS
  zone = DateTimeZone.UTC;
  dateTime1 = new DateTime(2017, 01, 12, 15, 46, zone);
  startTime = dateTime1.getMillis();
  timeGranularity = new TimeGranularity(1, TimeUnit.HOURS);
  timeSpec = new TimeSpec(columnName, timeGranularity, timeFormat);
  adjustedStartTime = DataCompletenessTaskUtils.getAdjustedTimeForDataset(timeSpec, startTime, zone);
  Assert.assertEquals(adjustedStartTime, new DateTime(2017, 01, 12, 15, 0, zone).getMillis());
  // DEFAULT
  timeGranularity = new TimeGranularity(1, TimeUnit.MILLISECONDS);
  timeSpec = new TimeSpec(columnName, timeGranularity, timeFormat);
  adjustedStartTime = DataCompletenessTaskUtils.getAdjustedTimeForDataset(timeSpec, startTime, zone);
  Assert.assertEquals(adjustedStartTime, new DateTime(2017, 01, 12, 15, 0, zone).getMillis());
  // MINUTES
  timeGranularity = new TimeGranularity(5, TimeUnit.MINUTES);
  timeSpec = new TimeSpec(columnName, timeGranularity, timeFormat);
  adjustedStartTime = DataCompletenessTaskUtils.getAdjustedTimeForDataset(timeSpec, startTime, zone);
  Assert.assertEquals(adjustedStartTime, new DateTime(2017, 01, 12, 15, 30, zone).getMillis());
  DateTime dateTime2 = new DateTime(2017, 01, 12, 15, 00, zone);
  DateTime dateTime3 = new DateTime(2017, 01, 12, 15, 03, zone);
  startTime = dateTime2.getMillis();
  adjustedStartTime = DataCompletenessTaskUtils.getAdjustedTimeForDataset(timeSpec, startTime, zone);
  Assert.assertEquals(adjustedStartTime, new DateTime(2017, 01, 12, 15, 0, zone).getMillis());
  startTime = dateTime3.getMillis();
  adjustedStartTime = DataCompletenessTaskUtils.getAdjustedTimeForDataset(timeSpec, startTime, zone);
  Assert.assertEquals(adjustedStartTime, new DateTime(2017, 01, 12, 15, 0, zone).getMillis());
}
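Note the MINUTES case: even with a 5-minute granularity, 15:46 is adjusted to 15:30, because the start time snaps to the data-completeness bucket boundary (30 minutes for minute-level data, one hour otherwise) rather than to the raw granularity. For the epoch format this amounts to modular arithmetic; a hedged sketch (the SIMPLE_DATE_FORMAT day case presumably floors on calendar fields in the dataset's time zone instead):

static long floorToBucketSketch(long startMillis, long bucketSizeMillis) {
  // e.g. 15:46 with 30-minute buckets: drop the 16-minute remainder to land on 15:30
  return startMillis - (startMillis % bucketSizeMillis);
}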
Use of com.linkedin.thirdeye.api.TimeSpec in project pinot by linkedin.
From the class PinotThirdEyeClient, method execute.
@Override
public PinotThirdEyeResponse execute(ThirdEyeRequest request) throws Exception {
  DatasetConfigDTO datasetConfig = CACHE_REGISTRY_INSTANCE.getDatasetConfigCache().get(request.getCollection());
  TimeSpec dataTimeSpec = ThirdEyeUtils.getTimeSpecFromDatasetConfig(datasetConfig);
  List<MetricFunction> metricFunctions = request.getMetricFunctions();
  List<String> dimensionNames = datasetConfig.getDimensions();
  List<ResultSetGroup> resultSetGroups = new ArrayList<>();
  // By default, query only offline, unless the dataset has been marked as realtime
  String tableName = ThirdEyeUtils.computeTableName(request.getCollection());
  if (datasetConfig.isMetricAsDimension()) {
    List<String> pqls = PqlUtils.getMetricAsDimensionPqls(request, dataTimeSpec, datasetConfig);
    for (String pql : pqls) {
      LOG.debug("PQL isMetricAsDimension: {}", pql);
      ResultSetGroup result = CACHE_REGISTRY_INSTANCE.getResultSetGroupCache().get(new PinotQuery(pql, tableName));
      resultSetGroups.add(result);
    }
  } else {
    String pql = PqlUtils.getPql(request, dataTimeSpec);
    LOG.debug("PQL: {}", pql);
    ResultSetGroup result = CACHE_REGISTRY_INSTANCE.getResultSetGroupCache().get(new PinotQuery(pql, tableName));
    resultSetGroups.add(result);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Result for: {} {}", pql, format(result));
    }
  }
  List<ResultSet> resultSets = getResultSets(resultSetGroups);
  List<String[]> resultRows = parseResultSets(request, resultSets, metricFunctions, dimensionNames, datasetConfig);
  return new PinotThirdEyeResponse(request, resultRows, dataTimeSpec);
}
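The "offline unless marked realtime" comment refers to Pinot's table-naming convention, where the broker routes a name suffixed with "_OFFLINE" to offline segments and one suffixed with "_REALTIME" to realtime segments. A hypothetical sketch of what ThirdEyeUtils.computeTableName presumably does (the real method reads the dataset config to decide):

static String computeTableNameSketch(String collection, boolean isRealtime) {
  // hypothetical: the actual suffix choice is driven by the DatasetConfigDTO flags
  return isRealtime ? collection + "_REALTIME" : collection + "_OFFLINE";
}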
Use of com.linkedin.thirdeye.api.TimeSpec in project pinot by linkedin.
From the class CollectionMaxDataTimeCacheLoader, method load.
@Override
public Long load(String collection) throws Exception {
  LOGGER.info("Loading maxDataTime cache {}", collection);
  long maxTime = 0;
  try {
    DatasetConfigDTO datasetConfig = datasetConfigDAO.findByDataset(collection);
    // By default, query only offline, unless the dataset has been marked as realtime
    String tableName = ThirdEyeUtils.computeTableName(collection);
    TimeSpec timeSpec = ThirdEyeUtils.getTimeSpecFromDatasetConfig(datasetConfig);
    long prevMaxDataTime = getPrevMaxDataTime(collection, timeSpec);
    String maxTimePql = String.format(COLLECTION_MAX_TIME_QUERY_TEMPLATE, timeSpec.getColumnName(), tableName,
        timeSpec.getColumnName(), prevMaxDataTime);
    PinotQuery maxTimePinotQuery = new PinotQuery(maxTimePql, tableName);
    resultSetGroupCache.refresh(maxTimePinotQuery);
    ResultSetGroup resultSetGroup = resultSetGroupCache.get(maxTimePinotQuery);
    if (resultSetGroup.getResultSetCount() == 0 || resultSetGroup.getResultSet(0).getRowCount() == 0) {
      LOGGER.info("resultSetGroup is empty for collection {}: {}", tableName, resultSetGroup);
      this.collectionToPrevMaxDataTimeMap.remove(collection);
    } else {
      long endTime = (long) resultSetGroup.getResultSet(0).getDouble(0);
      this.collectionToPrevMaxDataTimeMap.put(collection, endTime);
      // endTime + 1 to make sure we cover the full time range of that time value
      String timeFormat = timeSpec.getFormat();
      if (StringUtils.isBlank(timeFormat) || TimeSpec.SINCE_EPOCH_FORMAT.equals(timeFormat)) {
        maxTime = timeSpec.getDataGranularity().toMillis(endTime + 1) - 1;
      } else {
        DateTimeFormatter inputDataDateTimeFormatter =
            DateTimeFormat.forPattern(timeFormat).withZone(Utils.getDataTimeZone(collection));
        maxTime = DateTime.parse(String.valueOf(endTime), inputDataDateTimeFormatter).getMillis();
      }
    }
  } catch (Exception e) {
    LOGGER.warn("Exception getting maxTime from collection: {}", collection, e);
    this.collectionToPrevMaxDataTimeMap.remove(collection);
  }
  if (maxTime <= 0) {
    maxTime = System.currentTimeMillis();
  }
  return maxTime;
}
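For epoch formats, the loader widens the last observed time value to the end of its bucket: toMillis(endTime + 1) - 1 is the last millisecond that value covers. A worked example, assuming an hourly granularity:

long endTime = 412_274L; // hours since epoch: 2017-01-12T02:00:00Z
long maxTime = TimeUnit.HOURS.toMillis(endTime + 1) - 1;
// 1_484_190_000_000 - 1 ms, i.e. 2017-01-12T02:59:59.999Z, covering the whole hour bucket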