Search in sources :

Example 6 with ResultSetGroup

use of com.linkedin.pinot.client.ResultSetGroup in project pinot by linkedin.

In the class CollectionMaxDataTimeCacheLoader, method load:

/**
 * Loads the max data time, in epoch milliseconds, for the given collection.
 *
 * <p>The max value of the dataset's time column is fetched through the result set group cache
 * (the entry is refreshed first) and converted to milliseconds according to the dataset's
 * time spec. The raw time value is also recorded in {@code collectionToPrevMaxDataTimeMap};
 * the entry is removed when the query returns no rows or the lookup fails.
 *
 * @param collection name of the collection (dataset) to look up
 * @return the max data time in epoch milliseconds, or the current system time when no
 *     positive value could be determined (empty result, or any exception during lookup)
 * @throws Exception declared by the overridden signature; exceptions raised inside the
 *     lookup itself are caught and logged rather than propagated
 */
@Override
public Long load(String collection) throws Exception {
    LOGGER.info("Loading maxDataTime cache {}", collection);
    long maxTime = 0;
    try {
        DatasetConfigDTO datasetConfig = datasetConfigDAO.findByDataset(collection);
        String tableName = ThirdEyeUtils.computeTableName(collection);
        TimeSpec timeSpec = ThirdEyeUtils.getTimeSpecFromDatasetConfig(datasetConfig);
        long prevMaxDataTime = getPrevMaxDataTime(collection, timeSpec);
        String maxTimePql = String.format(COLLECTION_MAX_TIME_QUERY_TEMPLATE, timeSpec.getColumnName(), tableName, timeSpec.getColumnName(), prevMaxDataTime);
        PinotQuery maxTimePinotQuery = new PinotQuery(maxTimePql, tableName);
        // Refresh before reading so the lookup does not reuse a previously cached result.
        resultSetGroupCache.refresh(maxTimePinotQuery);
        ResultSetGroup resultSetGroup = resultSetGroupCache.get(maxTimePinotQuery);
        if (resultSetGroup.getResultSetCount() == 0 || resultSetGroup.getResultSet(0).getRowCount() == 0) {
            LOGGER.info("resultSetGroup is Empty for collection {} is {}", tableName, resultSetGroup);
            this.collectionToPrevMaxDataTimeMap.remove(collection);
        } else {
            // Truncate the double result to a long time value (primitive cast instead of the
            // deprecated new Double(...).longValue(); the result is the same).
            long endTime = (long) resultSetGroup.getResultSet(0).getDouble(0);
            this.collectionToPrevMaxDataTimeMap.put(collection, endTime);
            String timeFormat = timeSpec.getFormat();
            if (StringUtils.isBlank(timeFormat) || TimeSpec.SINCE_EPOCH_FORMAT.equals(timeFormat)) {
                // endTime + 1 to make sure we cover the time range of that time value.
                maxTime = timeSpec.getDataGranularity().toMillis(endTime + 1) - 1;
            } else {
                // Formatted time column: parse the value with the dataset's pattern and time zone.
                DateTimeFormatter inputDataDateTimeFormatter = DateTimeFormat.forPattern(timeFormat).withZone(Utils.getDataTimeZone(collection));
                maxTime = DateTime.parse(String.valueOf(endTime), inputDataDateTimeFormatter).getMillis();
            }
        }
    } catch (Exception e) {
        // Best effort: log, forget the previous max time, and fall through to the fallback below.
        LOGGER.warn("Exception getting maxTime from collection: {}", collection, e);
        this.collectionToPrevMaxDataTimeMap.remove(collection);
    }
    // No usable max time (empty result or failure): fall back to "now".
    if (maxTime <= 0) {
        maxTime = System.currentTimeMillis();
    }
    return maxTime;
}
Also used : DatasetConfigDTO(com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO) PinotQuery(com.linkedin.thirdeye.client.pinot.PinotQuery) DateTimeFormatter(org.joda.time.format.DateTimeFormatter) ResultSetGroup(com.linkedin.pinot.client.ResultSetGroup) TimeSpec(com.linkedin.thirdeye.api.TimeSpec)

Example 7 with ResultSetGroup

use of com.linkedin.pinot.client.ResultSetGroup in project pinot by linkedin.

In the class DataCompletenessTaskUtils, method getBucketNameToCountStarMap:

/**
 * Computes, for each bucket, the sum of {@code count(*)} over the bucket's time values.
 *
 * <p>A single PQL query grouped by the time column is issued through the result set group
 * cache; the per-time-value counts are then summed per bucket. Time values absent from the
 * response count as 0.
 *
 * @param dataset dataset (table) to query
 * @param timeSpec time spec supplying the name of the time column
 * @param bucketNameToTimeValues bucket name to the time values that belong to that bucket
 * @return bucket name to summed count; empty when there are no time values to query or when
 *     the cache returns a null or empty result set group; all buckets map to 0 when the
 *     query fails with an {@link ExecutionException}
 */
private static Map<String, Long> getBucketNameToCountStarMap(String dataset, TimeSpec timeSpec, ListMultimap<String, Long> bucketNameToTimeValues) {
    Map<String, Long> bucketNameToCountStar = new HashMap<>();
    // Nothing to look up: without this guard the query would have an empty WHERE clause.
    if (bucketNameToTimeValues.isEmpty()) {
        return bucketNameToCountStar;
    }
    // generate request: one equality predicate per time value, OR-ed together
    StringBuilder sb = new StringBuilder();
    String delimiter = "";
    for (Long timeValue : bucketNameToTimeValues.values()) {
        sb.append(delimiter);
        delimiter = " OR ";
        sb.append(String.format("%s='%s'", timeSpec.getColumnName(), timeValue));
    }
    // Ask for as many groups as there are time values so no group is cut off by "top".
    long top = bucketNameToTimeValues.values().size();
    String pql = String.format("select count(*) from %s where %s group by %s top %s", dataset, sb.toString(), timeSpec.getColumnName(), top);
    Map<Long, Long> timeValueToCount = new HashMap<>();
    try {
        ResultSetGroup resultSetGroup = CACHE_REGISTRY.getResultSetGroupCache().get(new PinotQuery(pql, dataset));
        if (resultSetGroup == null || resultSetGroup.getResultSetCount() <= 0) {
            return bucketNameToCountStar;
        }
        ResultSet resultSet = resultSetGroup.getResultSet(0);
        for (int i = 0; i < resultSet.getRowCount(); i++) {
            // Group key is the time value; column 0 holds the count for that group.
            Long timeValue = Long.valueOf(resultSet.getGroupKeyString(i, 0));
            Long count = resultSet.getLong(i, 0);
            timeValueToCount.put(timeValue, count);
        }
    } catch (ExecutionException e) {
        // Best effort: on failure every bucket falls through to a count of 0 below.
        LOG.error("Exception in getting count *. PQL:{}", pql, e);
    }
    // parse response to get counts
    for (String bucketName : bucketNameToTimeValues.keySet()) {
        List<Long> timeValues = bucketNameToTimeValues.get(bucketName);
        // Primitive accumulator avoids re-boxing on every addition.
        long sumOfCountForBucket = 0L;
        for (Long timeValue : timeValues) {
            sumOfCountForBucket += timeValueToCount.getOrDefault(timeValue, 0L);
        }
        bucketNameToCountStar.put(bucketName, sumOfCountForBucket);
    }
    return bucketNameToCountStar;
}
Also used : HashMap(java.util.HashMap) ResultSetGroup(com.linkedin.pinot.client.ResultSetGroup) PinotQuery(com.linkedin.thirdeye.client.pinot.PinotQuery) ResultSet(com.linkedin.pinot.client.ResultSet) ExecutionException(java.util.concurrent.ExecutionException)

Aggregations

ResultSetGroup (com.linkedin.pinot.client.ResultSetGroup)7 PinotQuery (com.linkedin.thirdeye.client.pinot.PinotQuery)5 ResultSet (com.linkedin.pinot.client.ResultSet)4 DatasetConfigDTO (com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO)4 TimeSpec (com.linkedin.thirdeye.api.TimeSpec)2 MetricDataset (com.linkedin.thirdeye.client.cache.MetricDataset)2 QueryCache (com.linkedin.thirdeye.client.cache.QueryCache)2 MetricConfigDTO (com.linkedin.thirdeye.datalayer.dto.MetricConfigDTO)2 ArrayList (java.util.ArrayList)2 CacheBuilder (com.google.common.cache.CacheBuilder)1 LoadingCache (com.google.common.cache.LoadingCache)1 RemovalListener (com.google.common.cache.RemovalListener)1 RemovalNotification (com.google.common.cache.RemovalNotification)1 Connection (com.linkedin.pinot.client.Connection)1 PinotClientException (com.linkedin.pinot.client.PinotClientException)1 ThirdEyeAnomalyConfiguration (com.linkedin.thirdeye.anomaly.ThirdEyeAnomalyConfiguration)1 MonitorConfiguration (com.linkedin.thirdeye.anomaly.monitor.MonitorConfiguration)1 TimeGranularity (com.linkedin.thirdeye.api.TimeGranularity)1 MetricFunction (com.linkedin.thirdeye.client.MetricFunction)1 ThirdEyeCacheRegistry (com.linkedin.thirdeye.client.ThirdEyeCacheRegistry)1