Search in sources :

Example 1 with PinotQuery

use of com.linkedin.thirdeye.client.pinot.PinotQuery in project pinot by linkedin.

the class ThirdEyeCacheRegistry method initCaches.

private static void initCaches(ThirdEyeConfiguration config) {
    ThirdEyeCacheRegistry cacheRegistry = ThirdEyeCacheRegistry.getInstance();
    RemovalListener<PinotQuery, ResultSetGroup> listener = new RemovalListener<PinotQuery, ResultSetGroup>() {

        @Override
        public void onRemoval(RemovalNotification<PinotQuery, ResultSetGroup> notification) {
            LOGGER.info("Expired {}", notification.getKey().getPql());
        }
    };
    // ResultSetGroup Cache. The size of this cache is limited by the total number of buckets in all ResultSetGroup.
    // We estimate that 1 bucket (including overhead) consumes 1KB and this cache is allowed to use up to 50% of max
    // heap space.
    long maxBucketNumber = getApproximateMaxBucketNumber(DEFAULT_HEAP_PERCENTAGE_FOR_RESULTSETGROUP_CACHE);
    LoadingCache<PinotQuery, ResultSetGroup> resultSetGroupCache = CacheBuilder.newBuilder()
        .removalListener(listener)
        .expireAfterAccess(1, TimeUnit.HOURS)
        .maximumWeight(maxBucketNumber)
        .weigher((pinotQuery, resultSetGroup) -> {
            int resultSetCount = resultSetGroup.getResultSetCount();
            int weight = 0;
            for (int idx = 0; idx < resultSetCount; ++idx) {
                com.linkedin.pinot.client.ResultSet resultSet = resultSetGroup.getResultSet(idx);
                weight += (resultSet.getColumnCount() * resultSet.getRowCount());
            }
            return weight;
        })
        .build(new ResultSetGroupCacheLoader(pinotThirdeyeClientConfig));
    cacheRegistry.registerResultSetGroupCache(resultSetGroupCache);
    LOGGER.info("Max bucket number for ResultSetGroup cache is set to {}", maxBucketNumber);
    // CollectionMaxDataTime Cache
    LoadingCache<String, Long> collectionMaxDataTimeCache = CacheBuilder.newBuilder().refreshAfterWrite(5, TimeUnit.MINUTES).build(new CollectionMaxDataTimeCacheLoader(resultSetGroupCache, datasetConfigDAO));
    cacheRegistry.registerCollectionMaxDataTimeCache(collectionMaxDataTimeCache);
    // Query Cache
    QueryCache queryCache = new QueryCache(thirdEyeClient, Executors.newFixedThreadPool(10));
    cacheRegistry.registerQueryCache(queryCache);
    // Dimension Filter cache
    LoadingCache<String, String> dimensionFiltersCache = CacheBuilder.newBuilder().build(new DimensionFiltersCacheLoader(cacheRegistry.getQueryCache()));
    cacheRegistry.registerDimensionFiltersCache(dimensionFiltersCache);
    // Dashboards cache
    LoadingCache<String, String> dashboardsCache = CacheBuilder.newBuilder().build(new DashboardsCacheLoader(dashboardConfigDAO));
    cacheRegistry.registerDashboardsCache(dashboardsCache);
    // Collections cache
    CollectionsCache collectionsCache = new CollectionsCache(datasetConfigDAO, config);
    cacheRegistry.registerCollectionsCache(collectionsCache);
    // DatasetConfig cache
    LoadingCache<String, DatasetConfigDTO> datasetConfigCache = CacheBuilder.newBuilder().build(new DatasetConfigCacheLoader(datasetConfigDAO));
    cacheRegistry.registerDatasetConfigCache(datasetConfigCache);
    // MetricConfig cache
    LoadingCache<MetricDataset, MetricConfigDTO> metricConfigCache = CacheBuilder.newBuilder().build(new MetricConfigCacheLoader(metricConfigDAO));
    cacheRegistry.registerMetricConfigCache(metricConfigCache);
    // DashboardConfigs cache
    LoadingCache<String, List<DashboardConfigDTO>> dashboardConfigsCache = CacheBuilder.newBuilder().build(new DashboardConfigCacheLoader(dashboardConfigDAO));
    cacheRegistry.registerDashboardConfigsCache(dashboardConfigsCache);
}
Also used : ResultSetGroup(com.linkedin.pinot.client.ResultSetGroup) MetricConfigDTO(com.linkedin.thirdeye.datalayer.dto.MetricConfigDTO) LoadingCache(com.google.common.cache.LoadingCache) DimensionFiltersCacheLoader(com.linkedin.thirdeye.client.cache.DimensionFiltersCacheLoader) PinotQuery(com.linkedin.thirdeye.client.pinot.PinotQuery) DashboardConfigManager(com.linkedin.thirdeye.datalayer.bao.DashboardConfigManager) LoggerFactory(org.slf4j.LoggerFactory) ThirdEyeConfiguration(com.linkedin.thirdeye.common.ThirdEyeConfiguration) DatasetConfigManager(com.linkedin.thirdeye.datalayer.bao.DatasetConfigManager) MetricDataset(com.linkedin.thirdeye.client.cache.MetricDataset) MetricConfigManager(com.linkedin.thirdeye.datalayer.bao.MetricConfigManager) PinotThirdEyeClient(com.linkedin.thirdeye.client.pinot.PinotThirdEyeClient) QueryCache(com.linkedin.thirdeye.client.cache.QueryCache) CollectionMaxDataTimeCacheLoader(com.linkedin.thirdeye.client.cache.CollectionMaxDataTimeCacheLoader) MetricConfigCacheLoader(com.linkedin.thirdeye.client.cache.MetricConfigCacheLoader) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) CacheResource(com.linkedin.thirdeye.dashboard.resources.CacheResource) DashboardConfigCacheLoader(com.linkedin.thirdeye.client.cache.DashboardConfigCacheLoader) RemovalNotification(com.google.common.cache.RemovalNotification) CollectionsCache(com.linkedin.thirdeye.client.cache.CollectionsCache) Logger(org.slf4j.Logger) PinotThirdEyeClientConfig(com.linkedin.thirdeye.client.pinot.PinotThirdEyeClientConfig) Executors(java.util.concurrent.Executors) TimeUnit(java.util.concurrent.TimeUnit) DatasetConfigCacheLoader(com.linkedin.thirdeye.client.cache.DatasetConfigCacheLoader) List(java.util.List) DashboardsCacheLoader(com.linkedin.thirdeye.client.cache.DashboardsCacheLoader) ResultSetGroupCacheLoader(com.linkedin.thirdeye.client.cache.ResultSetGroupCacheLoader) DashboardConfigDTO(com.linkedin.thirdeye.datalayer.dto.DashboardConfigDTO) RemovalListener(com.google.common.cache.RemovalListener) CacheBuilder(com.google.common.cache.CacheBuilder) DatasetConfigDTO(com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO)
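Once registered in this way, callers typically go back through the registry to run queries, as Examples 2 and 5 below do. A minimal sketch of that usage, assuming an arbitrary table and PQL string of the caller's choosing:

ThirdEyeCacheRegistry cacheRegistry = ThirdEyeCacheRegistry.getInstance();
// Hypothetical table and query, for illustration only.
PinotQuery query = new PinotQuery("select count(*) from myTable", "myTable");
try {
    // A cache miss loads the result through ResultSetGroupCacheLoader.
    ResultSetGroup resultSetGroup = cacheRegistry.getResultSetGroupCache().get(query);
} catch (ExecutionException e) {
    // Loading from Pinot failed; handle or log as appropriate.
}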

Example 2 with PinotQuery

use of com.linkedin.thirdeye.client.pinot.PinotQuery in project pinot by linkedin.

the class AutoLoadPinotMetricsService method fetchMetricAsADimensionMetrics.

private List<String> fetchMetricAsADimensionMetrics(String dataset, String metricNamesColumn) {
    List<String> distinctMetricNames = new ArrayList<>();
    ThirdEyeCacheRegistry CACHE_REGISTRY = ThirdEyeCacheRegistry.getInstance();
    String sql = String.format("select count(*) from %s group by %s top 10", dataset, metricNamesColumn);
    try {
        ResultSetGroup result = CACHE_REGISTRY.getResultSetGroupCache().get(new PinotQuery(sql, dataset));
        ResultSet resultSet = result.getResultSet(0);
        int rowCount = resultSet.getRowCount();
        for (int i = 0; i < rowCount; i++) {
            String dimensionName = resultSet.getGroupKeyString(i, 0);
            distinctMetricNames.add(dimensionName);
        }
        LOG.info("Distinct Metrics {}", distinctMetricNames);
    } catch (Exception e) {
        LOG.error("Exception in fetching metrics from pinot", e);
    }
    return distinctMetricNames;
}
Also used : ThirdEyeCacheRegistry(com.linkedin.thirdeye.client.ThirdEyeCacheRegistry) PinotQuery(com.linkedin.thirdeye.client.pinot.PinotQuery) ArrayList(java.util.ArrayList) ResultSet(com.linkedin.pinot.client.ResultSet) ResultSetGroup(com.linkedin.pinot.client.ResultSetGroup) IOException(java.io.IOException)
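With hypothetical names substituted (neither is taken from the project), the PQL built by this method looks like the sketch below; the group-by on the metric-names column is what yields the distinct metric names:

// Placeholder dataset and column names, for illustration only.
String dataset = "pageViews";
String metricNamesColumn = "metricName";
// Produces: select count(*) from pageViews group by metricName top 10
String sql = String.format("select count(*) from %s group by %s top 10", dataset, metricNamesColumn);
PinotQuery query = new PinotQuery(sql, dataset);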

Example 3 with PinotQuery

use of com.linkedin.thirdeye.client.pinot.PinotQuery in project pinot by linkedin.

the class AnomalyApplicationEndToEndTest method setup.

private void setup() throws Exception {
    // Mock query cache
    ThirdEyeClient mockThirdeyeClient = Mockito.mock(ThirdEyeClient.class);
    Mockito.when(mockThirdeyeClient.execute(Matchers.any(ThirdEyeRequest.class))).thenAnswer(new Answer<ThirdEyeResponse>() {

        @Override
        public ThirdEyeResponse answer(InvocationOnMock invocation) throws Throwable {
            Object[] args = invocation.getArguments();
            ThirdEyeRequest request = (ThirdEyeRequest) args[0];
            ThirdEyeResponse response = getMockResponse(request);
            return response;
        }
    });
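    // Wrap the mocked client in a real QueryCache so the code under test follows the normal query path while responses come from getMockResponse().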
    QueryCache mockQueryCache = new QueryCache(mockThirdeyeClient, Executors.newFixedThreadPool(10));
    cacheRegistry.registerQueryCache(mockQueryCache);
    MetricConfigDTO metricConfig = getTestMetricConfig(collection, metric, 1L);
    // create metric config in cache
    LoadingCache<MetricDataset, MetricConfigDTO> mockMetricConfigCache = Mockito.mock(LoadingCache.class);
    Mockito.when(mockMetricConfigCache.get(new MetricDataset(metric, collection))).thenReturn(metricConfig);
    cacheRegistry.registerMetricConfigCache(mockMetricConfigCache);
    // create dataset config in cache
    LoadingCache<String, DatasetConfigDTO> mockDatasetConfigCache = Mockito.mock(LoadingCache.class);
    Mockito.when(mockDatasetConfigCache.get(collection)).thenReturn(getTestDatasetConfig(collection));
    cacheRegistry.registerDatasetConfigCache(mockDatasetConfigCache);
    ResultSet mockResultSet = Mockito.mock(ResultSet.class);
    Mockito.when(mockResultSet.getRowCount()).thenReturn(0);
    ResultSetGroup mockResultSetGroup = Mockito.mock(ResultSetGroup.class);
    Mockito.when(mockResultSetGroup.getResultSet(0)).thenReturn(mockResultSet);
    LoadingCache<PinotQuery, ResultSetGroup> mockResultSetGroupCache = Mockito.mock(LoadingCache.class);
    Mockito.when(mockResultSetGroupCache.get(Matchers.any(PinotQuery.class))).thenAnswer(new Answer<ResultSetGroup>() {

        @Override
        public ResultSetGroup answer(InvocationOnMock invocation) throws Throwable {
            return mockResultSetGroup;
        }
    });
    cacheRegistry.registerResultSetGroupCache(mockResultSetGroupCache);
    // Application config
    thirdeyeAnomalyConfig = new ThirdEyeAnomalyConfiguration();
    thirdeyeAnomalyConfig.setId(id);
    thirdeyeAnomalyConfig.setDashboardHost(dashboardHost);
    MonitorConfiguration monitorConfiguration = new MonitorConfiguration();
    monitorConfiguration.setMonitorFrequency(new TimeGranularity(30, TimeUnit.SECONDS));
    thirdeyeAnomalyConfig.setMonitorConfiguration(monitorConfiguration);
    thirdeyeAnomalyConfig.setRootDir(System.getProperty("dw.rootDir", "NOT_SET(dw.rootDir)"));
    // create test anomaly function
    functionId = anomalyFunctionDAO.save(getTestFunctionSpec(metric, collection));
    // create test email configuration
    emailConfigurationDAO.save(getTestEmailConfiguration(metric, collection));
    // create test alert configuration
    alertConfigDAO.save(getTestAlertConfiguration("test alert v2"));
    // create test dataset config
    datasetConfigDAO.save(getTestDatasetConfig(collection));
    // setup function factory for worker and merger
    InputStream factoryStream = AnomalyApplicationEndToEndTest.class.getResourceAsStream(functionPropertiesFile);
    anomalyFunctionFactory = new AnomalyFunctionFactory(factoryStream);
    // setup alertfilter factory for worker
    InputStream alertFilterStream = AnomalyApplicationEndToEndTest.class.getResourceAsStream(alertFilterPropertiesFile);
    alertFilterFactory = new AlertFilterFactory(alertFilterStream);
}
Also used : QueryCache(com.linkedin.thirdeye.client.cache.QueryCache) AlertFilterFactory(com.linkedin.thirdeye.detector.email.filter.AlertFilterFactory) ThirdEyeClient(com.linkedin.thirdeye.client.ThirdEyeClient) ThirdEyeRequest(com.linkedin.thirdeye.client.ThirdEyeRequest) DatasetConfigDTO(com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO) ResultSet(com.linkedin.pinot.client.ResultSet) TimeGranularity(com.linkedin.thirdeye.api.TimeGranularity) AnomalyFunctionFactory(com.linkedin.thirdeye.detector.function.AnomalyFunctionFactory) MetricConfigDTO(com.linkedin.thirdeye.datalayer.dto.MetricConfigDTO) InputStream(java.io.InputStream) MonitorConfiguration(com.linkedin.thirdeye.anomaly.monitor.MonitorConfiguration) ThirdEyeResponse(com.linkedin.thirdeye.client.ThirdEyeResponse) PinotThirdEyeResponse(com.linkedin.thirdeye.client.pinot.PinotThirdEyeResponse) ThirdEyeAnomalyConfiguration(com.linkedin.thirdeye.anomaly.ThirdEyeAnomalyConfiguration) ResultSetGroup(com.linkedin.pinot.client.ResultSetGroup) MetricDataset(com.linkedin.thirdeye.client.cache.MetricDataset) InvocationOnMock(org.mockito.invocation.InvocationOnMock) PinotQuery(com.linkedin.thirdeye.client.pinot.PinotQuery)
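With these stubs in place, any lookup against the mocked ResultSetGroup cache returns the same empty result, regardless of the PQL. A small sketch of what the test can then assume, written as it would appear inside a method that, like setup(), declares throws Exception (the query string here is arbitrary):

// Every PinotQuery maps to mockResultSetGroup, whose first ResultSet reports zero rows.
ResultSetGroup group = mockResultSetGroupCache.get(new PinotQuery("select count(*) from someTable", "someTable"));
int rows = group.getResultSet(0).getRowCount(); // 0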

Example 4 with PinotQuery

use of com.linkedin.thirdeye.client.pinot.PinotQuery in project pinot by linkedin.

the class CollectionMaxDataTimeCacheLoader method load.

@Override
public Long load(String collection) throws Exception {
    LOGGER.info("Loading maxDataTime cache {}", collection);
    long maxTime = 0;
    try {
        DatasetConfigDTO datasetConfig = datasetConfigDAO.findByDataset(collection);
        // By default, query only offline, unless dataset has been marked as realtime
        String tableName = ThirdEyeUtils.computeTableName(collection);
        TimeSpec timeSpec = ThirdEyeUtils.getTimeSpecFromDatasetConfig(datasetConfig);
        long prevMaxDataTime = getPrevMaxDataTime(collection, timeSpec);
        String maxTimePql = String.format(COLLECTION_MAX_TIME_QUERY_TEMPLATE, timeSpec.getColumnName(), tableName, timeSpec.getColumnName(), prevMaxDataTime);
        PinotQuery maxTimePinotQuery = new PinotQuery(maxTimePql, tableName);
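        // Refresh the entry first so the get() below returns freshly loaded data instead of a stale cached value.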
        resultSetGroupCache.refresh(maxTimePinotQuery);
        ResultSetGroup resultSetGroup = resultSetGroupCache.get(maxTimePinotQuery);
        if (resultSetGroup.getResultSetCount() == 0 || resultSetGroup.getResultSet(0).getRowCount() == 0) {
            LOGGER.info("resultSetGroup is Empty for collection {} is {}", tableName, resultSetGroup);
            this.collectionToPrevMaxDataTimeMap.remove(collection);
        } else {
            long endTime = new Double(resultSetGroup.getResultSet(0).getDouble(0)).longValue();
            this.collectionToPrevMaxDataTimeMap.put(collection, endTime);
            // endTime + 1 to make sure we cover the time range of that time value.
            String timeFormat = timeSpec.getFormat();
            if (StringUtils.isBlank(timeFormat) || TimeSpec.SINCE_EPOCH_FORMAT.equals(timeFormat)) {
                maxTime = timeSpec.getDataGranularity().toMillis(endTime + 1) - 1;
            } else {
                DateTimeFormatter inputDataDateTimeFormatter = DateTimeFormat.forPattern(timeFormat).withZone(Utils.getDataTimeZone(collection));
                maxTime = DateTime.parse(String.valueOf(endTime), inputDataDateTimeFormatter).getMillis();
            }
        }
    } catch (Exception e) {
        LOGGER.warn("Exception getting maxTime from collection: {}", collection, e);
        this.collectionToPrevMaxDataTimeMap.remove(collection);
    }
    if (maxTime <= 0) {
        maxTime = System.currentTimeMillis();
    }
    return maxTime;
}
Also used : DatasetConfigDTO(com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO) PinotQuery(com.linkedin.thirdeye.client.pinot.PinotQuery) DateTimeFormatter(org.joda.time.format.DateTimeFormatter) ResultSetGroup(com.linkedin.pinot.client.ResultSetGroup) TimeSpec(com.linkedin.thirdeye.api.TimeSpec)
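COLLECTION_MAX_TIME_QUERY_TEMPLATE itself is not part of this excerpt. Judging only from the four format arguments (time column, table, time column, previous max time), a plausible shape is sketched below; treat it as an assumption rather than the project's actual constant:

// Assumed template shape: max of the time column, restricted to rows at or after the previously seen max time.
String COLLECTION_MAX_TIME_QUERY_TEMPLATE = "select max(%s) from %s where %s >= %d";
// e.g. select max(hoursSinceEpoch) from myTable_OFFLINE where hoursSinceEpoch >= 417890  (placeholder values)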

Example 5 with PinotQuery

use of com.linkedin.thirdeye.client.pinot.PinotQuery in project pinot by linkedin.

the class DataCompletenessTaskUtils method getBucketNameToCountStarMap.

private static Map<String, Long> getBucketNameToCountStarMap(String dataset, TimeSpec timeSpec, ListMultimap<String, Long> bucketNameToTimeValues) {
    Map<String, Long> bucketNameToCountStar = new HashMap<>();
    // generate request
    StringBuilder sb = new StringBuilder();
    String delimiter = "";
    for (Long timeValue : bucketNameToTimeValues.values()) {
        sb.append(delimiter);
        delimiter = " OR ";
        sb.append(String.format("%s='%s'", timeSpec.getColumnName(), timeValue));
    }
    long top = bucketNameToTimeValues.values().size();
    String pql = String.format("select count(*) from %s where %s group by %s top %s", dataset, sb.toString(), timeSpec.getColumnName(), top);
    Map<Long, Long> timeValueToCount = new HashMap<>();
    try {
        ResultSetGroup resultSetGroup = CACHE_REGISTRY.getResultSetGroupCache().get(new PinotQuery(pql, dataset));
        if (resultSetGroup == null || resultSetGroup.getResultSetCount() <= 0) {
            return bucketNameToCountStar;
        }
        ResultSet resultSet = resultSetGroup.getResultSet(0);
        for (int i = 0; i < resultSet.getRowCount(); i++) {
            Long timeValue = Long.valueOf(resultSet.getGroupKeyString(i, 0));
            Long count = resultSet.getLong(i, 0);
            timeValueToCount.put(timeValue, count);
        }
    } catch (ExecutionException e) {
        LOG.error("Exception in getting count *. PQL:{}", pql, e);
    }
    // parse response to get counts
    for (String bucketName : bucketNameToTimeValues.keySet()) {
        List<Long> timeValues = bucketNameToTimeValues.get(bucketName);
        Long sumOfCountForBucket = 0L;
        for (Long timeValue : timeValues) {
            sumOfCountForBucket = sumOfCountForBucket + timeValueToCount.getOrDefault(timeValue, 0L);
        }
        bucketNameToCountStar.put(bucketName, sumOfCountForBucket);
    }
    return bucketNameToCountStar;
}
Also used : HashMap(java.util.HashMap) ResultSetGroup(com.linkedin.pinot.client.ResultSetGroup) PinotQuery(com.linkedin.thirdeye.client.pinot.PinotQuery) ResultSet(com.linkedin.pinot.client.ResultSet) ExecutionException(java.util.concurrent.ExecutionException)
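For a couple of hypothetical time values (dataset and column names are placeholders, not from the project), the PQL assembled above comes out roughly as follows:

// The loop above yields one equality predicate per time value, joined with OR:
//   timeBucket='420000' OR timeBucket='420001'
// Full query, with top equal to the number of time values (2 here):
String pql = "select count(*) from pageViews where timeBucket='420000' OR timeBucket='420001' group by timeBucket top 2";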

Aggregations

ResultSetGroup (com.linkedin.pinot.client.ResultSetGroup) 5
PinotQuery (com.linkedin.thirdeye.client.pinot.PinotQuery) 5
ResultSet (com.linkedin.pinot.client.ResultSet) 3
DatasetConfigDTO (com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO) 3
MetricDataset (com.linkedin.thirdeye.client.cache.MetricDataset) 2
QueryCache (com.linkedin.thirdeye.client.cache.QueryCache) 2
MetricConfigDTO (com.linkedin.thirdeye.datalayer.dto.MetricConfigDTO) 2
CacheBuilder (com.google.common.cache.CacheBuilder) 1
LoadingCache (com.google.common.cache.LoadingCache) 1
RemovalListener (com.google.common.cache.RemovalListener) 1
RemovalNotification (com.google.common.cache.RemovalNotification) 1
ThirdEyeAnomalyConfiguration (com.linkedin.thirdeye.anomaly.ThirdEyeAnomalyConfiguration) 1
MonitorConfiguration (com.linkedin.thirdeye.anomaly.monitor.MonitorConfiguration) 1
TimeGranularity (com.linkedin.thirdeye.api.TimeGranularity) 1
TimeSpec (com.linkedin.thirdeye.api.TimeSpec) 1
ThirdEyeCacheRegistry (com.linkedin.thirdeye.client.ThirdEyeCacheRegistry) 1
ThirdEyeClient (com.linkedin.thirdeye.client.ThirdEyeClient) 1
ThirdEyeRequest (com.linkedin.thirdeye.client.ThirdEyeRequest) 1
ThirdEyeResponse (com.linkedin.thirdeye.client.ThirdEyeResponse) 1
CollectionMaxDataTimeCacheLoader (com.linkedin.thirdeye.client.cache.CollectionMaxDataTimeCacheLoader) 1