Search in sources:

Example 1 with ResultSet

use of com.linkedin.pinot.client.ResultSet in project pinot by linkedin.

The method parseResultSets of the class PinotThirdEyeClient:

/**
 * Merges the per-metric Pinot result sets into rows of the form
 * [groupKey_0, ..., groupKey_k, metric_0, ..., metric_m], keyed by the composite
 * group key so rows for the same dimension/time combination are summed across
 * result sets. Result set i is assumed to carry metric i (one value column each).
 *
 * @param request the originating ThirdEye request (supplies group-by and time range)
 * @param resultSets one Pinot result set per metric function, in metric order
 * @param metricFunctions the requested metric functions (parallel to resultSets)
 * @param dimensionNames dataset dimension names (currently unused here)
 * @param datasetConfig dataset configuration (time spec, dataset name)
 * @return the accumulated rows, in first-seen group-key order
 * @throws ExecutionException declared for callers; not thrown by this body directly
 */
private List<String[]> parseResultSets(ThirdEyeRequest request, List<ResultSet> resultSets, List<MetricFunction> metricFunctions, List<String> dimensionNames, DatasetConfigDTO datasetConfig) throws ExecutionException {
    int numGroupByKeys = 0;
    if (request.getGroupByTimeGranularity() != null) {
        numGroupByKeys += 1;
    }
    if (request.getGroupBy() != null) {
        numGroupByKeys += request.getGroupBy().size();
    }
    boolean hasGroupBy = numGroupByKeys > 0;
    // When grouping by time, the time column is always the FIRST group key.
    boolean hasGroupByTime = request.getGroupByTimeGranularity() != null;
    int numMetrics = request.getMetricFunctions().size();
    int numCols = numGroupByKeys + numMetrics;
    String collection = datasetConfig.getDataset();
    long startTime = request.getStartTimeInclusive().getMillis();
    DateTimeZone dateTimeZone = Utils.getDataTimeZone(collection);
    DateTime startDateTime = new DateTime(startTime, dateTimeZone);
    TimeSpec timespec = ThirdEyeUtils.getTimeSpecFromDatasetConfig(datasetConfig);
    TimeGranularity dataGranularity = timespec.getDataGranularity();
    // Time values are either epoch-based (converted via the data granularity) or
    // formatted strings parsed with the dataset's declared pattern.
    boolean isISOFormat = false;
    DateTimeFormatter inputDataDateTimeFormatter = null;
    String timeFormat = timespec.getFormat();
    if (timeFormat != null && !timeFormat.equals(TimeSpec.SINCE_EPOCH_FORMAT)) {
        isISOFormat = true;
        inputDataDateTimeFormatter = DateTimeFormat.forPattern(timeFormat).withZone(dateTimeZone);
    }
    // Insertion-ordered so output rows keep first-seen group-key order.
    LinkedHashMap<String, String[]> dataMap = new LinkedHashMap<>();
    for (int i = 0; i < resultSets.size(); i++) {
        ResultSet resultSet = resultSets.get(i);
        int numRows = resultSet.getRowCount();
        for (int r = 0; r < numRows; r++) {
            boolean skipRowDueToError = false;
            String[] groupKeys;
            if (hasGroupBy) {
                groupKeys = new String[resultSet.getGroupKeyLength()];
                for (int grpKeyIdx = 0; grpKeyIdx < resultSet.getGroupKeyLength(); grpKeyIdx++) {
                    String groupKeyVal = "";
                    try {
                        groupKeyVal = resultSet.getGroupKeyString(r, grpKeyIdx);
                    } catch (Exception e) {
                    // IGNORE FOR NOW, workaround for Pinot Bug
                    }
                    if (hasGroupByTime && grpKeyIdx == 0) {
                        long millis;
                        try {
                            if (!isISOFormat) {
                                millis = dataGranularity.toMillis(Double.valueOf(groupKeyVal).longValue());
                            } else {
                                millis = DateTime.parse(groupKeyVal, inputDataDateTimeFormatter).getMillis();
                            }
                        } catch (Exception e) {
                            // BUGFIX: a group key swallowed by the workaround above stays "",
                            // which previously made Double.valueOf / DateTime.parse throw an
                            // unchecked exception out of this method. Skip the malformed row
                            // instead, mirroring the early-time-value path below.
                            LOG.error("Unparseable time group key '{}' for collection {}", groupKeyVal, collection, e);
                            skipRowDueToError = true;
                            break;
                        }
                        if (millis < startTime) {
                            LOG.error("Data point earlier than requested start time {}: {}", new Date(startTime), new Date(millis));
                            skipRowDueToError = true;
                            break;
                        }
                        // Replace the raw time value with its bucket index relative to startDateTime.
                        int timeBucket = TimeRangeUtils.computeBucketIndex(request.getGroupByTimeGranularity(), startDateTime, new DateTime(millis, dateTimeZone));
                        groupKeyVal = String.valueOf(timeBucket);
                    }
                    groupKeys[grpKeyIdx] = groupKeyVal;
                }
                if (skipRowDueToError) {
                    continue;
                }
            } else {
                groupKeys = new String[] {};
            }
            // Composite key "key1|key2|...|" identifies the output row.
            StringBuilder groupKeyBuilder = new StringBuilder();
            for (String grpKey : groupKeys) {
                groupKeyBuilder.append(grpKey).append("|");
            }
            String compositeGroupKey = groupKeyBuilder.toString();
            String[] rowValues = dataMap.get(compositeGroupKey);
            if (rowValues == null) {
                // New row: group keys up front, metric columns initialized to "0".
                rowValues = new String[numCols];
                Arrays.fill(rowValues, "0");
                System.arraycopy(groupKeys, 0, rowValues, 0, groupKeys.length);
                dataMap.put(compositeGroupKey, rowValues);
            }
            // Accumulate result set i's single value column into metric column i.
            rowValues[groupKeys.length + i] = String.valueOf(Double.parseDouble(rowValues[groupKeys.length + i]) + Double.parseDouble(resultSet.getString(r, 0)));
        }
    }
    return new ArrayList<>(dataMap.values());
}
Also used : ArrayList(java.util.ArrayList) DateTimeZone(org.joda.time.DateTimeZone) DateTime(org.joda.time.DateTime) ExecutionException(java.util.concurrent.ExecutionException) Date(java.util.Date) TimeSpec(com.linkedin.thirdeye.api.TimeSpec) LinkedHashMap(java.util.LinkedHashMap) ResultSet(com.linkedin.pinot.client.ResultSet) TimeGranularity(com.linkedin.thirdeye.api.TimeGranularity) DateTimeFormatter(org.joda.time.format.DateTimeFormatter)

Example 2 with ResultSet

use of com.linkedin.pinot.client.ResultSet in project pinot by linkedin.

The method fetchMetricAsADimensionMetrics of the class AutoLoadPinotMetricsService:

/**
 * Discovers the distinct metric names stored in a "metric as dimension" dataset
 * by grouping a count(*) over the metric-names column; each returned group key
 * is one metric name. Query failures are logged and yield an empty list.
 *
 * @param dataset the Pinot dataset/table to query
 * @param metricNamesColumn the column holding metric names
 * @return the distinct metric names found (possibly empty, never null)
 */
private List<String> fetchMetricAsADimensionMetrics(String dataset, String metricNamesColumn) {
    List<String> distinctMetricNames = new ArrayList<>();
    ThirdEyeCacheRegistry CACHE_REGISTRY = ThirdEyeCacheRegistry.getInstance();
    String sql = String.format("select count(*) from %s group by %s top 10", dataset, metricNamesColumn);
    try {
        ResultSetGroup resultSetGroup = CACHE_REGISTRY.getResultSetGroupCache().get(new PinotQuery(sql, dataset));
        ResultSet metricNamesResultSet = resultSetGroup.getResultSet(0);
        int totalRows = metricNamesResultSet.getRowCount();
        for (int row = 0; row < totalRows; row++) {
            // The group key of each row is a distinct metric name.
            distinctMetricNames.add(metricNamesResultSet.getGroupKeyString(row, 0));
        }
        LOG.info("Distinct Metrics {}", distinctMetricNames);
    } catch (Exception e) {
        // Best effort: swallow and return whatever was collected so far.
        LOG.error("Exception in fetching metrics from pinot", e);
    }
    return distinctMetricNames;
}
Also used : ThirdEyeCacheRegistry(com.linkedin.thirdeye.client.ThirdEyeCacheRegistry) PinotQuery(com.linkedin.thirdeye.client.pinot.PinotQuery) ArrayList(java.util.ArrayList) ResultSet(com.linkedin.pinot.client.ResultSet) ResultSetGroup(com.linkedin.pinot.client.ResultSetGroup) IOException(java.io.IOException)

Example 3 with ResultSet

use of com.linkedin.pinot.client.ResultSet in project pinot by linkedin.

The method setup of the class AnomalyApplicationEndToEndTest:

// Wires up the end-to-end test fixture: mocks every external dependency
// (ThirdEye client, metric/dataset config caches, Pinot result-set cache),
// registers them in the shared cacheRegistry, builds the application config,
// and persists the test function/email/alert/dataset entities.
// NOTE(review): registration order appears to follow dependency order
// (client cache before config caches) — confirm before reordering.
private void setup() throws Exception {
    // Mock query cache
    ThirdEyeClient mockThirdeyeClient = Mockito.mock(ThirdEyeClient.class);
    Mockito.when(mockThirdeyeClient.execute(Matchers.any(ThirdEyeRequest.class))).thenAnswer(new Answer<ThirdEyeResponse>() {

        @Override
        public ThirdEyeResponse answer(InvocationOnMock invocation) throws Throwable {
            // Every execute() call is answered by the canned response builder.
            Object[] args = invocation.getArguments();
            ThirdEyeRequest request = (ThirdEyeRequest) args[0];
            ThirdEyeResponse response = getMockResponse(request);
            return response;
        }
    });
    QueryCache mockQueryCache = new QueryCache(mockThirdeyeClient, Executors.newFixedThreadPool(10));
    cacheRegistry.registerQueryCache(mockQueryCache);
    MetricConfigDTO metricConfig = getTestMetricConfig(collection, metric, 1L);
    // create metric config in cache
    LoadingCache<MetricDataset, MetricConfigDTO> mockMetricConfigCache = Mockito.mock(LoadingCache.class);
    Mockito.when(mockMetricConfigCache.get(new MetricDataset(metric, collection))).thenReturn(metricConfig);
    cacheRegistry.registerMetricConfigCache(mockMetricConfigCache);
    // create dataset config in cache
    LoadingCache<String, DatasetConfigDTO> mockDatasetConfigCache = Mockito.mock(LoadingCache.class);
    Mockito.when(mockDatasetConfigCache.get(collection)).thenReturn(getTestDatasetConfig(collection));
    cacheRegistry.registerDatasetConfigCache(mockDatasetConfigCache);
    // Empty result set (0 rows) so any Pinot query resolves without data.
    ResultSet mockResultSet = Mockito.mock(ResultSet.class);
    Mockito.when(mockResultSet.getRowCount()).thenReturn(0);
    ResultSetGroup mockResultSetGroup = Mockito.mock(ResultSetGroup.class);
    Mockito.when(mockResultSetGroup.getResultSet(0)).thenReturn(mockResultSet);
    LoadingCache<PinotQuery, ResultSetGroup> mockResultSetGroupCache = Mockito.mock(LoadingCache.class);
    Mockito.when(mockResultSetGroupCache.get(Matchers.any(PinotQuery.class))).thenAnswer(new Answer<ResultSetGroup>() {

        @Override
        public ResultSetGroup answer(InvocationOnMock invocation) throws Throwable {
            // Same empty group for every query, regardless of its content.
            return mockResultSetGroup;
        }
    });
    cacheRegistry.registerResultSetGroupCache(mockResultSetGroupCache);
    // Application config
    thirdeyeAnomalyConfig = new ThirdEyeAnomalyConfiguration();
    thirdeyeAnomalyConfig.setId(id);
    thirdeyeAnomalyConfig.setDashboardHost(dashboardHost);
    MonitorConfiguration monitorConfiguration = new MonitorConfiguration();
    // Short monitor frequency (30s) keeps the end-to-end test fast.
    monitorConfiguration.setMonitorFrequency(new TimeGranularity(30, TimeUnit.SECONDS));
    thirdeyeAnomalyConfig.setMonitorConfiguration(monitorConfiguration);
    thirdeyeAnomalyConfig.setRootDir(System.getProperty("dw.rootDir", "NOT_SET(dw.rootDir)"));
    // create test anomaly function
    functionId = anomalyFunctionDAO.save(getTestFunctionSpec(metric, collection));
    // create test email configuration
    emailConfigurationDAO.save(getTestEmailConfiguration(metric, collection));
    // create test alert configuration
    alertConfigDAO.save(getTestAlertConfiguration("test alert v2"));
    // create test dataset config
    datasetConfigDAO.save(getTestDatasetConfig(collection));
    // setup function factory for worker and merger
    InputStream factoryStream = AnomalyApplicationEndToEndTest.class.getResourceAsStream(functionPropertiesFile);
    anomalyFunctionFactory = new AnomalyFunctionFactory(factoryStream);
    // setup alertfilter factory for worker
    InputStream alertFilterStream = AnomalyApplicationEndToEndTest.class.getResourceAsStream(alertFilterPropertiesFile);
    alertFilterFactory = new AlertFilterFactory(alertFilterStream);
}
Also used : QueryCache(com.linkedin.thirdeye.client.cache.QueryCache) AlertFilterFactory(com.linkedin.thirdeye.detector.email.filter.AlertFilterFactory) ThirdEyeClient(com.linkedin.thirdeye.client.ThirdEyeClient) ThirdEyeRequest(com.linkedin.thirdeye.client.ThirdEyeRequest) DatasetConfigDTO(com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO) ResultSet(com.linkedin.pinot.client.ResultSet) TimeGranularity(com.linkedin.thirdeye.api.TimeGranularity) AnomalyFunctionFactory(com.linkedin.thirdeye.detector.function.AnomalyFunctionFactory) MetricConfigDTO(com.linkedin.thirdeye.datalayer.dto.MetricConfigDTO) InputStream(java.io.InputStream) MonitorConfiguration(com.linkedin.thirdeye.anomaly.monitor.MonitorConfiguration) ThirdEyeResponse(com.linkedin.thirdeye.client.ThirdEyeResponse) PinotThirdEyeResponse(com.linkedin.thirdeye.client.pinot.PinotThirdEyeResponse) ThirdEyeAnomalyConfiguration(com.linkedin.thirdeye.anomaly.ThirdEyeAnomalyConfiguration) ResultSetGroup(com.linkedin.pinot.client.ResultSetGroup) MetricDataset(com.linkedin.thirdeye.client.cache.MetricDataset) InvocationOnMock(org.mockito.invocation.InvocationOnMock) PinotQuery(com.linkedin.thirdeye.client.pinot.PinotQuery)

Example 4 with ResultSet

use of com.linkedin.pinot.client.ResultSet in project pinot by linkedin.

The method execute of the class PinotThirdEyeClient:

/**
 * Executes a ThirdEye request against Pinot: resolves the dataset config,
 * issues one PQL per metric (or a single PQL for ordinary datasets), then
 * parses the result sets into a {@link PinotThirdEyeResponse}.
 *
 * @param request the ThirdEye request to run
 * @return the parsed response
 * @throws Exception on cache/query/parse failure
 */
@Override
public PinotThirdEyeResponse execute(ThirdEyeRequest request) throws Exception {
    DatasetConfigDTO datasetConfig = CACHE_REGISTRY_INSTANCE.getDatasetConfigCache().get(request.getCollection());
    TimeSpec dataTimeSpec = ThirdEyeUtils.getTimeSpecFromDatasetConfig(datasetConfig);
    List<MetricFunction> metricFunctions = request.getMetricFunctions();
    List<String> dimensionNames = datasetConfig.getDimensions();
    List<ResultSetGroup> resultSetGroups = new ArrayList<>();
    // By default, query only offline, unless dataset has been marked as realtime
    String tableName = ThirdEyeUtils.computeTableName(request.getCollection());
    if (!datasetConfig.isMetricAsDimension()) {
        // Ordinary dataset: one PQL covers all metrics.
        String sql = PqlUtils.getPql(request, dataTimeSpec);
        LOG.debug("PQL: {}", sql);
        ResultSetGroup queryResult = CACHE_REGISTRY_INSTANCE.getResultSetGroupCache().get(new PinotQuery(sql, tableName));
        resultSetGroups.add(queryResult);
        if (LOG.isDebugEnabled()) {
            LOG.debug("Result for: {} {}", sql, format(queryResult));
        }
    } else {
        // Metric-as-dimension dataset: one PQL per metric.
        for (String pql : PqlUtils.getMetricAsDimensionPqls(request, dataTimeSpec, datasetConfig)) {
            LOG.debug("PQL isMetricAsDimension : {}", pql);
            ResultSetGroup queryResult = CACHE_REGISTRY_INSTANCE.getResultSetGroupCache().get(new PinotQuery(pql, tableName));
            resultSetGroups.add(queryResult);
        }
    }
    List<ResultSet> resultSets = getResultSets(resultSetGroups);
    List<String[]> resultRows = parseResultSets(request, resultSets, metricFunctions, dimensionNames, datasetConfig);
    return new PinotThirdEyeResponse(request, resultRows, dataTimeSpec);
}
Also used : ArrayList(java.util.ArrayList) ResultSetGroup(com.linkedin.pinot.client.ResultSetGroup) TimeSpec(com.linkedin.thirdeye.api.TimeSpec) DatasetConfigDTO(com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO) MetricFunction(com.linkedin.thirdeye.client.MetricFunction) ResultSet(com.linkedin.pinot.client.ResultSet)

Example 5 with ResultSet

use of com.linkedin.pinot.client.ResultSet in project pinot by linkedin.

The method getBucketNameToCountStarMap of the class DataCompletenessTaskUtils:

/**
 * Computes count(*) per bucket: issues one grouped PQL matching every requested
 * time value, maps each time value to its count, then sums counts into their
 * owning bucket. A failed query is logged and leaves all counts at zero.
 *
 * @param dataset the Pinot dataset/table to query
 * @param timeSpec the dataset time spec (supplies the time column name)
 * @param bucketNameToTimeValues bucket name -> time values belonging to it
 * @return bucket name -> summed count(*) over its time values
 */
private static Map<String, Long> getBucketNameToCountStarMap(String dataset, TimeSpec timeSpec, ListMultimap<String, Long> bucketNameToTimeValues) {
    Map<String, Long> bucketNameToCountStar = new HashMap<>();
    // Build a WHERE disjunction covering every requested time value.
    StringBuilder whereClause = new StringBuilder();
    boolean firstTerm = true;
    for (Long timeValue : bucketNameToTimeValues.values()) {
        if (!firstTerm) {
            whereClause.append(" OR ");
        }
        firstTerm = false;
        whereClause.append(String.format("%s='%s'", timeSpec.getColumnName(), timeValue));
    }
    // "top" bounds the group-by output to the number of queried time values.
    long top = bucketNameToTimeValues.values().size();
    String pql = String.format("select count(*) from %s where %s group by %s top %s", dataset, whereClause.toString(), timeSpec.getColumnName(), top);
    Map<Long, Long> timeValueToCount = new HashMap<>();
    try {
        ResultSetGroup resultSetGroup = CACHE_REGISTRY.getResultSetGroupCache().get(new PinotQuery(pql, dataset));
        if (resultSetGroup == null || resultSetGroup.getResultSetCount() <= 0) {
            return bucketNameToCountStar;
        }
        ResultSet countResultSet = resultSetGroup.getResultSet(0);
        int totalRows = countResultSet.getRowCount();
        for (int row = 0; row < totalRows; row++) {
            // Group key is the time value; column 0 is its count(*).
            timeValueToCount.put(Long.valueOf(countResultSet.getGroupKeyString(row, 0)), countResultSet.getLong(row, 0));
        }
    } catch (ExecutionException e) {
        LOG.error("Exception in getting count *. PQL:{}", pql, e);
    }
    // parse response to get counts
    for (String bucketName : bucketNameToTimeValues.keySet()) {
        long bucketSum = 0L;
        for (Long timeValue : bucketNameToTimeValues.get(bucketName)) {
            bucketSum += timeValueToCount.getOrDefault(timeValue, 0L);
        }
        bucketNameToCountStar.put(bucketName, bucketSum);
    }
    return bucketNameToCountStar;
}
Also used : HashMap(java.util.HashMap) ResultSetGroup(com.linkedin.pinot.client.ResultSetGroup) PinotQuery(com.linkedin.thirdeye.client.pinot.PinotQuery) ResultSet(com.linkedin.pinot.client.ResultSet) ExecutionException(java.util.concurrent.ExecutionException)

Aggregations

ResultSet (com.linkedin.pinot.client.ResultSet)5 ResultSetGroup (com.linkedin.pinot.client.ResultSetGroup)4 PinotQuery (com.linkedin.thirdeye.client.pinot.PinotQuery)3 ArrayList (java.util.ArrayList)3 TimeGranularity (com.linkedin.thirdeye.api.TimeGranularity)2 TimeSpec (com.linkedin.thirdeye.api.TimeSpec)2 DatasetConfigDTO (com.linkedin.thirdeye.datalayer.dto.DatasetConfigDTO)2 ExecutionException (java.util.concurrent.ExecutionException)2 ThirdEyeAnomalyConfiguration (com.linkedin.thirdeye.anomaly.ThirdEyeAnomalyConfiguration)1 MonitorConfiguration (com.linkedin.thirdeye.anomaly.monitor.MonitorConfiguration)1 MetricFunction (com.linkedin.thirdeye.client.MetricFunction)1 ThirdEyeCacheRegistry (com.linkedin.thirdeye.client.ThirdEyeCacheRegistry)1 ThirdEyeClient (com.linkedin.thirdeye.client.ThirdEyeClient)1 ThirdEyeRequest (com.linkedin.thirdeye.client.ThirdEyeRequest)1 ThirdEyeResponse (com.linkedin.thirdeye.client.ThirdEyeResponse)1 MetricDataset (com.linkedin.thirdeye.client.cache.MetricDataset)1 QueryCache (com.linkedin.thirdeye.client.cache.QueryCache)1 PinotThirdEyeResponse (com.linkedin.thirdeye.client.pinot.PinotThirdEyeResponse)1 MetricConfigDTO (com.linkedin.thirdeye.datalayer.dto.MetricConfigDTO)1 AlertFilterFactory (com.linkedin.thirdeye.detector.email.filter.AlertFilterFactory)1