Search in sources :

Example 1 with AggregationGroupByResult

use of com.linkedin.pinot.core.query.aggregation.groupby.AggregationGroupByResult in project pinot by linkedin.

the class MCombineGroupByOperator method combineBlocks.

/**
   * This method combines the result blocks from underlying operators and builds a
   * merged, sorted and trimmed result block.
   * 1. Result blocks from underlying operators are merged concurrently into a
   *   HashMap, with appropriate synchronizations. Result blocks themselves are stored
   *   in the specified blocks[].
   *   - The key in this concurrent map is the group-by key, and value is an array of
   *     Objects (one for each aggregation function).
   *   - Synchronization is provided by locking the group-key that is to be modified.
   *
   * 2. The result of the concurrent map is then translated into what is expected by
   *    the broker (List<Map<String, Object>>).
   *
   * 3. This result is then sorted and then trimmed as per 'TOP N' in the brokerRequest.
   *
   * @return IntermediateResultBlock containing the final results from combine operation.
   */
private IntermediateResultsBlock combineBlocks() throws InterruptedException {
    int numOperators = _operators.size();
    final CountDownLatch operatorLatch = new CountDownLatch(numOperators);
    final Map<String, Object[]> resultsMap = new ConcurrentHashMap<>();
    final ConcurrentLinkedQueue<ProcessingException> mergedProcessingExceptions = new ConcurrentLinkedQueue<>();
    List<AggregationInfo> aggregationInfos = _brokerRequest.getAggregationsInfo();
    final AggregationFunctionContext[] aggregationFunctionContexts = AggregationFunctionUtils.getAggregationFunctionContexts(aggregationInfos, null);
    final int numAggregationFunctions = aggregationFunctionContexts.length;
    for (int i = 0; i < numOperators; i++) {
        final int index = i;
        _executorService.execute(new TraceRunnable() {

            @SuppressWarnings("unchecked")
            @Override
            public void runJob() {
                AggregationGroupByResult aggregationGroupByResult;
                try {
                    IntermediateResultsBlock intermediateResultsBlock = (IntermediateResultsBlock) _operators.get(index).nextBlock();
                    // Merge processing exceptions.
                    List<ProcessingException> processingExceptionsToMerge = intermediateResultsBlock.getProcessingExceptions();
                    if (processingExceptionsToMerge != null) {
                        mergedProcessingExceptions.addAll(processingExceptionsToMerge);
                    }
                    // Merge aggregation group-by result.
                    aggregationGroupByResult = intermediateResultsBlock.getAggregationGroupByResult();
                    if (aggregationGroupByResult != null) {
                        // Iterate over the group-by keys, for each key, update the group-by result in the resultsMap.
                        Iterator<GroupKeyGenerator.GroupKey> groupKeyIterator = aggregationGroupByResult.getGroupKeyIterator();
                        while (groupKeyIterator.hasNext()) {
                            GroupKeyGenerator.GroupKey groupKey = groupKeyIterator.next();
                            String groupKeyString = groupKey.getStringKey();
                            // HashCode method might return negative value, make it non-negative
                            int lockIndex = (groupKeyString.hashCode() & Integer.MAX_VALUE) % NUM_LOCKS;
                            synchronized (LOCKS[lockIndex]) {
                                Object[] results = resultsMap.get(groupKeyString);
                                if (results == null) {
                                    results = new Object[numAggregationFunctions];
                                    for (int j = 0; j < numAggregationFunctions; j++) {
                                        results[j] = aggregationGroupByResult.getResultForKey(groupKey, j);
                                    }
                                    resultsMap.put(groupKeyString, results);
                                } else {
                                    for (int j = 0; j < numAggregationFunctions; j++) {
                                        results[j] = aggregationFunctionContexts[j].getAggregationFunction().merge(results[j], aggregationGroupByResult.getResultForKey(groupKey, j));
                                    }
                                }
                            }
                        }
                    }
                } catch (Exception e) {
                    LOGGER.error("Exception processing CombineGroupBy for index {}, operator {}", index, _operators.get(index).getClass().getName(), e);
                    mergedProcessingExceptions.add(QueryException.getException(QueryException.QUERY_EXECUTION_ERROR, e));
                }
                operatorLatch.countDown();
            }
        });
    }
    boolean opCompleted = operatorLatch.await(_timeOutMs, TimeUnit.MILLISECONDS);
    if (!opCompleted) {
        // If this happens, the broker side should already timed out, just log the error in server side.
        LOGGER.error("Timed out while combining group-by results, after {}ms.", _timeOutMs);
        return new IntermediateResultsBlock(new TimeoutException("CombineGroupBy timed out."));
    }
    // Trim the results map.
    AggregationGroupByTrimmingService aggregationGroupByTrimmingService = new AggregationGroupByTrimmingService(aggregationFunctionContexts, (int) _brokerRequest.getGroupBy().getTopN());
    List<Map<String, Object>> trimmedResults = aggregationGroupByTrimmingService.trimIntermediateResultsMap(resultsMap);
    IntermediateResultsBlock mergedBlock = new IntermediateResultsBlock(aggregationFunctionContexts, trimmedResults, true);
    // Set the processing exceptions.
    if (!mergedProcessingExceptions.isEmpty()) {
        mergedBlock.setProcessingExceptions(new ArrayList<>(mergedProcessingExceptions));
    }
    // Set the execution statistics.
    ExecutionStatistics executionStatistics = new ExecutionStatistics();
    for (Operator operator : _operators) {
        ExecutionStatistics executionStatisticsToMerge = operator.getExecutionStatistics();
        if (executionStatisticsToMerge != null) {
            executionStatistics.merge(executionStatisticsToMerge);
        }
    }
    mergedBlock.setNumDocsScanned(executionStatistics.getNumDocsScanned());
    mergedBlock.setNumEntriesScannedInFilter(executionStatistics.getNumEntriesScannedInFilter());
    mergedBlock.setNumEntriesScannedPostFilter(executionStatistics.getNumEntriesScannedPostFilter());
    mergedBlock.setNumTotalRawDocs(executionStatistics.getNumTotalRawDocs());
    return mergedBlock;
}
Also used : Operator(com.linkedin.pinot.core.common.Operator) AggregationGroupByTrimmingService(com.linkedin.pinot.core.query.aggregation.groupby.AggregationGroupByTrimmingService) AggregationGroupByResult(com.linkedin.pinot.core.query.aggregation.groupby.AggregationGroupByResult) TraceRunnable(com.linkedin.pinot.core.util.trace.TraceRunnable) Iterator(java.util.Iterator) ArrayList(java.util.ArrayList) List(java.util.List) AggregationInfo(com.linkedin.pinot.common.request.AggregationInfo) IntermediateResultsBlock(com.linkedin.pinot.core.operator.blocks.IntermediateResultsBlock) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) AggregationFunctionContext(com.linkedin.pinot.core.query.aggregation.AggregationFunctionContext) GroupKeyGenerator(com.linkedin.pinot.core.query.aggregation.groupby.GroupKeyGenerator) ProcessingException(com.linkedin.pinot.common.response.ProcessingException) TimeoutException(java.util.concurrent.TimeoutException) CountDownLatch(java.util.concurrent.CountDownLatch) TimeoutException(java.util.concurrent.TimeoutException) ProcessingException(com.linkedin.pinot.common.response.ProcessingException) QueryException(com.linkedin.pinot.common.exception.QueryException) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)

Example 2 with AggregationGroupByResult

use of com.linkedin.pinot.core.query.aggregation.groupby.AggregationGroupByResult in project pinot by linkedin.

the class InnerSegmentAggregationMultiValueQueriesTest method testMediumAggregationGroupBy.

@Test
public void testMediumAggregationGroupBy() {
    String query = "SELECT" + AGGREGATION + " FROM testTable" + MEDIUM_GROUP_BY;
    // NOTE: here we assume the first group key returned from the iterator is constant.
    // Test query without filter.
    AggregationGroupByOperator aggregationGroupByOperator = getOperatorForQuery(query);
    IntermediateResultsBlock resultsBlock = (IntermediateResultsBlock) aggregationGroupByOperator.nextBlock();
    ExecutionStatistics executionStatistics = aggregationGroupByOperator.getExecutionStatistics();
    Assert.assertEquals(executionStatistics.getNumDocsScanned(), 100000L);
    Assert.assertEquals(executionStatistics.getNumEntriesScannedInFilter(), 0L);
    Assert.assertEquals(executionStatistics.getNumEntriesScannedPostFilter(), 700000L);
    Assert.assertEquals(executionStatistics.getNumTotalRawDocs(), 100000L);
    AggregationGroupByResult aggregationGroupByResult = resultsBlock.getAggregationGroupByResult();
    GroupKeyGenerator.GroupKey firstGroupKey = aggregationGroupByResult.getGroupKeyIterator().next();
    Assert.assertEquals(firstGroupKey.getStringKey(), "w\t3836469\t204");
    Assert.assertEquals(((Number) aggregationGroupByResult.getResultForKey(firstGroupKey, 0)).longValue(), 1L);
    Assert.assertEquals(((Number) aggregationGroupByResult.getResultForKey(firstGroupKey, 1)).longValue(), 1415527660L);
    Assert.assertEquals(((Number) aggregationGroupByResult.getResultForKey(firstGroupKey, 2)).intValue(), 1747635671);
    Assert.assertEquals(((Number) aggregationGroupByResult.getResultForKey(firstGroupKey, 3)).intValue(), 1298457813);
    AvgPair avgResult = (AvgPair) aggregationGroupByResult.getResultForKey(firstGroupKey, 4);
    Assert.assertEquals((long) avgResult.getSum(), 1235208236L);
    Assert.assertEquals(avgResult.getCount(), 1L);
    // Test query with filter.
    aggregationGroupByOperator = getOperatorForQueryWithFilter(query);
    resultsBlock = (IntermediateResultsBlock) aggregationGroupByOperator.nextBlock();
    executionStatistics = aggregationGroupByOperator.getExecutionStatistics();
    Assert.assertEquals(executionStatistics.getNumDocsScanned(), 15620L);
    Assert.assertEquals(executionStatistics.getNumEntriesScannedInFilter(), 282430L);
    Assert.assertEquals(executionStatistics.getNumEntriesScannedPostFilter(), 109340L);
    Assert.assertEquals(executionStatistics.getNumTotalRawDocs(), 100000L);
    aggregationGroupByResult = resultsBlock.getAggregationGroupByResult();
    firstGroupKey = aggregationGroupByResult.getGroupKeyIterator().next();
    Assert.assertEquals(firstGroupKey.getStringKey(), "L\t1483645\t2147483647");
    Assert.assertEquals(((Number) aggregationGroupByResult.getResultForKey(firstGroupKey, 0)).longValue(), 1L);
    Assert.assertEquals(((Number) aggregationGroupByResult.getResultForKey(firstGroupKey, 1)).longValue(), 650650103L);
    Assert.assertEquals(((Number) aggregationGroupByResult.getResultForKey(firstGroupKey, 2)).intValue(), 108417107);
    Assert.assertEquals(((Number) aggregationGroupByResult.getResultForKey(firstGroupKey, 3)).intValue(), 674022574);
    avgResult = (AvgPair) aggregationGroupByResult.getResultForKey(firstGroupKey, 4);
    Assert.assertEquals((long) avgResult.getSum(), 674022574L);
    Assert.assertEquals(avgResult.getCount(), 1L);
}
Also used : ExecutionStatistics(com.linkedin.pinot.core.operator.ExecutionStatistics) AggregationGroupByOperator(com.linkedin.pinot.core.operator.query.AggregationGroupByOperator) IntermediateResultsBlock(com.linkedin.pinot.core.operator.blocks.IntermediateResultsBlock) GroupKeyGenerator(com.linkedin.pinot.core.query.aggregation.groupby.GroupKeyGenerator) AvgPair(com.linkedin.pinot.core.query.aggregation.function.customobject.AvgPair) AggregationGroupByResult(com.linkedin.pinot.core.query.aggregation.groupby.AggregationGroupByResult) Test(org.testng.annotations.Test)

Example 3 with AggregationGroupByResult

use of com.linkedin.pinot.core.query.aggregation.groupby.AggregationGroupByResult in project pinot by linkedin.

the class InnerSegmentAggregationMultiValueQueriesTest method testLargeAggregationGroupBy.

@Test
public void testLargeAggregationGroupBy() {
    String query = "SELECT" + AGGREGATION + " FROM testTable" + LARGE_GROUP_BY;
    // NOTE: here we assume the first group key returned from the iterator is constant.
    // Test query without filter.
    AggregationGroupByOperator aggregationGroupByOperator = getOperatorForQuery(query);
    IntermediateResultsBlock resultsBlock = (IntermediateResultsBlock) aggregationGroupByOperator.nextBlock();
    ExecutionStatistics executionStatistics = aggregationGroupByOperator.getExecutionStatistics();
    Assert.assertEquals(executionStatistics.getNumDocsScanned(), 100000L);
    Assert.assertEquals(executionStatistics.getNumEntriesScannedInFilter(), 0L);
    Assert.assertEquals(executionStatistics.getNumEntriesScannedPostFilter(), 700000L);
    Assert.assertEquals(executionStatistics.getNumTotalRawDocs(), 100000L);
    AggregationGroupByResult aggregationGroupByResult = resultsBlock.getAggregationGroupByResult();
    GroupKeyGenerator.GroupKey firstGroupKey = aggregationGroupByResult.getGroupKeyIterator().next();
    Assert.assertEquals(firstGroupKey.getStringKey(), "1118965780\t1848116124\t8599\t504\t1597666851\t675163196\t607034543");
    Assert.assertEquals(((Number) aggregationGroupByResult.getResultForKey(firstGroupKey, 0)).longValue(), 1L);
    Assert.assertEquals(((Number) aggregationGroupByResult.getResultForKey(firstGroupKey, 1)).longValue(), 1118965780L);
    Assert.assertEquals(((Number) aggregationGroupByResult.getResultForKey(firstGroupKey, 2)).intValue(), 1848116124);
    Assert.assertEquals(((Number) aggregationGroupByResult.getResultForKey(firstGroupKey, 3)).intValue(), 1597666851);
    AvgPair avgResult = (AvgPair) aggregationGroupByResult.getResultForKey(firstGroupKey, 4);
    Assert.assertEquals((long) avgResult.getSum(), 675163196L);
    Assert.assertEquals(avgResult.getCount(), 1L);
    // Test query with filter.
    aggregationGroupByOperator = getOperatorForQueryWithFilter(query);
    resultsBlock = (IntermediateResultsBlock) aggregationGroupByOperator.nextBlock();
    executionStatistics = aggregationGroupByOperator.getExecutionStatistics();
    Assert.assertEquals(executionStatistics.getNumDocsScanned(), 15620L);
    Assert.assertEquals(executionStatistics.getNumEntriesScannedInFilter(), 282430L);
    Assert.assertEquals(executionStatistics.getNumEntriesScannedPostFilter(), 109340L);
    Assert.assertEquals(executionStatistics.getNumTotalRawDocs(), 100000L);
    aggregationGroupByResult = resultsBlock.getAggregationGroupByResult();
    firstGroupKey = aggregationGroupByResult.getGroupKeyIterator().next();
    Assert.assertEquals(firstGroupKey.getStringKey(), "949960647\t238753654\t2147483647\t2147483647\t674022574\t674022574\t674022574");
    Assert.assertEquals(((Number) aggregationGroupByResult.getResultForKey(firstGroupKey, 0)).longValue(), 2L);
    Assert.assertEquals(((Number) aggregationGroupByResult.getResultForKey(firstGroupKey, 1)).longValue(), 1899921294L);
    Assert.assertEquals(((Number) aggregationGroupByResult.getResultForKey(firstGroupKey, 2)).intValue(), 238753654);
    Assert.assertEquals(((Number) aggregationGroupByResult.getResultForKey(firstGroupKey, 3)).intValue(), 674022574);
    avgResult = (AvgPair) aggregationGroupByResult.getResultForKey(firstGroupKey, 4);
    Assert.assertEquals((long) avgResult.getSum(), 1348045148L);
    Assert.assertEquals(avgResult.getCount(), 2L);
}
Also used : ExecutionStatistics(com.linkedin.pinot.core.operator.ExecutionStatistics) AggregationGroupByOperator(com.linkedin.pinot.core.operator.query.AggregationGroupByOperator) IntermediateResultsBlock(com.linkedin.pinot.core.operator.blocks.IntermediateResultsBlock) GroupKeyGenerator(com.linkedin.pinot.core.query.aggregation.groupby.GroupKeyGenerator) AvgPair(com.linkedin.pinot.core.query.aggregation.function.customobject.AvgPair) AggregationGroupByResult(com.linkedin.pinot.core.query.aggregation.groupby.AggregationGroupByResult) Test(org.testng.annotations.Test)

Example 4 with AggregationGroupByResult

use of com.linkedin.pinot.core.query.aggregation.groupby.AggregationGroupByResult in project pinot by linkedin.

the class InnerSegmentAggregationSingleValueQueriesTest method testSmallAggregationGroupBy.

@Test
public void testSmallAggregationGroupBy() {
    String query = "SELECT" + AGGREGATION + " FROM testTable" + SMALL_GROUP_BY;
    // NOTE: here we assume the first group key returned from the iterator is constant.
    // Test query without filter.
    AggregationGroupByOperator aggregationGroupByOperator = getOperatorForQuery(query);
    IntermediateResultsBlock resultsBlock = (IntermediateResultsBlock) aggregationGroupByOperator.nextBlock();
    ExecutionStatistics executionStatistics = aggregationGroupByOperator.getExecutionStatistics();
    Assert.assertEquals(executionStatistics.getNumDocsScanned(), 30000L);
    Assert.assertEquals(executionStatistics.getNumEntriesScannedInFilter(), 0L);
    Assert.assertEquals(executionStatistics.getNumEntriesScannedPostFilter(), 150000L);
    Assert.assertEquals(executionStatistics.getNumTotalRawDocs(), 30000L);
    AggregationGroupByResult aggregationGroupByResult = resultsBlock.getAggregationGroupByResult();
    GroupKeyGenerator.GroupKey firstGroupKey = aggregationGroupByResult.getGroupKeyIterator().next();
    Assert.assertEquals(firstGroupKey.getStringKey(), "11270");
    Assert.assertEquals(((Number) aggregationGroupByResult.getResultForKey(firstGroupKey, 0)).longValue(), 1L);
    Assert.assertEquals(((Number) aggregationGroupByResult.getResultForKey(firstGroupKey, 1)).longValue(), 815409257L);
    Assert.assertEquals(((Number) aggregationGroupByResult.getResultForKey(firstGroupKey, 2)).intValue(), 1215316262);
    Assert.assertEquals(((Number) aggregationGroupByResult.getResultForKey(firstGroupKey, 3)).intValue(), 1328642550);
    AvgPair avgResult = (AvgPair) aggregationGroupByResult.getResultForKey(firstGroupKey, 4);
    Assert.assertEquals((long) avgResult.getSum(), 788414092L);
    Assert.assertEquals(avgResult.getCount(), 1L);
    // Test query with filter.
    aggregationGroupByOperator = getOperatorForQueryWithFilter(query);
    resultsBlock = (IntermediateResultsBlock) aggregationGroupByOperator.nextBlock();
    executionStatistics = aggregationGroupByOperator.getExecutionStatistics();
    Assert.assertEquals(executionStatistics.getNumDocsScanned(), 6129L);
    Assert.assertEquals(executionStatistics.getNumEntriesScannedInFilter(), 84134L);
    Assert.assertEquals(executionStatistics.getNumEntriesScannedPostFilter(), 30645L);
    Assert.assertEquals(executionStatistics.getNumTotalRawDocs(), 30000L);
    aggregationGroupByResult = resultsBlock.getAggregationGroupByResult();
    firstGroupKey = aggregationGroupByResult.getGroupKeyIterator().next();
    Assert.assertEquals(firstGroupKey.getStringKey(), "242920");
    Assert.assertEquals(((Number) aggregationGroupByResult.getResultForKey(firstGroupKey, 0)).longValue(), 3L);
    Assert.assertEquals(((Number) aggregationGroupByResult.getResultForKey(firstGroupKey, 1)).longValue(), 4348938306L);
    Assert.assertEquals(((Number) aggregationGroupByResult.getResultForKey(firstGroupKey, 2)).intValue(), 407993712);
    Assert.assertEquals(((Number) aggregationGroupByResult.getResultForKey(firstGroupKey, 3)).intValue(), 296467636);
    avgResult = (AvgPair) aggregationGroupByResult.getResultForKey(firstGroupKey, 4);
    Assert.assertEquals((long) avgResult.getSum(), 5803888725L);
    Assert.assertEquals(avgResult.getCount(), 3L);
}
Also used : ExecutionStatistics(com.linkedin.pinot.core.operator.ExecutionStatistics) AggregationGroupByOperator(com.linkedin.pinot.core.operator.query.AggregationGroupByOperator) IntermediateResultsBlock(com.linkedin.pinot.core.operator.blocks.IntermediateResultsBlock) GroupKeyGenerator(com.linkedin.pinot.core.query.aggregation.groupby.GroupKeyGenerator) AvgPair(com.linkedin.pinot.core.query.aggregation.function.customobject.AvgPair) AggregationGroupByResult(com.linkedin.pinot.core.query.aggregation.groupby.AggregationGroupByResult) Test(org.testng.annotations.Test)

Example 5 with AggregationGroupByResult

use of com.linkedin.pinot.core.query.aggregation.groupby.AggregationGroupByResult in project pinot by linkedin.

the class TransformGroupByTest method testGroupByString.

/**
   * Test for group-by with transformed string dimension column.
   */
@Test
public void testGroupByString() throws Exception {
    String query = String.format("select sum(%s) from xformSegTable group by ToUpper(%s)", METRIC_NAME, DIMENSION_NAME);
    AggregationGroupByResult groupByResult = executeGroupByQuery(_indexSegment, query);
    Assert.assertNotNull(groupByResult);
    // Compute the expected answer for the query.
    Map<String, Double> expectedValuesMap = new HashMap<>();
    _recordReader.rewind();
    for (int row = 0; row < NUM_ROWS; row++) {
        GenericRow genericRow = _recordReader.next();
        String key = ((String) genericRow.getValue(DIMENSION_NAME)).toUpperCase();
        Double value = (Double) genericRow.getValue(METRIC_NAME);
        Double prevValue = expectedValuesMap.get(key);
        if (prevValue == null) {
            expectedValuesMap.put(key, value);
        } else {
            expectedValuesMap.put(key, prevValue + value);
        }
    }
    compareGroupByResults(groupByResult, expectedValuesMap);
}
Also used : GenericRow(com.linkedin.pinot.core.data.GenericRow) HashMap(java.util.HashMap) AggregationGroupByResult(com.linkedin.pinot.core.query.aggregation.groupby.AggregationGroupByResult) Test(org.testng.annotations.Test)

Aggregations

AggregationGroupByResult (com.linkedin.pinot.core.query.aggregation.groupby.AggregationGroupByResult)9 Test (org.testng.annotations.Test)8 IntermediateResultsBlock (com.linkedin.pinot.core.operator.blocks.IntermediateResultsBlock)7 GroupKeyGenerator (com.linkedin.pinot.core.query.aggregation.groupby.GroupKeyGenerator)7 ExecutionStatistics (com.linkedin.pinot.core.operator.ExecutionStatistics)6 AggregationGroupByOperator (com.linkedin.pinot.core.operator.query.AggregationGroupByOperator)6 AvgPair (com.linkedin.pinot.core.query.aggregation.function.customobject.AvgPair)6 GenericRow (com.linkedin.pinot.core.data.GenericRow)2 HashMap (java.util.HashMap)2 QueryException (com.linkedin.pinot.common.exception.QueryException)1 AggregationInfo (com.linkedin.pinot.common.request.AggregationInfo)1 ProcessingException (com.linkedin.pinot.common.response.ProcessingException)1 Operator (com.linkedin.pinot.core.common.Operator)1 AggregationFunctionContext (com.linkedin.pinot.core.query.aggregation.AggregationFunctionContext)1 AggregationGroupByTrimmingService (com.linkedin.pinot.core.query.aggregation.groupby.AggregationGroupByTrimmingService)1 TraceRunnable (com.linkedin.pinot.core.util.trace.TraceRunnable)1 ArrayList (java.util.ArrayList)1 Iterator (java.util.Iterator)1 List (java.util.List)1 Map (java.util.Map)1