
Example 1 with AggregationFunctionContext

Use of com.linkedin.pinot.core.query.aggregation.AggregationFunctionContext in project pinot by linkedin.

From class IntermediateResultsBlock, method getAggregationResultDataTable:

@Nonnull
private DataTable getAggregationResultDataTable() throws Exception {
    // Extract each aggregation column name and type from aggregation function context.
    int numAggregationFunctions = _aggregationFunctionContexts.length;
    String[] columnNames = new String[numAggregationFunctions];
    FieldSpec.DataType[] columnTypes = new FieldSpec.DataType[numAggregationFunctions];
    for (int i = 0; i < numAggregationFunctions; i++) {
        AggregationFunctionContext aggregationFunctionContext = _aggregationFunctionContexts[i];
        columnNames[i] = aggregationFunctionContext.getAggregationColumnName();
        columnTypes[i] = aggregationFunctionContext.getAggregationFunction().getIntermediateResultDataType();
    }
    // Build the data table.
    DataTableBuilder dataTableBuilder = new DataTableBuilder(new DataSchema(columnNames, columnTypes));
    dataTableBuilder.startRow();
    for (int i = 0; i < numAggregationFunctions; i++) {
        switch(columnTypes[i]) {
            case LONG:
                dataTableBuilder.setColumn(i, ((Number) _aggregationResult.get(i)).longValue());
                break;
            case DOUBLE:
                dataTableBuilder.setColumn(i, ((Double) _aggregationResult.get(i)).doubleValue());
                break;
            case OBJECT:
                dataTableBuilder.setColumn(i, _aggregationResult.get(i));
                break;
            default:
                throw new UnsupportedOperationException("Unsupported aggregation column data type: " + columnTypes[i] + " for column: " + columnNames[i]);
        }
    }
    dataTableBuilder.finishRow();
    DataTable dataTable = dataTableBuilder.build();
    return attachMetadataToDataTable(dataTable);
}
Also used : DataSchema(com.linkedin.pinot.common.utils.DataSchema) DataTable(com.linkedin.pinot.common.utils.DataTable) AggregationFunctionContext(com.linkedin.pinot.core.query.aggregation.AggregationFunctionContext) DataTableBuilder(com.linkedin.pinot.core.common.datatable.DataTableBuilder) Nonnull(javax.annotation.Nonnull)
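
The builder sequence used above (DataSchema construction, startRow, one setColumn per column, finishRow, build) can be exercised on its own. Below is a minimal sketch using only the calls visible in this method; the column names and values are made up for illustration, and the FieldSpec import path is assumed:

import com.linkedin.pinot.common.data.FieldSpec;  // assumed import path
import com.linkedin.pinot.common.utils.DataSchema;
import com.linkedin.pinot.common.utils.DataTable;
import com.linkedin.pinot.core.common.datatable.DataTableBuilder;

public static DataTable buildSingleRowTable() throws Exception {
    // Hypothetical columns: a LONG count and a DOUBLE average.
    String[] columnNames = {"count_star", "avg_value"};
    FieldSpec.DataType[] columnTypes = {FieldSpec.DataType.LONG, FieldSpec.DataType.DOUBLE};
    DataTableBuilder dataTableBuilder = new DataTableBuilder(new DataSchema(columnNames, columnTypes));
    dataTableBuilder.startRow();
    dataTableBuilder.setColumn(0, 42L);   // LONG intermediate result
    dataTableBuilder.setColumn(1, 3.14);  // DOUBLE intermediate result
    dataTableBuilder.finishRow();
    return dataTableBuilder.build();
}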

Example 2 with AggregationFunctionContext

Use of com.linkedin.pinot.core.query.aggregation.AggregationFunctionContext in project pinot by linkedin.

From class MCombineGroupByOperator, method combineBlocks:

/**
   * This method combines the result blocks from underlying operators and builds a
   * merged, sorted and trimmed result block.
   * 1. Result blocks from underlying operators are merged concurrently into a
   *   HashMap, with appropriate synchronization. Result blocks themselves are stored
   *   in the specified blocks[].
   *   - The key in this concurrent map is the group-by key, and value is an array of
   *     Objects (one for each aggregation function).
   *   - Synchronization is provided by locking the group-key that is to be modified.
   *
   * 2. The result of the concurrent map is then translated into what is expected by
   *    the broker (List<Map<String, Object>>).
   *
   * 3. This result is then sorted and then trimmed as per 'TOP N' in the brokerRequest.
   *
   * @return IntermediateResultsBlock containing the final results from the combine operation.
   */
private IntermediateResultsBlock combineBlocks() throws InterruptedException {
    int numOperators = _operators.size();
    final CountDownLatch operatorLatch = new CountDownLatch(numOperators);
    final Map<String, Object[]> resultsMap = new ConcurrentHashMap<>();
    final ConcurrentLinkedQueue<ProcessingException> mergedProcessingExceptions = new ConcurrentLinkedQueue<>();
    List<AggregationInfo> aggregationInfos = _brokerRequest.getAggregationsInfo();
    final AggregationFunctionContext[] aggregationFunctionContexts = AggregationFunctionUtils.getAggregationFunctionContexts(aggregationInfos, null);
    final int numAggregationFunctions = aggregationFunctionContexts.length;
    for (int i = 0; i < numOperators; i++) {
        final int index = i;
        _executorService.execute(new TraceRunnable() {

            @SuppressWarnings("unchecked")
            @Override
            public void runJob() {
                AggregationGroupByResult aggregationGroupByResult;
                try {
                    IntermediateResultsBlock intermediateResultsBlock = (IntermediateResultsBlock) _operators.get(index).nextBlock();
                    // Merge processing exceptions.
                    List<ProcessingException> processingExceptionsToMerge = intermediateResultsBlock.getProcessingExceptions();
                    if (processingExceptionsToMerge != null) {
                        mergedProcessingExceptions.addAll(processingExceptionsToMerge);
                    }
                    // Merge aggregation group-by result.
                    aggregationGroupByResult = intermediateResultsBlock.getAggregationGroupByResult();
                    if (aggregationGroupByResult != null) {
                        // Iterate over the group-by keys, for each key, update the group-by result in the resultsMap.
                        Iterator<GroupKeyGenerator.GroupKey> groupKeyIterator = aggregationGroupByResult.getGroupKeyIterator();
                        while (groupKeyIterator.hasNext()) {
                            GroupKeyGenerator.GroupKey groupKey = groupKeyIterator.next();
                            String groupKeyString = groupKey.getStringKey();
                            // hashCode() might return a negative value; mask it to keep the index non-negative.
                            int lockIndex = (groupKeyString.hashCode() & Integer.MAX_VALUE) % NUM_LOCKS;
                            synchronized (LOCKS[lockIndex]) {
                                Object[] results = resultsMap.get(groupKeyString);
                                if (results == null) {
                                    results = new Object[numAggregationFunctions];
                                    for (int j = 0; j < numAggregationFunctions; j++) {
                                        results[j] = aggregationGroupByResult.getResultForKey(groupKey, j);
                                    }
                                    resultsMap.put(groupKeyString, results);
                                } else {
                                    for (int j = 0; j < numAggregationFunctions; j++) {
                                        results[j] = aggregationFunctionContexts[j].getAggregationFunction().merge(results[j], aggregationGroupByResult.getResultForKey(groupKey, j));
                                    }
                                }
                            }
                        }
                    }
                } catch (Exception e) {
                    LOGGER.error("Exception processing CombineGroupBy for index {}, operator {}", index, _operators.get(index).getClass().getName(), e);
                    mergedProcessingExceptions.add(QueryException.getException(QueryException.QUERY_EXECUTION_ERROR, e));
                }
                operatorLatch.countDown();
            }
        });
    }
    boolean opCompleted = operatorLatch.await(_timeOutMs, TimeUnit.MILLISECONDS);
    if (!opCompleted) {
        // If this happens, the broker side should have already timed out; just log the error on the server side.
        LOGGER.error("Timed out while combining group-by results, after {}ms.", _timeOutMs);
        return new IntermediateResultsBlock(new TimeoutException("CombineGroupBy timed out."));
    }
    // Trim the results map.
    AggregationGroupByTrimmingService aggregationGroupByTrimmingService = new AggregationGroupByTrimmingService(aggregationFunctionContexts, (int) _brokerRequest.getGroupBy().getTopN());
    List<Map<String, Object>> trimmedResults = aggregationGroupByTrimmingService.trimIntermediateResultsMap(resultsMap);
    IntermediateResultsBlock mergedBlock = new IntermediateResultsBlock(aggregationFunctionContexts, trimmedResults, true);
    // Set the processing exceptions.
    if (!mergedProcessingExceptions.isEmpty()) {
        mergedBlock.setProcessingExceptions(new ArrayList<>(mergedProcessingExceptions));
    }
    // Set the execution statistics.
    ExecutionStatistics executionStatistics = new ExecutionStatistics();
    for (Operator operator : _operators) {
        ExecutionStatistics executionStatisticsToMerge = operator.getExecutionStatistics();
        if (executionStatisticsToMerge != null) {
            executionStatistics.merge(executionStatisticsToMerge);
        }
    }
    mergedBlock.setNumDocsScanned(executionStatistics.getNumDocsScanned());
    mergedBlock.setNumEntriesScannedInFilter(executionStatistics.getNumEntriesScannedInFilter());
    mergedBlock.setNumEntriesScannedPostFilter(executionStatistics.getNumEntriesScannedPostFilter());
    mergedBlock.setNumTotalRawDocs(executionStatistics.getNumTotalRawDocs());
    return mergedBlock;
}
Also used : Operator(com.linkedin.pinot.core.common.Operator) AggregationGroupByTrimmingService(com.linkedin.pinot.core.query.aggregation.groupby.AggregationGroupByTrimmingService) AggregationGroupByResult(com.linkedin.pinot.core.query.aggregation.groupby.AggregationGroupByResult) TraceRunnable(com.linkedin.pinot.core.util.trace.TraceRunnable) Iterator(java.util.Iterator) ArrayList(java.util.ArrayList) List(java.util.List) AggregationInfo(com.linkedin.pinot.common.request.AggregationInfo) IntermediateResultsBlock(com.linkedin.pinot.core.operator.blocks.IntermediateResultsBlock) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) AggregationFunctionContext(com.linkedin.pinot.core.query.aggregation.AggregationFunctionContext) GroupKeyGenerator(com.linkedin.pinot.core.query.aggregation.groupby.GroupKeyGenerator) ProcessingException(com.linkedin.pinot.common.response.ProcessingException) TimeoutException(java.util.concurrent.TimeoutException) CountDownLatch(java.util.concurrent.CountDownLatch) QueryException(com.linkedin.pinot.common.exception.QueryException) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) Map(java.util.Map)
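
The synchronization scheme in runJob() is classic lock striping: each group key is hashed to one of NUM_LOCKS pre-allocated lock objects, so writers to different keys rarely contend while writers to the same key are serialized. Below is a self-contained sketch of the same idea; the lock count and merge function are illustrative stand-ins, not Pinot APIs:

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.BinaryOperator;

final class StripedMergeMap<V> {
    // Fixed pool of locks; 1000 mirrors the spirit of NUM_LOCKS above but is an assumed value.
    private static final int NUM_LOCKS = 1000;
    private static final Object[] LOCKS = new Object[NUM_LOCKS];
    static {
        for (int i = 0; i < NUM_LOCKS; i++) {
            LOCKS[i] = new Object();
        }
    }

    private final Map<String, V> map = new ConcurrentHashMap<>();

    // Merge 'value' into the entry for 'key' while holding only that key's stripe lock.
    void merge(String key, V value, BinaryOperator<V> mergeFunction) {
        // hashCode() may be negative; mask with Integer.MAX_VALUE as the operator above does.
        int lockIndex = (key.hashCode() & Integer.MAX_VALUE) % NUM_LOCKS;
        synchronized (LOCKS[lockIndex]) {
            V existing = map.get(key);
            map.put(key, existing == null ? value : mergeFunction.apply(existing, value));
        }
    }
}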

Example 3 with AggregationFunctionContext

Use of com.linkedin.pinot.core.query.aggregation.AggregationFunctionContext in project pinot by linkedin.

From class AggregationFunctionUtils, method getAggregationFunctionContexts:

@Nonnull
public static AggregationFunctionContext[] getAggregationFunctionContexts(@Nonnull List<AggregationInfo> aggregationInfos, @Nullable SegmentMetadata segmentMetadata) {
    int numAggregationFunctions = aggregationInfos.size();
    AggregationFunctionContext[] aggregationFunctionContexts = new AggregationFunctionContext[numAggregationFunctions];
    for (int i = 0; i < numAggregationFunctions; i++) {
        AggregationInfo aggregationInfo = aggregationInfos.get(i);
        aggregationFunctionContexts[i] = AggregationFunctionContext.instantiate(aggregationInfo);
    }
    if (segmentMetadata != null) {
        AggregationFunctionInitializer aggregationFunctionInitializer = new AggregationFunctionInitializer(segmentMetadata);
        for (AggregationFunctionContext aggregationFunctionContext : aggregationFunctionContexts) {
            aggregationFunctionContext.getAggregationFunction().accept(aggregationFunctionInitializer);
        }
    }
    return aggregationFunctionContexts;
}
Also used : AggregationFunctionInitializer(com.linkedin.pinot.core.plan.AggregationFunctionInitializer) AggregationInfo(com.linkedin.pinot.common.request.AggregationInfo) AggregationFunctionContext(com.linkedin.pinot.core.query.aggregation.AggregationFunctionContext) Nonnull(javax.annotation.Nonnull)
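
On this page, the combine-level call in Example 2 passes null for segmentMetadata, so the initializer visitor step is skipped; Example 5 below performs the equivalent segment-level initialization by hand. A brief usage sketch, assuming aggregationInfos and segmentMetadata are in scope:

// Segment level: each aggregation function is initialized against segment metadata.
AggregationFunctionContext[] segmentLevelContexts =
        AggregationFunctionUtils.getAggregationFunctionContexts(aggregationInfos, segmentMetadata);
// Combine level (as in MCombineGroupByOperator.combineBlocks above): no metadata available.
AggregationFunctionContext[] combineLevelContexts =
        AggregationFunctionUtils.getAggregationFunctionContexts(aggregationInfos, null);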

Example 4 with AggregationFunctionContext

Use of com.linkedin.pinot.core.query.aggregation.AggregationFunctionContext in project pinot by linkedin.

From class CombineService, method mergeTwoBlocks:

public static void mergeTwoBlocks(@Nonnull BrokerRequest brokerRequest, @Nonnull IntermediateResultsBlock mergedBlock, @Nonnull IntermediateResultsBlock blockToMerge) {
    // Combine processing exceptions.
    List<ProcessingException> mergedProcessingExceptions = mergedBlock.getProcessingExceptions();
    List<ProcessingException> processingExceptionsToMerge = blockToMerge.getProcessingExceptions();
    if (mergedProcessingExceptions == null) {
        mergedBlock.setProcessingExceptions(processingExceptionsToMerge);
    } else if (processingExceptionsToMerge != null) {
        mergedProcessingExceptions.addAll(processingExceptionsToMerge);
    }
    // Combine result.
    if (brokerRequest.isSetAggregationsInfo()) {
        if (!brokerRequest.isSetGroupBy()) {
            // Combine aggregation only result.
            // Might be null if an exception was caught during query execution.
            List<Object> aggregationResultToMerge = blockToMerge.getAggregationResult();
            if (aggregationResultToMerge == null) {
                // No data in block to merge.
                return;
            }
            AggregationFunctionContext[] mergedAggregationFunctionContexts = mergedBlock.getAggregationFunctionContexts();
            if (mergedAggregationFunctionContexts == null) {
                // No data in merged block: adopt the contexts and results from the block to merge.
                mergedBlock.setAggregationFunctionContexts(blockToMerge.getAggregationFunctionContexts());
                mergedBlock.setAggregationResults(aggregationResultToMerge);
                // Return here; otherwise the null local is dereferenced below and the
                // freshly copied results would be merged with themselves.
                return;
            }
            // Merge the two blocks.
            List<Object> mergedAggregationResult = mergedBlock.getAggregationResult();
            int numAggregationFunctions = mergedAggregationFunctionContexts.length;
            for (int i = 0; i < numAggregationFunctions; i++) {
                mergedAggregationResult.set(i, mergedAggregationFunctionContexts[i].getAggregationFunction().merge(mergedAggregationResult.get(i), aggregationResultToMerge.get(i)));
            }
        } else {
            // Aggregation group-by results should never reach CombineService.
            throw new UnsupportedOperationException();
        }
    } else {
        // Combine selection result.
        // Data schema will be null if exceptions were caught during query processing.
        // Result set size will be zero if no row matches the predicate.
        DataSchema mergedBlockSchema = mergedBlock.getSelectionDataSchema();
        DataSchema blockToMergeSchema = blockToMerge.getSelectionDataSchema();
        Collection<Serializable[]> mergedBlockResultSet = mergedBlock.getSelectionResult();
        Collection<Serializable[]> blockToMergeResultSet = blockToMerge.getSelectionResult();
        if (mergedBlockSchema == null || mergedBlockResultSet.size() == 0) {
            // If block to merge schema is not null, set its data schema and result to the merged block.
            if (blockToMergeSchema != null) {
                mergedBlock.setSelectionDataSchema(blockToMergeSchema);
                mergedBlock.setSelectionResult(blockToMergeResultSet);
            }
        } else {
            // Some data in merged block.
            Selection selection = brokerRequest.getSelections();
            boolean isSelectionOrderBy = selection.isSetSelectionSortSequence();
            int selectionSize = selection.getSize();
            // No need to merge if already got enough rows for selection only.
            if (!isSelectionOrderBy && (mergedBlockResultSet.size() == selectionSize)) {
                return;
            }
            // Merge only if there are data in block to merge.
            if (blockToMergeSchema != null && blockToMergeResultSet.size() > 0) {
                if (mergedBlockSchema.isTypeCompatibleWith(blockToMergeSchema)) {
                    // Two blocks are mergeable.
                    // Upgrade the merged block schema if necessary.
                    mergedBlockSchema.upgradeToCover(blockToMergeSchema);
                    // Merge two blocks.
                    if (isSelectionOrderBy) {
                        // Combine selection order-by.
                        SelectionOperatorUtils.mergeWithOrdering((PriorityQueue<Serializable[]>) mergedBlockResultSet, blockToMergeResultSet, selection.getOffset() + selectionSize);
                    } else {
                        // Combine selection only.
                        SelectionOperatorUtils.mergeWithoutOrdering(mergedBlockResultSet, blockToMergeResultSet, selectionSize);
                    }
                    mergedBlock.setSelectionResult(mergedBlockResultSet);
                } else {
                    // Two blocks are not mergeable.
                    throw new RuntimeException("Data schema inconsistency between merged block schema: " + mergedBlockSchema + " and block to merge schema: " + blockToMergeSchema + ", drop block to merge.");
                }
            }
        }
    }
}
Also used : Selection(com.linkedin.pinot.common.request.Selection) DataSchema(com.linkedin.pinot.common.utils.DataSchema) AggregationFunctionContext(com.linkedin.pinot.core.query.aggregation.AggregationFunctionContext) ProcessingException(com.linkedin.pinot.common.response.ProcessingException)
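
SelectionOperatorUtils.mergeWithOrdering and mergeWithoutOrdering are not shown on this page. Below is a hedged sketch of what the selection-only path plausibly does, consistent with how it is called above (copy rows from the block to merge until the merged set holds selectionSize rows); this is an assumption, not the library's verified source:

import java.io.Serializable;
import java.util.Collection;
import java.util.Iterator;

// Assumed sketch of the selection-only merge: append rows until the cap is reached.
public static void mergeWithoutOrdering(Collection<Serializable[]> mergedRows,
        Collection<Serializable[]> rowsToMerge, int maxNumRows) {
    Iterator<Serializable[]> iterator = rowsToMerge.iterator();
    while (mergedRows.size() < maxNumRows && iterator.hasNext()) {
        mergedRows.add(iterator.next());
    }
}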

Example 5 with AggregationFunctionContext

Use of com.linkedin.pinot.core.query.aggregation.AggregationFunctionContext in project pinot by linkedin.

From class DefaultAggregationExecutorTest, method testAggregation:

/**
   * Runs 'sum', 'min' & 'max' aggregation functions on the DefaultAggregationExecutor.
   * Asserts that the aggregation results returned by the executor are as expected.
   */
@Test
public void testAggregation() {
    Map<String, BaseOperator> dataSourceMap = new HashMap<>();
    for (String column : _indexSegment.getColumnNames()) {
        dataSourceMap.put(column, _indexSegment.getDataSource(column));
    }
    int totalRawDocs = _indexSegment.getSegmentMetadata().getTotalRawDocs();
    MatchEntireSegmentOperator matchEntireSegmentOperator = new MatchEntireSegmentOperator(totalRawDocs);
    BReusableFilteredDocIdSetOperator docIdSetOperator = new BReusableFilteredDocIdSetOperator(matchEntireSegmentOperator, totalRawDocs, 10000);
    MProjectionOperator projectionOperator = new MProjectionOperator(dataSourceMap, docIdSetOperator);
    TransformExpressionOperator transformOperator = new TransformExpressionOperator(projectionOperator, Collections.<TransformExpressionTree>emptyList());
    TransformBlock transformBlock = (TransformBlock) transformOperator.nextBlock();
    int numAggFuncs = _aggregationInfoList.size();
    AggregationFunctionContext[] aggrFuncContextArray = new AggregationFunctionContext[numAggFuncs];
    AggregationFunctionInitializer aggFuncInitializer = new AggregationFunctionInitializer(_indexSegment.getSegmentMetadata());
    for (int i = 0; i < numAggFuncs; i++) {
        AggregationInfo aggregationInfo = _aggregationInfoList.get(i);
        aggrFuncContextArray[i] = AggregationFunctionContext.instantiate(aggregationInfo);
        aggrFuncContextArray[i].getAggregationFunction().accept(aggFuncInitializer);
    }
    AggregationExecutor aggregationExecutor = new DefaultAggregationExecutor(aggrFuncContextArray);
    aggregationExecutor.init();
    aggregationExecutor.aggregate(transformBlock);
    aggregationExecutor.finish();
    List<Object> result = aggregationExecutor.getResult();
    for (int i = 0; i < result.size(); i++) {
        double actual = (double) result.get(i);
        double expected = computeAggregation(AGGREGATION_FUNCTIONS[i], _inputData[i]);
        Assert.assertEquals(actual, expected, "Aggregation mis-match for function " + AGGREGATION_FUNCTIONS[i] + ", Expected: " + expected + " Actual: " + actual);
    }
}
Also used : BaseOperator(com.linkedin.pinot.core.operator.BaseOperator) TransformExpressionOperator(com.linkedin.pinot.core.operator.transform.TransformExpressionOperator) AggregationFunctionInitializer(com.linkedin.pinot.core.plan.AggregationFunctionInitializer) HashMap(java.util.HashMap) MProjectionOperator(com.linkedin.pinot.core.operator.MProjectionOperator) MatchEntireSegmentOperator(com.linkedin.pinot.core.operator.filter.MatchEntireSegmentOperator) TransformBlock(com.linkedin.pinot.core.operator.blocks.TransformBlock) DefaultAggregationExecutor(com.linkedin.pinot.core.query.aggregation.DefaultAggregationExecutor) AggregationExecutor(com.linkedin.pinot.core.query.aggregation.AggregationExecutor) BReusableFilteredDocIdSetOperator(com.linkedin.pinot.core.operator.BReusableFilteredDocIdSetOperator) AggregationInfo(com.linkedin.pinot.common.request.AggregationInfo) AggregationFunctionContext(com.linkedin.pinot.core.query.aggregation.AggregationFunctionContext) Test(org.testng.annotations.Test)
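
computeAggregation is a test helper that is not shown on this page. Below is a hypothetical version, consistent with the 'sum', 'min' and 'max' functions named in the javadoc and assuming _inputData holds one double[] per aggregation column:

// Hypothetical helper: recompute the expected result for one input column.
private double computeAggregation(String function, double[] values) {
    switch (function) {
        case "sum": {
            double sum = 0.0;
            for (double value : values) {
                sum += value;
            }
            return sum;
        }
        case "min": {
            double min = Double.POSITIVE_INFINITY;
            for (double value : values) {
                min = Math.min(min, value);
            }
            return min;
        }
        case "max": {
            double max = Double.NEGATIVE_INFINITY;
            for (double value : values) {
                max = Math.max(max, value);
            }
            return max;
        }
        default:
            throw new IllegalArgumentException("Unsupported aggregation function: " + function);
    }
}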

Aggregations

AggregationFunctionContext (com.linkedin.pinot.core.query.aggregation.AggregationFunctionContext): 8 usages
AggregationInfo (com.linkedin.pinot.common.request.AggregationInfo): 5 usages
DataSchema (com.linkedin.pinot.common.utils.DataSchema): 4 usages
AggregationFunctionInitializer (com.linkedin.pinot.core.plan.AggregationFunctionInitializer): 3 usages
Nonnull (javax.annotation.Nonnull): 3 usages
Selection (com.linkedin.pinot.common.request.Selection): 2 usages
ProcessingException (com.linkedin.pinot.common.response.ProcessingException): 2 usages
DataTable (com.linkedin.pinot.common.utils.DataTable): 2 usages
Operator (com.linkedin.pinot.core.common.Operator): 2 usages
DataTableBuilder (com.linkedin.pinot.core.common.datatable.DataTableBuilder): 2 usages
BReusableFilteredDocIdSetOperator (com.linkedin.pinot.core.operator.BReusableFilteredDocIdSetOperator): 2 usages
BaseOperator (com.linkedin.pinot.core.operator.BaseOperator): 2 usages
MProjectionOperator (com.linkedin.pinot.core.operator.MProjectionOperator): 2 usages
IntermediateResultsBlock (com.linkedin.pinot.core.operator.blocks.IntermediateResultsBlock): 2 usages
MatchEntireSegmentOperator (com.linkedin.pinot.core.operator.filter.MatchEntireSegmentOperator): 2 usages
TransformExpressionOperator (com.linkedin.pinot.core.operator.transform.TransformExpressionOperator): 2 usages
QueryException (com.linkedin.pinot.common.exception.QueryException): 1 usage
BrokerRequest (com.linkedin.pinot.common.request.BrokerRequest): 1 usage
GroupBy (com.linkedin.pinot.common.request.GroupBy): 1 usage
TransformBlock (com.linkedin.pinot.core.operator.blocks.TransformBlock): 1 usage