use of com.linkedin.pinot.core.operator.filter.MatchEntireSegmentOperator in project pinot by linkedin.
the class NoDictionaryGroupKeyGeneratorTest method testGroupKeyGenerator.
/**
 * Feeds every transform block of the test data through a no-dictionary group key generator and
 * checks the generated group keys against the expected keys computed from the record reader.
 *
 * @param groupByColumns Columns to group on; exactly one column selects the single-column
 *     generator, any other count selects the multi-column generator
 * @param dataTypes Data types of the group-by columns (only the first entry is read, and only in
 *     the single-column case)
 * @throws Exception Propagated from the helpers used by the test
 */
private void testGroupKeyGenerator(String[] groupByColumns, FieldSpec.DataType[] dataTypes) throws Exception {
  // Assemble the operator chain: match-all filter -> doc id set -> projection -> transform.
  MatchEntireSegmentOperator matchAllFilter = new MatchEntireSegmentOperator(NUM_ROWS);
  BReusableFilteredDocIdSetOperator docIds =
      new BReusableFilteredDocIdSetOperator(matchAllFilter, NUM_ROWS, 10000);
  MProjectionOperator projection = new MProjectionOperator(_dataSourceMap, docIds);
  TransformExpressionOperator transform =
      new TransformExpressionOperator(projection, new ArrayList<TransformExpressionTree>());

  // Walk all transform blocks, creating the generator lazily on the first block because the
  // multi-column generator needs a block at construction time.
  int[] groupKeyBuffer = new int[DocIdSetPlanNode.MAX_DOC_PER_CALL];
  GroupKeyGenerator keyGenerator = null;
  for (TransformBlock block = (TransformBlock) transform.nextBlock();
      block != null;
      block = (TransformBlock) transform.nextBlock()) {
    if (keyGenerator == null) {
      if (groupByColumns.length == 1) {
        keyGenerator = new NoDictionarySingleColumnGroupKeyGenerator(groupByColumns[0], dataTypes[0]);
      } else {
        keyGenerator = new NoDictionaryMultiColumnGroupKeyGenerator(block, groupByColumns);
      }
    }
    keyGenerator.generateKeysForBlock(block, groupKeyBuffer);
  }

  // The generator only exists if at least one block was produced.
  Assert.assertTrue(keyGenerator != null);

  // The number of generated keys must match the number of expected keys.
  Set<String> expected = getExpectedGroupKeys(_recordReader, groupByColumns);
  Assert.assertEquals(keyGenerator.getCurrentGroupKeyUpperBound(), expected.size(), "Number of group keys mis-match.");

  // Every generated key must be one of the expected keys.
  for (Iterator<GroupKeyGenerator.GroupKey> keys = keyGenerator.getUniqueGroupKeys(); keys.hasNext(); ) {
    String actual = keys.next().getStringKey();
    Assert.assertTrue(expected.contains(actual), "Unexpected group key: " + actual);
  }
}
use of com.linkedin.pinot.core.operator.filter.MatchEntireSegmentOperator in project pinot by linkedin.
the class FilterPlanNode method constructPhysicalOperator.
/**
 * Recursively converts a filter query tree into the matching tree of physical filter operators.
 *
 * @param filterQueryTree Root of the filter query (sub-)tree; when null, an operator matching the
 *     entire segment is returned
 * @param segment Index segment
 * @param optimizeAlwaysFalse Optimize isResultEmpty predicates
 * @return Filter Operator created; an empty filter operator when the built operator reports an
 *     empty result
 */
@VisibleForTesting
public static BaseFilterOperator constructPhysicalOperator(FilterQueryTree filterQueryTree, IndexSegment segment, boolean optimizeAlwaysFalse) {
  // No filter at all: every document of the segment matches.
  if (filterQueryTree == null) {
    return new MatchEntireSegmentOperator(segment.getSegmentMetadata().getTotalRawDocs());
  }

  BaseFilterOperator result;
  final List<FilterQueryTree> children = filterQueryTree.getChildren();
  if (children != null && !children.isEmpty()) {
    // Non-leaf node: build each child recursively, then combine them.
    int alwaysFalseCount = 0;
    int childCount = children.size();
    List<BaseFilterOperator> childOperators = new ArrayList<>();
    final FilterOperator operatorType = filterQueryTree.getOperator();
    for (final FilterQueryTree child : children) {
      BaseFilterOperator childOperator = constructPhysicalOperator(child, segment, optimizeAlwaysFalse);
      boolean childAlwaysFalse = optimizeAlwaysFalse && childOperator.isResultEmpty();
      if (childAlwaysFalse) {
        alwaysFalseCount++;
        // Early bailout for 'AND' as soon as one of the children always evaluates to false.
        if (operatorType == FilterOperator.AND) {
          break;
        }
      }
      childOperators.add(childOperator);
    }
    result = buildNonLeafOperator(operatorType, childOperators, alwaysFalseCount, childCount, optimizeAlwaysFalse);
  } else {
    // Leaf node: pick the operator implementation from the column's index metadata.
    final FilterOperator operatorType = filterQueryTree.getOperator();
    final String column = filterQueryTree.getColumn();
    Predicate predicate = Predicate.newPredicate(filterQueryTree);
    DataSource dataSource = segment.getDataSource(column);
    DataSourceMetadata metadata = dataSource.getDataSourceMetadata();
    final int firstDocId = 0;
    // The end doc id is inclusive.
    final int lastDocId = segment.getSegmentMetadata().getTotalRawDocs() - 1;

    // Range evaluation based on the inverted index is inefficient, so the index is used only
    // for non-range predicates.
    boolean useInvertedIndex = metadata.hasInvertedIndex() && !operatorType.equals(FilterOperator.RANGE);
    if (!useInvertedIndex) {
      result = new ScanBasedFilterOperator(predicate, dataSource, firstDocId, lastDocId);
    } else if (metadata.isSingleValue() && metadata.isSorted()) {
      // Sorted single-value column: use the sorted inverted index based implementation.
      result = new SortedInvertedIndexBasedFilterOperator(predicate, dataSource, firstDocId, lastDocId);
    } else {
      result = new BitmapBasedFilterOperator(predicate, dataSource, firstDocId, lastDocId);
    }
  }

  // If operator evaluates to false, then just return an empty operator.
  return result.isResultEmpty() ? new EmptyFilterOperator() : result;
}
use of com.linkedin.pinot.core.operator.filter.MatchEntireSegmentOperator in project pinot by linkedin.
the class TransformGroupByTest method executeGroupByQuery.
/**
 * Helper method that executes the group by query on the index and returns the group by result.
 *
 * @param indexSegment Index segment to run the query against
 * @param query Query to execute
 * @return Group by result
 */
private AggregationGroupByResult executeGroupByQuery(IndexSegment indexSegment, String query) {
  // Operator chain over all documents: match-all filter -> doc id set -> projection.
  int totalDocs = indexSegment.getSegmentMetadata().getTotalDocs();
  Operator matchAllFilter = new MatchEntireSegmentOperator(totalDocs);
  final BReusableFilteredDocIdSetOperator docIds =
      new BReusableFilteredDocIdSetOperator(matchAllFilter, totalDocs, NUM_ROWS);
  final Map<String, BaseOperator> dataSources = buildDataSourceMap(indexSegment.getSegmentMetadata().getSchema());
  final MProjectionOperator projection = new MProjectionOperator(dataSources, docIds);

  // Compile the query and build one initialized function context per aggregation.
  BrokerRequest request = new Pql2Compiler().compileToBrokerRequest(query);
  List<AggregationInfo> aggregationInfos = request.getAggregationsInfo();
  AggregationFunctionContext[] functionContexts = new AggregationFunctionContext[aggregationInfos.size()];
  AggregationFunctionInitializer initializer = new AggregationFunctionInitializer(indexSegment.getSegmentMetadata());
  int slot = 0;
  for (AggregationInfo info : aggregationInfos) {
    functionContexts[slot] = AggregationFunctionContext.instantiate(info);
    functionContexts[slot].getAggregationFunction().accept(initializer);
    slot++;
  }

  // Evaluate the group-by expressions through the transform operator, then aggregate.
  GroupBy groupBy = request.getGroupBy();
  Set<String> groupByExpressions = new HashSet<>(groupBy.getExpressions());
  TransformExpressionOperator transform =
      new TransformExpressionOperator(projection, TransformPlanNode.buildTransformExpressionTrees(groupByExpressions));
  AggregationGroupByOperator groupByOperator =
      new AggregationGroupByOperator(functionContexts, groupBy, Integer.MAX_VALUE, transform, NUM_ROWS);
  IntermediateResultsBlock resultsBlock = (IntermediateResultsBlock) groupByOperator.nextBlock();
  return resultsBlock.getAggregationGroupByResult();
}
use of com.linkedin.pinot.core.operator.filter.MatchEntireSegmentOperator in project pinot by linkedin.
the class TransformExpressionOperatorTest method evaluateExpression.
/**
 * Helper method to evaluate one expression using the TransformOperator.
 *
 * <p>Only the first block returned by the transform operator is evaluated. The operator is
 * closed even when evaluation throws.
 *
 * @param expression Expression to evaluate
 * @return Result of evaluation
 */
private double[] evaluateExpression(String expression) {
  // Operator chain over all documents: match-all filter -> doc id set -> projection.
  Operator filterOperator = new MatchEntireSegmentOperator(_indexSegment.getSegmentMetadata().getTotalDocs());
  final BReusableFilteredDocIdSetOperator docIdSetOperator = new BReusableFilteredDocIdSetOperator(filterOperator, _indexSegment.getSegmentMetadata().getTotalDocs(), NUM_ROWS);
  final Map<String, BaseOperator> dataSourceMap = buildDataSourceMap(_indexSegment.getSegmentMetadata().getSchema());
  final MProjectionOperator projectionOperator = new MProjectionOperator(dataSourceMap, docIdSetOperator);

  // Compile the single expression into the tree the transform operator evaluates.
  Pql2Compiler compiler = new Pql2Compiler();
  List<TransformExpressionTree> expressionTrees = new ArrayList<>(1);
  expressionTrees.add(compiler.compileToExpressionTree(expression));
  TransformExpressionOperator transformOperator = new TransformExpressionOperator(projectionOperator, expressionTrees);

  transformOperator.open();
  try {
    TransformBlock transformBlock = (TransformBlock) transformOperator.getNextBlock();
    BlockValSet blockValueSet = transformBlock.getBlockValueSet(expression);
    return blockValueSet.getDoubleValuesSV();
  } finally {
    // Pair every open() with a close(), including when evaluation fails part-way.
    transformOperator.close();
  }
}
use of com.linkedin.pinot.core.operator.filter.MatchEntireSegmentOperator in project pinot by linkedin.
the class DefaultAggregationExecutorTest method testAggregation.
/**
 * Runs 'sum', 'min' & 'max' aggregation functions on the DefaultAggregationExecutor.
 * Asserts that each result returned by the executor equals the value computed independently
 * from the input data.
 */
@Test
void testAggregation() {
  // Expose every column of the segment as a data source.
  Map<String, BaseOperator> dataSources = new HashMap<>();
  for (String columnName : _indexSegment.getColumnNames()) {
    dataSources.put(columnName, _indexSegment.getDataSource(columnName));
  }

  // Operator chain over all documents, with no transform expressions; fetch the first block.
  int totalRawDocs = _indexSegment.getSegmentMetadata().getTotalRawDocs();
  MatchEntireSegmentOperator matchAllFilter = new MatchEntireSegmentOperator(totalRawDocs);
  BReusableFilteredDocIdSetOperator docIds =
      new BReusableFilteredDocIdSetOperator(matchAllFilter, totalRawDocs, 10000);
  MProjectionOperator projection = new MProjectionOperator(dataSources, docIds);
  TransformExpressionOperator transform =
      new TransformExpressionOperator(projection, Collections.<TransformExpressionTree>emptyList());
  TransformBlock firstBlock = (TransformBlock) transform.nextBlock();

  // Build one initialized function context per configured aggregation.
  int functionCount = _aggregationInfoList.size();
  AggregationFunctionContext[] functionContexts = new AggregationFunctionContext[functionCount];
  AggregationFunctionInitializer initializer = new AggregationFunctionInitializer(_indexSegment.getSegmentMetadata());
  for (int slot = 0; slot < functionCount; slot++) {
    functionContexts[slot] = AggregationFunctionContext.instantiate(_aggregationInfoList.get(slot));
    functionContexts[slot].getAggregationFunction().accept(initializer);
  }

  // Run the executor over the single block.
  AggregationExecutor executor = new DefaultAggregationExecutor(functionContexts);
  executor.init();
  executor.aggregate(firstBlock);
  executor.finish();

  // Compare each returned value with the independently computed aggregation.
  List<Object> results = executor.getResult();
  int position = 0;
  for (Object value : results) {
    double actual = (double) value;
    double expected = computeAggregation(AGGREGATION_FUNCTIONS[position], _inputData[position]);
    String message = "Aggregation mis-match for function " + AGGREGATION_FUNCTIONS[position] + ", Expected: " + expected + " Actual: " + actual;
    Assert.assertEquals(actual, expected, message);
    position++;
  }
}
Aggregations