use of org.apache.phoenix.iterate.ParallelIterators in project phoenix by apache.
the class AggregatePlan method newIterator.
@Override
protected ResultIterator newIterator(ParallelScanGrouper scanGrouper, Scan scan) throws SQLException {
if (groupBy.isEmpty()) {
UngroupedAggregateRegionObserver.serializeIntoScan(scan);
} else {
// Set attribute with serialized expressions for coprocessor
GroupedAggregateRegionObserver.serializeIntoScan(scan, groupBy.getScanAttribName(), groupBy.getKeyExpressions());
if (limit != null && orderBy.getOrderByExpressions().isEmpty() && having == null && ((statement.isDistinct() && !statement.isAggregate()) || (!statement.isDistinct() && (context.getAggregationManager().isEmpty() || BaseScannerRegionObserver.KEY_ORDERED_GROUP_BY_EXPRESSIONS.equals(groupBy.getScanAttribName()))))) {
/*
* Optimization to early exit from the scan for a GROUP BY or DISTINCT with a LIMIT.
* We may exit early according to the LIMIT specified if the query has:
* 1) No ORDER BY clause (or the ORDER BY was optimized out). We cannot exit
* early if there's an ORDER BY because the first group may be found last
* in the scan.
* 2) No HAVING clause, since we execute the HAVING on the client side. The LIMIT
* needs to be evaluated *after* the HAVING.
* 3) DISTINCT clause with no GROUP BY. We cannot exit early if there's a
* GROUP BY, as the GROUP BY is processed on the client-side post aggregation
* if a DISTNCT has a GROUP BY. Otherwise, since there are no aggregate
* functions in a DISTINCT, we can exit early regardless of if the
* groups are in row key order or unordered.
* 4) GROUP BY clause with no aggregate functions. This is in the same category
* as (3). If we're using aggregate functions, we need to look at all the
* rows, as otherwise we'd exit early with incorrect aggregate function
* calculations.
* 5) GROUP BY clause along the pk axis, as the rows are processed in row key
* order, so we can early exit, even when aggregate functions are used, as
* the rows in the group are contiguous.
*/
scan.setAttribute(BaseScannerRegionObserver.GROUP_BY_LIMIT, PInteger.INSTANCE.toBytes(limit + (offset == null ? 0 : offset)));
}
}
BaseResultIterators iterators = isSerial ? new SerialIterators(this, null, null, wrapParallelIteratorFactory(), scanGrouper, scan) : new ParallelIterators(this, null, wrapParallelIteratorFactory(), scan, false);
estimatedRows = iterators.getEstimatedRowCount();
estimatedSize = iterators.getEstimatedByteCount();
splits = iterators.getSplits();
scans = iterators.getScans();
AggregatingResultIterator aggResultIterator;
// No need to merge sort for ungrouped aggregation
if (groupBy.isEmpty() || groupBy.isUngroupedAggregate()) {
aggResultIterator = new UngroupedAggregatingResultIterator(new ConcatResultIterator(iterators), aggregators);
// If salted or local index we still need a merge sort as we'll potentially have multiple group by keys that aren't contiguous.
} else if (groupBy.isOrderPreserving() && !(this.getTableRef().getTable().getBucketNum() != null || this.getTableRef().getTable().getIndexType() == IndexType.LOCAL)) {
aggResultIterator = new RowKeyOrderedAggregateResultIterator(iterators, aggregators);
} else {
aggResultIterator = new GroupedAggregatingResultIterator(new MergeSortRowKeyResultIterator(iterators, 0, this.getOrderBy() == OrderBy.REV_ROW_KEY_ORDER_BY), aggregators);
}
if (having != null) {
aggResultIterator = new FilterAggregatingResultIterator(aggResultIterator, having);
}
if (statement.isDistinct() && statement.isAggregate()) {
// Dedup on client if select distinct and aggregation
aggResultIterator = new DistinctAggregatingResultIterator(aggResultIterator, getProjector());
}
ResultIterator resultScanner = aggResultIterator;
if (orderBy.getOrderByExpressions().isEmpty()) {
if (offset != null) {
resultScanner = new OffsetResultIterator(aggResultIterator, offset);
}
if (limit != null) {
resultScanner = new LimitingResultIterator(resultScanner, limit);
}
} else {
int thresholdBytes = context.getConnection().getQueryServices().getProps().getInt(QueryServices.SPOOL_THRESHOLD_BYTES_ATTRIB, QueryServicesOptions.DEFAULT_SPOOL_THRESHOLD_BYTES);
resultScanner = new OrderedAggregatingResultIterator(aggResultIterator, orderBy.getOrderByExpressions(), thresholdBytes, limit, offset);
}
if (context.getSequenceManager().getSequenceCount() > 0) {
resultScanner = new SequenceResultIterator(resultScanner, context.getSequenceManager());
}
return resultScanner;
}
use of org.apache.phoenix.iterate.ParallelIterators in project phoenix by apache.
the class ParallelIteratorsSplitTest method getSplits.
private static List<KeyRange> getSplits(final TableRef tableRef, final Scan scan, final List<HRegionLocation> regions, final ScanRanges scanRanges) throws SQLException {
final List<TableRef> tableRefs = Collections.singletonList(tableRef);
ColumnResolver resolver = new ColumnResolver() {
@Override
public List<PFunction> getFunctions() {
return Collections.emptyList();
}
@Override
public List<TableRef> getTables() {
return tableRefs;
}
@Override
public TableRef resolveTable(String schemaName, String tableName) throws SQLException {
throw new UnsupportedOperationException();
}
@Override
public ColumnRef resolveColumn(String schemaName, String tableName, String colName) throws SQLException {
throw new UnsupportedOperationException();
}
@Override
public PFunction resolveFunction(String functionName) throws SQLException {
throw new UnsupportedOperationException();
}
@Override
public boolean hasUDFs() {
return false;
}
@Override
public PSchema resolveSchema(String schemaName) throws SQLException {
return null;
}
@Override
public List<PSchema> getSchemas() {
return null;
}
};
PhoenixConnection connection = DriverManager.getConnection(getUrl(), PropertiesUtil.deepCopy(TEST_PROPERTIES)).unwrap(PhoenixConnection.class);
final PhoenixStatement statement = new PhoenixStatement(connection);
final StatementContext context = new StatementContext(statement, resolver, scan, new SequenceManager(statement));
context.setScanRanges(scanRanges);
ParallelIterators parallelIterators = new ParallelIterators(new QueryPlan() {
private final Set<TableRef> tableRefs = ImmutableSet.of(tableRef);
@Override
public StatementContext getContext() {
return context;
}
@Override
public ParameterMetaData getParameterMetaData() {
return PhoenixParameterMetaData.EMPTY_PARAMETER_META_DATA;
}
@Override
public ExplainPlan getExplainPlan() throws SQLException {
return ExplainPlan.EMPTY_PLAN;
}
@Override
public ResultIterator iterator(ParallelScanGrouper scanGrouper) throws SQLException {
return ResultIterator.EMPTY_ITERATOR;
}
@Override
public ResultIterator iterator(ParallelScanGrouper scanGrouper, Scan scan) throws SQLException {
return ResultIterator.EMPTY_ITERATOR;
}
@Override
public ResultIterator iterator() throws SQLException {
return ResultIterator.EMPTY_ITERATOR;
}
@Override
public long getEstimatedSize() {
return 0;
}
@Override
public Set<TableRef> getSourceRefs() {
return tableRefs;
}
@Override
public TableRef getTableRef() {
return tableRef;
}
@Override
public RowProjector getProjector() {
return RowProjector.EMPTY_PROJECTOR;
}
@Override
public Integer getLimit() {
return null;
}
@Override
public Integer getOffset() {
return null;
}
@Override
public OrderBy getOrderBy() {
return OrderBy.EMPTY_ORDER_BY;
}
@Override
public GroupBy getGroupBy() {
return GroupBy.EMPTY_GROUP_BY;
}
@Override
public List<KeyRange> getSplits() {
return null;
}
@Override
public FilterableStatement getStatement() {
return SelectStatement.SELECT_ONE;
}
@Override
public boolean isDegenerate() {
return false;
}
@Override
public boolean isRowKeyOrdered() {
return true;
}
@Override
public List<List<Scan>> getScans() {
return null;
}
@Override
public Operation getOperation() {
return Operation.QUERY;
}
@Override
public boolean useRoundRobinIterator() {
return false;
}
@Override
public Long getEstimatedRowsToScan() {
return null;
}
@Override
public Long getEstimatedBytesToScan() {
return null;
}
}, null, new SpoolingResultIterator.SpoolingResultIteratorFactory(context.getConnection().getQueryServices()), context.getScan(), false);
List<KeyRange> keyRanges = parallelIterators.getSplits();
return keyRanges;
}
use of org.apache.phoenix.iterate.ParallelIterators in project phoenix by apache.
the class ScanPlan method newIterator.
@Override
protected ResultIterator newIterator(ParallelScanGrouper scanGrouper, Scan scan) throws SQLException {
// Set any scan attributes before creating the scanner, as it will be too late afterwards
scan.setAttribute(BaseScannerRegionObserver.NON_AGGREGATE_QUERY, QueryConstants.TRUE);
ResultIterator scanner;
TableRef tableRef = this.getTableRef();
PTable table = tableRef.getTable();
boolean isSalted = table.getBucketNum() != null;
/* If no limit or topN, use parallel iterator so that we get results faster. Otherwise, if
* limit is provided, run query serially.
*/
boolean isOrdered = !orderBy.getOrderByExpressions().isEmpty();
Integer perScanLimit = !allowPageFilter || isOrdered ? null : QueryUtil.getOffsetLimit(limit, offset);
boolean isOffsetOnServer = isOffsetPossibleOnServer(context, orderBy, offset, isSalted, table.getIndexType());
/*
* For queries that are doing a row key order by and are not possibly querying more than a
* threshold worth of data, then we only need to initialize scanners corresponding to the
* first (or last, if reverse) scan per region.
*/
boolean initFirstScanOnly = (orderBy == OrderBy.FWD_ROW_KEY_ORDER_BY || orderBy == OrderBy.REV_ROW_KEY_ORDER_BY) && isDataToScanWithinThreshold;
BaseResultIterators iterators;
if (isOffsetOnServer) {
iterators = new SerialIterators(this, perScanLimit, offset, parallelIteratorFactory, scanGrouper, scan);
} else if (isSerial) {
iterators = new SerialIterators(this, perScanLimit, null, parallelIteratorFactory, scanGrouper, scan);
} else {
iterators = new ParallelIterators(this, perScanLimit, parallelIteratorFactory, scanGrouper, scan, initFirstScanOnly);
}
estimatedRows = iterators.getEstimatedRowCount();
estimatedSize = iterators.getEstimatedByteCount();
splits = iterators.getSplits();
scans = iterators.getScans();
if (isOffsetOnServer) {
scanner = new ConcatResultIterator(iterators);
if (limit != null) {
scanner = new LimitingResultIterator(scanner, limit);
}
} else if (isOrdered) {
scanner = new MergeSortTopNResultIterator(iterators, limit, offset, orderBy.getOrderByExpressions());
} else {
if ((isSalted || table.getIndexType() == IndexType.LOCAL) && ScanUtil.shouldRowsBeInRowKeyOrder(orderBy, context)) {
/*
* For salted tables or local index, a merge sort is needed if:
* 1) The config phoenix.query.force.rowkeyorder is set to true
* 2) Or if the query has an order by that wants to sort
* the results by the row key (forward or reverse ordering)
*/
scanner = new MergeSortRowKeyResultIterator(iterators, isSalted ? SaltingUtil.NUM_SALTING_BYTES : 0, orderBy == OrderBy.REV_ROW_KEY_ORDER_BY);
} else if (useRoundRobinIterator()) {
/*
* For any kind of tables, round robin is possible if there is
* no ordering of rows needed.
*/
scanner = new RoundRobinResultIterator(iterators, this);
} else {
scanner = new ConcatResultIterator(iterators);
}
if (offset != null) {
scanner = new OffsetResultIterator(scanner, offset);
}
if (limit != null) {
scanner = new LimitingResultIterator(scanner, limit);
}
}
if (context.getSequenceManager().getSequenceCount() > 0) {
scanner = new SequenceResultIterator(scanner, context.getSequenceManager());
}
return scanner;
}
Aggregations