Search in sources :

Example 1 with ParallelIterators

use of org.apache.phoenix.iterate.ParallelIterators in project phoenix by apache.

In the class AggregatePlan, method newIterator.

/**
 * Builds the client-side iterator chain for this aggregate plan over the supplied scan.
 *
 * <p>The scan is first decorated with the serialized aggregation attributes, then the
 * region iterators are created, and finally the result is wrapped, in order, with the
 * aggregating, HAVING, DISTINCT, ORDER BY / OFFSET / LIMIT and sequence iterators that
 * apply to this statement.
 *
 * @param scanGrouper grouper handed to the serial iterators
 * @param scan the scan to decorate and to build the iterators from
 * @return the outermost iterator of the chain
 * @throws SQLException declared by this method; propagated from the calls made here
 */
@Override
protected ResultIterator newIterator(ParallelScanGrouper scanGrouper, Scan scan) throws SQLException {
    if (!groupBy.isEmpty()) {
        // Hand the serialized GROUP BY expressions to the coprocessor through a scan attribute.
        GroupedAggregateRegionObserver.serializeIntoScan(scan, groupBy.getScanAttribName(), groupBy.getKeyExpressions());
        /*
         * Early-exit optimization for a GROUP BY or DISTINCT combined with a LIMIT.
         * The scan may stop once LIMIT (+ OFFSET) groups were produced when ALL of these hold:
         * 1) There is no ORDER BY (or it was optimized out); with an ORDER BY the first
         *    group could be the last one found by the scan.
         * 2) There is no HAVING; HAVING runs on the client, and the LIMIT must be applied
         *    *after* it.
         * and ONE of these holds:
         * 3) DISTINCT without GROUP BY. A DISTINCT with a GROUP BY is processed on the
         *    client after aggregation, so it cannot exit early. A plain DISTINCT has no
         *    aggregate functions, so the order of the groups does not matter.
         * 4) GROUP BY without aggregate functions, which is the same situation as (3).
         *    With aggregate functions every row has to be seen, otherwise the aggregate
         *    values would be wrong.
         * 5) GROUP BY along the pk axis: rows arrive in row key order, so the rows of a
         *    group are contiguous and early exit is safe even with aggregate functions.
         */
        if (limit != null && orderBy.getOrderByExpressions().isEmpty() && having == null) {
            boolean canExitEarly;
            if (statement.isDistinct()) {
                canExitEarly = !statement.isAggregate();
            } else {
                canExitEarly = context.getAggregationManager().isEmpty()
                        || BaseScannerRegionObserver.KEY_ORDERED_GROUP_BY_EXPRESSIONS.equals(groupBy.getScanAttribName());
            }
            if (canExitEarly) {
                int groupsNeeded = limit + (offset == null ? 0 : offset);
                scan.setAttribute(BaseScannerRegionObserver.GROUP_BY_LIMIT, PInteger.INSTANCE.toBytes(groupsNeeded));
            }
        }
    } else {
        UngroupedAggregateRegionObserver.serializeIntoScan(scan);
    }

    // NOTE(review): only the serial branch receives scanGrouper; the parallel branch
    // uses the constructor without it - confirm this is intentional.
    BaseResultIterators iterators;
    if (isSerial) {
        iterators = new SerialIterators(this, null, null, wrapParallelIteratorFactory(), scanGrouper, scan);
    } else {
        iterators = new ParallelIterators(this, null, wrapParallelIteratorFactory(), scan, false);
    }
    // Remember what the iterators computed so the plan can report it later.
    estimatedRows = iterators.getEstimatedRowCount();
    estimatedSize = iterators.getEstimatedByteCount();
    splits = iterators.getSplits();
    scans = iterators.getScans();

    AggregatingResultIterator aggregated;
    if (groupBy.isEmpty() || groupBy.isUngroupedAggregate()) {
        // Ungrouped aggregation needs no merge sort; plain concatenation is enough.
        aggregated = new UngroupedAggregatingResultIterator(new ConcatResultIterator(iterators), aggregators);
    } else if (groupBy.isOrderPreserving()
            && this.getTableRef().getTable().getBucketNum() == null
            && this.getTableRef().getTable().getIndexType() != IndexType.LOCAL) {
        aggregated = new RowKeyOrderedAggregateResultIterator(iterators, aggregators);
    } else {
        // Salted tables and local indexes also end up here: their group by keys may not
        // be contiguous across scans, so a merge sort is still required.
        boolean reverseOrder = this.getOrderBy() == OrderBy.REV_ROW_KEY_ORDER_BY;
        aggregated = new GroupedAggregatingResultIterator(new MergeSortRowKeyResultIterator(iterators, 0, reverseOrder), aggregators);
    }
    if (having != null) {
        aggregated = new FilterAggregatingResultIterator(aggregated, having);
    }
    if (statement.isDistinct() && statement.isAggregate()) {
        // SELECT DISTINCT together with aggregation is de-duplicated on the client.
        aggregated = new DistinctAggregatingResultIterator(aggregated, getProjector());
    }

    ResultIterator result = aggregated;
    if (!orderBy.getOrderByExpressions().isEmpty()) {
        // The ordered iterator takes care of LIMIT and OFFSET itself.
        int spoolThresholdBytes = context.getConnection().getQueryServices().getProps().getInt(QueryServices.SPOOL_THRESHOLD_BYTES_ATTRIB, QueryServicesOptions.DEFAULT_SPOOL_THRESHOLD_BYTES);
        result = new OrderedAggregatingResultIterator(aggregated, orderBy.getOrderByExpressions(), spoolThresholdBytes, limit, offset);
    } else {
        if (offset != null) {
            result = new OffsetResultIterator(aggregated, offset);
        }
        if (limit != null) {
            result = new LimitingResultIterator(result, limit);
        }
    }
    if (context.getSequenceManager().getSequenceCount() > 0) {
        result = new SequenceResultIterator(result, context.getSequenceManager());
    }
    return result;
}
Also used : ParallelIterators(org.apache.phoenix.iterate.ParallelIterators) MergeSortRowKeyResultIterator(org.apache.phoenix.iterate.MergeSortRowKeyResultIterator) OffsetResultIterator(org.apache.phoenix.iterate.OffsetResultIterator) GroupedAggregatingResultIterator(org.apache.phoenix.iterate.GroupedAggregatingResultIterator) SerialIterators(org.apache.phoenix.iterate.SerialIterators) SequenceResultIterator(org.apache.phoenix.iterate.SequenceResultIterator) RowKeyOrderedAggregateResultIterator(org.apache.phoenix.iterate.RowKeyOrderedAggregateResultIterator) MergeSortRowKeyResultIterator(org.apache.phoenix.iterate.MergeSortRowKeyResultIterator) OrderedResultIterator(org.apache.phoenix.iterate.OrderedResultIterator) SpoolingResultIterator(org.apache.phoenix.iterate.SpoolingResultIterator) UngroupedAggregatingResultIterator(org.apache.phoenix.iterate.UngroupedAggregatingResultIterator) ConcatResultIterator(org.apache.phoenix.iterate.ConcatResultIterator) AggregatingResultIterator(org.apache.phoenix.iterate.AggregatingResultIterator) GroupedAggregatingResultIterator(org.apache.phoenix.iterate.GroupedAggregatingResultIterator) FilterAggregatingResultIterator(org.apache.phoenix.iterate.FilterAggregatingResultIterator) ResultIterator(org.apache.phoenix.iterate.ResultIterator) DistinctAggregatingResultIterator(org.apache.phoenix.iterate.DistinctAggregatingResultIterator) PeekingResultIterator(org.apache.phoenix.iterate.PeekingResultIterator) SequenceResultIterator(org.apache.phoenix.iterate.SequenceResultIterator) LimitingResultIterator(org.apache.phoenix.iterate.LimitingResultIterator) RowKeyOrderedAggregateResultIterator(org.apache.phoenix.iterate.RowKeyOrderedAggregateResultIterator) OrderedAggregatingResultIterator(org.apache.phoenix.iterate.OrderedAggregatingResultIterator) OffsetResultIterator(org.apache.phoenix.iterate.OffsetResultIterator) BaseResultIterators(org.apache.phoenix.iterate.BaseResultIterators) 
LimitingResultIterator(org.apache.phoenix.iterate.LimitingResultIterator) UngroupedAggregatingResultIterator(org.apache.phoenix.iterate.UngroupedAggregatingResultIterator) AggregatingResultIterator(org.apache.phoenix.iterate.AggregatingResultIterator) GroupedAggregatingResultIterator(org.apache.phoenix.iterate.GroupedAggregatingResultIterator) FilterAggregatingResultIterator(org.apache.phoenix.iterate.FilterAggregatingResultIterator) DistinctAggregatingResultIterator(org.apache.phoenix.iterate.DistinctAggregatingResultIterator) OrderedAggregatingResultIterator(org.apache.phoenix.iterate.OrderedAggregatingResultIterator) ConcatResultIterator(org.apache.phoenix.iterate.ConcatResultIterator) DistinctAggregatingResultIterator(org.apache.phoenix.iterate.DistinctAggregatingResultIterator) UngroupedAggregatingResultIterator(org.apache.phoenix.iterate.UngroupedAggregatingResultIterator) FilterAggregatingResultIterator(org.apache.phoenix.iterate.FilterAggregatingResultIterator) OrderedAggregatingResultIterator(org.apache.phoenix.iterate.OrderedAggregatingResultIterator)

Example 2 with ParallelIterators

use of org.apache.phoenix.iterate.ParallelIterators in project phoenix by apache.

In the class ParallelIteratorsSplitTest, method getSplits.

/**
 * Computes the key-range splits that {@link ParallelIterators} produces for the given
 * table, scan and scan ranges.
 *
 * <p>A stub {@link ColumnResolver} and a stub {@link QueryPlan} are built around
 * {@code tableRef}; a {@code ParallelIterators} is then constructed over them and its
 * splits are returned. The connection opened for this is closed before the method
 * returns, so repeated calls do not leak connections.
 *
 * @param tableRef the table the stub resolver and plan report
 * @param scan the scan placed in the statement context
 * @param regions not referenced in this method body
 * @param scanRanges the scan ranges set on the statement context
 * @return the splits reported by the constructed {@code ParallelIterators}
 * @throws SQLException if the connection cannot be opened or closed, or the iterators
 *         cannot be created
 */
private static List<KeyRange> getSplits(final TableRef tableRef, final Scan scan, final List<HRegionLocation> regions, final ScanRanges scanRanges) throws SQLException {
    final List<TableRef> tableRefs = Collections.singletonList(tableRef);
    // Stub resolver: it only exposes the single table; lookups are unsupported.
    ColumnResolver resolver = new ColumnResolver() {

        @Override
        public List<PFunction> getFunctions() {
            return Collections.emptyList();
        }

        @Override
        public List<TableRef> getTables() {
            return tableRefs;
        }

        @Override
        public TableRef resolveTable(String schemaName, String tableName) throws SQLException {
            throw new UnsupportedOperationException();
        }

        @Override
        public ColumnRef resolveColumn(String schemaName, String tableName, String colName) throws SQLException {
            throw new UnsupportedOperationException();
        }

        @Override
        public PFunction resolveFunction(String functionName) throws SQLException {
            throw new UnsupportedOperationException();
        }

        @Override
        public boolean hasUDFs() {
            return false;
        }

        @Override
        public PSchema resolveSchema(String schemaName) throws SQLException {
            return null;
        }

        @Override
        public List<PSchema> getSchemas() {
            return null;
        }
    };
    // try-with-resources: the connection was previously never closed.
    try (PhoenixConnection connection = DriverManager.getConnection(getUrl(), PropertiesUtil.deepCopy(TEST_PROPERTIES)).unwrap(PhoenixConnection.class)) {
        final PhoenixStatement statement = new PhoenixStatement(connection);
        final StatementContext context = new StatementContext(statement, resolver, scan, new SequenceManager(statement));
        context.setScanRanges(scanRanges);
        // Stub plan: returns the context and table ref above, and empty/neutral values elsewhere.
        ParallelIterators parallelIterators = new ParallelIterators(new QueryPlan() {

            private final Set<TableRef> tableRefs = ImmutableSet.of(tableRef);

            @Override
            public StatementContext getContext() {
                return context;
            }

            @Override
            public ParameterMetaData getParameterMetaData() {
                return PhoenixParameterMetaData.EMPTY_PARAMETER_META_DATA;
            }

            @Override
            public ExplainPlan getExplainPlan() throws SQLException {
                return ExplainPlan.EMPTY_PLAN;
            }

            @Override
            public ResultIterator iterator(ParallelScanGrouper scanGrouper) throws SQLException {
                return ResultIterator.EMPTY_ITERATOR;
            }

            @Override
            public ResultIterator iterator(ParallelScanGrouper scanGrouper, Scan scan) throws SQLException {
                return ResultIterator.EMPTY_ITERATOR;
            }

            @Override
            public ResultIterator iterator() throws SQLException {
                return ResultIterator.EMPTY_ITERATOR;
            }

            @Override
            public long getEstimatedSize() {
                return 0;
            }

            @Override
            public Set<TableRef> getSourceRefs() {
                return tableRefs;
            }

            @Override
            public TableRef getTableRef() {
                return tableRef;
            }

            @Override
            public RowProjector getProjector() {
                return RowProjector.EMPTY_PROJECTOR;
            }

            @Override
            public Integer getLimit() {
                return null;
            }

            @Override
            public Integer getOffset() {
                return null;
            }

            @Override
            public OrderBy getOrderBy() {
                return OrderBy.EMPTY_ORDER_BY;
            }

            @Override
            public GroupBy getGroupBy() {
                return GroupBy.EMPTY_GROUP_BY;
            }

            @Override
            public List<KeyRange> getSplits() {
                return null;
            }

            @Override
            public FilterableStatement getStatement() {
                return SelectStatement.SELECT_ONE;
            }

            @Override
            public boolean isDegenerate() {
                return false;
            }

            @Override
            public boolean isRowKeyOrdered() {
                return true;
            }

            @Override
            public List<List<Scan>> getScans() {
                return null;
            }

            @Override
            public Operation getOperation() {
                return Operation.QUERY;
            }

            @Override
            public boolean useRoundRobinIterator() {
                return false;
            }

            @Override
            public Long getEstimatedRowsToScan() {
                return null;
            }

            @Override
            public Long getEstimatedBytesToScan() {
                return null;
            }
        }, null, new SpoolingResultIterator.SpoolingResultIteratorFactory(context.getConnection().getQueryServices()), context.getScan(), false);
        // Read the splits while the connection is still open.
        return parallelIterators.getSplits();
    }
}
Also used : PhoenixConnection(org.apache.phoenix.jdbc.PhoenixConnection) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) PFunction(org.apache.phoenix.parse.PFunction) SQLException(java.sql.SQLException) Operation(org.apache.phoenix.jdbc.PhoenixStatement.Operation) QueryPlan(org.apache.phoenix.compile.QueryPlan) PhoenixStatement(org.apache.phoenix.jdbc.PhoenixStatement) SequenceManager(org.apache.phoenix.compile.SequenceManager) StatementContext(org.apache.phoenix.compile.StatementContext) FilterableStatement(org.apache.phoenix.parse.FilterableStatement) List(java.util.List) ColumnResolver(org.apache.phoenix.compile.ColumnResolver) OrderBy(org.apache.phoenix.compile.OrderByCompiler.OrderBy) ParallelIterators(org.apache.phoenix.iterate.ParallelIterators) GroupBy(org.apache.phoenix.compile.GroupByCompiler.GroupBy) SpoolingResultIterator(org.apache.phoenix.iterate.SpoolingResultIterator) ResultIterator(org.apache.phoenix.iterate.ResultIterator) PSchema(org.apache.phoenix.parse.PSchema) ParallelScanGrouper(org.apache.phoenix.iterate.ParallelScanGrouper) RowProjector(org.apache.phoenix.compile.RowProjector) Scan(org.apache.hadoop.hbase.client.Scan) SpoolingResultIterator(org.apache.phoenix.iterate.SpoolingResultIterator) TableRef(org.apache.phoenix.schema.TableRef) ParameterMetaData(java.sql.ParameterMetaData) PhoenixParameterMetaData(org.apache.phoenix.jdbc.PhoenixParameterMetaData) ExplainPlan(org.apache.phoenix.compile.ExplainPlan)

Example 3 with ParallelIterators

use of org.apache.phoenix.iterate.ParallelIterators in project phoenix by apache.

In the class ScanPlan, method newIterator.

/**
 * Builds the client-side iterator chain for this non-aggregate scan plan.
 *
 * <p>The scan is marked as a non-aggregate query, the region iterators (serial or
 * parallel) are created, and the result is wrapped with the merge / concat, OFFSET,
 * LIMIT and sequence iterators that apply to this statement.
 *
 * @param scanGrouper grouper handed to the serial or parallel iterators
 * @param scan the scan to decorate and to build the iterators from
 * @return the outermost iterator of the chain
 * @throws SQLException declared by this method; propagated from the calls made here
 */
@Override
protected ResultIterator newIterator(ParallelScanGrouper scanGrouper, Scan scan) throws SQLException {
    // Scan attributes must be in place before the scanner exists; afterwards it is too late.
    scan.setAttribute(BaseScannerRegionObserver.NON_AGGREGATE_QUERY, QueryConstants.TRUE);

    PTable table = this.getTableRef().getTable();
    boolean isSalted = table.getBucketNum() != null;
    boolean isOrdered = !orderBy.getOrderByExpressions().isEmpty();

    // A per-scan limit is only used when the page filter is allowed and there is no ORDER BY.
    Integer perScanLimit = null;
    if (allowPageFilter && !isOrdered) {
        perScanLimit = QueryUtil.getOffsetLimit(limit, offset);
    }
    boolean isOffsetOnServer = isOffsetPossibleOnServer(context, orderBy, offset, isSalted, table.getIndexType());

    /*
     * A row key ORDER BY over an amount of data within the threshold only needs the
     * scanner of the first (or, when reversed, the last) scan per region initialized.
     */
    boolean isRowKeyOrderBy = orderBy == OrderBy.FWD_ROW_KEY_ORDER_BY || orderBy == OrderBy.REV_ROW_KEY_ORDER_BY;
    boolean initFirstScanOnly = isRowKeyOrderBy && isDataToScanWithinThreshold;

    BaseResultIterators iterators;
    if (isOffsetOnServer) {
        // Serial execution with the offset pushed to the server.
        iterators = new SerialIterators(this, perScanLimit, offset, parallelIteratorFactory, scanGrouper, scan);
    } else if (isSerial) {
        iterators = new SerialIterators(this, perScanLimit, null, parallelIteratorFactory, scanGrouper, scan);
    } else {
        iterators = new ParallelIterators(this, perScanLimit, parallelIteratorFactory, scanGrouper, scan, initFirstScanOnly);
    }
    // Remember what the iterators computed so the plan can report it later.
    estimatedRows = iterators.getEstimatedRowCount();
    estimatedSize = iterators.getEstimatedByteCount();
    splits = iterators.getSplits();
    scans = iterators.getScans();

    ResultIterator scanner;
    if (!isOffsetOnServer && isOrdered) {
        // The top-N merge sort applies LIMIT and OFFSET itself.
        scanner = new MergeSortTopNResultIterator(iterators, limit, offset, orderBy.getOrderByExpressions());
    } else {
        if (isOffsetOnServer) {
            // The offset was already handled by the serial iterators; only LIMIT remains.
            scanner = new ConcatResultIterator(iterators);
        } else {
            boolean needsRowKeyMergeSort = (isSalted || table.getIndexType() == IndexType.LOCAL)
                    && ScanUtil.shouldRowsBeInRowKeyOrder(orderBy, context);
            if (needsRowKeyMergeSort) {
                /*
                 * Salted tables and local indexes need a merge sort when either
                 * 1) the config phoenix.query.force.rowkeyorder is set to true, or
                 * 2) the query orders by the row key (forward or reverse).
                 */
                int keyOffset = isSalted ? SaltingUtil.NUM_SALTING_BYTES : 0;
                boolean reverseOrder = orderBy == OrderBy.REV_ROW_KEY_ORDER_BY;
                scanner = new MergeSortRowKeyResultIterator(iterators, keyOffset, reverseOrder);
            } else if (useRoundRobinIterator()) {
                // Round robin is possible for any table when no row ordering is needed.
                scanner = new RoundRobinResultIterator(iterators, this);
            } else {
                scanner = new ConcatResultIterator(iterators);
            }
            if (offset != null) {
                scanner = new OffsetResultIterator(scanner, offset);
            }
        }
        if (limit != null) {
            scanner = new LimitingResultIterator(scanner, limit);
        }
    }

    if (context.getSequenceManager().getSequenceCount() > 0) {
        scanner = new SequenceResultIterator(scanner, context.getSequenceManager());
    }
    return scanner;
}
Also used : ParallelIterators(org.apache.phoenix.iterate.ParallelIterators) MergeSortRowKeyResultIterator(org.apache.phoenix.iterate.MergeSortRowKeyResultIterator) OffsetResultIterator(org.apache.phoenix.iterate.OffsetResultIterator) SerialIterators(org.apache.phoenix.iterate.SerialIterators) MergeSortTopNResultIterator(org.apache.phoenix.iterate.MergeSortTopNResultIterator) SequenceResultIterator(org.apache.phoenix.iterate.SequenceResultIterator) MergeSortRowKeyResultIterator(org.apache.phoenix.iterate.MergeSortRowKeyResultIterator) SequenceResultIterator(org.apache.phoenix.iterate.SequenceResultIterator) SpoolingResultIterator(org.apache.phoenix.iterate.SpoolingResultIterator) LimitingResultIterator(org.apache.phoenix.iterate.LimitingResultIterator) OffsetResultIterator(org.apache.phoenix.iterate.OffsetResultIterator) ConcatResultIterator(org.apache.phoenix.iterate.ConcatResultIterator) RoundRobinResultIterator(org.apache.phoenix.iterate.RoundRobinResultIterator) ResultIterator(org.apache.phoenix.iterate.ResultIterator) MergeSortTopNResultIterator(org.apache.phoenix.iterate.MergeSortTopNResultIterator) ChunkedResultIterator(org.apache.phoenix.iterate.ChunkedResultIterator) BaseResultIterators(org.apache.phoenix.iterate.BaseResultIterators) LimitingResultIterator(org.apache.phoenix.iterate.LimitingResultIterator) RoundRobinResultIterator(org.apache.phoenix.iterate.RoundRobinResultIterator) PTable(org.apache.phoenix.schema.PTable) ConcatResultIterator(org.apache.phoenix.iterate.ConcatResultIterator) TableRef(org.apache.phoenix.schema.TableRef)

Aggregations

ParallelIterators (org.apache.phoenix.iterate.ParallelIterators)3 ResultIterator (org.apache.phoenix.iterate.ResultIterator)3 SpoolingResultIterator (org.apache.phoenix.iterate.SpoolingResultIterator)3 BaseResultIterators (org.apache.phoenix.iterate.BaseResultIterators)2 ConcatResultIterator (org.apache.phoenix.iterate.ConcatResultIterator)2 LimitingResultIterator (org.apache.phoenix.iterate.LimitingResultIterator)2 MergeSortRowKeyResultIterator (org.apache.phoenix.iterate.MergeSortRowKeyResultIterator)2 OffsetResultIterator (org.apache.phoenix.iterate.OffsetResultIterator)2 SequenceResultIterator (org.apache.phoenix.iterate.SequenceResultIterator)2 SerialIterators (org.apache.phoenix.iterate.SerialIterators)2 ImmutableSet (com.google.common.collect.ImmutableSet)1 ParameterMetaData (java.sql.ParameterMetaData)1 SQLException (java.sql.SQLException)1 List (java.util.List)1 Set (java.util.Set)1 Scan (org.apache.hadoop.hbase.client.Scan)1 ColumnResolver (org.apache.phoenix.compile.ColumnResolver)1 ExplainPlan (org.apache.phoenix.compile.ExplainPlan)1 GroupBy (org.apache.phoenix.compile.GroupByCompiler.GroupBy)1 OrderBy (org.apache.phoenix.compile.OrderByCompiler.OrderBy)1