Use of org.apache.hadoop.hbase.regionserver.ScannerContext.LimitScope in project hbase by apache.
The class RegionScannerImpl, method nextInternal.
private boolean nextInternal(List<Cell> results, ScannerContext scannerContext) throws IOException {
Preconditions.checkArgument(results.isEmpty(), "First parameter should be an empty list");
Preconditions.checkArgument(scannerContext != null, "Scanner context cannot be null");
Optional<RpcCall> rpcCall = RpcServer.getCurrentCall();
// Save the initial progress from the Scanner context in these local variables. The progress
// may need to be reset a few times if rows are being filtered out, so we save the initial
// progress.
int initialBatchProgress = scannerContext.getBatchProgress();
long initialSizeProgress = scannerContext.getDataSizeProgress();
long initialHeapSizeProgress = scannerContext.getHeapSizeProgress();
// Used to check time limit
LimitScope limitScope = LimitScope.BETWEEN_CELLS;
// This loop is re-entered only when a row is filtered out entirely and we must try the next
// row; otherwise it exits via return once the current row is done reading from storeHeap
// and joinedHeap has no more data to read for the last row (if set, joinedContinuationRow).
while (true) {
resetProgress(scannerContext, initialBatchProgress, initialSizeProgress, initialHeapSizeProgress);
checkClientDisconnect(rpcCall);
// Check for thread interrupt status in case we have been signaled from
// #interruptRegionOperation.
region.checkInterrupt();
// Let's see what we have in the storeHeap.
Cell current = this.storeHeap.peek();
boolean shouldStop = shouldStop(current);
// When hasFilterRow is true it means that all the cells for a particular row must be
// read before a filtering decision can be made. This means that filters where hasFilterRow
// is true run the risk of encountering out of memory errors in the case that they are
// applied to a table that has very large rows.
boolean hasFilterRow = this.filter != null && this.filter.hasFilterRow();
// If hasFilterRow is true, change the scope of any limits that could create partial results
// to LimitScope.BETWEEN_ROWS so that those limits are not reached mid-row
if (hasFilterRow) {
if (LOG.isTraceEnabled()) {
LOG.trace("filter#hasFilterRow is true which prevents partial results from being " + " formed. Changing scope of limits that may create partials");
}
scannerContext.setSizeLimitScope(LimitScope.BETWEEN_ROWS);
scannerContext.setTimeLimitScope(LimitScope.BETWEEN_ROWS);
limitScope = LimitScope.BETWEEN_ROWS;
}
if (scannerContext.checkTimeLimit(LimitScope.BETWEEN_CELLS)) {
if (hasFilterRow) {
throw new IncompatibleFilterException("Filter whose hasFilterRow() returns true is incompatible with scans that must " + "stop mid-row because of a limit. ScannerContext:" + scannerContext);
}
return true;
}
// Check if we were getting data from the joinedHeap and hit the limit.
// If not, then it's the main path - getting results from storeHeap.
if (joinedContinuationRow == null) {
// First, check if we are at a stop row. If so, there are no more results.
if (shouldStop) {
if (hasFilterRow) {
filter.filterRowCells(results);
}
return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues();
}
// Technically, if we hit limits before on this row, we don't need this call.
if (filterRowKey(current)) {
incrementCountOfRowsFilteredMetric(scannerContext);
// early check, see HBASE-16296
if (isFilterDoneInternal()) {
return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues();
}
// Typically the count of rows scanned is incremented inside #populateResult. However,
// here we are filtering a row based purely on its row key, preventing us from calling
// #populateResult. Thus, perform the necessary increment of the rows scanned metric here.
incrementCountOfRowsScannedMetric(scannerContext);
boolean moreRows = nextRow(scannerContext, current);
if (!moreRows) {
return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues();
}
results.clear();
// Read nothing as the rowkey was filtered, but still need to check time limit
if (scannerContext.checkTimeLimit(limitScope)) {
return true;
}
continue;
}
// Ok, we are good, let's try to get some results from the main heap.
populateResult(results, this.storeHeap, scannerContext, current);
if (scannerContext.checkAnyLimitReached(LimitScope.BETWEEN_CELLS)) {
if (hasFilterRow) {
throw new IncompatibleFilterException("Filter whose hasFilterRow() returns true is incompatible with scans that must " + "stop mid-row because of a limit. ScannerContext:" + scannerContext);
}
return true;
}
// Check for thread interrupt status in case we have been signaled from
// #interruptRegionOperation.
region.checkInterrupt();
Cell nextKv = this.storeHeap.peek();
shouldStop = shouldStop(nextKv);
// Save whether the row was empty before filters were applied to it.
final boolean isEmptyRow = results.isEmpty();
// We have the part of the row necessary for filtering (all of it, usually).
// First filter with the filterRow(List).
FilterWrapper.FilterRowRetCode ret = FilterWrapper.FilterRowRetCode.NOT_CALLED;
if (hasFilterRow) {
ret = filter.filterRowCellsWithRet(results);
// We don't know how the results have changed after being filtered. Must clear progress
// and set it according to the contents of results now.
if (scannerContext.getKeepProgress()) {
scannerContext.setProgress(initialBatchProgress, initialSizeProgress, initialHeapSizeProgress);
} else {
scannerContext.clearProgress();
}
scannerContext.incrementBatchProgress(results.size());
for (Cell cell : results) {
scannerContext.incrementSizeProgress(PrivateCellUtil.estimatedSerializedSizeOf(cell), cell.heapSize());
}
}
if (isEmptyRow || ret == FilterWrapper.FilterRowRetCode.EXCLUDE || filterRow()) {
incrementCountOfRowsFilteredMetric(scannerContext);
results.clear();
boolean moreRows = nextRow(scannerContext, current);
if (!moreRows) {
return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues();
}
// This row was totally filtered out. If this is NOT the last row (next is not null),
// we should continue on. Otherwise, nothing else to do.
if (!shouldStop) {
// Read nothing as the cells were filtered, but still need to check the time limit
if (scannerContext.checkTimeLimit(limitScope)) {
return true;
}
continue;
}
return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues();
}
// Ok, we are done with storeHeap for this row. Now we may need to fetch additional,
// non-essential data into the row. These values are not needed for the filter to work,
// so we postpone their fetch to (possibly) reduce the amount of data loaded from disk.
if (this.joinedHeap != null) {
boolean mayHaveData = joinedHeapMayHaveData(current);
if (mayHaveData) {
joinedContinuationRow = current;
populateFromJoinedHeap(results, scannerContext);
if (scannerContext.checkAnyLimitReached(LimitScope.BETWEEN_CELLS)) {
return true;
}
}
}
} else {
// Populating from the joined heap was stopped by limits, populate some more.
populateFromJoinedHeap(results, scannerContext);
if (scannerContext.checkAnyLimitReached(LimitScope.BETWEEN_CELLS)) {
return true;
}
}
// We may have just called populateFromJoinedHeap and hit the limits. If that is
// the case, we need to call it again on the next next() invocation.
if (joinedContinuationRow != null) {
return scannerContext.setScannerState(NextState.MORE_VALUES).hasMoreValues();
}
// Finally, we are done with both joinedHeap and storeHeap. Double check to prevent empty
// rows from appearing in the result. It could be the case when SingleColumnValueExcludeFilter
// is used.
if (results.isEmpty()) {
incrementCountOfRowsFilteredMetric(scannerContext);
boolean moreRows = nextRow(scannerContext, current);
if (!moreRows) {
return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues();
}
if (!shouldStop) {
continue;
}
}
if (shouldStop) {
return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues();
} else {
return scannerContext.setScannerState(NextState.MORE_VALUES).hasMoreValues();
}
}
}
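The heart of this method's LimitScope usage is the scope-widening rule at the top of the loop. The sketch below condenses that rule; it is illustrative rather than HBase source, and assumes same-package access to ScannerContext's scope setters (which RegionScannerImpl has).
package org.apache.hadoop.hbase.regionserver;

import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.regionserver.ScannerContext.LimitScope;

final class ScopeWideningSketch {
  // When the filter must see whole rows (hasFilterRow), any limit that could stop the
  // scan mid-row is promoted to row boundaries so that no partial row is ever formed.
  static LimitScope widenIfNeeded(Filter filter, ScannerContext scannerContext) {
    if (filter != null && filter.hasFilterRow()) {
      scannerContext.setSizeLimitScope(LimitScope.BETWEEN_ROWS);
      scannerContext.setTimeLimitScope(LimitScope.BETWEEN_ROWS);
      return LimitScope.BETWEEN_ROWS;
    }
    return LimitScope.BETWEEN_CELLS;
  }
}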
Use of org.apache.hadoop.hbase.regionserver.ScannerContext.LimitScope in project hbase by apache.
The class RSRpcServices, method scan.
// Returns whether we have more results in the region.
private boolean scan(HBaseRpcController controller, ScanRequest request, RegionScannerHolder rsh, long maxQuotaResultSize, int maxResults, List<Result> results, ScanResponse.Builder builder, MutableObject lastBlock, RpcCallContext context) throws IOException {
Region region = rsh.r;
RegionScanner scanner = rsh.s;
long maxResultSize;
if (scanner.getMaxResultSize() > 0) {
maxResultSize = Math.min(scanner.getMaxResultSize(), maxQuotaResultSize);
} else {
maxResultSize = maxQuotaResultSize;
}
// This is cells inside a row. Default size is 10 so if many versions or many cfs,
// then we'll resize. Resizings show in profiler. Set it higher than 10. For now
// arbitrary 32. TODO: keep record of general size of results being returned.
List<Cell> values = new ArrayList<>(32);
region.startRegionOperation(Operation.SCAN);
try {
int i = 0;
long before = EnvironmentEdgeManager.currentTime();
synchronized (scanner) {
boolean stale = (region.getRegionInfo().getReplicaId() != 0);
boolean clientHandlesPartials = request.hasClientHandlesPartials() && request.getClientHandlesPartials();
boolean clientHandlesHeartbeats = request.hasClientHandlesHeartbeats() && request.getClientHandlesHeartbeats();
// On the server side we must ensure that the correct ordering of partial results is
// returned to the client to allow them to properly reconstruct the partial results.
// If the coprocessor host is adding to the result list, we cannot guarantee the
// correct ordering of partial results and so we prevent partial results from being
// formed.
boolean serverGuaranteesOrderOfPartials = results.isEmpty();
boolean allowPartialResults = clientHandlesPartials && serverGuaranteesOrderOfPartials;
boolean moreRows = false;
// Heartbeat messages occur when the processing of the ScanRequest exceeds a
// certain time threshold on the server. When the time threshold is exceeded, the
// server stops the scan and sends back whatever Results it has accumulated within
// that time period (may be empty). Since heartbeat messages have the potential to
// create partial Results (in the event that the timeout occurs in the middle of a
// row), we must only generate heartbeat messages when the client can handle both
// heartbeats AND partials
boolean allowHeartbeatMessages = clientHandlesHeartbeats && allowPartialResults;
long timeLimit = getTimeLimit(controller, allowHeartbeatMessages);
final LimitScope sizeScope = allowPartialResults ? LimitScope.BETWEEN_CELLS : LimitScope.BETWEEN_ROWS;
final LimitScope timeScope = allowHeartbeatMessages ? LimitScope.BETWEEN_CELLS : LimitScope.BETWEEN_ROWS;
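// Taken together, these two assignments form a small decision table:
//   allowPartialResults     -> size limit checked BETWEEN_CELLS (a Result may end mid-row)
//   !allowPartialResults    -> size limit checked BETWEEN_ROWS  (only at row boundaries)
//   allowHeartbeatMessages  -> time limit checked BETWEEN_CELLS (a heartbeat may split a row)
//   !allowHeartbeatMessages -> time limit checked BETWEEN_ROWS  (no mid-row heartbeats)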
boolean trackMetrics = request.hasTrackScanMetrics() && request.getTrackScanMetrics();
// Configure with limits for this RPC. Set keep progress true since size progress
// towards size limit should be kept between calls to nextRaw
ScannerContext.Builder contextBuilder = ScannerContext.newBuilder(true);
// maxResultSize - either we can reach this much size for all cell data (being read), or
// for the sum of heap sizes occupied by those cells. Cell data means its key and value parts.
contextBuilder.setSizeLimit(sizeScope, maxResultSize, maxResultSize);
contextBuilder.setBatchLimit(scanner.getBatch());
contextBuilder.setTimeLimit(timeScope, timeLimit);
contextBuilder.setTrackMetrics(trackMetrics);
ScannerContext scannerContext = contextBuilder.build();
boolean limitReached = false;
while (i < maxResults) {
// Reset the batch progress to 0 before every call to RegionScanner#nextRaw. The
// batch limit is a limit on the number of cells per Result. Thus, if progress is
// being tracked (i.e. scannerContext.keepProgress() is true) then we need to
// reset the batch progress between nextRaw invocations since we don't want the
// batch progress from previous calls to affect future calls
scannerContext.setBatchProgress(0);
// Collect values to be returned here
moreRows = scanner.nextRaw(values, scannerContext);
if (!values.isEmpty()) {
Result r = Result.create(values, null, stale, scannerContext.hasMoreCellsInRow());
lastBlock.setValue(addSize(context, r, lastBlock.getValue()));
results.add(r);
i++;
}
boolean sizeLimitReached = scannerContext.checkSizeLimit(LimitScope.BETWEEN_ROWS);
boolean timeLimitReached = scannerContext.checkTimeLimit(LimitScope.BETWEEN_ROWS);
boolean rowLimitReached = i >= maxResults;
limitReached = sizeLimitReached || timeLimitReached || rowLimitReached;
if (limitReached || !moreRows) {
if (LOG.isTraceEnabled()) {
LOG.trace("Done scanning. limitReached: " + limitReached + " moreRows: " + moreRows + " scannerContext: " + scannerContext);
}
// We only want to mark a ScanResponse as a heartbeat message when there are more values to
// be read server side. If there aren't, marking it as a heartbeat is wasteful because the
// client will need to issue another ScanRequest only to realize that they already have all
// the values.
if (moreRows) {
// Heartbeat messages occur when the time limit has been reached.
builder.setHeartbeatMessage(timeLimitReached);
}
break;
}
values.clear();
}
if (limitReached || moreRows) {
// We stopped prematurely
builder.setMoreResultsInRegion(true);
} else {
// No limit was reached and the scanner has no more rows: the region is exhausted
builder.setMoreResultsInRegion(false);
}
// If the client requested metrics, retrieve the metrics from the scanner context.
if (trackMetrics) {
Map<String, Long> metrics = scannerContext.getMetrics().getMetricsMap();
ScanMetrics.Builder metricBuilder = ScanMetrics.newBuilder();
NameInt64Pair.Builder pairBuilder = NameInt64Pair.newBuilder();
for (Entry<String, Long> entry : metrics.entrySet()) {
pairBuilder.setName(entry.getKey());
pairBuilder.setValue(entry.getValue());
metricBuilder.addMetrics(pairBuilder.build());
}
builder.setScanMetrics(metricBuilder.build());
}
}
region.updateReadRequestsCount(i);
long end = EnvironmentEdgeManager.currentTime();
long responseCellSize = context != null ? context.getResponseCellSize() : 0;
region.getMetrics().updateScanTime(end - before);
if (regionServer.metricsRegionServer != null) {
regionServer.metricsRegionServer.updateScanSize(responseCellSize);
regionServer.metricsRegionServer.updateScanTime(end - before);
}
} finally {
region.closeRegionOperation();
}
// coprocessor postNext hook
if (region.getCoprocessorHost() != null) {
region.getCoprocessorHost().postScannerNext(scanner, results, maxResults, true);
}
return builder.getMoreResultsInRegion();
}
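The loop's exit logic above condenses to a single predicate. A minimal sketch, assuming the same BETWEEN_ROWS-scoped checks used in the loop and same-package access to ScannerContext (the helper name is illustrative, not HBase source):
package org.apache.hadoop.hbase.regionserver;

import org.apache.hadoop.hbase.regionserver.ScannerContext.LimitScope;

final class ScanLoopExitSketch {
  // True when the RPC-level scan loop should stop accumulating Results: some limit was
  // reached, or the region scanner itself has no more rows to offer.
  static boolean shouldStop(ScannerContext scannerContext, int numOfResults, int maxResults,
      boolean moreRows) {
    boolean sizeLimitReached = scannerContext.checkSizeLimit(LimitScope.BETWEEN_ROWS);
    boolean timeLimitReached = scannerContext.checkTimeLimit(LimitScope.BETWEEN_ROWS);
    boolean rowLimitReached = numOfResults >= maxResults;
    return sizeLimitReached || timeLimitReached || rowLimitReached || !moreRows;
  }
}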
Use of org.apache.hadoop.hbase.regionserver.ScannerContext.LimitScope in project hbase by apache.
The class RSRpcServices, method scan.
// Records on the ScanResponse builder whether we have more results in the region.
private void scan(HBaseRpcController controller, ScanRequest request, RegionScannerHolder rsh, long maxQuotaResultSize, int maxResults, int limitOfRows, List<Result> results, ScanResponse.Builder builder, MutableObject<Object> lastBlock, RpcCallContext context) throws IOException {
HRegion region = rsh.r;
RegionScanner scanner = rsh.s;
long maxResultSize;
if (scanner.getMaxResultSize() > 0) {
maxResultSize = Math.min(scanner.getMaxResultSize(), maxQuotaResultSize);
} else {
maxResultSize = maxQuotaResultSize;
}
// This is cells inside a row. Default size is 10 so if many versions or many cfs,
// then we'll resize. Resizings show in profiler. Set it higher than 10. For now
// arbitrary 32. TODO: keep record of general size of results being returned.
List<Cell> values = new ArrayList<>(32);
region.startRegionOperation(Operation.SCAN);
long before = EnvironmentEdgeManager.currentTime();
// Used to check if we've matched the row limit set on the Scan
int numOfCompleteRows = 0;
// Count of times we call nextRaw; can be > numOfCompleteRows.
int numOfNextRawCalls = 0;
try {
int numOfResults = 0;
synchronized (scanner) {
boolean stale = (region.getRegionInfo().getReplicaId() != 0);
boolean clientHandlesPartials = request.hasClientHandlesPartials() && request.getClientHandlesPartials();
boolean clientHandlesHeartbeats = request.hasClientHandlesHeartbeats() && request.getClientHandlesHeartbeats();
// On the server side we must ensure that the correct ordering of partial results is
// returned to the client to allow them to properly reconstruct the partial results.
// If the coprocessor host is adding to the result list, we cannot guarantee the
// correct ordering of partial results and so we prevent partial results from being
// formed.
boolean serverGuaranteesOrderOfPartials = results.isEmpty();
boolean allowPartialResults = clientHandlesPartials && serverGuaranteesOrderOfPartials;
boolean moreRows = false;
// Heartbeat messages occur when the processing of the ScanRequest exceeds a
// certain time threshold on the server. When the time threshold is exceeded, the
// server stops the scan and sends back whatever Results it has accumulated within
// that time period (may be empty). Since heartbeat messages have the potential to
// create partial Results (in the event that the timeout occurs in the middle of a
// row), we must only generate heartbeat messages when the client can handle both
// heartbeats AND partials
boolean allowHeartbeatMessages = clientHandlesHeartbeats && allowPartialResults;
long timeLimit = getTimeLimit(controller, allowHeartbeatMessages);
final LimitScope sizeScope = allowPartialResults ? LimitScope.BETWEEN_CELLS : LimitScope.BETWEEN_ROWS;
final LimitScope timeScope = allowHeartbeatMessages ? LimitScope.BETWEEN_CELLS : LimitScope.BETWEEN_ROWS;
boolean trackMetrics = request.hasTrackScanMetrics() && request.getTrackScanMetrics();
// Configure with limits for this RPC. Set keep progress true since size progress
// towards size limit should be kept between calls to nextRaw
ScannerContext.Builder contextBuilder = ScannerContext.newBuilder(true);
// maxResultSize - either we can reach this much size for all cell data (being read), or
// for the sum of heap sizes occupied by those cells. Cell data means its key and value parts.
contextBuilder.setSizeLimit(sizeScope, maxResultSize, maxResultSize);
contextBuilder.setBatchLimit(scanner.getBatch());
contextBuilder.setTimeLimit(timeScope, timeLimit);
contextBuilder.setTrackMetrics(trackMetrics);
ScannerContext scannerContext = contextBuilder.build();
boolean limitReached = false;
while (numOfResults < maxResults) {
// Reset the batch progress to 0 before every call to RegionScanner#nextRaw. The
// batch limit is a limit on the number of cells per Result. Thus, if progress is
// being tracked (i.e. scannerContext.keepProgress() is true) then we need to
// reset the batch progress between nextRaw invocations since we don't want the
// batch progress from previous calls to affect future calls
scannerContext.setBatchProgress(0);
// Collect values to be returned here
moreRows = scanner.nextRaw(values, scannerContext);
numOfNextRawCalls++;
if (!values.isEmpty()) {
if (limitOfRows > 0) {
// The last Result of the previous RPC may have been a partial row. If the first cell here
// belongs to a different row, that row is now complete, so then we need to increase the
// numOfCompleteRows.
if (results.isEmpty()) {
if (rsh.rowOfLastPartialResult != null && !CellUtil.matchingRows(values.get(0), rsh.rowOfLastPartialResult)) {
numOfCompleteRows++;
checkLimitOfRows(numOfCompleteRows, limitOfRows, moreRows, scannerContext, builder);
}
} else {
Result lastResult = results.get(results.size() - 1);
if (lastResult.mayHaveMoreCellsInRow() && !CellUtil.matchingRows(values.get(0), lastResult.getRow())) {
numOfCompleteRows++;
checkLimitOfRows(numOfCompleteRows, limitOfRows, moreRows, scannerContext, builder);
}
}
if (builder.hasMoreResults() && !builder.getMoreResults()) {
break;
}
}
boolean mayHaveMoreCellsInRow = scannerContext.mayHaveMoreCellsInRow();
Result r = Result.create(values, null, stale, mayHaveMoreCellsInRow);
lastBlock.setValue(addSize(context, r, lastBlock.getValue()));
results.add(r);
numOfResults++;
if (!mayHaveMoreCellsInRow && limitOfRows > 0) {
numOfCompleteRows++;
checkLimitOfRows(numOfCompleteRows, limitOfRows, moreRows, scannerContext, builder);
if (builder.hasMoreResults() && !builder.getMoreResults()) {
break;
}
}
} else if (!moreRows && !results.isEmpty()) {
// No more cells for this scan, so we need to ensure that the mayHaveMoreCellsInRow of the
// last result is false. Otherwise it is possible that the first nextRaw returned because of
// BATCH_LIMIT_REACHED (and happened to exhaust all cells of the scan), making the last
// result's mayHaveMoreCellsInRow true, while the following nextRaw returns with
// moreRows=false, which means moreResultsInRegion would be false - a contradictory state
// (HBASE-21206).
int lastIdx = results.size() - 1;
Result r = results.get(lastIdx);
if (r.mayHaveMoreCellsInRow()) {
results.set(lastIdx, Result.create(r.rawCells(), r.getExists(), r.isStale(), false));
}
}
boolean sizeLimitReached = scannerContext.checkSizeLimit(LimitScope.BETWEEN_ROWS);
boolean timeLimitReached = scannerContext.checkTimeLimit(LimitScope.BETWEEN_ROWS);
boolean resultsLimitReached = numOfResults >= maxResults;
limitReached = sizeLimitReached || timeLimitReached || resultsLimitReached;
if (limitReached || !moreRows) {
// We only want to mark a ScanResponse as a heartbeat message when there are more values to
// be read server side. If there aren't, marking it as a heartbeat is wasteful because the
// client will need to issue another ScanRequest only to realize that they already have all
// the values.
if (moreRows && timeLimitReached) {
// Heartbeat messages occur when the time limit has been reached.
builder.setHeartbeatMessage(true);
if (rsh.needCursor) {
Cell cursorCell = scannerContext.getLastPeekedCell();
if (cursorCell != null) {
builder.setCursor(ProtobufUtil.toCursor(cursorCell));
}
}
}
break;
}
values.clear();
}
builder.setMoreResultsInRegion(moreRows);
// If the client requested metrics, retrieve the metrics from the scanner context.
if (trackMetrics) {
Map<String, Long> metrics = scannerContext.getMetrics().getMetricsMap();
ScanMetrics.Builder metricBuilder = ScanMetrics.newBuilder();
NameInt64Pair.Builder pairBuilder = NameInt64Pair.newBuilder();
for (Entry<String, Long> entry : metrics.entrySet()) {
pairBuilder.setName(entry.getKey());
pairBuilder.setValue(entry.getValue());
metricBuilder.addMetrics(pairBuilder.build());
}
builder.setScanMetrics(metricBuilder.build());
}
}
} finally {
region.closeRegionOperation();
// Update serverside metrics, even on error.
long end = EnvironmentEdgeManager.currentTime();
long responseCellSize = context != null ? context.getResponseCellSize() : 0;
region.getMetrics().updateScanTime(end - before);
final MetricsRegionServer metricsRegionServer = server.getMetrics();
if (metricsRegionServer != null) {
metricsRegionServer.updateScanSize(region.getTableDescriptor().getTableName(), responseCellSize);
metricsRegionServer.updateScanTime(region.getTableDescriptor().getTableName(), end - before);
metricsRegionServer.updateReadQueryMeter(region.getRegionInfo().getTable(), numOfNextRawCalls);
}
}
// coprocessor postNext hook
if (region.getCoprocessorHost() != null) {
region.getCoprocessorHost().postScannerNext(scanner, results, maxResults, true);
}
}
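Both scan variants wire up their ScannerContext the same way. Condensed below as a sketch; the limit values are placeholders (labelled as such), the builder calls are exactly the ones used above, and same-package access to ScannerContext is assumed:
package org.apache.hadoop.hbase.regionserver;

import org.apache.hadoop.hbase.regionserver.ScannerContext.LimitScope;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;

final class RpcScanContextSketch {
  static ScannerContext build(boolean allowPartialResults, boolean allowHeartbeatMessages) {
    // Placeholder limits, for illustration only.
    long maxResultSize = 2L * 1024 * 1024; // data size and heap size caps, 2 MB each
    long timeLimit = EnvironmentEdgeManager.currentTime() + 60_000; // absolute deadline
    int batchLimit = -1; // -1 disables the per-Result cell limit

    LimitScope sizeScope = allowPartialResults ? LimitScope.BETWEEN_CELLS : LimitScope.BETWEEN_ROWS;
    LimitScope timeScope = allowHeartbeatMessages ? LimitScope.BETWEEN_CELLS : LimitScope.BETWEEN_ROWS;

    // keepProgress = true: size/time progress is carried across nextRaw calls, while the
    // batch progress is reset manually before each call (see the loops above).
    ScannerContext.Builder contextBuilder = ScannerContext.newBuilder(true);
    contextBuilder.setSizeLimit(sizeScope, maxResultSize, maxResultSize);
    contextBuilder.setBatchLimit(batchLimit);
    contextBuilder.setTimeLimit(timeScope, timeLimit);
    return contextBuilder.build();
  }
}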
Use of org.apache.hadoop.hbase.regionserver.ScannerContext.LimitScope in project hbase by apache.
The class RegionScannerImpl, method populateResult.
/**
* Fetches records with currentRow into the results list, until the next row, batchLimit (if not
* -1) is reached, or remainingResultSize (if not -1) is reached
* @param heap KeyValueHeap to fetch data from. It must be positioned on the correct row before the call.
* @return state of last call to {@link KeyValueHeap#next()}
*/
private boolean populateResult(List<Cell> results, KeyValueHeap heap, ScannerContext scannerContext, Cell currentRowCell) throws IOException {
Cell nextKv;
boolean moreCellsInRow = false;
boolean tmpKeepProgress = scannerContext.getKeepProgress();
// Scanning between column families and thus the scope is between cells
LimitScope limitScope = LimitScope.BETWEEN_CELLS;
do {
// Check for thread interrupt status in case we have been signaled from
// #interruptRegionOperation.
region.checkInterrupt();
// We want to maintain any progress that is made towards the limits while scanning across
// different column families. To do this, we toggle the keep progress flag on during calls
// to the StoreScanner to ensure that any progress made thus far is not wiped away.
scannerContext.setKeepProgress(true);
heap.next(results, scannerContext);
scannerContext.setKeepProgress(tmpKeepProgress);
nextKv = heap.peek();
moreCellsInRow = moreCellsInRow(nextKv, currentRowCell);
if (!moreCellsInRow) {
incrementCountOfRowsScannedMetric(scannerContext);
}
if (moreCellsInRow && scannerContext.checkBatchLimit(limitScope)) {
return scannerContext.setScannerState(NextState.BATCH_LIMIT_REACHED).hasMoreValues();
} else if (scannerContext.checkSizeLimit(limitScope)) {
ScannerContext.NextState state = moreCellsInRow ? NextState.SIZE_LIMIT_REACHED_MID_ROW : NextState.SIZE_LIMIT_REACHED;
return scannerContext.setScannerState(state).hasMoreValues();
} else if (scannerContext.checkTimeLimit(limitScope)) {
ScannerContext.NextState state = moreCellsInRow ? NextState.TIME_LIMIT_REACHED_MID_ROW : NextState.TIME_LIMIT_REACHED;
return scannerContext.setScannerState(state).hasMoreValues();
}
} while (moreCellsInRow);
return nextKv != null;
}
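populateResult is where BETWEEN_CELLS granularity pays off: a limit reached while moreCellsInRow is true maps to a *_MID_ROW state, which is exactly what nextInternal must avoid when a hasFilterRow filter is present. A sketch of that mapping, using only states that appear above (illustrative, not HBase source):
package org.apache.hadoop.hbase.regionserver;

import org.apache.hadoop.hbase.regionserver.ScannerContext.LimitScope;
import org.apache.hadoop.hbase.regionserver.ScannerContext.NextState;

final class LimitStateSketch {
  // Map a limit check to the scanner state populateResult would report. The batch limit
  // only matters mid-row: once the row is finished, a new Result starts anyway.
  static NextState stateFor(ScannerContext scannerContext, boolean moreCellsInRow) {
    LimitScope scope = LimitScope.BETWEEN_CELLS;
    if (moreCellsInRow && scannerContext.checkBatchLimit(scope)) {
      return NextState.BATCH_LIMIT_REACHED;
    }
    if (scannerContext.checkSizeLimit(scope)) {
      return moreCellsInRow ? NextState.SIZE_LIMIT_REACHED_MID_ROW : NextState.SIZE_LIMIT_REACHED;
    }
    if (scannerContext.checkTimeLimit(scope)) {
      return moreCellsInRow ? NextState.TIME_LIMIT_REACHED_MID_ROW : NextState.TIME_LIMIT_REACHED;
    }
    return NextState.MORE_VALUES; // no limit reached; keep reading this row
  }
}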