Search in sources :

Example 1 with ScannerContext

Use of org.apache.hadoop.hbase.regionserver.ScannerContext in project hbase by apache.

From the class Compactor, method performCompaction.

/**
   * Performs the compaction.
   * @param fd FileDetails of the cell sink writer
   * @param scanner Where to read from.
   * @param writer Where to write to.
   * @param smallestReadPoint Smallest read point.
   * @param cleanSeqId When true, remove the seqId (formerly mvcc) value if it is <=
   *          smallestReadPoint
   * @param throughputController The compaction throughput controller.
   * @param major Is a major compaction.
   * @param numofFilesToCompact the number of files to compact
   * @return Whether compaction ended; false if it was interrupted for some reason.
   */
protected boolean performCompaction(FileDetails fd, InternalScanner scanner, CellSink writer, long smallestReadPoint, boolean cleanSeqId, ThroughputController throughputController, boolean major, int numofFilesToCompact) throws IOException {
    assert writer instanceof ShipperListener;
    long bytesWrittenProgressForCloseCheck = 0;
    long bytesWrittenProgressForLog = 0;
    long bytesWrittenProgressForShippedCall = 0;
    // Since scanner.next() can return 'false' but still be delivering data,
    // we have to use a do/while loop.
    List<Cell> cells = new ArrayList<>();
    long closeCheckSizeLimit = HStore.getCloseCheckInterval();
    long lastMillis = 0;
    if (LOG.isDebugEnabled()) {
        lastMillis = EnvironmentEdgeManager.currentTime();
    }
    String compactionName = ThroughputControlUtil.getNameForThrottling(store, "compaction");
    long now = 0;
    boolean hasMore;
    ScannerContext scannerContext = ScannerContext.newBuilder().setBatchLimit(compactionKVMax).build();
    throughputController.start(compactionName);
    KeyValueScanner kvs = (scanner instanceof KeyValueScanner) ? (KeyValueScanner) scanner : null;
    long shippedCallSizeLimit = (long) numofFilesToCompact * this.store.getFamily().getBlocksize();
    try {
        do {
            hasMore = scanner.next(cells, scannerContext);
            if (LOG.isDebugEnabled()) {
                now = EnvironmentEdgeManager.currentTime();
            }
            // output to writer:
            Cell lastCleanCell = null;
            long lastCleanCellSeqId = 0;
            for (Cell c : cells) {
                if (cleanSeqId && c.getSequenceId() <= smallestReadPoint) {
                    lastCleanCell = c;
                    lastCleanCellSeqId = c.getSequenceId();
                    CellUtil.setSequenceId(c, 0);
                } else {
                    lastCleanCell = null;
                    lastCleanCellSeqId = 0;
                }
                writer.append(c);
                int len = KeyValueUtil.length(c);
                ++progress.currentCompactedKVs;
                progress.totalCompactedSize += len;
                bytesWrittenProgressForShippedCall += len;
                if (LOG.isDebugEnabled()) {
                    bytesWrittenProgressForLog += len;
                }
                throughputController.control(compactionName, len);
                // check periodically to see if a system stop is requested
                if (closeCheckSizeLimit > 0) {
                    bytesWrittenProgressForCloseCheck += len;
                    if (bytesWrittenProgressForCloseCheck > closeCheckSizeLimit) {
                        bytesWrittenProgressForCloseCheck = 0;
                        if (!store.areWritesEnabled()) {
                            progress.cancel();
                            return false;
                        }
                    }
                }
                if (kvs != null && bytesWrittenProgressForShippedCall > shippedCallSizeLimit) {
                    if (lastCleanCell != null) {
                        // HBASE-16931: set back the sequence id to avoid affecting the scan order
                        // unexpectedly. ShipperListener will clone the last cells it refers to, so we
                        // need to restore the sequence id before ShipperListener.beforeShipped().
                        CellUtil.setSequenceId(lastCleanCell, lastCleanCellSeqId);
                    }
                    // Clone the cells that are in the writer so that they are freed of references,
                    // if they are holding any.
                    ((ShipperListener) writer).beforeShipped();
                    // The SHARED block references being read for compaction are kept in the
                    // prevBlocks list (see HFileScannerImpl#prevBlocks). In the scan flow, after each
                    // batch of cells is returned to the client, shipped() is called, which can clear
                    // this list. Here we do the same thing: in between the compaction (after every N
                    // cells written with a collective size of 'shippedCallSizeLimit') we call
                    // shipped(), which may clear the prevBlocks list.
                    kvs.shipped();
                    bytesWrittenProgressForShippedCall = 0;
                }
            }
            if (lastCleanCell != null) {
                // HBASE-16931, set back sequence id to avoid affecting scan order unexpectedly
                CellUtil.setSequenceId(lastCleanCell, lastCleanCellSeqId);
            }
            // logging at DEBUG level
            if (LOG.isDebugEnabled()) {
                if ((now - lastMillis) >= COMPACTION_PROGRESS_LOG_INTERVAL) {
                    LOG.debug("Compaction progress: " + compactionName + " " + progress + String.format(", rate=%.2f kB/sec", (bytesWrittenProgressForLog / 1024.0) / ((now - lastMillis) / 1000.0)) + ", throughputController is " + throughputController);
                    lastMillis = now;
                    bytesWrittenProgressForLog = 0;
                }
            }
            cells.clear();
        } while (hasMore);
    } catch (InterruptedException e) {
        progress.cancel();
        throw new InterruptedIOException("Interrupted while controlling the throughput of compacting " + compactionName);
    } finally {
        throughputController.finish(compactionName);
    }
    progress.complete();
    return true;
}
Also used : InterruptedIOException(java.io.InterruptedIOException) ArrayList(java.util.ArrayList) KeyValueScanner(org.apache.hadoop.hbase.regionserver.KeyValueScanner) Cell(org.apache.hadoop.hbase.Cell) ScannerContext(org.apache.hadoop.hbase.regionserver.ScannerContext) ShipperListener(org.apache.hadoop.hbase.regionserver.ShipperListener)
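
A minimal sketch of the idiom this example is built on, kept apart from the throttling and shipping logic: a ScannerContext batch limit caps how many cells a single next() call may return, and the do/while loop keeps draining because next() can return false while still delivering a final batch. The scanner, sink, and batch limit are assumed to be supplied by the caller (in the example the limit is compactionKVMax, i.e. "hbase.hstore.compaction.kv.max").

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.regionserver.CellSink;
import org.apache.hadoop.hbase.regionserver.InternalScanner;
import org.apache.hadoop.hbase.regionserver.ScannerContext;

final class BatchedDrainSketch {

    // Drain every cell from 'scanner' into 'sink', at most 'batchLimit' cells per next() call.
    static void drain(InternalScanner scanner, CellSink sink, int batchLimit) throws IOException {
        List<Cell> cells = new ArrayList<>();
        ScannerContext scannerContext =
            ScannerContext.newBuilder().setBatchLimit(batchLimit).build();
        boolean hasMore;
        do {
            // next() can return false while still delivering cells, hence the do/while.
            hasMore = scanner.next(cells, scannerContext);
            for (Cell c : cells) {
                sink.append(c);
            }
            // Reuse the buffer across batches.
            cells.clear();
        } while (hasMore);
    }
}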

Example 2 with ScannerContext

Use of org.apache.hadoop.hbase.regionserver.ScannerContext in project phoenix by apache.

From the class RegionScannerFactory, method getWrappedScanner.

/**
   * Returns a wrapped scanner that catches unexpected exceptions (e.g. Phoenix bugs) and
   * re-throws them as DoNotRetryIOException to prevent needless retrying from hanging the
   * query for 30 seconds. Unfortunately, until HBASE-7481 gets fixed, there's no way to do
   * the same from a custom filter.
   * @param arrayKVRefs
   * @param arrayFuncRefs
   * @param offset starting position in the rowkey.
   * @param scan
   * @param tupleProjector
   * @param dataRegion
   * @param indexMaintainer
   * @param tx current transaction
   * @param viewConstants
   */
public RegionScanner getWrappedScanner(final RegionCoprocessorEnvironment env, final RegionScanner s, final Set<KeyValueColumnExpression> arrayKVRefs, final Expression[] arrayFuncRefs, final int offset, final Scan scan, final ColumnReference[] dataColumns, final TupleProjector tupleProjector, final Region dataRegion, final IndexMaintainer indexMaintainer, Transaction tx, final byte[][] viewConstants, final KeyValueSchema kvSchema, final ValueBitSet kvSchemaBitSet, final TupleProjector projector, final ImmutableBytesWritable ptr, final boolean useQualifierAsListIndex) {
    return new RegionScanner() {

        private boolean hasReferences = checkForReferenceFiles();

        private HRegionInfo regionInfo = env.getRegionInfo();

        private byte[] actualStartKey = getActualStartKey();

        // If there are any reference files after a local index region merge, in some cases we
        // might get records less than the scan start row key. This happens when we replace the
        // actual region start key with the merged region's start key. This method tells whether
        // there are any reference files in the region.
        private boolean checkForReferenceFiles() {
            if (!ScanUtil.isLocalIndex(scan))
                return false;
            for (byte[] family : scan.getFamilies()) {
                if (getRegion().getStore(family).hasReferences()) {
                    return true;
                }
            }
            return false;
        }

        // Get the actual scan start row of the local index. When there are references, it is
        // used to find results whose row keys are less than the scan start row.
        public byte[] getActualStartKey() {
            return ScanUtil.isLocalIndex(scan) ? ScanUtil.getActualStartRow(scan, regionInfo) : null;
        }

        @Override
        public boolean next(List<Cell> results) throws IOException {
            try {
                return s.next(results);
            } catch (Throwable t) {
                ServerUtil.throwIOException(getRegion().getRegionInfo().getRegionNameAsString(), t);
                // impossible
                return false;
            }
        }

        @Override
        public boolean next(List<Cell> result, ScannerContext scannerContext) throws IOException {
            try {
                return s.next(result, scannerContext);
            } catch (Throwable t) {
                ServerUtil.throwIOException(getRegion().getRegionInfo().getRegionNameAsString(), t);
                // impossible
                return false;
            }
        }

        @Override
        public void close() throws IOException {
            s.close();
        }

        @Override
        public HRegionInfo getRegionInfo() {
            return s.getRegionInfo();
        }

        @Override
        public boolean isFilterDone() throws IOException {
            return s.isFilterDone();
        }

        @Override
        public boolean reseek(byte[] row) throws IOException {
            return s.reseek(row);
        }

        @Override
        public long getMvccReadPoint() {
            return s.getMvccReadPoint();
        }

        @Override
        public boolean nextRaw(List<Cell> result) throws IOException {
            try {
                boolean next = s.nextRaw(result);
                Cell arrayElementCell = null;
                if (result.size() == 0) {
                    return next;
                }
                if (arrayFuncRefs != null && arrayFuncRefs.length > 0 && arrayKVRefs.size() > 0) {
                    int arrayElementCellPosition = replaceArrayIndexElement(arrayKVRefs, arrayFuncRefs, result);
                    arrayElementCell = result.get(arrayElementCellPosition);
                }
                if (ScanUtil.isLocalIndex(scan) && !ScanUtil.isAnalyzeTable(scan)) {
                    if (hasReferences && actualStartKey != null) {
                        next = scanTillScanStartRow(s, arrayKVRefs, arrayFuncRefs, result, null, arrayElementCell);
                        if (result.isEmpty()) {
                            return next;
                        }
                    }
                    /* In the following, c is only used when the data region is null.
                     * dataRegion will never be null in the case of a non-coprocessor call,
                     * therefore there is no need to refactor.
                     */
                    IndexUtil.wrapResultUsingOffset(env, result, offset, dataColumns, tupleProjector, dataRegion, indexMaintainer, viewConstants, ptr);
                }
                if (projector != null) {
                    Tuple toProject = useQualifierAsListIndex ? new PositionBasedResultTuple(result) : new ResultTuple(Result.create(result));
                    Tuple tuple = projector.projectResults(toProject, useNewValueColumnQualifier);
                    result.clear();
                    result.add(tuple.getValue(0));
                    if (arrayElementCell != null) {
                        result.add(arrayElementCell);
                    }
                }
                // There is a scan attribute set to retrieve the specific array element
                return next;
            } catch (Throwable t) {
                ServerUtil.throwIOException(getRegion().getRegionInfo().getRegionNameAsString(), t);
                // impossible
                return false;
            }
        }

        @Override
        public boolean nextRaw(List<Cell> result, ScannerContext scannerContext) throws IOException {
            try {
                boolean next = s.nextRaw(result, scannerContext);
                Cell arrayElementCell = null;
                if (result.size() == 0) {
                    return next;
                }
                if (arrayFuncRefs != null && arrayFuncRefs.length > 0 && arrayKVRefs.size() > 0) {
                    int arrayElementCellPosition = replaceArrayIndexElement(arrayKVRefs, arrayFuncRefs, result);
                    arrayElementCell = result.get(arrayElementCellPosition);
                }
                if ((offset > 0 || ScanUtil.isLocalIndex(scan)) && !ScanUtil.isAnalyzeTable(scan)) {
                    if (hasReferences && actualStartKey != null) {
                        next = scanTillScanStartRow(s, arrayKVRefs, arrayFuncRefs, result, scannerContext, arrayElementCell);
                        if (result.isEmpty()) {
                            return next;
                        }
                    }
                    /* In the following, c is only used when the data region is null.
                     * dataRegion will never be null in the case of a non-coprocessor call,
                     * therefore there is no need to refactor.
                     */
                    IndexUtil.wrapResultUsingOffset(env, result, offset, dataColumns, tupleProjector, dataRegion, indexMaintainer, viewConstants, ptr);
                }
                if (projector != null) {
                    Tuple toProject = useQualifierAsListIndex ? new PositionBasedMultiKeyValueTuple(result) : new ResultTuple(Result.create(result));
                    Tuple tuple = projector.projectResults(toProject, useNewValueColumnQualifier);
                    result.clear();
                    result.add(tuple.getValue(0));
                    if (arrayElementCell != null)
                        result.add(arrayElementCell);
                }
                // There is a scan attribute set to retrieve the specific array element
                return next;
            } catch (Throwable t) {
                ServerUtil.throwIOException(getRegion().getRegionInfo().getRegionNameAsString(), t);
                // impossible
                return false;
            }
        }

        /**
       * When there is a merge in progress while scanning local indexes, we might get key values
       * less than the scan start row. In that case we need to keep scanning until we get a row
       * key greater than or equal to the scan start key.
       * TODO: try to fix this case in LocalIndexStoreFileScanner when there is a merge.
       */
        private boolean scanTillScanStartRow(final RegionScanner s, final Set<KeyValueColumnExpression> arrayKVRefs, final Expression[] arrayFuncRefs, List<Cell> result, ScannerContext scannerContext, Cell arrayElementCell) throws IOException {
            boolean next = true;
            Cell firstCell = result.get(0);
            while (Bytes.compareTo(firstCell.getRowArray(), firstCell.getRowOffset(), firstCell.getRowLength(), actualStartKey, 0, actualStartKey.length) < 0) {
                result.clear();
                if (scannerContext == null) {
                    next = s.nextRaw(result);
                } else {
                    next = s.nextRaw(result, scannerContext);
                }
                if (result.isEmpty()) {
                    return next;
                }
                if (arrayFuncRefs != null && arrayFuncRefs.length > 0 && arrayKVRefs.size() > 0) {
                    int arrayElementCellPosition = replaceArrayIndexElement(arrayKVRefs, arrayFuncRefs, result);
                    arrayElementCell = result.get(arrayElementCellPosition);
                }
                firstCell = result.get(0);
            }
            return next;
        }

        private int replaceArrayIndexElement(final Set<KeyValueColumnExpression> arrayKVRefs, final Expression[] arrayFuncRefs, List<Cell> result) {
            // make a copy of the results array here, as we're modifying it below
            MultiKeyValueTuple tuple = new MultiKeyValueTuple(ImmutableList.copyOf(result));
            // The sizes of the two arrays are expected to be the same.
            // Using KeyValueSchema to set and retrieve the value
            // collect the first kv to get the row
            Cell rowKv = result.get(0);
            for (KeyValueColumnExpression kvExp : arrayKVRefs) {
                if (kvExp.evaluate(tuple, ptr)) {
                    ListIterator<Cell> itr = result.listIterator();
                    while (itr.hasNext()) {
                        Cell kv = itr.next();
                        if (Bytes.equals(kvExp.getColumnFamily(), 0, kvExp.getColumnFamily().length, kv.getFamilyArray(), kv.getFamilyOffset(), kv.getFamilyLength()) && Bytes.equals(kvExp.getColumnQualifier(), 0, kvExp.getColumnQualifier().length, kv.getQualifierArray(), kv.getQualifierOffset(), kv.getQualifierLength())) {
                            // remove the kv that has the full array values.
                            itr.remove();
                            break;
                        }
                    }
                }
            }
            byte[] value = kvSchema.toBytes(tuple, arrayFuncRefs, kvSchemaBitSet, ptr);
            // Add a dummy kv with the exact value of the array index
            result.add(new KeyValue(rowKv.getRowArray(), rowKv.getRowOffset(), rowKv.getRowLength(), QueryConstants.ARRAY_VALUE_COLUMN_FAMILY, 0, QueryConstants.ARRAY_VALUE_COLUMN_FAMILY.length, QueryConstants.ARRAY_VALUE_COLUMN_QUALIFIER, 0, QueryConstants.ARRAY_VALUE_COLUMN_QUALIFIER.length, HConstants.LATEST_TIMESTAMP, KeyValue.Type.codeToType(rowKv.getTypeByte()), value, 0, value.length));
            return result.size() - 1;
        }

        @Override
        public long getMaxResultSize() {
            return s.getMaxResultSize();
        }

        @Override
        public int getBatch() {
            return s.getBatch();
        }
    };
}
Also used : Set(java.util.Set) ValueBitSet(org.apache.phoenix.schema.ValueBitSet) RegionScanner(org.apache.hadoop.hbase.regionserver.RegionScanner) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) KeyValueColumnExpression(org.apache.phoenix.expression.KeyValueColumnExpression) ScannerContext(org.apache.hadoop.hbase.regionserver.ScannerContext)
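
Each scan method in the wrapper repeats one translation step: catch any Throwable from the delegate and rethrow it as an IOException tied to the region name, so the client fails fast instead of retrying for 30 seconds. A minimal sketch of that step alone; constructing a DoNotRetryIOException directly is an assumption standing in for the ServerUtil.throwIOException helper the example uses.

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.regionserver.RegionScanner;
import org.apache.hadoop.hbase.regionserver.ScannerContext;

final class FailFastScanSketch {

    // Delegate one next() call, translating any unexpected Throwable into a
    // DoNotRetryIOException so the client does not retry the doomed call.
    static boolean nextFailFast(RegionScanner delegate, List<Cell> results,
            ScannerContext scannerContext, String regionName) throws IOException {
        try {
            // A null context selects the plain overload, mirroring the two overrides above.
            return scannerContext == null
                ? delegate.next(results)
                : delegate.next(results, scannerContext);
        } catch (Throwable t) {
            throw new DoNotRetryIOException(regionName + ": " + t.getMessage(), t);
        }
    }
}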

Example 3 with ScannerContext

Use of org.apache.hadoop.hbase.regionserver.ScannerContext in project hbase by apache.

From the class AccessController, method checkCoveringPermission.

/**
   * Determine if cell ACLs covered by the operation grant access. This is expensive.
   * @return false if cell ACLs failed to grant access, true otherwise
   * @throws IOException
   */
private boolean checkCoveringPermission(User user, OpType request, RegionCoprocessorEnvironment e, byte[] row, Map<byte[], ? extends Collection<?>> familyMap, long opTs, Action... actions) throws IOException {
    if (!cellFeaturesEnabled) {
        return false;
    }
    long cellGrants = 0;
    long latestCellTs = 0;
    Get get = new Get(row);
    // Only in the case of a Put/Delete op, consider the TS within the cell (if set for
    // individual cells). When every cell within a Mutation can carry a different TS, we cannot
    // rely on only one version. We have to get every cell version and check its TS against the
    // TS asked for in the Mutation, and skip those cells which are outside this Mutation's TS.
    // In the case of Put, we have to consider only one such passing cell. In the case of Delete,
    // we have to consider all the cell versions under this passing version. When a Delete
    // Mutation contains columns which are a version delete, just consider one version for those
    // column cells.
    boolean considerCellTs = (request == OpType.PUT || request == OpType.DELETE);
    if (considerCellTs) {
        get.setMaxVersions();
    } else {
        get.setMaxVersions(1);
    }
    boolean diffCellTsFromOpTs = false;
    for (Map.Entry<byte[], ? extends Collection<?>> entry : familyMap.entrySet()) {
        byte[] col = entry.getKey();
        // TODO: a unified collection type in the family maps would mean we would not need to
        // do this instanceof dispatch
        if (entry.getValue() instanceof Set) {
            Set<byte[]> set = (Set<byte[]>) entry.getValue();
            if (set == null || set.isEmpty()) {
                get.addFamily(col);
            } else {
                for (byte[] qual : set) {
                    get.addColumn(col, qual);
                }
            }
        } else if (entry.getValue() instanceof List) {
            List<Cell> list = (List<Cell>) entry.getValue();
            if (list == null || list.isEmpty()) {
                get.addFamily(col);
            } else {
                // In case of family delete, a Cell will be added into the list with Qualifier as null.
                for (Cell cell : list) {
                    if (cell.getQualifierLength() == 0 && (cell.getTypeByte() == Type.DeleteFamily.getCode() || cell.getTypeByte() == Type.DeleteFamilyVersion.getCode())) {
                        get.addFamily(col);
                    } else {
                        get.addColumn(col, CellUtil.cloneQualifier(cell));
                    }
                    if (considerCellTs) {
                        long cellTs = cell.getTimestamp();
                        latestCellTs = Math.max(latestCellTs, cellTs);
                        diffCellTsFromOpTs = diffCellTsFromOpTs || (opTs != cellTs);
                    }
                }
            }
        } else if (entry.getValue() == null) {
            get.addFamily(col);
        } else {
            throw new RuntimeException("Unhandled collection type " + entry.getValue().getClass().getName());
        }
    }
    // We want to avoid looking into the future. So, if the cells of the
    // operation specify a timestamp, or the operation itself specifies a
    // timestamp, then we use the maximum ts found. Otherwise, we bound
    // the Get to the current server time. We add 1 to the timerange since
    // the upper bound of a timerange is exclusive yet we need to examine
    // any cells found there inclusively.
    long latestTs = Math.max(opTs, latestCellTs);
    if (latestTs == 0 || latestTs == HConstants.LATEST_TIMESTAMP) {
        latestTs = EnvironmentEdgeManager.currentTime();
    }
    get.setTimeRange(0, latestTs + 1);
    // When all the cell TSs match the op TS (the common case with Put), there is no need to
    // get all versions; get the latest version only.
    if (!diffCellTsFromOpTs && request == OpType.PUT) {
        get.setMaxVersions(1);
    }
    if (LOG.isTraceEnabled()) {
        LOG.trace("Scanning for cells with " + get);
    }
    // This Map is identical to familyMap, except the key is a ByteRange rather than a byte[].
    // It makes lookups over this new Map easy, as we can build a key over a Cell's family with
    // new SimpleMutableByteRange(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength())
    Map<ByteRange, List<Cell>> familyMap1 = new HashMap<>();
    for (Entry<byte[], ? extends Collection<?>> entry : familyMap.entrySet()) {
        if (entry.getValue() instanceof List) {
            familyMap1.put(new SimpleMutableByteRange(entry.getKey()), (List<Cell>) entry.getValue());
        }
    }
    RegionScanner scanner = getRegion(e).getScanner(new Scan(get));
    List<Cell> cells = Lists.newArrayList();
    Cell prevCell = null;
    ByteRange curFam = new SimpleMutableByteRange();
    boolean curColAllVersions = (request == OpType.DELETE);
    long curColCheckTs = opTs;
    boolean foundColumn = false;
    try {
        boolean more = false;
        ScannerContext scannerContext = ScannerContext.newBuilder().setBatchLimit(1).build();
        do {
            cells.clear();
            // scan with a batch limit of 1 to hold down memory use on wide rows
            more = scanner.next(cells, scannerContext);
            for (Cell cell : cells) {
                if (LOG.isTraceEnabled()) {
                    LOG.trace("Found cell " + cell);
                }
                boolean colChange = prevCell == null || !CellUtil.matchingColumn(prevCell, cell);
                if (colChange)
                    foundColumn = false;
                prevCell = cell;
                if (!curColAllVersions && foundColumn) {
                    continue;
                }
                if (colChange && considerCellTs) {
                    curFam.set(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength());
                    List<Cell> cols = familyMap1.get(curFam);
                    for (Cell col : cols) {
                        // A null/empty qualifier denotes a family delete; its TS and delete type
                        // apply to all columns within the family. That is why the
                        // (col.getQualifierLength() == 0) check below.
                        if ((col.getQualifierLength() == 0 && request == OpType.DELETE) || CellUtil.matchingQualifier(cell, col)) {
                            byte type = col.getTypeByte();
                            if (considerCellTs) {
                                curColCheckTs = col.getTimestamp();
                            }
                            // For a Delete op we pass allVersions as true. When a Delete Mutation
                            // contains a version delete for a column, there is no need to check all
                            // the covering cells within that column. Check all versions when the type
                            // is DeleteColumn or DeleteFamily; the one-version delete types are
                            // Delete and DeleteFamilyVersion.
                            curColAllVersions = (KeyValue.Type.DeleteColumn.getCode() == type) || (KeyValue.Type.DeleteFamily.getCode() == type);
                            break;
                        }
                    }
                }
                if (cell.getTimestamp() > curColCheckTs) {
                    // Just ignore this cell. This is not a covering cell.
                    continue;
                }
                foundColumn = true;
                for (Action action : actions) {
                    // Are there permissions for this user for the cell?
                    if (!authManager.authorize(user, getTableName(e), cell, action)) {
                        // We can stop if the cell ACL denies access
                        return false;
                    }
                }
                cellGrants++;
            }
        } while (more);
    } catch (AccessDeniedException ex) {
        throw ex;
    } catch (IOException ex) {
        LOG.error("Exception while getting cells to calculate covering permission", ex);
    } finally {
        scanner.close();
    }
    // Grant access only if one or more cell ACLs granted it; this path is reached only
    // after no table or CF grants are found.
    return cellGrants > 0;
}
Also used : PrivilegedExceptionAction(java.security.PrivilegedExceptionAction) Action(org.apache.hadoop.hbase.security.access.Permission.Action) AccessDeniedException(org.apache.hadoop.hbase.security.AccessDeniedException) Set(java.util.Set) TreeSet(java.util.TreeSet) ImmutableSet(com.google.common.collect.ImmutableSet) HashMap(java.util.HashMap) ByteRange(org.apache.hadoop.hbase.util.ByteRange) SimpleMutableByteRange(org.apache.hadoop.hbase.util.SimpleMutableByteRange) IOException(java.io.IOException) DoNotRetryIOException(org.apache.hadoop.hbase.DoNotRetryIOException) SimpleMutableByteRange(org.apache.hadoop.hbase.util.SimpleMutableByteRange) RegionScanner(org.apache.hadoop.hbase.regionserver.RegionScanner) Get(org.apache.hadoop.hbase.client.Get) FilterList(org.apache.hadoop.hbase.filter.FilterList) ArrayList(java.util.ArrayList) List(java.util.List) Scan(org.apache.hadoop.hbase.client.Scan) Map(java.util.Map) TreeMap(java.util.TreeMap) HashMap(java.util.HashMap) Cell(org.apache.hadoop.hbase.Cell) ScannerContext(org.apache.hadoop.hbase.regionserver.ScannerContext)
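
One subtle step in this example is how the Get is bounded in time: a time range's upper bound is exclusive, so the code asks for [0, latestTs + 1) to examine cells at latestTs inclusively, and falls back to the current server time when neither the op nor its cells carry a concrete timestamp. A minimal sketch of just that step, under the same conventions as the example (a ts of 0 or LATEST_TIMESTAMP means "unset"):

import java.io.IOException;

import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;

final class TimeBoundSketch {

    // Bound 'get' so it cannot see cells newer than the operation, adding 1
    // because the upper bound of a time range is exclusive.
    static void boundToLatestKnownTs(Get get, long opTs, long latestCellTs) throws IOException {
        long latestTs = Math.max(opTs, latestCellTs);
        if (latestTs == 0 || latestTs == HConstants.LATEST_TIMESTAMP) {
            // No concrete timestamp is known; use server time so we never look into the future.
            latestTs = EnvironmentEdgeManager.currentTime();
        }
        get.setTimeRange(0, latestTs + 1);
    }
}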

Example 4 with ScannerContext

Use of org.apache.hadoop.hbase.regionserver.ScannerContext in project hbase by apache.

From the class PartitionedMobCompactor, method compactMobFilesInBatch.

/**
   * Compacts a partition of selected small mob files and all the del files in a batch.
   * @param request The compaction request.
   * @param partition A compaction partition.
   * @param connection To use for transport
   * @param table The current table.
   * @param filesToCompact The files to be compacted.
   * @param batch The number of mob files to be compacted in a batch.
   * @param bulkloadPathOfPartition The directory where the bulkload column of the current
   *   partition is saved.
   * @param bulkloadColumnPath The directory where the bulkload files of current partition
   *   are saved.
   * @param newFiles The paths of new mob files after compactions.
   * @throws IOException if IO failure is encountered
   */
private void compactMobFilesInBatch(PartitionedMobCompactionRequest request, CompactionPartition partition, Connection connection, Table table, List<StoreFile> filesToCompact, int batch, Path bulkloadPathOfPartition, Path bulkloadColumnPath, List<Path> newFiles) throws IOException {
    // open a scanner over the selected mob files and del files.
    StoreScanner scanner = createScanner(filesToCompact, ScanType.COMPACT_DROP_DELETES);
    // the mob files to be compacted, not including the del files.
    List<StoreFile> mobFilesToCompact = filesToCompact.subList(0, batch);
    // Pair(maxSeqId, cellsCount)
    Pair<Long, Long> fileInfo = getFileInfo(mobFilesToCompact);
    // open writers for the mob files and new ref store files.
    StoreFileWriter writer = null;
    StoreFileWriter refFileWriter = null;
    Path filePath = null;
    long mobCells = 0;
    boolean cleanupTmpMobFile = false;
    boolean cleanupBulkloadDirOfPartition = false;
    boolean cleanupCommittedMobFile = false;
    boolean closeReaders = true;
    try {
        try {
            writer = MobUtils.createWriter(conf, fs, column, partition.getPartitionId().getLatestDate(), tempPath, Long.MAX_VALUE, column.getCompactionCompressionType(), partition.getPartitionId().getStartKey(), compactionCacheConfig, cryptoContext, true);
            cleanupTmpMobFile = true;
            filePath = writer.getPath();
            byte[] fileName = Bytes.toBytes(filePath.getName());
            // create a temp file and open a writer for it in the bulkloadPath
            refFileWriter = MobUtils.createRefFileWriter(conf, fs, column, bulkloadColumnPath, fileInfo.getSecond().longValue(), compactionCacheConfig, cryptoContext, true);
            cleanupBulkloadDirOfPartition = true;
            List<Cell> cells = new ArrayList<>();
            boolean hasMore;
            ScannerContext scannerContext = ScannerContext.newBuilder().setBatchLimit(compactionKVMax).build();
            do {
                hasMore = scanner.next(cells, scannerContext);
                for (Cell cell : cells) {
                    // write the mob cell to the mob file.
                    writer.append(cell);
                    // write the new reference cell to the store file.
                    Cell reference = MobUtils.createMobRefCell(cell, fileName, this.refCellTags);
                    refFileWriter.append(reference);
                    mobCells++;
                }
                cells.clear();
            } while (hasMore);
        } finally {
            // close the scanner.
            scanner.close();
            if (cleanupTmpMobFile) {
                // append metadata to the mob file, and close the mob file writer.
                closeMobFileWriter(writer, fileInfo.getFirst(), mobCells);
            }
            if (cleanupBulkloadDirOfPartition) {
                // append metadata and bulkload info to the ref mob file, and close the writer.
                closeRefFileWriter(refFileWriter, fileInfo.getFirst(), request.selectionTime);
            }
        }
        if (mobCells > 0) {
            // commit mob file
            MobUtils.commitFile(conf, fs, filePath, mobFamilyDir, compactionCacheConfig);
            cleanupTmpMobFile = false;
            cleanupCommittedMobFile = true;
            // bulkload the ref file
            bulkloadRefFile(connection, table, bulkloadPathOfPartition, filePath.getName());
            cleanupCommittedMobFile = false;
            newFiles.add(new Path(mobFamilyDir, filePath.getName()));
        }
        // archive the old mob files, do not archive the del files.
        try {
            closeStoreFileReaders(mobFilesToCompact);
            closeReaders = false;
            MobUtils.removeMobFiles(conf, fs, tableName, mobTableDir, column.getName(), mobFilesToCompact);
        } catch (IOException e) {
            LOG.error("Failed to archive the files " + mobFilesToCompact, e);
        }
    } finally {
        if (closeReaders) {
            closeStoreFileReaders(mobFilesToCompact);
        }
        if (cleanupTmpMobFile) {
            deletePath(filePath);
        }
        if (cleanupBulkloadDirOfPartition) {
            // delete the bulkload files in bulkloadPath
            deletePath(bulkloadPathOfPartition);
        }
        if (cleanupCommittedMobFile) {
            deletePath(new Path(mobFamilyDir, filePath.getName()));
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) StoreFileWriter(org.apache.hadoop.hbase.regionserver.StoreFileWriter) ArrayList(java.util.ArrayList) IOException(java.io.IOException) StoreFile(org.apache.hadoop.hbase.regionserver.StoreFile) StoreScanner(org.apache.hadoop.hbase.regionserver.StoreScanner) Cell(org.apache.hadoop.hbase.Cell) ScannerContext(org.apache.hadoop.hbase.regionserver.ScannerContext)
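
Aside from the scan loop, the error handling in this example rests on cleanup flags (cleanupTmpMobFile, cleanupBulkloadDirOfPartition, cleanupCommittedMobFile) that flip exactly at commit points, so the finally block deletes only artifacts that were created but never committed. A reduced sketch of the idiom; createTempFile, commitFile, and deletePath are hypothetical stand-ins for the writer-creation, MobUtils.commitFile, and deletePath calls above.

import java.io.IOException;

import org.apache.hadoop.fs.Path;

abstract class CommitPointCleanupSketch {

    // Hypothetical helpers standing in for the real writer/commit/delete calls.
    abstract Path createTempFile() throws IOException;
    abstract void commitFile(Path tmp) throws IOException;
    abstract void deletePath(Path p) throws IOException;

    void writeAndCommit() throws IOException {
        Path tmp = null;
        boolean cleanupTmpFile = false;
        try {
            tmp = createTempFile();
            // The file now exists on disk; from here on a failure must remove it.
            cleanupTmpFile = true;
            commitFile(tmp);
            // Committed; nothing is left to clean up.
            cleanupTmpFile = false;
        } finally {
            if (cleanupTmpFile) {
                // Roll back the uncommitted temp file.
                deletePath(tmp);
            }
        }
    }
}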

Example 5 with ScannerContext

Use of org.apache.hadoop.hbase.regionserver.ScannerContext in project hbase by apache.

From the class DefaultMobStoreCompactor, method performCompaction.

/**
   * Performs compaction on a column family with the mob flag enabled.
   * This is for when the mob threshold size has changed or if the mob
   * column family mode has been toggled via an alter table statement.
   * Compacts the files by the following rules.
   * 1. If the Put cell has a mob reference tag, the cell's value is the path of the mob file.
   * <ol>
   * <li>
   * If the value size of a cell is larger than the threshold, this cell is regarded as a mob;
   * directly copy the cell (with its mob tag) into the new store file.
   * </li>
   * <li>
   * Otherwise, retrieve the mob cell from the mob file, and write a copy of the cell into
   * the new store file.
   * </li>
   * </ol>
   * 2. If the Put cell doesn't have a reference tag.
   * <ol>
   * <li>
   * If the value size of a cell is larger than the threshold, this cell is regarded as a mob,
   * write this cell to a mob file, and write the path of this mob file to the store file.
   * </li>
   * <li>
   * Otherwise, directly write this cell into the store file.
   * </li>
   * </ol>
   * 3. Decide how to write a Delete cell.
   * <ol>
   * <li>
   * If a Delete cell does not have a mob reference tag, which means this delete marker has
   * not been written to the mob del file, write this cell to the mob del file, and write
   * this cell with a ref tag to a store file.
   * </li>
   * <li>
   * Otherwise, directly write it to a store file.
   * </li>
   * </ol>
   * After the major compaction on the normal hfiles, we have a guarantee that we have purged
   * all deleted or old-version mob refs, and the delete markers are written to a del file with
   * the suffix _del. Because of this, it is safe to use the del file in the mob compaction.
   * The mob compaction doesn't take place on the normal hfiles; it occurs directly on the
   * mob files. When the small mob files are merged into bigger ones, the del file is added to
   * the scanner to filter the deleted cells.
   * @param fd File details
   * @param scanner Where to read from.
   * @param writer Where to write to.
   * @param smallestReadPoint Smallest read point.
   * @param cleanSeqId When true, remove the seqId (formerly mvcc) value if it is <= smallestReadPoint
   * @param throughputController The compaction throughput controller.
   * @param major Is a major compaction.
   * @param numofFilesToCompact the number of files to compact
   * @return Whether compaction ended; false if it was interrupted for any reason.
   */
@Override
protected boolean performCompaction(FileDetails fd, InternalScanner scanner, CellSink writer, long smallestReadPoint, boolean cleanSeqId, ThroughputController throughputController, boolean major, int numofFilesToCompact) throws IOException {
    long bytesWrittenProgressForCloseCheck = 0;
    long bytesWrittenProgressForLog = 0;
    long bytesWrittenProgressForShippedCall = 0;
    // Since scanner.next() can return 'false' but still be delivering data,
    // we have to use a do/while loop.
    List<Cell> cells = new ArrayList<>();
    // Check every "hbase.hstore.close.check.interval" bytes written whether writes are still enabled.
    int closeCheckSizeLimit = HStore.getCloseCheckInterval();
    long lastMillis = 0;
    if (LOG.isDebugEnabled()) {
        lastMillis = EnvironmentEdgeManager.currentTime();
    }
    String compactionName = ThroughputControlUtil.getNameForThrottling(store, "compaction");
    long now = 0;
    boolean hasMore;
    Path path = MobUtils.getMobFamilyPath(conf, store.getTableName(), store.getColumnFamilyName());
    byte[] fileName = null;
    StoreFileWriter mobFileWriter = null, delFileWriter = null;
    long mobCells = 0, deleteMarkersCount = 0;
    long cellsCountCompactedToMob = 0, cellsCountCompactedFromMob = 0;
    long cellsSizeCompactedToMob = 0, cellsSizeCompactedFromMob = 0;
    boolean finished = false;
    // Limit to "hbase.hstore.compaction.kv.max" (default 10) to avoid OOME
    ScannerContext scannerContext = ScannerContext.newBuilder().setBatchLimit(compactionKVMax).build();
    throughputController.start(compactionName);
    KeyValueScanner kvs = (scanner instanceof KeyValueScanner) ? (KeyValueScanner) scanner : null;
    long shippedCallSizeLimit = (long) numofFilesToCompact * this.store.getFamily().getBlocksize();
    try {
        try {
            // If the mob file writer could not be created, directly write the cell to the store file.
            mobFileWriter = mobStore.createWriterInTmp(new Date(fd.latestPutTs), fd.maxKeyCount, compactionCompression, store.getRegionInfo().getStartKey(), true);
            fileName = Bytes.toBytes(mobFileWriter.getPath().getName());
        } catch (IOException e) {
            LOG.warn("Failed to create mob writer, " + "we will continue the compaction by writing MOB cells directly in store files", e);
        }
        if (major) {
            try {
                delFileWriter = mobStore.createDelFileWriterInTmp(new Date(fd.latestPutTs), fd.maxKeyCount, compactionCompression, store.getRegionInfo().getStartKey());
            } catch (IOException e) {
                LOG.warn("Failed to create del writer, " + "we will continue the compaction by writing delete markers directly in store files", e);
            }
        }
        do {
            hasMore = scanner.next(cells, scannerContext);
            if (LOG.isDebugEnabled()) {
                now = EnvironmentEdgeManager.currentTime();
            }
            for (Cell c : cells) {
                if (major && CellUtil.isDelete(c)) {
                    if (MobUtils.isMobReferenceCell(c) || delFileWriter == null) {
                        // Directly write it to a store file
                        writer.append(c);
                    } else {
                        // Add a ref tag to this cell and write it to a store file.
                        writer.append(MobUtils.createMobRefDeleteMarker(c));
                        // Write the cell to a del file
                        delFileWriter.append(c);
                        deleteMarkersCount++;
                    }
                } else if (mobFileWriter == null || c.getTypeByte() != KeyValue.Type.Put.getCode()) {
                    // If the mob file writer is null or the kv type is not put, directly write the cell
                    // to the store file.
                    writer.append(c);
                } else if (MobUtils.isMobReferenceCell(c)) {
                    if (MobUtils.hasValidMobRefCellValue(c)) {
                        int size = MobUtils.getMobValueLength(c);
                        if (size > mobSizeThreshold) {
                            // If the value size is larger than the threshold, it's regarded as a mob. Since
                            // its value is already in the mob file, directly write this cell to the store file
                            writer.append(c);
                        } else {
                            // If the value is not larger than the threshold, it's not regarded as a
                            // mob. Retrieve the mob cell from the mob file, and write it back to the
                            // store file.
                            Cell mobCell = mobStore.resolve(c, false);
                            if (mobCell.getValueLength() != 0) {
                                // put the mob data back to the store file
                                CellUtil.setSequenceId(mobCell, c.getSequenceId());
                                writer.append(mobCell);
                                cellsCountCompactedFromMob++;
                                cellsSizeCompactedFromMob += mobCell.getValueLength();
                            } else {
                                // If the value retrieved from the mob file is empty, there might have
                                // been issues when retrieving it; directly write the original cell to
                                // the store file and leave it to be handled by the next compaction.
                                writer.append(c);
                            }
                        }
                    } else {
                        LOG.warn("The value format of the KeyValue " + c + " is wrong, its length is less than " + Bytes.SIZEOF_INT);
                        writer.append(c);
                    }
                } else if (c.getValueLength() <= mobSizeThreshold) {
                    // If the value size of a cell is not larger than the threshold, directly write
                    // it to the store file.
                    writer.append(c);
                } else {
                    // If the value size of a cell is larger than the threshold, it's regarded as a mob,
                    // write this cell to a mob file, and write the path to the store file.
                    mobCells++;
                    // append the original keyValue in the mob file.
                    mobFileWriter.append(c);
                    Cell reference = MobUtils.createMobRefCell(c, fileName, this.mobStore.getRefCellTags());
                    // write the cell whose value is the path of a mob file to the store file.
                    writer.append(reference);
                    cellsCountCompactedToMob++;
                    cellsSizeCompactedToMob += c.getValueLength();
                }
                int len = KeyValueUtil.length(c);
                ++progress.currentCompactedKVs;
                progress.totalCompactedSize += len;
                bytesWrittenProgressForShippedCall += len;
                if (LOG.isDebugEnabled()) {
                    bytesWrittenProgressForLog += len;
                }
                throughputController.control(compactionName, len);
                // check periodically to see if a system stop is requested
                if (closeCheckSizeLimit > 0) {
                    bytesWrittenProgressForCloseCheck += len;
                    if (bytesWrittenProgressForCloseCheck > closeCheckSizeLimit) {
                        bytesWrittenProgressForCloseCheck = 0;
                        if (!store.areWritesEnabled()) {
                            progress.cancel();
                            return false;
                        }
                    }
                }
                if (kvs != null && bytesWrittenProgressForShippedCall > shippedCallSizeLimit) {
                    ((ShipperListener) writer).beforeShipped();
                    kvs.shipped();
                    bytesWrittenProgressForShippedCall = 0;
                }
            }
            // logging at DEBUG level
            if (LOG.isDebugEnabled()) {
                if ((now - lastMillis) >= COMPACTION_PROGRESS_LOG_INTERVAL) {
                    LOG.debug("Compaction progress: " + compactionName + " " + progress + String.format(", rate=%.2f kB/sec", (bytesWrittenProgressForLog / 1024.0) / ((now - lastMillis) / 1000.0)) + ", throughputController is " + throughputController);
                    lastMillis = now;
                    bytesWrittenProgressForLog = 0;
                }
            }
            cells.clear();
        } while (hasMore);
        finished = true;
    } catch (InterruptedException e) {
        progress.cancel();
        throw new InterruptedIOException("Interrupted while controlling the throughput of compacting " + compactionName);
    } finally {
        throughputController.finish(compactionName);
        if (!finished && mobFileWriter != null) {
            abortWriter(mobFileWriter);
        }
        if (!finished && delFileWriter != null) {
            abortWriter(delFileWriter);
        }
    }
    if (delFileWriter != null) {
        if (deleteMarkersCount > 0) {
            // If the del file is not empty, commit it.
            // If the commit fails, the compaction is performed again.
            delFileWriter.appendMetadata(fd.maxSeqId, major, deleteMarkersCount);
            delFileWriter.close();
            mobStore.commitFile(delFileWriter.getPath(), path);
        } else {
            // If the del file is empty, delete it instead of committing.
            abortWriter(delFileWriter);
        }
    }
    if (mobFileWriter != null) {
        if (mobCells > 0) {
            // If the mob file is not empty, commit it.
            mobFileWriter.appendMetadata(fd.maxSeqId, major, mobCells);
            mobFileWriter.close();
            mobStore.commitFile(mobFileWriter.getPath(), path);
        } else {
            // If the mob file is empty, delete it instead of committing.
            abortWriter(mobFileWriter);
        }
    }
    mobStore.updateCellsCountCompactedFromMob(cellsCountCompactedFromMob);
    mobStore.updateCellsCountCompactedToMob(cellsCountCompactedToMob);
    mobStore.updateCellsSizeCompactedFromMob(cellsSizeCompactedFromMob);
    mobStore.updateCellsSizeCompactedToMob(cellsSizeCompactedToMob);
    progress.complete();
    return true;
}
Also used : Path(org.apache.hadoop.fs.Path) StoreFileWriter(org.apache.hadoop.hbase.regionserver.StoreFileWriter) InterruptedIOException(java.io.InterruptedIOException) ArrayList(java.util.ArrayList) KeyValueScanner(org.apache.hadoop.hbase.regionserver.KeyValueScanner) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) Date(java.util.Date) Cell(org.apache.hadoop.hbase.Cell) ScannerContext(org.apache.hadoop.hbase.regionserver.ScannerContext) ShipperListener(org.apache.hadoop.hbase.regionserver.ShipperListener)
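
The long branch in the loop implements the rules from the javadoc, and its heart is rule 2: a plain Put cell stays inline when its value is at or under the mob threshold; otherwise the cell goes to the mob file and only a reference cell is written to the store file. A reduced sketch of that routing decision, treating mobSizeThreshold, fileName, and the serialized refCellTags as given, with the createMobRefCell signature taken from the call in the example.

import java.io.IOException;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.mob.MobUtils;
import org.apache.hadoop.hbase.regionserver.CellSink;
import org.apache.hadoop.hbase.regionserver.StoreFileWriter;

final class MobRoutingSketch {

    // Route one plain Put cell: inline if small, otherwise into the mob file
    // plus a reference cell in the store file (rule 2 of the javadoc above).
    static void routePutCell(Cell c, CellSink storeWriter, StoreFileWriter mobFileWriter,
            byte[] fileName, long mobSizeThreshold, byte[] refCellTags) throws IOException {
        if (c.getValueLength() <= mobSizeThreshold) {
            // Small value: write the cell directly to the store file.
            storeWriter.append(c);
        } else {
            // Large value: the original cell goes to the mob file...
            mobFileWriter.append(c);
            // ...and the store file gets a cell whose value is the mob file reference.
            storeWriter.append(MobUtils.createMobRefCell(c, fileName, refCellTags));
        }
    }
}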

Aggregations

ScannerContext (org.apache.hadoop.hbase.regionserver.ScannerContext): 7 uses
ArrayList (java.util.ArrayList): 6 uses
Cell (org.apache.hadoop.hbase.Cell): 6 uses
IOException (java.io.IOException): 5 uses
StoreFileWriter (org.apache.hadoop.hbase.regionserver.StoreFileWriter): 4 uses
InterruptedIOException (java.io.InterruptedIOException): 3 uses
Date (java.util.Date): 3 uses
Path (org.apache.hadoop.fs.Path): 3 uses
List (java.util.List): 2 uses
Set (java.util.Set): 2 uses
KeyValueScanner (org.apache.hadoop.hbase.regionserver.KeyValueScanner): 2 uses
RegionScanner (org.apache.hadoop.hbase.regionserver.RegionScanner): 2 uses
ShipperListener (org.apache.hadoop.hbase.regionserver.ShipperListener): 2 uses
StoreScanner (org.apache.hadoop.hbase.regionserver.StoreScanner): 2 uses
ImmutableList (com.google.common.collect.ImmutableList): 1 use
ImmutableSet (com.google.common.collect.ImmutableSet): 1 use
PrivilegedExceptionAction (java.security.PrivilegedExceptionAction): 1 use
HashMap (java.util.HashMap): 1 use
Map (java.util.Map): 1 use
TreeMap (java.util.TreeMap): 1 use