Use of org.apache.hadoop.hbase.regionserver.ScannerContext in project hbase by apache.
The class Compactor, method performCompaction.
/**
* Performs the compaction.
* @param fd FileDetails of cell sink writer
* @param scanner Where to read from.
* @param writer Where to write to.
* @param smallestReadPoint Smallest read point.
* @param cleanSeqId When true, remove seqId (used to be mvcc) value which is <=
* smallestReadPoint
* @param throughputController The compaction throughput controller.
* @param major Is a major compaction.
* @param numofFilesToCompact the number of files to compact
* @return Whether compaction ended; false if it was interrupted for some reason.
*/
protected boolean performCompaction(FileDetails fd, InternalScanner scanner, CellSink writer,
    long smallestReadPoint, boolean cleanSeqId, ThroughputController throughputController,
    boolean major, int numofFilesToCompact) throws IOException {
assert writer instanceof ShipperListener;
long bytesWrittenProgressForCloseCheck = 0;
long bytesWrittenProgressForLog = 0;
long bytesWrittenProgressForShippedCall = 0;
// Since scanner.next() can return 'false' but still be delivering data,
// we have to use a do/while loop.
List<Cell> cells = new ArrayList<>();
long closeCheckSizeLimit = HStore.getCloseCheckInterval();
long lastMillis = 0;
if (LOG.isDebugEnabled()) {
lastMillis = EnvironmentEdgeManager.currentTime();
}
String compactionName = ThroughputControlUtil.getNameForThrottling(store, "compaction");
long now = 0;
boolean hasMore;
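// Limit the batch to "hbase.hstore.compaction.kv.max" (default 10) cells per next() call to avoid OOME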
ScannerContext scannerContext = ScannerContext.newBuilder().setBatchLimit(compactionKVMax).build();
throughputController.start(compactionName);
KeyValueScanner kvs = (scanner instanceof KeyValueScanner) ? (KeyValueScanner) scanner : null;
long shippedCallSizeLimit = (long) numofFilesToCompact * this.store.getFamily().getBlocksize();
try {
do {
hasMore = scanner.next(cells, scannerContext);
if (LOG.isDebugEnabled()) {
now = EnvironmentEdgeManager.currentTime();
}
// output to writer:
Cell lastCleanCell = null;
long lastCleanCellSeqId = 0;
for (Cell c : cells) {
if (cleanSeqId && c.getSequenceId() <= smallestReadPoint) {
lastCleanCell = c;
lastCleanCellSeqId = c.getSequenceId();
CellUtil.setSequenceId(c, 0);
} else {
lastCleanCell = null;
lastCleanCellSeqId = 0;
}
writer.append(c);
int len = KeyValueUtil.length(c);
++progress.currentCompactedKVs;
progress.totalCompactedSize += len;
bytesWrittenProgressForShippedCall += len;
if (LOG.isDebugEnabled()) {
bytesWrittenProgressForLog += len;
}
throughputController.control(compactionName, len);
// check periodically to see if a system stop is requested
if (closeCheckSizeLimit > 0) {
bytesWrittenProgressForCloseCheck += len;
if (bytesWrittenProgressForCloseCheck > closeCheckSizeLimit) {
bytesWrittenProgressForCloseCheck = 0;
if (!store.areWritesEnabled()) {
progress.cancel();
return false;
}
}
}
if (kvs != null && bytesWrittenProgressForShippedCall > shippedCallSizeLimit) {
if (lastCleanCell != null) {
// HBASE-16931, set back sequence id to avoid affecting scan order unexpectedly.
// ShipperListener will clone the last cells it refers to, so we need to set back the
// sequence id before ShipperListener.beforeShipped
CellUtil.setSequenceId(lastCleanCell, lastCleanCellSeqId);
}
// Clone the cells that are in the writer so that they are freed of references,
// if they are holding any.
((ShipperListener) writer).beforeShipped();
// The SHARED block references being read for compaction are kept in the prevBlocks
// list (see HFileScannerImpl#prevBlocks). In the scan flow, after each set of cells
// is returned to the client, shipped() is called, which can clear this list. Here we
// do the same thing: in between the compaction (after every N cells written, with a
// collective size of 'shippedCallSizeLimit') we call shipped(), which may clear the
// prevBlocks list.
kvs.shipped();
bytesWrittenProgressForShippedCall = 0;
}
}
if (lastCleanCell != null) {
// HBASE-16931, set back sequence id to avoid affecting scan order unexpectedly
CellUtil.setSequenceId(lastCleanCell, lastCleanCellSeqId);
}
// logging at DEBUG level
if (LOG.isDebugEnabled()) {
if ((now - lastMillis) >= COMPACTION_PROGRESS_LOG_INTERVAL) {
LOG.debug("Compaction progress: " + compactionName + " " + progress + String.format(", rate=%.2f kB/sec", (bytesWrittenProgressForLog / 1024.0) / ((now - lastMillis) / 1000.0)) + ", throughputController is " + throughputController);
lastMillis = now;
bytesWrittenProgressForLog = 0;
}
}
cells.clear();
} while (hasMore);
} catch (InterruptedException e) {
progress.cancel();
throw new InterruptedIOException("Interrupted while control throughput of compacting " + compactionName);
} finally {
throughputController.finish(compactionName);
}
progress.complete();
return true;
}
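Reduced to its skeleton, the loop above is a batch-limited drain: ScannerContext caps how many cells each next() call may return, and the do/while keeps going until the scanner reports no more data. A minimal sketch, assuming only the HBase classes shown; the BatchedDrainSketch class and drain method are illustrative names, not HBase API:
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.regionserver.InternalScanner;
import org.apache.hadoop.hbase.regionserver.ScannerContext;

public class BatchedDrainSketch {
  // Drains all cells from the scanner, at most 'batchLimit' cells per next() call.
  // Returns the total number of cells seen.
  static long drain(InternalScanner scanner, int batchLimit) throws IOException {
    List<Cell> cells = new ArrayList<>();
    ScannerContext scannerContext = ScannerContext.newBuilder().setBatchLimit(batchLimit).build();
    long total = 0;
    boolean hasMore;
    do {
      // next() can return false while still delivering cells, hence do/while.
      hasMore = scanner.next(cells, scannerContext);
      total += cells.size();
      cells.clear();
    } while (hasMore);
    return total;
  }
}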
Use of org.apache.hadoop.hbase.regionserver.ScannerContext in project phoenix by apache.
The class RegionScannerFactory, method getWrappedScanner.
/**
* Return wrapped scanner that catches unexpected exceptions (i.e. Phoenix bugs) and
* re-throws them as DoNotRetryIOException to prevent needless retries from hanging the query
* for 30 seconds. Unfortunately, until HBASE-7481 gets fixed, there's no way to do
* the same from a custom filter.
* @param arrayKVRefs
* @param arrayFuncRefs
* @param offset starting position in the rowkey.
* @param scan
* @param tupleProjector
* @param dataRegion
* @param indexMaintainer
* @param tx current transaction
* @param viewConstants
*/
public RegionScanner getWrappedScanner(final RegionCoprocessorEnvironment env, final RegionScanner s,
    final Set<KeyValueColumnExpression> arrayKVRefs, final Expression[] arrayFuncRefs, final int offset,
    final Scan scan, final ColumnReference[] dataColumns, final TupleProjector tupleProjector,
    final Region dataRegion, final IndexMaintainer indexMaintainer, Transaction tx,
    final byte[][] viewConstants, final KeyValueSchema kvSchema, final ValueBitSet kvSchemaBitSet,
    final TupleProjector projector, final ImmutableBytesWritable ptr, final boolean useQualifierAsListIndex) {
return new RegionScanner() {
private boolean hasReferences = checkForReferenceFiles();
private HRegionInfo regionInfo = env.getRegionInfo();
private byte[] actualStartKey = getActualStartKey();
// If there are any reference files after a local index region merge, in some cases we
// might get records less than the scan start row key. This happens when we replace the
// actual region start key with the merged region's start key. This method reports whether
// there are any reference files in the region.
private boolean checkForReferenceFiles() {
if (!ScanUtil.isLocalIndex(scan))
return false;
for (byte[] family : scan.getFamilies()) {
if (getRegion().getStore(family).hasReferences()) {
return true;
}
}
return false;
}
// Gets the actual scan start row of the local index. This is used to compare result row
// keys against the scan start row when there are references.
public byte[] getActualStartKey() {
return ScanUtil.isLocalIndex(scan) ? ScanUtil.getActualStartRow(scan, regionInfo) : null;
}
@Override
public boolean next(List<Cell> results) throws IOException {
try {
return s.next(results);
} catch (Throwable t) {
ServerUtil.throwIOException(getRegion().getRegionInfo().getRegionNameAsString(), t);
// impossible
return false;
}
}
@Override
public boolean next(List<Cell> result, ScannerContext scannerContext) throws IOException {
try {
return s.next(result, scannerContext);
} catch (Throwable t) {
ServerUtil.throwIOException(getRegion().getRegionInfo().getRegionNameAsString(), t);
// impossible
return false;
}
}
@Override
public void close() throws IOException {
s.close();
}
@Override
public HRegionInfo getRegionInfo() {
return s.getRegionInfo();
}
@Override
public boolean isFilterDone() throws IOException {
return s.isFilterDone();
}
@Override
public boolean reseek(byte[] row) throws IOException {
return s.reseek(row);
}
@Override
public long getMvccReadPoint() {
return s.getMvccReadPoint();
}
@Override
public boolean nextRaw(List<Cell> result) throws IOException {
try {
boolean next = s.nextRaw(result);
Cell arrayElementCell = null;
if (result.size() == 0) {
return next;
}
if (arrayFuncRefs != null && arrayFuncRefs.length > 0 && arrayKVRefs.size() > 0) {
int arrayElementCellPosition = replaceArrayIndexElement(arrayKVRefs, arrayFuncRefs, result);
arrayElementCell = result.get(arrayElementCellPosition);
}
if (ScanUtil.isLocalIndex(scan) && !ScanUtil.isAnalyzeTable(scan)) {
if (hasReferences && actualStartKey != null) {
next = scanTillScanStartRow(s, arrayKVRefs, arrayFuncRefs, result, null, arrayElementCell);
if (result.isEmpty()) {
return next;
}
}
/* In the following call, env is only used when dataRegion is null.
dataRegion will never be null in the case of a non-coprocessor call,
therefore there is no need to refactor.
*/
IndexUtil.wrapResultUsingOffset(env, result, offset, dataColumns, tupleProjector, dataRegion, indexMaintainer, viewConstants, ptr);
}
if (projector != null) {
Tuple toProject = useQualifierAsListIndex ? new PositionBasedResultTuple(result) : new ResultTuple(Result.create(result));
Tuple tuple = projector.projectResults(toProject, useNewValueColumnQualifier);
result.clear();
result.add(tuple.getValue(0));
if (arrayElementCell != null) {
result.add(arrayElementCell);
}
}
// There is a scan attribute set to retrieve the specific array element
return next;
} catch (Throwable t) {
ServerUtil.throwIOException(getRegion().getRegionInfo().getRegionNameAsString(), t);
// impossible
return false;
}
}
@Override
public boolean nextRaw(List<Cell> result, ScannerContext scannerContext) throws IOException {
try {
boolean next = s.nextRaw(result, scannerContext);
Cell arrayElementCell = null;
if (result.size() == 0) {
return next;
}
if (arrayFuncRefs != null && arrayFuncRefs.length > 0 && arrayKVRefs.size() > 0) {
int arrayElementCellPosition = replaceArrayIndexElement(arrayKVRefs, arrayFuncRefs, result);
arrayElementCell = result.get(arrayElementCellPosition);
}
if ((offset > 0 || ScanUtil.isLocalIndex(scan)) && !ScanUtil.isAnalyzeTable(scan)) {
if (hasReferences && actualStartKey != null) {
next = scanTillScanStartRow(s, arrayKVRefs, arrayFuncRefs, result, scannerContext, arrayElementCell);
if (result.isEmpty()) {
return next;
}
}
/* In the following call, env is only used when dataRegion is null.
dataRegion will never be null in the case of a non-coprocessor call,
therefore there is no need to refactor.
*/
IndexUtil.wrapResultUsingOffset(env, result, offset, dataColumns, tupleProjector, dataRegion, indexMaintainer, viewConstants, ptr);
}
if (projector != null) {
Tuple toProject = useQualifierAsListIndex ? new PositionBasedMultiKeyValueTuple(result) : new ResultTuple(Result.create(result));
Tuple tuple = projector.projectResults(toProject, useNewValueColumnQualifier);
result.clear();
result.add(tuple.getValue(0));
if (arrayElementCell != null)
result.add(arrayElementCell);
}
// There is a scan attribute set to retrieve the specific array element
return next;
} catch (Throwable t) {
ServerUtil.throwIOException(getRegion().getRegionInfo().getRegionNameAsString(), t);
// impossible
return false;
}
}
/**
* When there is a merge in progress while scanning local indexes we might get key values less than the scan start row.
* In that case we need to scan until we get a row key greater than or equal to the scan start key.
* TODO try to fix this case in LocalIndexStoreFileScanner when there is a merge.
*/
private boolean scanTillScanStartRow(final RegionScanner s, final Set<KeyValueColumnExpression> arrayKVRefs, final Expression[] arrayFuncRefs, List<Cell> result, ScannerContext scannerContext, Cell arrayElementCell) throws IOException {
boolean next = true;
Cell firstCell = result.get(0);
while (Bytes.compareTo(firstCell.getRowArray(), firstCell.getRowOffset(), firstCell.getRowLength(), actualStartKey, 0, actualStartKey.length) < 0) {
result.clear();
if (scannerContext == null) {
next = s.nextRaw(result);
} else {
next = s.nextRaw(result, scannerContext);
}
if (result.isEmpty()) {
return next;
}
if (arrayFuncRefs != null && arrayFuncRefs.length > 0 && arrayKVRefs.size() > 0) {
int arrayElementCellPosition = replaceArrayIndexElement(arrayKVRefs, arrayFuncRefs, result);
arrayElementCell = result.get(arrayElementCellPosition);
}
firstCell = result.get(0);
}
return next;
}
private int replaceArrayIndexElement(final Set<KeyValueColumnExpression> arrayKVRefs, final Expression[] arrayFuncRefs, List<Cell> result) {
// make a copy of the results array here, as we're modifying it below
MultiKeyValueTuple tuple = new MultiKeyValueTuple(ImmutableList.copyOf(result));
// The sizes of both arrays should be the same.
// Using KeyValueSchema to set and retrieve the value
// collect the first kv to get the row
Cell rowKv = result.get(0);
for (KeyValueColumnExpression kvExp : arrayKVRefs) {
if (kvExp.evaluate(tuple, ptr)) {
ListIterator<Cell> itr = result.listIterator();
while (itr.hasNext()) {
Cell kv = itr.next();
if (Bytes.equals(kvExp.getColumnFamily(), 0, kvExp.getColumnFamily().length, kv.getFamilyArray(), kv.getFamilyOffset(), kv.getFamilyLength()) && Bytes.equals(kvExp.getColumnQualifier(), 0, kvExp.getColumnQualifier().length, kv.getQualifierArray(), kv.getQualifierOffset(), kv.getQualifierLength())) {
// remove the kv that has the full array values.
itr.remove();
break;
}
}
}
}
byte[] value = kvSchema.toBytes(tuple, arrayFuncRefs, kvSchemaBitSet, ptr);
// Add a dummy kv with the exact value of the array index
result.add(new KeyValue(rowKv.getRowArray(), rowKv.getRowOffset(), rowKv.getRowLength(), QueryConstants.ARRAY_VALUE_COLUMN_FAMILY, 0, QueryConstants.ARRAY_VALUE_COLUMN_FAMILY.length, QueryConstants.ARRAY_VALUE_COLUMN_QUALIFIER, 0, QueryConstants.ARRAY_VALUE_COLUMN_QUALIFIER.length, HConstants.LATEST_TIMESTAMP, KeyValue.Type.codeToType(rowKv.getTypeByte()), value, 0, value.length));
return result.size() - 1;
}
@Override
public long getMaxResultSize() {
return s.getMaxResultSize();
}
@Override
public int getBatch() {
return s.getBatch();
}
};
}
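The wrapper above is mostly the same delegation-plus-rethrow idiom repeated per method: forward the call, and convert any Throwable into a DoNotRetryIOException so the client fails fast instead of retrying a server-side bug. A minimal sketch of just that idiom, with hypothetical names (SafeScannerSketch, safeNext); the real code routes through ServerUtil.throwIOException:
import java.io.IOException;
import java.util.List;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.regionserver.RegionScanner;
import org.apache.hadoop.hbase.regionserver.ScannerContext;

public class SafeScannerSketch {
  // Wraps a next() call so unexpected server-side errors fail fast instead of retrying.
  static boolean safeNext(RegionScanner delegate, List<Cell> results,
      ScannerContext scannerContext, String regionName) throws IOException {
    try {
      return delegate.next(results, scannerContext);
    } catch (Throwable t) {
      // DoNotRetryIOException tells the HBase client not to retry this operation.
      throw new DoNotRetryIOException("Unexpected error in region " + regionName, t);
    }
  }
}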
Use of org.apache.hadoop.hbase.regionserver.ScannerContext in project hbase by apache.
The class AccessController, method checkCoveringPermission.
/**
* Determine if cell ACLs covered by the operation grant access. This is expensive.
* @return false if cell ACLs failed to grant access, true otherwise
* @throws IOException
*/
private boolean checkCoveringPermission(User user, OpType request, RegionCoprocessorEnvironment e,
    byte[] row, Map<byte[], ? extends Collection<?>> familyMap, long opTs, Action... actions)
    throws IOException {
if (!cellFeaturesEnabled) {
return false;
}
long cellGrants = 0;
long latestCellTs = 0;
Get get = new Get(row);
// Only in case of Put/Delete op, consider TS within cell (if set for individual cells).
// When every cell within a Mutation can carry a different TS, we cannot rely on only one
// version. We have to get every cell version and check its TS against the TS asked for in
// the Mutation, and skip those cells which are outside this Mutation's TS. In case of Put,
// we have to consider only one such passing cell. In case of Delete we have to consider all
// the cell versions under this passing version. When a Delete Mutation contains columns
// which are version deletes, consider only one version for those column cells.
boolean considerCellTs = (request == OpType.PUT || request == OpType.DELETE);
if (considerCellTs) {
get.setMaxVersions();
} else {
get.setMaxVersions(1);
}
boolean diffCellTsFromOpTs = false;
for (Map.Entry<byte[], ? extends Collection<?>> entry : familyMap.entrySet()) {
byte[] col = entry.getKey();
// The family map value may be a Set of qualifiers or a List of Cells; ideally the
// collection type would be unified in family maps so we would not need to do this.
if (entry.getValue() instanceof Set) {
Set<byte[]> set = (Set<byte[]>) entry.getValue();
if (set == null || set.isEmpty()) {
get.addFamily(col);
} else {
for (byte[] qual : set) {
get.addColumn(col, qual);
}
}
} else if (entry.getValue() instanceof List) {
List<Cell> list = (List<Cell>) entry.getValue();
if (list == null || list.isEmpty()) {
get.addFamily(col);
} else {
// In case of family delete, a Cell will be added into the list with Qualifier as null.
for (Cell cell : list) {
if (cell.getQualifierLength() == 0 && (cell.getTypeByte() == Type.DeleteFamily.getCode() || cell.getTypeByte() == Type.DeleteFamilyVersion.getCode())) {
get.addFamily(col);
} else {
get.addColumn(col, CellUtil.cloneQualifier(cell));
}
if (considerCellTs) {
long cellTs = cell.getTimestamp();
latestCellTs = Math.max(latestCellTs, cellTs);
diffCellTsFromOpTs = diffCellTsFromOpTs || (opTs != cellTs);
}
}
}
} else if (entry.getValue() == null) {
get.addFamily(col);
} else {
throw new RuntimeException("Unhandled collection type " + entry.getValue().getClass().getName());
}
}
// We want to avoid looking into the future. So, if the cells of the
// operation specify a timestamp, or the operation itself specifies a
// timestamp, then we use the maximum ts found. Otherwise, we bound
// the Get to the current server time. We add 1 to the timerange since
// the upper bound of a timerange is exclusive yet we need to examine
// any cells found there inclusively.
long latestTs = Math.max(opTs, latestCellTs);
if (latestTs == 0 || latestTs == HConstants.LATEST_TIMESTAMP) {
latestTs = EnvironmentEdgeManager.currentTime();
}
get.setTimeRange(0, latestTs + 1);
// In the Put case, when no cell TS differs from the op TS, there is no need to get all
// versions; getting the latest version only is enough.
if (!diffCellTsFromOpTs && request == OpType.PUT) {
get.setMaxVersions(1);
}
if (LOG.isTraceEnabled()) {
LOG.trace("Scanning for cells with " + get);
}
// This Map is identical to familyMap, except the key is a ByteRange rather than byte[].
// It makes gets over this new Map easy, as we can create get keys over the Cell's cf via
// new SimpleByteRange(cell.familyArray, cell.familyOffset, cell.familyLen)
Map<ByteRange, List<Cell>> familyMap1 = new HashMap<>();
for (Entry<byte[], ? extends Collection<?>> entry : familyMap.entrySet()) {
if (entry.getValue() instanceof List) {
familyMap1.put(new SimpleMutableByteRange(entry.getKey()), (List<Cell>) entry.getValue());
}
}
RegionScanner scanner = getRegion(e).getScanner(new Scan(get));
List<Cell> cells = Lists.newArrayList();
Cell prevCell = null;
ByteRange curFam = new SimpleMutableByteRange();
boolean curColAllVersions = (request == OpType.DELETE);
long curColCheckTs = opTs;
boolean foundColumn = false;
try {
boolean more = false;
ScannerContext scannerContext = ScannerContext.newBuilder().setBatchLimit(1).build();
do {
cells.clear();
// scan with limit as 1 to hold down memory use on wide rows
more = scanner.next(cells, scannerContext);
for (Cell cell : cells) {
if (LOG.isTraceEnabled()) {
LOG.trace("Found cell " + cell);
}
boolean colChange = prevCell == null || !CellUtil.matchingColumn(prevCell, cell);
if (colChange)
foundColumn = false;
prevCell = cell;
if (!curColAllVersions && foundColumn) {
continue;
}
if (colChange && considerCellTs) {
curFam.set(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength());
List<Cell> cols = familyMap1.get(curFam);
for (Cell col : cols) {
// A null/empty qualifier in a Delete denotes a family-level delete whose TS and delete
// type apply to all columns within the family; that is why the below
// (col.getQualifierLength() == 0) check.
if ((col.getQualifierLength() == 0 && request == OpType.DELETE) || CellUtil.matchingQualifier(cell, col)) {
byte type = col.getTypeByte();
if (considerCellTs) {
curColCheckTs = col.getTimestamp();
}
// For a Delete op we pass allVersions as true. When a Delete Mutation contains
// a version delete for a column no need to check all the covering cells within
// that column. Check all versions when Type is DeleteColumn or DeleteFamily
// One version delete types are Delete/DeleteFamilyVersion
curColAllVersions = (KeyValue.Type.DeleteColumn.getCode() == type) || (KeyValue.Type.DeleteFamily.getCode() == type);
break;
}
}
}
if (cell.getTimestamp() > curColCheckTs) {
// Just ignore this cell. This is not a covering cell.
continue;
}
foundColumn = true;
for (Action action : actions) {
// Are there permissions for this user for the cell?
if (!authManager.authorize(user, getTableName(e), cell, action)) {
// We can stop if the cell ACL denies access
return false;
}
}
cellGrants++;
}
} while (more);
} catch (AccessDeniedException ex) {
throw ex;
} catch (IOException ex) {
LOG.error("Exception while getting cells to calculate covering permission", ex);
} finally {
scanner.close();
}
// We should not authorize unless we have found one or more cell ACLs that grant access.
// This code path is used to check for additional permissions after no table or CF grants
// are found.
return cellGrants > 0;
}
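The setBatchLimit(1) trick used above generalizes: it lets server-side code walk an arbitrarily wide row one cell per next() call, bounding memory to a single cell at a time. A sketch under that assumption; WideRowWalkSketch and countCells are illustrative names, not HBase API:
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.regionserver.Region;
import org.apache.hadoop.hbase.regionserver.RegionScanner;
import org.apache.hadoop.hbase.regionserver.ScannerContext;

public class WideRowWalkSketch {
  // Visits every cell of the scan one at a time to hold down memory use on wide rows.
  static long countCells(Region region, Scan scan) throws IOException {
    long count = 0;
    List<Cell> cells = new ArrayList<>();
    ScannerContext scannerContext = ScannerContext.newBuilder().setBatchLimit(1).build();
    RegionScanner scanner = region.getScanner(scan);
    try {
      boolean more;
      do {
        cells.clear();
        // With a batch limit of 1, each next() returns at most one cell.
        more = scanner.next(cells, scannerContext);
        count += cells.size();
      } while (more);
    } finally {
      scanner.close();
    }
    return count;
  }
}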
Use of org.apache.hadoop.hbase.regionserver.ScannerContext in project hbase by apache.
The class PartitionedMobCompactor, method compactMobFilesInBatch.
/**
* Compacts a partition of selected small mob files and all the del files in a batch.
* @param request The compaction request.
* @param partition A compaction partition.
* @param connection To use for transport
* @param table The current table.
* @param filesToCompact The files to be compacted.
* @param batch The number of mob files to be compacted in a batch.
* @param bulkloadPathOfPartition The directory where the bulkload column of the current
* partition is saved.
* @param bulkloadColumnPath The directory where the bulkload files of current partition
* are saved.
* @param newFiles The paths of new mob files after compactions.
* @throws IOException if IO failure is encountered
*/
private void compactMobFilesInBatch(PartitionedMobCompactionRequest request,
    CompactionPartition partition, Connection connection, Table table,
    List<StoreFile> filesToCompact, int batch, Path bulkloadPathOfPartition,
    Path bulkloadColumnPath, List<Path> newFiles) throws IOException {
// open scanner to the selected mob files and del files.
StoreScanner scanner = createScanner(filesToCompact, ScanType.COMPACT_DROP_DELETES);
// The mob files to be compacted, not including the del files.
List<StoreFile> mobFilesToCompact = filesToCompact.subList(0, batch);
// Pair(maxSeqId, cellsCount)
Pair<Long, Long> fileInfo = getFileInfo(mobFilesToCompact);
// open writers for the mob files and new ref store files.
StoreFileWriter writer = null;
StoreFileWriter refFileWriter = null;
Path filePath = null;
long mobCells = 0;
boolean cleanupTmpMobFile = false;
boolean cleanupBulkloadDirOfPartition = false;
boolean cleanupCommittedMobFile = false;
boolean closeReaders = true;
try {
try {
writer = MobUtils.createWriter(conf, fs, column, partition.getPartitionId().getLatestDate(), tempPath, Long.MAX_VALUE, column.getCompactionCompressionType(), partition.getPartitionId().getStartKey(), compactionCacheConfig, cryptoContext, true);
cleanupTmpMobFile = true;
filePath = writer.getPath();
byte[] fileName = Bytes.toBytes(filePath.getName());
// create a temp file and open a writer for it in the bulkloadPath
refFileWriter = MobUtils.createRefFileWriter(conf, fs, column, bulkloadColumnPath, fileInfo.getSecond().longValue(), compactionCacheConfig, cryptoContext, true);
cleanupBulkloadDirOfPartition = true;
List<Cell> cells = new ArrayList<>();
boolean hasMore;
ScannerContext scannerContext = ScannerContext.newBuilder().setBatchLimit(compactionKVMax).build();
do {
hasMore = scanner.next(cells, scannerContext);
for (Cell cell : cells) {
// write the mob cell to the mob file.
writer.append(cell);
// write the new reference cell to the store file.
Cell reference = MobUtils.createMobRefCell(cell, fileName, this.refCellTags);
refFileWriter.append(reference);
mobCells++;
}
cells.clear();
} while (hasMore);
} finally {
// close the scanner.
scanner.close();
if (cleanupTmpMobFile) {
// append metadata to the mob file, and close the mob file writer.
closeMobFileWriter(writer, fileInfo.getFirst(), mobCells);
}
if (cleanupBulkloadDirOfPartition) {
// append metadata and bulkload info to the ref mob file, and close the writer.
closeRefFileWriter(refFileWriter, fileInfo.getFirst(), request.selectionTime);
}
}
if (mobCells > 0) {
// commit mob file
MobUtils.commitFile(conf, fs, filePath, mobFamilyDir, compactionCacheConfig);
cleanupTmpMobFile = false;
cleanupCommittedMobFile = true;
// bulkload the ref file
bulkloadRefFile(connection, table, bulkloadPathOfPartition, filePath.getName());
cleanupCommittedMobFile = false;
newFiles.add(new Path(mobFamilyDir, filePath.getName()));
}
// archive the old mob files, do not archive the del files.
try {
closeStoreFileReaders(mobFilesToCompact);
closeReaders = false;
MobUtils.removeMobFiles(conf, fs, tableName, mobTableDir, column.getName(), mobFilesToCompact);
} catch (IOException e) {
LOG.error("Failed to archive the files " + mobFilesToCompact, e);
}
} finally {
if (closeReaders) {
closeStoreFileReaders(mobFilesToCompact);
}
if (cleanupTmpMobFile) {
deletePath(filePath);
}
if (cleanupBulkloadDirOfPartition) {
// delete the bulkload files in bulkloadPath
deletePath(bulkloadPathOfPartition);
}
if (cleanupCommittedMobFile) {
deletePath(new Path(mobFamilyDir, filePath.getName()));
}
}
}
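Note the commit-or-cleanup flags above (cleanupTmpMobFile, cleanupBulkloadDirOfPartition, cleanupCommittedMobFile): each records that a temp artifact exists and is cleared only once the artifact is committed, so the finally block removes exactly the uncommitted leftovers. A generic sketch of the pattern, with plain HDFS calls standing in for the mob-specific commit logic (CommitOrCleanupSketch and writeCommitted are hypothetical names):
import java.io.IOException;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CommitOrCleanupSketch {
  // Writes to a temp path, then either commits it or guarantees its removal.
  static void writeCommitted(FileSystem fs, Path tmp, Path dest, byte[] data) throws IOException {
    boolean cleanupTmp = false;
    try {
      try (FSDataOutputStream out = fs.create(tmp)) {
        cleanupTmp = true;            // the temp file now exists and may need cleanup
        out.write(data);
      }
      if (!fs.rename(tmp, dest)) {    // "commit": the file moves to its final location
        throw new IOException("Failed to commit " + tmp + " to " + dest);
      }
      cleanupTmp = false;             // commit succeeded, nothing to clean up
    } finally {
      if (cleanupTmp) {
        fs.delete(tmp, false);        // only reached when the commit did not happen
      }
    }
  }
}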
Use of org.apache.hadoop.hbase.regionserver.ScannerContext in project hbase by apache.
The class DefaultMobStoreCompactor, method performCompaction.
/**
* Performs compaction on a column family with the mob flag enabled.
* This is for when the mob threshold size has changed or if the mob
* column family mode has been toggled via an alter table statement.
* Compacts the files by the following rules.
* 1. If the Put cell has a mob reference tag, the cell's value is the path of the mob file.
* <ol>
* <li>
* If the value size of a cell is larger than the threshold, this cell is regarded as a mob,
* directly copy the (with mob tag) cell into the new store file.
* </li>
* <li>
* Otherwise, retrieve the mob cell from the mob file, and writes a copy of the cell into
* the new store file.
* </li>
* </ol>
* 2. If the Put cell doesn't have a reference tag.
* <ol>
* <li>
* If the value size of a cell is larger than the threshold, this cell is regarded as a mob,
* write this cell to a mob file, and write the path of this mob file to the store file.
* </li>
* <li>
* Otherwise, directly write this cell into the store file.
* </li>
* </ol>
* 3. Decide how to write a Delete cell.
* <ol>
* <li>
* If a Delete cell does not have a mob reference tag, which means this delete marker has not
* been written to the mob del file, write this cell to the mob del file, and write this cell
* with a ref tag to a store file.
* </li>
* <li>
* Otherwise, directly write it to a store file.
* </li>
* </ol>
* After the major compaction on the normal hfiles, we have a guarantee that we have purged all
* deleted or old version mob refs, and the delete markers are written to a del file with the
* suffix _del. Because of this, it is safe to use the del file in the mob compaction.
* The mob compaction doesn't take place in the normal hfiles, it occurs directly in the
* mob files. When the small mob files are merged into bigger ones, the del file is added into
* the scanner to filter the deleted cells.
* @param fd File details
* @param scanner Where to read from.
* @param writer Where to write to.
* @param smallestReadPoint Smallest read point.
* @param cleanSeqId When true, remove seqId (used to be mvcc) value which is <= smallestReadPoint
* @param throughputController The compaction throughput controller.
* @param major Is a major compaction.
* @param numofFilesToCompact the number of files to compact
* @return Whether compaction ended; false if it was interrupted for any reason.
*/
@Override
protected boolean performCompaction(FileDetails fd, InternalScanner scanner, CellSink writer,
    long smallestReadPoint, boolean cleanSeqId, ThroughputController throughputController,
    boolean major, int numofFilesToCompact) throws IOException {
long bytesWrittenProgressForCloseCheck = 0;
long bytesWrittenProgressForLog = 0;
long bytesWrittenProgressForShippedCall = 0;
// Since scanner.next() can return 'false' but still be delivering data,
// we have to use a do/while loop.
List<Cell> cells = new ArrayList<>();
int closeCheckSizeLimit = HStore.getCloseCheckInterval();
long lastMillis = 0;
if (LOG.isDebugEnabled()) {
lastMillis = EnvironmentEdgeManager.currentTime();
}
String compactionName = ThroughputControlUtil.getNameForThrottling(store, "compaction");
long now = 0;
boolean hasMore;
Path path = MobUtils.getMobFamilyPath(conf, store.getTableName(), store.getColumnFamilyName());
byte[] fileName = null;
StoreFileWriter mobFileWriter = null, delFileWriter = null;
long mobCells = 0, deleteMarkersCount = 0;
long cellsCountCompactedToMob = 0, cellsCountCompactedFromMob = 0;
long cellsSizeCompactedToMob = 0, cellsSizeCompactedFromMob = 0;
boolean finished = false;
// Limit the batch to "hbase.hstore.compaction.kv.max" (default 10) cells per next() call to avoid OOME
ScannerContext scannerContext = ScannerContext.newBuilder().setBatchLimit(compactionKVMax).build();
throughputController.start(compactionName);
KeyValueScanner kvs = (scanner instanceof KeyValueScanner) ? (KeyValueScanner) scanner : null;
long shippedCallSizeLimit = (long) numofFilesToCompact * this.store.getFamily().getBlocksize();
try {
try {
// If the mob file writer could not be created, directly write the cell to the store file.
mobFileWriter = mobStore.createWriterInTmp(new Date(fd.latestPutTs), fd.maxKeyCount, compactionCompression, store.getRegionInfo().getStartKey(), true);
fileName = Bytes.toBytes(mobFileWriter.getPath().getName());
} catch (IOException e) {
LOG.warn("Failed to create mob writer, " + "we will continue the compaction by writing MOB cells directly in store files", e);
}
if (major) {
try {
delFileWriter = mobStore.createDelFileWriterInTmp(new Date(fd.latestPutTs), fd.maxKeyCount, compactionCompression, store.getRegionInfo().getStartKey());
} catch (IOException e) {
LOG.warn("Failed to create del writer, " + "we will continue the compaction by writing delete markers directly in store files", e);
}
}
do {
hasMore = scanner.next(cells, scannerContext);
if (LOG.isDebugEnabled()) {
now = EnvironmentEdgeManager.currentTime();
}
for (Cell c : cells) {
if (major && CellUtil.isDelete(c)) {
if (MobUtils.isMobReferenceCell(c) || delFileWriter == null) {
// Directly write it to a store file
writer.append(c);
} else {
// Add a ref tag to this cell and write it to a store file.
writer.append(MobUtils.createMobRefDeleteMarker(c));
// Write the cell to a del file
delFileWriter.append(c);
deleteMarkersCount++;
}
} else if (mobFileWriter == null || c.getTypeByte() != KeyValue.Type.Put.getCode()) {
// If the mob file writer is null or the kv type is not put, directly write the cell
// to the store file.
writer.append(c);
} else if (MobUtils.isMobReferenceCell(c)) {
if (MobUtils.hasValidMobRefCellValue(c)) {
int size = MobUtils.getMobValueLength(c);
if (size > mobSizeThreshold) {
// If the value size is larger than the threshold, it's regarded as a mob. Since
// its value is already in the mob file, directly write this cell to the store file
writer.append(c);
} else {
// If the value is not larger than the threshold, it's not regarded as a mob. Retrieve
// the mob cell from the mob file, and write it back to the store file.
Cell mobCell = mobStore.resolve(c, false);
if (mobCell.getValueLength() != 0) {
// put the mob data back to the store file
CellUtil.setSequenceId(mobCell, c.getSequenceId());
writer.append(mobCell);
cellsCountCompactedFromMob++;
cellsSizeCompactedFromMob += mobCell.getValueLength();
} else {
// If the value of a file is empty, there might be issues when retrieving,
// directly write the cell to the store file, and leave it to be handled by the
// next compaction.
writer.append(c);
}
}
} else {
LOG.warn("The value format of the KeyValue " + c + " is wrong, its length is less than " + Bytes.SIZEOF_INT);
writer.append(c);
}
} else if (c.getValueLength() <= mobSizeThreshold) {
//If value size of a cell is not larger than the threshold, directly write to store file
writer.append(c);
} else {
// If the value size of a cell is larger than the threshold, it's regarded as a mob,
// write this cell to a mob file, and write the path to the store file.
mobCells++;
// append the original keyValue in the mob file.
mobFileWriter.append(c);
Cell reference = MobUtils.createMobRefCell(c, fileName, this.mobStore.getRefCellTags());
// write the cell whose value is the path of a mob file to the store file.
writer.append(reference);
cellsCountCompactedToMob++;
cellsSizeCompactedToMob += c.getValueLength();
}
int len = KeyValueUtil.length(c);
++progress.currentCompactedKVs;
progress.totalCompactedSize += len;
bytesWrittenProgressForShippedCall += len;
if (LOG.isDebugEnabled()) {
bytesWrittenProgressForLog += len;
}
throughputController.control(compactionName, len);
// check periodically to see if a system stop is requested
if (closeCheckSizeLimit > 0) {
bytesWrittenProgressForCloseCheck += len;
if (bytesWrittenProgressForCloseCheck > closeCheckSizeLimit) {
bytesWrittenProgressForCloseCheck = 0;
if (!store.areWritesEnabled()) {
progress.cancel();
return false;
}
}
}
if (kvs != null && bytesWrittenProgressForShippedCall > shippedCallSizeLimit) {
((ShipperListener) writer).beforeShipped();
kvs.shipped();
bytesWrittenProgressForShippedCall = 0;
}
}
// logging at DEBUG level
if (LOG.isDebugEnabled()) {
if ((now - lastMillis) >= COMPACTION_PROGRESS_LOG_INTERVAL) {
LOG.debug("Compaction progress: " + compactionName + " " + progress + String.format(", rate=%.2f kB/sec", (bytesWrittenProgressForLog / 1024.0) / ((now - lastMillis) / 1000.0)) + ", throughputController is " + throughputController);
lastMillis = now;
bytesWrittenProgressForLog = 0;
}
}
cells.clear();
} while (hasMore);
finished = true;
} catch (InterruptedException e) {
progress.cancel();
throw new InterruptedIOException("Interrupted while control throughput of compacting " + compactionName);
} finally {
throughputController.finish(compactionName);
if (!finished && mobFileWriter != null) {
abortWriter(mobFileWriter);
}
if (!finished && delFileWriter != null) {
abortWriter(delFileWriter);
}
}
if (delFileWriter != null) {
if (deleteMarkersCount > 0) {
// If the del file is not empty, commit it.
// If the commit fails, the compaction is re-performed again.
delFileWriter.appendMetadata(fd.maxSeqId, major, deleteMarkersCount);
delFileWriter.close();
mobStore.commitFile(delFileWriter.getPath(), path);
} else {
// If the del file is empty, delete it instead of committing.
abortWriter(delFileWriter);
}
}
if (mobFileWriter != null) {
if (mobCells > 0) {
// If the mob file is not empty, commit it.
mobFileWriter.appendMetadata(fd.maxSeqId, major, mobCells);
mobFileWriter.close();
mobStore.commitFile(mobFileWriter.getPath(), path);
} else {
// If the mob file is empty, delete it instead of committing.
abortWriter(mobFileWriter);
}
}
mobStore.updateCellsCountCompactedFromMob(cellsCountCompactedFromMob);
mobStore.updateCellsCountCompactedToMob(cellsCountCompactedToMob);
mobStore.updateCellsSizeCompactedFromMob(cellsSizeCompactedFromMob);
mobStore.updateCellsSizeCompactedToMob(cellsSizeCompactedToMob);
progress.complete();
return true;
}
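The per-cell branching above amounts to a routing decision among three sinks. A hypothetical summary helper, not part of HBase, that names the three destinations; it simplifies the del-file case, where the real code additionally writes a ref-tagged marker to the store file:
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.mob.MobUtils;

public class MobRoutingSketch {
  enum Sink { STORE_FILE, MOB_FILE, DEL_FILE }

  // Mirrors the top-level decision tree of performCompaction for a single cell.
  static Sink route(Cell c, boolean major, boolean haveMobWriter, boolean haveDelWriter,
      long mobSizeThreshold) {
    if (major && CellUtil.isDelete(c)) {
      // Delete markers go to the del file unless they already carry a mob ref tag
      // (in the real code a ref-tagged marker is also written to the store file).
      return (MobUtils.isMobReferenceCell(c) || !haveDelWriter) ? Sink.STORE_FILE : Sink.DEL_FILE;
    }
    if (!haveMobWriter || c.getTypeByte() != KeyValue.Type.Put.getCode()) {
      // No mob writer, or not a Put: write the cell straight to the store file.
      return Sink.STORE_FILE;
    }
    if (MobUtils.isMobReferenceCell(c)) {
      // Existing refs are either kept or resolved; either way the result lands in the store file.
      return Sink.STORE_FILE;
    }
    // Large new values become mob cells; the store file gets only a reference.
    return c.getValueLength() <= mobSizeThreshold ? Sink.STORE_FILE : Sink.MOB_FILE;
  }
}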