Use of org.apache.hadoop.hbase.regionserver.StoreFile in project hbase by apache.
The class PartitionedMobCompactor, method compactMobFilesInBatch.
/**
* Compacts a partition of selected small mob files and all the del files in a batch.
* @param request The compaction request.
* @param partition A compaction partition.
* @param connection The connection to use.
* @param table The current table.
* @param filesToCompact The files to be compacted.
* @param batch The number of mob files to be compacted in a batch.
* @param bulkloadPathOfPartition The directory where the bulkload column of the current
* partition is saved.
* @param bulkloadColumnPath The directory where the bulkload files of the current partition
* are saved.
* @param newFiles The paths of new mob files after compactions.
* @throws IOException if IO failure is encountered
*/
private void compactMobFilesInBatch(PartitionedMobCompactionRequest request, CompactionPartition partition, Connection connection, Table table, List<StoreFile> filesToCompact, int batch, Path bulkloadPathOfPartition, Path bulkloadColumnPath, List<Path> newFiles) throws IOException {
// open scanner to the selected mob files and del files.
StoreScanner scanner = createScanner(filesToCompact, ScanType.COMPACT_DROP_DELETES);
// the mob files to be compacted, excluding the del files.
List<StoreFile> mobFilesToCompact = filesToCompact.subList(0, batch);
// Pair(maxSeqId, cellsCount)
Pair<Long, Long> fileInfo = getFileInfo(mobFilesToCompact);
// open writers for the mob files and new ref store files.
StoreFileWriter writer = null;
StoreFileWriter refFileWriter = null;
Path filePath = null;
long mobCells = 0;
boolean cleanupTmpMobFile = false;
boolean cleanupBulkloadDirOfPartition = false;
boolean cleanupCommittedMobFile = false;
boolean closeReaders = true;
try {
try {
writer = MobUtils.createWriter(conf, fs, column, partition.getPartitionId().getLatestDate(), tempPath, Long.MAX_VALUE, column.getCompactionCompressionType(), partition.getPartitionId().getStartKey(), compactionCacheConfig, cryptoContext, true);
cleanupTmpMobFile = true;
filePath = writer.getPath();
byte[] fileName = Bytes.toBytes(filePath.getName());
// create a temp file and open a writer for it in the bulkloadPath
refFileWriter = MobUtils.createRefFileWriter(conf, fs, column, bulkloadColumnPath, fileInfo.getSecond().longValue(), compactionCacheConfig, cryptoContext, true);
cleanupBulkloadDirOfPartition = true;
List<Cell> cells = new ArrayList<>();
boolean hasMore;
ScannerContext scannerContext = ScannerContext.newBuilder().setBatchLimit(compactionKVMax).build();
do {
hasMore = scanner.next(cells, scannerContext);
for (Cell cell : cells) {
// write the mob cell to the mob file.
writer.append(cell);
// write the new reference cell to the store file.
Cell reference = MobUtils.createMobRefCell(cell, fileName, this.refCellTags);
refFileWriter.append(reference);
mobCells++;
}
cells.clear();
} while (hasMore);
} finally {
// close the scanner.
scanner.close();
if (cleanupTmpMobFile) {
// append metadata to the mob file, and close the mob file writer.
closeMobFileWriter(writer, fileInfo.getFirst(), mobCells);
}
if (cleanupBulkloadDirOfPartition) {
// append metadata and bulkload info to the ref mob file, and close the writer.
closeRefFileWriter(refFileWriter, fileInfo.getFirst(), request.selectionTime);
}
}
if (mobCells > 0) {
// commit mob file
MobUtils.commitFile(conf, fs, filePath, mobFamilyDir, compactionCacheConfig);
cleanupTmpMobFile = false;
cleanupCommittedMobFile = true;
// bulkload the ref file
bulkloadRefFile(connection, table, bulkloadPathOfPartition, filePath.getName());
cleanupCommittedMobFile = false;
newFiles.add(new Path(mobFamilyDir, filePath.getName()));
}
// archive the old mob files, do not archive the del files.
try {
closeStoreFileReaders(mobFilesToCompact);
closeReaders = false;
MobUtils.removeMobFiles(conf, fs, tableName, mobTableDir, column.getName(), mobFilesToCompact);
} catch (IOException e) {
LOG.error("Failed to archive the files " + mobFilesToCompact, e);
}
} finally {
if (closeReaders) {
closeStoreFileReaders(mobFilesToCompact);
}
if (cleanupTmpMobFile) {
deletePath(filePath);
}
if (cleanupBulkloadDirOfPartition) {
// delete the bulkload files in bulkloadPath
deletePath(bulkloadPathOfPartition);
}
if (cleanupCommittedMobFile) {
deletePath(new Path(mobFamilyDir, filePath.getName()));
}
}
}
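The four boolean flags in this method implement a staged-cleanup pattern: each flag is armed as soon as the corresponding resource exists and disarmed once responsibility for it passes on, so the finally block only undoes work that was actually left incomplete. A minimal sketch of the same pattern, with hypothetical file helpers that are not part of the HBase API:

// Minimal sketch of the staged-cleanup pattern; createTmpFile, commitTmpFile
// and deleteTmpFile are hypothetical stand-ins, not HBase methods.
import java.io.IOException;

public class StagedCleanupSketch {
  public void writeAndCommit() throws IOException {
    boolean cleanupTmpFile = false;
    try {
      createTmpFile();        // the resource now exists ...
      cleanupTmpFile = true;  // ... so arm its cleanup
      commitTmpFile();        // ownership moves to the committed location
      cleanupTmpFile = false; // disarm: the tmp file no longer needs deleting
    } finally {
      if (cleanupTmpFile) {
        deleteTmpFile();      // runs only when the commit did not happen
      }
    }
  }

  private void createTmpFile() throws IOException { /* ... */ }
  private void commitTmpFile() throws IOException { /* ... */ }
  private void deleteTmpFile() { /* ... */ }
}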
Use of org.apache.hadoop.hbase.regionserver.StoreFile in project hbase by apache.
The class PartitionedMobCompactor, method compactMobFilePartition.
/**
* Compacts a partition of selected small mob files and all the del files.
* @param request The compaction request.
* @param partition A compaction partition.
* @param delFiles The del files.
* @param connection The connection to use.
* @param table The current table.
* @return The paths of new mob files after compactions.
* @throws IOException if IO failure is encountered
*/
private List<Path> compactMobFilePartition(PartitionedMobCompactionRequest request, CompactionPartition partition, List<StoreFile> delFiles, Connection connection, Table table) throws IOException {
if (MobUtils.isMobFileExpired(column, EnvironmentEdgeManager.currentTime(), partition.getPartitionId().getDate())) {
// return an empty list.
return Collections.emptyList();
}
List<Path> newFiles = new ArrayList<>();
List<FileStatus> files = partition.listFiles();
int offset = 0;
Path bulkloadPathOfPartition = new Path(bulkloadPath, partition.getPartitionId().toString());
Path bulkloadColumnPath = new Path(bulkloadPathOfPartition, column.getNameAsString());
while (offset < files.size()) {
int batch = compactionBatchSize;
if (files.size() - offset < compactionBatchSize) {
batch = files.size() - offset;
}
if (batch == 1 && delFiles.isEmpty()) {
// only one file left and no del files, do not compact it,
// and directly add it to the new files.
newFiles.add(files.get(offset).getPath());
offset++;
continue;
}
// clean the bulkload directory to avoid loading old files.
fs.delete(bulkloadPathOfPartition, true);
// add the selected mob files and del files into filesToCompact
List<StoreFile> filesToCompact = new ArrayList<>();
for (int i = offset; i < batch + offset; i++) {
StoreFile sf = new StoreFile(fs, files.get(i).getPath(), conf, compactionCacheConfig, BloomType.NONE);
filesToCompact.add(sf);
}
filesToCompact.addAll(delFiles);
// compact the mob files in a batch.
compactMobFilesInBatch(request, partition, connection, table, filesToCompact, batch, bulkloadPathOfPartition, bulkloadColumnPath, newFiles);
// move to the next batch.
offset += batch;
}
LOG.info("Compaction is finished. The number of mob files is changed from " + files.size() + " to " + newFiles.size());
return newFiles;
}
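The loop above walks the partition's files in windows of at most compactionBatchSize; the two-branch computation of batch is equivalent to a single Math.min, as this stripped-down sketch with hypothetical sizes shows:

// Stripped-down sketch of the batch-window arithmetic; the sizes are hypothetical.
public class BatchWindowSketch {
  public static void main(String[] args) {
    int fileCount = 250;           // stand-in for files.size()
    int compactionBatchSize = 100; // stand-in for the configured batch size
    int offset = 0;
    while (offset < fileCount) {
      // equivalent to the if-block above: the last window may be smaller
      int batch = Math.min(compactionBatchSize, fileCount - offset); // 100, 100, 50
      System.out.println("compact files [" + offset + ", " + (offset + batch) + ")");
      offset += batch;
    }
  }
}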
Use of org.apache.hadoop.hbase.regionserver.StoreFile in project hbase by apache.
The class MobFile, method readCell.
/**
* Reads a cell from the mob file.
* @param search The cell to be searched for in the mob file.
* @param cacheMobBlocks Should this scanner cache blocks.
* @param readPt the read point.
* @return The cell in the mob file.
* @throws IOException if IO failure is encountered
*/
public Cell readCell(Cell search, boolean cacheMobBlocks, long readPt) throws IOException {
Cell result = null;
StoreFileScanner scanner = null;
List<StoreFile> sfs = new ArrayList<>();
sfs.add(sf);
try {
List<StoreFileScanner> sfScanners = StoreFileScanner.getScannersForStoreFiles(sfs, cacheMobBlocks, true, false, false, readPt);
if (!sfScanners.isEmpty()) {
scanner = sfScanners.get(0);
if (scanner.seek(search)) {
result = scanner.peek();
}
}
} finally {
if (scanner != null) {
scanner.close();
}
}
return result;
}
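Because scanner.seek(search) positions the scanner at the first cell greater than or equal to search, and peek() returns that cell without advancing, readCell effectively answers "what is the first cell at or after this key in the file". A hypothetical usage sketch; how the MobFile instance is obtained is not shown in this listing, so the setup here is illustrative:

// Hypothetical usage sketch of MobFile.readCell; the row/family/qualifier
// values and the lookup helper are illustrative, not the real HBase flow.
import java.io.IOException;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.mob.MobFile;
import org.apache.hadoop.hbase.util.Bytes;

public class ReadCellSketch {
  static Cell lookup(MobFile mobFile) throws IOException {
    Cell search = CellUtil.createCell(
        Bytes.toBytes("row1"), Bytes.toBytes("f"), Bytes.toBytes("q"));
    // Returns null when no cell at or after the search key exists in the file.
    // Long.MAX_VALUE as the read point makes every cell visible to the scan.
    return mobFile.readCell(search, /* cacheMobBlocks */ false, Long.MAX_VALUE);
  }
}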
Use of org.apache.hadoop.hbase.regionserver.StoreFile in project hbase by apache.
The class SplitTableRegionProcedure, method splitStoreFiles.
/**
* Splits the parent region's store files, creating reference files for the two daughter regions.
* @param env MasterProcedureEnv
* @return The number of store file references created for daughter A and daughter B.
* @throws IOException if IO failure is encountered
*/
private Pair<Integer, Integer> splitStoreFiles(final MasterProcedureEnv env, final HRegionFileSystem regionFs) throws IOException {
final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
final Configuration conf = env.getMasterConfiguration();
// The following code sets up a thread pool executor with as many slots as
// there are files to split. It then fires up everything, waits for
// completion and finally checks for any exception
//
// Note: splitStoreFiles creates daughter region dirs under the parent splits dir
// Nothing to unroll here on failure -- re-running createSplitsDir will
// clean this up.
int nbFiles = 0;
for (String family : regionFs.getFamilies()) {
Collection<StoreFileInfo> storeFiles = regionFs.getStoreFiles(family);
if (storeFiles != null) {
nbFiles += storeFiles.size();
}
}
if (nbFiles == 0) {
// no file needs to be split.
return new Pair<>(0, 0);
}
// Default max #threads to use is the smaller of the table's configured number of blocking
// store files and the number of available logical cores.
int defMaxThreads = Math.min(conf.getInt(HStore.BLOCKING_STOREFILES_KEY, HStore.DEFAULT_BLOCKING_STOREFILE_COUNT), Runtime.getRuntime().availableProcessors());
// Max #threads is the smaller of the number of storefiles and the default max determined above.
int maxThreads = Math.min(conf.getInt(HConstants.REGION_SPLIT_THREADS_MAX, defMaxThreads), nbFiles);
LOG.info("Preparing to split " + nbFiles + " storefiles for region " + parentHRI + " using " + maxThreads + " threads");
ThreadPoolExecutor threadPool = (ThreadPoolExecutor) Executors.newFixedThreadPool(maxThreads, Threads.getNamedThreadFactory("StoreFileSplitter-%1$d"));
List<Future<Pair<Path, Path>>> futures = new ArrayList<>(nbFiles);
// Split each store file.
final HTableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
for (String family : regionFs.getFamilies()) {
final HColumnDescriptor hcd = htd.getFamily(family.getBytes());
final Collection<StoreFileInfo> storeFiles = regionFs.getStoreFiles(family);
if (storeFiles != null && storeFiles.size() > 0) {
final CacheConfig cacheConf = new CacheConfig(conf, hcd);
for (StoreFileInfo storeFileInfo : storeFiles) {
StoreFileSplitter sfs = new StoreFileSplitter(regionFs, family.getBytes(), new StoreFile(mfs.getFileSystem(), storeFileInfo, conf, cacheConf, hcd.getBloomFilterType()));
futures.add(threadPool.submit(sfs));
}
}
}
// Shutdown the pool
threadPool.shutdown();
// Wait for all the tasks to finish
long fileSplitTimeout = conf.getLong("hbase.master.fileSplitTimeout", 30000);
try {
boolean stillRunning = !threadPool.awaitTermination(fileSplitTimeout, TimeUnit.MILLISECONDS);
if (stillRunning) {
threadPool.shutdownNow();
// wait for the threads to shut down completely.
while (!threadPool.isTerminated()) {
Thread.sleep(50);
}
throw new IOException("Took too long to split the" + " files and create the references, aborting split");
}
} catch (InterruptedException e) {
throw (InterruptedIOException) new InterruptedIOException().initCause(e);
}
int daughterA = 0;
int daughterB = 0;
// Look for any exception
for (Future<Pair<Path, Path>> future : futures) {
try {
Pair<Path, Path> p = future.get();
daughterA += p.getFirst() != null ? 1 : 0;
daughterB += p.getSecond() != null ? 1 : 0;
} catch (InterruptedException e) {
throw (InterruptedIOException) new InterruptedIOException().initCause(e);
} catch (ExecutionException e) {
throw new IOException(e);
}
}
if (LOG.isDebugEnabled()) {
LOG.debug("Split storefiles for region " + parentHRI + " Daughter A: " + daughterA + " storefiles, Daughter B: " + daughterB + " storefiles.");
}
return new Pair<>(daughterA, daughterB);
}
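The method follows a common fan-out/await/harvest pattern: submit one Callable per store file, shut the pool down, await termination with a timeout, then drain the futures so any task exception resurfaces. A minimal self-contained sketch of that pattern, with a toy workload standing in for StoreFileSplitter:

// Minimal sketch of the fan-out/await/harvest pattern; the squaring task is a
// hypothetical stand-in for StoreFileSplitter.
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;

public class FanOutSketch {
  public static void main(String[] args) throws Exception {
    ExecutorService pool = Executors.newFixedThreadPool(4);
    List<Future<Integer>> futures = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
      final int n = i;
      futures.add(pool.submit(() -> n * n)); // one task per store file in the real code
    }
    pool.shutdown(); // stop accepting new work; queued tasks still run
    if (!pool.awaitTermination(30, TimeUnit.SECONDS)) {
      pool.shutdownNow(); // timed out: cancel remaining tasks, as the split does
      throw new IllegalStateException("tasks took too long");
    }
    for (Future<Integer> f : futures) {
      System.out.println(f.get()); // get() rethrows any task failure as ExecutionException
    }
  }
}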
Use of org.apache.hadoop.hbase.regionserver.StoreFile in project hbase by apache.
The class PartitionedMobCompactor, method getListOfDelFilesForPartition.
@VisibleForTesting
List<StoreFile> getListOfDelFilesForPartition(final CompactionPartition partition, final List<CompactionDelPartition> delPartitions) {
// Binary search for startKey and endKey
List<StoreFile> result = new ArrayList<>();
DelPartitionComparator comparator = new DelPartitionComparator(false);
CompactionDelPartitionId id = new CompactionDelPartitionId(null, partition.getStartKey());
CompactionDelPartition target = new CompactionDelPartition(id);
int start = Collections.binarySearch(delPartitions, target, comparator);
// Get the start index for the partition
if (start < 0) {
// binarySearch returned (-(insertion point) - 1); convert back to the insertion point
start = (start + 1) * (-1);
if (start == delPartitions.size()) {
// no overlap
return result;
} else {
// Check another case which has no overlap
if (Bytes.compareTo(partition.getEndKey(), delPartitions.get(start).getId().getStartKey()) < 0) {
return result;
}
}
}
// Search for the end index of the partition
comparator.setCompareStartKey(true);
id.setStartKey(partition.getEndKey());
int end = Collections.binarySearch(delPartitions, target, comparator);
if (end < 0) {
end = (end + 1) * (-1);
if (end == 0) {
return result;
} else {
--end;
if (Bytes.compareTo(partition.getStartKey(), delPartitions.get(end).getId().getEndKey()) > 0) {
return result;
}
}
}
for (int i = start; i <= end; ++i) {
result.addAll(delPartitions.get(i).getStoreFiles());
}
return result;
}
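Collections.binarySearch returns -(insertionPoint) - 1 when the key is absent, so (start + 1) * (-1), equivalent to -(start + 1), recovers the insertion point. A small worked sketch of that arithmetic:

// Worked sketch of the insertion-point arithmetic used above.
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

public class BinarySearchSketch {
  public static void main(String[] args) {
    List<Integer> sorted = Arrays.asList(10, 20, 30);
    int idx = Collections.binarySearch(sorted, 25); // absent: returns -(insertionPoint) - 1 = -3
    if (idx < 0) {
      int insertionPoint = (idx + 1) * (-1); // same as -(idx + 1); here: 2
      System.out.println(insertionPoint);    // 25 would be inserted before index 2 (value 30)
    }
  }
}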