
Example 36 with StoreFile

use of org.apache.hadoop.hbase.regionserver.StoreFile in project hbase by apache.

the class PartitionedMobCompactor method compactMobFilesInBatch.

/**
   * Compacts a partition of selected small mob files and all the del files in a batch.
   * @param request The compaction request.
   * @param partition A compaction partition.
   * @param connection The connection to use.
   * @param table The current table.
   * @param filesToCompact The files to be compacted.
   * @param batch The number of mob files to be compacted in a batch.
   * @param bulkloadPathOfPartition The directory where the bulkload column of the current
   *   partition is saved.
   * @param bulkloadColumnPath The directory where the bulkload files of the current partition
   *   are saved.
   * @param newFiles The paths of new mob files after compactions.
   * @throws IOException if IO failure is encountered
   */
private void compactMobFilesInBatch(PartitionedMobCompactionRequest request, CompactionPartition partition, Connection connection, Table table, List<StoreFile> filesToCompact, int batch, Path bulkloadPathOfPartition, Path bulkloadColumnPath, List<Path> newFiles) throws IOException {
    // open scanner to the selected mob files and del files.
    StoreScanner scanner = createScanner(filesToCompact, ScanType.COMPACT_DROP_DELETES);
    // the mob files to be compacted, not including the del files.
    List<StoreFile> mobFilesToCompact = filesToCompact.subList(0, batch);
    // Pair(maxSeqId, cellsCount)
    Pair<Long, Long> fileInfo = getFileInfo(mobFilesToCompact);
    // open writers for the mob files and new ref store files.
    StoreFileWriter writer = null;
    StoreFileWriter refFileWriter = null;
    Path filePath = null;
    long mobCells = 0;
    boolean cleanupTmpMobFile = false;
    boolean cleanupBulkloadDirOfPartition = false;
    boolean cleanupCommittedMobFile = false;
    boolean closeReaders = true;
    try {
        try {
            writer = MobUtils.createWriter(conf, fs, column, partition.getPartitionId().getLatestDate(), tempPath, Long.MAX_VALUE, column.getCompactionCompressionType(), partition.getPartitionId().getStartKey(), compactionCacheConfig, cryptoContext, true);
            cleanupTmpMobFile = true;
            filePath = writer.getPath();
            byte[] fileName = Bytes.toBytes(filePath.getName());
            // create a temp file and open a writer for it in the bulkloadPath
            refFileWriter = MobUtils.createRefFileWriter(conf, fs, column, bulkloadColumnPath, fileInfo.getSecond().longValue(), compactionCacheConfig, cryptoContext, true);
            cleanupBulkloadDirOfPartition = true;
            List<Cell> cells = new ArrayList<>();
            boolean hasMore;
            ScannerContext scannerContext = ScannerContext.newBuilder().setBatchLimit(compactionKVMax).build();
            do {
                hasMore = scanner.next(cells, scannerContext);
                for (Cell cell : cells) {
                    // write the mob cell to the mob file.
                    writer.append(cell);
                    // write the new reference cell to the store file.
                    Cell reference = MobUtils.createMobRefCell(cell, fileName, this.refCellTags);
                    refFileWriter.append(reference);
                    mobCells++;
                }
                cells.clear();
            } while (hasMore);
        } finally {
            // close the scanner.
            scanner.close();
            if (cleanupTmpMobFile) {
                // append metadata to the mob file, and close the mob file writer.
                closeMobFileWriter(writer, fileInfo.getFirst(), mobCells);
            }
            if (cleanupBulkloadDirOfPartition) {
                // append metadata and bulkload info to the ref mob file, and close the writer.
                closeRefFileWriter(refFileWriter, fileInfo.getFirst(), request.selectionTime);
            }
        }
        if (mobCells > 0) {
            // commit mob file
            MobUtils.commitFile(conf, fs, filePath, mobFamilyDir, compactionCacheConfig);
            cleanupTmpMobFile = false;
            cleanupCommittedMobFile = true;
            // bulkload the ref file
            bulkloadRefFile(connection, table, bulkloadPathOfPartition, filePath.getName());
            cleanupCommittedMobFile = false;
            newFiles.add(new Path(mobFamilyDir, filePath.getName()));
        }
        // archive the old mob files, do not archive the del files.
        try {
            closeStoreFileReaders(mobFilesToCompact);
            closeReaders = false;
            MobUtils.removeMobFiles(conf, fs, tableName, mobTableDir, column.getName(), mobFilesToCompact);
        } catch (IOException e) {
            LOG.error("Failed to archive the files " + mobFilesToCompact, e);
        }
    } finally {
        if (closeReaders) {
            closeStoreFileReaders(mobFilesToCompact);
        }
        if (cleanupTmpMobFile) {
            deletePath(filePath);
        }
        if (cleanupBulkloadDirOfPartition) {
            // delete the bulkload files in bulkloadPath
            deletePath(bulkloadPathOfPartition);
        }
        if (cleanupCommittedMobFile) {
            deletePath(new Path(mobFamilyDir, filePath.getName()));
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) StoreFileWriter(org.apache.hadoop.hbase.regionserver.StoreFileWriter) ArrayList(java.util.ArrayList) IOException(java.io.IOException) StoreFile(org.apache.hadoop.hbase.regionserver.StoreFile) StoreScanner(org.apache.hadoop.hbase.regionserver.StoreScanner) Cell(org.apache.hadoop.hbase.Cell) ScannerContext(org.apache.hadoop.hbase.regionserver.ScannerContext)
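
A pattern worth noting in this example is the trio of rollback flags (cleanupTmpMobFile, cleanupBulkloadDirOfPartition, cleanupCommittedMobFile): each flag is set the moment an artifact starts to exist and cleared as soon as responsibility for it passes to the next step, so the outer finally block only undoes the step that actually failed. Below is a minimal, hypothetical sketch of the same flag-guarded cleanup idea using only java.nio; the class and method names are illustrative and are not part of HBase.

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

// Minimal sketch of the flag-guarded cleanup pattern (all names are illustrative, not HBase API).
public class FlagGuardedCleanup {

    public Path writeAndCommit(Path tempDir, Path finalDir, byte[] payload) throws IOException {
        // set once the temp file exists
        boolean cleanupTempFile = false;
        // set once the file has been moved but is not yet registered
        boolean cleanupCommittedFile = false;
        Path tempFile = null;
        Path committedFile = null;
        try {
            tempFile = Files.createTempFile(tempDir, "part-", ".tmp");
            cleanupTempFile = true;
            Files.write(tempFile, payload);
            committedFile = finalDir.resolve(tempFile.getFileName());
            Files.move(tempFile, committedFile);
            // the temp file no longer exists; the committed file is now the artifact to roll back
            cleanupTempFile = false;
            cleanupCommittedFile = true;
            // placeholder for the "make the new file visible" step (bulkload, catalog update, ...)
            register(committedFile);
            // fully committed, nothing left to undo
            cleanupCommittedFile = false;
            return committedFile;
        } finally {
            // only the artifacts of the step that actually failed are rolled back
            if (cleanupTempFile) {
                Files.deleteIfExists(tempFile);
            }
            if (cleanupCommittedFile) {
                Files.deleteIfExists(committedFile);
            }
        }
    }

    private void register(Path file) throws IOException {
        // intentionally empty in this sketch
    }
}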

Example 37 with StoreFile

use of org.apache.hadoop.hbase.regionserver.StoreFile in project hbase by apache.

the class PartitionedMobCompactor method compactMobFilePartition.

/**
   * Compacts a partition of selected small mob files and all the del files.
   * @param request The compaction request.
   * @param partition A compaction partition.
   * @param delFiles The del files.
   * @param connection The connection to use.
   * @param table The current table.
   * @return The paths of new mob files after compactions.
   * @throws IOException if IO failure is encountered
   */
private List<Path> compactMobFilePartition(PartitionedMobCompactionRequest request, CompactionPartition partition, List<StoreFile> delFiles, Connection connection, Table table) throws IOException {
    if (MobUtils.isMobFileExpired(column, EnvironmentEdgeManager.currentTime(), partition.getPartitionId().getDate())) {
        // return an empty list.
        return Collections.emptyList();
    }
    List<Path> newFiles = new ArrayList<>();
    List<FileStatus> files = partition.listFiles();
    int offset = 0;
    Path bulkloadPathOfPartition = new Path(bulkloadPath, partition.getPartitionId().toString());
    Path bulkloadColumnPath = new Path(bulkloadPathOfPartition, column.getNameAsString());
    while (offset < files.size()) {
        int batch = compactionBatchSize;
        if (files.size() - offset < compactionBatchSize) {
            batch = files.size() - offset;
        }
        if (batch == 1 && delFiles.isEmpty()) {
            // only one file left and no del files, do not compact it,
            // and directly add it to the new files.
            newFiles.add(files.get(offset).getPath());
            offset++;
            continue;
        }
        // clean the bulkload directory to avoid loading old files.
        fs.delete(bulkloadPathOfPartition, true);
        // add the selected mob files and del files into filesToCompact
        List<StoreFile> filesToCompact = new ArrayList<>();
        for (int i = offset; i < batch + offset; i++) {
            StoreFile sf = new StoreFile(fs, files.get(i).getPath(), conf, compactionCacheConfig, BloomType.NONE);
            filesToCompact.add(sf);
        }
        filesToCompact.addAll(delFiles);
        // compact the mob files in a batch.
        compactMobFilesInBatch(request, partition, connection, table, filesToCompact, batch, bulkloadPathOfPartition, bulkloadColumnPath, newFiles);
        // move to the next batch.
        offset += batch;
    }
    LOG.info("Compaction is finished. The number of mob files is changed from " + files.size() + " to " + newFiles.size());
    return newFiles;
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) ArrayList(java.util.ArrayList) StoreFile(org.apache.hadoop.hbase.regionserver.StoreFile)
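
The loop above walks the partition's files in slices of compactionBatchSize, shrinking the final slice to whatever remains, and skips compaction when a slice would hold a single file and there are no del files. A small, generic sketch of just the slicing arithmetic (batchSize stands in for compactionBatchSize; nothing here is HBase API):

import java.util.ArrayList;
import java.util.List;

// Generic sketch of the offset/batch walk used by compactMobFilePartition.
public class BatchWalk {

    public static <T> List<List<T>> slice(List<T> files, int batchSize) {
        List<List<T>> batches = new ArrayList<>();
        int offset = 0;
        while (offset < files.size()) {
            // the last batch may be smaller than batchSize
            int batch = Math.min(batchSize, files.size() - offset);
            batches.add(new ArrayList<>(files.subList(offset, offset + batch)));
            offset += batch;
        }
        return batches;
    }
}

For example, slice(Arrays.asList("a", "b", "c", "d", "e"), 2) yields [a, b], [c, d] and [e], mirroring how the compactor would form three batches from five mob files.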

Example 38 with StoreFile

use of org.apache.hadoop.hbase.regionserver.StoreFile in project hbase by apache.

the class MobFile method readCell.

/**
   * Reads a cell from the mob file.
   * @param search The cell to be searched for in the mob file.
   * @param cacheMobBlocks Should this scanner cache blocks.
   * @param readPt the read point.
   * @return The cell in the mob file.
   * @throws IOException if IO failure is encountered
   */
public Cell readCell(Cell search, boolean cacheMobBlocks, long readPt) throws IOException {
    Cell result = null;
    StoreFileScanner scanner = null;
    List<StoreFile> sfs = new ArrayList<>();
    sfs.add(sf);
    try {
        List<StoreFileScanner> sfScanners = StoreFileScanner.getScannersForStoreFiles(sfs, cacheMobBlocks, true, false, false, readPt);
        if (!sfScanners.isEmpty()) {
            scanner = sfScanners.get(0);
            if (scanner.seek(search)) {
                result = scanner.peek();
            }
        }
    } finally {
        if (scanner != null) {
            scanner.close();
        }
    }
    return result;
}
Also used : ArrayList(java.util.ArrayList) StoreFile(org.apache.hadoop.hbase.regionserver.StoreFile) Cell(org.apache.hadoop.hbase.Cell) StoreFileScanner(org.apache.hadoop.hbase.regionserver.StoreFileScanner)
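
Because readCell seeks and then peeks, the cell it returns is the first one at or after the search key; callers that need an exact match must verify the row (and qualifier) themselves. A short usage sketch follows, assuming a MobFile instance obtained elsewhere (for example from the MobFileCache); KeyValueUtil.createFirstOnRow and CellUtil.cloneValue are standard HBase utilities, while readFirstValue is only an illustrative wrapper.

import java.io.IOException;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.mob.MobFile;
import org.apache.hadoop.hbase.util.Bytes;

public class MobFileLookup {

    // Returns the value of the first cell at or after the given row key, or null if the
    // scanner is already past the end of the file.
    static byte[] readFirstValue(MobFile mobFile, String row) throws IOException {
        // a "first on row" key seeks to the smallest possible cell of that row
        Cell search = KeyValueUtil.createFirstOnRow(Bytes.toBytes(row));
        Cell found = mobFile.readCell(search, /* cacheMobBlocks */ false, /* readPt */ Long.MAX_VALUE);
        return found == null ? null : CellUtil.cloneValue(found);
    }
}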

Example 39 with StoreFile

use of org.apache.hadoop.hbase.regionserver.StoreFile in project hbase by apache.

the class SplitTableRegionProcedure method splitStoreFiles.

/**
   * Splits the parent region's store files between the two daughter regions.
   * @param env MasterProcedureEnv
   * @return The number of store file references created for daughter A and daughter B.
   * @throws IOException if IO failure is encountered
   */
private Pair<Integer, Integer> splitStoreFiles(final MasterProcedureEnv env, final HRegionFileSystem regionFs) throws IOException {
    final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
    final Configuration conf = env.getMasterConfiguration();
    // The following code sets up a thread pool executor with as many slots as
    // there are files to split. It then fires up everything, waits for
    // completion and finally checks for any exception.
    //
    // Note: splitStoreFiles creates daughter region dirs under the parent splits dir.
    // Nothing to unroll here on failure -- re-running createSplitsDir will
    // clean this up.
    int nbFiles = 0;
    for (String family : regionFs.getFamilies()) {
        Collection<StoreFileInfo> storeFiles = regionFs.getStoreFiles(family);
        if (storeFiles != null) {
            nbFiles += storeFiles.size();
        }
    }
    if (nbFiles == 0) {
        // no file needs to be split.
        return new Pair<>(0, 0);
    }
    // Default max #threads to use is the smaller of table's configured number of blocking store
    // files or the available number of logical cores.
    int defMaxThreads = Math.min(conf.getInt(HStore.BLOCKING_STOREFILES_KEY, HStore.DEFAULT_BLOCKING_STOREFILE_COUNT), Runtime.getRuntime().availableProcessors());
    // Max #threads is the smaller of the number of storefiles or the default max determined above.
    int maxThreads = Math.min(conf.getInt(HConstants.REGION_SPLIT_THREADS_MAX, defMaxThreads), nbFiles);
    LOG.info("Preparing to split " + nbFiles + " storefiles for region " + parentHRI + " using " + maxThreads + " threads");
    ThreadPoolExecutor threadPool = (ThreadPoolExecutor) Executors.newFixedThreadPool(maxThreads, Threads.getNamedThreadFactory("StoreFileSplitter-%1$d"));
    List<Future<Pair<Path, Path>>> futures = new ArrayList<>(nbFiles);
    // Split each store file.
    final HTableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
    for (String family : regionFs.getFamilies()) {
        final HColumnDescriptor hcd = htd.getFamily(family.getBytes());
        final Collection<StoreFileInfo> storeFiles = regionFs.getStoreFiles(family);
        if (storeFiles != null && storeFiles.size() > 0) {
            final CacheConfig cacheConf = new CacheConfig(conf, hcd);
            for (StoreFileInfo storeFileInfo : storeFiles) {
                StoreFileSplitter sfs = new StoreFileSplitter(regionFs, family.getBytes(), new StoreFile(mfs.getFileSystem(), storeFileInfo, conf, cacheConf, hcd.getBloomFilterType()));
                futures.add(threadPool.submit(sfs));
            }
        }
    }
    // Shutdown the pool
    threadPool.shutdown();
    // Wait for all the tasks to finish
    long fileSplitTimeout = conf.getLong("hbase.master.fileSplitTimeout", 30000);
    try {
        boolean stillRunning = !threadPool.awaitTermination(fileSplitTimeout, TimeUnit.MILLISECONDS);
        if (stillRunning) {
            threadPool.shutdownNow();
            // wait for the thread to shutdown completely.
            while (!threadPool.isTerminated()) {
                Thread.sleep(50);
            }
            throw new IOException("Took too long to split the" + " files and create the references, aborting split");
        }
    } catch (InterruptedException e) {
        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
    }
    int daughterA = 0;
    int daughterB = 0;
    // Look for any exception
    for (Future<Pair<Path, Path>> future : futures) {
        try {
            Pair<Path, Path> p = future.get();
            daughterA += p.getFirst() != null ? 1 : 0;
            daughterB += p.getSecond() != null ? 1 : 0;
        } catch (InterruptedException e) {
            throw (InterruptedIOException) new InterruptedIOException().initCause(e);
        } catch (ExecutionException e) {
            throw new IOException(e);
        }
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Split storefiles for region " + parentHRI + " Daughter A: " + daughterA + " storefiles, Daughter B: " + daughterB + " storefiles.");
    }
    return new Pair<>(daughterA, daughterB);
}
Also used : MasterFileSystem(org.apache.hadoop.hbase.master.MasterFileSystem) InterruptedIOException(java.io.InterruptedIOException) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) StoreFile(org.apache.hadoop.hbase.regionserver.StoreFile) ExecutionException(java.util.concurrent.ExecutionException) CacheConfig(org.apache.hadoop.hbase.io.hfile.CacheConfig) Pair(org.apache.hadoop.hbase.util.Pair) Path(org.apache.hadoop.fs.Path) HColumnDescriptor(org.apache.hadoop.hbase.HColumnDescriptor) InterruptedIOException(java.io.InterruptedIOException) DoNotRetryIOException(org.apache.hadoop.hbase.DoNotRetryIOException) IOException(java.io.IOException) HTableDescriptor(org.apache.hadoop.hbase.HTableDescriptor) Future(java.util.concurrent.Future) ThreadPoolExecutor(java.util.concurrent.ThreadPoolExecutor) StoreFileInfo(org.apache.hadoop.hbase.regionserver.StoreFileInfo)
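
The executor setup above bounds the pool by the smaller of the blocking-store-file limit, the number of available cores, and the number of store files, submits one StoreFileSplitter per file, and then drains the futures under a timeout, converting task failures back into IOExceptions. The following sketch shows the same submit-then-drain pattern using only java.util.concurrent types; the class and method names are illustrative and not HBase API.

import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;

// Generic sketch of the bounded-pool + futures pattern used by splitStoreFiles.
public class ParallelSplit {

    static <T> List<T> runAll(List<Callable<T>> tasks, int maxThreads, long timeoutMillis) throws IOException {
        int threads = Math.max(1, Math.min(maxThreads, tasks.size()));
        ExecutorService pool = Executors.newFixedThreadPool(threads);
        List<Future<T>> futures = new ArrayList<>(tasks.size());
        for (Callable<T> task : tasks) {
            futures.add(pool.submit(task));
        }
        // stop accepting new tasks; the queued ones keep running
        pool.shutdown();
        try {
            if (!pool.awaitTermination(timeoutMillis, TimeUnit.MILLISECONDS)) {
                pool.shutdownNow();
                throw new IOException("Tasks did not finish within " + timeoutMillis + " ms");
            }
            List<T> results = new ArrayList<>(futures.size());
            for (Future<T> future : futures) {
                // get() rethrows a task failure as ExecutionException
                results.add(future.get());
            }
            return results;
        } catch (InterruptedException e) {
            throw (InterruptedIOException) new InterruptedIOException().initCause(e);
        } catch (ExecutionException e) {
            throw new IOException(e.getCause());
        }
    }
}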

Example 40 with StoreFile

use of org.apache.hadoop.hbase.regionserver.StoreFile in project hbase by apache.

the class PartitionedMobCompactor method getListOfDelFilesForPartition.

@VisibleForTesting
List<StoreFile> getListOfDelFilesForPartition(final CompactionPartition partition, final List<CompactionDelPartition> delPartitions) {
    // Binary search for startKey and endKey
    List<StoreFile> result = new ArrayList<>();
    DelPartitionComparator comparator = new DelPartitionComparator(false);
    CompactionDelPartitionId id = new CompactionDelPartitionId(null, partition.getStartKey());
    CompactionDelPartition target = new CompactionDelPartition(id);
    int start = Collections.binarySearch(delPartitions, target, comparator);
    // Get the start index for partition
    if (start < 0) {
        // Calculate the insert point
        start = (start + 1) * (-1);
        if (start == delPartitions.size()) {
            // no overlap
            return result;
        } else {
            // Check another case which has no overlap
            if (Bytes.compareTo(partition.getEndKey(), delPartitions.get(start).getId().getStartKey()) < 0) {
                return result;
            }
        }
    }
    // Search for end index for the partition
    comparator.setCompareStartKey(true);
    id.setStartKey(partition.getEndKey());
    int end = Collections.binarySearch(delPartitions, target, comparator);
    if (end < 0) {
        end = (end + 1) * (-1);
        if (end == 0) {
            return result;
        } else {
            --end;
            if (Bytes.compareTo(partition.getStartKey(), delPartitions.get(end).getId().getEndKey()) > 0) {
                return result;
            }
        }
    }
    for (int i = start; i <= end; ++i) {
        result.addAll(delPartitions.get(i).getStoreFiles());
    }
    return result;
}
Also used : CompactionDelPartitionId(org.apache.hadoop.hbase.mob.compactions.PartitionedMobCompactionRequest.CompactionDelPartitionId) ArrayList(java.util.ArrayList) StoreFile(org.apache.hadoop.hbase.regionserver.StoreFile) CompactionDelPartition(org.apache.hadoop.hbase.mob.compactions.PartitionedMobCompactionRequest.CompactionDelPartition) VisibleForTesting(com.google.common.annotations.VisibleForTesting)
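
The (start + 1) * (-1) arithmetic relies on the contract of Collections.binarySearch: when the key is absent, it returns -(insertionPoint) - 1, so adding one and negating recovers the insertion point. A tiny, self-contained demonstration with plain integers:

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

// Demonstrates the insertion-point arithmetic used in getListOfDelFilesForPartition.
public class BinarySearchInsertionPoint {

    public static void main(String[] args) {
        List<Integer> sorted = Arrays.asList(10, 20, 30, 40);

        // key present: the index is returned directly
        int hit = Collections.binarySearch(sorted, 30);
        System.out.println(hit);                // 2

        // key absent: the result encodes the insertion point as -(insertionPoint) - 1
        int miss = Collections.binarySearch(sorted, 25);
        System.out.println(miss);               // -3

        // same transform as the compactor: (-3 + 1) * (-1) == 2
        int insertionPoint = (miss + 1) * (-1);
        System.out.println(insertionPoint);     // 2, i.e. 25 would slot in before 30
    }
}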

Aggregations

StoreFile (org.apache.hadoop.hbase.regionserver.StoreFile): 52
ArrayList (java.util.ArrayList): 22
Path (org.apache.hadoop.fs.Path): 15
Test (org.junit.Test): 13
IOException (java.io.IOException): 10
Store (org.apache.hadoop.hbase.regionserver.Store): 6
StripeInformationProvider (org.apache.hadoop.hbase.regionserver.compactions.StripeCompactionPolicy.StripeInformationProvider): 6
StoreFileReader (org.apache.hadoop.hbase.regionserver.StoreFileReader): 5
ImmutableList (com.google.common.collect.ImmutableList): 4
Configuration (org.apache.hadoop.conf.Configuration): 4
HColumnDescriptor (org.apache.hadoop.hbase.HColumnDescriptor): 4
HTableDescriptor (org.apache.hadoop.hbase.HTableDescriptor): 4
Put (org.apache.hadoop.hbase.client.Put): 4
StoreFileScanner (org.apache.hadoop.hbase.regionserver.StoreFileScanner): 4
FileStatus (org.apache.hadoop.fs.FileStatus): 3
Cell (org.apache.hadoop.hbase.Cell): 3
CacheConfig (org.apache.hadoop.hbase.io.hfile.CacheConfig): 3
StoreFileWriter (org.apache.hadoop.hbase.regionserver.StoreFileWriter): 3
ConcatenatedLists (org.apache.hadoop.hbase.util.ConcatenatedLists): 3
FileNotFoundException (java.io.FileNotFoundException): 2