
Example 6 with StoreFileInfo

Use of org.apache.hadoop.hbase.regionserver.StoreFileInfo in project hbase by apache.

The class SnapshotManifestV1, method buildManifestFromDisk.

static SnapshotRegionManifest buildManifestFromDisk(final Configuration conf, final FileSystem fs, final Path tableDir, final HRegionInfo regionInfo) throws IOException {
    HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem(conf, fs, tableDir, regionInfo, true);
    SnapshotRegionManifest.Builder manifest = SnapshotRegionManifest.newBuilder();
    // 1. dump region meta info into the snapshot directory
    LOG.debug("Storing region-info for snapshot.");
    manifest.setRegionInfo(HRegionInfo.convert(regionInfo));
    // 2. iterate through all the stores in the region
    LOG.debug("Creating references for hfiles");
    // This ensures that we have an atomic view of the directory as long as we have < ls limit
    // (batch size of the files in a directory) on the namenode. Otherwise, we get back the files in
    // batches and may miss files being added/deleted. This could be more robust (iteratively
    // checking to see if we have all the files until we are sure; see the sketch after this
    // example), but the limit is currently 1000 files/batch, far more than the number of store
    // files under a single column family.
    Collection<String> familyNames = regionFs.getFamilies();
    if (familyNames != null) {
        for (String familyName : familyNames) {
            Collection<StoreFileInfo> storeFiles = regionFs.getStoreFiles(familyName, false);
            if (storeFiles == null) {
                LOG.debug("No files under family: " + familyName);
                continue;
            }
            // 2.1. build the snapshot reference for the store
            SnapshotRegionManifest.FamilyFiles.Builder family = SnapshotRegionManifest.FamilyFiles.newBuilder();
            family.setFamilyName(UnsafeByteOperations.unsafeWrap(Bytes.toBytes(familyName)));
            if (LOG.isDebugEnabled()) {
                LOG.debug("Adding snapshot references for " + storeFiles + " hfiles");
            }
            // 2.2. iterate through all the store's files and create "references".
            int i = 0;
            int sz = storeFiles.size();
            for (StoreFileInfo storeFile : storeFiles) {
                // create "reference" to this store file.
                LOG.debug("Adding reference for file (" + (++i) + "/" + sz + "): " + storeFile.getPath());
                SnapshotRegionManifest.StoreFile.Builder sfManifest = SnapshotRegionManifest.StoreFile.newBuilder();
                sfManifest.setName(storeFile.getPath().getName());
                family.addStoreFiles(sfManifest.build());
            }
            manifest.addFamilyFiles(family.build());
        }
    }
    return manifest.build();
}
Also used: HRegionFileSystem (org.apache.hadoop.hbase.regionserver.HRegionFileSystem), SnapshotRegionManifest (org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest), StoreFileInfo (org.apache.hadoop.hbase.regionserver.StoreFileInfo)
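
The comment in the middle of buildManifestFromDisk points out that a single directory listing is only an atomic view while the file count stays under the NameNode listing batch size, and that iteratively re-listing "until we are sure" would be more robust. The following is a minimal, self-contained sketch of that re-list-until-stable idea using plain java.nio rather than the Hadoop FileSystem API; the class and method names (StableDirectoryListing, listUntilStable) are illustrative only and are not HBase code.

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Set;
import java.util.TreeSet;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class StableDirectoryListing {

    // List the file names under dir into a sorted set.
    static Set<String> listNames(Path dir) throws IOException {
        try (Stream<Path> entries = Files.list(dir)) {
            return entries.map(p -> p.getFileName().toString())
                          .collect(Collectors.toCollection(TreeSet::new));
        }
    }

    // Re-list the directory until two consecutive listings agree, or give up
    // after maxAttempts and return the last listing seen.
    public static Set<String> listUntilStable(Path dir, int maxAttempts) throws IOException {
        Set<String> previous = listNames(dir);
        for (int attempt = 1; attempt < maxAttempts; attempt++) {
            Set<String> current = listNames(dir);
            if (current.equals(previous)) {
                return current;
            }
            previous = current;
        }
        return previous;
    }
}

Two identical consecutive listings still do not strictly guarantee that nothing changed in between; the HBase comment settles for the single listing because the 1000 files/batch limit already far exceeds the number of store files in one column family.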

Example 7 with StoreFileInfo

Use of org.apache.hadoop.hbase.regionserver.StoreFileInfo in project hbase by apache.

The class SplitTableRegionProcedure, method splitStoreFiles.

/**
   * Split the parent region's store files, creating the reference files for both daughters.
   * @param env MasterProcedureEnv
   * @param regionFs the file system of the parent region
   * @return a pair with the number of store file references created for daughter A and daughter B
   * @throws IOException
   */
private Pair<Integer, Integer> splitStoreFiles(final MasterProcedureEnv env, final HRegionFileSystem regionFs) throws IOException {
    final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
    final Configuration conf = env.getMasterConfiguration();
    // The following code sets up a thread pool executor with as many slots as
    // there are files to split. It then fires up everything, waits (with a bound)
    // for completion and finally checks for any exception (see the sketch after
    // this example).
    //
    // Note: splitStoreFiles creates daughter region dirs under the parent splits dir
    // Nothing to unroll here if failure -- re-run createSplitsDir will
    // clean this up.
    int nbFiles = 0;
    for (String family : regionFs.getFamilies()) {
        Collection<StoreFileInfo> storeFiles = regionFs.getStoreFiles(family);
        if (storeFiles != null) {
            nbFiles += storeFiles.size();
        }
    }
    if (nbFiles == 0) {
        // no file needs to be split.
        return new Pair<>(0, 0);
    }
    // Default max #threads to use is the smaller of table's configured number of blocking store
    // files or the available number of logical cores.
    int defMaxThreads = Math.min(conf.getInt(HStore.BLOCKING_STOREFILES_KEY, HStore.DEFAULT_BLOCKING_STOREFILE_COUNT), Runtime.getRuntime().availableProcessors());
    // Max #threads is the smaller of the number of storefiles or the default max determined above.
    int maxThreads = Math.min(conf.getInt(HConstants.REGION_SPLIT_THREADS_MAX, defMaxThreads), nbFiles);
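    // Worked example (hypothetical numbers): with hbase.hstore.blockingStoreFiles set to 10
    // on an 8-core master, defMaxThreads = min(10, 8) = 8; if HConstants.REGION_SPLIT_THREADS_MAX
    // is not configured it falls back to defMaxThreads, so with 5 store files to split,
    // maxThreads = min(8, 5) = 5.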
    LOG.info("Preparing to split " + nbFiles + " storefiles for region " + parentHRI + " using " + maxThreads + " threads");
    ThreadPoolExecutor threadPool = (ThreadPoolExecutor) Executors.newFixedThreadPool(maxThreads, Threads.getNamedThreadFactory("StoreFileSplitter-%1$d"));
    List<Future<Pair<Path, Path>>> futures = new ArrayList<>(nbFiles);
    // Split each store file.
    final HTableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
    for (String family : regionFs.getFamilies()) {
        final HColumnDescriptor hcd = htd.getFamily(family.getBytes());
        final Collection<StoreFileInfo> storeFiles = regionFs.getStoreFiles(family);
        if (storeFiles != null && storeFiles.size() > 0) {
            final CacheConfig cacheConf = new CacheConfig(conf, hcd);
            for (StoreFileInfo storeFileInfo : storeFiles) {
                StoreFileSplitter sfs = new StoreFileSplitter(regionFs, family.getBytes(), new StoreFile(mfs.getFileSystem(), storeFileInfo, conf, cacheConf, hcd.getBloomFilterType()));
                futures.add(threadPool.submit(sfs));
            }
        }
    }
    // Shutdown the pool
    threadPool.shutdown();
    // Wait for all the tasks to finish
    long fileSplitTimeout = conf.getLong("hbase.master.fileSplitTimeout", 30000);
    try {
        boolean stillRunning = !threadPool.awaitTermination(fileSplitTimeout, TimeUnit.MILLISECONDS);
        if (stillRunning) {
            threadPool.shutdownNow();
            // wait for the thread to shutdown completely.
            while (!threadPool.isTerminated()) {
                Thread.sleep(50);
            }
            throw new IOException("Took too long to split the" + " files and create the references, aborting split");
        }
    } catch (InterruptedException e) {
        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
    }
    int daughterA = 0;
    int daughterB = 0;
    // Look for any exception
    for (Future<Pair<Path, Path>> future : futures) {
        try {
            Pair<Path, Path> p = future.get();
            daughterA += p.getFirst() != null ? 1 : 0;
            daughterB += p.getSecond() != null ? 1 : 0;
        } catch (InterruptedException e) {
            throw (InterruptedIOException) new InterruptedIOException().initCause(e);
        } catch (ExecutionException e) {
            throw new IOException(e);
        }
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Split storefiles for region " + parentHRI + " Daughter A: " + daughterA + " storefiles, Daughter B: " + daughterB + " storefiles.");
    }
    return new Pair<>(daughterA, daughterB);
}
Also used: MasterFileSystem (org.apache.hadoop.hbase.master.MasterFileSystem), InterruptedIOException (java.io.InterruptedIOException), Configuration (org.apache.hadoop.conf.Configuration), ArrayList (java.util.ArrayList), StoreFile (org.apache.hadoop.hbase.regionserver.StoreFile), ExecutionException (java.util.concurrent.ExecutionException), CacheConfig (org.apache.hadoop.hbase.io.hfile.CacheConfig), Pair (org.apache.hadoop.hbase.util.Pair), Path (org.apache.hadoop.fs.Path), HColumnDescriptor (org.apache.hadoop.hbase.HColumnDescriptor), DoNotRetryIOException (org.apache.hadoop.hbase.DoNotRetryIOException), IOException (java.io.IOException), HTableDescriptor (org.apache.hadoop.hbase.HTableDescriptor), Future (java.util.concurrent.Future), ThreadPoolExecutor (java.util.concurrent.ThreadPoolExecutor), StoreFileInfo (org.apache.hadoop.hbase.regionserver.StoreFileInfo)
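
splitStoreFiles also shows the standard bounded-wait shutdown pattern for a task pool: shutdown() stops new submissions, awaitTermination() waits up to hbase.master.fileSplitTimeout, shutdownNow() interrupts stragglers if the bound is hit, and the collected Futures are then drained so any per-file failure surfaces as an exception. Below is a minimal, self-contained sketch of the same pattern with plain java.util.concurrent; the class name, the task payloads, and the fixed numbers (4 threads, 10 tasks, 30-second bound) are illustrative placeholders rather than HBase code.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;

public class BoundedWaitSplit {

    public static void main(String[] args) throws Exception {
        int maxThreads = 4;                // stand-in for the computed maxThreads
        long fileSplitTimeoutMs = 30_000L; // stand-in for hbase.master.fileSplitTimeout
        ExecutorService pool = Executors.newFixedThreadPool(maxThreads);

        // Submit one task per "store file"; each returns a result, much like the
        // Pair<Path, Path> produced by a StoreFileSplitter task in the procedure.
        List<Future<String>> futures = new ArrayList<>();
        for (int i = 0; i < 10; i++) {
            final int fileId = i;
            futures.add(pool.submit((Callable<String>) () -> "reference-" + fileId));
        }

        // Stop accepting new work, then wait with a bound. If the bound is hit,
        // force shutdown and fail, as the procedure does.
        pool.shutdown();
        if (!pool.awaitTermination(fileSplitTimeoutMs, TimeUnit.MILLISECONDS)) {
            pool.shutdownNow();
            throw new IOException("Took too long to split the files, aborting");
        }

        // All tasks finished within the bound; surface any per-task failure.
        for (Future<String> f : futures) {
            try {
                System.out.println(f.get());
            } catch (ExecutionException e) {
                throw new IOException(e);
            }
        }
    }
}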

Aggregations

StoreFileInfo (org.apache.hadoop.hbase.regionserver.StoreFileInfo): 7
Path (org.apache.hadoop.fs.Path): 3
HColumnDescriptor (org.apache.hadoop.hbase.HColumnDescriptor): 3
IOException (java.io.IOException): 2
ArrayList (java.util.ArrayList): 2
Configuration (org.apache.hadoop.conf.Configuration): 2
HTableDescriptor (org.apache.hadoop.hbase.HTableDescriptor): 2
CacheConfig (org.apache.hadoop.hbase.io.hfile.CacheConfig): 2
MasterFileSystem (org.apache.hadoop.hbase.master.MasterFileSystem): 2
HRegionFileSystem (org.apache.hadoop.hbase.regionserver.HRegionFileSystem): 2
StoreFile (org.apache.hadoop.hbase.regionserver.StoreFile): 2
InterruptedIOException (java.io.InterruptedIOException): 1
ExecutionException (java.util.concurrent.ExecutionException): 1
Future (java.util.concurrent.Future): 1
ThreadPoolExecutor (java.util.concurrent.ThreadPoolExecutor): 1
FileStatus (org.apache.hadoop.fs.FileStatus): 1
DoNotRetryIOException (org.apache.hadoop.hbase.DoNotRetryIOException): 1
SnapshotRegionManifest (org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest): 1
Pair (org.apache.hadoop.hbase.util.Pair): 1