Example 11 with StoreFileInfo

Use of org.apache.hadoop.hbase.regionserver.StoreFileInfo in project hbase by apache.

From class SplitTableRegionProcedure, method splitStoreFiles.

/**
   * Split the parent region's store files, creating reference files for the two
   * daughter regions using a bounded thread pool.
   * @param env MasterProcedureEnv
   * @throws IOException if the split times out or a store file fails to split
   */
private Pair<Integer, Integer> splitStoreFiles(final MasterProcedureEnv env, final HRegionFileSystem regionFs) throws IOException {
    final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
    final Configuration conf = env.getMasterConfiguration();
    // The following code sets up a thread pool executor with as many slots as
    // there are files to split. It then fires up everything, waits for
    // completion and finally checks for any exception.
    //
    // Note: splitStoreFiles creates the daughter region dirs under the parent's splits dir.
    // Nothing to unroll here on failure -- a re-run of createSplitsDir will
    // clean this up.
    int nbFiles = 0;
    for (String family : regionFs.getFamilies()) {
        Collection<StoreFileInfo> storeFiles = regionFs.getStoreFiles(family);
        if (storeFiles != null) {
            nbFiles += storeFiles.size();
        }
    }
    if (nbFiles == 0) {
        // No files need to be split.
        return new Pair<>(0, 0);
    }
    // The default max #threads is the smaller of the table's configured number of blocking
    // store files and the number of available logical cores.
    int defMaxThreads = Math.min(conf.getInt(HStore.BLOCKING_STOREFILES_KEY, HStore.DEFAULT_BLOCKING_STOREFILE_COUNT), Runtime.getRuntime().availableProcessors());
    // Max #threads is the smaller of the configured split-thread max (defaulting to the
    // value determined above) and the number of storefiles.
    int maxThreads = Math.min(conf.getInt(HConstants.REGION_SPLIT_THREADS_MAX, defMaxThreads), nbFiles);
    LOG.info("Preparing to split " + nbFiles + " storefiles for region " + parentHRI + " using " + maxThreads + " threads");
    ThreadPoolExecutor threadPool = (ThreadPoolExecutor) Executors.newFixedThreadPool(maxThreads, Threads.getNamedThreadFactory("StoreFileSplitter-%1$d"));
    List<Future<Pair<Path, Path>>> futures = new ArrayList<>(nbFiles);
    // Split each store file.
    final HTableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
    for (String family : regionFs.getFamilies()) {
        final HColumnDescriptor hcd = htd.getFamily(family.getBytes());
        final Collection<StoreFileInfo> storeFiles = regionFs.getStoreFiles(family);
        if (storeFiles != null && storeFiles.size() > 0) {
            final CacheConfig cacheConf = new CacheConfig(conf, hcd);
            for (StoreFileInfo storeFileInfo : storeFiles) {
                StoreFileSplitter sfs = new StoreFileSplitter(regionFs, family.getBytes(), new StoreFile(mfs.getFileSystem(), storeFileInfo, conf, cacheConf, hcd.getBloomFilterType()));
                futures.add(threadPool.submit(sfs));
            }
        }
    }
    // Shut down the pool.
    threadPool.shutdown();
    // Wait for all the tasks to finish
    long fileSplitTimeout = conf.getLong("hbase.master.fileSplitTimeout", 30000);
    try {
        boolean stillRunning = !threadPool.awaitTermination(fileSplitTimeout, TimeUnit.MILLISECONDS);
        if (stillRunning) {
            threadPool.shutdownNow();
            // Wait for the threads to shut down completely.
            while (!threadPool.isTerminated()) {
                Thread.sleep(50);
            }
            throw new IOException("Took too long to split the" + " files and create the references, aborting split");
        }
    } catch (InterruptedException e) {
        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
    }
    int daughterA = 0;
    int daughterB = 0;
    // Look for any exception
    for (Future<Pair<Path, Path>> future : futures) {
        try {
            Pair<Path, Path> p = future.get();
            daughterA += p.getFirst() != null ? 1 : 0;
            daughterB += p.getSecond() != null ? 1 : 0;
        } catch (InterruptedException e) {
            throw (InterruptedIOException) new InterruptedIOException().initCause(e);
        } catch (ExecutionException e) {
            throw new IOException(e);
        }
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Split storefiles for region " + parentHRI + " Daughter A: " + daughterA + " storefiles, Daughter B: " + daughterB + " storefiles.");
    }
    return new Pair<>(daughterA, daughterB);
}
Also used : MasterFileSystem(org.apache.hadoop.hbase.master.MasterFileSystem) InterruptedIOException(java.io.InterruptedIOException) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) StoreFile(org.apache.hadoop.hbase.regionserver.StoreFile) ExecutionException(java.util.concurrent.ExecutionException) CacheConfig(org.apache.hadoop.hbase.io.hfile.CacheConfig) Pair(org.apache.hadoop.hbase.util.Pair) Path(org.apache.hadoop.fs.Path) HColumnDescriptor(org.apache.hadoop.hbase.HColumnDescriptor) InterruptedIOException(java.io.InterruptedIOException) DoNotRetryIOException(org.apache.hadoop.hbase.DoNotRetryIOException) IOException(java.io.IOException) HTableDescriptor(org.apache.hadoop.hbase.HTableDescriptor) Future(java.util.concurrent.Future) ThreadPoolExecutor(java.util.concurrent.ThreadPoolExecutor) StoreFileInfo(org.apache.hadoop.hbase.regionserver.StoreFileInfo)
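
The executor handling above is a reusable shape: size the pool to the smaller of a configured cap and the number of tasks, shut the pool down, await termination with a deadline, and force-stop via shutdownNow() once the deadline passes. Below is a minimal, self-contained sketch of that shape; the class and method names are illustrative, not from HBase.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;

public class BoundedTaskRunner {
    static List<String> runAll(List<Callable<String>> tasks, int maxThreads, long timeoutMs)
            throws IOException, InterruptedException {
        if (tasks.isEmpty()) {
            // Mirrors the early return above when there are no files to split.
            return new ArrayList<>();
        }
        // Never start more threads than there are tasks to run.
        ExecutorService pool = Executors.newFixedThreadPool(Math.min(maxThreads, tasks.size()));
        List<Future<String>> futures = new ArrayList<>(tasks.size());
        for (Callable<String> task : tasks) {
            futures.add(pool.submit(task));
        }
        pool.shutdown(); // accept no new tasks; queued ones still run
        if (!pool.awaitTermination(timeoutMs, TimeUnit.MILLISECONDS)) {
            pool.shutdownNow(); // interrupt stragglers, as the HBase code above does
            throw new IOException("Tasks did not finish within " + timeoutMs + " ms");
        }
        List<String> results = new ArrayList<>(futures.size());
        for (Future<String> f : futures) {
            try {
                results.add(f.get()); // surfaces any task failure as ExecutionException
            } catch (ExecutionException e) {
                throw new IOException(e);
            }
        }
        return results;
    }
}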

Example 12 with StoreFileInfo

Use of org.apache.hadoop.hbase.regionserver.StoreFileInfo in project hbase by apache.

From class SplitTableRegionProcedure, method splitStoreFiles.

/**
 * Split the parent region's store files, creating reference files for the two
 * daughter regions using a bounded thread pool.
 * @param env MasterProcedureEnv
 */
private Pair<List<Path>, List<Path>> splitStoreFiles(final MasterProcedureEnv env, final HRegionFileSystem regionFs) throws IOException {
    final Configuration conf = env.getMasterConfiguration();
    TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(getTableName());
    // The following code sets up a thread pool executor with as many slots as
    // there are files to split. It then fires up everything, waits for
    // completion and finally checks for any exception.
    //
    // Note: From HBASE-26187, splitStoreFiles now creates daughter region dirs straight under
    // the table dir. In case of failure, the proc would go through this again; already existing
    // region dirs and split files would just be ignored, and new split files would be created.
    int nbFiles = 0;
    final Map<String, Collection<StoreFileInfo>> files = new HashMap<String, Collection<StoreFileInfo>>(htd.getColumnFamilyCount());
    for (ColumnFamilyDescriptor cfd : htd.getColumnFamilies()) {
        String family = cfd.getNameAsString();
        StoreFileTracker tracker = StoreFileTrackerFactory.create(env.getMasterConfiguration(), htd, cfd, regionFs);
        Collection<StoreFileInfo> sfis = tracker.load();
        if (sfis == null) {
            continue;
        }
        Collection<StoreFileInfo> filteredSfis = null;
        for (StoreFileInfo sfi : sfis) {
            // Skip reference files: the parent was already checked as splittable, so any
            // remaining references are compaction leftovers awaiting archiving.
            if (sfi.isReference()) {
                LOG.info("Skipping split of " + sfi + "; presuming ready for archiving.");
                continue;
            }
            if (filteredSfis == null) {
                filteredSfis = new ArrayList<StoreFileInfo>(sfis.size());
                files.put(family, filteredSfis);
            }
            filteredSfis.add(sfi);
            nbFiles++;
        }
    }
    if (nbFiles == 0) {
        // No files need to be split.
        return new Pair<>(Collections.emptyList(), Collections.emptyList());
    }
    // Max #threads is the smaller of the configured split-thread max (defaulting to the
    // blocking store file count) and the number of storefiles.
    int maxThreads = Math.min(conf.getInt(HConstants.REGION_SPLIT_THREADS_MAX, conf.getInt(HStore.BLOCKING_STOREFILES_KEY, HStore.DEFAULT_BLOCKING_STOREFILE_COUNT)), nbFiles);
    LOG.info("pid=" + getProcId() + " splitting " + nbFiles + " storefiles, region=" + getParentRegion().getShortNameToLog() + ", threads=" + maxThreads);
    final ExecutorService threadPool = Executors.newFixedThreadPool(maxThreads, new ThreadFactoryBuilder().setNameFormat("StoreFileSplitter-pool-%d").setDaemon(true).setUncaughtExceptionHandler(Threads.LOGGING_EXCEPTION_HANDLER).build());
    final List<Future<Pair<Path, Path>>> futures = new ArrayList<Future<Pair<Path, Path>>>(nbFiles);
    // Split each store file.
    for (Map.Entry<String, Collection<StoreFileInfo>> e : files.entrySet()) {
        byte[] familyName = Bytes.toBytes(e.getKey());
        final ColumnFamilyDescriptor hcd = htd.getColumnFamily(familyName);
        final Collection<StoreFileInfo> storeFiles = e.getValue();
        if (storeFiles != null && storeFiles.size() > 0) {
            final Configuration storeConfiguration = StoreUtils.createStoreConfiguration(env.getMasterConfiguration(), htd, hcd);
            for (StoreFileInfo storeFileInfo : storeFiles) {
                // As this procedure runs on the master, pass CacheConfig.DISABLED so that
                // no blocks are cached.
                // We also need to pass through a suitable CompoundConfiguration, as if this
                // were running in a regionserver's Store context, or we might not be able
                // to read the hfiles.
                storeFileInfo.setConf(storeConfiguration);
                StoreFileSplitter sfs = new StoreFileSplitter(regionFs, familyName, new HStoreFile(storeFileInfo, hcd.getBloomFilterType(), CacheConfig.DISABLED));
                futures.add(threadPool.submit(sfs));
            }
        }
    }
    // Shut down the pool.
    threadPool.shutdown();
    // Wait for all the tasks to finish.
    // When splits ran on the RegionServer, the timeout configuration was named
    // hbase.regionserver.fileSplitTimeout; if set, its value is used as the fallback here.
    long fileSplitTimeout = conf.getLong("hbase.master.fileSplitTimeout", conf.getLong("hbase.regionserver.fileSplitTimeout", 600000));
    try {
        boolean stillRunning = !threadPool.awaitTermination(fileSplitTimeout, TimeUnit.MILLISECONDS);
        if (stillRunning) {
            threadPool.shutdownNow();
            // Wait for the threads to shut down completely.
            while (!threadPool.isTerminated()) {
                Thread.sleep(50);
            }
            throw new IOException("Took too long to split the" + " files and create the references, aborting split");
        }
    } catch (InterruptedException e) {
        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
    }
    List<Path> daughterA = new ArrayList<>();
    List<Path> daughterB = new ArrayList<>();
    // Look for any exception
    for (Future<Pair<Path, Path>> future : futures) {
        try {
            Pair<Path, Path> p = future.get();
            if (p.getFirst() != null) {
                daughterA.add(p.getFirst());
            }
            if (p.getSecond() != null) {
                daughterB.add(p.getSecond());
            }
        } catch (InterruptedException e) {
            throw (InterruptedIOException) new InterruptedIOException().initCause(e);
        } catch (ExecutionException e) {
            throw new IOException(e);
        }
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("pid=" + getProcId() + " split storefiles for region " + getParentRegion().getShortNameToLog() + " Daughter A: " + daughterA + " storefiles, Daughter B: " + daughterB + " storefiles.");
    }
    return new Pair<>(daughterA, daughterB);
}
Also used : InterruptedIOException(java.io.InterruptedIOException) Configuration(org.apache.hadoop.conf.Configuration) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ColumnFamilyDescriptor(org.apache.hadoop.hbase.client.ColumnFamilyDescriptor) ThreadFactoryBuilder(org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder) StoreFileTracker(org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker) ExecutionException(java.util.concurrent.ExecutionException) Pair(org.apache.hadoop.hbase.util.Pair) Path(org.apache.hadoop.fs.Path) DoNotRetryIOException(org.apache.hadoop.hbase.DoNotRetryIOException) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) TableDescriptor(org.apache.hadoop.hbase.client.TableDescriptor) ExecutorService(java.util.concurrent.ExecutorService) Collection(java.util.Collection) Future(java.util.concurrent.Future) HStoreFile(org.apache.hadoop.hbase.regionserver.HStoreFile) Map(java.util.Map) HashMap(java.util.HashMap) StoreFileInfo(org.apache.hadoop.hbase.regionserver.StoreFileInfo)
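
Relative to Example 11, this version resolves store files through the StoreFileTracker abstraction (see the HBASE-26187 note above) rather than listing the region filesystem directly, and it skips reference files. Below is a sketch of just that load-and-filter step, using the same factory and tracker calls as the code above; the wrapper class and method are illustrative.

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker;
import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;

public final class SplittableFiles {
    /** Collects the non-reference store files of every family, as the procedure above does. */
    static List<StoreFileInfo> collect(Configuration conf, TableDescriptor htd,
            HRegionFileSystem regionFs) throws IOException {
        List<StoreFileInfo> result = new ArrayList<>();
        for (ColumnFamilyDescriptor cfd : htd.getColumnFamilies()) {
            StoreFileTracker tracker = StoreFileTrackerFactory.create(conf, htd, cfd, regionFs);
            Collection<StoreFileInfo> infos = tracker.load();
            if (infos == null) {
                continue;
            }
            for (StoreFileInfo sfi : infos) {
                if (!sfi.isReference()) { // reference files are not split, as above
                    result.add(sfi);
                }
            }
        }
        return result;
    }
}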

Example 13 with StoreFileInfo

Use of org.apache.hadoop.hbase.regionserver.StoreFileInfo in project hbase by apache.

From class MergeTableRegionsProcedure, method mergeStoreFiles.

private List<Path> mergeStoreFiles(MasterProcedureEnv env, HRegionFileSystem regionFs, HRegionFileSystem mergeRegionFs, RegionInfo mergedRegion) throws IOException {
    final TableDescriptor htd = env.getMasterServices().getTableDescriptors().get(mergedRegion.getTable());
    List<Path> mergedFiles = new ArrayList<>();
    for (ColumnFamilyDescriptor hcd : htd.getColumnFamilies()) {
        String family = hcd.getNameAsString();
        StoreFileTracker tracker = StoreFileTrackerFactory.create(env.getMasterConfiguration(), htd, hcd, regionFs);
        final Collection<StoreFileInfo> storeFiles = tracker.load();
        if (storeFiles != null && storeFiles.size() > 0) {
            final Configuration storeConfiguration = StoreUtils.createStoreConfiguration(env.getMasterConfiguration(), htd, hcd);
            for (StoreFileInfo storeFileInfo : storeFiles) {
                // Create reference file(s) to the parent region's file here, in the merged dir.
                // As this procedure runs on the master, pass CacheConfig.DISABLED so that
                // no blocks are cached.
                // We also need to pass through a suitable CompoundConfiguration, as if this
                // were running in a regionserver's Store context, or we might not be able
                // to read the hfiles.
                storeFileInfo.setConf(storeConfiguration);
                Path refFile = mergeRegionFs.mergeStoreFile(regionFs.getRegionInfo(), family, new HStoreFile(storeFileInfo, hcd.getBloomFilterType(), CacheConfig.DISABLED));
                mergedFiles.add(refFile);
            }
        }
    }
    return mergedFiles;
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) HStoreFile(org.apache.hadoop.hbase.regionserver.HStoreFile) ColumnFamilyDescriptor(org.apache.hadoop.hbase.client.ColumnFamilyDescriptor) StoreFileTracker(org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker) TableDescriptor(org.apache.hadoop.hbase.client.TableDescriptor) StoreFileInfo(org.apache.hadoop.hbase.regionserver.StoreFileInfo)
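
Examples 12 and 13 share one recipe for opening an hfile on the master: attach a store-scoped configuration via setConf, then construct the HStoreFile with CacheConfig.DISABLED so no blocks are cached. Below is a condensed sketch of that recipe, using only calls that appear in the examples above; the wrapper class and method name are illustrative.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.regionserver.HStoreFile;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.regionserver.StoreUtils;

public final class MasterSideStoreFiles {
    /** Opens a store file on the master: store-scoped configuration, no block cache. */
    static HStoreFile open(Configuration conf, TableDescriptor htd, ColumnFamilyDescriptor hcd,
            StoreFileInfo sfi) throws IOException {
        // Pass through the compound store configuration, or reading the hfile may fail.
        sfi.setConf(StoreUtils.createStoreConfiguration(conf, htd, hcd));
        // CacheConfig.DISABLED: the master should not populate any block cache.
        return new HStoreFile(sfi, hcd.getBloomFilterType(), CacheConfig.DISABLED);
    }
}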

Example 14 with StoreFileInfo

Use of org.apache.hadoop.hbase.regionserver.StoreFileInfo in project hbase by apache.

From class FileBasedStoreFileTracker, method load.

@Override
public List<StoreFileInfo> load() throws IOException {
    StoreFileList list = backedFile.load();
    if (list == null) {
        return Collections.emptyList();
    }
    FileSystem fs = ctx.getRegionFileSystem().getFileSystem();
    List<StoreFileInfo> infos = new ArrayList<>();
    for (StoreFileEntry entry : list.getStoreFileList()) {
        infos.add(ServerRegionReplicaUtil.getStoreFileInfo(conf, fs, ctx.getRegionInfo(), ctx.getRegionFileSystem().getRegionInfoForFS(), ctx.getFamily().getNameAsString(), new Path(ctx.getFamilyStoreDirectoryPath(), entry.getName())));
    }
    // For safety, keep the synchronized block here.
    synchronized (storefiles) {
        for (StoreFileInfo info : infos) {
            storefiles.put(info.getPath().getName(), info);
        }
    }
    return infos;
}
Also used : Path(org.apache.hadoop.fs.Path) FileSystem(org.apache.hadoop.fs.FileSystem) ArrayList(java.util.ArrayList) StoreFileList(org.apache.hadoop.hbase.shaded.protobuf.generated.StoreFileTrackerProtos.StoreFileList) StoreFileInfo(org.apache.hadoop.hbase.regionserver.StoreFileInfo) StoreFileEntry(org.apache.hadoop.hbase.shaded.protobuf.generated.StoreFileTrackerProtos.StoreFileEntry)
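
The synchronized block at the end refreshes an in-memory name-to-StoreFileInfo map from the freshly loaded list. The same pattern in isolation, as a generic sketch; all names here are illustrative, not HBase API.

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;

public final class NameCache<V> {
    private final Map<String, V> byName = new HashMap<>();

    /** Merges a freshly loaded batch into the cache under a single lock. */
    void refresh(List<V> loaded, Function<V, String> nameOf) {
        synchronized (byName) {
            for (V v : loaded) {
                byName.put(nameOf.apply(v), v);
            }
        }
    }

    /** Reads under the same lock, so lookups never see a half-applied refresh. */
    V get(String name) {
        synchronized (byName) {
            return byName.get(name);
        }
    }
}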

Example 15 with StoreFileInfo

Use of org.apache.hadoop.hbase.regionserver.StoreFileInfo in project hbase by apache.

From class SnapshotManifest, method addMobRegion.

protected void addMobRegion(RegionInfo regionInfo, RegionVisitor visitor) throws IOException {
    // 1. dump region meta info into the snapshot directory
    final String snapshotName = desc.getName();
    LOG.debug("Storing mob region '" + regionInfo + "' region-info for snapshot=" + snapshotName);
    Object regionData = visitor.regionOpen(regionInfo);
    monitor.rethrowException();
    // 2. iterate through all the stores in the region
    LOG.debug("Creating references for mob files");
    Path mobRegionPath = MobUtils.getMobRegionPath(conf, regionInfo.getTable());
    for (ColumnFamilyDescriptor hcd : htd.getColumnFamilies()) {
        // 2.1. build the snapshot reference for the store if it's a mob store
        if (!hcd.isMobEnabled()) {
            continue;
        }
        Object familyData = visitor.familyOpen(regionData, hcd.getName());
        monitor.rethrowException();
        Path storePath = MobUtils.getMobFamilyPath(mobRegionPath, hcd.getNameAsString());
        List<StoreFileInfo> storeFiles = getStoreFiles(storePath);
        if (storeFiles == null) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("No mob files under family: " + hcd.getNameAsString());
            }
            continue;
        }
        addReferenceFiles(visitor, regionData, familyData, storeFiles, true);
        visitor.familyClose(regionData, familyData);
    }
    visitor.regionClose(regionData);
}
Also used : Path(org.apache.hadoop.fs.Path) ColumnFamilyDescriptor(org.apache.hadoop.hbase.client.ColumnFamilyDescriptor) StoreFileInfo(org.apache.hadoop.hbase.regionserver.StoreFileInfo)
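
addMobRegion drives the visitor through a strict protocol: regionOpen, then familyOpen and familyClose for each mob-enabled family (with reference files added in between), then regionClose. Below is a minimal stand-in showing that traversal shape; the interface is a simplified illustration, not the actual HBase RegionVisitor.

import java.util.List;

interface RegionVisitor<R, F> {
    R regionOpen(String regionName);
    F familyOpen(R region, String family);
    void storeFile(R region, F family, String fileName);
    void familyClose(R region, F family);
    void regionClose(R region);
}

final class RegionWalker {
    /** Visits families and files in the same open/close order as addMobRegion above. */
    static <R, F> void walk(RegionVisitor<R, F> v, String region,
            List<String> families, List<String> filesPerFamily) {
        R r = v.regionOpen(region);
        for (String family : families) {
            F f = v.familyOpen(r, family);
            for (String file : filesPerFamily) {
                v.storeFile(r, f, file);
            }
            v.familyClose(r, f);
        }
        v.regionClose(r);
    }
}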

Aggregations

StoreFileInfo (org.apache.hadoop.hbase.regionserver.StoreFileInfo): 22
Path (org.apache.hadoop.fs.Path): 14
HRegionFileSystem (org.apache.hadoop.hbase.regionserver.HRegionFileSystem): 9
ArrayList (java.util.ArrayList): 8
IOException (java.io.IOException): 6
Configuration (org.apache.hadoop.conf.Configuration): 6
StoreFileTracker (org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker): 6
ColumnFamilyDescriptor (org.apache.hadoop.hbase.client.ColumnFamilyDescriptor): 5
SnapshotRegionManifest (org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest): 5
HashMap (java.util.HashMap): 4
StoreFileList (org.apache.hadoop.hbase.shaded.protobuf.generated.StoreFileTrackerProtos.StoreFileList): 4
InterruptedIOException (java.io.InterruptedIOException): 3
Map (java.util.Map): 3
FileSystem (org.apache.hadoop.fs.FileSystem): 3
HColumnDescriptor (org.apache.hadoop.hbase.HColumnDescriptor): 3
TableDescriptor (org.apache.hadoop.hbase.client.TableDescriptor): 3
Collection (java.util.Collection): 2
List (java.util.List): 2
ExecutionException (java.util.concurrent.ExecutionException): 2
Future (java.util.concurrent.Future): 2