Search in sources :

Example 61 with Path

use of org.apache.hadoop.fs.Path in project hbase by apache.

the class DefaultMobStoreFlusher method flushSnapshot.

  /**
   * Flushes the snapshot of the MemStore.
   * If this store is not a mob store, flush the cells in the snapshot to store files of HBase.
   * If the store is a mob one, the flusher flushes the MemStore into two places.
   * One is the store files of HBase, the other is the mob files.
   * <ol>
   * <li>Cells that are not PUT type or have the delete mark will be directly flushed to
   * HBase.</li>
   * <li>If the size of a cell value is larger than a threshold, it'll be
   * flushed to a mob file, another cell with the path of this file will be flushed to HBase.</li>
   * <li>If the size of a cell value is smaller than or equal to the threshold, it'll be flushed to
   * HBase directly.</li>
   * </ol>
   */
public List<Path> flushSnapshot(MemStoreSnapshot snapshot, long cacheFlushId, MonitoredTask status, ThroughputController throughputController) throws IOException {
    // Flushes a MemStore snapshot through performMobFlush into a writer created in the store tmp area.
    // Returns the (initially empty) result list early when the snapshot has no cells or when
    // createScanner yields null.
    //
    // NOTE(review): this listing looks damaged by extraction. Several closing braces are absent,
    // two branches below are empty, and the final log statement has lost its call prefix.
    // Confirm every hedged note against the upstream HBase source before reusing this code.
    ArrayList<Path> result = new ArrayList<>();
    long cellsCount = snapshot.getCellsCount();
    // don't flush if there are no entries
    if (cellsCount == 0)
        return result;
    // Use a store scanner to find which rows to flush.
    long smallestReadPoint = store.getSmallestReadPoint();
    InternalScanner scanner = createScanner(snapshot.getScanner(), smallestReadPoint);
    if (scanner == null) {
        // NULL scanner returned from coprocessor hooks means skip normal processing
        return result;
    // NOTE(review): the closing brace of the null-scanner check is missing at this point.
    StoreFileWriter writer;
    try {
        // NOTE(review): the comment below reads like the tail of a longer comment whose first
        // line was lost.
        // list of store files. Add cleanup of anything put on filesystem if we fail.
        // Writer creation and the flush itself both run while holding flushLock.
        synchronized (flushLock) {
            status.setStatus("Flushing " + store + ": creating writer");
            // Write the map out to the disk
            writer = store.createWriterInTmp(cellsCount, store.getFamily().getCompressionType(), false, true, true, false, snapshot.getTimeRangeTracker());
            // Remembers a flush failure so the finally block can skip finalizing the writer.
            IOException e = null;
            try {
                // It's a mob store, flush the cells in a mob way. This is the difference of flushing
                // between a normal and a mob store.
                performMobFlush(snapshot, cacheFlushId, scanner, writer, status, throughputController);
            } catch (IOException ioe) {
                e = ioe;
                // throw the exception out
                throw ioe;
            } finally {
                // The writer is finalized only when the flush did not throw an IOException.
                // NOTE(review): the failure branch is empty here; presumably a writer cleanup
                // statement was dropped - TODO confirm.
                if (e != null) {
                } else {
                    finalizeWriter(writer, cacheFlushId, status);
    // NOTE(review): closing braces for the else branch, the inner finally and the synchronized
    // block are missing above. The outer finally below is empty; presumably the scanner is
    // released there upstream - TODO confirm.
    } finally {
    // NOTE(review): the next line begins with a bare string expression; the logging call that
    // should wrap it appears to have been stripped.
    }"Mob store is flushed, sequenceid=" + cacheFlushId + ", memsize=" + StringUtils.TraditionalBinaryPrefix.long2String(snapshot.getDataSize(), "", 1) + ", hasBloomFilter=" + writer.hasGeneralBloom() + ", into tmp file " + writer.getPath());
    // NOTE(review): nothing visible in this excerpt ever adds a path to result, so as shown the
    // returned list is always empty; a statement adding the writer path may be missing - verify.
    return result;
Also used : Path(org.apache.hadoop.fs.Path) StoreFileWriter(org.apache.hadoop.hbase.regionserver.StoreFileWriter) InternalScanner(org.apache.hadoop.hbase.regionserver.InternalScanner) ArrayList(java.util.ArrayList) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException)

Example 62 with Path

use of org.apache.hadoop.fs.Path in project hbase by apache.

the class PartitionedMobCompactor method compactDelFiles.

  /**
   * Compacts the del files in batches, which avoids opening too many files.
   * @param request The compaction request.
   * @param delFilePaths Del file paths to compact
   * @return The paths of new del files after merging or the original files if no merging
   *         is necessary.
   * @throws IOException if IO failure is encountered
   */
protected List<Path> compactDelFiles(PartitionedMobCompactionRequest request, List<Path> delFilePaths) throws IOException {
    if (delFilePaths.size() <= delFileMaxCount) {
        return delFilePaths;
    // when there are more del files than the number that is allowed, merge it firstly.
    int offset = 0;
    List<Path> paths = new ArrayList<>();
    while (offset < delFilePaths.size()) {
        // get the batch
        int batch = compactionBatchSize;
        if (delFilePaths.size() - offset < compactionBatchSize) {
            batch = delFilePaths.size() - offset;
        List<StoreFile> batchedDelFiles = new ArrayList<>();
        if (batch == 1) {
            // only one file left, do not compact it, directly add it to the new files.
        for (int i = offset; i < batch + offset; i++) {
            batchedDelFiles.add(new StoreFile(fs, delFilePaths.get(i), conf, compactionCacheConfig, BloomType.NONE));
        // compact the del files in a batch.
        paths.add(compactDelFilesInBatch(request, batchedDelFiles));
        // move to the next batch.
        offset += batch;
    return compactDelFiles(request, paths);
Also used : Path(org.apache.hadoop.fs.Path) ArrayList(java.util.ArrayList) StoreFile(org.apache.hadoop.hbase.regionserver.StoreFile)

Example 63 with Path

use of org.apache.hadoop.fs.Path in project hbase by apache.

the class RestoreSnapshotHelper method restoreReferenceFile.

  /**
   * Create a new {@link Reference} as copy of the source one.
   * <p><blockquote><pre>
   * The source table looks like:
   *    1234/abc      (original file)
   *    5678/abc.1234 (reference file)
   * After the clone operation looks like:
   *   wxyz/table=1234-abc
   *   stuv/table=1234-abc.wxyz
   * NOTE that the region name in the clone changes (md5 of regioninfo)
   * and the reference should reflect that change.
   * </pre></blockquote>
   * @param familyDir destination directory for the store file
   * @param regionInfo destination region info for the table
   * @param storeFile reference file name
   */
private void restoreReferenceFile(final Path familyDir, final HRegionInfo regionInfo, final SnapshotRegionManifest.StoreFile storeFile) throws IOException {
    // Writes a reference file for storeFile under familyDir, named refLink.clonedRegionName,
    // then records the region in parentsMap under the cloned parent region name.
    //
    // NOTE(review): this listing looks damaged by extraction. Closing braces are absent, one
    // assignment has lost its right-hand side, and the method is cut off inside the final
    // else-if. Confirm every hedged note against the upstream HBase source.
    String hfileName = storeFile.getName();
    // Extract the referred information (hfile name and parent region)
    // The nested Path is namespace/qualifier/encodedRegion/family/hfileName.
    Path refPath = StoreFileInfo.getReferredToFile(new Path(new Path(new Path(new Path(snapshotTable.getNamespaceAsString(), snapshotTable.getQualifierAsString()), regionInfo.getEncodedName()), familyDir.getName()), hfileName));
    // Two levels above the referred file is the region directory (file -> family -> region).
    String snapshotRegionName = refPath.getParent().getParent().getName();
    String fileName = refPath.getName();
    // The new reference should have the cloned region name as parent, if it is a clone.
    // Falls back to the snapshot region name when regionsMap has no entry for it.
    String clonedRegionName = Bytes.toString(regionsMap.get(Bytes.toBytes(snapshotRegionName)));
    if (clonedRegionName == null)
        clonedRegionName = snapshotRegionName;
    // The output file should be a reference link table=snapshotRegion-fileName.clonedRegionName
    Path linkPath = null;
    String refLink = fileName;
    // linkPath stays null when the referred file name is already an HFileLink name.
    if (!HFileLink.isHFileLink(fileName)) {
        refLink = HFileLink.createHFileLinkName(snapshotTable, snapshotRegionName, fileName);
        linkPath = new Path(familyDir, HFileLink.createHFileLinkName(snapshotTable, regionInfo.getEncodedName(), hfileName));
    // NOTE(review): the closing brace of the HFileLink check is missing at this point.
    Path outPath = new Path(familyDir, refLink + '.' + clonedRegionName);
    // Create the new reference
    if (storeFile.hasReference()) {
        // The manifest carries the reference itself: convert it and write it out directly.
        Reference reference = Reference.convert(storeFile.getReference());
        reference.write(fs, outPath);
    } else {
        // No reference in the manifest: copy the bytes of an existing file into outPath.
        InputStream in;
        if (linkPath != null) {
            in = HFileLink.buildFromHFileLinkPattern(conf, linkPath).open(fs);
        } else {
            linkPath = new Path(new Path(HRegion.getRegionDir(snapshotManifest.getSnapshotDir(), regionInfo.getEncodedName()), familyDir.getName()), hfileName);
            // NOTE(review): the right-hand side of the next assignment was lost; presumably it
            // opened linkPath for reading - TODO confirm.
            in =;
        // NOTE(review): the closing brace of the inner else is missing at this point.
        OutputStream out = fs.create(outPath);
        IOUtils.copyBytes(in, out, conf);
    // NOTE(review): the closing brace of the outer else is missing at this point.
    // Add the daughter region to the map
    String regionName = Bytes.toString(regionsMap.get(regionInfo.getEncodedNameAsBytes()));
    LOG.debug("Restore reference " + regionName + " to " + clonedRegionName);
    // parentsMap is read and updated only while holding its monitor.
    synchronized (parentsMap) {
        Pair<String, String> daughters = parentsMap.get(clonedRegionName);
        if (daughters == null) {
            // First daughter seen for this parent; the second slot is left null.
            daughters = new Pair<>(regionName, null);
            parentsMap.put(clonedRegionName, daughters);
        } else if (!regionName.equals(daughters.getFirst())) {
        // NOTE(review): the listing ends here; the body of this branch and all remaining closing
        // braces are missing. Presumably the second daughter is recorded here - TODO confirm.
Also used : Path(org.apache.hadoop.fs.Path) Reference(org.apache.hadoop.hbase.io.Reference) InputStream(java.io.InputStream) OutputStream(java.io.OutputStream)

Example 64 with Path

use of org.apache.hadoop.fs.Path in project hbase by apache.

the class RestoreSnapshotHelper method cloneHdfsMobRegion.

  /**
   * Clone the mob region. For the region create a new region
   * and create a HFileLink for each hfile.
   */
private void cloneHdfsMobRegion(final Map<String, SnapshotRegionManifest> regionManifests, final HRegionInfo region) throws IOException {
    // clone region info (change embedded tableName with the new one)
    Path clonedRegionPath = MobUtils.getMobRegionPath(conf, tableDesc.getTableName());
    cloneRegion(clonedRegionPath, region, regionManifests.get(region.getEncodedName()));
Also used : Path(org.apache.hadoop.fs.Path)

Example 65 with Path

use of org.apache.hadoop.fs.Path in project hbase by apache.

the class RestoreSnapshotHelper method restoreRegion.

  /**
   * Restore region by removing files not in the snapshot
   * and adding the missing ones from the snapshot.
   */
private void restoreRegion(final HRegionInfo regionInfo, final SnapshotRegionManifest regionManifest, Path regionDir) throws IOException {
    // Reconciles the family directories under regionDir with the store files listed in
    // regionManifest: files are archived through HFileArchiver, store files are restored through
    // restoreStoreFile, and families that exist only in the snapshot get a new directory.
    //
    // NOTE(review): this listing looks damaged by extraction. Many closing braces are absent and
    // both branches of the present/missing check are empty. Confirm every hedged note against
    // the upstream HBase source.
    Map<String, List<SnapshotRegionManifest.StoreFile>> snapshotFiles = getRegionHFileReferences(regionManifest);
    String tableName = tableDesc.getTableName().getNameAsString();
    // Restore families present in the table
    for (Path familyDir : FSUtils.getFamilyDirs(fs, regionDir)) {
        byte[] family = Bytes.toBytes(familyDir.getName());
        Set<String> familyFiles = getTableRegionFamilyFiles(familyDir);
        // remove() also takes the family out of snapshotFiles, so whatever is left after this
        // loop is the set of families that exist only in the snapshot.
        List<SnapshotRegionManifest.StoreFile> snapshotFamilyFiles = snapshotFiles.remove(familyDir.getName());
        if (snapshotFamilyFiles != null) {
            List<SnapshotRegionManifest.StoreFile> hfilesToAdd = new ArrayList<>();
            for (SnapshotRegionManifest.StoreFile storeFile : snapshotFamilyFiles) {
                // NOTE(review): both branches below are empty in this listing. Presumably a file
                // already present is dropped from familyFiles and a missing one is queued in
                // hfilesToAdd - TODO confirm. As shown, hfilesToAdd is never filled and every
                // name in familyFiles reaches the archive loop.
                if (familyFiles.contains(storeFile.getName())) {
                    // HFile already present
                } else {
                    // HFile missing
            // NOTE(review): closing braces for the else branch and the for loop are missing here.
            // Remove hfiles not present in the snapshot
            for (String hfileName : familyFiles) {
                Path hfile = new Path(familyDir, hfileName);
                LOG.trace("Removing hfile=" + hfileName + " from region=" + regionInfo.getEncodedName() + " table=" + tableName);
                HFileArchiver.archiveStoreFile(conf, fs, regionInfo, tableDir, family, hfile);
            // NOTE(review): the closing brace of the archive loop is missing here.
            // Restore Missing files
            for (SnapshotRegionManifest.StoreFile storeFile : hfilesToAdd) {
                LOG.debug("Adding HFileLink " + storeFile.getName() + " to region=" + regionInfo.getEncodedName() + " table=" + tableName);
                restoreStoreFile(familyDir, regionInfo, storeFile, createBackRefs);
        // NOTE(review): the closing brace of the restore loop is missing before the else below.
        } else {
            // Family does not exist in the snapshot: archive its files, then delete the directory
            // recursively.
            LOG.trace("Removing family=" + Bytes.toString(family) + " from region=" + regionInfo.getEncodedName() + " table=" + tableName);
            HFileArchiver.archiveFamilyByFamilyDir(fs, conf, regionInfo, familyDir, family);
            fs.delete(familyDir, true);
    // NOTE(review): closing braces for the else branch and the family loop are missing here.
    // Add families not present in the table
    for (Map.Entry<String, List<SnapshotRegionManifest.StoreFile>> familyEntry : snapshotFiles.entrySet()) {
        Path familyDir = new Path(regionDir, familyEntry.getKey());
        // A false return from mkdirs is treated as a failure and aborts the restore.
        if (!fs.mkdirs(familyDir)) {
            throw new IOException("Unable to create familyDir=" + familyDir);
        // NOTE(review): the closing brace of the mkdirs check is missing here.
        for (SnapshotRegionManifest.StoreFile storeFile : familyEntry.getValue()) {
            LOG.trace("Adding HFileLink " + storeFile.getName() + " to table=" + tableName);
            restoreStoreFile(familyDir, regionInfo, storeFile, createBackRefs);
    // NOTE(review): the listing ends here without closing the loops or the method.
Also used : Path(org.apache.hadoop.fs.Path) ArrayList(java.util.ArrayList) SnapshotRegionManifest(org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest) IOException(java.io.IOException) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map) TreeMap(java.util.TreeMap)


Path (org.apache.hadoop.fs.Path)11752 Test (org.junit.Test)4193 FileSystem (org.apache.hadoop.fs.FileSystem)3587 IOException (java.io.IOException) Configuration (org.apache.hadoop.conf.Configuration)2621 FileStatus (org.apache.hadoop.fs.FileStatus)1568 ArrayList (java.util.ArrayList)1145 File (java.io.File) FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream)924 HashMap (java.util.HashMap)570 Job (org.apache.hadoop.mapreduce.Job)492 JobConf (org.apache.hadoop.mapred.JobConf)477 URI (java.net.URI) FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream)462 FileNotFoundException (java.io.FileNotFoundException) FsPermission (org.apache.hadoop.fs.permission.FsPermission)375 MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster)362 DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem)355 Map (java.util.Map)326 List (java.util.List)316