Example 61 with Path

use of org.apache.hadoop.fs.Path in project hbase by apache.

the class DefaultMobStoreFlusher method flushSnapshot.

/**
   * Flushes the snapshot of the MemStore.
   * If this store is not a mob store, the cells in the snapshot are flushed to HBase store files.
   * If it is a mob store, the flusher writes the MemStore to two places:
   * the HBase store files and the mob files.
   * <ol>
   * <li>Cells that are not of PUT type or that carry a delete marker are flushed directly to
   * HBase.</li>
   * <li>If the size of a cell value is larger than a threshold, the value is flushed to a
   * mob file, and a companion cell holding the path of that file is flushed to HBase.</li>
   * <li>If the size of a cell value is smaller than or equal to the threshold, the cell is
   * flushed to HBase directly.</li>
   * </ol>
   */
@Override
public List<Path> flushSnapshot(MemStoreSnapshot snapshot, long cacheFlushId, MonitoredTask status, ThroughputController throughputController) throws IOException {
    ArrayList<Path> result = new ArrayList<>();
    long cellsCount = snapshot.getCellsCount();
    // don't flush if there are no entries
    if (cellsCount == 0)
        return result;
    // Use a store scanner to find which rows to flush.
    long smallestReadPoint = store.getSmallestReadPoint();
    InternalScanner scanner = createScanner(snapshot.getScanner(), smallestReadPoint);
    if (scanner == null) {
        // NULL scanner returned from coprocessor hooks means skip normal processing
        return result;
    }
    StoreFileWriter writer;
    try {
        // list of store files. Add cleanup of anything put on filesystem if we fail.
        synchronized (flushLock) {
            status.setStatus("Flushing " + store + ": creating writer");
            // Write the map out to the disk
            writer = store.createWriterInTmp(cellsCount, store.getFamily().getCompressionType(), false, true, true, false, snapshot.getTimeRangeTracker());
            IOException e = null;
            try {
                // It's a mob store, so flush the cells the mob way. This is what distinguishes
                // flushing a mob store from flushing a normal store.
                performMobFlush(snapshot, cacheFlushId, scanner, writer, status, throughputController);
            } catch (IOException ioe) {
                e = ioe;
                // throw the exception out
                throw ioe;
            } finally {
                if (e != null) {
                    writer.close();
                } else {
                    finalizeWriter(writer, cacheFlushId, status);
                }
            }
        }
    } finally {
        scanner.close();
    }
    LOG.info("Mob store is flushed, sequenceid=" + cacheFlushId + ", memsize=" + StringUtils.TraditionalBinaryPrefix.long2String(snapshot.getDataSize(), "", 1) + ", hasBloomFilter=" + writer.hasGeneralBloom() + ", into tmp file " + writer.getPath());
    result.add(writer.getPath());
    return result;
}
Also used : Path(org.apache.hadoop.fs.Path) StoreFileWriter(org.apache.hadoop.hbase.regionserver.StoreFileWriter) InternalScanner(org.apache.hadoop.hbase.regionserver.InternalScanner) ArrayList(java.util.ArrayList) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException)
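
The Javadoc above describes how the flusher decides where each cell goes. Below is a minimal, hypothetical sketch of just that routing rule, not HBase code: the class, method, and parameter names are assumptions chosen for illustration, and in the real flusher this decision presumably happens inside performMobFlush, which is not shown here.

// Standalone, hypothetical sketch of the per-cell routing rule described in the Javadoc above.
// Names (MobRoutingSketch, Target, route, mobThreshold) are illustrative assumptions, not HBase APIs.
public class MobRoutingSketch {

    enum Target { HFILE, MOB_FILE }

    // Non-PUT or delete-marked cells and small values stay in HBase; large values go to a mob file.
    static Target route(boolean isPut, boolean isDelete, long valueLength, long mobThreshold) {
        if (!isPut || isDelete) {
            return Target.HFILE;
        }
        return valueLength > mobThreshold ? Target.MOB_FILE : Target.HFILE;
    }

    public static void main(String[] args) {
        System.out.println(route(true, false, 200_000, 102_400));  // MOB_FILE
        System.out.println(route(true, false, 1_000, 102_400));    // HFILE
        System.out.println(route(false, false, 200_000, 102_400)); // HFILE
    }
}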

Example 62 with Path

use of org.apache.hadoop.fs.Path in project hbase by apache.

the class PartitionedMobCompactor method compactDelFiles.

/**
   * Compacts the del files in batches, which avoids opening too many files at once.
   * @param request The compaction request.
   * @param delFilePaths Del file paths to compact
   * @return The paths of new del files after merging or the original files if no merging
   *         is necessary.
   * @throws IOException if IO failure is encountered
   */
protected List<Path> compactDelFiles(PartitionedMobCompactionRequest request, List<Path> delFilePaths) throws IOException {
    if (delFilePaths.size() <= delFileMaxCount) {
        return delFilePaths;
    }
    // When there are more del files than allowed, merge them first.
    int offset = 0;
    List<Path> paths = new ArrayList<>();
    while (offset < delFilePaths.size()) {
        // get the batch
        int batch = compactionBatchSize;
        if (delFilePaths.size() - offset < compactionBatchSize) {
            batch = delFilePaths.size() - offset;
        }
        List<StoreFile> batchedDelFiles = new ArrayList<>();
        if (batch == 1) {
            // Only one file left in this batch; do not compact it, just add it to the new files.
            paths.add(delFilePaths.get(offset));
            offset++;
            continue;
        }
        for (int i = offset; i < batch + offset; i++) {
            batchedDelFiles.add(new StoreFile(fs, delFilePaths.get(i), conf, compactionCacheConfig, BloomType.NONE));
        }
        // compact the del files in a batch.
        paths.add(compactDelFilesInBatch(request, batchedDelFiles));
        // move to the next batch.
        offset += batch;
    }
    return compactDelFiles(request, paths);
}
Also used : Path(org.apache.hadoop.fs.Path) ArrayList(java.util.ArrayList) StoreFile(org.apache.hadoop.hbase.regionserver.StoreFile)
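
The loop above partitions the del file paths into batches of at most compactionBatchSize and recurses until the count drops to delFileMaxCount. The following is a small, self-contained sketch of only the batching arithmetic, with made-up names; it is an illustration, not HBase code, and it does not special-case a trailing single file the way the method above does.

import java.util.ArrayList;
import java.util.List;

public class BatchingSketch {

    // Split items into consecutive batches of at most batchSize elements.
    static <T> List<List<T>> toBatches(List<T> items, int batchSize) {
        List<List<T>> batches = new ArrayList<>();
        int offset = 0;
        while (offset < items.size()) {
            int batch = Math.min(batchSize, items.size() - offset);
            batches.add(new ArrayList<>(items.subList(offset, offset + batch)));
            offset += batch;
        }
        return batches;
    }

    public static void main(String[] args) {
        System.out.println(toBatches(List.of("a", "b", "c", "d", "e"), 2)); // [[a, b], [c, d], [e]]
    }
}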

Example 63 with Path

use of org.apache.hadoop.fs.Path in project hbase by apache.

the class RestoreSnapshotHelper method restoreReferenceFile.

/**
   * Create a new {@link Reference} as copy of the source one.
   * <p><blockquote><pre>
   * The source table looks like:
   *    1234/abc      (original file)
   *    5678/abc.1234 (reference file)
   *
   * After the clone operation the table looks like:
   *   wxyz/table=1234-abc
   *   stuv/table=1234-abc.wxyz
   *
   * NOTE that the region name in the clone changes (md5 of regioninfo)
   * and the reference should reflect that change.
   * </pre></blockquote>
   * @param familyDir destination directory for the store file
   * @param regionInfo destination region info for the table
   * @param storeFile reference file name
   */
private void restoreReferenceFile(final Path familyDir, final HRegionInfo regionInfo, final SnapshotRegionManifest.StoreFile storeFile) throws IOException {
    String hfileName = storeFile.getName();
    // Extract the referred information (hfile name and parent region)
    Path refPath = StoreFileInfo.getReferredToFile(new Path(new Path(new Path(new Path(snapshotTable.getNamespaceAsString(), snapshotTable.getQualifierAsString()), regionInfo.getEncodedName()), familyDir.getName()), hfileName));
    String snapshotRegionName = refPath.getParent().getParent().getName();
    String fileName = refPath.getName();
    // The new reference should have the cloned region name as parent, if it is a clone.
    String clonedRegionName = Bytes.toString(regionsMap.get(Bytes.toBytes(snapshotRegionName)));
    if (clonedRegionName == null)
        clonedRegionName = snapshotRegionName;
    // The output file should be a reference link table=snapshotRegion-fileName.clonedRegionName
    Path linkPath = null;
    String refLink = fileName;
    if (!HFileLink.isHFileLink(fileName)) {
        refLink = HFileLink.createHFileLinkName(snapshotTable, snapshotRegionName, fileName);
        linkPath = new Path(familyDir, HFileLink.createHFileLinkName(snapshotTable, regionInfo.getEncodedName(), hfileName));
    }
    Path outPath = new Path(familyDir, refLink + '.' + clonedRegionName);
    // Create the new reference
    if (storeFile.hasReference()) {
        Reference reference = Reference.convert(storeFile.getReference());
        reference.write(fs, outPath);
    } else {
        InputStream in;
        if (linkPath != null) {
            in = HFileLink.buildFromHFileLinkPattern(conf, linkPath).open(fs);
        } else {
            linkPath = new Path(new Path(HRegion.getRegionDir(snapshotManifest.getSnapshotDir(), regionInfo.getEncodedName()), familyDir.getName()), hfileName);
            in = fs.open(linkPath);
        }
        OutputStream out = fs.create(outPath);
        IOUtils.copyBytes(in, out, conf);
    }
    // Add the daughter region to the map
    String regionName = Bytes.toString(regionsMap.get(regionInfo.getEncodedNameAsBytes()));
    LOG.debug("Restore reference " + regionName + " to " + clonedRegionName);
    synchronized (parentsMap) {
        Pair<String, String> daughters = parentsMap.get(clonedRegionName);
        if (daughters == null) {
            daughters = new Pair<>(regionName, null);
            parentsMap.put(clonedRegionName, daughters);
        } else if (!regionName.equals(daughters.getFirst())) {
            daughters.setSecond(regionName);
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Reference(org.apache.hadoop.hbase.io.Reference) InputStream(java.io.InputStream) OutputStream(java.io.OutputStream)
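
The nested new Path(parent, child) calls above can be hard to read. The snippet below is a standalone illustration of how org.apache.hadoop.fs.Path composes and decomposes such a path; the literal names (ns, table, 1234, cf, abc.1234) are placeholders echoing the layout in the Javadoc, not real values.

import org.apache.hadoop.fs.Path;

public class PathCompositionSketch {
    public static void main(String[] args) {
        // Compose namespace/table/region/family/file, mirroring the nested constructors above.
        Path refPath = new Path(new Path(new Path(new Path("ns", "table"), "1234"), "cf"), "abc.1234");
        System.out.println(refPath);                                    // ns/table/1234/cf/abc.1234
        System.out.println(refPath.getName());                          // abc.1234 (the referred-to file)
        System.out.println(refPath.getParent().getParent().getName());  // 1234 (the region directory)
    }
}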

Example 64 with Path

use of org.apache.hadoop.fs.Path in project hbase by apache.

the class RestoreSnapshotHelper method cloneHdfsMobRegion.

/**
   * Clone the mob region: create a new region for it
   * and an HFileLink for each hfile.
   */
private void cloneHdfsMobRegion(final Map<String, SnapshotRegionManifest> regionManifests, final HRegionInfo region) throws IOException {
    // clone region info (change embedded tableName with the new one)
    Path clonedRegionPath = MobUtils.getMobRegionPath(conf, tableDesc.getTableName());
    cloneRegion(clonedRegionPath, region, regionManifests.get(region.getEncodedName()));
}
Also used : Path(org.apache.hadoop.fs.Path)

Example 65 with Path

use of org.apache.hadoop.fs.Path in project hbase by apache.

the class RestoreSnapshotHelper method restoreRegion.

/**
   * Restore a region by removing files that are not in the snapshot
   * and adding the missing ones from the snapshot.
   */
private void restoreRegion(final HRegionInfo regionInfo, final SnapshotRegionManifest regionManifest, Path regionDir) throws IOException {
    Map<String, List<SnapshotRegionManifest.StoreFile>> snapshotFiles = getRegionHFileReferences(regionManifest);
    String tableName = tableDesc.getTableName().getNameAsString();
    // Restore families present in the table
    for (Path familyDir : FSUtils.getFamilyDirs(fs, regionDir)) {
        byte[] family = Bytes.toBytes(familyDir.getName());
        Set<String> familyFiles = getTableRegionFamilyFiles(familyDir);
        List<SnapshotRegionManifest.StoreFile> snapshotFamilyFiles = snapshotFiles.remove(familyDir.getName());
        if (snapshotFamilyFiles != null) {
            List<SnapshotRegionManifest.StoreFile> hfilesToAdd = new ArrayList<>();
            for (SnapshotRegionManifest.StoreFile storeFile : snapshotFamilyFiles) {
                if (familyFiles.contains(storeFile.getName())) {
                    // HFile already present
                    familyFiles.remove(storeFile.getName());
                } else {
                    // HFile missing
                    hfilesToAdd.add(storeFile);
                }
            }
            // Remove hfiles not present in the snapshot
            for (String hfileName : familyFiles) {
                Path hfile = new Path(familyDir, hfileName);
                LOG.trace("Removing hfile=" + hfileName + " from region=" + regionInfo.getEncodedName() + " table=" + tableName);
                HFileArchiver.archiveStoreFile(conf, fs, regionInfo, tableDir, family, hfile);
            }
            // Restore Missing files
            for (SnapshotRegionManifest.StoreFile storeFile : hfilesToAdd) {
                LOG.debug("Adding HFileLink " + storeFile.getName() + " to region=" + regionInfo.getEncodedName() + " table=" + tableName);
                restoreStoreFile(familyDir, regionInfo, storeFile, createBackRefs);
            }
        } else {
            // Family doesn't exist in the snapshot
            LOG.trace("Removing family=" + Bytes.toString(family) + " from region=" + regionInfo.getEncodedName() + " table=" + tableName);
            HFileArchiver.archiveFamilyByFamilyDir(fs, conf, regionInfo, familyDir, family);
            fs.delete(familyDir, true);
        }
    }
    // Add families not present in the table
    for (Map.Entry<String, List<SnapshotRegionManifest.StoreFile>> familyEntry : snapshotFiles.entrySet()) {
        Path familyDir = new Path(regionDir, familyEntry.getKey());
        if (!fs.mkdirs(familyDir)) {
            throw new IOException("Unable to create familyDir=" + familyDir);
        }
        for (SnapshotRegionManifest.StoreFile storeFile : familyEntry.getValue()) {
            LOG.trace("Adding HFileLink " + storeFile.getName() + " to table=" + tableName);
            restoreStoreFile(familyDir, regionInfo, storeFile, createBackRefs);
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) ArrayList(java.util.ArrayList) SnapshotRegionManifest(org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest) IOException(java.io.IOException) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map) TreeMap(java.util.TreeMap)
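
At its core, restoreRegion reconciles two sets per family: the files currently in the table's family directory and the files listed in the snapshot manifest. The sketch below shows only that set difference with invented file names; it is an illustration, not HBase code.

import java.util.Set;
import java.util.TreeSet;

public class RestoreDiffSketch {
    public static void main(String[] args) {
        Set<String> currentFiles  = Set.of("hfile1", "hfile2");  // files in the table's family dir
        Set<String> snapshotFiles = Set.of("hfile2", "hfile3");  // files in the snapshot manifest

        Set<String> toArchive = new TreeSet<>(currentFiles);     // present in the table, not in the snapshot
        toArchive.removeAll(snapshotFiles);
        Set<String> toRestore = new TreeSet<>(snapshotFiles);    // in the snapshot, missing from the table
        toRestore.removeAll(currentFiles);

        System.out.println("archive: " + toArchive);             // archive: [hfile1]
        System.out.println("restore: " + toRestore);             // restore: [hfile3]
    }
}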

Aggregations

Path (org.apache.hadoop.fs.Path): 11752
Test (org.junit.Test): 4193
FileSystem (org.apache.hadoop.fs.FileSystem): 3587
IOException (java.io.IOException): 2631
Configuration (org.apache.hadoop.conf.Configuration): 2621
FileStatus (org.apache.hadoop.fs.FileStatus): 1568
ArrayList (java.util.ArrayList): 1145
File (java.io.File): 987
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 924
HashMap (java.util.HashMap): 570
Job (org.apache.hadoop.mapreduce.Job): 492
JobConf (org.apache.hadoop.mapred.JobConf): 477
URI (java.net.URI): 465
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 462
FileNotFoundException (java.io.FileNotFoundException): 441
FsPermission (org.apache.hadoop.fs.permission.FsPermission): 375
MiniDFSCluster (org.apache.hadoop.hdfs.MiniDFSCluster): 362
DistributedFileSystem (org.apache.hadoop.hdfs.DistributedFileSystem): 355
Map (java.util.Map): 326
List (java.util.List): 316