
Example 6 with HFileLink

use of org.apache.hadoop.hbase.io.HFileLink in project hbase by apache.

the class PartitionedMobCompactor method select.

/**
   * Selects the compacted mob/del files.
   * Iterates the candidates to find out all the del files and small mob files.
   * @param candidates All the candidates.
   * @param allFiles Whether to add all mob files into the compaction.
   * @return A compaction request.
   * @throws IOException if IO failure is encountered
   */
protected PartitionedMobCompactionRequest select(List<FileStatus> candidates, boolean allFiles) throws IOException {
    final Map<CompactionPartitionId, CompactionPartition> filesToCompact = new HashMap<>();
    final CompactionPartitionId id = new CompactionPartitionId();
    final NavigableMap<CompactionDelPartitionId, CompactionDelPartition> delFilesToCompact = new TreeMap<>();
    final CompactionDelPartitionId delId = new CompactionDelPartitionId();
    final ArrayList<CompactionDelPartition> allDelPartitions = new ArrayList<>();
    int selectedFileCount = 0;
    int irrelevantFileCount = 0;
    int totalDelFiles = 0;
    MobCompactPartitionPolicy policy = column.getMobCompactPartitionPolicy();
    Calendar calendar = Calendar.getInstance();
    Date currentDate = new Date();
    Date firstDayOfCurrentMonth = null;
    Date firstDayOfCurrentWeek = null;
    if (policy == MobCompactPartitionPolicy.MONTHLY) {
        firstDayOfCurrentMonth = MobUtils.getFirstDayOfMonth(calendar, currentDate);
        firstDayOfCurrentWeek = MobUtils.getFirstDayOfWeek(calendar, currentDate);
    } else if (policy == MobCompactPartitionPolicy.WEEKLY) {
        firstDayOfCurrentWeek = MobUtils.getFirstDayOfWeek(calendar, currentDate);
    }
    // First check whether there are any del files, so the processing below can be
    // optimized. If there are del files, each partition needs to read its startKey
    // and endKey from its files; if there are none, that read can be skipped
    // entirely, which is the optimization.
    boolean withDelFiles = false;
    for (FileStatus file : candidates) {
        if (!file.isFile()) {
            continue;
        }
        // group the del files and small files.
        FileStatus linkedFile = file;
        if (HFileLink.isHFileLink(file.getPath())) {
            HFileLink link = HFileLink.buildFromHFileLinkPattern(conf, file.getPath());
            linkedFile = getLinkedFileStatus(link);
            if (linkedFile == null) {
                continue;
            }
        }
        if (StoreFileInfo.isDelFile(linkedFile.getPath())) {
            withDelFiles = true;
            break;
        }
    }
    for (FileStatus file : candidates) {
        if (!file.isFile()) {
            irrelevantFileCount++;
            continue;
        }
        // group the del files and small files.
        FileStatus linkedFile = file;
        if (HFileLink.isHFileLink(file.getPath())) {
            HFileLink link = HFileLink.buildFromHFileLinkPattern(conf, file.getPath());
            linkedFile = getLinkedFileStatus(link);
            if (linkedFile == null) {
                // If the linked file cannot be found, count it as an irrelevant file
                irrelevantFileCount++;
                continue;
            }
        }
        if (withDelFiles && StoreFileInfo.isDelFile(linkedFile.getPath())) {
            // File in the Del Partition List
            // Get delId from the file
            Reader reader = HFile.createReader(fs, linkedFile.getPath(), CacheConfig.DISABLED, conf);
            try {
                delId.setStartKey(reader.getFirstRowKey());
                delId.setEndKey(reader.getLastRowKey());
            } finally {
                reader.close();
            }
            CompactionDelPartition delPartition = delFilesToCompact.get(delId);
            if (delPartition == null) {
                CompactionDelPartitionId newDelId = new CompactionDelPartitionId(delId.getStartKey(), delId.getEndKey());
                delPartition = new CompactionDelPartition(newDelId);
                delFilesToCompact.put(newDelId, delPartition);
            }
            delPartition.addDelFile(file);
            totalDelFiles++;
        } else {
            String fileName = linkedFile.getPath().getName();
            String date = MobFileName.getDateFromName(fileName);
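            // fillPartitionId sets the partition id's date (normalized to the
            // policy's granularity) and its size threshold, and returns true when
            // the file falls outside the current policy's compaction window.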
            boolean skipCompaction = MobUtils.fillPartitionId(id, firstDayOfCurrentMonth, firstDayOfCurrentWeek, date, policy, calendar, mergeableSize);
            if (allFiles || (!skipCompaction && (linkedFile.getLen() < id.getThreshold()))) {
                // Add every file when allFiles is true; otherwise add only the
                // small files to the merge pool, filtering out files that are not
                // supposed to be compacted under the current policy.
                id.setStartKey(MobFileName.getStartKeyFromName(fileName));
                CompactionPartition compactionPartition = filesToCompact.get(id);
                if (compactionPartition == null) {
                    CompactionPartitionId newId = new CompactionPartitionId(id.getStartKey(), id.getDate());
                    compactionPartition = new CompactionPartition(newId);
                    compactionPartition.addFile(file);
                    filesToCompact.put(newId, compactionPartition);
                    newId.updateLatestDate(date);
                } else {
                    compactionPartition.addFile(file);
                    compactionPartition.getPartitionId().updateLatestDate(date);
                }
                if (withDelFiles) {
                    // get startKey and endKey from the file and update partition
                    // TODO: is it possible to skip read of most hfiles?
                    Reader reader = HFile.createReader(fs, linkedFile.getPath(), CacheConfig.DISABLED, conf);
                    try {
                        compactionPartition.setStartKey(reader.getFirstRowKey());
                        compactionPartition.setEndKey(reader.getLastRowKey());
                    } finally {
                        reader.close();
                    }
                }
                selectedFileCount++;
            }
        }
    }
    /*
     * Merge del partitions so there are only non-overlapping del file lists.
     * The TreeMap iterates the del partitions in ascending start-key order, so
     * each entry only needs to be checked against the previous partition.
     */
    for (Map.Entry<CompactionDelPartitionId, CompactionDelPartition> entry : delFilesToCompact.entrySet()) {
        if (allDelPartitions.size() > 0) {
            // check if the current key range overlaps the previous one
            CompactionDelPartition prev = allDelPartitions.get(allDelPartitions.size() - 1);
            if (Bytes.compareTo(prev.getId().getEndKey(), entry.getKey().getStartKey()) >= 0) {
                // merge them together
                prev.getId().setEndKey(entry.getValue().getId().getEndKey());
                prev.addDelFileList(entry.getValue().listDelFiles());
            } else {
                allDelPartitions.add(entry.getValue());
            }
        } else {
            allDelPartitions.add(entry.getValue());
        }
    }
    PartitionedMobCompactionRequest request = new PartitionedMobCompactionRequest(filesToCompact.values(), allDelPartitions);
    if (candidates.size() == (totalDelFiles + selectedFileCount + irrelevantFileCount)) {
        // all the files are selected
        request.setCompactionType(CompactionType.ALL_FILES);
    }
    LOG.info("The compaction type is " + request.getCompactionType() + ", the request has " + totalDelFiles + " del files, " + selectedFileCount + " selected files, and " + irrelevantFileCount + " irrelevant files");
    return request;
}
Also used : CompactionDelPartitionId(org.apache.hadoop.hbase.mob.compactions.PartitionedMobCompactionRequest.CompactionDelPartitionId) HFileLink(org.apache.hadoop.hbase.io.HFileLink) CompactionPartition(org.apache.hadoop.hbase.mob.compactions.PartitionedMobCompactionRequest.CompactionPartition) FileStatus(org.apache.hadoop.fs.FileStatus) HashMap(java.util.HashMap) Calendar(java.util.Calendar) ArrayList(java.util.ArrayList) Reader(org.apache.hadoop.hbase.io.hfile.HFile.Reader) CompactionPartitionId(org.apache.hadoop.hbase.mob.compactions.PartitionedMobCompactionRequest.CompactionPartitionId) TreeMap(java.util.TreeMap) Date(java.util.Date) CompactionDelPartition(org.apache.hadoop.hbase.mob.compactions.PartitionedMobCompactionRequest.CompactionDelPartition) MobCompactPartitionPolicy(org.apache.hadoop.hbase.client.MobCompactPartitionPolicy) Map(java.util.Map) NavigableMap(java.util.NavigableMap)
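
The pattern worth extracting from select() is the link-resolution step: a candidate path may be a plain hfile or an HFileLink, and a link must be resolved before its length or row keys can be read. Below is a minimal sketch of that step, assuming a Configuration and FileSystem are already in scope; the class and method names here are illustrative, not part of HBase.

import java.io.FileNotFoundException;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.HFileLink;

public class HFileLinkResolution {
    /**
     * Resolves a candidate store file to the file it actually points at:
     * the candidate itself for a plain hfile, or the referenced file for an
     * HFileLink. Returns null when the link target no longer exists, which
     * mirrors how select() treats unresolvable links as irrelevant files.
     */
    static FileStatus resolveIfLink(Configuration conf, FileSystem fs, FileStatus candidate)
            throws IOException {
        Path path = candidate.getPath();
        if (!HFileLink.isHFileLink(path)) {
            return candidate;
        }
        HFileLink link = HFileLink.buildFromHFileLinkPattern(conf, path);
        try {
            // getFileStatus probes the link's candidate locations in order.
            return link.getFileStatus(fs);
        } catch (FileNotFoundException e) {
            return null;
        }
    }
}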

Example 7 with HFileLink

use of org.apache.hadoop.hbase.io.HFileLink in project hbase by apache.

the class SnapshotTestingUtils method corruptSnapshot.

/**
   * Corrupt the specified snapshot by deleting some files.
   *
   * @param util {@link HBaseTestingUtility}
   * @param snapshotName name of the snapshot to corrupt
   * @return list of the names of the corrupted HFiles
   * @throws IOException on unexpected error reading the FS
   */
public static ArrayList<String> corruptSnapshot(final HBaseTestingUtility util, final String snapshotName) throws IOException {
    final MasterFileSystem mfs = util.getHBaseCluster().getMaster().getMasterFileSystem();
    final FileSystem fs = mfs.getFileSystem();
    Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, mfs.getRootDir());
    HBaseProtos.SnapshotDescription snapshotDesc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
    final TableName table = TableName.valueOf(snapshotDesc.getTable());
    final ArrayList<String> corruptedFiles = new ArrayList<>();
    final Configuration conf = util.getConfiguration();
    SnapshotReferenceUtil.visitTableStoreFiles(conf, fs, snapshotDir, snapshotDesc, new SnapshotReferenceUtil.StoreFileVisitor() {

        @Override
        public void storeFile(final HRegionInfo regionInfo, final String family, final SnapshotRegionManifest.StoreFile storeFile) throws IOException {
            String region = regionInfo.getEncodedName();
            String hfile = storeFile.getName();
            HFileLink link = HFileLink.build(conf, table, region, family, hfile);
            if (corruptedFiles.size() % 2 == 0) {
                fs.delete(link.getAvailablePath(fs), true);
                corruptedFiles.add(hfile);
            }
        }
    });
    assertTrue(corruptedFiles.size() > 0);
    return corruptedFiles;
}
Also used : MasterFileSystem(org.apache.hadoop.hbase.master.MasterFileSystem) Path(org.apache.hadoop.fs.Path) HFileLink(org.apache.hadoop.hbase.io.HFileLink) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) SnapshotRegionManifest(org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest) IOException(java.io.IOException) HBaseProtos(org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos) HRegionInfo(org.apache.hadoop.hbase.HRegionInfo) TableName(org.apache.hadoop.hbase.TableName) FileSystem(org.apache.hadoop.fs.FileSystem) MasterFileSystem(org.apache.hadoop.hbase.master.MasterFileSystem) HRegionFileSystem(org.apache.hadoop.hbase.regionserver.HRegionFileSystem)
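
A hypothetical call site, to show how tests typically use this helper. TEST_UTIL and the snapshot name are placeholders, and the sketch assumes a running mini cluster on which "mySnapshot" already exists.

import java.io.IOException;
import java.util.ArrayList;

import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.snapshot.SnapshotTestingUtils;

public class CorruptSnapshotUsage {
    // In a real test this utility would already be started with a mini cluster.
    private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

    static void corruptAndVerify() throws IOException {
        // Deletes every other store file referenced by the snapshot manifest.
        ArrayList<String> corrupted =
            SnapshotTestingUtils.corruptSnapshot(TEST_UTIL, "mySnapshot");
        // Any later clone or restore of "mySnapshot" should now fail on the
        // missing hfiles, which is what callers of this helper assert.
        System.out.println("Corrupted " + corrupted.size() + " hfiles");
    }
}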

Example 8 with HFileLink

use of org.apache.hadoop.hbase.io.HFileLink in project hbase by apache.

the class HBaseFsck method offlineHLinkFileRepair.

/**
   * Scan all the store file names to find any lingering HFileLink files,
   * which refer to non-existent files. If the "fix" option is enabled,
   * any lingering HFileLink file found will be sidelined.
   */
private void offlineHLinkFileRepair() throws IOException, InterruptedException {
    Configuration conf = getConf();
    Path hbaseRoot = FSUtils.getRootDir(conf);
    FileSystem fs = hbaseRoot.getFileSystem(conf);
    LOG.info("Computing mapping of all link files");
    Map<String, Path> allFiles = FSUtils.getTableStoreFilePathMap(fs, hbaseRoot, new FSUtils.HFileLinkFilter(), executor, errors);
    errors.print("");
    LOG.info("Validating mapping using HDFS state");
    for (Path path : allFiles.values()) {
        // building HFileLink object to gather locations
        HFileLink actualLink = HFileLink.buildFromHFileLinkPattern(conf, path);
        // good, expected
        if (actualLink.exists(fs))
            continue;
        // Found a lingering HFileLink
        errors.reportError(ERROR_CODE.LINGERING_HFILELINK, "Found lingering HFileLink " + path);
        if (!shouldFixHFileLinks())
            continue;
        // Now, trying to fix it since requested
        setShouldRerun();
        // An HFileLink path should be like
        // ${hbase.rootdir}/data/namespace/table_name/region_id/family_name/linkedtable=linkedregionname-linkedhfilename
        // sidelining will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same folder structure.
        boolean success = sidelineFile(fs, hbaseRoot, path);
        if (!success) {
            LOG.error("Failed to sideline HFileLink file " + path);
        }
        // An HFileLink backreference path should be like
        // ${hbase.rootdir}/archive/data/namespace/table_name/region_id/family_name/.links-linkedhfilename
        // sidelining will happen in the ${hbase.rootdir}/${sidelinedir} directory with the same folder structure.
        Path backRefPath = FileLink.getBackReferencesDir(HFileArchiveUtil.getStoreArchivePath(conf, HFileLink.getReferencedTableName(path.getName().toString()), HFileLink.getReferencedRegionName(path.getName().toString()), path.getParent().getName()), HFileLink.getReferencedHFileName(path.getName().toString()));
        success = sidelineFile(fs, hbaseRoot, backRefPath);
        if (!success) {
            LOG.error("Failed to sideline HFileLink backreference file " + path);
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) HFileLink(org.apache.hadoop.hbase.io.HFileLink) Configuration(org.apache.hadoop.conf.Configuration) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) FileSystem(org.apache.hadoop.fs.FileSystem) MasterFileSystem(org.apache.hadoop.hbase.master.MasterFileSystem) HRegionFileSystem(org.apache.hadoop.hbase.regionserver.HRegionFileSystem)
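
The comments in offlineHLinkFileRepair describe the on-disk link name pattern; HFileLink's static parsers make that pattern concrete. A small sketch using the same getReferenced* helpers called above, where the link name is made up for illustration and the class name is not part of HBase.

import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.io.HFileLink;

public class HFileLinkNameParsing {
    public static void main(String[] args) {
        // linkedtable=linkedregionname-linkedhfilename, per the pattern above
        String linkName = "testtable=0123456789abcdef0123456789abcdef-a1b2c3d4e5f67890";
        TableName table = HFileLink.getReferencedTableName(linkName);
        String region = HFileLink.getReferencedRegionName(linkName);
        String hfile = HFileLink.getReferencedHFileName(linkName);
        System.out.println(table + " / " + region + " / " + hfile);
    }
}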

Example 9 with HFileLink

use of org.apache.hadoop.hbase.io.HFileLink in project hbase by apache.

the class TestStoreFileInfo method testEqualsWithLink.

@Test
public void testEqualsWithLink() throws IOException {
    Path origin = new Path("/origin");
    Path tmp = TEST_UTIL.getDataTestDir();
    Path mob = new Path("/mob");
    Path archive = new Path("/archive");
    HFileLink link1 = new HFileLink(new Path(origin, "f1"), new Path(tmp, "f1"), new Path(mob, "f1"), new Path(archive, "f1"));
    HFileLink link2 = new HFileLink(new Path(origin, "f1"), new Path(tmp, "f1"), new Path(mob, "f1"), new Path(archive, "f1"));
    StoreFileInfo info1 = new StoreFileInfo(TEST_UTIL.getConfiguration(), TEST_UTIL.getTestFileSystem(), null, link1);
    StoreFileInfo info2 = new StoreFileInfo(TEST_UTIL.getConfiguration(), TEST_UTIL.getTestFileSystem(), null, link2);
    assertEquals(info1, info2);
    assertEquals(info1.hashCode(), info2.hashCode());
}
Also used : Path(org.apache.hadoop.fs.Path) HFileLink(org.apache.hadoop.hbase.io.HFileLink) Test(org.junit.Test)
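
A note on the four-argument constructor used here: the paths appear to be the candidate locations a link probes when it resolves, in order the original store directory, a temp directory, the mob directory, and the archive. Defining equals() and hashCode() over those locations is what lets two independently built links, and the StoreFileInfo objects wrapping them, compare equal as this test asserts.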

Aggregations

HFileLink (org.apache.hadoop.hbase.io.HFileLink): 9 uses
Path (org.apache.hadoop.fs.Path): 7 uses
IOException (java.io.IOException): 4 uses
FileSystem (org.apache.hadoop.fs.FileSystem): 4 uses
ArrayList (java.util.ArrayList): 3 uses
Configuration (org.apache.hadoop.conf.Configuration): 3 uses
FileStatus (org.apache.hadoop.fs.FileStatus): 3 uses
TableName (org.apache.hadoop.hbase.TableName): 3 uses
FileNotFoundException (java.io.FileNotFoundException): 2 uses
Calendar (java.util.Calendar): 2 uses
Date (java.util.Date): 2 uses
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration): 2 uses
HRegionInfo (org.apache.hadoop.hbase.HRegionInfo): 2 uses
MasterFileSystem (org.apache.hadoop.hbase.master.MasterFileSystem): 2 uses
HRegionFileSystem (org.apache.hadoop.hbase.regionserver.HRegionFileSystem): 2 uses
HBaseProtos (org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos): 2 uses
SnapshotRegionManifest (org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest): 2 uses
Test (org.junit.Test): 2 uses
ParseException (java.text.ParseException): 1 use
HashMap (java.util.HashMap): 1 use