
Example 16 with HFileScanner

Use of org.apache.hadoop.hbase.io.hfile.HFileScanner in project hbase by apache.

From the class HStore, method assertBulkLoadHFileOk.

/**
   * This throws a WrongRegionException if the HFile does not fit in this region, or an
   * InvalidHFileException if the HFile is not valid.
   */
public void assertBulkLoadHFileOk(Path srcPath) throws IOException {
    HFile.Reader reader = null;
    try {
        LOG.info("Validating hfile at " + srcPath + " for inclusion in " + "store " + this + " region " + this.getRegionInfo().getRegionNameAsString());
        reader = HFile.createReader(srcPath.getFileSystem(conf), srcPath, cacheConf, conf);
        reader.loadFileInfo();
        byte[] firstKey = reader.getFirstRowKey();
        Preconditions.checkState(firstKey != null, "First key can not be null");
        Cell lk = reader.getLastKey();
        Preconditions.checkState(lk != null, "Last key can not be null");
        byte[] lastKey = CellUtil.cloneRow(lk);
        LOG.debug("HFile bounds: first=" + Bytes.toStringBinary(firstKey) + " last=" + Bytes.toStringBinary(lastKey));
        LOG.debug("Region bounds: first=" + Bytes.toStringBinary(getRegionInfo().getStartKey()) + " last=" + Bytes.toStringBinary(getRegionInfo().getEndKey()));
        if (!this.getRegionInfo().containsRange(firstKey, lastKey)) {
            throw new WrongRegionException("Bulk load file " + srcPath.toString()
                + " does not fit inside region " + this.getRegionInfo().getRegionNameAsString());
        }
        if (reader.length() > conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE)) {
            LOG.warn("Trying to bulk load hfile " + srcPath.toString() + " with size: " + reader.length() + " bytes can be problematic as it may lead to oversplitting.");
        }
        if (verifyBulkLoads) {
            long verificationStartTime = EnvironmentEdgeManager.currentTime();
            LOG.info("Full verification started for bulk load hfile: " + srcPath.toString());
            Cell prevCell = null;
            // cacheBlocks=false, pread=false, isCompaction=false
            HFileScanner scanner = reader.getScanner(false, false, false);
            // Position on the first cell; the do/while below assumes a non-empty file.
            scanner.seekTo();
            do {
                Cell cell = scanner.getCell();
                if (prevCell != null) {
                    if (comparator.compareRows(prevCell, cell) > 0) {
                        throw new InvalidHFileException("Previous row is greater than" + " current row: path=" + srcPath + " previous=" + CellUtil.getCellKeyAsString(prevCell) + " current=" + CellUtil.getCellKeyAsString(cell));
                    }
                    if (CellComparator.compareFamilies(prevCell, cell) != 0) {
                        throw new InvalidHFileException("Previous key had different" + " family compared to current key: path=" + srcPath + " previous=" + Bytes.toStringBinary(prevCell.getFamilyArray(), prevCell.getFamilyOffset(), prevCell.getFamilyLength()) + " current=" + Bytes.toStringBinary(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()));
                    }
                }
                prevCell = cell;
            } while (scanner.next());
            LOG.info("Full verification complete for bulk load hfile: " + srcPath.toString() + " took " + (EnvironmentEdgeManager.currentTime() - verificationStartTime) + " ms");
        }
    } finally {
        if (reader != null) {
            reader.close();
        }
    }
}
Also used: InvalidHFileException (org.apache.hadoop.hbase.io.hfile.InvalidHFileException), HFileScanner (org.apache.hadoop.hbase.io.hfile.HFileScanner), HFile (org.apache.hadoop.hbase.io.hfile.HFile)
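For context on how a caller might react to the two failure modes above, here is a minimal sketch. It is not HBase code: the helper name isBulkLoadable and the variables store and hfilePath are hypothetical, and any other IOException (for example an unreadable file) is simply left to propagate.

import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.hfile.InvalidHFileException;
import org.apache.hadoop.hbase.regionserver.HStore;
import org.apache.hadoop.hbase.regionserver.WrongRegionException;

// Hypothetical helper: returns true when the HFile passed validation and
// can be bulk loaded into the given store.
static boolean isBulkLoadable(HStore store, Path hfilePath) throws IOException {
    try {
        store.assertBulkLoadHFileOk(hfilePath);
        return true;
    } catch (WrongRegionException e) {
        // The HFile's row range falls outside the region: split the file at
        // the region boundary and retry with the resulting halves.
        return false;
    } catch (InvalidHFileException e) {
        // Rows out of order or mixed column families: the file must be regenerated.
        return false;
    }
}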

Example 17 with HFileScanner

Use of org.apache.hadoop.hbase.io.hfile.HFileScanner in project hbase by apache.

From the class TestStoreFile, method testHFileLink.

@Test
public void testHFileLink() throws IOException {
    final HRegionInfo hri = new HRegionInfo(TableName.valueOf("testHFileLinkTb"));
    // force temp data in hbase/target/test-data instead of /tmp/hbase-xxxx/
    Configuration testConf = new Configuration(this.conf);
    FSUtils.setRootDir(testConf, testDir);
    HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(
        testConf, fs, FSUtils.getTableDir(testDir, hri.getTable()), hri);
    HFileContext meta = new HFileContextBuilder().withBlockSize(8 * 1024).build();
    // Make a store file and write data to it.
    StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf, this.fs)
        .withFilePath(regionFs.createTempName())
        .withFileContext(meta)
        .build();
    writeStoreFile(writer);
    Path storeFilePath = regionFs.commitStoreFile(TEST_FAMILY, writer.getPath());
    Path dstPath = new Path(regionFs.getTableDir(), new Path("test-region", TEST_FAMILY));
    // Create an HFileLink in dstPath that points back at the committed store file
    HFileLink.create(testConf, this.fs, dstPath, hri, storeFilePath.getName());
    Path linkFilePath = new Path(dstPath, HFileLink.createHFileLinkName(hri, storeFilePath.getName()));
    // Try to open store file from link
    StoreFileInfo storeFileInfo = new StoreFileInfo(testConf, this.fs, linkFilePath);
    StoreFile hsf = new StoreFile(this.fs, storeFileInfo, testConf, cacheConf, BloomType.NONE);
    assertTrue(storeFileInfo.isLink());
    // Now confirm that I can read from the link
    // seekTo() below positions the scanner on the first cell, so start at 1
    int count = 1;
    HFileScanner s = hsf.createReader().getScanner(false, false);
    s.seekTo();
    while (s.next()) {
        count++;
    }
    assertEquals((LAST_CHAR - FIRST_CHAR + 1) * (LAST_CHAR - FIRST_CHAR + 1), count);
}
Also used: HRegionInfo (org.apache.hadoop.hbase.HRegionInfo), Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), HFileScanner (org.apache.hadoop.hbase.io.hfile.HFileScanner), HFileContextBuilder (org.apache.hadoop.hbase.io.hfile.HFileContextBuilder), HFileContext (org.apache.hadoop.hbase.io.hfile.HFileContext), Test (org.junit.Test)
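One subtlety in the loop above: count starts at 1 because seekTo() positions the scanner on the first cell and next() only advances, so the final count matches the squared row-space assertion that writeStoreFile populates. The test can rely on a non-empty file; for arbitrary input the boolean result of seekTo() should be checked. A minimal defensive sketch, assuming an already-open HFile.Reader named reader (a hypothetical variable, not part of the test):

HFileScanner scanner = reader.getScanner(false, false, false);
int count = 0;
// seekTo() returns false when the file contains no cells, so guard the loop.
if (scanner.seekTo()) {
    do {
        // getCell() returns the cell the scanner is currently positioned on.
        Cell cell = scanner.getCell();
        count++;
    } while (scanner.next());  // next() returns false once past the last cell
}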

Aggregations

HFileScanner (org.apache.hadoop.hbase.io.hfile.HFileScanner): 17
CacheConfig (org.apache.hadoop.hbase.io.hfile.CacheConfig): 8
Configuration (org.apache.hadoop.conf.Configuration): 7
HFile (org.apache.hadoop.hbase.io.hfile.HFile): 7
Path (org.apache.hadoop.fs.Path): 6
Cell (org.apache.hadoop.hbase.Cell): 6
HFileContext (org.apache.hadoop.hbase.io.hfile.HFileContext): 6
HFileContextBuilder (org.apache.hadoop.hbase.io.hfile.HFileContextBuilder): 6
Test (org.junit.Test): 6
HRegionInfo (org.apache.hadoop.hbase.HRegionInfo): 4
FileSystem (org.apache.hadoop.fs.FileSystem): 3
KeyValue (org.apache.hadoop.hbase.KeyValue): 3
IOException (java.io.IOException): 2
ByteBuffer (java.nio.ByteBuffer): 2
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration): 2
ArrayList (java.util.ArrayList): 1
HashMap (java.util.HashMap): 1
Map (java.util.Map): 1
TreeMap (java.util.TreeMap): 1
FilterFileSystem (org.apache.hadoop.fs.FilterFileSystem): 1