Example 6 with HFile

use of org.apache.hadoop.hbase.io.hfile.HFile in project hbase by apache.

the class TestImportTsv method getKVCountFromHfile.

/**
   * Returns the total number of KVs in the given hfile.
   * @param fs filesystem containing the hfile
   * @param p path to the hfile
   * @return KV count in the given hfile
   * @throws IOException if the hfile cannot be opened or read
   */
private static int getKVCountFromHfile(FileSystem fs, Path p) throws IOException {
    Configuration conf = util.getConfiguration();
    HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), conf);
    reader.loadFileInfo();
    HFileScanner scanner = reader.getScanner(false, false);
    int count = 0;
    // seekTo() returns false for an empty hfile; guard the do/while so an
    // empty file reports 0 rather than 1.
    if (scanner.seekTo()) {
        do {
            count++;
        } while (scanner.next());
    }
    reader.close();
    return count;
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) HFileScanner(org.apache.hadoop.hbase.io.hfile.HFileScanner) HFile(org.apache.hadoop.hbase.io.hfile.HFile) CacheConfig(org.apache.hadoop.hbase.io.hfile.CacheConfig)
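A hedged usage sketch: a helper like this typically backs an assertion in a test body, along the following lines. The path and the expected count of 3 are placeholders, not values from TestImportTsv, and a static import of org.junit.Assert.assertEquals is assumed.

// Hypothetical call site; hfilePath and the expected count are made up.
FileSystem fs = FileSystem.get(util.getConfiguration());
Path hfilePath = new Path("/test/output/cf/somehfile");
assertEquals(3, getKVCountFromHfile(fs, hfilePath));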

Example 7 with HFile

use of org.apache.hadoop.hbase.io.hfile.HFile in project hbase by apache.

the class TestMobStoreCompaction method createHFile.

/**
   * Create an HFile containing a single KeyValue built from the given dummy data.
   */
private void createHFile(Path path, int rowIdx, byte[] dummyData) throws IOException {
    HFileContext meta = new HFileContextBuilder().build();
    HFile.Writer writer = HFile.getWriterFactory(conf, new CacheConfig(conf))
            .withPath(fs, path)
            .withFileContext(meta)
            .create();
    long now = System.currentTimeMillis();
    try {
        KeyValue kv = new KeyValue(Bytes.add(STARTROW, Bytes.toBytes(rowIdx)), COLUMN_FAMILY, Bytes.toBytes("colX"), now, dummyData);
        writer.append(kv);
    } finally {
        // Record the bulk load timestamp in the file info before closing.
        writer.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
        writer.close();
    }
}
Also used : HFileContextBuilder(org.apache.hadoop.hbase.io.hfile.HFileContextBuilder) HFile(org.apache.hadoop.hbase.io.hfile.HFile) CacheConfig(org.apache.hadoop.hbase.io.hfile.CacheConfig) HFileContext(org.apache.hadoop.hbase.io.hfile.HFileContext)
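A hedged driver sketch: a test might seed a store with several such single-KV files, roughly as below. The store directory, file names, and payload size are placeholders, and java.util.Random is assumed for the dummy bytes.

// Hypothetical loop writing five small HFiles into a store directory;
// storeDir is a placeholder Path.
byte[] dummyData = new byte[100];
new Random().nextBytes(dummyData);
for (int i = 0; i < 5; i++) {
    createHFile(new Path(storeDir, "hfile_" + i), i, dummyData);
}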

Example 8 with HFile

use of org.apache.hadoop.hbase.io.hfile.HFile in project hbase by apache.

the class TestHBaseFsckTwoRS method testLingeringHFileLinks.

/**
   * Test fixing lingering HFileLinks.
   */
@Test(timeout = 180000)
public void testLingeringHFileLinks() throws Exception {
    final TableName tableName = TableName.valueOf(name.getMethodName());
    try {
        setupTable(tableName);
        FileSystem fs = FileSystem.get(conf);
        Path tableDir = FSUtils.getTableDir(FSUtils.getRootDir(conf), tableName);
        Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
        String regionName = regionDir.getName();
        Path famDir = new Path(regionDir, FAM_STR);
        String HFILE_NAME = "01234567abcd";
        Path hFilePath = new Path(famDir, HFILE_NAME);
        // creating HFile
        HFileContext context = new HFileContextBuilder().withIncludesTags(false).build();
        HFile.Writer w = HFile.getWriterFactoryNoCache(conf)
                .withPath(fs, hFilePath)
                .withFileContext(context)
                .create();
        w.close();
        HFileLink.create(conf, fs, famDir, tableName, regionName, HFILE_NAME);
        // should report no error
        HBaseFsck hbck = doFsck(conf, false);
        assertNoErrors(hbck);
        // Delete linked file
        fs.delete(hFilePath, true);
        // Check without fix should show the error
        hbck = doFsck(conf, false);
        assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_HFILELINK });
        // Run with fix enabled: the lingering link is reported once more on
        // this pass while hbck sidelines it
        hbck = doFsck(conf, true);
        assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_HFILELINK });
        // Fix should sideline these files, thus preventing the error
        hbck = doFsck(conf, false);
        assertNoErrors(hbck);
    } finally {
        cleanupTable(tableName);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileSystem(org.apache.hadoop.fs.FileSystem) HFileContextBuilder(org.apache.hadoop.hbase.io.hfile.HFileContextBuilder) HFile(org.apache.hadoop.hbase.io.hfile.HFile) HFileContext(org.apache.hadoop.hbase.io.hfile.HFileContext) Test(org.junit.Test)
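For completeness, a hedged follow-up check that the link file landed where expected; HFileLink.createHFileLinkName is assumed to be the public helper that builds the on-disk link name from the table, region, and hfile names, and org.junit.Assert.assertTrue is assumed statically imported.

// Hypothetical assertion after the HFileLink.create(...) call above.
String linkName = HFileLink.createHFileLinkName(tableName, regionName, HFILE_NAME);
assertTrue(fs.exists(new Path(famDir, linkName)));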

Example 9 with HFile

use of org.apache.hadoop.hbase.io.hfile.HFile in project hbase by apache.

the class RestoreTool method generateBoundaryKeys.

/**
   * Calculate region boundaries from the HFiles found under each region directory.
   * @param regionDirList list of region directories to scan
   * @return the inferred region boundary keys
   */
byte[][] generateBoundaryKeys(ArrayList<Path> regionDirList) throws FileNotFoundException, IOException {
    TreeMap<byte[], Integer> map = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
    // calculate region boundaries from the hfiles in each family dir
    for (Path regionDir : regionDirList) {
        LOG.debug("Parsing region dir: " + regionDir);
        Path hfofDir = regionDir;
        if (!fs.exists(hfofDir)) {
            LOG.warn("HFileOutputFormat dir " + hfofDir + " not found");
            // Nothing to parse; skip this region dir rather than failing on
            // the listStatus() call below.
            continue;
        }
        FileStatus[] familyDirStatuses = fs.listStatus(hfofDir);
        if (familyDirStatuses == null) {
            throw new IOException("No families found in " + hfofDir);
        }
        for (FileStatus stat : familyDirStatuses) {
            if (!stat.isDirectory()) {
                LOG.warn("Skipping non-directory " + stat.getPath());
                continue;
            }
            boolean isIgnore = false;
            String pathName = stat.getPath().getName();
            for (String ignore : ignoreDirs) {
                if (pathName.contains(ignore)) {
                    LOG.warn("Skipping non-family directory" + pathName);
                    isIgnore = true;
                    break;
                }
            }
            if (isIgnore) {
                continue;
            }
            Path familyDir = stat.getPath();
            LOG.debug("Parsing family dir [" + familyDir.toString() + " in region [" + regionDir + "]");
            // Skip _logs, etc
            if (familyDir.getName().startsWith("_") || familyDir.getName().startsWith(".")) {
                continue;
            }
            // start to parse hfile inside one family dir
            Path[] hfiles = FileUtil.stat2Paths(fs.listStatus(familyDir));
            for (Path hfile : hfiles) {
                if (hfile.getName().startsWith("_") || hfile.getName().startsWith(".") || StoreFileInfo.isReference(hfile.getName()) || HFileLink.isHFileLink(hfile.getName())) {
                    continue;
                }
                HFile.Reader reader = HFile.createReader(fs, hfile, conf);
                final byte[] first, last;
                try {
                    reader.loadFileInfo();
                    first = reader.getFirstRowKey();
                    last = reader.getLastRowKey();
                    LOG.debug("Trying to figure out region boundaries hfile=" + hfile + " first=" + Bytes.toStringBinary(first) + " last=" + Bytes.toStringBinary(last));
                    // Record +1 at each file's first row key and -1 at its last;
                    // a running sum over the sorted map later yields the boundaries.
                    Integer value = map.containsKey(first) ? map.get(first) : 0;
                    map.put(first, value + 1);
                    value = map.containsKey(last) ? map.get(last) : 0;
                    map.put(last, value - 1);
                } finally {
                    reader.close();
                }
            }
        }
    }
    return LoadIncrementalHFiles.inferBoundaries(map);
}
Also used : Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) IOException(java.io.IOException) TreeMap(java.util.TreeMap) HFile(org.apache.hadoop.hbase.io.hfile.HFile)
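The +1/-1 deltas feed LoadIncrementalHFiles.inferBoundaries at the end. A minimal sketch of the underlying idea, not the actual HBase implementation: scanning the sorted deltas with a running sum, each key where the sum goes from zero to nonzero starts a key range, and every range start after the first becomes a split boundary. The java.util imports (List, ArrayList, Map, TreeMap) are assumed, and the map is sorted with Bytes.BYTES_COMPARATOR as in generateBoundaryKeys above.

// Illustrative only; inferBoundaries' exact implementation may differ.
static byte[][] inferBoundariesSketch(TreeMap<byte[], Integer> deltas) {
    List<byte[]> boundaries = new ArrayList<>();
    int running = 0;
    boolean firstRange = true;
    for (Map.Entry<byte[], Integer> e : deltas.entrySet()) {
        boolean startsRange = (running == 0);
        running += e.getValue();
        if (startsRange && running != 0) {
            // A zero-to-nonzero transition opens a new key range; every
            // range start after the first becomes a region boundary.
            if (!firstRange) {
                boundaries.add(e.getKey());
            }
            firstRange = false;
        }
    }
    return boundaries.toArray(new byte[0][]);
}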

Example 10 with HFile

use of org.apache.hadoop.hbase.io.hfile.HFile in project hbase by apache.

the class TestHalfStoreFileReader method testHalfScanAndReseek.

/**
   * Test the scanner and reseek of a half hfile scanner. The scanner API
   * demands that seekTo and reseekTo() return < 0 only if the key lies
   * before the start of the file (leaving the scanner unpositioned),
   * 0 on a perfect match (rare), and 1 on an imperfect match.
   *
   * The latter case is by far the most common, so we should generally be
   * returning 1; when we do, there may or may not be a 'next' cell in the
   * scanner/file.
   *
   * A bug in the half-file scanner returned -1 at the end of the bottom
   * half, which left the layers above with a null position, causing NPEs
   * and other problems. This test reproduces that failure, and also exercises
   * both the bottom and top halves of the file while we are at it.
   *
   * @throws IOException
   */
@Test
public void testHalfScanAndReseek() throws IOException {
    String root_dir = TEST_UTIL.getDataTestDir().toString();
    Path p = new Path(root_dir, "test");
    Configuration conf = TEST_UTIL.getConfiguration();
    FileSystem fs = FileSystem.get(conf);
    CacheConfig cacheConf = new CacheConfig(conf);
    HFileContext meta = new HFileContextBuilder().withBlockSize(1024).build();
    HFile.Writer w = HFile.getWriterFactory(conf, cacheConf)
            .withPath(fs, p)
            .withFileContext(meta)
            .create();
    // write some things.
    List<KeyValue> items = genSomeKeys();
    for (KeyValue kv : items) {
        w.append(kv);
    }
    w.close();
    HFile.Reader r = HFile.createReader(fs, p, cacheConf, conf);
    r.loadFileInfo();
    Cell midKV = r.midkey();
    byte[] midkey = CellUtil.cloneRow(midKV);
    //System.out.println("midkey: " + midKV + " or: " + Bytes.toStringBinary(midkey));
    Reference bottom = new Reference(midkey, Reference.Range.bottom);
    doTestOfScanAndReseek(p, fs, bottom, cacheConf);
    Reference top = new Reference(midkey, Reference.Range.top);
    doTestOfScanAndReseek(p, fs, top, cacheConf);
    r.close();
}
Also used : Path(org.apache.hadoop.fs.Path) KeyValue(org.apache.hadoop.hbase.KeyValue) Configuration(org.apache.hadoop.conf.Configuration) HFileContextBuilder(org.apache.hadoop.hbase.io.hfile.HFileContextBuilder) HFileContext(org.apache.hadoop.hbase.io.hfile.HFileContext) FileSystem(org.apache.hadoop.fs.FileSystem) HFile(org.apache.hadoop.hbase.io.hfile.HFile) CacheConfig(org.apache.hadoop.hbase.io.hfile.CacheConfig) Cell(org.apache.hadoop.hbase.Cell) Test(org.junit.Test)
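The return-value contract described in the javadoc condenses to the hedged sketch below. It assumes an HFileScanner obtained as in Example 6 and the seekTo(Cell) overload; someCell is a placeholder.

// Illustration of the seekTo()/reseekTo() contract discussed above.
int res = scanner.seekTo(someCell);
if (res < 0) {
    // Sought key precedes the first key in the file; scanner is unpositioned.
} else if (res == 0) {
    // Exact match (rare).
} else {
    // res == 1: imperfect match; the scanner sits at the greatest cell that
    // sorts at or before the sought key. There may or may not be a next().
}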

Aggregations

HFile (org.apache.hadoop.hbase.io.hfile.HFile): 14
CacheConfig (org.apache.hadoop.hbase.io.hfile.CacheConfig): 11
Path (org.apache.hadoop.fs.Path): 8
Configuration (org.apache.hadoop.conf.Configuration): 6
FileSystem (org.apache.hadoop.fs.FileSystem): 6
Cell (org.apache.hadoop.hbase.Cell): 6
KeyValue (org.apache.hadoop.hbase.KeyValue): 6
HFileContext (org.apache.hadoop.hbase.io.hfile.HFileContext): 6
HFileContextBuilder (org.apache.hadoop.hbase.io.hfile.HFileContextBuilder): 6
Test (org.junit.Test): 5
HFileScanner (org.apache.hadoop.hbase.io.hfile.HFileScanner): 4
IOException (java.io.IOException): 3
FileStatus (org.apache.hadoop.fs.FileStatus): 3
InterruptedIOException (java.io.InterruptedIOException): 2
LocatedFileStatus (org.apache.hadoop.fs.LocatedFileStatus): 2
ArrayBackedTag (org.apache.hadoop.hbase.ArrayBackedTag): 2
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration): 2
Tag (org.apache.hadoop.hbase.Tag): 2
ImmutableBytesWritable (org.apache.hadoop.hbase.io.ImmutableBytesWritable): 2
Reader (org.apache.hadoop.hbase.io.hfile.HFile.Reader): 2