Search in sources:

Example 1 with HFile

use of org.apache.hadoop.hbase.io.hfile.HFile in project hbase by apache.

the class TestMobStoreCompaction method createHFile.

/**
   * Create an HFile at the given path containing a single KeyValue whose row
   * is derived from rowIdx and whose value is the supplied dummy data.
   */
private void createHFile(Path path, int rowIdx, byte[] dummyData) throws IOException {
    HFileContext meta = new HFileContextBuilder().build();
    HFile.Writer writer = HFile.getWriterFactory(conf, new CacheConfig(conf)).withPath(fs, path).withFileContext(meta).create();
    long now = System.currentTimeMillis();
    try {
        KeyValue kv = new KeyValue(Bytes.add(STARTROW, Bytes.toBytes(rowIdx)), COLUMN_FAMILY, Bytes.toBytes("colX"), now, dummyData);
        writer.append(kv);
    } finally {
        // Record the bulk load timestamp in the file info before closing.
        writer.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
        writer.close();
    }
}
Also used: HFileContextBuilder (org.apache.hadoop.hbase.io.hfile.HFileContextBuilder), HFile (org.apache.hadoop.hbase.io.hfile.HFile), CacheConfig (org.apache.hadoop.hbase.io.hfile.CacheConfig), HFileContext (org.apache.hadoop.hbase.io.hfile.HFileContext)
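
A hedged usage sketch: a test like this would presumably call createHFile in a loop to seed several store files before triggering compaction. The base directory, file count, and payload size below are illustrative assumptions, not values taken from TestMobStoreCompaction.

// Hypothetical call site; basedir, numHFiles, and the payload size are assumed.
byte[] dummyData = new byte[5000];
new Random().nextBytes(dummyData);        // java.util.Random
for (int i = 0; i < numHFiles; i++) {
    createHFile(new Path(basedir, "hfile_" + i), i, dummyData);
}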

Example 2 with HFile

use of org.apache.hadoop.hbase.io.hfile.HFile in project hbase by apache.

the class TestHBaseFsckTwoRS method testLingeringHFileLinks.

/**
   * Test fixing lingering HFileLinks.
   */
@Test(timeout = 180000)
public void testLingeringHFileLinks() throws Exception {
    final TableName tableName = TableName.valueOf(name.getMethodName());
    try {
        setupTable(tableName);
        FileSystem fs = FileSystem.get(conf);
        Path tableDir = FSUtils.getTableDir(FSUtils.getRootDir(conf), tableName);
        Path regionDir = FSUtils.getRegionDirs(fs, tableDir).get(0);
        String regionName = regionDir.getName();
        Path famDir = new Path(regionDir, FAM_STR);
        String HFILE_NAME = "01234567abcd";
        Path hFilePath = new Path(famDir, HFILE_NAME);
        // Create an empty HFile for the link to reference.
        HFileContext context = new HFileContextBuilder().withIncludesTags(false).build();
        HFile.Writer w = HFile.getWriterFactoryNoCache(conf).withPath(fs, hFilePath).withFileContext(context).create();
        w.close();
        HFileLink.create(conf, fs, famDir, tableName, regionName, HFILE_NAME);
        // should report no error
        HBaseFsck hbck = doFsck(conf, false);
        assertNoErrors(hbck);
        // Delete linked file
        fs.delete(hFilePath, true);
        // Check without fix should show the error
        hbck = doFsck(conf, false);
        assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_HFILELINK });
        // Run hbck with fix enabled; the lingering link is reported once more while it is repaired
        hbck = doFsck(conf, true);
        assertErrors(hbck, new HBaseFsck.ErrorReporter.ERROR_CODE[] { HBaseFsck.ErrorReporter.ERROR_CODE.LINGERING_HFILELINK });
        // Fix should sideline these files, thus preventing the error
        hbck = doFsck(conf, false);
        assertNoErrors(hbck);
    } finally {
        cleanupTable(tableName);
    }
}
Also used: Path (org.apache.hadoop.fs.Path), FileSystem (org.apache.hadoop.fs.FileSystem), HFileContextBuilder (org.apache.hadoop.hbase.io.hfile.HFileContextBuilder), HFile (org.apache.hadoop.hbase.io.hfile.HFile), HFileContext (org.apache.hadoop.hbase.io.hfile.HFileContext), Test (org.junit.Test)
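
The lingering-link check hinges on how link files are named: an HFileLink is a plain file in the family directory whose name encodes the referenced table, region, and hfile. A minimal sketch, assuming the static helper HFileLink.createHFileLinkName(TableName, String, String) with this shape; treat the exact signature as an assumption:

// The link created above is a file named "<table>=<region>-<hfile>"; hbck
// resolves that name and reports LINGERING_HFILELINK once the referenced
// file has been deleted.
String linkName = HFileLink.createHFileLinkName(tableName, regionName, HFILE_NAME);
assertTrue(fs.exists(new Path(famDir, linkName)));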

Example 3 with HFile

use of org.apache.hadoop.hbase.io.hfile.HFile in project hbase by apache.

the class LoadIncrementalHFiles method groupOrSplit.

/**
   * Attempts to assign the given load queue item to its target region group.
   * If the hfile's key range no longer fits within a single region, physically
   * splits the hfile so that the new bottom half fits, and returns the list of
   * LQIs corresponding to the resulting hfiles.
   *
   * protected for testing
   * @throws IOException if an IO failure is encountered
   */
protected Pair<List<LoadQueueItem>, String> groupOrSplit(Multimap<ByteBuffer, LoadQueueItem> regionGroups, final LoadQueueItem item, final Table table, final Pair<byte[][], byte[][]> startEndKeys) throws IOException {
    final Path hfilePath = item.hfilePath;
    // fs is the source filesystem
    if (fs == null) {
        fs = hfilePath.getFileSystem(getConf());
    }
    HFile.Reader hfr = null;
    try {
        hfr = HFile.createReader(fs, hfilePath, new CacheConfig(getConf()), getConf());
    } catch (FileNotFoundException fnfe) {
        LOG.debug("HFile " + hfilePath + " was not found, skipping", fnfe);
        return new Pair<>(null, hfilePath.getName());
    }
    final byte[] first, last;
    try {
        hfr.loadFileInfo();
        first = hfr.getFirstRowKey();
        last = hfr.getLastRowKey();
    } finally {
        hfr.close();
    }
    LOG.info("Trying to load hfile=" + hfilePath + " first=" + Bytes.toStringBinary(first) + " last=" + Bytes.toStringBinary(last));
    if (first == null || last == null) {
        assert first == null && last == null;
        // TODO what if this is due to a bad HFile?
        LOG.info("hfile " + hfilePath + " has no entries, skipping");
        return null;
    }
    if (Bytes.compareTo(first, last) > 0) {
        throw new IllegalArgumentException("Invalid range: " + Bytes.toStringBinary(first) + " > " + Bytes.toStringBinary(last));
    }
    int idx = Arrays.binarySearch(startEndKeys.getFirst(), first, Bytes.BYTES_COMPARATOR);
    if (idx < 0) {
        // first is not exactly a region start key: binarySearch returned
        // -(insertionPoint) - 1, so recover the index of the region that
        // would contain it.
        idx = -(idx + 1) - 1;
    }
    final int indexForCallable = idx;
    /**
     * We consider there to be a region hole when any of the following holds:
     * 1) idx < 0, i.e. the first region's info is lost;
     * 2) the end key of a region does not equal the start key of the next region;
     * 3) the end key of the last region is not empty.
     */
    if (indexForCallable < 0) {
        throw new IOException("The first region info for table " + table.getName() + " can't be found in hbase:meta. Please use the hbck tool to fix it first.");
    } else if ((indexForCallable == startEndKeys.getFirst().length - 1) && !Bytes.equals(startEndKeys.getSecond()[indexForCallable], HConstants.EMPTY_BYTE_ARRAY)) {
        throw new IOException("The last region info for table " + table.getName() + " can't be found in hbase:meta. Please use the hbck tool to fix it first.");
    } else if (indexForCallable + 1 < startEndKeys.getFirst().length && !(Bytes.compareTo(startEndKeys.getSecond()[indexForCallable], startEndKeys.getFirst()[indexForCallable + 1]) == 0)) {
        throw new IOException("The end key of one region for table " + table.getName() + " is not equal to the start key of the next region in hbase:meta. " + "Please use the hbck tool to fix it first.");
    }
    boolean lastKeyInRange = Bytes.compareTo(last, startEndKeys.getSecond()[idx]) < 0 || Bytes.equals(startEndKeys.getSecond()[idx], HConstants.EMPTY_BYTE_ARRAY);
    if (!lastKeyInRange) {
        List<LoadQueueItem> lqis = splitStoreFile(item, table, startEndKeys.getFirst()[indexForCallable], startEndKeys.getSecond()[indexForCallable]);
        return new Pair<>(lqis, null);
    }
    // group regions.
    regionGroups.put(ByteBuffer.wrap(startEndKeys.getFirst()[idx]), item);
    return null;
}
Also used: Path (org.apache.hadoop.fs.Path), FileNotFoundException (java.io.FileNotFoundException), InterruptedIOException (java.io.InterruptedIOException), IOException (java.io.IOException), HFile (org.apache.hadoop.hbase.io.hfile.HFile), CacheConfig (org.apache.hadoop.hbase.io.hfile.CacheConfig), Pair (org.apache.hadoop.hbase.util.Pair)
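
The insertion-point arithmetic above is the crux of mapping an hfile to its region. A self-contained toy demo of the same trick, using strings in place of byte[] row keys (illustrative only, not HBase code):

import java.util.Arrays;

public class RegionIndexDemo {
    public static void main(String[] args) {
        // Sorted region start keys; "" is the first region's start key.
        String[] startKeys = { "", "d", "m", "t" };
        String first = "g";  // first row key of the hfile being loaded
        int idx = Arrays.binarySearch(startKeys, first);
        if (idx < 0) {
            // On a miss, binarySearch returns -(insertionPoint) - 1; the
            // containing region sits just before the insertion point.
            idx = -(idx + 1) - 1;
        }
        // Prints 1: "g" falls in the region ["d", "m").
        System.out.println("hfile maps to region index " + idx);
    }
}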

Example 4 with HFile

use of org.apache.hadoop.hbase.io.hfile.HFile in project hbase by apache.

the class HBaseFsck method adoptHdfsOrphan.

/**
   * Orphaned regions are regions without a .regioninfo file in them. We "adopt"
   * these orphans by creating a new region and moving the column families,
   * recovered edits, and WALs into the new region dir. We determine the region's
   * start and end keys by looking at all of the hfiles inside its column
   * families to identify the min and max keys. The resulting region will likely
   * violate table integrity, but that will be dealt with by merging
   * overlapping regions.
   */
@SuppressWarnings("deprecation")
private void adoptHdfsOrphan(HbckInfo hi) throws IOException {
    Path p = hi.getHdfsRegionDir();
    FileSystem fs = p.getFileSystem(getConf());
    FileStatus[] dirs = fs.listStatus(p);
    if (dirs == null) {
        LOG.warn("Attempt to adopt orphan hdfs region skipped because no files present in " + p + ". This dir could probably be deleted.");
        return;
    }
    TableName tableName = hi.getTableName();
    TableInfo tableInfo = tablesInfo.get(tableName);
    Preconditions.checkNotNull(tableInfo, "Table '" + tableName + "' not present!");
    HTableDescriptor template = tableInfo.getHTD();
    // find min and max key values
    Pair<byte[], byte[]> orphanRegionRange = null;
    for (FileStatus cf : dirs) {
        String cfName = cf.getPath().getName();
        // TODO Figure out what the special dirs are
        if (cfName.startsWith(".") || cfName.equals(HConstants.SPLIT_LOGDIR_NAME))
            continue;
        FileStatus[] hfiles = fs.listStatus(cf.getPath());
        for (FileStatus hfile : hfiles) {
            byte[] start, end;
            HFile.Reader hf = null;
            try {
                CacheConfig cacheConf = new CacheConfig(getConf());
                hf = HFile.createReader(fs, hfile.getPath(), cacheConf, getConf());
                hf.loadFileInfo();
                Cell startKv = hf.getFirstKey();
                start = CellUtil.cloneRow(startKv);
                Cell endKv = hf.getLastKey();
                end = CellUtil.cloneRow(endKv);
            } catch (IOException ioe) {
                LOG.warn("Problem reading orphan file " + hfile + ", skipping", ioe);
                continue;
            } catch (NullPointerException npe) {
                LOG.warn("Orphan file " + hfile + " is possibly a corrupted HFile, skipping", npe);
                continue;
            } finally {
                if (hf != null) {
                    hf.close();
                }
            }
            // expand the range to include the range of all hfiles
            if (orphanRegionRange == null) {
                // first range
                orphanRegionRange = new Pair<>(start, end);
            } else {
                // expand range only if the hfile is wider.
                if (Bytes.compareTo(orphanRegionRange.getFirst(), start) > 0) {
                    orphanRegionRange.setFirst(start);
                }
                if (Bytes.compareTo(orphanRegionRange.getSecond(), end) < 0) {
                    orphanRegionRange.setSecond(end);
                }
            }
        }
    }
    if (orphanRegionRange == null) {
        LOG.warn("No data in dir " + p + ", sidelining data");
        fixes++;
        sidelineRegionDir(fs, hi);
        return;
    }
    LOG.info("Min max keys are : [" + Bytes.toString(orphanRegionRange.getFirst()) + ", " + Bytes.toString(orphanRegionRange.getSecond()) + ")");
    // Create the new region on hdfs and move the data into place. HRegionInfo end
    // keys are exclusive, so append a zero byte so the region still covers the max row.
    HRegionInfo hri = new HRegionInfo(template.getTableName(), orphanRegionRange.getFirst(), Bytes.add(orphanRegionRange.getSecond(), new byte[1]));
    LOG.info("Creating new region : " + hri);
    HRegion region = HBaseFsckRepair.createHDFSRegionDir(getConf(), hri, template);
    Path target = region.getRegionFileSystem().getRegionDir();
    // rename all the data to new region
    mergeRegionDirs(target, hi);
    fixes++;
}
Also used: Path (org.apache.hadoop.fs.Path), FileStatus (org.apache.hadoop.fs.FileStatus), InterruptedIOException (java.io.InterruptedIOException), IOException (java.io.IOException), HTableDescriptor (org.apache.hadoop.hbase.HTableDescriptor), HRegionInfo (org.apache.hadoop.hbase.HRegionInfo), TableName (org.apache.hadoop.hbase.TableName), HRegion (org.apache.hadoop.hbase.regionserver.HRegion), FileSystem (org.apache.hadoop.fs.FileSystem), MasterFileSystem (org.apache.hadoop.hbase.master.MasterFileSystem), HRegionFileSystem (org.apache.hadoop.hbase.regionserver.HRegionFileSystem), HFile (org.apache.hadoop.hbase.io.hfile.HFile), CacheConfig (org.apache.hadoop.hbase.io.hfile.CacheConfig), Cell (org.apache.hadoop.hbase.Cell)
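
The per-hfile scan above is a simple min/max fold over each file's first and last row keys. A minimal sketch of the fold itself, where hfileRanges is a hypothetical stand-in for the per-hfile {start, end} row-key pairs:

// hfileRanges is hypothetical: one {startRow, endRow} pair per hfile.
byte[] lo = null, hi = null;
for (byte[][] range : hfileRanges) {
    if (lo == null || Bytes.compareTo(lo, range[0]) > 0) lo = range[0];
    if (hi == null || Bytes.compareTo(hi, range[1]) < 0) hi = range[1];
}
// [lo, hi] now covers every hfile, matching orphanRegionRange above.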

Example 5 with HFile

use of org.apache.hadoop.hbase.io.hfile.HFile in project hbase by apache.

the class TestImportTSVWithVisibilityLabels method getKVCountFromHfile.

/**
 * Returns the total number of KeyValues in the given hfile.
 * @param fs the file system
 * @param p the HFile path
 * @return the KV count in the given hfile
 * @throws IOException if the hfile cannot be opened or read
 */
private static int getKVCountFromHfile(FileSystem fs, Path p) throws IOException {
    Configuration conf = util.getConfiguration();
    HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf), true, conf);
    HFileScanner scanner = reader.getScanner(conf, false, false);
    int count = 0;
    try {
        // seekTo() returns false on an empty file; guard the do/while so an
        // empty hfile is reported as 0 rather than 1.
        if (scanner.seekTo()) {
            do {
                count++;
            } while (scanner.next());
        }
    } finally {
        reader.close();
    }
    return count;
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), HFileScanner (org.apache.hadoop.hbase.io.hfile.HFileScanner), HFile (org.apache.hadoop.hbase.io.hfile.HFile), CacheConfig (org.apache.hadoop.hbase.io.hfile.CacheConfig)
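
A hedged call-site sketch; the output path and expected count are illustrative, not taken from the actual test:

// Hypothetical assertion after an ImportTsv run; outputDir and
// expectedKvCount are assumed locals.
FileSystem fs = FileSystem.get(util.getConfiguration());
int kvCount = getKVCountFromHfile(fs, new Path(outputDir, "cf/hfile0"));
assertEquals(expectedKvCount, kvCount);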

Aggregations

HFile (org.apache.hadoop.hbase.io.hfile.HFile): 19
CacheConfig (org.apache.hadoop.hbase.io.hfile.CacheConfig): 13
Path (org.apache.hadoop.fs.Path): 11
Cell (org.apache.hadoop.hbase.Cell): 10
FileSystem (org.apache.hadoop.fs.FileSystem): 9
KeyValue (org.apache.hadoop.hbase.KeyValue): 8
HFileContext (org.apache.hadoop.hbase.io.hfile.HFileContext): 8
HFileContextBuilder (org.apache.hadoop.hbase.io.hfile.HFileContextBuilder): 8
Configuration (org.apache.hadoop.conf.Configuration): 6
HFileScanner (org.apache.hadoop.hbase.io.hfile.HFileScanner): 5
Test (org.junit.Test): 5
IOException (java.io.IOException): 4
FileStatus (org.apache.hadoop.fs.FileStatus): 4
InterruptedIOException (java.io.InterruptedIOException): 3
FileNotFoundException (java.io.FileNotFoundException): 2
LocatedFileStatus (org.apache.hadoop.fs.LocatedFileStatus): 2
ArrayBackedTag (org.apache.hadoop.hbase.ArrayBackedTag): 2
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration): 2
TableName (org.apache.hadoop.hbase.TableName): 2
Tag (org.apache.hadoop.hbase.Tag): 2