Example 11 with HFile

use of org.apache.hadoop.hbase.io.hfile.HFile in project hbase by apache.

the class TestHFileOutputFormat2 method test_TIMERANGE.

/**
   * Test that {@link HFileOutputFormat2} creates an HFile with TIMERANGE
   * metadata used by time-restricted scans.
   */
@Ignore("Goes zombie too frequently; needs work. See HBASE-14563")
@Test
public void test_TIMERANGE() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir = util.getDataTestDir("test_TIMERANGE_present");
    LOG.info("Timerange dir writing to dir: " + dir);
    try {
        // build a record writer using HFileOutputFormat2
        Job job = new Job(conf);
        FileOutputFormat.setOutputPath(job, dir);
        context = createTestTaskAttemptContext(job);
        HFileOutputFormat2 hof = new HFileOutputFormat2();
        writer = hof.getRecordWriter(context);
        // Pass two KeyValues with explicit timestamps
        final byte[] b = Bytes.toBytes("b");
        // value 1 with timestamp 2000
        KeyValue kv = new KeyValue(b, b, b, 2000, b);
        KeyValue original = kv.clone();
        writer.write(new ImmutableBytesWritable(), kv);
        assertEquals(original, kv);
        // value 2 with timestamp 1000
        kv = new KeyValue(b, b, b, 1000, b);
        original = kv.clone();
        writer.write(new ImmutableBytesWritable(), kv);
        assertEquals(original, kv);
        // verify that the file has the proper FileInfo.
        writer.close(context);
        // the generated file lives 1 directory down from the attempt directory
        // and is the only file, e.g.
        // _attempt__0000_r_000000_0/b/1979617994050536795
        FileSystem fs = FileSystem.get(conf);
        Path attemptDirectory = hof.getDefaultWorkFile(context, "").getParent();
        FileStatus[] sub1 = fs.listStatus(attemptDirectory);
        FileStatus[] file = fs.listStatus(sub1[0].getPath());
        // open as HFile Reader and pull out TIMERANGE FileInfo.
        HFile.Reader rd = HFile.createReader(fs, file[0].getPath(), new CacheConfig(conf), conf);
        Map<byte[], byte[]> finfo = rd.loadFileInfo();
        byte[] range = finfo.get("TIMERANGE".getBytes());
        assertNotNull(range);
        // unmarshall and check values.
        TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
        Writables.copyWritable(range, timeRangeTracker);
        LOG.info(timeRangeTracker.getMin() + "...." + timeRangeTracker.getMax());
        assertEquals(1000, timeRangeTracker.getMin());
        assertEquals(2000, timeRangeTracker.getMax());
        rd.close();
    } finally {
        if (writer != null && context != null)
            writer.close(context);
        dir.getFileSystem(conf).delete(dir, true);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) ImmutableBytesWritable(org.apache.hadoop.hbase.io.ImmutableBytesWritable) KeyValue(org.apache.hadoop.hbase.KeyValue) FileStatus(org.apache.hadoop.fs.FileStatus) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) HdfsFileStatus(org.apache.hadoop.hdfs.protocol.HdfsFileStatus) Configuration(org.apache.hadoop.conf.Configuration) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) Reader(org.apache.hadoop.hbase.io.hfile.HFile.Reader) FileSystem(org.apache.hadoop.fs.FileSystem) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) TimeRangeTracker(org.apache.hadoop.hbase.regionserver.TimeRangeTracker) Job(org.apache.hadoop.mapreduce.Job) HFile(org.apache.hadoop.hbase.io.hfile.HFile) Cell(org.apache.hadoop.hbase.Cell) CacheConfig(org.apache.hadoop.hbase.io.hfile.CacheConfig) Ignore(org.junit.Ignore) Test(org.junit.Test)
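For context on why the TIMERANGE entry matters, here is a minimal sketch of the read side: a time-restricted scan lets the region server skip store files whose TIMERANGE metadata does not overlap the requested window. The table name below is hypothetical and the snippet assumes a running cluster; it is an illustration, not part of the test above.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;

static void scanTimeWindow(Configuration conf) throws IOException {
    // Restrict the scan to timestamps in [1000, 2001); TimeRange's upper
    // bound is exclusive, so both test cells (ts=1000 and ts=2000) qualify.
    Scan scan = new Scan();
    scan.setTimeRange(1000L, 2001L);
    scan.addColumn(Bytes.toBytes("b"), Bytes.toBytes("b"));
    try (Connection connection = ConnectionFactory.createConnection(conf);
         Table table = connection.getTable(TableName.valueOf("test_table"));
         ResultScanner results = table.getScanner(scan)) {
        for (Result result : results) {
            // store files whose TIMERANGE misses the window are skipped server-side
        }
    }
}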

Example 12 with HFile

use of org.apache.hadoop.hbase.io.hfile.HFile in project hbase by apache.

the class TestHFileOutputFormat2 method test_WritingTagData.

/**
   * Test that the {@link HFileOutputFormat2} RecordWriter writes tags, such as TTL,
   * into the HFile.
   */
@Test
public void test_WritingTagData() throws Exception {
    Configuration conf = new Configuration(this.util.getConfiguration());
    final String HFILE_FORMAT_VERSION_CONF_KEY = "hfile.format.version";
    conf.setInt(HFILE_FORMAT_VERSION_CONF_KEY, HFile.MIN_FORMAT_VERSION_WITH_TAGS);
    RecordWriter<ImmutableBytesWritable, Cell> writer = null;
    TaskAttemptContext context = null;
    Path dir = util.getDataTestDir("WritingTagData");
    try {
        Job job = new Job(conf);
        FileOutputFormat.setOutputPath(job, dir);
        context = createTestTaskAttemptContext(job);
        HFileOutputFormat2 hof = new HFileOutputFormat2();
        writer = hof.getRecordWriter(context);
        final byte[] b = Bytes.toBytes("b");
        List<Tag> tags = new ArrayList<>();
        tags.add(new ArrayBackedTag(TagType.TTL_TAG_TYPE, Bytes.toBytes(978670)));
        KeyValue kv = new KeyValue(b, b, b, HConstants.LATEST_TIMESTAMP, b, tags);
        writer.write(new ImmutableBytesWritable(), kv);
        writer.close(context);
        writer = null;
        FileSystem fs = dir.getFileSystem(conf);
        RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(dir, true);
        while (iterator.hasNext()) {
            LocatedFileStatus keyFileStatus = iterator.next();
            HFile.Reader reader = HFile.createReader(fs, keyFileStatus.getPath(), new CacheConfig(conf), conf);
            HFileScanner scanner = reader.getScanner(false, false, false);
            scanner.seekTo();
            Cell cell = scanner.getCell();
            List<Tag> tagsFromCell = TagUtil.asList(cell.getTagsArray(), cell.getTagsOffset(), cell.getTagsLength());
            assertTrue(tagsFromCell.size() > 0);
            for (Tag tag : tagsFromCell) {
                assertTrue(tag.getType() == TagType.TTL_TAG_TYPE);
            }
        }
    } finally {
        if (writer != null && context != null)
            writer.close(context);
        dir.getFileSystem(conf).delete(dir, true);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) ImmutableBytesWritable(org.apache.hadoop.hbase.io.ImmutableBytesWritable) KeyValue(org.apache.hadoop.hbase.KeyValue) Configuration(org.apache.hadoop.conf.Configuration) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) ArrayList(java.util.ArrayList) LocatedFileStatus(org.apache.hadoop.fs.LocatedFileStatus) HFileScanner(org.apache.hadoop.hbase.io.hfile.HFileScanner) TaskAttemptContext(org.apache.hadoop.mapreduce.TaskAttemptContext) Reader(org.apache.hadoop.hbase.io.hfile.HFile.Reader) FileSystem(org.apache.hadoop.fs.FileSystem) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) ArrayBackedTag(org.apache.hadoop.hbase.ArrayBackedTag) Tag(org.apache.hadoop.hbase.Tag) Job(org.apache.hadoop.mapreduce.Job) HFile(org.apache.hadoop.hbase.io.hfile.HFile) Cell(org.apache.hadoop.hbase.Cell) CacheConfig(org.apache.hadoop.hbase.io.hfile.CacheConfig) Test(org.junit.Test)
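The TTL tag written by hand above is normally produced by the client API. A hedged sketch of that path, assuming a cell-TTL-capable cluster (HFile v3) and a hypothetical table; note the test stores a 4-byte int as the tag value while Mutation.setTTL takes a long in milliseconds, so this is conceptually, not byte-for-byte, equivalent:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;

static void putWithTtl(Configuration conf) throws IOException {
    try (Connection connection = ConnectionFactory.createConnection(conf);
         Table table = connection.getTable(TableName.valueOf("test_table"))) {
        Put put = new Put(Bytes.toBytes("row1"));
        // Mutation.setTTL attaches a per-mutation TTL in milliseconds; the
        // region server turns it into a TTL_TAG_TYPE tag on each cell.
        put.setTTL(978670L);
        put.addColumn(Bytes.toBytes("b"), Bytes.toBytes("b"), Bytes.toBytes("b"));
        table.put(put);
    }
}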

Example 13 with HFile

use of org.apache.hadoop.hbase.io.hfile.HFile in project hbase by apache.

the class TestHRegionServerBulkLoad method createHFile.

/**
   * Create an HFile with the given number of rows, each carrying the specified value.
   */
public static void createHFile(FileSystem fs, Path path, byte[] family, byte[] qualifier, byte[] value, int numRows) throws IOException {
    HFileContext context = new HFileContextBuilder().withBlockSize(BLOCKSIZE).withCompression(COMPRESSION).build();
    HFile.Writer writer = HFile.getWriterFactory(conf, new CacheConfig(conf)).withPath(fs, path).withFileContext(context).create();
    long now = System.currentTimeMillis();
    try {
        // write numRows KeyValues, all in the same family/qualifier and
        // all stamped with the same 'now' timestamp
        for (int i = 0; i < numRows; i++) {
            KeyValue kv = new KeyValue(rowkey(i), family, qualifier, now, value);
            writer.append(kv);
        }
        writer.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(now));
    } finally {
        writer.close();
    }
}
Also used : KeyValue(org.apache.hadoop.hbase.KeyValue) HFileContextBuilder(org.apache.hadoop.hbase.io.hfile.HFileContextBuilder) HFile(org.apache.hadoop.hbase.io.hfile.HFile) CacheConfig(org.apache.hadoop.hbase.io.hfile.CacheConfig) HFileContext(org.apache.hadoop.hbase.io.hfile.HFileContext)
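A typical next step after createHFile is handing the file to the bulk loader. A minimal sketch, assuming the target table already exists and the HFiles sit under per-family subdirectories as LoadIncrementalHFiles expects; the directory path and table name are illustrative:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;

static void bulkLoad(Configuration conf) throws Exception {
    try (Connection connection = ConnectionFactory.createConnection(conf);
         Admin admin = connection.getAdmin();
         Table table = connection.getTable(TableName.valueOf("test_table"));
         RegionLocator locator = connection.getRegionLocator(table.getName())) {
        // expects a layout of <dir>/<family>/<hfile>, e.g. /tmp/hfiles/f/...
        new LoadIncrementalHFiles(conf).doBulkLoad(new Path("/tmp/hfiles"), admin, table, locator);
    }
}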

Example 14 with HFile

use of org.apache.hadoop.hbase.io.hfile.HFile in project hbase by apache.

the class HStore method assertBulkLoadHFileOk.

/**
   * This throws a WrongRegionException if the HFile does not fit in this region, or an
   * InvalidHFileException if the HFile is not valid.
   */
public void assertBulkLoadHFileOk(Path srcPath) throws IOException {
    HFile.Reader reader = null;
    try {
        LOG.info("Validating hfile at " + srcPath + " for inclusion in " + "store " + this + " region " + this.getRegionInfo().getRegionNameAsString());
        reader = HFile.createReader(srcPath.getFileSystem(conf), srcPath, cacheConf, conf);
        reader.loadFileInfo();
        byte[] firstKey = reader.getFirstRowKey();
        Preconditions.checkState(firstKey != null, "First key can not be null");
        Cell lk = reader.getLastKey();
        Preconditions.checkState(lk != null, "Last key can not be null");
        byte[] lastKey = CellUtil.cloneRow(lk);
        LOG.debug("HFile bounds: first=" + Bytes.toStringBinary(firstKey) + " last=" + Bytes.toStringBinary(lastKey));
        LOG.debug("Region bounds: first=" + Bytes.toStringBinary(getRegionInfo().getStartKey()) + " last=" + Bytes.toStringBinary(getRegionInfo().getEndKey()));
        if (!this.getRegionInfo().containsRange(firstKey, lastKey)) {
            throw new WrongRegionException("Bulk load file " + srcPath.toString() + " does not fit inside region " + this.getRegionInfo().getRegionNameAsString());
        }
        if (reader.length() > conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE)) {
            LOG.warn("Trying to bulk load hfile " + srcPath.toString() + " with size: " + reader.length() + " bytes can be problematic as it may lead to oversplitting.");
        }
        if (verifyBulkLoads) {
            long verificationStartTime = EnvironmentEdgeManager.currentTime();
            LOG.info("Full verification started for bulk load hfile: " + srcPath.toString());
            Cell prevCell = null;
            HFileScanner scanner = reader.getScanner(false, false, false);
            scanner.seekTo();
            do {
                Cell cell = scanner.getCell();
                if (prevCell != null) {
                    if (comparator.compareRows(prevCell, cell) > 0) {
                        throw new InvalidHFileException("Previous row is greater than" + " current row: path=" + srcPath + " previous=" + CellUtil.getCellKeyAsString(prevCell) + " current=" + CellUtil.getCellKeyAsString(cell));
                    }
                    if (CellComparator.compareFamilies(prevCell, cell) != 0) {
                        throw new InvalidHFileException("Previous key had different" + " family compared to current key: path=" + srcPath + " previous=" + Bytes.toStringBinary(prevCell.getFamilyArray(), prevCell.getFamilyOffset(), prevCell.getFamilyLength()) + " current=" + Bytes.toStringBinary(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()));
                    }
                }
                prevCell = cell;
            } while (scanner.next());
            LOG.info("Full verification complete for bulk load hfile: " + srcPath.toString() + " took " + (EnvironmentEdgeManager.currentTime() - verificationStartTime) + " ms");
        }
    } finally {
        if (reader != null)
            reader.close();
    }
}
Also used : InvalidHFileException(org.apache.hadoop.hbase.io.hfile.InvalidHFileException) HFileScanner(org.apache.hadoop.hbase.io.hfile.HFileScanner) HFile(org.apache.hadoop.hbase.io.hfile.HFile)
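The region-fit test at the core of the method reduces to a row-range containment check. An illustrative standalone version follows; it is not the HRegionInfo.containsRange implementation itself, though it mirrors the documented semantics, where an empty end key marks the last region in the table:

import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.util.Bytes;

// An HFile whose row keys span [first, last] fits a region [startKey, endKey)
// when first >= startKey and either the region is the last one in the table
// (empty end key) or last sorts strictly before endKey.
static boolean fitsInRegion(byte[] first, byte[] last, byte[] startKey, byte[] endKey) {
    boolean firstKeyInRange = Bytes.compareTo(first, startKey) >= 0;
    boolean lastKeyInRange = Bytes.equals(endKey, HConstants.EMPTY_END_ROW)
        || Bytes.compareTo(last, endKey) < 0;
    return firstKeyInRange && lastKeyInRange;
}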

Aggregations

HFile (org.apache.hadoop.hbase.io.hfile.HFile): 14 uses
CacheConfig (org.apache.hadoop.hbase.io.hfile.CacheConfig): 11 uses
Path (org.apache.hadoop.fs.Path): 8 uses
Configuration (org.apache.hadoop.conf.Configuration): 6 uses
FileSystem (org.apache.hadoop.fs.FileSystem): 6 uses
Cell (org.apache.hadoop.hbase.Cell): 6 uses
KeyValue (org.apache.hadoop.hbase.KeyValue): 6 uses
HFileContext (org.apache.hadoop.hbase.io.hfile.HFileContext): 6 uses
HFileContextBuilder (org.apache.hadoop.hbase.io.hfile.HFileContextBuilder): 6 uses
Test (org.junit.Test): 5 uses
HFileScanner (org.apache.hadoop.hbase.io.hfile.HFileScanner): 4 uses
IOException (java.io.IOException): 3 uses
FileStatus (org.apache.hadoop.fs.FileStatus): 3 uses
InterruptedIOException (java.io.InterruptedIOException): 2 uses
LocatedFileStatus (org.apache.hadoop.fs.LocatedFileStatus): 2 uses
ArrayBackedTag (org.apache.hadoop.hbase.ArrayBackedTag): 2 uses
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration): 2 uses
Tag (org.apache.hadoop.hbase.Tag): 2 uses
ImmutableBytesWritable (org.apache.hadoop.hbase.io.ImmutableBytesWritable): 2 uses
Reader (org.apache.hadoop.hbase.io.hfile.HFile.Reader): 2 uses