
Example 6 with DataBlockEncoding

Use of org.apache.hadoop.hbase.io.encoding.DataBlockEncoding in project hbase by apache.

From the class TestHFile, method testDBEShipped.

@Test
public void testDBEShipped() throws IOException {
    for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
        DataBlockEncoder encoder = encoding.getEncoder();
        if (encoder == null) {
            continue;
        }
        Path f = new Path(ROOT_DIR, testName.getMethodName() + "_" + encoding);
        HFileContext context = new HFileContextBuilder().withIncludesTags(false).withDataBlockEncoding(encoding).build();
        HFileWriterImpl writer = (HFileWriterImpl) HFile.getWriterFactory(conf, cacheConf).withPath(fs, f).withFileContext(context).create();
        KeyValue kv = new KeyValue(Bytes.toBytes("testkey1"), Bytes.toBytes("family"), Bytes.toBytes("qual"), Bytes.toBytes("testvalue"));
        KeyValue kv2 = new KeyValue(Bytes.toBytes("testkey2"), Bytes.toBytes("family"), Bytes.toBytes("qual"), Bytes.toBytes("testvalue"));
        KeyValue kv3 = new KeyValue(Bytes.toBytes("testkey3"), Bytes.toBytes("family"), Bytes.toBytes("qual"), Bytes.toBytes("testvalue"));
        ByteBuffer buffer = ByteBuffer.wrap(kv.getBuffer());
        ByteBuffer buffer2 = ByteBuffer.wrap(kv2.getBuffer());
        ByteBuffer buffer3 = ByteBuffer.wrap(kv3.getBuffer());
        writer.append(new ByteBufferKeyValue(buffer, 0, buffer.remaining()));
        writer.beforeShipped();
        // Pollute the first cell's backing ByteBuffer to simulate buffer reuse after shipping.
        ByteBufferUtils.copyFromBufferToBuffer(buffer3, buffer);
        // Write another cell; if the data block encoder's state was not shipped, the test will fail.
        writer.append(new ByteBufferKeyValue(buffer2, 0, buffer2.remaining()));
        writer.close();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) DataBlockEncoding(org.apache.hadoop.hbase.io.encoding.DataBlockEncoding) ByteBufferKeyValue(org.apache.hadoop.hbase.ByteBufferKeyValue) KeyValue(org.apache.hadoop.hbase.KeyValue) DataBlockEncoder(org.apache.hadoop.hbase.io.encoding.DataBlockEncoder) ByteBuffer(java.nio.ByteBuffer) Test(org.junit.Test)
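
For context, the encoding exercised by this test is normally selected per column family. A minimal sketch of that configuration, assuming the ColumnFamilyDescriptorBuilder and TableDescriptorBuilder APIs listed under Aggregations below; the table and family names are placeholders, not from the test:

import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.util.Bytes;

public class DataBlockEncodingConfigSketch {
    public static TableDescriptor exampleDescriptor() {
        // Choose FAST_DIFF encoding for the "family" column family (placeholder name).
        ColumnFamilyDescriptor cf = ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes("family"))
            .setDataBlockEncoding(DataBlockEncoding.FAST_DIFF)
            .build();
        // Attach the family to a table descriptor; "example_table" is a placeholder.
        return TableDescriptorBuilder.newBuilder(TableName.valueOf("example_table"))
            .setColumnFamily(cf)
            .build();
    }
}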

Example 7 with DataBlockEncoding

Use of org.apache.hadoop.hbase.io.encoding.DataBlockEncoding in project hbase by apache.

From the class TestCacheOnWrite, method readStoreFile.

private void readStoreFile(boolean useTags) throws IOException {
    HFile.Reader reader = HFile.createReader(fs, storeFilePath, cacheConf, true, conf);
    LOG.info("HFile information: " + reader);
    HFileContext meta = new HFileContextBuilder().withCompression(compress).withBytesPerCheckSum(CKBYTES).withChecksumType(ChecksumType.NULL).withBlockSize(DATA_BLOCK_SIZE).withDataBlockEncoding(NoOpDataBlockEncoder.INSTANCE.getDataBlockEncoding()).withIncludesTags(useTags).build();
    final boolean cacheBlocks = false;
    final boolean pread = false;
    HFileScanner scanner = reader.getScanner(conf, cacheBlocks, pread);
    assertTrue(testDescription, scanner.seekTo());
    long offset = 0;
    EnumMap<BlockType, Integer> blockCountByType = new EnumMap<>(BlockType.class);
    DataBlockEncoding encodingInCache = NoOpDataBlockEncoder.INSTANCE.getDataBlockEncoding();
    List<Long> cachedBlocksOffset = new ArrayList<>();
    Map<Long, Pair<HFileBlock, HFileBlock>> cachedBlocks = new HashMap<>();
    while (offset < reader.getTrailer().getLoadOnOpenDataOffset()) {
        // Flags: don't cache the block, use pread, this is not a compaction.
        // Also, pass null for expected block type to avoid checking it.
        HFileBlock block = reader.readBlock(offset, -1, false, true, false, true, null, encodingInCache);
        BlockCacheKey blockCacheKey = new BlockCacheKey(reader.getName(), offset);
        HFileBlock fromCache = (HFileBlock) blockCache.getBlock(blockCacheKey, true, false, true);
        boolean isCached = fromCache != null;
        cachedBlocksOffset.add(offset);
        cachedBlocks.put(offset, fromCache == null ? null : Pair.newPair(block, fromCache));
        boolean shouldBeCached = cowType.shouldBeCached(block.getBlockType());
        assertTrue("shouldBeCached: " + shouldBeCached + "\n" + "isCached: " + isCached + "\n" + "Test description: " + testDescription + "\n" + "block: " + block + "\n" + "encodingInCache: " + encodingInCache + "\n" + "blockCacheKey: " + blockCacheKey, shouldBeCached == isCached);
        if (isCached) {
            if (cacheConf.shouldCacheCompressed(fromCache.getBlockType().getCategory())) {
                if (compress != Compression.Algorithm.NONE) {
                    assertFalse(fromCache.isUnpacked());
                }
                fromCache = fromCache.unpack(meta, reader.getUncachedBlockReader());
            } else {
                assertTrue(fromCache.isUnpacked());
            }
            // The block we cached at write time and the block read from the file should be identical.
            assertEquals(block.getChecksumType(), fromCache.getChecksumType());
            assertEquals(block.getBlockType(), fromCache.getBlockType());
            assertNotEquals(BlockType.ENCODED_DATA, block.getBlockType());
            assertEquals(block.getOnDiskSizeWithHeader(), fromCache.getOnDiskSizeWithHeader());
            assertEquals(block.getOnDiskSizeWithoutHeader(), fromCache.getOnDiskSizeWithoutHeader());
            assertEquals(block.getUncompressedSizeWithoutHeader(), fromCache.getUncompressedSizeWithoutHeader());
        }
        offset += block.getOnDiskSizeWithHeader();
        BlockType bt = block.getBlockType();
        Integer count = blockCountByType.get(bt);
        blockCountByType.put(bt, (count == null ? 0 : count) + 1);
    }
    LOG.info("Block count by type: " + blockCountByType);
    String countByType = blockCountByType.toString();
    if (useTags) {
        assertEquals("{" + BlockType.DATA + "=2663, LEAF_INDEX=297, BLOOM_CHUNK=9, INTERMEDIATE_INDEX=32}", countByType);
    } else {
        assertEquals("{" + BlockType.DATA + "=2498, LEAF_INDEX=278, BLOOM_CHUNK=9, INTERMEDIATE_INDEX=31}", countByType);
    }
    // Iterate over all the KeyValues in the HFile.
    while (scanner.next()) {
        scanner.getCell();
    }
    Iterator<Long> iterator = cachedBlocksOffset.iterator();
    while (iterator.hasNext()) {
        Long entry = iterator.next();
        BlockCacheKey blockCacheKey = new BlockCacheKey(reader.getName(), entry);
        Pair<HFileBlock, HFileBlock> blockPair = cachedBlocks.get(entry);
        if (blockPair != null) {
            // Release both blocks because, for the isCached case, the reference count was
            // incremented twice. Note that different block instances must be released here;
            // see the comments in BucketCache#returnBlock.
            blockPair.getSecond().release();
            if (cacheCompressedData) {
                if (this.compress == Compression.Algorithm.NONE || cowType == CacheOnWriteType.INDEX_BLOCKS || cowType == CacheOnWriteType.BLOOM_BLOCKS) {
                    blockPair.getFirst().release();
                }
            } else {
                blockPair.getFirst().release();
            }
        }
    }
    scanner.shipped();
    reader.close();
}
Also used : DataBlockEncoding(org.apache.hadoop.hbase.io.encoding.DataBlockEncoding) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) EnumMap(java.util.EnumMap) Pair(org.apache.hadoop.hbase.util.Pair)
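
The block-type tally in readStoreFile uses a manual null check on the EnumMap. The same bookkeeping can be written with Map.merge; a small standalone sketch in plain Java (the class name is made up for illustration):

import java.util.EnumMap;
import java.util.Map;
import org.apache.hadoop.hbase.io.hfile.BlockType;

public class BlockTypeCounter {
    private final EnumMap<BlockType, Integer> counts = new EnumMap<>(BlockType.class);

    // Record one occurrence of the given block type (equivalent to the
    // get-then-put pattern in readStoreFile).
    public void record(BlockType type) {
        counts.merge(type, 1, Integer::sum);
    }

    // Return a copy of the per-type counts.
    public Map<BlockType, Integer> snapshot() {
        return new EnumMap<>(counts);
    }
}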

Example 8 with DataBlockEncoding

Use of org.apache.hadoop.hbase.io.encoding.DataBlockEncoding in project hbase by apache.

From the class TestHFileWriterV3WithDataEncoders, method parameters.

@Parameterized.Parameters
public static Collection<Object[]> parameters() {
    DataBlockEncoding[] dataBlockEncodings = DataBlockEncoding.values();
    Object[][] params = new Object[dataBlockEncodings.length * 2 - 2][];
    int i = 0;
    for (DataBlockEncoding dataBlockEncoding : dataBlockEncodings) {
        if (dataBlockEncoding == DataBlockEncoding.NONE) {
            continue;
        }
        params[i++] = new Object[] { false, dataBlockEncoding };
        params[i++] = new Object[] { true, dataBlockEncoding };
    }
    return Arrays.asList(params);
}
Also used : DataBlockEncoding(org.apache.hadoop.hbase.io.encoding.DataBlockEncoding)
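
For orientation, the Object[] pairs produced by parameters() are injected by JUnit 4's Parameterized runner, typically through the test class constructor. A minimal self-contained sketch, assuming the two slots carry a tags flag and the encoding; the class name and the listed combinations are illustrative, not the actual test:

import java.util.Arrays;
import java.util.Collection;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;

@RunWith(Parameterized.class)
public class EncodingParameterizedSketch {
    private final boolean useTags;
    private final DataBlockEncoding encoding;

    // JUnit instantiates the class once per Object[] returned by parameters(),
    // passing the array slots to this constructor in order.
    public EncodingParameterizedSketch(boolean useTags, DataBlockEncoding encoding) {
        this.useTags = useTags;
        this.encoding = encoding;
    }

    @Parameterized.Parameters
    public static Collection<Object[]> parameters() {
        return Arrays.asList(new Object[][] {
            { false, DataBlockEncoding.FAST_DIFF },
            { true, DataBlockEncoding.PREFIX }
        });
    }

    @Test
    public void printCombination() {
        System.out.println("useTags=" + useTags + ", encoding=" + encoding);
    }
}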

Example 9 with DataBlockEncoding

Use of org.apache.hadoop.hbase.io.encoding.DataBlockEncoding in project hbase by apache.

From the class TestReversibleScanners, method testReversibleStoreFileScanner.

@Test
public void testReversibleStoreFileScanner() throws IOException {
    FileSystem fs = TEST_UTIL.getTestFileSystem();
    Path hfilePath = new Path(new Path(TEST_UTIL.getDataTestDir("testReversibleStoreFileScanner"), "regionname"), "familyname");
    CacheConfig cacheConf = new CacheConfig(TEST_UTIL.getConfiguration());
    for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
        HFileContextBuilder hcBuilder = new HFileContextBuilder();
        hcBuilder.withBlockSize(2 * 1024);
        hcBuilder.withDataBlockEncoding(encoding);
        HFileContext hFileContext = hcBuilder.build();
        StoreFileWriter writer = new StoreFileWriter.Builder(TEST_UTIL.getConfiguration(), cacheConf, fs).withOutputDir(hfilePath).withFileContext(hFileContext).build();
        writeStoreFile(writer);
        HStoreFile sf = new HStoreFile(fs, writer.getPath(), TEST_UTIL.getConfiguration(), cacheConf, BloomType.NONE, true);
        List<StoreFileScanner> scanners = StoreFileScanner.getScannersForStoreFiles(Collections.singletonList(sf), false, true, false, false, Long.MAX_VALUE);
        StoreFileScanner scanner = scanners.get(0);
        seekTestOfReversibleKeyValueScanner(scanner);
        for (int readPoint = 0; readPoint < MAXMVCC; readPoint++) {
            LOG.info("Setting read point to " + readPoint);
            scanners = StoreFileScanner.getScannersForStoreFiles(Collections.singletonList(sf), false, true, false, false, readPoint);
            seekTestOfReversibleKeyValueScannerWithMVCC(scanners, readPoint);
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) DataBlockEncoding(org.apache.hadoop.hbase.io.encoding.DataBlockEncoding) FileSystem(org.apache.hadoop.fs.FileSystem) HFileContextBuilder(org.apache.hadoop.hbase.io.hfile.HFileContextBuilder) CacheConfig(org.apache.hadoop.hbase.io.hfile.CacheConfig) HFileContext(org.apache.hadoop.hbase.io.hfile.HFileContext) Test(org.junit.Test)
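
Pulling together the writer and reader calls already shown in Examples 6, 7, and 9, a minimal write-then-read sketch: write one cell into an HFile with a given encoding, then scan it back. The output path is a placeholder, and only calls that appear in these examples are used:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.util.Bytes;

public class EncodedHFileRoundTripSketch {
    static void roundTrip(Configuration conf, FileSystem fs, DataBlockEncoding encoding) throws IOException {
        // Placeholder output path.
        Path path = new Path("/tmp/encoded_hfile_example");
        CacheConfig cacheConf = new CacheConfig(conf);
        HFileContext context = new HFileContextBuilder().withDataBlockEncoding(encoding).build();

        // Write a single cell through the HFile writer factory.
        HFile.Writer writer = HFile.getWriterFactory(conf, cacheConf)
            .withPath(fs, path).withFileContext(context).create();
        writer.append(new KeyValue(Bytes.toBytes("row"), Bytes.toBytes("family"),
            Bytes.toBytes("qual"), Bytes.toBytes("value")));
        writer.close();

        // Read the cell back with an HFileScanner.
        HFile.Reader reader = HFile.createReader(fs, path, cacheConf, true, conf);
        HFileScanner scanner = reader.getScanner(conf, false, false);
        if (scanner.seekTo()) {
            do {
                System.out.println(scanner.getCell());
            } while (scanner.next());
        }
        reader.close();
    }
}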

Example 10 with DataBlockEncoding

Use of org.apache.hadoop.hbase.io.encoding.DataBlockEncoding in project hbase by apache.

From the class DataBlockEncodingTool, method checkStatistics.

/**
 * Check statistics for the given HFile across the different data block encoders.
 * @param scanner scanner over the file whose contents will be encoded
 * @param kvLimit maximum number of KeyValues to process
 * @throws IOException if the scanner is invalid
 */
public void checkStatistics(final KeyValueScanner scanner, final int kvLimit) throws IOException {
    scanner.seek(KeyValue.LOWESTKEY);
    KeyValue currentKV;
    byte[] previousKey = null;
    byte[] currentKey;
    DataBlockEncoding[] encodings = DataBlockEncoding.values();
    ByteArrayOutputStream uncompressedOutputStream = new ByteArrayOutputStream();
    int j = 0;
    while ((currentKV = KeyValueUtil.ensureKeyValue(scanner.next())) != null && j < kvLimit) {
        // Iterates through key/value pairs
        j++;
        currentKey = currentKV.getKey();
        if (previousKey != null) {
            for (int i = 0; i < previousKey.length && i < currentKey.length && previousKey[i] == currentKey[i]; ++i) {
                totalKeyRedundancyLength++;
            }
        }
        // Tags are in use, but this cell has no tags; other cells may still carry tags,
        // so write an explicit zero tags length to keep the serialized layout uniform.
        if (USE_TAG && currentKV.getTagsLength() == 0) {
            uncompressedOutputStream.write(currentKV.getBuffer(), currentKV.getOffset(), currentKV.getLength());
            // write tagsLen = 0.
            uncompressedOutputStream.write(Bytes.toBytes((short) 0));
        } else {
            uncompressedOutputStream.write(currentKV.getBuffer(), currentKV.getOffset(), currentKV.getLength());
        }
        if (includesMemstoreTS) {
            WritableUtils.writeVLong(new DataOutputStream(uncompressedOutputStream), currentKV.getSequenceId());
        }
        previousKey = currentKey;
        int kLen = currentKV.getKeyLength();
        int vLen = currentKV.getValueLength();
        int cfOffset = currentKV.getFamilyOffset();
        int cfLen = currentKV.getFamilyLength();
        int restLen = currentKV.getLength() - kLen - vLen;
        totalKeyLength += kLen;
        totalValueLength += vLen;
        totalPrefixLength += restLen;
        totalCFLength += cfLen;
    }
    rawKVs = uncompressedOutputStream.toByteArray();
    for (DataBlockEncoding encoding : encodings) {
        if (encoding == DataBlockEncoding.NONE) {
            continue;
        }
        DataBlockEncoder d = encoding.getEncoder();
        HFileContext meta = new HFileContextBuilder().withDataBlockEncoding(encoding).withCompression(Compression.Algorithm.NONE).withIncludesMvcc(includesMemstoreTS).withIncludesTags(USE_TAG).build();
        codecs.add(new EncodedDataBlock(conf, d, encoding, rawKVs, meta));
    }
}
Also used : DataBlockEncoding(org.apache.hadoop.hbase.io.encoding.DataBlockEncoding) KeyValue(org.apache.hadoop.hbase.KeyValue) DataOutputStream(java.io.DataOutputStream) HFileContextBuilder(org.apache.hadoop.hbase.io.hfile.HFileContextBuilder) ByteArrayOutputStream(java.io.ByteArrayOutputStream) DataBlockEncoder(org.apache.hadoop.hbase.io.encoding.DataBlockEncoder) EncodedDataBlock(org.apache.hadoop.hbase.io.encoding.EncodedDataBlock) HFileContext(org.apache.hadoop.hbase.io.hfile.HFileContext)
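
The redundancy measurement in checkStatistics boils down to counting how many leading bytes consecutive serialized keys share. Restated as a standalone helper (the method name is made up for illustration):

// Count the shared key-prefix bytes between two consecutive keys; this is the
// per-pair quantity checkStatistics accumulates into totalKeyRedundancyLength.
static int commonPrefixLength(byte[] previousKey, byte[] currentKey) {
    int length = 0;
    while (length < previousKey.length && length < currentKey.length
            && previousKey[length] == currentKey[length]) {
        length++;
    }
    return length;
}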

Aggregations

DataBlockEncoding (org.apache.hadoop.hbase.io.encoding.DataBlockEncoding)29
Path (org.apache.hadoop.fs.Path)8
ArrayList (java.util.ArrayList)7
Test (org.junit.Test)7
Configuration (org.apache.hadoop.conf.Configuration)6
HFileContext (org.apache.hadoop.hbase.io.hfile.HFileContext)6
HFileContextBuilder (org.apache.hadoop.hbase.io.hfile.HFileContextBuilder)6
Compression (org.apache.hadoop.hbase.io.compress.Compression)5
Algorithm (org.apache.hadoop.hbase.io.compress.Compression.Algorithm)5
IOException (java.io.IOException)4
KeyValue (org.apache.hadoop.hbase.KeyValue)4
ColumnFamilyDescriptor (org.apache.hadoop.hbase.client.ColumnFamilyDescriptor)4
ColumnFamilyDescriptorBuilder (org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder)4
BloomType (org.apache.hadoop.hbase.regionserver.BloomType)4
EnumMap (java.util.EnumMap)3
FileSystem (org.apache.hadoop.fs.FileSystem)3
Cell (org.apache.hadoop.hbase.Cell)3
TableDescriptorBuilder (org.apache.hadoop.hbase.client.TableDescriptorBuilder)3
CacheConfig (org.apache.hadoop.hbase.io.hfile.CacheConfig)3
ByteBuffer (java.nio.ByteBuffer)2