Example 6 with Compression

Use of org.apache.hadoop.hbase.io.compress.Compression in project hbase by apache.

From class TestHFileBlock, method testInternals.

private void testInternals() throws IOException {
    final int numBlocks = 5;
    final Configuration conf = TEST_UTIL.getConfiguration();
    if (includesTag) {
        conf.setInt("hfile.format.version", 3);
    }
    for (Compression.Algorithm algo : COMPRESSION_ALGORITHMS) {
        for (boolean pread : new boolean[] { false, true }) {
            for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
                LOG.info("testDataBlockEncoding: Compression algorithm={}, pread={}, dataBlockEncoder={}", algo.toString(), pread, encoding);
                Path path = new Path(TEST_UTIL.getDataTestDir(), "blocks_v2_" + algo + "_" + encoding.toString());
                FSDataOutputStream os = fs.create(path);
                HFileDataBlockEncoder dataBlockEncoder = (encoding != DataBlockEncoding.NONE) ? new HFileDataBlockEncoderImpl(encoding) : NoOpDataBlockEncoder.INSTANCE;
                HFileContext meta = new HFileContextBuilder().withCompression(algo)
                    .withIncludesMvcc(includesMemstoreTS).withIncludesTags(includesTag)
                    .withBytesPerCheckSum(HFile.DEFAULT_BYTES_PER_CHECKSUM).build();
                HFileBlock.Writer hbw = new HFileBlock.Writer(conf, dataBlockEncoder, meta);
                long totalSize = 0;
                final List<Integer> encodedSizes = new ArrayList<>();
                final List<ByteBuff> encodedBlocks = new ArrayList<>();
                for (int blockId = 0; blockId < numBlocks; ++blockId) {
                    hbw.startWriting(BlockType.DATA);
                    writeTestKeyValues(hbw, blockId, includesMemstoreTS, includesTag);
                    hbw.writeHeaderAndData(os);
                    int headerLen = HConstants.HFILEBLOCK_HEADER_SIZE;
                    ByteBuff encodedResultWithHeader = hbw.cloneUncompressedBufferWithHeader();
                    final int encodedSize = encodedResultWithHeader.limit() - headerLen;
                    if (encoding != DataBlockEncoding.NONE) {
                        // We need to account for the two-byte encoding algorithm ID that
                        // comes after the 24-byte block header but before encoded KVs.
                        headerLen += DataBlockEncoding.ID_SIZE;
                    }
                    encodedSizes.add(encodedSize);
                    ByteBuff encodedBuf = encodedResultWithHeader.position(headerLen).slice();
                    encodedBlocks.add(encodedBuf);
                    totalSize += hbw.getOnDiskSizeWithHeader();
                }
                os.close();
                FSDataInputStream is = fs.open(path);
                meta = new HFileContextBuilder().withHBaseCheckSum(true).withCompression(algo)
                    .withIncludesMvcc(includesMemstoreTS).withIncludesTags(includesTag).build();
                ReaderContext context = new ReaderContextBuilder()
                    .withInputStreamWrapper(new FSDataInputStreamWrapper(is)).withFileSize(totalSize)
                    .withFilePath(path).withFileSystem(fs).build();
                HFileBlock.FSReaderImpl hbr = new HFileBlock.FSReaderImpl(context, meta, alloc, conf);
                hbr.setDataBlockEncoder(dataBlockEncoder, conf);
                hbr.setIncludesMemStoreTS(includesMemstoreTS);
                HFileBlock blockFromHFile, blockUnpacked;
                int pos = 0;
                for (int blockId = 0; blockId < numBlocks; ++blockId) {
                    blockFromHFile = hbr.readBlockData(pos, -1, pread, false, true);
                    assertEquals(0, HFile.getAndResetChecksumFailuresCount());
                    blockFromHFile.sanityCheck();
                    pos += blockFromHFile.getOnDiskSizeWithHeader();
                    assertEquals((int) encodedSizes.get(blockId), blockFromHFile.getUncompressedSizeWithoutHeader());
                    assertEquals(meta.isCompressedOrEncrypted(), !blockFromHFile.isUnpacked());
                    long packedHeapsize = blockFromHFile.heapSize();
                    blockUnpacked = blockFromHFile.unpack(meta, hbr);
                    assertTrue(blockUnpacked.isUnpacked());
                    if (meta.isCompressedOrEncrypted()) {
                        LOG.info("packedHeapsize=" + packedHeapsize + ", unpackedHeadsize=" + blockUnpacked.heapSize());
                        assertFalse(packedHeapsize == blockUnpacked.heapSize());
                        assertTrue("Packed heapSize should be < unpacked heapSize", packedHeapsize < blockUnpacked.heapSize());
                    }
                    ByteBuff actualBuffer = blockUnpacked.getBufferWithoutHeader();
                    if (encoding != DataBlockEncoding.NONE) {
                        // We expect a two-byte big-endian encoding id.
                        assertEquals("Unexpected first byte with " + buildMessageDetails(algo, encoding, pread), Long.toHexString(0), Long.toHexString(actualBuffer.get(0)));
                        assertEquals("Unexpected second byte with " + buildMessageDetails(algo, encoding, pread), Long.toHexString(encoding.getId()), Long.toHexString(actualBuffer.get(1)));
                        actualBuffer.position(2);
                        actualBuffer = actualBuffer.slice();
                    }
                    ByteBuff expectedBuff = encodedBlocks.get(blockId);
                    expectedBuff.rewind();
                    // test if content matches, produce nice message
                    assertBuffersEqual(expectedBuff, actualBuffer, algo, encoding, pread);
                    // test serialized blocks
                    for (boolean reuseBuffer : new boolean[] { false, true }) {
                        ByteBuffer serialized = ByteBuffer.allocate(blockFromHFile.getSerializedLength());
                        blockFromHFile.serialize(serialized, true);
                        HFileBlock deserialized = (HFileBlock) blockFromHFile.getDeserializer().deserialize(new SingleByteBuff(serialized), HEAP);
                        assertEquals("Serialization did not preserve block state. reuseBuffer=" + reuseBuffer, blockFromHFile, deserialized);
                        // intentional reference comparison
                        if (blockFromHFile != blockUnpacked) {
                            assertEquals("Deserialized block cannot be unpacked correctly.", blockUnpacked, deserialized.unpack(meta, hbr));
                        }
                    }
                    assertRelease(blockUnpacked);
                    if (blockFromHFile != blockUnpacked) {
                        blockFromHFile.release();
                    }
                }
                is.close();
            }
        }
    }
}
Also used: DataBlockEncoding (org.apache.hadoop.hbase.io.encoding.DataBlockEncoding), Compression (org.apache.hadoop.hbase.io.compress.Compression), Configuration (org.apache.hadoop.conf.Configuration), HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration), ArrayList (java.util.ArrayList), FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream), MultiByteBuff (org.apache.hadoop.hbase.nio.MultiByteBuff), SingleByteBuff (org.apache.hadoop.hbase.nio.SingleByteBuff), ByteBuff (org.apache.hadoop.hbase.nio.ByteBuff), Path (org.apache.hadoop.fs.Path), ByteBuffer (java.nio.ByteBuffer), Algorithm (org.apache.hadoop.hbase.io.compress.Compression.Algorithm), FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream), FSDataInputStreamWrapper (org.apache.hadoop.hbase.io.FSDataInputStreamWrapper)
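
The test above drives compression only indirectly, through HFileContextBuilder.withCompression(algo). As a minimal, hypothetical sketch (not taken from the HBase sources), the same Compression.Algorithm can also be driven directly through its stream factories; the method name gzipRoundTrip is illustrative, and the snippet assumes the createCompressionStream/createDecompressionStream and compressor/decompressor pooling methods on Compression.Algorithm together with the usual java.io and org.apache.hadoop.io.compress.Compressor/Decompressor imports.

private byte[] gzipRoundTrip(byte[] original) throws IOException {
    // Illustrative only: compress and decompress a payload with Compression.Algorithm.GZ.
    Compression.Algorithm algo = Compression.Algorithm.GZ;
    // Compress into an in-memory buffer; a buffer size of 0 leaves down-stream buffering
    // to the algorithm's defaults.
    Compressor compressor = algo.getCompressor();
    ByteArrayOutputStream compressed = new ByteArrayOutputStream();
    OutputStream out = algo.createCompressionStream(compressed, compressor, 0);
    out.write(original);
    // Closing the wrapper finishes the compressed stream.
    out.close();
    algo.returnCompressor(compressor);
    // Decompress back into a byte array.
    Decompressor decompressor = algo.getDecompressor();
    InputStream in = algo.createDecompressionStream(
        new ByteArrayInputStream(compressed.toByteArray()), decompressor, 0);
    ByteArrayOutputStream restored = new ByteArrayOutputStream();
    byte[] buf = new byte[4096];
    for (int n = in.read(buf); n != -1; n = in.read(buf)) {
        restored.write(buf, 0, n);
    }
    in.close();
    algo.returnDecompressor(decompressor);
    return restored.toByteArray();
}

Returning the pooled Compressor and Decompressor instances, as the HBase write path itself does, keeps codec resources from leaking.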

Example 7 with Compression

Use of org.apache.hadoop.hbase.io.compress.Compression in project hbase by apache.

From class TestHFileBlock, method testPreviousOffsetInternals.

protected void testPreviousOffsetInternals() throws IOException {
    // TODO: parameterize these nested loops.
    Configuration conf = TEST_UTIL.getConfiguration();
    for (Compression.Algorithm algo : COMPRESSION_ALGORITHMS) {
        for (boolean pread : BOOLEAN_VALUES) {
            for (boolean cacheOnWrite : BOOLEAN_VALUES) {
                Random rand = defaultRandom();
                LOG.info("testPreviousOffset: Compression algorithm={}, pread={}, cacheOnWrite={}", algo.toString(), pread, cacheOnWrite);
                Path path = new Path(TEST_UTIL.getDataTestDir(), "prev_offset");
                List<Long> expectedOffsets = new ArrayList<>();
                List<Long> expectedPrevOffsets = new ArrayList<>();
                List<BlockType> expectedTypes = new ArrayList<>();
                List<ByteBuffer> expectedContents = cacheOnWrite ? new ArrayList<>() : null;
                long totalSize = writeBlocks(TEST_UTIL.getConfiguration(), rand, algo, path, expectedOffsets, expectedPrevOffsets, expectedTypes, expectedContents);
                FSDataInputStream is = fs.open(path);
                HFileContext meta = new HFileContextBuilder().withHBaseCheckSum(true)
                    .withIncludesMvcc(includesMemstoreTS).withIncludesTags(includesTag)
                    .withCompression(algo).build();
                ReaderContext context = new ReaderContextBuilder()
                    .withInputStreamWrapper(new FSDataInputStreamWrapper(is)).withFileSize(totalSize)
                    .withFilePath(path).withFileSystem(fs).build();
                HFileBlock.FSReader hbr = new HFileBlock.FSReaderImpl(context, meta, alloc, conf);
                long curOffset = 0;
                for (int i = 0; i < NUM_TEST_BLOCKS; ++i) {
                    if (!pread) {
                        assertEquals(is.getPos(), curOffset + (i == 0 ? 0 : HConstants.HFILEBLOCK_HEADER_SIZE));
                    }
                    assertEquals(expectedOffsets.get(i).longValue(), curOffset);
                    if (detailedLogging) {
                        LOG.info("Reading block #" + i + " at offset " + curOffset);
                    }
                    HFileBlock b = hbr.readBlockData(curOffset, -1, pread, false, false);
                    if (detailedLogging) {
                        LOG.info("Block #" + i + ": " + b);
                    }
                    assertEquals("Invalid block #" + i + "'s type:", expectedTypes.get(i), b.getBlockType());
                    assertEquals("Invalid previous block offset for block " + i + " of " + "type " + b.getBlockType() + ":", (long) expectedPrevOffsets.get(i), b.getPrevBlockOffset());
                    b.sanityCheck();
                    assertEquals(curOffset, b.getOffset());
                    // Now re-load this block knowing the on-disk size. This tests a
                    // different branch in the loader.
                    HFileBlock b2 = hbr.readBlockData(curOffset, b.getOnDiskSizeWithHeader(), pread, false, false);
                    b2.sanityCheck();
                    assertEquals(b.getBlockType(), b2.getBlockType());
                    assertEquals(b.getOnDiskSizeWithoutHeader(), b2.getOnDiskSizeWithoutHeader());
                    assertEquals(b.getOnDiskSizeWithHeader(), b2.getOnDiskSizeWithHeader());
                    assertEquals(b.getUncompressedSizeWithoutHeader(), b2.getUncompressedSizeWithoutHeader());
                    assertEquals(b.getPrevBlockOffset(), b2.getPrevBlockOffset());
                    assertEquals(curOffset, b2.getOffset());
                    assertEquals(b.getBytesPerChecksum(), b2.getBytesPerChecksum());
                    assertEquals(b.getOnDiskDataSizeWithHeader(), b2.getOnDiskDataSizeWithHeader());
                    assertEquals(0, HFile.getAndResetChecksumFailuresCount());
                    assertRelease(b2);
                    curOffset += b.getOnDiskSizeWithHeader();
                    if (cacheOnWrite) {
                        // NOTE: cache-on-write testing doesn't actually involve a BlockCache. It simply
                        // verifies that the unpacked value read back off disk matches the unpacked value
                        // generated before writing to disk.
                        HFileBlock newBlock = b.unpack(meta, hbr);
                        // b's buffer has header + data + checksum while
                        // expectedContents have header + data only
                        ByteBuff bufRead = newBlock.getBufferReadOnly();
                        ByteBuffer bufExpected = expectedContents.get(i);
                        byte[] tmp = new byte[bufRead.limit() - newBlock.totalChecksumBytes()];
                        bufRead.get(tmp, 0, tmp.length);
                        boolean bytesAreCorrect = Bytes.compareTo(tmp, 0, tmp.length, bufExpected.array(), bufExpected.arrayOffset(), bufExpected.limit()) == 0;
                        String wrongBytesMsg = "";
                        if (!bytesAreCorrect) {
                            // Optimization: only construct an error message in case we
                            // will need it.
                            wrongBytesMsg = "Expected bytes in block #" + i + " (algo=" + algo + ", pread=" + pread + ", cacheOnWrite=" + cacheOnWrite + "):\n";
                            wrongBytesMsg += Bytes.toStringBinary(bufExpected.array(), bufExpected.arrayOffset(), Math.min(32 + 10, bufExpected.limit())) + ", actual:\n" + Bytes.toStringBinary(bufRead.array(), bufRead.arrayOffset(), Math.min(32 + 10, bufRead.limit()));
                            if (detailedLogging) {
                                LOG.warn("expected header" + HFileBlock.toStringHeader(new SingleByteBuff(bufExpected)) + "\nfound    header" + HFileBlock.toStringHeader(bufRead));
                                LOG.warn("bufread offset " + bufRead.arrayOffset() + " limit " + bufRead.limit() + " expected offset " + bufExpected.arrayOffset() + " limit " + bufExpected.limit());
                                LOG.warn(wrongBytesMsg);
                            }
                        }
                        assertTrue(wrongBytesMsg, bytesAreCorrect);
                        assertRelease(newBlock);
                        if (newBlock != b) {
                            assertRelease(b);
                        }
                    } else {
                        assertRelease(b);
                    }
                }
                assertEquals(curOffset, fs.getFileStatus(path).getLen());
                is.close();
            }
        }
    }
}
Also used: Compression (org.apache.hadoop.hbase.io.compress.Compression), Configuration (org.apache.hadoop.conf.Configuration), HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration), ArrayList (java.util.ArrayList), Random (java.util.Random), MultiByteBuff (org.apache.hadoop.hbase.nio.MultiByteBuff), SingleByteBuff (org.apache.hadoop.hbase.nio.SingleByteBuff), ByteBuff (org.apache.hadoop.hbase.nio.ByteBuff), Path (org.apache.hadoop.fs.Path), ByteBuffer (java.nio.ByteBuffer), Algorithm (org.apache.hadoop.hbase.io.compress.Compression.Algorithm), FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream), FSDataInputStreamWrapper (org.apache.hadoop.hbase.io.FSDataInputStreamWrapper)
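
The prevBlockOffset field checked above is what makes it possible to step backwards through blocks of the same type. The following is a purely illustrative sketch, not part of TestHFileBlock: it reuses the readBlockData signature shown above, the helper name walkBlocksBackwards is hypothetical, and it assumes that the first block of a chain reports a negative previous offset because the writer leaves it unset.

private List<Long> walkBlocksBackwards(HFileBlock.FSReader reader, long startOffset, boolean pread)
        throws IOException {
    // Hypothetical helper: follow the prevBlockOffset chain from a known starting block
    // (for example the last offset recorded while writing) back to the first block.
    List<Long> visitedOffsets = new ArrayList<>();
    long offset = startOffset;
    while (offset >= 0) {
        // The on-disk size is unknown here, so pass -1 and let the reader work it out,
        // the same branch exercised by the test above.
        HFileBlock b = reader.readBlockData(offset, -1, pread, false, false);
        b.sanityCheck();
        visitedOffsets.add(b.getOffset());
        long prev = b.getPrevBlockOffset();
        b.release();
        offset = prev;
    }
    return visitedOffsets;
}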

Example 8 with Compression

Use of org.apache.hadoop.hbase.io.compress.Compression in project hbase by apache.

From class TestSecureBulkLoadManager, method prepareHFile.

private void prepareHFile(Path dir, byte[] key, byte[] value) throws Exception {
    TableDescriptor desc = testUtil.getAdmin().getDescriptor(TABLE);
    ColumnFamilyDescriptor family = desc.getColumnFamily(FAMILY);
    Compression.Algorithm compression = HFile.DEFAULT_COMPRESSION_ALGORITHM;
    CacheConfig writerCacheConf = new CacheConfig(conf, family, null, ByteBuffAllocator.HEAP);
    writerCacheConf.setCacheDataOnWrite(false);
    HFileContext hFileContext = new HFileContextBuilder().withIncludesMvcc(false)
        .withIncludesTags(true).withCompression(compression)
        .withCompressTags(family.isCompressTags())
        .withChecksumType(StoreUtils.getChecksumType(conf))
        .withBytesPerCheckSum(StoreUtils.getBytesPerChecksum(conf))
        .withBlockSize(family.getBlocksize()).withHBaseCheckSum(true)
        .withDataBlockEncoding(family.getDataBlockEncoding())
        .withEncryptionContext(Encryption.Context.NONE)
        .withCreateTime(EnvironmentEdgeManager.currentTime()).build();
    StoreFileWriter.Builder builder =
        new StoreFileWriter.Builder(conf, writerCacheConf, dir.getFileSystem(conf))
            .withOutputDir(new Path(dir, family.getNameAsString()))
            .withBloomType(family.getBloomFilterType()).withMaxKeyCount(Integer.MAX_VALUE)
            .withFileContext(hFileContext);
    StoreFileWriter writer = builder.build();
    Put put = new Put(key);
    put.addColumn(FAMILY, COLUMN, value);
    for (Cell c : put.get(FAMILY, COLUMN)) {
        writer.append(c);
    }
    writer.close();
}
Also used: Path (org.apache.hadoop.fs.Path), Compression (org.apache.hadoop.hbase.io.compress.Compression), HFileContextBuilder (org.apache.hadoop.hbase.io.hfile.HFileContextBuilder), ColumnFamilyDescriptor (org.apache.hadoop.hbase.client.ColumnFamilyDescriptor), TableDescriptor (org.apache.hadoop.hbase.client.TableDescriptor), Put (org.apache.hadoop.hbase.client.Put), HFileContext (org.apache.hadoop.hbase.io.hfile.HFileContext), CacheConfig (org.apache.hadoop.hbase.io.hfile.CacheConfig), Cell (org.apache.hadoop.hbase.Cell)
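
prepareHFile hard-codes HFile.DEFAULT_COMPRESSION_ALGORITHM; in a real write path the algorithm normally comes from the column family, e.g. family.getCompressionType(). As a hypothetical follow-up (not part of TestSecureBulkLoadManager), the directory written above can then be handed to the bulk-load tool. The sketch below assumes the HBase 2.x org.apache.hadoop.hbase.tool.BulkLoadHFiles API and an existing target table; the method name bulkLoadPrepared is illustrative.

private void bulkLoadPrepared(Configuration conf, TableName table, Path dir) throws IOException {
    // Hypothetical follow-up: 'dir' is the parent directory containing one sub-directory
    // per column family, which is exactly the layout produced by prepareHFile above.
    BulkLoadHFiles.create(conf).bulkLoad(table, dir);
}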

Example 9 with Compression

Use of org.apache.hadoop.hbase.io.compress.Compression in project hbase by apache.

From class HBaseTestingUtility, method generateColumnDescriptors.

/**
 * Create a set of column descriptors covering every available combination of
 * compression, data block encoding, and Bloom filter type.
 * @param prefix prefix for the generated family names
 * @return the list of column descriptors
 */
public static List<ColumnFamilyDescriptor> generateColumnDescriptors(final String prefix) {
    List<ColumnFamilyDescriptor> columnFamilyDescriptors = new ArrayList<>();
    long familyId = 0;
    for (Compression.Algorithm compressionType : getSupportedCompressionAlgorithms()) {
        for (DataBlockEncoding encodingType : DataBlockEncoding.values()) {
            for (BloomType bloomType : BloomType.values()) {
                String name = String.format("%s-cf-!@#&-%d!@#", prefix, familyId);
                ColumnFamilyDescriptorBuilder columnFamilyDescriptorBuilder = ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(name));
                columnFamilyDescriptorBuilder.setCompressionType(compressionType);
                columnFamilyDescriptorBuilder.setDataBlockEncoding(encodingType);
                columnFamilyDescriptorBuilder.setBloomFilterType(bloomType);
                columnFamilyDescriptors.add(columnFamilyDescriptorBuilder.build());
                familyId++;
            }
        }
    }
    return columnFamilyDescriptors;
}
Also used: DataBlockEncoding (org.apache.hadoop.hbase.io.encoding.DataBlockEncoding), Compression (org.apache.hadoop.hbase.io.compress.Compression), Algorithm (org.apache.hadoop.hbase.io.compress.Compression.Algorithm), BloomType (org.apache.hadoop.hbase.regionserver.BloomType), ArrayList (java.util.ArrayList), ColumnFamilyDescriptorBuilder (org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder), ColumnFamilyDescriptor (org.apache.hadoop.hbase.client.ColumnFamilyDescriptor)
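
The generated descriptors are typically fed straight into a table definition. The sketch below is a hypothetical usage example, not part of HBaseTestingUtility: the table name, family prefix, and method name are illustrative, and it assumes TableDescriptorBuilder.setColumnFamilies plus an Admin handle obtained elsewhere (for example from the testing utility).

private void createCodecMatrixTable(Admin admin) throws IOException {
    // Hypothetical usage: one table with a column family for every generated
    // compression/encoding/bloom combination. Names are illustrative.
    List<ColumnFamilyDescriptor> families = HBaseTestingUtility.generateColumnDescriptors("codec");
    TableDescriptor codecMatrix = TableDescriptorBuilder.newBuilder(TableName.valueOf("codecMatrix"))
        .setColumnFamilies(families)
        .build();
    admin.createTable(codecMatrix);
}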

Example 10 with Compression

Use of org.apache.hadoop.hbase.io.compress.Compression in project hbase by apache.

From class HBaseTestingUtil, method generateColumnDescriptors.

/**
 * Create a set of column descriptors covering every available combination of
 * compression, data block encoding, and Bloom filter type.
 * @param prefix prefix for the generated family names
 * @return the list of column descriptors
 */
public static List<ColumnFamilyDescriptor> generateColumnDescriptors(final String prefix) {
    List<ColumnFamilyDescriptor> columnFamilyDescriptors = new ArrayList<>();
    long familyId = 0;
    for (Compression.Algorithm compressionType : getSupportedCompressionAlgorithms()) {
        for (DataBlockEncoding encodingType : DataBlockEncoding.values()) {
            for (BloomType bloomType : BloomType.values()) {
                String name = String.format("%s-cf-!@#&-%d!@#", prefix, familyId);
                ColumnFamilyDescriptorBuilder columnFamilyDescriptorBuilder = ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(name));
                columnFamilyDescriptorBuilder.setCompressionType(compressionType);
                columnFamilyDescriptorBuilder.setDataBlockEncoding(encodingType);
                columnFamilyDescriptorBuilder.setBloomFilterType(bloomType);
                columnFamilyDescriptors.add(columnFamilyDescriptorBuilder.build());
                familyId++;
            }
        }
    }
    return columnFamilyDescriptors;
}
Also used: DataBlockEncoding (org.apache.hadoop.hbase.io.encoding.DataBlockEncoding), Compression (org.apache.hadoop.hbase.io.compress.Compression), Algorithm (org.apache.hadoop.hbase.io.compress.Compression.Algorithm), BloomType (org.apache.hadoop.hbase.regionserver.BloomType), ArrayList (java.util.ArrayList), ColumnFamilyDescriptorBuilder (org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder), ColumnFamilyDescriptor (org.apache.hadoop.hbase.client.ColumnFamilyDescriptor)

Aggregations

Compression (org.apache.hadoop.hbase.io.compress.Compression): 16 usages
Path (org.apache.hadoop.fs.Path): 9 usages
ArrayList (java.util.ArrayList): 7 usages
Algorithm (org.apache.hadoop.hbase.io.compress.Compression.Algorithm): 7 usages
Configuration (org.apache.hadoop.conf.Configuration): 6 usages
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream): 6 usages
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream): 6 usages
FSDataInputStreamWrapper (org.apache.hadoop.hbase.io.FSDataInputStreamWrapper): 6 usages
DataBlockEncoding (org.apache.hadoop.hbase.io.encoding.DataBlockEncoding): 5 usages
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration): 4 usages
SingleByteBuff (org.apache.hadoop.hbase.nio.SingleByteBuff): 4 usages
DataOutputStream (java.io.DataOutputStream): 3 usages
IOException (java.io.IOException): 3 usages
ColumnFamilyDescriptor (org.apache.hadoop.hbase.client.ColumnFamilyDescriptor): 3 usages
ByteBuff (org.apache.hadoop.hbase.nio.ByteBuff): 3 usages
MultiByteBuff (org.apache.hadoop.hbase.nio.MultiByteBuff): 3 usages
DataInputStream (java.io.DataInputStream): 2 usages
ByteBuffer (java.nio.ByteBuffer): 2 usages
Random (java.util.Random): 2 usages
Cell (org.apache.hadoop.hbase.Cell): 2 usages