Use of org.apache.hadoop.hbase.io.compress.Compression in project hbase by apache.
The class TestHFileBlock, method testInternals.
private void testInternals() throws IOException {
  final int numBlocks = 5;
  final Configuration conf = TEST_UTIL.getConfiguration();
  if (includesTag) {
    conf.setInt("hfile.format.version", 3);
  }
  for (Compression.Algorithm algo : COMPRESSION_ALGORITHMS) {
    for (boolean pread : new boolean[] { false, true }) {
      for (DataBlockEncoding encoding : DataBlockEncoding.values()) {
        LOG.info("testDataBlockEncoding: Compression algorithm={}, pread={}, dataBlockEncoder={}",
          algo.toString(), pread, encoding);
        Path path = new Path(TEST_UTIL.getDataTestDir(), "blocks_v2_" + algo + "_" + encoding.toString());
        FSDataOutputStream os = fs.create(path);
        HFileDataBlockEncoder dataBlockEncoder = (encoding != DataBlockEncoding.NONE)
          ? new HFileDataBlockEncoderImpl(encoding)
          : NoOpDataBlockEncoder.INSTANCE;
        HFileContext meta = new HFileContextBuilder()
          .withCompression(algo)
          .withIncludesMvcc(includesMemstoreTS)
          .withIncludesTags(includesTag)
          .withBytesPerCheckSum(HFile.DEFAULT_BYTES_PER_CHECKSUM)
          .build();
        HFileBlock.Writer hbw = new HFileBlock.Writer(conf, dataBlockEncoder, meta);
        long totalSize = 0;
        final List<Integer> encodedSizes = new ArrayList<>();
        final List<ByteBuff> encodedBlocks = new ArrayList<>();
        for (int blockId = 0; blockId < numBlocks; ++blockId) {
          hbw.startWriting(BlockType.DATA);
          writeTestKeyValues(hbw, blockId, includesMemstoreTS, includesTag);
          hbw.writeHeaderAndData(os);
          int headerLen = HConstants.HFILEBLOCK_HEADER_SIZE;
          ByteBuff encodedResultWithHeader = hbw.cloneUncompressedBufferWithHeader();
          final int encodedSize = encodedResultWithHeader.limit() - headerLen;
          if (encoding != DataBlockEncoding.NONE) {
            // We need to account for the two-byte encoding algorithm ID that
            // comes after the 24-byte block header but before encoded KVs.
            headerLen += DataBlockEncoding.ID_SIZE;
          }
          encodedSizes.add(encodedSize);
          ByteBuff encodedBuf = encodedResultWithHeader.position(headerLen).slice();
          encodedBlocks.add(encodedBuf);
          totalSize += hbw.getOnDiskSizeWithHeader();
        }
        os.close();
        FSDataInputStream is = fs.open(path);
        meta = new HFileContextBuilder()
          .withHBaseCheckSum(true)
          .withCompression(algo)
          .withIncludesMvcc(includesMemstoreTS)
          .withIncludesTags(includesTag)
          .build();
        ReaderContext context = new ReaderContextBuilder()
          .withInputStreamWrapper(new FSDataInputStreamWrapper(is))
          .withFileSize(totalSize)
          .withFilePath(path)
          .withFileSystem(fs)
          .build();
        HFileBlock.FSReaderImpl hbr = new HFileBlock.FSReaderImpl(context, meta, alloc, conf);
        hbr.setDataBlockEncoder(dataBlockEncoder, conf);
        hbr.setIncludesMemStoreTS(includesMemstoreTS);
        HFileBlock blockFromHFile, blockUnpacked;
        int pos = 0;
        for (int blockId = 0; blockId < numBlocks; ++blockId) {
          blockFromHFile = hbr.readBlockData(pos, -1, pread, false, true);
          assertEquals(0, HFile.getAndResetChecksumFailuresCount());
          blockFromHFile.sanityCheck();
          pos += blockFromHFile.getOnDiskSizeWithHeader();
          assertEquals((int) encodedSizes.get(blockId), blockFromHFile.getUncompressedSizeWithoutHeader());
          assertEquals(meta.isCompressedOrEncrypted(), !blockFromHFile.isUnpacked());
          long packedHeapsize = blockFromHFile.heapSize();
          blockUnpacked = blockFromHFile.unpack(meta, hbr);
          assertTrue(blockUnpacked.isUnpacked());
          if (meta.isCompressedOrEncrypted()) {
            LOG.info("packedHeapsize=" + packedHeapsize + ", unpackedHeadsize=" + blockUnpacked.heapSize());
            assertFalse(packedHeapsize == blockUnpacked.heapSize());
            assertTrue("Packed heapSize should be < unpacked heapSize", packedHeapsize < blockUnpacked.heapSize());
          }
          ByteBuff actualBuffer = blockUnpacked.getBufferWithoutHeader();
          if (encoding != DataBlockEncoding.NONE) {
            // We expect a two-byte big-endian encoding id.
            assertEquals("Unexpected first byte with " + buildMessageDetails(algo, encoding, pread),
              Long.toHexString(0), Long.toHexString(actualBuffer.get(0)));
            assertEquals("Unexpected second byte with " + buildMessageDetails(algo, encoding, pread),
              Long.toHexString(encoding.getId()), Long.toHexString(actualBuffer.get(1)));
            actualBuffer.position(2);
            actualBuffer = actualBuffer.slice();
          }
          ByteBuff expectedBuff = encodedBlocks.get(blockId);
          expectedBuff.rewind();
          // test if content matches, produce nice message
          assertBuffersEqual(expectedBuff, actualBuffer, algo, encoding, pread);
          // test serialized blocks
          for (boolean reuseBuffer : new boolean[] { false, true }) {
            ByteBuffer serialized = ByteBuffer.allocate(blockFromHFile.getSerializedLength());
            blockFromHFile.serialize(serialized, true);
            HFileBlock deserialized =
              (HFileBlock) blockFromHFile.getDeserializer().deserialize(new SingleByteBuff(serialized), HEAP);
            assertEquals("Serialization did not preserve block state. reuseBuffer=" + reuseBuffer,
              blockFromHFile, deserialized);
            // intentional reference comparison
            if (blockFromHFile != blockUnpacked) {
              assertEquals("Deserialized block cannot be unpacked correctly.",
                blockUnpacked, deserialized.unpack(meta, hbr));
            }
          }
          assertRelease(blockUnpacked);
          if (blockFromHFile != blockUnpacked) {
            blockFromHFile.release();
          }
        }
        is.close();
      }
    }
  }
}
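
Distilled from the test above, the write-side pattern it exercises is small: pick a Compression.Algorithm, describe it in an HFileContext, and hand that context to an HFileBlock.Writer. The following is a minimal sketch only, not the full test. It assumes the same imports and the existing TEST_UTIL, conf, and fs fields; the file name and the choice of GZ are illustrative assumptions, and the cell-writing step is elided.

  // Minimal sketch (not the full test): write one DATA block compressed with GZ.
  // Assumes the same imports as TestHFileBlock plus existing `conf` and `fs` fields;
  // "compression_sketch" is a hypothetical file name.
  Compression.Algorithm algo = Compression.Algorithm.GZ;
  HFileContext writeCtx = new HFileContextBuilder()
    .withCompression(algo)
    .withIncludesMvcc(false)
    .withIncludesTags(false)
    .build();
  HFileBlock.Writer writer = new HFileBlock.Writer(conf, NoOpDataBlockEncoder.INSTANCE, writeCtx);
  Path sketchPath = new Path(TEST_UTIL.getDataTestDir(), "compression_sketch");
  try (FSDataOutputStream out = fs.create(sketchPath)) {
    writer.startWriting(BlockType.DATA);
    // ... append serialized cells here, as writeTestKeyValues(...) does above ...
    writer.writeHeaderAndData(out);
  }
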
Use of org.apache.hadoop.hbase.io.compress.Compression in project hbase by apache.
The class TestHFileBlock, method testPreviousOffsetInternals.
protected void testPreviousOffsetInternals() throws IOException {
  // TODO: parameterize these nested loops.
  Configuration conf = TEST_UTIL.getConfiguration();
  for (Compression.Algorithm algo : COMPRESSION_ALGORITHMS) {
    for (boolean pread : BOOLEAN_VALUES) {
      for (boolean cacheOnWrite : BOOLEAN_VALUES) {
        Random rand = defaultRandom();
        LOG.info("testPreviousOffset: Compression algorithm={}, pread={}, cacheOnWrite={}",
          algo.toString(), pread, cacheOnWrite);
        Path path = new Path(TEST_UTIL.getDataTestDir(), "prev_offset");
        List<Long> expectedOffsets = new ArrayList<>();
        List<Long> expectedPrevOffsets = new ArrayList<>();
        List<BlockType> expectedTypes = new ArrayList<>();
        List<ByteBuffer> expectedContents = cacheOnWrite ? new ArrayList<>() : null;
        long totalSize = writeBlocks(TEST_UTIL.getConfiguration(), rand, algo, path,
          expectedOffsets, expectedPrevOffsets, expectedTypes, expectedContents);
        FSDataInputStream is = fs.open(path);
        HFileContext meta = new HFileContextBuilder()
          .withHBaseCheckSum(true)
          .withIncludesMvcc(includesMemstoreTS)
          .withIncludesTags(includesTag)
          .withCompression(algo)
          .build();
        ReaderContext context = new ReaderContextBuilder()
          .withInputStreamWrapper(new FSDataInputStreamWrapper(is))
          .withFileSize(totalSize)
          .withFilePath(path)
          .withFileSystem(fs)
          .build();
        HFileBlock.FSReader hbr = new HFileBlock.FSReaderImpl(context, meta, alloc, conf);
        long curOffset = 0;
        for (int i = 0; i < NUM_TEST_BLOCKS; ++i) {
          if (!pread) {
            assertEquals(is.getPos(), curOffset + (i == 0 ? 0 : HConstants.HFILEBLOCK_HEADER_SIZE));
          }
          assertEquals(expectedOffsets.get(i).longValue(), curOffset);
          if (detailedLogging) {
            LOG.info("Reading block #" + i + " at offset " + curOffset);
          }
          HFileBlock b = hbr.readBlockData(curOffset, -1, pread, false, false);
          if (detailedLogging) {
            LOG.info("Block #" + i + ": " + b);
          }
          assertEquals("Invalid block #" + i + "'s type:", expectedTypes.get(i), b.getBlockType());
          assertEquals("Invalid previous block offset for block " + i + " of " + "type " + b.getBlockType() + ":",
            (long) expectedPrevOffsets.get(i), b.getPrevBlockOffset());
          b.sanityCheck();
          assertEquals(curOffset, b.getOffset());
          // Now re-load this block knowing the on-disk size. This tests a
          // different branch in the loader.
          HFileBlock b2 = hbr.readBlockData(curOffset, b.getOnDiskSizeWithHeader(), pread, false, false);
          b2.sanityCheck();
          assertEquals(b.getBlockType(), b2.getBlockType());
          assertEquals(b.getOnDiskSizeWithoutHeader(), b2.getOnDiskSizeWithoutHeader());
          assertEquals(b.getOnDiskSizeWithHeader(), b2.getOnDiskSizeWithHeader());
          assertEquals(b.getUncompressedSizeWithoutHeader(), b2.getUncompressedSizeWithoutHeader());
          assertEquals(b.getPrevBlockOffset(), b2.getPrevBlockOffset());
          assertEquals(curOffset, b2.getOffset());
          assertEquals(b.getBytesPerChecksum(), b2.getBytesPerChecksum());
          assertEquals(b.getOnDiskDataSizeWithHeader(), b2.getOnDiskDataSizeWithHeader());
          assertEquals(0, HFile.getAndResetChecksumFailuresCount());
          assertRelease(b2);
          curOffset += b.getOnDiskSizeWithHeader();
          if (cacheOnWrite) {
            // NOTE: cache-on-write testing doesn't actually involve a BlockCache. It simply
            // verifies that the unpacked value read back off disk matches the unpacked value
            // generated before writing to disk.
            HFileBlock newBlock = b.unpack(meta, hbr);
            // b's buffer has header + data + checksum while
            // expectedContents have header + data only
            ByteBuff bufRead = newBlock.getBufferReadOnly();
            ByteBuffer bufExpected = expectedContents.get(i);
            byte[] tmp = new byte[bufRead.limit() - newBlock.totalChecksumBytes()];
            bufRead.get(tmp, 0, tmp.length);
            boolean bytesAreCorrect = Bytes.compareTo(tmp, 0, tmp.length,
              bufExpected.array(), bufExpected.arrayOffset(), bufExpected.limit()) == 0;
            String wrongBytesMsg = "";
            if (!bytesAreCorrect) {
              // Optimization: only construct an error message in case we
              // will need it.
              wrongBytesMsg = "Expected bytes in block #" + i + " (algo=" + algo + ", pread=" + pread
                + ", cacheOnWrite=" + cacheOnWrite + "):\n";
              wrongBytesMsg += Bytes.toStringBinary(bufExpected.array(), bufExpected.arrayOffset(),
                Math.min(32 + 10, bufExpected.limit())) + ", actual:\n"
                + Bytes.toStringBinary(bufRead.array(), bufRead.arrayOffset(), Math.min(32 + 10, bufRead.limit()));
              if (detailedLogging) {
                LOG.warn("expected header" + HFileBlock.toStringHeader(new SingleByteBuff(bufExpected))
                  + "\nfound header" + HFileBlock.toStringHeader(bufRead));
                LOG.warn("bufread offset " + bufRead.arrayOffset() + " limit " + bufRead.limit()
                  + " expected offset " + bufExpected.arrayOffset() + " limit " + bufExpected.limit());
                LOG.warn(wrongBytesMsg);
              }
            }
            assertTrue(wrongBytesMsg, bytesAreCorrect);
            assertRelease(newBlock);
            if (newBlock != b) {
              assertRelease(b);
            }
          } else {
            assertRelease(b);
          }
        }
        assertEquals(curOffset, fs.getFileStatus(path).getLen());
        is.close();
      }
    }
  }
}
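
Both tests share the same read-back pattern around Compression: the HFileContext handed to the reader must advertise the same algorithm the file was written with, or blocks cannot be unpacked. A minimal sketch of that shared pattern, assuming the fs, conf, alloc, path, totalSize, and algo values already set up above:

  // Sketch of the shared read path: wrap the stream, read a block at offset 0 with
  // unknown on-disk size (-1), then unpack (decompress/decode) it if needed.
  FSDataInputStream in = fs.open(path);
  HFileContext readCtx = new HFileContextBuilder()
    .withHBaseCheckSum(true)
    .withCompression(algo)
    .build();
  ReaderContext readerContext = new ReaderContextBuilder()
    .withInputStreamWrapper(new FSDataInputStreamWrapper(in))
    .withFileSize(totalSize)
    .withFilePath(path)
    .withFileSystem(fs)
    .build();
  HFileBlock.FSReader reader = new HFileBlock.FSReaderImpl(readerContext, readCtx, alloc, conf);
  HFileBlock packed = reader.readBlockData(0, -1, false, false, true);
  HFileBlock unpacked = packed.unpack(readCtx, reader);
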
Use of org.apache.hadoop.hbase.io.compress.Compression in project hbase by apache.
The class TestSecureBulkLoadManager, method prepareHFile.
private void prepareHFile(Path dir, byte[] key, byte[] value) throws Exception {
  TableDescriptor desc = testUtil.getAdmin().getDescriptor(TABLE);
  ColumnFamilyDescriptor family = desc.getColumnFamily(FAMILY);
  Compression.Algorithm compression = HFile.DEFAULT_COMPRESSION_ALGORITHM;
  CacheConfig writerCacheConf = new CacheConfig(conf, family, null, ByteBuffAllocator.HEAP);
  writerCacheConf.setCacheDataOnWrite(false);
  HFileContext hFileContext = new HFileContextBuilder()
    .withIncludesMvcc(false)
    .withIncludesTags(true)
    .withCompression(compression)
    .withCompressTags(family.isCompressTags())
    .withChecksumType(StoreUtils.getChecksumType(conf))
    .withBytesPerCheckSum(StoreUtils.getBytesPerChecksum(conf))
    .withBlockSize(family.getBlocksize())
    .withHBaseCheckSum(true)
    .withDataBlockEncoding(family.getDataBlockEncoding())
    .withEncryptionContext(Encryption.Context.NONE)
    .withCreateTime(EnvironmentEdgeManager.currentTime())
    .build();
  StoreFileWriter.Builder builder = new StoreFileWriter.Builder(conf, writerCacheConf, dir.getFileSystem(conf))
    .withOutputDir(new Path(dir, family.getNameAsString()))
    .withBloomType(family.getBloomFilterType())
    .withMaxKeyCount(Integer.MAX_VALUE)
    .withFileContext(hFileContext);
  StoreFileWriter writer = builder.build();
  Put put = new Put(key);
  put.addColumn(FAMILY, COLUMN, value);
  for (Cell c : put.get(FAMILY, COLUMN)) {
    writer.append(c);
  }
  writer.close();
}
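
prepareHFile pins the writer to HFile.DEFAULT_COMPRESSION_ALGORITHM rather than whatever the target family declares. If the intent were to honor the family's setting, a small variation (a sketch, not what the test does) would read the algorithm off the ColumnFamilyDescriptor; it reuses the desc and FAMILY fields from the method above, and getCompressionType() is assumed here to return the family's configured Compression.Algorithm.

  // Sketch: derive compression from the column family instead of the HFile default.
  ColumnFamilyDescriptor family = desc.getColumnFamily(FAMILY);
  Compression.Algorithm compression = family.getCompressionType();
  HFileContext hFileContext = new HFileContextBuilder()
    .withCompression(compression)
    .withCompressTags(family.isCompressTags())
    .withDataBlockEncoding(family.getDataBlockEncoding())
    .withBlockSize(family.getBlocksize())
    .build();
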
Use of org.apache.hadoop.hbase.io.compress.Compression in project hbase by apache.
The class HBaseTestingUtility, method generateColumnDescriptors.
/**
 * Create a set of column descriptors covering every combination of the available compression,
 * data block encoding, and Bloom filter types.
 * @param prefix family names prefix
 * @return the list of column descriptors
 */
public static List<ColumnFamilyDescriptor> generateColumnDescriptors(final String prefix) {
  List<ColumnFamilyDescriptor> columnFamilyDescriptors = new ArrayList<>();
  long familyId = 0;
  for (Compression.Algorithm compressionType : getSupportedCompressionAlgorithms()) {
    for (DataBlockEncoding encodingType : DataBlockEncoding.values()) {
      for (BloomType bloomType : BloomType.values()) {
        String name = String.format("%s-cf-!@#&-%d!@#", prefix, familyId);
        ColumnFamilyDescriptorBuilder columnFamilyDescriptorBuilder =
          ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(name));
        columnFamilyDescriptorBuilder.setCompressionType(compressionType);
        columnFamilyDescriptorBuilder.setDataBlockEncoding(encodingType);
        columnFamilyDescriptorBuilder.setBloomFilterType(bloomType);
        columnFamilyDescriptors.add(columnFamilyDescriptorBuilder.build());
        familyId++;
      }
    }
  }
  return columnFamilyDescriptors;
}
Use of org.apache.hadoop.hbase.io.compress.Compression in project hbase by apache.
The class HBaseTestingUtil, method generateColumnDescriptors.
/**
 * Create a set of column descriptors covering every combination of the available compression,
 * data block encoding, and Bloom filter types.
 * @param prefix family names prefix
 * @return the list of column descriptors
 */
public static List<ColumnFamilyDescriptor> generateColumnDescriptors(final String prefix) {
  List<ColumnFamilyDescriptor> columnFamilyDescriptors = new ArrayList<>();
  long familyId = 0;
  for (Compression.Algorithm compressionType : getSupportedCompressionAlgorithms()) {
    for (DataBlockEncoding encodingType : DataBlockEncoding.values()) {
      for (BloomType bloomType : BloomType.values()) {
        String name = String.format("%s-cf-!@#&-%d!@#", prefix, familyId);
        ColumnFamilyDescriptorBuilder columnFamilyDescriptorBuilder =
          ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes(name));
        columnFamilyDescriptorBuilder.setCompressionType(compressionType);
        columnFamilyDescriptorBuilder.setDataBlockEncoding(encodingType);
        columnFamilyDescriptorBuilder.setBloomFilterType(bloomType);
        columnFamilyDescriptors.add(columnFamilyDescriptorBuilder.build());
        familyId++;
      }
    }
  }
  return columnFamilyDescriptors;
}
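
Both generateColumnDescriptors variants above only build the descriptors; how they are consumed is up to the caller. One hedged usage sketch follows; the TableDescriptorBuilder and TableName calls are assumptions about the standard HBase 2.x client API, not part of the utility itself, and the table name is hypothetical.

  // Hypothetical usage: attach every generated family to a single table descriptor.
  TableDescriptorBuilder tableBuilder =
    TableDescriptorBuilder.newBuilder(TableName.valueOf("compression_matrix_test"));
  for (ColumnFamilyDescriptor cfd : generateColumnDescriptors("combo")) {
    tableBuilder.setColumnFamily(cfd);
  }
  TableDescriptor tableDescriptor = tableBuilder.build();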