Example 11 with Compression

Use of org.apache.hadoop.hbase.io.compress.Compression in project hbase by apache.

In class HBaseTestingUtil, method getSupportedCompressionAlgorithms.

/**
 * Get supported compression algorithms.
 * @return supported compression algorithms.
 */
public static Compression.Algorithm[] getSupportedCompressionAlgorithms() {
    String[] allAlgos = HFile.getSupportedCompressionAlgorithms();
    List<Compression.Algorithm> supportedAlgos = new ArrayList<>();
    for (String algoName : allAlgos) {
        try {
            Compression.Algorithm algo = Compression.getCompressionAlgorithmByName(algoName);
            algo.getCompressor();
            supportedAlgos.add(algo);
        } catch (Throwable t) {
            // this algo is not available
        }
    }
    return supportedAlgos.toArray(new Algorithm[supportedAlgos.size()]);
}
Also used : Compression(org.apache.hadoop.hbase.io.compress.Compression) Algorithm(org.apache.hadoop.hbase.io.compress.Compression.Algorithm) ArrayList(java.util.ArrayList) SplitAlgorithm(org.apache.hadoop.hbase.util.RegionSplitter.SplitAlgorithm)
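
For context, here is a minimal usage sketch of the returned array. It only reuses calls that appear elsewhere on this page (HFileContextBuilder is shown in Examples 12, 14 and 15); the loop body is illustrative, not taken from the project.

for (Compression.Algorithm algo : HBaseTestingUtil.getSupportedCompressionAlgorithms()) {
    // only codecs whose compressor could actually be obtained in this JVM get here
    HFileContext context = new HFileContextBuilder().withCompression(algo).build();
    // ... write and read test data with this context to exercise each available codec ...
}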

Example 12 with Compression

Use of org.apache.hadoop.hbase.io.compress.Compression in project hbase by apache.

In class TestHFileBlock, method testReaderV2Internals.

protected void testReaderV2Internals() throws IOException {
    final Configuration conf = TEST_UTIL.getConfiguration();
    if (includesTag) {
        conf.setInt("hfile.format.version", 3);
    }
    for (Compression.Algorithm algo : COMPRESSION_ALGORITHMS) {
        for (boolean pread : new boolean[] { false, true }) {
            LOG.info("testReaderV2: Compression algorithm: " + algo + ", pread=" + pread);
            Path path = new Path(TEST_UTIL.getDataTestDir(), "blocks_v2_" + algo);
            FSDataOutputStream os = fs.create(path);
            HFileContext meta = new HFileContextBuilder().withCompression(algo).withIncludesMvcc(includesMemstoreTS).withIncludesTags(includesTag).withBytesPerCheckSum(HFile.DEFAULT_BYTES_PER_CHECKSUM).build();
            HFileBlock.Writer hbw = new HFileBlock.Writer(conf, null, meta);
            long totalSize = 0;
            for (int blockId = 0; blockId < 2; ++blockId) {
                DataOutputStream dos = hbw.startWriting(BlockType.DATA);
                for (int i = 0; i < 1234; ++i) dos.writeInt(i);
                hbw.writeHeaderAndData(os);
                totalSize += hbw.getOnDiskSizeWithHeader();
            }
            os.close();
            FSDataInputStream is = fs.open(path);
            meta = new HFileContextBuilder().withHBaseCheckSum(true).withIncludesMvcc(includesMemstoreTS).withIncludesTags(includesTag).withCompression(algo).build();
            ReaderContext context = new ReaderContextBuilder().withInputStreamWrapper(new FSDataInputStreamWrapper(is)).withFileSize(totalSize).withFilePath(path).withFileSystem(fs).build();
            HFileBlock.FSReader hbr = new HFileBlock.FSReaderImpl(context, meta, alloc, TEST_UTIL.getConfiguration());
            HFileBlock b = hbr.readBlockData(0, -1, pread, false, true);
            is.close();
            assertEquals(0, HFile.getAndResetChecksumFailuresCount());
            b.sanityCheck();
            assertEquals(4936, b.getUncompressedSizeWithoutHeader());
            assertEquals(algo == GZ ? 2173 : 4936, b.getOnDiskSizeWithoutHeader() - b.totalChecksumBytes());
            HFileBlock expected = b;
            if (algo == GZ) {
                is = fs.open(path);
                ReaderContext readerContext = new ReaderContextBuilder().withInputStreamWrapper(new FSDataInputStreamWrapper(is)).withFileSize(totalSize).withFilePath(path).withFileSystem(fs).build();
                hbr = new HFileBlock.FSReaderImpl(readerContext, meta, alloc, TEST_UTIL.getConfiguration());
                b = hbr.readBlockData(0, 2173 + HConstants.HFILEBLOCK_HEADER_SIZE + b.totalChecksumBytes(), pread, false, true);
                assertEquals(expected, b);
                int wrongCompressedSize = 2172;
                try {
                    hbr.readBlockData(0, wrongCompressedSize + HConstants.HFILEBLOCK_HEADER_SIZE, pread, false, true);
                    fail("Exception expected");
                } catch (IOException ex) {
                    String expectedPrefix = "Passed in onDiskSizeWithHeader=";
                    assertTrue("Invalid exception message: '" + ex.getMessage() + "'.\nMessage is expected to start with: '" + expectedPrefix + "'", ex.getMessage().startsWith(expectedPrefix));
                }
                assertRelease(b);
                is.close();
            }
            assertRelease(expected);
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Compression(org.apache.hadoop.hbase.io.compress.Compression) Configuration(org.apache.hadoop.conf.Configuration) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) DataOutputStream(java.io.DataOutputStream) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) IOException(java.io.IOException) Algorithm(org.apache.hadoop.hbase.io.compress.Compression.Algorithm) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) FSDataInputStreamWrapper(org.apache.hadoop.hbase.io.FSDataInputStreamWrapper)
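
Distilled from the test above, a hedged sketch of the core write-then-read round trip for a single block. It only uses calls shown in these examples (HFileBlock.Writer, ReaderContextBuilder, HFileBlock.FSReaderImpl, ByteBuffAllocator.HEAP from Example 15), and it is assumed to live in the org.apache.hadoop.hbase.io.hfile package, as the test does, because several of these types are not part of the public client API.

static HFileBlock writeAndReadOneBlock(FileSystem fs, Path path, Configuration conf, Compression.Algorithm algo) throws IOException {
    HFileContext meta = new HFileContextBuilder().withCompression(algo).withHBaseCheckSum(true).build();
    // write one DATA block containing 1234 ints, as in the test
    FSDataOutputStream os = fs.create(path);
    HFileBlock.Writer writer = new HFileBlock.Writer(conf, null, meta); // null data block encoder, as above
    DataOutputStream dos = writer.startWriting(BlockType.DATA);
    for (int i = 0; i < 1234; ++i) {
        dos.writeInt(i);
    }
    writer.writeHeaderAndData(os);
    long totalSize = writer.getOnDiskSizeWithHeader();
    os.close();
    // read the block back through the checksum-aware reader
    FSDataInputStream is = fs.open(path);
    ReaderContext context = new ReaderContextBuilder().withInputStreamWrapper(new FSDataInputStreamWrapper(is)).withFileSize(totalSize).withFilePath(path).withFileSystem(fs).build();
    HFileBlock.FSReader reader = new HFileBlock.FSReaderImpl(context, meta, ByteBuffAllocator.HEAP, conf);
    HFileBlock block = reader.readBlockData(0, -1, true, false, true);
    is.close();
    block.sanityCheck(); // header fields must be self-consistent
    // note: the test releases blocks once it is done with them (assertRelease above)
    return block;
}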

Example 13 with Compression

Use of org.apache.hadoop.hbase.io.compress.Compression in project hbase by apache.

In class HBaseTestingUtility, method getSupportedCompressionAlgorithms.

/**
 * Get supported compression algorithms.
 * @return supported compression algorithms.
 */
public static Compression.Algorithm[] getSupportedCompressionAlgorithms() {
    String[] allAlgos = HFile.getSupportedCompressionAlgorithms();
    List<Compression.Algorithm> supportedAlgos = new ArrayList<>();
    for (String algoName : allAlgos) {
        try {
            Compression.Algorithm algo = Compression.getCompressionAlgorithmByName(algoName);
            algo.getCompressor();
            supportedAlgos.add(algo);
        } catch (Throwable t) {
            // this algo is not available
        }
    }
    return supportedAlgos.toArray(new Algorithm[supportedAlgos.size()]);
}
Also used : Compression(org.apache.hadoop.hbase.io.compress.Compression) Algorithm(org.apache.hadoop.hbase.io.compress.Compression.Algorithm) ArrayList(java.util.ArrayList) SplitAlgorithm(org.apache.hadoop.hbase.util.RegionSplitter.SplitAlgorithm)

Example 14 with Compression

Use of org.apache.hadoop.hbase.io.compress.Compression in project hbase by apache.

In class CreateRandomStoreFile, method run.

/**
 * Runs the tool.
 *
 * @param args command-line arguments
 * @return true in case of success
 * @throws IOException
 */
public boolean run(String[] args) throws IOException {
    options.addOption(OUTPUT_DIR_OPTION, "output_dir", true, "Output directory");
    options.addOption(NUM_KV_OPTION, "num_kv", true, "Number of key/value pairs");
    options.addOption(KEY_SIZE_OPTION, "key_size", true, "Average key size");
    options.addOption(VALUE_SIZE_OPTION, "value_size", true, "Average value size");
    options.addOption(HFILE_VERSION_OPTION, "hfile_version", true, "HFile version to create");
    options.addOption(COMPRESSION_OPTION, "compression", true, " Compression type, one of " + Arrays.toString(Compression.Algorithm.values()));
    options.addOption(BLOOM_FILTER_OPTION, "bloom_filter", true, "Bloom filter type, one of " + Arrays.toString(BloomType.values()));
    options.addOption(BLOOM_FILTER_PARAM_OPTION, "bloom_param", true, "the parameter of the bloom filter");
    options.addOption(BLOCK_SIZE_OPTION, "block_size", true, "HFile block size");
    options.addOption(BLOOM_BLOCK_SIZE_OPTION, "bloom_block_size", true, "Compound Bloom filters block size");
    options.addOption(INDEX_BLOCK_SIZE_OPTION, "index_block_size", true, "Index block size");
    if (args.length == 0) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(CreateRandomStoreFile.class.getSimpleName(), options, true);
        return false;
    }
    CommandLineParser parser = new PosixParser();
    CommandLine cmdLine;
    try {
        cmdLine = parser.parse(options, args);
    } catch (ParseException ex) {
        LOG.error(ex.toString(), ex);
        return false;
    }
    if (!cmdLine.hasOption(OUTPUT_DIR_OPTION)) {
        LOG.error("Output directory is not specified");
        return false;
    }
    if (!cmdLine.hasOption(NUM_KV_OPTION)) {
        LOG.error("The number of keys/values not specified");
        return false;
    }
    if (!cmdLine.hasOption(KEY_SIZE_OPTION)) {
        LOG.error("Key size is not specified");
        return false;
    }
    if (!cmdLine.hasOption(VALUE_SIZE_OPTION)) {
        LOG.error("Value size not specified");
        return false;
    }
    Configuration conf = HBaseConfiguration.create();
    Path outputDir = new Path(cmdLine.getOptionValue(OUTPUT_DIR_OPTION));
    long numKV = Long.parseLong(cmdLine.getOptionValue(NUM_KV_OPTION));
    configureKeyValue(numKV, Integer.parseInt(cmdLine.getOptionValue(KEY_SIZE_OPTION)), Integer.parseInt(cmdLine.getOptionValue(VALUE_SIZE_OPTION)));
    FileSystem fs = FileSystem.get(conf);
    Compression.Algorithm compr = Compression.Algorithm.NONE;
    if (cmdLine.hasOption(COMPRESSION_OPTION)) {
        compr = Compression.Algorithm.valueOf(cmdLine.getOptionValue(COMPRESSION_OPTION));
    }
    BloomType bloomType = BloomType.NONE;
    if (cmdLine.hasOption(BLOOM_FILTER_OPTION)) {
        bloomType = BloomType.valueOf(cmdLine.getOptionValue(BLOOM_FILTER_OPTION));
    }
    if (bloomType == BloomType.ROWPREFIX_FIXED_LENGTH) {
        if (!cmdLine.hasOption(BLOOM_FILTER_PARAM_OPTION)) {
            LOG.error("the parameter of bloom filter is not specified");
            return false;
        } else {
            conf.set(BloomFilterUtil.PREFIX_LENGTH_KEY, cmdLine.getOptionValue(BLOOM_FILTER_PARAM_OPTION));
        }
    }
    int blockSize = HConstants.DEFAULT_BLOCKSIZE;
    if (cmdLine.hasOption(BLOCK_SIZE_OPTION))
        blockSize = Integer.valueOf(cmdLine.getOptionValue(BLOCK_SIZE_OPTION));
    if (cmdLine.hasOption(BLOOM_BLOCK_SIZE_OPTION)) {
        conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_BLOCK_SIZE, Integer.valueOf(cmdLine.getOptionValue(BLOOM_BLOCK_SIZE_OPTION)));
    }
    if (cmdLine.hasOption(INDEX_BLOCK_SIZE_OPTION)) {
        conf.setInt(HFileBlockIndex.MAX_CHUNK_SIZE_KEY, Integer.valueOf(cmdLine.getOptionValue(INDEX_BLOCK_SIZE_OPTION)));
    }
    HFileContext meta = new HFileContextBuilder().withCompression(compr).withBlockSize(blockSize).build();
    StoreFileWriter sfw = new StoreFileWriter.Builder(conf, new CacheConfig(conf), fs).withOutputDir(outputDir).withBloomType(bloomType).withMaxKeyCount(numKV).withFileContext(meta).build();
    rand = new Random();
    LOG.info("Writing " + numKV + " key/value pairs");
    for (long i = 0; i < numKV; ++i) {
        sfw.append(generateKeyValue(i));
    }
    int numMetaBlocks = rand.nextInt(10) + 1;
    LOG.info("Writing " + numMetaBlocks + " meta blocks");
    for (int metaI = 0; metaI < numMetaBlocks; ++metaI) {
        sfw.getHFileWriter().appendMetaBlock(generateString(), new BytesWritable(generateValue()));
    }
    sfw.close();
    Path storeFilePath = sfw.getPath();
    long fileSize = fs.getFileStatus(storeFilePath).getLen();
    LOG.info("Created {}, {} bytes, compression={}", storeFilePath, fileSize, compr.toString());
    return true;
}
Also used : Path(org.apache.hadoop.fs.Path) Compression(org.apache.hadoop.hbase.io.compress.Compression) Configuration(org.apache.hadoop.conf.Configuration) HBaseConfiguration(org.apache.hadoop.hbase.HBaseConfiguration) PosixParser(org.apache.hbase.thirdparty.org.apache.commons.cli.PosixParser) HFileContextBuilder(org.apache.hadoop.hbase.io.hfile.HFileContextBuilder) BytesWritable(org.apache.hadoop.io.BytesWritable) HFileContext(org.apache.hadoop.hbase.io.hfile.HFileContext) HelpFormatter(org.apache.hbase.thirdparty.org.apache.commons.cli.HelpFormatter) CommandLine(org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine) Random(java.util.Random) FileSystem(org.apache.hadoop.fs.FileSystem) CommandLineParser(org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLineParser) ParseException(org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException) CacheConfig(org.apache.hadoop.hbase.io.hfile.CacheConfig)
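
The long option names registered above define this tool's command line, so a hedged usage sketch looks roughly like the following. Only the option names and the run(String[]) signature come from the code above; the no-arg constructor and the tool's location (it lives in HBase's test sources) are assumptions.

// run() throws IOException, so the caller must handle or declare it
String[] args = new String[] {
    "--output_dir", "/tmp/random-storefile",  // where the store file is written
    "--num_kv", "100000",                     // number of random key/value pairs
    "--key_size", "30",                       // average key size in bytes
    "--value_size", "512",                    // average value size in bytes
    "--compression", "GZ",                    // must be a Compression.Algorithm constant name
    "--bloom_filter", "ROW"                   // must be a BloomType constant name
};
boolean ok = new CreateRandomStoreFile().run(args);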

Example 15 with Compression

Use of org.apache.hadoop.hbase.io.compress.Compression in project hbase by apache.

In class TestChecksum, method testChecksumInternals.

protected void testChecksumInternals(boolean useTags) throws IOException {
    Compression.Algorithm algo = NONE;
    for (boolean pread : new boolean[] { false, true }) {
        for (int bytesPerChecksum : BYTES_PER_CHECKSUM) {
            Path path = new Path(TEST_UTIL.getDataTestDir(), "checksumChunk_" + algo + bytesPerChecksum);
            FSDataOutputStream os = fs.create(path);
            HFileContext meta = new HFileContextBuilder().withCompression(algo).withIncludesMvcc(true).withIncludesTags(useTags).withHBaseCheckSum(true).withBytesPerCheckSum(bytesPerChecksum).build();
            HFileBlock.Writer hbw = new HFileBlock.Writer(TEST_UTIL.getConfiguration(), null, meta);
            // write one block. The block has data
            // that is at least 6 times more than the checksum chunk size
            long dataSize = 0;
            DataOutputStream dos = hbw.startWriting(BlockType.DATA);
            for (; dataSize < 6 * bytesPerChecksum; ) {
                for (int i = 0; i < 1234; ++i) {
                    dos.writeInt(i);
                    dataSize += 4;
                }
            }
            hbw.writeHeaderAndData(os);
            long totalSize = hbw.getOnDiskSizeWithHeader();
            os.close();
            long expectedChunks = ChecksumUtil.numChunks(dataSize + HConstants.HFILEBLOCK_HEADER_SIZE, bytesPerChecksum);
            LOG.info("testChecksumChunks: pread={}, bytesPerChecksum={}, fileSize={}, " + "dataSize={}, expectedChunks={}, compression={}", pread, bytesPerChecksum, totalSize, dataSize, expectedChunks, algo.toString());
            // Verify hbase checksums.
            assertEquals(true, hfs.useHBaseChecksum());
            // Read data back from file.
            FSDataInputStream is = fs.open(path);
            FSDataInputStream nochecksum = hfs.getNoChecksumFs().open(path);
            meta = new HFileContextBuilder().withCompression(algo).withIncludesMvcc(true).withIncludesTags(useTags).withHBaseCheckSum(true).withBytesPerCheckSum(bytesPerChecksum).build();
            ReaderContext context = new ReaderContextBuilder().withInputStreamWrapper(new FSDataInputStreamWrapper(is, nochecksum)).withFileSize(totalSize).withFileSystem(hfs).withFilePath(path).build();
            HFileBlock.FSReader hbr = new HFileBlock.FSReaderImpl(context, meta, ByteBuffAllocator.HEAP, TEST_UTIL.getConfiguration());
            HFileBlock b = hbr.readBlockData(0, -1, pread, false, true);
            assertTrue(b.getBufferReadOnly() instanceof SingleByteBuff);
            is.close();
            b.sanityCheck();
            assertEquals(dataSize, b.getUncompressedSizeWithoutHeader());
            // verify that we have the expected number of checksum chunks
            assertEquals(totalSize, HConstants.HFILEBLOCK_HEADER_SIZE + dataSize + expectedChunks * HFileBlock.CHECKSUM_SIZE);
            // assert that we did not encounter hbase checksum verification failures
            assertEquals(0, HFile.getAndResetChecksumFailuresCount());
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Compression(org.apache.hadoop.hbase.io.compress.Compression) FSDataOutputStream(org.apache.hadoop.fs.FSDataOutputStream) DataOutputStream(java.io.DataOutputStream) SingleByteBuff(org.apache.hadoop.hbase.nio.SingleByteBuff) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) FSDataInputStreamWrapper(org.apache.hadoop.hbase.io.FSDataInputStreamWrapper)
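
The final assertion encodes the on-disk layout as a small formula; restated here as a hedged fragment that reuses only the constants and helpers already used by the test (which sits in the same package as ChecksumUtil), with dataSize and bytesPerChecksum as in the loop above:

// an uncompressed (NONE) block on disk is: checksum-aware header + data + one
// checksum word per bytesPerChecksum-sized chunk of (header + data)
long chunks = ChecksumUtil.numChunks(dataSize + HConstants.HFILEBLOCK_HEADER_SIZE, bytesPerChecksum);
long expectedOnDiskSize = HConstants.HFILEBLOCK_HEADER_SIZE + dataSize + chunks * HFileBlock.CHECKSUM_SIZE;
// expectedOnDiskSize should equal hbw.getOnDiskSizeWithHeader(), i.e. totalSize in the test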

Aggregations

Compression (org.apache.hadoop.hbase.io.compress.Compression) 16
Path (org.apache.hadoop.fs.Path) 9
ArrayList (java.util.ArrayList) 7
Algorithm (org.apache.hadoop.hbase.io.compress.Compression.Algorithm) 7
Configuration (org.apache.hadoop.conf.Configuration) 6
FSDataInputStream (org.apache.hadoop.fs.FSDataInputStream) 6
FSDataOutputStream (org.apache.hadoop.fs.FSDataOutputStream) 6
FSDataInputStreamWrapper (org.apache.hadoop.hbase.io.FSDataInputStreamWrapper) 6
DataBlockEncoding (org.apache.hadoop.hbase.io.encoding.DataBlockEncoding) 5
HBaseConfiguration (org.apache.hadoop.hbase.HBaseConfiguration) 4
SingleByteBuff (org.apache.hadoop.hbase.nio.SingleByteBuff) 4
DataOutputStream (java.io.DataOutputStream) 3
IOException (java.io.IOException) 3
ColumnFamilyDescriptor (org.apache.hadoop.hbase.client.ColumnFamilyDescriptor) 3
ByteBuff (org.apache.hadoop.hbase.nio.ByteBuff) 3
MultiByteBuff (org.apache.hadoop.hbase.nio.MultiByteBuff) 3
DataInputStream (java.io.DataInputStream) 2
ByteBuffer (java.nio.ByteBuffer) 2
Random (java.util.Random) 2
Cell (org.apache.hadoop.hbase.Cell) 2