Search in sources :

Example 11 with CompressionOutputStream

Use of org.apache.hadoop.io.compress.CompressionOutputStream in project hadoop by apache.

the class TestZStandardCompressorDecompressor method testCompressingWithOneByteOutputBuffer.

public void testCompressingWithOneByteOutputBuffer() throws Exception {
    int uncompressedSize = (int) FileUtils.sizeOf(uncompressedFile);
    byte[] bytes = FileUtils.readFileToByteArray(uncompressedFile);
    assertEquals(uncompressedSize, bytes.length);
    Configuration conf = new Configuration();
    ZStandardCodec codec = new ZStandardCodec();
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    Compressor compressor = new ZStandardCompressor(3, IO_FILE_BUFFER_SIZE_DEFAULT, 1);
    CompressionOutputStream outputStream = codec.createOutputStream(baos, compressor);
    for (byte aByte : bytes) {
    assertEquals(uncompressedSize, compressor.getBytesRead());
    // just make sure we can decompress the file
    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
    Decompressor decompressor = codec.createDecompressor();
    CompressionInputStream inputStream = codec.createInputStream(bais, decompressor);
    byte[] buffer = new byte[100];
    int n = buffer.length;
    while ((n =, 0, n)) != -1) {
        byteArrayOutputStream.write(buffer, 0, n);
    assertArrayEquals(bytes, byteArrayOutputStream.toByteArray());
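Also used : CompressionOutputStream(org.apache.hadoop.io.compress.CompressionOutputStream) Decompressor(org.apache.hadoop.io.compress.Decompressor) Configuration(org.apache.hadoop.conf.Configuration) ByteArrayInputStream(java.io.ByteArrayInputStream) CompressionInputStream(org.apache.hadoop.io.compress.CompressionInputStream) Compressor(org.apache.hadoop.io.compress.Compressor) ZStandardCodec(org.apache.hadoop.io.compress.ZStandardCodec) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Test(org.junit.Test)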

Example 12 with CompressionOutputStream

Use of org.apache.hadoop.io.compress.CompressionOutputStream in project ignite by apache.

the class HadoopSnappyTest method checkSnappy.

/**
 * Internal check routine.
 *
 * @throws Throwable If failed.
 */
public static void checkSnappy() throws Throwable {
    try {
        byte[] expBytes = new byte[BYTE_SIZE];
        byte[] actualBytes = new byte[BYTE_SIZE];
        for (int i = 0; i < expBytes.length; i++) expBytes[i] = (byte) ThreadLocalRandom.current().nextInt(16);
        SnappyCodec codec = new SnappyCodec();
        codec.setConf(new Configuration());
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        try (CompressionOutputStream cos = codec.createOutputStream(baos)) {
        try (CompressionInputStream cis = codec.createInputStream(new ByteArrayInputStream(baos.toByteArray()))) {
            int read =, 0, actualBytes.length);
            assert read == actualBytes.length;
        assert Arrays.equals(expBytes, actualBytes);
    } catch (Throwable e) {
        System.out.println("Snappy check failed:");
        System.out.println("### NativeCodeLoader.isNativeCodeLoaded:  " + NativeCodeLoader.isNativeCodeLoaded());
        System.out.println("### SnappyCompressor.isNativeCodeLoaded:  " + SnappyCompressor.isNativeCodeLoaded());
        throw e;
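Also used : CompressionOutputStream(org.apache.hadoop.io.compress.CompressionOutputStream) Configuration(org.apache.hadoop.conf.Configuration) CompressionInputStream(org.apache.hadoop.io.compress.CompressionInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) SnappyCodec(org.apache.hadoop.io.compress.SnappyCodec)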

Example 13 with CompressionOutputStream

Use of org.apache.hadoop.io.compress.CompressionOutputStream in project mongo-hadoop by mongodb.

the class BSONSplitter method run.

/**
 * When run as a Tool, BSONSplitter can be used to pre-split and compress
 * BSON files. This can be especially useful before uploading large BSON
 * files to HDFS to save time. The compressed splits are written to the
 * given output path or to the directory containing the input file, if
 * the output path is unspecified. A ".splits" file is not generated, since
 * each output file is expected to be its own split.
 *
 * @param args command-line arguments. Run with zero arguments to see usage.
 * @return exit status
 * @throws Exception
 */
public int run(final String[] args) throws Exception {
    if (args.length < 1) {
        return 1;
    // Parse command-line arguments.
    Path filePath = new Path(args[0]);
    String compressorName = null, outputDirectoryStr = null;
    Path outputDirectory;
    CompressionCodec codec;
    Compressor compressor;
    for (int i = 1; i < args.length; ++i) {
        if ("-c".equals(args[i]) && args.length > i) {
            compressorName = args[++i];
        } else if ("-o".equals(args[i]) && args.length > i) {
            outputDirectoryStr = args[++i];
        } else {
            // CHECKSTYLE:OFF
            System.err.println("unrecognized option: " + args[i]);
            // CHECKSTYLE:ON
            return 1;
    // Supply default values for unspecified arguments.
    if (null == outputDirectoryStr) {
        outputDirectory = filePath.getParent();
    } else {
        outputDirectory = new Path(outputDirectoryStr);
    if (null == compressorName) {
        codec = new DefaultCodec();
    } else {
        Class<?> codecClass = Class.forName(compressorName);
        codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, getConf());
    if (codec instanceof Configurable) {
        ((Configurable) codec).setConf(getConf());
    // Do not write a .splits file so as not to confuse BSONSplitter.
    // Each compressed file will be its own split.
    MongoConfigUtil.setBSONWriteSplits(getConf(), false);
    // Open the file.
    FileSystem inputFS = FileSystem.get(filePath.toUri(), getConf());
    FileSystem outputFS = FileSystem.get(outputDirectory.toUri(), getConf());
    FSDataInputStream inputStream =;
    // Use BSONSplitter to split the file.
    Path splitFilePath = getSplitsFilePath(filePath, getConf());
    try {
        loadSplitsFromSplitFile(inputFS.getFileStatus(filePath), splitFilePath);
    } catch (NoSplitFileException e) {"did not find .splits file in " + splitFilePath.toUri());
    List<BSONFileSplit> splits = getAllSplits();"compressing " + splits.size() + " splits.");
    byte[] buf = new byte[1024 * 1024];
    for (int i = 0; i < splits.size(); ++i) {
        // e.g., hdfs:///user/hive/warehouse/mongo/OutputFile-42.bz2
        Path splitOutputPath = new Path(outputDirectory, filePath.getName() + "-" + i + codec.getDefaultExtension());
        // Compress the split into a new file.
        compressor = CodecPool.getCompressor(codec);
        CompressionOutputStream compressionOutputStream = null;
        try {
            compressionOutputStream = codec.createOutputStream(outputFS.create(splitOutputPath), compressor);
            int totalBytes = 0, bytesRead = 0;
            BSONFileSplit split = splits.get(i);
  "writing " + splitOutputPath.toUri() + ".");
            while (totalBytes < split.getLength() && bytesRead >= 0) {
                bytesRead =, 0, (int) Math.min(buf.length, split.getLength() - totalBytes));
                if (bytesRead > 0) {
                    compressionOutputStream.write(buf, 0, bytesRead);
                    totalBytes += bytesRead;
        } finally {
            if (compressionOutputStream != null) {
    return 0;
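Also used : Path(org.apache.hadoop.fs.Path) CompressionOutputStream(org.apache.hadoop.io.compress.CompressionOutputStream) BSONFileSplit(com.mongodb.hadoop.input.BSONFileSplit) Compressor(org.apache.hadoop.io.compress.Compressor) DefaultCodec(org.apache.hadoop.io.compress.DefaultCodec) Configurable(org.apache.hadoop.conf.Configurable) FileSystem(org.apache.hadoop.fs.FileSystem) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec)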

Example 14 with CompressionOutputStream

Use of org.apache.hadoop.io.compress.CompressionOutputStream in project hbase by apache.

the class DataBlockEncodingTool method benchmarkAlgorithm.

/**
 * Check compression and decompression performance of a given algorithm and print it.
 *
 * @param algorithm Compression algorithm.
 * @param name Name of algorithm.
 * @param buffer Buffer to be compressed.
 * @param offset Position of the beginning of the data.
 * @param length Length of data in buffer.
 * @throws IOException
 */
public void benchmarkAlgorithm(Compression.Algorithm algorithm, String name, byte[] buffer, int offset, int length) throws IOException {
    System.out.println(name + ":");
    // compress it
    List<Long> compressDurations = new ArrayList<>();
    ByteArrayOutputStream compressedStream = new ByteArrayOutputStream();
    CompressionOutputStream compressingStream = algorithm.createPlainCompressionStream(compressedStream, compressor);
    try {
        for (int itTime = 0; itTime < benchmarkNTimes; ++itTime) {
            final long startTime = System.nanoTime();
            // The compressedStream should reset before compressingStream resetState since in GZ
            // resetStatue will write header in the outputstream.
            compressingStream.write(buffer, offset, length);
            final long finishTime = System.nanoTime();
            // add time record
            if (itTime >= benchmarkNOmit) {
                compressDurations.add(finishTime - startTime);
    } catch (IOException e) {
        throw new RuntimeException(String.format("Benchmark, or encoding algorithm '%s' cause some stream problems", name), e);
    printBenchmarkResult(length, compressDurations, Manipulation.COMPRESSION);
    byte[] compBuffer = compressedStream.toByteArray();
    // uncompress it several times and measure performance
    List<Long> durations = new ArrayList<>();
    for (int itTime = 0; itTime < benchmarkNTimes; ++itTime) {
        final long startTime = System.nanoTime();
        byte[] newBuf = new byte[length + 1];
        try {
            ByteArrayInputStream downStream = new ByteArrayInputStream(compBuffer, 0, compBuffer.length);
            InputStream decompressedStream = algorithm.createDecompressionStream(downStream, decompressor, 0);
            int destOffset = 0;
            int nextChunk;
            while ((nextChunk = decompressedStream.available()) > 0) {
                destOffset +=, destOffset, nextChunk);
        } catch (IOException e) {
            throw new RuntimeException(String.format("Decoding path in '%s' algorithm cause exception ", name), e);
        final long finishTime = System.nanoTime();
        // check correctness
        if (0 != Bytes.compareTo(buffer, 0, length, newBuf, 0, length)) {
            int prefix = 0;
            for (; prefix < buffer.length && prefix < newBuf.length; ++prefix) {
                if (buffer[prefix] != newBuf[prefix]) {
            throw new RuntimeException(String.format("Algorithm '%s' is corrupting the data", name));
        // add time record
        if (itTime >= benchmarkNOmit) {
            durations.add(finishTime - startTime);
    printBenchmarkResult(length, durations, Manipulation.DECOMPRESSION);
Also used : CompressionOutputStream( ByteArrayInputStream( ByteArrayInputStream( InputStream( ArrayList(java.util.ArrayList) ByteArrayOutputStream( IOException(

Example 15 with CompressionOutputStream

Use of org.apache.hadoop.io.compress.CompressionOutputStream in project brisk by riptano.

the class CompressionTests method testSnappyCompression.

public void testSnappyCompression() throws IOException {
    SnappyCodec c = new SnappyCodec(new Configuration());
    byte[] inmsg = new byte[1024 * 1024 * 10];
    byte[] buffer = new byte[1024 * 1024];
    byte[] outmsg = new byte[1024 * 1024 * 16];
    for (int k = 0; k < 64; k++) {
        ByteArrayOutputStream bout = new ByteArrayOutputStream();
        CompressionOutputStream cout = c.createOutputStream(bout);
        ByteArrayInputStream bin = new ByteArrayInputStream(bout.toByteArray());
        CompressionInputStream cin = c.createInputStream(bin);
        int totaln = 0;
        while (cin.available() > 0) {
            int n =;
            if (n < 0)
            try {
                System.arraycopy(buffer, 0, outmsg, totaln, n);
            } catch (Throwable t) {
                System.err.println("n = " + n + " totaln " + totaln);
                throw new RuntimeException(t);
            totaln += n;
        assertEquals(inmsg.length, totaln);
        for (int i = 0; i < inmsg.length; i++) {
            assertEquals(inmsg[i], outmsg[i]);
        assertEquals(new String(inmsg), new String(outmsg, 0, totaln));
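Also used : CompressionOutputStream(org.apache.hadoop.io.compress.CompressionOutputStream) Configuration(org.apache.hadoop.conf.Configuration) ByteArrayInputStream(java.io.ByteArrayInputStream) CompressionInputStream(org.apache.hadoop.io.compress.CompressionInputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Test(org.junit.Test)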


CompressionOutputStream ( CompressionInputStream ( ByteArrayOutputStream ( Configuration (org.apache.hadoop.conf.Configuration)8 ByteArrayInputStream ( Test (org.junit.Test)7 BufferedOutputStream ( DataOutputStream ( IOException ( DataOutputBuffer ( Compressor ( BufferedInputStream ( DataInputStream ( DataInputBuffer ( Decompressor ( SnappyCodec ( File ( FileInputStream ( FileOutputStream ( BZip2Codec (