Search in sources :

Example 1 with BZip2CompressorInputStream

use of org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream in project hadoop by apache.

the class TestLineRecordReader method readRecordsDirectly.

/**
 * Gathers the records of a test file by reading it fully into memory and
 * splitting the decoded text on new lines.
 *
 * @param testFileUrl location of the test file; only the file part of the URL is used
 * @param bzip        whether the file content is bzip2-compressed and must be
 *                    decompressed while reading
 * @return the file content, decoded as UTF-8 and split on {@code "\n"}
 * @throws IOException if the file cannot be opened, read or decompressed
 */
public String[] readRecordsDirectly(URL testFileUrl, boolean bzip) throws IOException {
    int MAX_DATA_SIZE = 1024 * 1024;
    byte[] data = new byte[MAX_DATA_SIZE];
    int count = 0;
    // When bzip is false both resources refer to the same stream; closing it twice is harmless.
    try (FileInputStream fis = new FileInputStream(testFileUrl.getFile());
        java.io.InputStream in = bzip ? new BZip2CompressorInputStream(fis) : fis) {
        // A single read() may return fewer bytes than are available (notably for
        // decompressing streams), so keep reading until EOF or until the buffer is full.
        int n;
        while (count < data.length && (n = in.read(data, count, data.length - count)) != -1) {
            count += n;
        }
    }
    // A completely filled buffer means the file may not have been read entirely.
    assertTrue("Test file data too big for buffer", count < data.length);
    return new String(data, 0, count, "UTF-8").split("\n");
}
Also used : BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) FileInputStream(java.io.FileInputStream)

Example 2 with BZip2CompressorInputStream

use of org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream in project DataX by alibaba.

the class UnstructuredStorageReaderUtil method readFromStream.

/**
 * Wraps {@code inputStream} in a {@link BufferedReader}, optionally decompressing it first,
 * and hands the reader to {@code doReadFromStream}.
 *
 * <p>The compression format and the character encoding are read from {@code readerSliceConfig}.
 * A blank compression setting means "not compressed"; a blank encoding falls back to
 * {@code Constant.DEFAULT_ENCODING}. A column configuration consisting of the single entry
 * {@code "*"} is reset to {@code null} in the configuration before reading.
 *
 * <p>The reader created here is always closed quietly before the method returns.
 *
 * @param inputStream         raw (possibly compressed) input
 * @param context             description of the source, used in error messages
 * @param readerSliceConfig   configuration supplying compress, encoding, column and buffer size
 * @param recordSender        passed through to {@code doReadFromStream}
 * @param taskPluginCollector passed through to {@code doReadFromStream}
 * @throws DataXException if the compression format or encoding is unsupported, or reading fails
 */
public static void readFromStream(InputStream inputStream, String context, Configuration readerSliceConfig, RecordSender recordSender, TaskPluginCollector taskPluginCollector) {
    String compress = readerSliceConfig.getString(Key.COMPRESS, null);
    if (StringUtils.isBlank(compress)) {
        compress = null;
    }
    String encoding = readerSliceConfig.getString(Key.ENCODING, Constant.DEFAULT_ENCODING);
    // handle blank encoding
    if (StringUtils.isBlank(encoding)) {
        // Log before overwriting so the message reports the value that was actually configured.
        LOG.warn(String.format("您配置的encoding为[%s], 使用默认值[%s]", encoding, Constant.DEFAULT_ENCODING));
        encoding = Constant.DEFAULT_ENCODING;
    }
    List<Configuration> column = readerSliceConfig.getListConfiguration(Key.COLUMN);
    // handle ["*"] -> [], null
    if (null != column && 1 == column.size() && "\"*\"".equals(column.get(0).toString())) {
        readerSliceConfig.set(Key.COLUMN, null);
        column = null;
    }
    BufferedReader reader = null;
    int bufferSize = readerSliceConfig.getInt(Key.BUFFER_SIZE, Constant.DEFAULT_BUFFER_SIZE);
    // compress logic
    try {
        if (null == compress) {
            reader = new BufferedReader(new InputStreamReader(inputStream, encoding), bufferSize);
        } else {
            // TODO compress
            if ("lzo_deflate".equalsIgnoreCase(compress)) {
                LzoInputStream lzoInputStream = new LzoInputStream(inputStream, new LzoDecompressor1x_safe());
                reader = new BufferedReader(new InputStreamReader(lzoInputStream, encoding));
            } else if ("lzo".equalsIgnoreCase(compress)) {
                LzoInputStream lzopInputStream = new ExpandLzopInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(lzopInputStream, encoding));
            } else if ("gzip".equalsIgnoreCase(compress)) {
                CompressorInputStream compressorInputStream = new GzipCompressorInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(compressorInputStream, encoding), bufferSize);
            } else if ("bzip2".equalsIgnoreCase(compress)) {
                CompressorInputStream compressorInputStream = new BZip2CompressorInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(compressorInputStream, encoding), bufferSize);
            } else if ("hadoop-snappy".equalsIgnoreCase(compress)) {
                CompressionCodec snappyCodec = new SnappyCodec();
                InputStream snappyInputStream = snappyCodec.createInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(snappyInputStream, encoding));
            } else if ("framing-snappy".equalsIgnoreCase(compress)) {
                InputStream snappyInputStream = new SnappyFramedInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(snappyInputStream, encoding));
            } else if ("zip".equalsIgnoreCase(compress)) {
                ZipCycleInputStream zipCycleInputStream = new ZipCycleInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(zipCycleInputStream, encoding), bufferSize);
            } else {
                throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.ILLEGAL_VALUE, String.format("仅支持 gzip, bzip2, zip, lzo, lzo_deflate, hadoop-snappy, framing-snappy" + "文件压缩格式 , 不支持您配置的文件压缩格式: [%s]", compress));
            }
        }
        UnstructuredStorageReaderUtil.doReadFromStream(reader, context, readerSliceConfig, recordSender, taskPluginCollector);
    } catch (UnsupportedEncodingException uee) {
        throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.OPEN_FILE_WITH_CHARSET_ERROR, String.format("不支持的编码格式 : [%s]", encoding), uee);
    } catch (NullPointerException e) {
        // Kept so callers still receive a DataXException for this failure mode.
        throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.RUNTIME_EXCEPTION, "运行时错误, 请联系我们", e);
    } catch (IOException e) {
        throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.READ_FILE_IO_ERROR, String.format("流读取错误 : [%s]", context), e);
    } finally {
        IOUtils.closeQuietly(reader);
    }
}
Also used : GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) Configuration(com.alibaba.datax.common.util.Configuration) CompressorInputStream(org.apache.commons.compress.compressors.CompressorInputStream) SnappyFramedInputStream(io.airlift.compress.snappy.SnappyFramedInputStream) BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) SnappyCodec(io.airlift.compress.snappy.SnappyCodec)

Example 3 with BZip2CompressorInputStream

use of org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream in project otter by alibaba.

the class BZip2Compressor method decompressTo.

/**
 * Decompresses bzip2 data from {@code in} and copies the result to {@code out}.
 *
 * <p>Neither stream is closed by this method.
 *
 * @param in  source of bzip2-compressed bytes
 * @param out destination for the decompressed bytes
 * @throws CompressException wrapping any exception raised while decompressing or copying
 */
public void decompressTo(InputStream in, OutputStream out) throws CompressException {
    try {
        final BZip2CompressorInputStream bzipIn = new BZip2CompressorInputStream(in);
        NioUtils.copy(bzipIn, out);
    } catch (Exception e) {
        throw new CompressException("bzip_decompress_error", e);
    }
}
Also used : BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) CompressException(com.alibaba.otter.node.etl.common.io.compress.exception.CompressException)

Example 4 with BZip2CompressorInputStream

use of org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream in project rest.li by linkedin.

the class Bzip2Compressor method inflate.

/**
 * Decompresses a bzip2 stream fully into memory.
 *
 * @param data bzip2-compressed input
 * @return the decompressed bytes
 * @throws CompressionException if an {@link IOException} occurs while decompressing
 */
@Override
public byte[] inflate(InputStream data) throws CompressionException {
    final ByteArrayOutputStream inflated = new ByteArrayOutputStream();
    BZip2CompressorInputStream decompressor = null;
    try {
        decompressor = new BZip2CompressorInputStream(data);
        IOUtils.copy(decompressor, inflated);
    } catch (IOException e) {
        throw new CompressionException(CompressionConstants.DECODING_ERROR + getContentEncodingName(), e);
    } finally {
        // closeQuietly accepts null, so no explicit guard is needed here.
        IOUtils.closeQuietly(decompressor);
    }
    return inflated.toByteArray();
}
Also used : BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) IOException(java.io.IOException)

Example 5 with BZip2CompressorInputStream

use of org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream in project gephi by gephi.

the class DesktopImportControllerUI method getBzipFile.

/**
 * Uncompress a Bzip2 file.
 *
 * <p>When {@code isTar} is set, the tar header is consumed via {@code readTarHeader} and only
 * the number of bytes it reports is written to {@code out}; otherwise the whole decompressed
 * stream is written.
 *
 * <p>An {@link IOException} raised while reading or writing is reported through
 * {@code Exceptions.printStackTrace} and not rethrown; only failures while closing the
 * streams propagate to the caller.
 *
 * @param in    the bzip2-compressed source file
 * @param out   the file receiving the uncompressed content
 * @param isTar whether the compressed content is a tar archive
 * @return {@code out}
 * @throws IOException if closing one of the streams fails
 */
private static File getBzipFile(FileObject in, File out, boolean isTar) throws IOException {
    // Stream buffer
    final int BUFF_SIZE = 8192;
    final byte[] buffer = new byte[BUFF_SIZE];
    FileInputStream is = null;
    BZip2CompressorInputStream inputStream = null;
    FileOutputStream outStream = null;
    try {
        is = new FileInputStream(in.getPath());
        inputStream = new BZip2CompressorInputStream(is);
        outStream = new FileOutputStream(out.getAbsolutePath());
        if (isTar) {
            // Read Tar header
            int remainingBytes = readTarHeader(inputStream);
            // Read content; stop as soon as the announced number of bytes has been written.
            int nRead;
            while (remainingBytes > 0 && (nRead = inputStream.read(buffer)) != -1) {
                int nWrite = Math.min(nRead, remainingBytes);
                outStream.write(buffer, 0, nWrite);
                remainingBytes -= nWrite;
            }
        } else {
            int len;
            while ((len = inputStream.read(buffer)) > 0) {
                outStream.write(buffer, 0, len);
            }
        }
    } catch (IOException ex) {
        Exceptions.printStackTrace(ex);
    } finally {
        try {
            if (inputStream != null) {
                inputStream.close();
            } else if (is != null) {
                // The decompressor was never created, so the raw file stream must be closed directly.
                is.close();
            }
        } finally {
            // Close the output even when closing the input failed.
            if (outStream != null) {
                outStream.close();
            }
        }
    }
    return out;
}
Also used : BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) ByteBuffer(java.nio.ByteBuffer) FileInputStream(java.io.FileInputStream)

Aggregations

BZip2CompressorInputStream (org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream)19 FileInputStream (java.io.FileInputStream)11 IOException (java.io.IOException)6 InputStream (java.io.InputStream)6 GZIPInputStream (java.util.zip.GZIPInputStream)6 File (java.io.File)4 FileOutputStream (java.io.FileOutputStream)4 BufferedReader (java.io.BufferedReader)3 InputStreamReader (java.io.InputStreamReader)3 ZipArchiveInputStream (org.apache.commons.compress.archivers.zip.ZipArchiveInputStream)3 GzipCompressorInputStream (org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream)3 BufferedInputStream (java.io.BufferedInputStream)2 ByteBuffer (java.nio.ByteBuffer)2 ArchiveInputStream (org.apache.commons.compress.archivers.ArchiveInputStream)2 TarArchiveInputStream (org.apache.commons.compress.archivers.tar.TarArchiveInputStream)2 CompressorInputStream (org.apache.commons.compress.compressors.CompressorInputStream)2 CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec)2 Test (org.junit.Test)2 Configuration (com.alibaba.datax.common.util.Configuration)1 CompressException (com.alibaba.otter.node.etl.common.io.compress.exception.CompressException)1