Use of org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream in project hadoop by apache.
The class TestLineRecordReader, method readRecordsDirectly:
// Gather the records by just splitting on new lines
public String[] readRecordsDirectly(URL testFileUrl, boolean bzip) throws IOException {
    final int MAX_DATA_SIZE = 1024 * 1024;
    byte[] data = new byte[MAX_DATA_SIZE];
    FileInputStream fis = new FileInputStream(testFileUrl.getFile());
    int count;
    if (bzip) {
        BZip2CompressorInputStream bzIn = new BZip2CompressorInputStream(fis);
        // a single read is assumed to consume the whole (small) test file
        count = bzIn.read(data);
        bzIn.close();
    } else {
        count = fis.read(data);
    }
    fis.close();
    assertTrue("Test file data too big for buffer", count < data.length);
    return new String(data, 0, count, "UTF-8").split("\n");
}
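A hedged usage sketch for the helper above; the fixture name and extra assertions are illustrative, not taken from the Hadoop test itself.
// Illustrative call site inside the same test class; the .bz2 fixture name is hypothetical.
URL testFileUrl = getClass().getClassLoader().getResource("records.txt.bz2");
assertNotNull("Fixture missing from test classpath", testFileUrl);
String[] records = readRecordsDirectly(testFileUrl, true);
assertTrue("Expected at least one record", records.length > 0);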
Use of org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream in project DataX by alibaba.
The class UnstructuredStorageReaderUtil, method readFromStream:
public static void readFromStream(InputStream inputStream, String context,
        Configuration readerSliceConfig, RecordSender recordSender,
        TaskPluginCollector taskPluginCollector) {
    String compress = readerSliceConfig.getString(Key.COMPRESS, null);
    if (StringUtils.isBlank(compress)) {
        compress = null;
    }
    String encoding = readerSliceConfig.getString(Key.ENCODING, Constant.DEFAULT_ENCODING);
    // handle blank encoding: warn with the originally configured value before falling back
    if (StringUtils.isBlank(encoding)) {
        LOG.warn(String.format("The configured encoding [%s] is blank, using default [%s]",
                encoding, Constant.DEFAULT_ENCODING));
        encoding = Constant.DEFAULT_ENCODING;
    }
    List<Configuration> column = readerSliceConfig.getListConfiguration(Key.COLUMN);
    // handle ["*"] -> [], null
    if (null != column && 1 == column.size()
            && "\"*\"".equals(column.get(0).toString())) {
        readerSliceConfig.set(Key.COLUMN, null);
        column = null;
    }
    BufferedReader reader = null;
    int bufferSize = readerSliceConfig.getInt(Key.BUFFER_SIZE, Constant.DEFAULT_BUFFER_SIZE);
    // compress logic
    try {
        if (null == compress) {
            reader = new BufferedReader(new InputStreamReader(inputStream, encoding), bufferSize);
        } else {
            // TODO compress
            if ("lzo_deflate".equalsIgnoreCase(compress)) {
                LzoInputStream lzoInputStream = new LzoInputStream(inputStream, new LzoDecompressor1x_safe());
                reader = new BufferedReader(new InputStreamReader(lzoInputStream, encoding));
            } else if ("lzo".equalsIgnoreCase(compress)) {
                LzoInputStream lzopInputStream = new ExpandLzopInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(lzopInputStream, encoding));
            } else if ("gzip".equalsIgnoreCase(compress)) {
                CompressorInputStream compressorInputStream = new GzipCompressorInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(compressorInputStream, encoding), bufferSize);
            } else if ("bzip2".equalsIgnoreCase(compress)) {
                CompressorInputStream compressorInputStream = new BZip2CompressorInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(compressorInputStream, encoding), bufferSize);
            } else if ("hadoop-snappy".equalsIgnoreCase(compress)) {
                CompressionCodec snappyCodec = new SnappyCodec();
                InputStream snappyInputStream = snappyCodec.createInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(snappyInputStream, encoding));
            } else if ("framing-snappy".equalsIgnoreCase(compress)) {
                InputStream snappyInputStream = new SnappyFramedInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(snappyInputStream, encoding));
            } else
            /* disabled branches kept for reference:
            else if ("xz".equalsIgnoreCase(compress)) {
                CompressorInputStream compressorInputStream = new XZCompressorInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(compressorInputStream, encoding));
            } else if ("ar".equalsIgnoreCase(compress)) {
                ArArchiveInputStream arArchiveInputStream = new ArArchiveInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(arArchiveInputStream, encoding));
            } else if ("arj".equalsIgnoreCase(compress)) {
                ArjArchiveInputStream arjArchiveInputStream = new ArjArchiveInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(arjArchiveInputStream, encoding));
            } else if ("cpio".equalsIgnoreCase(compress)) {
                CpioArchiveInputStream cpioArchiveInputStream = new CpioArchiveInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(cpioArchiveInputStream, encoding));
            } else if ("dump".equalsIgnoreCase(compress)) {
                DumpArchiveInputStream dumpArchiveInputStream = new DumpArchiveInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(dumpArchiveInputStream, encoding));
            } else if ("jar".equalsIgnoreCase(compress)) {
                JarArchiveInputStream jarArchiveInputStream = new JarArchiveInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(jarArchiveInputStream, encoding));
            } else if ("tar".equalsIgnoreCase(compress)) {
                TarArchiveInputStream tarArchiveInputStream = new TarArchiveInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(tarArchiveInputStream, encoding));
            }
            */
            if ("zip".equalsIgnoreCase(compress)) {
                ZipCycleInputStream zipCycleInputStream = new ZipCycleInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(zipCycleInputStream, encoding), bufferSize);
            } else {
                throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.ILLEGAL_VALUE,
                        String.format("Only the compression formats gzip, bzip2, zip, lzo, lzo_deflate, "
                                + "hadoop-snappy and framing-snappy are supported; "
                                + "the configured format [%s] is not supported", compress));
            }
        }
        UnstructuredStorageReaderUtil.doReadFromStream(reader, context, readerSliceConfig,
                recordSender, taskPluginCollector);
    } catch (UnsupportedEncodingException uee) {
        throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.OPEN_FILE_WITH_CHARSET_ERROR,
                String.format("Unsupported encoding: [%s]", encoding), uee);
    } catch (NullPointerException e) {
        throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.RUNTIME_EXCEPTION,
                "Runtime error, please contact support", e);
    } /* catch (ArchiveException e) {
        throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.READ_FILE_IO_ERROR,
                String.format("Error reading compressed file stream: [%s]", context), e);
    } */ catch (IOException e) {
        throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.READ_FILE_IO_ERROR,
                String.format("Stream read error: [%s]", context), e);
    } finally {
        IOUtils.closeQuietly(reader);
    }
}
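The method above is tied to DataX types (Configuration, RecordSender, TaskPluginCollector). A minimal, self-contained sketch of its bzip2 branch alone, assuming only commons-compress on the classpath and an illustrative local file name:
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;

public final class Bzip2LineReader {
    public static void main(String[] args) throws IOException {
        // Decorate the raw stream exactly as the bzip2 branch above does
        try (BZip2CompressorInputStream bzIn =
                     new BZip2CompressorInputStream(new FileInputStream("data.txt.bz2"));
             BufferedReader reader = new BufferedReader(
                     new InputStreamReader(bzIn, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        }
    }
}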
Use of org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream in project otter by alibaba.
The class BZip2Compressor, method decompressTo:
public void decompressTo(InputStream in, OutputStream out) throws CompressException {
    BZip2CompressorInputStream inputStream = null;
    try {
        inputStream = new BZip2CompressorInputStream(in);
        NioUtils.copy(inputStream, out);
    } catch (Exception e) {
        throw new CompressException("bzip_decompress_error", e);
    } finally {
        // close the decompressor stream; the snippet as extracted leaked it
        if (inputStream != null) {
            try {
                inputStream.close();
            } catch (IOException ignore) {
                // best-effort close
            }
        }
    }
}
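A hedged call-site sketch for decompressTo; the file names are illustrative, and managing the caller-owned streams with try-with-resources is an assumption, not otter's documented usage.
BZip2Compressor compressor = new BZip2Compressor();
try (InputStream in = new FileInputStream("payload.bz2");
     OutputStream out = new FileOutputStream("payload.bin")) {
    compressor.decompressTo(in, out); // wraps and drains the input stream
}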
Use of org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream in project rest.li by linkedin.
The class Bzip2Compressor, method inflate:
@Override
public byte[] inflate(InputStream data) throws CompressionException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    BZip2CompressorInputStream bzip2 = null;
    try {
        bzip2 = new BZip2CompressorInputStream(data);
        IOUtils.copy(bzip2, out);
    } catch (IOException e) {
        throw new CompressionException(CompressionConstants.DECODING_ERROR + getContentEncodingName(), e);
    } finally {
        if (bzip2 != null) {
            IOUtils.closeQuietly(bzip2);
        }
    }
    return out.toByteArray();
}
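To see inflate's counterpart in isolation, a self-contained round-trip sketch using commons-compress directly, with commons-io's IOUtils assumed on the classpath:
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;
import org.apache.commons.io.IOUtils;

public final class Bzip2RoundTrip {
    public static void main(String[] args) throws IOException {
        byte[] original = "hello bzip2".getBytes(StandardCharsets.UTF_8);
        // Compress into memory
        ByteArrayOutputStream compressed = new ByteArrayOutputStream();
        try (BZip2CompressorOutputStream bzOut = new BZip2CompressorOutputStream(compressed)) {
            bzOut.write(original);
        }
        // Decompress with the same pattern inflate() uses
        ByteArrayOutputStream restored = new ByteArrayOutputStream();
        try (BZip2CompressorInputStream bzIn = new BZip2CompressorInputStream(
                new ByteArrayInputStream(compressed.toByteArray()))) {
            IOUtils.copy(bzIn, restored);
        }
        if (!Arrays.equals(original, restored.toByteArray())) {
            throw new IllegalStateException("round trip mismatch");
        }
    }
}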
Use of org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream in project gephi by gephi.
The class DesktopImportControllerUI, method getBzipFile:
/**
* Uncompress a Bzip2 file.
*/
private static File getBzipFile(FileObject in, File out, boolean isTar) throws IOException {
    // Stream buffer
    final int BUFF_SIZE = 8192;
    final byte[] buffer = new byte[BUFF_SIZE];
    BZip2CompressorInputStream inputStream = null;
    FileOutputStream outStream = null;
    try {
        FileInputStream is = new FileInputStream(in.getPath());
        inputStream = new BZip2CompressorInputStream(is);
        outStream = new FileOutputStream(out.getAbsolutePath());
        if (isTar) {
            // Read Tar header to learn how many content bytes follow it
            int remainingBytes = readTarHeader(inputStream);
            // Read content
            ByteBuffer bb = ByteBuffer.allocateDirect(4 * BUFF_SIZE);
            byte[] tmpCache = new byte[BUFF_SIZE];
            int nRead, nGet;
            while ((nRead = inputStream.read(tmpCache)) != -1) {
                if (nRead == 0) {
                    continue;
                }
                // Stage only the bytes actually read, then flip for draining
                bb.put(tmpCache, 0, nRead);
                bb.flip();
                // Stop writing once the size recorded in the tar header is reached,
                // so trailing tar padding is discarded
                while (bb.hasRemaining() && remainingBytes > 0) {
                    nGet = Math.min(bb.remaining(), BUFF_SIZE);
                    nGet = Math.min(nGet, remainingBytes);
                    bb.get(buffer, 0, nGet);
                    outStream.write(buffer, 0, nGet);
                    remainingBytes -= nGet;
                }
                bb.clear();
            }
        } else {
            int len;
            while ((len = inputStream.read(buffer)) > 0) {
                outStream.write(buffer, 0, len);
            }
        }
    } catch (IOException ex) {
        Exceptions.printStackTrace(ex);
    } finally {
        if (inputStream != null) {
            inputStream.close();
        }
        if (outStream != null) {
            outStream.close();
        }
    }
    return out;
}
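The manual readTarHeader bookkeeping above can be avoided with commons-compress's own tar support. A hedged alternative sketch, with illustrative file names, that extracts only the first regular entry of a .tar.bz2:
import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;

public final class TarBz2FirstEntry {
    public static void main(String[] args) throws IOException {
        try (TarArchiveInputStream tarIn = new TarArchiveInputStream(
                new BZip2CompressorInputStream(
                        new BufferedInputStream(new FileInputStream("graph.gexf.tar.bz2"))))) {
            TarArchiveEntry entry;
            while ((entry = tarIn.getNextTarEntry()) != null) {
                if (entry.isFile()) {
                    // Copy the first regular file's bytes; entry size is handled by the stream
                    try (FileOutputStream out = new FileOutputStream("graph.gexf")) {
                        byte[] buf = new byte[8192];
                        int n;
                        while ((n = tarIn.read(buf)) != -1) {
                            out.write(buf, 0, n);
                        }
                    }
                    break;
                }
            }
        }
    }
}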