Use of org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream in project DataX by alibaba.
The class UnstructuredStorageReaderUtil, method readFromStream.
public static void readFromStream(InputStream inputStream, String context,
        Configuration readerSliceConfig, RecordSender recordSender,
        TaskPluginCollector taskPluginCollector) {
    String compress = readerSliceConfig.getString(Key.COMPRESS, null);
    if (StringUtils.isBlank(compress)) {
        compress = null;
    }
    String encoding = readerSliceConfig.getString(Key.ENCODING, Constant.DEFAULT_ENCODING);
    // handle blank encoding: warn about the configured value, then fall back to the default
    if (StringUtils.isBlank(encoding)) {
        // "Your configured encoding is [%s]; using the default value [%s]"
        LOG.warn(String.format("您配置的encoding为[%s], 使用默认值[%s]", encoding, Constant.DEFAULT_ENCODING));
        encoding = Constant.DEFAULT_ENCODING;
    }
    List<Configuration> column = readerSliceConfig.getListConfiguration(Key.COLUMN);
    // handle ["*"] -> [], null
    if (null != column && 1 == column.size() && "\"*\"".equals(column.get(0).toString())) {
        readerSliceConfig.set(Key.COLUMN, null);
        column = null;
    }
    BufferedReader reader = null;
    int bufferSize = readerSliceConfig.getInt(Key.BUFFER_SIZE, Constant.DEFAULT_BUFFER_SIZE);
    // compress logic
    try {
        if (null == compress) {
            reader = new BufferedReader(new InputStreamReader(inputStream, encoding), bufferSize);
        } else {
            // TODO compress
            if ("lzo_deflate".equalsIgnoreCase(compress)) {
                LzoInputStream lzoInputStream = new LzoInputStream(inputStream, new LzoDecompressor1x_safe());
                reader = new BufferedReader(new InputStreamReader(lzoInputStream, encoding));
            } else if ("lzo".equalsIgnoreCase(compress)) {
                LzoInputStream lzopInputStream = new ExpandLzopInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(lzopInputStream, encoding));
            } else if ("gzip".equalsIgnoreCase(compress)) {
                CompressorInputStream compressorInputStream = new GzipCompressorInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(compressorInputStream, encoding), bufferSize);
            } else if ("bzip2".equalsIgnoreCase(compress)) {
                CompressorInputStream compressorInputStream = new BZip2CompressorInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(compressorInputStream, encoding), bufferSize);
            } else if ("hadoop-snappy".equalsIgnoreCase(compress)) {
                CompressionCodec snappyCodec = new SnappyCodec();
                InputStream snappyInputStream = snappyCodec.createInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(snappyInputStream, encoding));
            } else if ("framing-snappy".equalsIgnoreCase(compress)) {
                InputStream snappyInputStream = new SnappyFramedInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(snappyInputStream, encoding));
            } else /*
            else if ("xz".equalsIgnoreCase(compress)) {
                CompressorInputStream compressorInputStream = new XZCompressorInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(compressorInputStream, encoding));
            } else if ("ar".equalsIgnoreCase(compress)) {
                ArArchiveInputStream arArchiveInputStream = new ArArchiveInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(arArchiveInputStream, encoding));
            } else if ("arj".equalsIgnoreCase(compress)) {
                ArjArchiveInputStream arjArchiveInputStream = new ArjArchiveInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(arjArchiveInputStream, encoding));
            } else if ("cpio".equalsIgnoreCase(compress)) {
                CpioArchiveInputStream cpioArchiveInputStream = new CpioArchiveInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(cpioArchiveInputStream, encoding));
            } else if ("dump".equalsIgnoreCase(compress)) {
                DumpArchiveInputStream dumpArchiveInputStream = new DumpArchiveInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(dumpArchiveInputStream, encoding));
            } else if ("jar".equalsIgnoreCase(compress)) {
                JarArchiveInputStream jarArchiveInputStream = new JarArchiveInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(jarArchiveInputStream, encoding));
            } else if ("tar".equalsIgnoreCase(compress)) {
                TarArchiveInputStream tarArchiveInputStream = new TarArchiveInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(tarArchiveInputStream, encoding));
            }
            */
            if ("zip".equalsIgnoreCase(compress)) {
                ZipCycleInputStream zipCycleInputStream = new ZipCycleInputStream(inputStream);
                reader = new BufferedReader(new InputStreamReader(zipCycleInputStream, encoding), bufferSize);
            } else {
                // "Only gzip, bzip2, zip, lzo, lzo_deflate, hadoop-snappy and framing-snappy
                // compression formats are supported; the configured format [%s] is not supported."
                throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.ILLEGAL_VALUE,
                        String.format("仅支持 gzip, bzip2, zip, lzo, lzo_deflate, hadoop-snappy, framing-snappy"
                                + "文件压缩格式 , 不支持您配置的文件压缩格式: [%s]", compress));
            }
        }
        UnstructuredStorageReaderUtil.doReadFromStream(reader, context, readerSliceConfig,
                recordSender, taskPluginCollector);
    } catch (UnsupportedEncodingException uee) {
        // "Unsupported encoding: [%s]"
        throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.OPEN_FILE_WITH_CHARSET_ERROR,
                String.format("不支持的编码格式 : [%s]", encoding), uee);
    } catch (NullPointerException e) {
        // "Runtime error, please contact us"
        throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.RUNTIME_EXCEPTION,
                "运行时错误, 请联系我们", e);
    }
    /* catch (ArchiveException e) {
        // "Error reading compressed file stream: [%s]"
        throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.READ_FILE_IO_ERROR,
                String.format("压缩文件流读取错误 : [%s]", context), e);
    } */
    catch (IOException e) {
        // "Error reading stream: [%s]"
        throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.READ_FILE_IO_ERROR,
                String.format("流读取错误 : [%s]", context), e);
    } finally {
        IOUtils.closeQuietly(reader);
    }
}
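The gzip branch above boils down to wrapping the raw stream in GzipCompressorInputStream and reading it through an InputStreamReader. A minimal standalone sketch of just that path follows; the file name sample.txt.gz and the UTF-8 charset are illustrative assumptions, not values from the DataX code.

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;

public class GzipLineReaderSketch {
    public static void main(String[] args) throws Exception {
        // "sample.txt.gz" is a placeholder path; any gzip-compressed text file works.
        try (InputStream in = Files.newInputStream(Paths.get("sample.txt.gz"));
             BufferedReader reader = new BufferedReader(
                     new InputStreamReader(new GzipCompressorInputStream(in), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        }
    }
}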
Use of org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream in project heron by twitter.
The class Extractor, method extract.
static void extract(InputStream in, Path destination) throws IOException {
    try (final BufferedInputStream bufferedInputStream = new BufferedInputStream(in);
         final GzipCompressorInputStream gzipInputStream =
                 new GzipCompressorInputStream(bufferedInputStream);
         final TarArchiveInputStream tarInputStream =
                 new TarArchiveInputStream(gzipInputStream)) {
        final String destinationAbsolutePath = destination.toFile().getAbsolutePath();
        TarArchiveEntry entry;
        while ((entry = (TarArchiveEntry) tarInputStream.getNextEntry()) != null) {
            if (entry.isDirectory()) {
                File f = Paths.get(destinationAbsolutePath, entry.getName()).toFile();
                f.mkdirs();
            } else {
                Path fileDestinationPath = Paths.get(destinationAbsolutePath, entry.getName());
                Files.copy(tarInputStream, fileDestinationPath, StandardCopyOption.REPLACE_EXISTING);
            }
        }
    }
}
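The streams are stacked so that buffering happens once at the bottom: BufferedInputStream feeds GzipCompressorInputStream, which feeds TarArchiveInputStream, and closing the outermost try-with-resources closes all three. Since extract is package-private, a caller would sit in the same package; the sketch below is a hypothetical caller, with topology.tar.gz and the temporary directory as assumed placeholders.

import java.io.FileInputStream;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;

// Hypothetical caller living in the same package as Extractor.
public class ExtractorDemo {
    public static void main(String[] args) throws Exception {
        Path workDir = Files.createTempDirectory("heron-topology");
        // "topology.tar.gz" is a placeholder for the package being extracted.
        try (InputStream in = new FileInputStream("topology.tar.gz")) {
            Extractor.extract(in, workDir);
        }
    }
}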
Use of org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream in project crate by crate.
The class SymbolicLinkPreservingUntarTransform, method unpack.
public void unpack(File tarFile, File targetDir) throws IOException {
    Logging.getLogger(SymbolicLinkPreservingUntarTransform.class)
            .info("Unpacking " + tarFile.getName() + " using "
                    + SymbolicLinkPreservingUntarTransform.class.getSimpleName() + ".");
    // try-with-resources so the archive stream is closed even if extraction fails
    try (TarArchiveInputStream tar = new TarArchiveInputStream(
            new GzipCompressorInputStream(new FileInputStream(tarFile)))) {
        final Path destinationPath = targetDir.toPath();
        TarArchiveEntry entry = tar.getNextTarEntry();
        while (entry != null) {
            final Path relativePath = UnpackTransform.trimArchiveExtractPath(entry.getName());
            if (relativePath == null) {
                entry = tar.getNextTarEntry();
                continue;
            }
            final Path destination = destinationPath.resolve(relativePath);
            final Path parent = destination.getParent();
            if (Files.exists(parent) == false) {
                Files.createDirectories(parent);
            }
            if (entry.isDirectory()) {
                Files.createDirectory(destination);
            } else if (entry.isSymbolicLink()) {
                Files.createSymbolicLink(destination, Paths.get(entry.getLinkName()));
            } else {
                // copy the file from the archive using a small buffer to avoid heaping
                Files.createFile(destination);
                try (FileOutputStream fos = new FileOutputStream(destination.toFile())) {
                    tar.transferTo(fos);
                }
            }
            if (entry.isSymbolicLink() == false) {
                // check if the underlying file system supports POSIX permissions
                final PosixFileAttributeView view =
                        Files.getFileAttributeView(destination, PosixFileAttributeView.class);
                if (view != null) {
                    final Set<PosixFilePermission> permissions = PosixFilePermissions.fromString(
                            permissions((entry.getMode() >> 6) & 07)
                                    + permissions((entry.getMode() >> 3) & 07)
                                    + permissions((entry.getMode() >> 0) & 07));
                    Files.setPosixFilePermissions(destination, permissions);
                }
            }
            entry = tar.getNextTarEntry();
        }
    }
}
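The permissions(int) helper used for the POSIX mode bits is not shown in this excerpt. A plausible sketch, assuming it simply maps one octal permission digit to its rwx form (for example 6 to rw- and 5 to r-x), follows; the class name PermissionDigit is made up for the illustration.

// Sketch only: the helper in the project is not shown here and may differ.
public final class PermissionDigit {
    static String permissions(int permission) {
        StringBuilder sb = new StringBuilder(3);
        sb.append((permission & 4) != 0 ? 'r' : '-');
        sb.append((permission & 2) != 0 ? 'w' : '-');
        sb.append((permission & 1) != 0 ? 'x' : '-');
        return sb.toString();
    }

    public static void main(String[] args) {
        // 0755 -> "rwxr-xr-x"
        int mode = 0755;
        System.out.println(permissions((mode >> 6) & 07)
                + permissions((mode >> 3) & 07)
                + permissions(mode & 07));
    }
}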
Use of org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream in project zeppelin by apache.
The class HeliumBundleFactory, method unTgz.
private static List<String> unTgz(File tarFile, File directory) throws IOException {
    List<String> result = new ArrayList<>();
    try (TarArchiveInputStream in = new TarArchiveInputStream(
            new GzipCompressorInputStream(new FileInputStream(tarFile)))) {
        TarArchiveEntry entry = in.getNextTarEntry();
        while (entry != null) {
            if (entry.isDirectory()) {
                entry = in.getNextTarEntry();
                continue;
            }
            File curfile = new File(directory, entry.getName());
            File parent = curfile.getParentFile();
            if (!parent.exists()) {
                parent.mkdirs();
            }
            try (OutputStream out = new FileOutputStream(curfile)) {
                IOUtils.copy(in, out);
            }
            result.add(entry.getName());
            entry = in.getNextTarEntry();
        }
    }
    return result;
}
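A hypothetical call site is sketched below; unTgz is private static, so a real call would come from inside HeliumBundleFactory, and the bundle and target paths here are placeholders rather than anything from the Zeppelin code.

// Hypothetical call site inside HeliumBundleFactory; paths are placeholders.
File bundle = new File("helium-bundle.tgz");
File nodeModules = new File("/tmp/helium/node_modules");
List<String> extracted = unTgz(bundle, nodeModules);
for (String name : extracted) {
    System.out.println("extracted: " + name);
}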
Use of org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream in project zeppelin by apache.
The class TarUtils, method decompress.
public static void decompress(String in, File out) throws IOException {
    FileInputStream fileInputStream = new FileInputStream(in);
    GzipCompressorInputStream gzipInputStream = new GzipCompressorInputStream(fileInputStream);
    try (TarArchiveInputStream fin = new TarArchiveInputStream(gzipInputStream)) {
        TarArchiveEntry entry;
        while ((entry = fin.getNextTarEntry()) != null) {
            if (entry.isDirectory()) {
                continue;
            }
            File curfile = new File(out, entry.getName());
            File parent = curfile.getParentFile();
            if (!parent.exists()) {
                parent.mkdirs();
            }
            // close the output stream per entry so file handles are not leaked
            try (FileOutputStream fos = new FileOutputStream(curfile)) {
                IOUtils.copy(fin, fos);
            }
        }
    }
}
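A short example invocation, with placeholder paths rather than anything from the Zeppelin code; decompress skips directory entries and relies on parent.mkdirs() to recreate the directory tree as files are written.

import java.io.File;

public class TarUtilsDemo {
    public static void main(String[] args) throws Exception {
        // Placeholder paths: unpack conf.tar.gz into ./interpreter-conf.
        TarUtils.decompress("conf.tar.gz", new File("interpreter-conf"));
    }
}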