Search in sources :

Example 21 with GzipCompressorInputStream

use of org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream in project DataX by alibaba.

the class UnstructuredStorageReaderUtil method readFromStream.

/**
 * Wraps {@code inputStream} in a {@link BufferedReader} that decompresses and decodes it
 * according to the slice configuration, then hands the reader to
 * {@code UnstructuredStorageReaderUtil.doReadFromStream}.
 *
 * <p>Configuration keys read here:
 * <ul>
 *   <li>{@code Key.COMPRESS} - compression format; a blank value means "not compressed".
 *       Accepted values (case-insensitive): lzo_deflate, lzo, gzip, bzip2, hadoop-snappy,
 *       framing-snappy, zip.</li>
 *   <li>{@code Key.ENCODING} - charset name; a blank value falls back to
 *       {@code Constant.DEFAULT_ENCODING}.</li>
 *   <li>{@code Key.COLUMN} - a single {@code "*"} entry is rewritten to {@code null} in the
 *       configuration.</li>
 *   <li>{@code Key.BUFFER_SIZE} - reader buffer size, defaulting to
 *       {@code Constant.DEFAULT_BUFFER_SIZE}.</li>
 * </ul>
 *
 * <p>The reader is always closed quietly before this method returns.
 *
 * @param inputStream         raw (possibly compressed) byte stream to read
 * @param context             description of the stream, used in error messages and forwarded
 *                            to {@code doReadFromStream}
 * @param readerSliceConfig   slice configuration; {@code Key.COLUMN} may be modified
 * @param recordSender        forwarded to {@code doReadFromStream}
 * @param taskPluginCollector forwarded to {@code doReadFromStream}
 */
public static void readFromStream(InputStream inputStream, String context, Configuration readerSliceConfig, RecordSender recordSender, TaskPluginCollector taskPluginCollector) {
    String compress = readerSliceConfig.getString(Key.COMPRESS, null);
    if (StringUtils.isBlank(compress)) {
        compress = null;
    }
    String encoding = readerSliceConfig.getString(Key.ENCODING, Constant.DEFAULT_ENCODING);
    // handle blank encoding
    if (StringUtils.isBlank(encoding)) {
        // Log BEFORE overwriting, so the message shows the value the user actually configured.
        LOG.warn(String.format("您配置的encoding为[%s], 使用默认值[%s]", encoding, Constant.DEFAULT_ENCODING));
        encoding = Constant.DEFAULT_ENCODING;
    }
    List<Configuration> column = readerSliceConfig.getListConfiguration(Key.COLUMN);
    // handle ["*"] -> [], null
    if (null != column && 1 == column.size() && "\"*\"".equals(column.get(0).toString())) {
        readerSliceConfig.set(Key.COLUMN, null);
        column = null;
    }
    BufferedReader reader = null;
    int bufferSize = readerSliceConfig.getInt(Key.BUFFER_SIZE, Constant.DEFAULT_BUFFER_SIZE);
    // compress logic
    // NOTE(review): bufferSize is only applied to the plain, gzip, bzip2 and zip readers; the lzo
    // and snappy readers use BufferedReader's default size. Kept as-is - confirm whether intended.
    try {
        if (null == compress) {
            reader = new BufferedReader(new InputStreamReader(inputStream, encoding), bufferSize);
        } else if ("lzo_deflate".equalsIgnoreCase(compress)) {
            LzoInputStream lzoInputStream = new LzoInputStream(inputStream, new LzoDecompressor1x_safe());
            reader = new BufferedReader(new InputStreamReader(lzoInputStream, encoding));
        } else if ("lzo".equalsIgnoreCase(compress)) {
            LzoInputStream lzopInputStream = new ExpandLzopInputStream(inputStream);
            reader = new BufferedReader(new InputStreamReader(lzopInputStream, encoding));
        } else if ("gzip".equalsIgnoreCase(compress)) {
            CompressorInputStream compressorInputStream = new GzipCompressorInputStream(inputStream);
            reader = new BufferedReader(new InputStreamReader(compressorInputStream, encoding), bufferSize);
        } else if ("bzip2".equalsIgnoreCase(compress)) {
            CompressorInputStream compressorInputStream = new BZip2CompressorInputStream(inputStream);
            reader = new BufferedReader(new InputStreamReader(compressorInputStream, encoding), bufferSize);
        } else if ("hadoop-snappy".equalsIgnoreCase(compress)) {
            CompressionCodec snappyCodec = new SnappyCodec();
            InputStream snappyInputStream = snappyCodec.createInputStream(inputStream);
            reader = new BufferedReader(new InputStreamReader(snappyInputStream, encoding));
        } else if ("framing-snappy".equalsIgnoreCase(compress)) {
            InputStream snappyInputStream = new SnappyFramedInputStream(inputStream);
            reader = new BufferedReader(new InputStreamReader(snappyInputStream, encoding));
        } else if ("zip".equalsIgnoreCase(compress)) {
            ZipCycleInputStream zipCycleInputStream = new ZipCycleInputStream(inputStream);
            reader = new BufferedReader(new InputStreamReader(zipCycleInputStream, encoding), bufferSize);
        } else {
            // Any other format (xz, ar, arj, cpio, dump, jar, tar, ...) is rejected.
            throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.ILLEGAL_VALUE, String.format("仅支持 gzip, bzip2, zip, lzo, lzo_deflate, hadoop-snappy, framing-snappy" + "文件压缩格式 , 不支持您配置的文件压缩格式: [%s]", compress));
        }
        UnstructuredStorageReaderUtil.doReadFromStream(reader, context, readerSliceConfig, recordSender, taskPluginCollector);
    } catch (UnsupportedEncodingException uee) {
        throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.OPEN_FILE_WITH_CHARSET_ERROR, String.format("不支持的编码格式 : [%s]", encoding), uee);
    } catch (NullPointerException e) {
        throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.RUNTIME_EXCEPTION, "运行时错误, 请联系我们", e);
    } catch (IOException e) {
        throw DataXException.asDataXException(UnstructuredStorageReaderErrorCode.READ_FILE_IO_ERROR, String.format("流读取错误 : [%s]", context), e);
    } finally {
        // reader is null if construction failed; closeQuietly is called with it either way.
        IOUtils.closeQuietly(reader);
    }
}
Also used : GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) Configuration(com.alibaba.datax.common.util.Configuration) CompressorInputStream(org.apache.commons.compress.compressors.CompressorInputStream) SnappyFramedInputStream(io.airlift.compress.snappy.SnappyFramedInputStream) BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) SnappyFramedInputStream(io.airlift.compress.snappy.SnappyFramedInputStream) CompressorInputStream(org.apache.commons.compress.compressors.CompressorInputStream) BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) CompressionCodec(org.apache.hadoop.io.compress.CompressionCodec) SnappyCodec(io.airlift.compress.snappy.SnappyCodec)

Example 22 with GzipCompressorInputStream

use of org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream in project heron by twitter.

the class Extractor method extract.

/**
 * Extracts a gzip-compressed tar stream into {@code destination}.
 *
 * <p>Directory entries are created with {@code mkdirs()}; file entries are copied with
 * {@link StandardCopyOption#REPLACE_EXISTING}, so existing files are overwritten. Every entry
 * path is normalized and must stay inside {@code destination}; an entry such as
 * {@code ../../etc/passwd} is rejected instead of being written outside the target directory.
 *
 * @param in          gzip-compressed tar data; closed when this method returns
 * @param destination directory to extract into
 * @throws IOException if reading or writing fails, or if an entry would be extracted outside
 *                     {@code destination}
 */
static void extract(InputStream in, Path destination) throws IOException {
    try (final BufferedInputStream bufferedInputStream = new BufferedInputStream(in);
        final GzipCompressorInputStream gzipInputStream = new GzipCompressorInputStream(bufferedInputStream);
        final TarArchiveInputStream tarInputStream = new TarArchiveInputStream(gzipInputStream)) {
        final String destinationAbsolutePath = destination.toFile().getAbsolutePath();
        final Path destinationRoot = Paths.get(destinationAbsolutePath).normalize();
        TarArchiveEntry entry;
        while ((entry = (TarArchiveEntry) tarInputStream.getNextEntry()) != null) {
            // Normalize away "." / ".." segments, then make sure we are still under the root.
            final Path target = Paths.get(destinationAbsolutePath, entry.getName()).normalize();
            if (!target.startsWith(destinationRoot)) {
                throw new IOException("Tar entry would be extracted outside of " + destinationRoot + ": " + entry.getName());
            }
            if (entry.isDirectory()) {
                target.toFile().mkdirs();
            } else {
                // Archives are not required to list a directory before the files inside it.
                final Path parent = target.getParent();
                if (parent != null) {
                    Files.createDirectories(parent);
                }
                Files.copy(tarInputStream, target, StandardCopyOption.REPLACE_EXISTING);
            }
        }
    }
}
Also used : GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) Path(java.nio.file.Path) BufferedInputStream(java.io.BufferedInputStream) File(java.io.File) TarArchiveEntry(org.apache.commons.compress.archivers.tar.TarArchiveEntry)

Example 23 with GzipCompressorInputStream

use of org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream in project crate by crate.

the class SymbolicLinkPreservingUntarTransform method unpack.

/**
 * Unpacks the gzip-compressed tar archive {@code tarFile} into {@code targetDir}, recreating
 * symbolic links and, where the file system exposes a {@link PosixFileAttributeView}, the
 * owner/group/other permission bits stored in each entry's mode.
 *
 * <p>Entry names are first passed through {@code UnpackTransform.trimArchiveExtractPath};
 * entries for which it returns {@code null} are skipped.
 *
 * @param tarFile   the {@code .tar.gz} archive to read
 * @param targetDir directory the entries are resolved against
 * @throws IOException if the archive cannot be read or an entry cannot be created
 */
public void unpack(File tarFile, File targetDir) throws IOException {
    Logging.getLogger(SymbolicLinkPreservingUntarTransform.class).info("Unpacking " + tarFile.getName() + " using " + SymbolicLinkPreservingUntarTransform.class.getSimpleName() + ".");
    final Path destinationPath = targetDir.toPath();
    // Each stream is declared separately so that all of them are closed, even when a later
    // constructor (e.g. the gzip header check) or the extraction itself throws.
    try (FileInputStream fileIn = new FileInputStream(tarFile);
        GzipCompressorInputStream gzipIn = new GzipCompressorInputStream(fileIn);
        TarArchiveInputStream tar = new TarArchiveInputStream(gzipIn)) {
        TarArchiveEntry entry = tar.getNextTarEntry();
        while (entry != null) {
            // NOTE(review): containment of relativePath inside targetDir is assumed to be
            // handled by trimArchiveExtractPath - confirm if archives can be untrusted.
            final Path relativePath = UnpackTransform.trimArchiveExtractPath(entry.getName());
            if (relativePath == null) {
                entry = tar.getNextTarEntry();
                continue;
            }
            final Path destination = destinationPath.resolve(relativePath);
            final Path parent = destination.getParent();
            if (Files.exists(parent) == false) {
                Files.createDirectories(parent);
            }
            if (entry.isDirectory()) {
                Files.createDirectory(destination);
            } else if (entry.isSymbolicLink()) {
                Files.createSymbolicLink(destination, Paths.get(entry.getLinkName()));
            } else {
                // stream the entry's content to disk instead of loading it into memory first
                Files.createFile(destination);
                try (FileOutputStream fos = new FileOutputStream(destination.toFile())) {
                    tar.transferTo(fos);
                }
            }
            if (entry.isSymbolicLink() == false) {
                // check if the underlying file system supports POSIX permissions
                final PosixFileAttributeView view = Files.getFileAttributeView(destination, PosixFileAttributeView.class);
                if (view != null) {
                    // owner, group and other permission triplets, taken from the tar mode bits
                    final Set<PosixFilePermission> permissions = PosixFilePermissions.fromString(permissions((entry.getMode() >> 6) & 07) + permissions((entry.getMode() >> 3) & 07) + permissions((entry.getMode() >> 0) & 07));
                    Files.setPosixFilePermissions(destination, permissions);
                }
            }
            entry = tar.getNextTarEntry();
        }
    }
}
Also used : TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) Path(java.nio.file.Path) FileOutputStream(java.io.FileOutputStream) PosixFilePermission(java.nio.file.attribute.PosixFilePermission) FileInputStream(java.io.FileInputStream) TarArchiveEntry(org.apache.commons.compress.archivers.tar.TarArchiveEntry) PosixFileAttributeView(java.nio.file.attribute.PosixFileAttributeView)

Example 24 with GzipCompressorInputStream

use of org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream in project zeppelin by apache.

the class HeliumBundleFactory method unTgz.

/**
 * Extracts all regular entries of the gzip-compressed tar archive {@code tarFile} into
 * {@code directory}. Directory entries are skipped; parent directories of extracted files are
 * created on demand.
 *
 * <p>Each entry's canonical target path must lie inside {@code directory}; entries that would
 * escape it (for example via {@code ../}) cause an {@link IOException} instead of being written.
 *
 * @param tarFile   the {@code .tgz} archive to read
 * @param directory directory to extract into
 * @return the names of the extracted entries, in archive order
 * @throws IOException if reading or writing fails, or an entry points outside {@code directory}
 */
private static List<String> unTgz(File tarFile, File directory) throws IOException {
    List<String> result = new ArrayList<>();
    // Prefix every valid target must start with; the trailing separator prevents "/a/bc"
    // from being accepted as a child of "/a/b".
    String canonicalRoot = directory.getCanonicalPath();
    String rootPrefix = canonicalRoot.endsWith(File.separator) ? canonicalRoot : canonicalRoot + File.separator;
    // Streams are declared separately so the file handle is released even if the gzip
    // header turns out to be invalid.
    try (FileInputStream fileIn = new FileInputStream(tarFile);
        GzipCompressorInputStream gzipIn = new GzipCompressorInputStream(fileIn);
        TarArchiveInputStream in = new TarArchiveInputStream(gzipIn)) {
        TarArchiveEntry entry = in.getNextTarEntry();
        while (entry != null) {
            if (entry.isDirectory()) {
                entry = in.getNextTarEntry();
                continue;
            }
            File curfile = new File(directory, entry.getName());
            if (!curfile.getCanonicalPath().startsWith(rootPrefix)) {
                throw new IOException("Tar entry would be extracted outside of " + canonicalRoot + ": " + entry.getName());
            }
            File parent = curfile.getParentFile();
            if (!parent.exists()) {
                parent.mkdirs();
            }
            try (OutputStream out = new FileOutputStream(curfile)) {
                IOUtils.copy(in, out);
            }
            result.add(entry.getName());
            entry = in.getNextTarEntry();
        }
    }
    return result;
}
Also used : TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) FileOutputStream(java.io.FileOutputStream) ArrayList(java.util.ArrayList) File(java.io.File) FileInputStream(java.io.FileInputStream) TarArchiveEntry(org.apache.commons.compress.archivers.tar.TarArchiveEntry)

Example 25 with GzipCompressorInputStream

use of org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream in project zeppelin by apache.

the class TarUtils method decompress.

/**
 * Extracts all regular entries of the gzip-compressed tar archive at path {@code in} into the
 * directory {@code out}. Directory entries are skipped; parent directories of extracted files
 * are created on demand.
 *
 * <p>Each entry's canonical target path must lie inside {@code out}; entries that would escape
 * it (for example via {@code ../}) cause an {@link IOException} instead of being written.
 *
 * @param in  path of the {@code .tar.gz} archive to read
 * @param out directory to extract into
 * @throws IOException if reading or writing fails, or an entry points outside {@code out}
 */
public static void decompress(String in, File out) throws IOException {
    // Prefix every valid target must start with; the trailing separator prevents "/a/bc"
    // from being accepted as a child of "/a/b".
    String canonicalRoot = out.getCanonicalPath();
    String rootPrefix = canonicalRoot.endsWith(File.separator) ? canonicalRoot : canonicalRoot + File.separator;
    // All three streams are managed by try-with-resources so the file handle is released even
    // if the gzip header is invalid.
    try (FileInputStream fileInputStream = new FileInputStream(in);
        GzipCompressorInputStream gzipInputStream = new GzipCompressorInputStream(fileInputStream);
        TarArchiveInputStream fin = new TarArchiveInputStream(gzipInputStream)) {
        TarArchiveEntry entry;
        while ((entry = fin.getNextTarEntry()) != null) {
            if (entry.isDirectory()) {
                continue;
            }
            File curfile = new File(out, entry.getName());
            if (!curfile.getCanonicalPath().startsWith(rootPrefix)) {
                throw new IOException("Tar entry would be extracted outside of " + canonicalRoot + ": " + entry.getName());
            }
            File parent = curfile.getParentFile();
            if (!parent.exists()) {
                parent.mkdirs();
            }
            // Close each output file as soon as it is written; previously the stream was leaked.
            try (FileOutputStream fileOutputStream = new FileOutputStream(curfile)) {
                IOUtils.copy(fin, fileOutputStream);
            }
        }
    }
}
Also used : GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) FileOutputStream(java.io.FileOutputStream) File(java.io.File) FileInputStream(java.io.FileInputStream) TarArchiveEntry(org.apache.commons.compress.archivers.tar.TarArchiveEntry)

Aggregations

GzipCompressorInputStream (org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream)58 TarArchiveInputStream (org.apache.commons.compress.archivers.tar.TarArchiveInputStream)46 TarArchiveEntry (org.apache.commons.compress.archivers.tar.TarArchiveEntry)40 IOException (java.io.IOException)29 FileInputStream (java.io.FileInputStream)26 File (java.io.File)23 BufferedInputStream (java.io.BufferedInputStream)22 FileOutputStream (java.io.FileOutputStream)20 InputStream (java.io.InputStream)16 OutputStream (java.io.OutputStream)10 Path (java.nio.file.Path)9 ArrayList (java.util.ArrayList)8 BufferedOutputStream (java.io.BufferedOutputStream)7 ByteArrayInputStream (java.io.ByteArrayInputStream)7 BZip2CompressorInputStream (org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream)6 BufferedReader (java.io.BufferedReader)4 ByteArrayOutputStream (java.io.ByteArrayOutputStream)4 InputStreamReader (java.io.InputStreamReader)4 URL (java.net.URL)4 ArchiveEntry (org.apache.commons.compress.archivers.ArchiveEntry)4