Search in sources :

Example 26 with GzipCompressorInputStream

use of org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream in project hive by apache.

the class CompressionUtils method unTar.

/**
 * Untar an input file into an output directory.
 *
 * If the input file name ends with ".gz" the content is gunzipped on the fly before being read
 * as a tar archive. Every entry is resolved against the output directory and rejected when the
 * resulting path is not located under it, so an archive cannot write outside the target folder.
 *
 * @param inputFileName the input .tar file (or gzipped tar when the name ends with ".gz")
 * @param outputDirName the output directory file.
 * @param flatten when {@code true}, directory entries are skipped and every file entry is
 *        written directly into the output directory using only its base name
 * @throws IOException if reading or writing fails, or an entry resolves outside the output directory
 * @throws FileNotFoundException if the input file or an output file cannot be opened
 * @throws IllegalStateException if a directory needed for an entry cannot be created
 *
 * @return The {@link List} of {@link File}s with the untared content. Each element is the
 *         entry's path resolved against the output directory; note that this is also the case
 *         when {@code flatten} is set, even though the data is then written to the flattened
 *         location.
 * @throws ArchiveException propagated from the {@link ArchiveStreamFactory} when the tar stream
 *         cannot be created
 */
public static List<File> unTar(final String inputFileName, final String outputDirName, boolean flatten) throws FileNotFoundException, IOException, ArchiveException {
    File inputFile = new File(inputFileName);
    File outputDir = new File(outputDirName);
    final List<File> untaredFiles = new LinkedList<File>();
    InputStream is = null;
    try {
        // Open the raw file first: if wrapping it in the gzip stream fails, "is" still refers to
        // the file stream and the finally block below closes it.
        is = new FileInputStream(inputFile);
        if (inputFileName.endsWith(".gz")) {
            is = new GzipCompressorInputStream(is);
        }
        try (TarArchiveInputStream debInputStream = (TarArchiveInputStream) new ArchiveStreamFactory().createArchiveInputStream("tar", is)) {
            TarArchiveEntry entry = null;
            while ((entry = (TarArchiveEntry) debInputStream.getNextEntry()) != null) {
                final File outputFile = new File(outputDir, entry.getName());
                // Guard against entries such as "../../etc/passwd" escaping the output directory.
                if (!outputFile.toPath().toAbsolutePath().normalize().startsWith(outputDir.toPath().toAbsolutePath().normalize())) {
                    throw new IOException("Untarred file is not under the output directory");
                }
                if (entry.isDirectory()) {
                    if (flatten) {
                        // no sub-directories
                        continue;
                    }
                    LOG.debug(String.format("Attempting to write output directory %s.", outputFile.getAbsolutePath()));
                    if (!outputFile.exists()) {
                        LOG.debug(String.format("Attempting to create output directory %s.", outputFile.getAbsolutePath()));
                        if (!outputFile.mkdirs()) {
                            throw new IllegalStateException(String.format("Couldn't create directory %s.", outputFile.getAbsolutePath()));
                        }
                    }
                } else {
                    // Decide where the entry's bytes go before opening any stream.
                    final File targetFile;
                    if (flatten) {
                        targetFile = new File(outputDir, outputFile.getName());
                        LOG.debug(String.format("Creating flat output file %s.", targetFile.getAbsolutePath()));
                    } else {
                        targetFile = outputFile;
                        if (!outputFile.getParentFile().exists()) {
                            LOG.debug(String.format("Attempting to create output directory %s.", outputFile.getParentFile().getAbsoluteFile()));
                            if (!outputFile.getParentFile().getAbsoluteFile().mkdirs()) {
                                throw new IllegalStateException(String.format("Couldn't create directory %s.", outputFile.getParentFile().getAbsolutePath()));
                            }
                            LOG.debug(String.format("Creating output file %s.", outputFile.getAbsolutePath()));
                        }
                    }
                    // try-with-resources closes the output file even when the copy fails midway.
                    try (OutputStream outputFileStream = new FileOutputStream(targetFile)) {
                        IOUtils.copy(debInputStream, outputFileStream);
                    }
                }
                untaredFiles.add(outputFile);
            }
        }
        return untaredFiles;
    } finally {
        // Also covers the case where the tar stream could not be created at all.
        if (is != null) {
            is.close();
        }
    }
}
Also used : GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) FileInputStream(java.io.FileInputStream) GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) InputStream(java.io.InputStream) OutputStream(java.io.OutputStream) ZipOutputStream(org.apache.tools.zip.ZipOutputStream) FileOutputStream(java.io.FileOutputStream) BufferedOutputStream(java.io.BufferedOutputStream) TarArchiveOutputStream(org.apache.commons.compress.archivers.tar.TarArchiveOutputStream) GzipCompressorOutputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream) IOException(java.io.IOException) LinkedList(java.util.LinkedList) FileInputStream(java.io.FileInputStream) TarArchiveEntry(org.apache.commons.compress.archivers.tar.TarArchiveEntry) TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) ArchiveStreamFactory(org.apache.commons.compress.archivers.ArchiveStreamFactory) FileOutputStream(java.io.FileOutputStream) File(java.io.File)

Example 27 with GzipCompressorInputStream

use of org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream in project uPortal by Jasig.

the class JaxbPortalDataHandlerService method importDataArchive.

/**
 * Detects the media type of the supplied stream and hands it on, wrapped in the matching
 * archive or decompression stream, to the appropriate {@code importDataArchive} overload.
 *
 * @param archive the resource being imported; its filename is passed to media type detection
 *        and it is included in the error message when reading fails
 * @param resourceStream the raw content of the resource; wrapped in a
 *        {@link BufferedInputStream} unless it already is one
 * @param options passed through unchanged to the follow-up import call
 * @throws RuntimeException if the media type is not one of the handled types, or if an
 *         {@link IOException} occurs while opening the stream
 */
private void importDataArchive(Resource archive, InputStream resourceStream, BatchImportOptions options) {
    BufferedInputStream bufferedResourceStream = null;
    try {
        // Make sure the stream is buffered
        bufferedResourceStream = (resourceStream instanceof BufferedInputStream)
                ? (BufferedInputStream) resourceStream
                : new BufferedInputStream(resourceStream);

        // Buffer up to 100MB, bad things will happen if we bust this buffer.
        // TODO see if there is a buffered stream that will write to a file once the buffer
        // fills up
        bufferedResourceStream.mark(100 * 1024 * 1024);
        final MediaType type = getMediaType(bufferedResourceStream, archive.getFilename());

        // Exactly one of these two is populated below; the declared types are kept distinct so
        // that the follow-up call binds to the same overload as before.
        ArchiveInputStream archiveStream = null;
        CompressorInputStream compressedStream = null;

        if (MT_JAVA_ARCHIVE.equals(type)) {
            archiveStream = new JarArchiveInputStream(bufferedResourceStream);
        } else if (MediaType.APPLICATION_ZIP.equals(type)) {
            archiveStream = new ZipArchiveInputStream(bufferedResourceStream);
        } else if (MT_CPIO.equals(type)) {
            archiveStream = new CpioArchiveInputStream(bufferedResourceStream);
        } else if (MT_AR.equals(type)) {
            archiveStream = new ArArchiveInputStream(bufferedResourceStream);
        } else if (MT_TAR.equals(type)) {
            archiveStream = new TarArchiveInputStream(bufferedResourceStream);
        } else if (MT_BZIP2.equals(type)) {
            compressedStream = new BZip2CompressorInputStream(bufferedResourceStream);
        } else if (MT_GZIP.equals(type)) {
            compressedStream = new GzipCompressorInputStream(bufferedResourceStream);
        } else if (MT_PACK200.equals(type)) {
            compressedStream = new Pack200CompressorInputStream(bufferedResourceStream);
        } else if (MT_XZ.equals(type)) {
            compressedStream = new XZCompressorInputStream(bufferedResourceStream);
        } else {
            throw new RuntimeException("Unrecognized archive media type: " + type);
        }

        if (archiveStream != null) {
            importDataArchive(archive, archiveStream, options);
        } else {
            importDataArchive(archive, compressedStream, options);
        }
    } catch (IOException e) {
        throw new RuntimeException("Could not load InputStream for resource: " + archive, e);
    } finally {
        IOUtils.closeQuietly(bufferedResourceStream);
    }
}
Also used : JarArchiveInputStream(org.apache.commons.compress.archivers.jar.JarArchiveInputStream) GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) ZipArchiveInputStream(org.apache.commons.compress.archivers.zip.ZipArchiveInputStream) ArArchiveInputStream(org.apache.commons.compress.archivers.ar.ArArchiveInputStream) CompressorInputStream(org.apache.commons.compress.compressors.CompressorInputStream) XZCompressorInputStream(org.apache.commons.compress.compressors.xz.XZCompressorInputStream) Pack200CompressorInputStream(org.apache.commons.compress.compressors.pack200.Pack200CompressorInputStream) BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) IOException(java.io.IOException) Pack200CompressorInputStream(org.apache.commons.compress.compressors.pack200.Pack200CompressorInputStream) TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) JarArchiveInputStream(org.apache.commons.compress.archivers.jar.JarArchiveInputStream) ArchiveInputStream(org.apache.commons.compress.archivers.ArchiveInputStream) CpioArchiveInputStream(org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream) ArArchiveInputStream(org.apache.commons.compress.archivers.ar.ArArchiveInputStream) TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) ZipArchiveInputStream(org.apache.commons.compress.archivers.zip.ZipArchiveInputStream) BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) BufferedInputStream(java.io.BufferedInputStream) MediaType(org.apache.tika.mime.MediaType) CpioArchiveInputStream(org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream) XZCompressorInputStream(org.apache.commons.compress.compressors.xz.XZCompressorInputStream)

Example 28 with GzipCompressorInputStream

use of org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream in project MSEC by Tencent.

the class GzipUtil method unzip.

/**
 * Decompresses a gzip file into a sibling file.
 *
 * When the source name ends with ".gz" the destination is the same name without that suffix;
 * otherwise ".decompress" is appended to the source name.
 *
 * @param srcFile path of the gzip-compressed file to read
 * @throws Exception if the source cannot be read as gzip or the destination cannot be written
 */
public static void unzip(String srcFile) throws Exception {
    final String gzSuffix = ".gz";
    // endsWith (rather than the first indexOf) so names such as "a.gz.b.gz" are handled too.
    final String destFile;
    if (srcFile.endsWith(gzSuffix)) {
        destFile = srcFile.substring(0, srcFile.length() - gzSuffix.length());
    } else {
        destFile = srcFile + ".decompress";
    }
    // Resources are opened in the original order (input first, then output) and are closed on
    // every exit path, including when the gzip header is invalid or a read/write fails.
    try (FileInputStream fileIn = new FileInputStream(srcFile);
         GzipCompressorInputStream in = new GzipCompressorInputStream(fileIn);
         FileOutputStream out = new FileOutputStream(destFile)) {
        byte[] buf = new byte[10240];
        int len;
        while ((len = in.read(buf)) > 0) {
            out.write(buf, 0, len);
        }
        out.flush();
    }
}
Also used : GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) FileOutputStream(java.io.FileOutputStream) FileInputStream(java.io.FileInputStream)

Example 29 with GzipCompressorInputStream

use of org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream in project ats-framework by Axway.

the class LocalFileSystemOperations method extractTarGZip.

/**
 * Extracts a gzip-compressed tar archive into the given directory.
 *
 * Directory entries are created as directories, all other entries are written as files (with
 * their parent directories created on demand). On non-Windows systems the POSIX permissions
 * derived from the entry's mode are applied to each extracted path.
 *
 * @param tarGzipFilePath path of the .tar.gz archive to read
 * @param outputDirPath directory the entries are extracted into
 * @throws FileSystemOperationException wrapping any failure, including an entry whose name would
 *         place it outside {@code outputDirPath}
 */
private void extractTarGZip(String tarGzipFilePath, String outputDirPath) {
    TarArchiveEntry entry = null;
    try (TarArchiveInputStream tis = new TarArchiveInputStream(new GzipCompressorInputStream(new FileInputStream(tarGzipFilePath)))) {
        while ((entry = (TarArchiveEntry) tis.getNextEntry()) != null) {
            if (log.isDebugEnabled()) {
                log.debug("Extracting " + entry.getName());
            }
            File entryDestination = new File(outputDirPath, entry.getName());
            // Reject entries such as "../../x" that would be written outside the target directory.
            if (!entryDestination.toPath().toAbsolutePath().normalize().startsWith(new File(outputDirPath).toPath().toAbsolutePath().normalize())) {
                throw new IOException("Entry '" + entry.getName() + "' resolves outside of the target directory '" + outputDirPath + "'");
            }
            if (entry.isDirectory()) {
                entryDestination.mkdirs();
            } else {
                entryDestination.getParentFile().mkdirs();
                // NOTE(review): the trailing flags of copyStream presumably mean "do not close the
                // input, close the output" - confirm against IoUtils. try-with-resources makes sure
                // the file is closed even when the copy fails; a second close is harmless.
                try (OutputStream out = new BufferedOutputStream(new FileOutputStream(entryDestination))) {
                    IoUtils.copyStream(tis, out, false, true);
                }
            }
            if (OperatingSystemType.getCurrentOsType() != OperatingSystemType.WINDOWS) {
                // check if the OS is UNIX
                // set file/dir permissions, after it is created
                Files.setPosixFilePermissions(entryDestination.getCanonicalFile().toPath(), getPosixFilePermission(entry.getMode()));
            }
        }
    } catch (Exception e) {
        String errorMsg = null;
        if (entry != null) {
            errorMsg = "Unable to gunzip " + entry.getName() + " from " + tarGzipFilePath + ".Target directory '" + outputDirPath + "' is in inconsistent state.";
        } else {
            errorMsg = "Could not read data from " + tarGzipFilePath;
        }
        throw new FileSystemOperationException(errorMsg, e);
    }
}
Also used : TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) FileSystemOperationException(com.axway.ats.common.filesystem.FileSystemOperationException) DataOutputStream(java.io.DataOutputStream) BufferedOutputStream(java.io.BufferedOutputStream) OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) FileOutputStream(java.io.FileOutputStream) RandomAccessFile(java.io.RandomAccessFile) ZipFile(org.apache.commons.compress.archivers.zip.ZipFile) File(java.io.File) BufferedOutputStream(java.io.BufferedOutputStream) TarArchiveEntry(org.apache.commons.compress.archivers.tar.TarArchiveEntry) FileInputStream(java.io.FileInputStream) OverlappingFileLockException(java.nio.channels.OverlappingFileLockException) FileSystemOperationException(com.axway.ats.common.filesystem.FileSystemOperationException) AttributeNotSupportedException(com.axway.ats.core.filesystem.exceptions.AttributeNotSupportedException) EOFException(java.io.EOFException) FileNotFoundException(java.io.FileNotFoundException) FileDoesNotExistException(com.axway.ats.core.filesystem.exceptions.FileDoesNotExistException) NoSuchAlgorithmException(java.security.NoSuchAlgorithmException) SocketTimeoutException(java.net.SocketTimeoutException) IOException(java.io.IOException)

Example 30 with GzipCompressorInputStream

use of org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream in project apex-malhar by apache.

the class HdfsTestSource method configure.

/**
 * Reads the source settings from the context, derives the initial timestamp from the
 * configured "year-month-day" date, collects the data files and opens a reader on the file at
 * index {@code currentFile} through a gzip decompression stream.
 *
 * @param context configuration holding the source directory, the rate and the initial date
 * @throws IllegalArgumentException if the directory is null/empty or the date does not consist
 *         of exactly three '-' separated parts
 * @throws RuntimeException wrapping any {@link IOException} raised while locating or opening
 *         the data files
 */
@Override
public void configure(Context context) {
    directory = context.getString(SOURCE_DIR);
    rate = context.getInteger(RATE, rate);
    initDate = context.getString(INIT_DATE);
    Preconditions.checkArgument(!Strings.isNullOrEmpty(directory));
    directoryPath = new Path(directory);

    // initDate is split into year / month / day on '-'
    final String[] dateFields = initDate.split("-");
    Preconditions.checkArgument(dateFields.length == 3);
    final Calendar startOfInitDay = Calendar.getInstance();
    final int year = Integer.parseInt(dateFields[0]);
    // Calendar months are zero-based
    final int zeroBasedMonth = Integer.parseInt(dateFields[1]) - 1;
    final int dayOfMonth = Integer.parseInt(dateFields[2]);
    startOfInitDay.set(year, zeroBasedMonth, dayOfMonth, 0, 0, 0);
    initTime = startOfInitDay.getTimeInMillis();

    try {
        final List<String> discovered = findFiles();
        for (String discoveredFile : discovered) {
            dataFiles.add(discoveredFile);
        }
        if (logger.isDebugEnabled()) {
            final SimpleDateFormat dayFormat = new SimpleDateFormat("yyyy-MM-dd");
            final String oneDayBackText = dayFormat.format(oneDayBack);
            final String initTimeText = dayFormat.format(new Date(initTime));
            logger.debug("settings {} {} {} {} {}", directory, rate, oneDayBackText, initTimeText, currentFile);
            for (String dataFile : dataFiles) {
                logger.debug("settings add file {}", dataFile);
            }
        }
        fs = FileSystem.newInstance(new Path(directory).toUri(), configuration);
        final Path filePath = new Path(dataFiles.get(currentFile));
        final GzipCompressorInputStream gzipIn = new GzipCompressorInputStream(fs.open(filePath));
        final InputStreamReader gzipReader = new InputStreamReader(gzipIn);
        br = new BufferedReader(gzipReader);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    finished = true;
}
Also used : Path(org.apache.hadoop.fs.Path) GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) InputStreamReader(java.io.InputStreamReader) Calendar(java.util.Calendar) BufferedReader(java.io.BufferedReader) IOException(java.io.IOException) SimpleDateFormat(java.text.SimpleDateFormat) Date(java.util.Date)

Aggregations

GzipCompressorInputStream (org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream)58 TarArchiveInputStream (org.apache.commons.compress.archivers.tar.TarArchiveInputStream)46 TarArchiveEntry (org.apache.commons.compress.archivers.tar.TarArchiveEntry)40 IOException (java.io.IOException)29 FileInputStream (java.io.FileInputStream)26 File (java.io.File)23 BufferedInputStream (java.io.BufferedInputStream)22 FileOutputStream (java.io.FileOutputStream)20 InputStream (java.io.InputStream)16 OutputStream (java.io.OutputStream)10 Path (java.nio.file.Path)9 ArrayList (java.util.ArrayList)8 BufferedOutputStream (java.io.BufferedOutputStream)7 ByteArrayInputStream (java.io.ByteArrayInputStream)7 BZip2CompressorInputStream (org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream)6 BufferedReader (java.io.BufferedReader)4 ByteArrayOutputStream (java.io.ByteArrayOutputStream)4 InputStreamReader (java.io.InputStreamReader)4 URL (java.net.URL)4 ArchiveEntry (org.apache.commons.compress.archivers.ArchiveEntry)4