Search in sources:

Example 1 with GzipCompressorInputStream

use of org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream in project pinot by linkedin.

the class TarGzCompressionUtils method unTarOneFile.

/**
 * Pulls a single entry out of a gzip-compressed tar stream and returns its content in memory.
 *
 * <p>Entries are scanned in archive order; the first one whose name <em>contains</em>
 * {@code filename} (substring match, not an exact match) is copied fully into a byte array.
 * The gzip and tar wrappers created here are always closed quietly before returning.
 *
 * @param tarGzInputStream the gzip-compressed tar stream to read from
 * @param filename         the text the entry name must contain
 * @return a {@link ByteArrayInputStream} over the matching entry's bytes, or {@code null}
 *         when no entry name contains {@code filename}
 * @throws FileNotFoundException declared by the signature
 * @throws IOException           if the stream cannot be decompressed or read
 * @throws ArchiveException      if the tar archive stream cannot be created
 */
public static InputStream unTarOneFile(InputStream tarGzInputStream, final String filename) throws FileNotFoundException, IOException, ArchiveException {
    InputStream gzipStream = null;
    TarArchiveInputStream tarStream = null;
    InputStream extracted = null;
    try {
        gzipStream = new GzipCompressorInputStream(tarGzInputStream);
        tarStream = (TarArchiveInputStream) new ArchiveStreamFactory().createArchiveInputStream("tar", gzipStream);
        while (true) {
            final TarArchiveEntry current = (TarArchiveEntry) tarStream.getNextEntry();
            if (current == null) {
                // End of archive reached without a match.
                break;
            }
            if (!current.getName().contains(filename)) {
                continue;
            }
            // The tar stream is positioned on the matching entry; buffer its bytes.
            final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
            IOUtils.copy(tarStream, buffer);
            extracted = new ByteArrayInputStream(buffer.toByteArray());
            break;
        }
    } finally {
        IOUtils.closeQuietly(tarStream);
        IOUtils.closeQuietly(gzipStream);
    }
    return extracted;
}
Also used : TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) ArchiveStreamFactory(org.apache.commons.compress.archivers.ArchiveStreamFactory) ByteArrayInputStream(java.io.ByteArrayInputStream) BufferedInputStream(java.io.BufferedInputStream) TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) TarArchiveEntry(org.apache.commons.compress.archivers.tar.TarArchiveEntry)

Example 2 with GzipCompressorInputStream

use of org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream in project pinot by linkedin.

the class TarGzCompressionUtils method unTar.

/**
 * Extracts a gzip-compressed tar archive into an output directory.
 *
 * <p>Each entry is written below {@code outputDir} under the entry's own name. An entry whose
 * name resolves outside of {@code outputDir} (for example via {@code ../}) is rejected with an
 * {@link IOException} instead of being written.
 *
 * <p>A directory entry that already exists on disk is deleted (and not recreated here); missing
 * parent directories of file entries are created on demand.
 *
 * @param inputFile the input gzip-compressed tar file
 * @param outputDir the directory to extract into
 * @return the {@link List} of {@link File}s for every entry processed, in archive order
 * @throws FileNotFoundException if the input file or an output file cannot be opened
 * @throws IOException           if reading, writing or directory creation fails, or if an entry
 *                               would be written outside of {@code outputDir}
 * @throws ArchiveException      if the tar archive stream cannot be created
 * @throws IllegalStateException if a directory entry cannot be created
 */
public static List<File> unTar(final File inputFile, final File outputDir) throws FileNotFoundException, IOException, ArchiveException {
    LOGGER.debug(String.format("Untaring %s to dir %s.", inputFile.getAbsolutePath(), outputDir.getAbsolutePath()));
    TarArchiveInputStream debInputStream = null;
    InputStream is = null;
    final List<File> untaredFiles = new LinkedList<File>();
    // Canonical form of the target directory, used to detect entries escaping it.
    final String canonicalDirPath = outputDir.getCanonicalPath();
    final String canonicalDirPrefix = canonicalDirPath.endsWith(File.separator) ? canonicalDirPath : canonicalDirPath + File.separator;
    try {
        is = new GzipCompressorInputStream(new BufferedInputStream(new FileInputStream(inputFile)));
        debInputStream = (TarArchiveInputStream) new ArchiveStreamFactory().createArchiveInputStream("tar", is);
        TarArchiveEntry entry = null;
        while ((entry = (TarArchiveEntry) debInputStream.getNextEntry()) != null) {
            final File outputFile = new File(outputDir, entry.getName());
            // Guard against path traversal: the resolved target must stay inside outputDir.
            final String canonicalEntryPath = outputFile.getCanonicalPath();
            if (!canonicalEntryPath.equals(canonicalDirPath) && !canonicalEntryPath.startsWith(canonicalDirPrefix)) {
                throw new IOException(String.format("Tar entry %s resolves outside of the output directory %s.", entry.getName(), outputDir.getAbsolutePath()));
            }
            if (entry.isDirectory()) {
                LOGGER.debug(String.format("Attempting to write output directory %s.", outputFile.getAbsolutePath()));
                if (!outputFile.exists()) {
                    LOGGER.debug(String.format("Attempting to create output directory %s.", outputFile.getAbsolutePath()));
                    if (!outputFile.mkdirs()) {
                        throw new IllegalStateException(String.format("Couldn't create directory %s.", outputFile.getAbsolutePath()));
                    }
                } else {
                    LOGGER.error("The directory already there. Deleting - " + outputFile.getAbsolutePath());
                    FileUtils.deleteDirectory(outputFile);
                }
            } else {
                LOGGER.debug(String.format("Creating output file %s.", outputFile.getAbsolutePath()));
                final File directory = outputFile.getParentFile();
                // Re-check isDirectory() so a concurrent creation is not reported as a failure.
                if (!directory.exists() && !directory.mkdirs() && !directory.isDirectory()) {
                    throw new IOException(String.format("Couldn't create directory %s.", directory.getAbsolutePath()));
                }
                OutputStream outputFileStream = null;
                try {
                    outputFileStream = new FileOutputStream(outputFile);
                    IOUtils.copy(debInputStream, outputFileStream);
                } finally {
                    IOUtils.closeQuietly(outputFileStream);
                }
            }
            untaredFiles.add(outputFile);
        }
    } finally {
        IOUtils.closeQuietly(debInputStream);
        IOUtils.closeQuietly(is);
    }
    return untaredFiles;
}
Also used : GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) BufferedInputStream(java.io.BufferedInputStream) TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) ByteArrayInputStream(java.io.ByteArrayInputStream) GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) BufferedOutputStream(java.io.BufferedOutputStream) TarArchiveOutputStream(org.apache.commons.compress.archivers.tar.TarArchiveOutputStream) GzipCompressorOutputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorOutputStream) OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) LinkedList(java.util.LinkedList) FileInputStream(java.io.FileInputStream) TarArchiveEntry(org.apache.commons.compress.archivers.tar.TarArchiveEntry) TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) ArchiveStreamFactory(org.apache.commons.compress.archivers.ArchiveStreamFactory) BufferedInputStream(java.io.BufferedInputStream) FileOutputStream(java.io.FileOutputStream) File(java.io.File)

Example 3 with GzipCompressorInputStream

use of org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream in project karaf by apache.

the class RunMojo method extractGzArchive.

/**
 * Decompresses a gzip stream into a file.
 *
 * <p>All streams, including the supplied {@code tarGz}, are closed when this method returns,
 * whether it completes normally or fails part-way through the copy.
 *
 * @param tarGz the gzip-compressed input stream; closed by this method
 * @param tar   the file that receives the decompressed bytes
 * @throws IOException if the input is not valid gzip data or reading/writing fails
 */
private static void extractGzArchive(InputStream tarGz, File tar) throws IOException {
    // Resources are closed in reverse order (gzIn, out, in) even when an exception is thrown.
    try (BufferedInputStream in = new BufferedInputStream(tarGz);
            FileOutputStream out = new FileOutputStream(tar);
            GzipCompressorInputStream gzIn = new GzipCompressorInputStream(in)) {
        final byte[] buffer = new byte[1000];
        int n;
        while (-1 != (n = gzIn.read(buffer))) {
            out.write(buffer, 0, n);
        }
    }
}
Also used : GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream)

Example 4 with GzipCompressorInputStream

use of org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream in project tika by apache.

the class ExtractReader method loadExtract.

/**
 * Loads the list of {@link Metadata} objects stored in an extract file.
 *
 * <p>The file is validated first (must be a regular, non-empty file whose size lies within the
 * configured minimum/maximum when those limits are enabled), then opened, optionally wrapped in
 * a decompressing stream chosen from the file-name suffix, and parsed either as JSON or as a
 * text file. For JSON extracts the resulting list is post-processed according to
 * {@code alterMetadataList}: kept as is, truncated to the first element, or collapsed into the
 * first element with the content of all elements concatenated.
 *
 * @param extractFile the extract file to read
 * @return the parsed metadata list, or {@code null} if the file uses a compression suffix that
 *         is not supported
 * @throws ExtractReaderException if the file is missing, has an unrecognised suffix, is empty,
 *                                is too short or too long, cannot be read, or cannot be parsed
 */
public List<Metadata> loadExtract(Path extractFile) throws ExtractReaderException {
    List<Metadata> metadataList = null;
    if (extractFile == null || !Files.isRegularFile(extractFile)) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.NO_EXTRACT_FILE);
    }
    FileSuffixes fileSuffixes = parseSuffixes(extractFile.getFileName().toString());
    if (fileSuffixes.txtOrJson == null) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.INCORRECT_EXTRACT_FILE_SUFFIX);
    }
    long length = -1L;
    try {
        length = Files.size(extractFile);
    } catch (IOException e) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.IO_EXCEPTION);
    }
    if (length == 0L) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.ZERO_BYTE_EXTRACT_FILE);
    }
    if (minExtractLength > IGNORE_LENGTH && length < minExtractLength) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.EXTRACT_FILE_TOO_SHORT);
    }
    if (maxExtractLength > IGNORE_LENGTH && length > maxExtractLength) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.EXTRACT_FILE_TOO_LONG);
    }
    Reader reader = null;
    InputStream is = null;
    try {
        is = Files.newInputStream(extractFile);
        if (fileSuffixes.compression != null) {
            if (fileSuffixes.compression.equals("bz2")) {
                is = new BZip2CompressorInputStream(is);
            } else if (fileSuffixes.compression.equals("gz") || fileSuffixes.compression.equals("gzip")) {
                is = new GzipCompressorInputStream(is);
            } else if (fileSuffixes.compression.equals("zip")) {
                // NOTE(review): the "zip" suffix is routed to the Unix compress (.Z) decoder
                // rather than a zip archive reader; confirm this mapping is intended.
                is = new ZCompressorInputStream(is);
            } else {
                LOG.warn("Can't yet process compression of type: {}", fileSuffixes.compression);
                // Release the already-opened file stream before bailing out.
                IOUtils.closeQuietly(is);
                return metadataList;
            }
        }
        reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8));
    } catch (IOException e) {
        // A failing compressor constructor leaves 'is' pointing at the raw file stream; close it.
        IOUtils.closeQuietly(is);
        throw new ExtractReaderException(ExtractReaderException.TYPE.IO_EXCEPTION);
    }
    try {
        if (fileSuffixes.txtOrJson.equals("json")) {
            metadataList = JsonMetadataList.fromJson(reader);
            if (alterMetadataList.equals(ALTER_METADATA_LIST.FIRST_ONLY) && metadataList.size() > 1) {
                // Keep only the first (container) metadata object.
                while (metadataList.size() > 1) {
                    metadataList.remove(metadataList.size() - 1);
                }
            } else if (alterMetadataList.equals(ALTER_METADATA_LIST.CONCATENATE_CONTENT_INTO_FIRST) && metadataList.size() > 1) {
                // Join the content of every metadata object (space separated) into the first one.
                StringBuilder sb = new StringBuilder();
                Metadata containerMetadata = metadataList.get(0);
                for (Metadata m : metadataList) {
                    String c = m.get(RecursiveParserWrapper.TIKA_CONTENT);
                    if (c != null) {
                        sb.append(c);
                        sb.append(" ");
                    }
                }
                containerMetadata.set(RecursiveParserWrapper.TIKA_CONTENT, sb.toString());
                while (metadataList.size() > 1) {
                    metadataList.remove(metadataList.size() - 1);
                }
            }
        } else {
            metadataList = generateListFromTextFile(reader, fileSuffixes);
        }
    } catch (IOException e) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.IO_EXCEPTION);
    } catch (TikaException e) {
        throw new ExtractReaderException(ExtractReaderException.TYPE.EXTRACT_PARSE_EXCEPTION);
    } finally {
        IOUtils.closeQuietly(reader);
        IOUtils.closeQuietly(is);
    }
    return metadataList;
}
Also used : GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) TikaException(org.apache.tika.exception.TikaException) InputStreamReader(java.io.InputStreamReader) ZCompressorInputStream(org.apache.commons.compress.compressors.z.ZCompressorInputStream) BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) InputStream(java.io.InputStream) Metadata(org.apache.tika.metadata.Metadata) Reader(java.io.Reader) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) IOException(java.io.IOException) BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) BufferedReader(java.io.BufferedReader) ZCompressorInputStream(org.apache.commons.compress.compressors.z.ZCompressorInputStream)

Example 5 with GzipCompressorInputStream

use of org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream in project incubator-systemml by apache.

the class ValidateLicAndNotice method extractFileFromTGZ.

/**
 * Extracts a single file from a tgz (gzip-compressed tar) archive and stores it on disk.
 *
 * <p>The first entry whose name ends with {@code fileName} is extracted; later matches are
 * ignored. The archive stream is always closed before this method returns, whether or not a
 * matching entry was found.
 *
 * @param	tgzFileName is the name of tgz file from which file to be extracted.
 * @param	fileName is the name of the file to be extracted.
 * @param	strDestLoc is the location where file will be extracted; when {@code null} the
 *			entry's own name is used as the output path.
 * @param 	bFirstDirLevel to indicate to get file from first directory level, i.e. only
 *			entries whose name contains at most one '/' are considered.
 * @return	Success or Failure
 */
public static boolean extractFileFromTGZ(String tgzFileName, String fileName, String strDestLoc, boolean bFirstDirLevel) {
    boolean bRetCode = Constants.bFAILURE;
    TarArchiveInputStream tarIn = null;
    FileInputStream fileIn = null;
    try {
        fileIn = new FileInputStream(tgzFileName);
        tarIn = new TarArchiveInputStream(new GzipCompressorInputStream(new BufferedInputStream(fileIn)));
    } catch (Exception e) {
        Utility.debugPrint(Constants.DEBUG_ERROR, "Exception in unzipping tar file: " + e);
        // The wrapper constructors may fail after the file was opened; do not leak the handle.
        if (fileIn != null) {
            try {
                fileIn.close();
            } catch (Exception ignored) {
                // Best-effort cleanup; the original failure has already been reported.
            }
        }
        return bRetCode;
    }
    try {
        TarArchiveEntry tarEntry = null;
        while ((tarEntry = tarIn.getNextTarEntry()) != null) {
            if (!tarEntry.getName().endsWith(fileName))
                continue;
            // Get file at root (in single directory) level. This is for License in root location.
            if (bFirstDirLevel && (tarEntry.getName().indexOf('/') != tarEntry.getName().lastIndexOf('/')))
                continue;
            int count;
            byte[] data = new byte[Constants.BUFFER];
            String strOutFileName = strDestLoc == null ? tarEntry.getName() : strDestLoc + "/" + fileName;
            // Closing bufOut flushes it; both output streams are released even if the copy fails.
            try (FileOutputStream fos = new FileOutputStream(strOutFileName);
                    BufferedOutputStream bufOut = new BufferedOutputStream(fos, Constants.BUFFER)) {
                while ((count = tarIn.read(data, 0, Constants.BUFFER)) != -1) {
                    bufOut.write(data, 0, count);
                }
            }
            // Only report success once the output has been flushed and closed without error.
            bRetCode = Constants.bSUCCESS;
            break;
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Close the archive on every path, including "no matching entry" and failures.
        try {
            tarIn.close();
        } catch (Exception ignored) {
            // Best-effort cleanup; the extraction result is already decided.
        }
    }
    return bRetCode;
}
Also used : TarArchiveInputStream(org.apache.commons.compress.archivers.tar.TarArchiveInputStream) GzipCompressorInputStream(org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream) BufferedInputStream(java.io.BufferedInputStream) FileOutputStream(java.io.FileOutputStream) BufferedOutputStream(java.io.BufferedOutputStream) FileInputStream(java.io.FileInputStream) IOException(java.io.IOException) TarArchiveEntry(org.apache.commons.compress.archivers.tar.TarArchiveEntry)

Aggregations

GzipCompressorInputStream (org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream)58 TarArchiveInputStream (org.apache.commons.compress.archivers.tar.TarArchiveInputStream)46 TarArchiveEntry (org.apache.commons.compress.archivers.tar.TarArchiveEntry)40 IOException (java.io.IOException)29 FileInputStream (java.io.FileInputStream)26 File (java.io.File)23 BufferedInputStream (java.io.BufferedInputStream)22 FileOutputStream (java.io.FileOutputStream)20 InputStream (java.io.InputStream)16 OutputStream (java.io.OutputStream)10 Path (java.nio.file.Path)9 ArrayList (java.util.ArrayList)8 BufferedOutputStream (java.io.BufferedOutputStream)7 ByteArrayInputStream (java.io.ByteArrayInputStream)7 BZip2CompressorInputStream (org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream)6 BufferedReader (java.io.BufferedReader)4 ByteArrayOutputStream (java.io.ByteArrayOutputStream)4 InputStreamReader (java.io.InputStreamReader)4 URL (java.net.URL)4 ArchiveEntry (org.apache.commons.compress.archivers.ArchiveEntry)4