Search in sources :

Example 11 with ZipArchiveInputStream

use of org.apache.commons.compress.archivers.zip.ZipArchiveInputStream in project ice by Netflix.

the class BillingFileProcessor method processBillingZipFile.

/**
 * Processes every non-directory entry of the given billing zip archive.
 * <p>
 * Each entry is handed to {@code processBillingFile} together with the shared zip
 * stream. I/O failures raised while iterating are logged instead of being propagated,
 * and both streams are closed before the method returns.
 *
 * @param file     the zip archive to read
 * @param withTags passed through unchanged to {@code processBillingFile}
 * @throws IOException if the archive file cannot be opened
 */
private void processBillingZipFile(File file, boolean withTags) throws IOException {
    InputStream input = new FileInputStream(file);
    ZipArchiveInputStream zipInput = new ZipArchiveInputStream(input);
    try {
        ArchiveEntry entry;
        while ((entry = zipInput.getNextEntry()) != null) {
            if (entry.isDirectory()) {
                continue;
            }
            processBillingFile(entry.getName(), zipInput, withTags);
        }
    } catch (IOException e) {
        // getMessage() may be null; compare from the literal side so a message-less
        // exception is logged as an error instead of triggering a NullPointerException.
        if ("Stream closed".equals(e.getMessage())) {
            logger.info("reached end of file.");
        } else {
            logger.error("Error processing " + file, e);
        }
    } finally {
        try {
            zipInput.close();
        } catch (IOException e) {
            logger.error("Error closing " + file, e);
        }
        try {
            input.close();
        } catch (IOException e1) {
            logger.error("Cannot close input for " + file, e1);
        }
    }
}
Also used : ZipArchiveInputStream(org.apache.commons.compress.archivers.zip.ZipArchiveInputStream) ZipArchiveInputStream(org.apache.commons.compress.archivers.zip.ZipArchiveInputStream) ArchiveEntry(org.apache.commons.compress.archivers.ArchiveEntry)

Example 12 with ZipArchiveInputStream

use of org.apache.commons.compress.archivers.zip.ZipArchiveInputStream in project dex2jar by pxb1988.

the class BadZipEntryFlagTest method test1.

/**
 * Iterates over all entries of the {@code /bad.zip} test resource, clearing the
 * encryption flag on each entry and reading one byte from every non-directory entry.
 * The test passes when the iteration completes without throwing.
 *
 * @throws IOException if reading the archive fails
 */
@Test
public void test1() throws IOException {
    ZipArchiveInputStream zis = new ZipArchiveInputStream(BadZipEntryFlagTest.class.getResourceAsStream("/bad.zip"));
    try {
        for (ZipArchiveEntry e = zis.getNextZipEntry(); e != null; e = zis.getNextZipEntry()) {
            e.getGeneralPurposeBit().useEncryption(false);
            if (!e.isDirectory()) {
                zis.read();
                System.out.println(e.getName());
            }
        }
    } finally {
        // Release the resource stream even when an entry fails to parse.
        zis.close();
    }
}
Also used : ZipArchiveInputStream(org.apache.commons.compress.archivers.zip.ZipArchiveInputStream) ZipArchiveEntry(org.apache.commons.compress.archivers.zip.ZipArchiveEntry) Test(org.junit.Test)

Example 13 with ZipArchiveInputStream

use of org.apache.commons.compress.archivers.zip.ZipArchiveInputStream in project zm-mailbox by Zimbra.

the class ZipUtil method getZipEntryNameAndSize.

/**
 * Locates the {@code seqNo}-th non-directory entry of a zip stream and reports its
 * name and size.
 * <p>
 * On success the zip stream is passed to the returned {@link ZipNameAndSize} and is
 * left open. On every other path (entry not found, or an error while scanning) the
 * zip stream is closed before the exception reaches the caller.
 *
 * @param inputStream archive input stream
 * @param locale - best guess as to locale for the filenames in the archive
 * @param seqNo - the zero-based order of the item to return (excluding directory entries)
 * @return the guessed entry name, the entry size and the zip stream the entry was found in
 * @throws IOException if reading the archive fails or it has no entry at position {@code seqNo}
 */
public static ZipNameAndSize getZipEntryNameAndSize(InputStream inputStream, Locale locale, int seqNo) throws IOException {
    ZipArchiveInputStream zis = new ZipArchiveInputStream(inputStream, cp437charset.name(), false);
    boolean handedOff = false;
    try {
        ZipArchiveEntry ze;
        int idx = 0;
        while ((ze = zis.getNextZipEntry()) != null) {
            if (ze.isDirectory()) {
                continue;
            }
            if (idx++ == seqNo) {
                String entryName = bestGuessAtEntryName(ze, locale);
                ZipNameAndSize result = new ZipNameAndSize(entryName, ze.getSize(), zis);
                // From here on the result holds the stream; it must stay open.
                handedOff = true;
                return result;
            }
        }
        throw new IOException("file " + seqNo + " not in archive");
    } finally {
        if (!handedOff) {
            try {
                zis.close();
            } catch (IOException ignored) {
                // The exception already propagating describes the real failure;
                // a secondary close error must not replace it.
            }
        }
    }
}
Also used : ZipArchiveInputStream(org.apache.commons.compress.archivers.zip.ZipArchiveInputStream) ZipArchiveEntry(org.apache.commons.compress.archivers.zip.ZipArchiveEntry) IOException(java.io.IOException)

Example 14 with ZipArchiveInputStream

use of org.apache.commons.compress.archivers.zip.ZipArchiveInputStream in project stanbol by apache.

the class MultiThreadedTestBase method initTestData.

/**
 * Helper method that initialises the test data based on the parsed parameter.
 * <p>
 * The source named by {@code settings.getTestData()} is resolved, in order, as a local
 * file, a resource of the test class loader, a system class loader resource and
 * finally a URL. Names ending in {@code gz}, {@code bz2} or {@code zip} are unwrapped
 * before parsing; for ZIP archives only the first entry is read. The media type is
 * taken from the settings when present and otherwise derived from the file extension.
 *
 * @param settings the settings of the Test.
 * @return the Iterator over the contents in the test data
 * @throws IOException on any error while accessing the parsed test data
 */
private Iterator<String> initTestData(TestSettings settings) throws IOException {
    log.info("Read Testdata from '{}'", settings.getTestData());
    File testFile = new File(settings.getTestData());
    InputStream is = null;
    if (testFile.isFile()) {
        log.info(" ... init from File");
        is = new FileInputStream(testFile);
    } else {
        is = MultiThreadedTest.class.getClassLoader().getResourceAsStream(settings.getTestData());
        if (is == null) {
            is = ClassLoader.getSystemResourceAsStream(settings.getTestData());
        }
        if (is != null) {
            // Only report the classpath when the data really came from there.
            log.info(" ... init via Classpath");
        } else {
            try {
                is = new URL(settings.getTestData()).openStream();
                log.info(" ... init from URL");
            } catch (MalformedURLException e) {
                // Not a URL: deliberately ignored, the assertion below reports the
                // missing test data.
            }
        }
    }
    Assert.assertNotNull("Unable to load the parsed TestData '" + settings.getTestData() + "'!", is);
    log.info("  - InputStream: {}", is.getClass().getSimpleName());
    String name = FilenameUtils.getName(settings.getTestData());
    if ("gz".equalsIgnoreCase(FilenameUtils.getExtension(name))) {
        is = new GZIPInputStream(is);
        name = FilenameUtils.removeExtension(name);
        log.debug("   - from GZIP Archive");
    } else if ("bz2".equalsIgnoreCase(FilenameUtils.getExtension(name))) {
        is = new BZip2CompressorInputStream(is);
        name = FilenameUtils.removeExtension(name);
        log.debug("   - from BZip2 Archive");
    } else if ("zip".equalsIgnoreCase(FilenameUtils.getExtension(name))) {
        ZipArchiveInputStream zipin = new ZipArchiveInputStream(is);
        ArchiveEntry entry = zipin.getNextEntry();
        // An empty archive yields no entry; fail with a clear message instead of an NPE.
        Assert.assertNotNull("The ZIP archive '" + settings.getTestData() + "' does not contain any entry!", entry);
        log.info("For ZIP archives only the 1st Entry will be processed!");
        name = FilenameUtils.getName(entry.getName());
        log.info("  - processed Entry: {}", entry.getName());
        // Read the decompressed content of the first entry, not the raw archive bytes.
        is = zipin;
    } else {
        // else uncompressed data ...
        log.info("  - uncompressed source: {}", name);
    }
    String mediaType;
    if (settings.getTestDataMediaType() != null) {
        mediaType = settings.getTestDataMediaType();
    } else {
        //parse based on extension
        String ext = FilenameUtils.getExtension(name);
        if ("txt".equalsIgnoreCase(ext)) {
            mediaType = TEXT_PLAIN;
        } else if ("rdf".equalsIgnoreCase(ext)) {
            mediaType = SupportedFormat.RDF_XML;
        } else if ("xml".equalsIgnoreCase(ext)) {
            mediaType = SupportedFormat.RDF_XML;
        } else if ("ttl".equalsIgnoreCase(ext)) {
            mediaType = SupportedFormat.TURTLE;
        } else if ("n3".equalsIgnoreCase(ext)) {
            mediaType = SupportedFormat.N3;
        } else if ("nt".equalsIgnoreCase(ext)) {
            mediaType = SupportedFormat.N_TRIPLE;
        } else if ("json".equalsIgnoreCase(ext)) {
            mediaType = SupportedFormat.RDF_JSON;
        } else if (name.indexOf('.') < 0) {
            //no extension
            //try plain text
            mediaType = TEXT_PLAIN;
        } else {
            log.info("Unkown File Extension {} for resource name {}", ext, name);
            mediaType = null;
        }
    }
    Assert.assertNotNull("Unable to detect MediaType for RDFTerm '" + name + "'. Please use the property '" + PROPERTY_TEST_DATA_TYPE + "' to manually parse the MediaType!", mediaType);
    log.info("  - Media-Type: {}", mediaType);
    //now init the iterator for the test data
    return TEXT_PLAIN.equalsIgnoreCase(mediaType) ? createTextDataIterator(is, mediaType) : createRdfDataIterator(is, mediaType, settings.getContentProperty());
}
Also used : GZIPInputStream(java.util.zip.GZIPInputStream) MalformedURLException(java.net.MalformedURLException) BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) ZipArchiveInputStream(org.apache.commons.compress.archivers.zip.ZipArchiveInputStream) GZIPInputStream(java.util.zip.GZIPInputStream) BZip2CompressorInputStream(org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream) FileInputStream(java.io.FileInputStream) ZipArchiveInputStream(org.apache.commons.compress.archivers.zip.ZipArchiveInputStream) InputStream(java.io.InputStream) ArchiveEntry(org.apache.commons.compress.archivers.ArchiveEntry) File(java.io.File) FileInputStream(java.io.FileInputStream) URL(java.net.URL)

Example 15 with ZipArchiveInputStream

use of org.apache.commons.compress.archivers.zip.ZipArchiveInputStream in project tika by apache.

the class IWorkPackageParser method parse.

/**
 * Walks the entries of a zip stream and, for every entry whose name is listed in
 * {@code IWORK_CONTENT_ENTRIES}, detects the iWork document type and emits the entry
 * through an {@link XHTMLContentHandler}.
 * <p>
 * Entries whose type cannot be detected are skipped. For the {@code ENCRYPTED} type
 * only the content type and an empty XHTML document are produced. The zip stream is
 * intentionally left open.
 *
 * @param stream   the zip-packaged document
 * @param handler  receives the generated XHTML events
 * @param metadata receives the detected content type
 * @param context  supplies the SAX parser used for the entry content
 * @throws IOException   if reading the archive fails
 * @throws SAXException  if SAX processing of an entry fails
 * @throws TikaException if the detected type has no matching handler
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    ZipArchiveInputStream archive = new ZipArchiveInputStream(stream);
    for (ZipArchiveEntry current = archive.getNextZipEntry(); current != null; current = archive.getNextZipEntry()) {
        if (!IWORK_CONTENT_ENTRIES.contains(current.getName())) {
            continue;
        }
        // Peek at the start of the entry to detect its type, then rewind.
        InputStream buffered = new BufferedInputStream(archive, 4096);
        buffered.mark(4096);
        IWORKDocumentType detected = IWORKDocumentType.detectType(buffered);
        buffered.reset();
        if (detected == null) {
            continue;
        }
        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        ContentHandler delegate = null;
        switch(detected) {
            case KEYNOTE:
                delegate = new KeynoteContentHandler(xhtml, metadata);
                break;
            case NUMBERS:
                delegate = new NumbersContentHandler(xhtml, metadata);
                break;
            case PAGES:
                delegate = new PagesContentHandler(xhtml, metadata);
                break;
            case ENCRYPTED:
                // We can't do anything for the file right now; delegate stays null
                break;
            default:
                throw new TikaException("Unhandled iWorks file " + detected);
        }
        metadata.add(Metadata.CONTENT_TYPE, detected.getType().toString());
        xhtml.startDocument();
        if (delegate != null) {
            InputStream shielded = new CloseShieldInputStream(buffered);
            context.getSAXParser().parse(shielded, new OfflineContentHandler(delegate));
        }
        xhtml.endDocument();
    }
    // Don't close the zip InputStream (TIKA-1117).
}
Also used : TikaException(org.apache.tika.exception.TikaException) ZipArchiveInputStream(org.apache.commons.compress.archivers.zip.ZipArchiveInputStream) BufferedInputStream(java.io.BufferedInputStream) ZipArchiveInputStream(org.apache.commons.compress.archivers.zip.ZipArchiveInputStream) CloseShieldInputStream(org.apache.commons.io.input.CloseShieldInputStream) InputStream(java.io.InputStream) XHTMLContentHandler(org.apache.tika.sax.XHTMLContentHandler) XHTMLContentHandler(org.apache.tika.sax.XHTMLContentHandler) OfflineContentHandler(org.apache.tika.sax.OfflineContentHandler) ContentHandler(org.xml.sax.ContentHandler) OfflineContentHandler(org.apache.tika.sax.OfflineContentHandler) BufferedInputStream(java.io.BufferedInputStream) ZipArchiveEntry(org.apache.commons.compress.archivers.zip.ZipArchiveEntry) CloseShieldInputStream(org.apache.commons.io.input.CloseShieldInputStream)

Aggregations

ZipArchiveInputStream (org.apache.commons.compress.archivers.zip.ZipArchiveInputStream): 15, ArchiveEntry (org.apache.commons.compress.archivers.ArchiveEntry): 8, ZipArchiveEntry (org.apache.commons.compress.archivers.zip.ZipArchiveEntry): 8, BufferedInputStream (java.io.BufferedInputStream): 7, FileInputStream (java.io.FileInputStream): 7, InputStream (java.io.InputStream): 6, IOException (java.io.IOException): 5, File (java.io.File): 4, Archive (com.github.junrar.Archive): 3, FileHeader (com.github.junrar.rarfile.FileHeader): 3, FileOutputStream (java.io.FileOutputStream): 3, BZip2CompressorInputStream (org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream): 3, ImageInfo (com.github.hmdev.info.ImageInfo): 2, ByteArrayInputStream (java.io.ByteArrayInputStream): 2, URL (java.net.URL): 2, GZIPInputStream (java.util.zip.GZIPInputStream): 2, ArchiveInputStream (org.apache.commons.compress.archivers.ArchiveInputStream): 2, TarArchiveInputStream (org.apache.commons.compress.archivers.tar.TarArchiveInputStream): 2, ChapterInfo (com.github.hmdev.info.ChapterInfo): 1, ChapterLineInfo (com.github.hmdev.info.ChapterLineInfo): 1