use of org.apache.commons.compress.archivers.zip.ZipArchiveInputStream in project ice by Netflix.
The class BillingFileProcessor, method processBillingZipFile.
/**
 * Extracts a billing ZIP archive and feeds each regular (non-directory) entry
 * to {@link #processBillingFile}, reading the entry content directly from the
 * shared {@code ZipArchiveInputStream}.
 *
 * @param file     the ZIP archive on disk to process
 * @param withTags whether tag information should be processed for each entry
 * @throws IOException declared for callers; in practice read errors are caught
 *                     and logged rather than propagated
 */
private void processBillingZipFile(File file, boolean withTags) throws IOException {
    InputStream input = new FileInputStream(file);
    ZipArchiveInputStream zipInput = new ZipArchiveInputStream(input);
    try {
        ArchiveEntry entry;
        while ((entry = zipInput.getNextEntry()) != null) {
            if (entry.isDirectory())
                continue;
            processBillingFile(entry.getName(), zipInput, withTags);
        }
    } catch (IOException e) {
        // "Stream closed" is treated here as the normal end-of-archive signal.
        // Constant-first equals() avoids an NPE when getMessage() returns null
        // (the original e.getMessage().equals(...) would throw in that case).
        if ("Stream closed".equals(e.getMessage()))
            logger.info("reached end of file.");
        else
            logger.error("Error processing " + file, e);
    } finally {
        // Close both streams independently so a failure closing one does not
        // prevent closing the other.
        try {
            zipInput.close();
        } catch (IOException e) {
            logger.error("Error closing " + file, e);
        }
        try {
            input.close();
        } catch (IOException e1) {
            logger.error("Cannot close input for " + file, e1);
        }
    }
}
use of org.apache.commons.compress.archivers.zip.ZipArchiveInputStream in project dex2jar by pxb1988.
The class BadZipEntryFlagTest, method test1.
@Test
public void test1() throws IOException {
    // Try-with-resources guarantees the archive stream (and the underlying
    // resource stream it wraps) is closed even when a read throws; the
    // original never closed zis and leaked both streams.
    try (ZipArchiveInputStream zis = new ZipArchiveInputStream(
            BadZipEntryFlagTest.class.getResourceAsStream("/bad.zip"))) {
        for (ZipArchiveEntry e = zis.getNextZipEntry(); e != null; e = zis.getNextZipEntry()) {
            // Clear the (bad) encryption flag so the entry data can be read.
            e.getGeneralPurposeBit().useEncryption(false);
            if (!e.isDirectory()) {
                // A single read() is enough to exercise decoding of the entry.
                zis.read();
                System.out.println(e.getName());
            }
        }
    }
}
use of org.apache.commons.compress.archivers.zip.ZipArchiveInputStream in project zm-mailbox by Zimbra.
The class ZipUtil, method getZipEntryNameAndSize.
/**
 * Locates the {@code seqNo}-th regular (non-directory) entry in a ZIP archive
 * and returns its best-guess name and size together with the archive stream,
 * positioned at that entry so the caller can read its content.
 *
 * @param inputStream archive input stream; ownership transfers to the returned
 *                    {@code ZipNameAndSize} on success, otherwise it is closed
 * @param locale - best guess as to locale for the filenames in the archive
 * @param seqNo - the order of the item to return (excluding directory entries)
 * @return the entry name, its declared size, and the open archive stream
 * @throws IOException if the archive cannot be read or has fewer than
 *                     {@code seqNo + 1} file entries
 */
public static ZipNameAndSize getZipEntryNameAndSize(InputStream inputStream, Locale locale, int seqNo) throws IOException {
    ZipArchiveInputStream zis = new ZipArchiveInputStream(inputStream, cp437charset.name(), false);
    boolean handedOff = false;
    try {
        ZipArchiveEntry ze;
        int idx = 0;
        while ((ze = zis.getNextZipEntry()) != null) {
            if (ze.isDirectory()) {
                continue;
            }
            if (idx++ == seqNo) {
                String entryName = bestGuessAtEntryName(ze, locale);
                // The stream is intentionally left open: the caller reads the
                // entry content through it via the returned ZipNameAndSize.
                handedOff = true;
                return new ZipNameAndSize(entryName, ze.getSize(), zis);
            }
        }
    } finally {
        // Close on every path that does not hand the stream to the caller,
        // including exceptions thrown mid-iteration (the original leaked zis
        // when getNextZipEntry or bestGuessAtEntryName threw).
        if (!handedOff) {
            zis.close();
        }
    }
    throw new IOException("file " + seqNo + " not in archive");
}
use of org.apache.commons.compress.archivers.zip.ZipArchiveInputStream in project stanbol by apache.
The class MultiThreadedTestBase, method initTestData.
/**
 * Helper method that initialises the test data based on the parsed parameter.
 * The data location is resolved in order: local file, classpath resource,
 * system classpath resource, URL. Compressed sources (gz, bz2, zip) are
 * transparently unwrapped; for ZIP archives only the first entry is used.
 * @param settings the settings of the Test.
 * @return the Iterator over the contents in the test data
 * @throws IOException on any error while accessing the parsed test data
 */
private Iterator<String> initTestData(TestSettings settings) throws IOException {
    log.info("Read Testdata from '{}'", settings.getTestData());
    File testFile = new File(settings.getTestData());
    InputStream is = null;
    if (testFile.isFile()) {
        log.info(" ... init from File");
        is = new FileInputStream(testFile);
    }
    if (is == null) {
        is = MultiThreadedTest.class.getClassLoader().getResourceAsStream(settings.getTestData());
        if (is == null) {
            is = ClassLoader.getSystemResourceAsStream(settings.getTestData());
        }
        // Log the classpath origin only when the classpath actually supplied
        // the stream (the original's dangling else also logged this message
        // when the data had been loaded from a plain file).
        if (is != null) {
            log.info(" ... init via Classpath");
        }
    }
    if (is == null) {
        try {
            is = new URL(settings.getTestData()).openStream();
            log.info(" ... init from URL");
        } catch (MalformedURLException e) {
            //not a URL - fall through to the assert below
        }
    }
    Assert.assertNotNull("Unable to load the parsed TestData '" + settings.getTestData() + "'!", is);
    log.info(" - InputStream: {}", is.getClass().getSimpleName());
    String name = FilenameUtils.getName(settings.getTestData());
    if ("gz".equalsIgnoreCase(FilenameUtils.getExtension(name))) {
        is = new GZIPInputStream(is);
        name = FilenameUtils.removeExtension(name);
        log.debug(" - from GZIP Archive");
    } else if ("bz2".equalsIgnoreCase(FilenameUtils.getExtension(name))) {
        is = new BZip2CompressorInputStream(is);
        name = FilenameUtils.removeExtension(name);
        log.debug(" - from BZip2 Archive");
    } else if ("zip".equalsIgnoreCase(FilenameUtils.getExtension(name))) {
        ZipArchiveInputStream zipin = new ZipArchiveInputStream(is);
        ArchiveEntry entry = zipin.getNextEntry();
        // Guard against an empty archive (the original NPE'd on getName()).
        Assert.assertNotNull("ZIP archive '" + name + "' contains no entries!", entry);
        log.info("For ZIP archives only the 1st Entry will be processed!");
        name = FilenameUtils.getName(entry.getName());
        log.info(" - processed Entry: {}", entry.getName());
        // Switch to the archive stream so callers read the entry's content;
        // the original left 'is' pointing at the raw zip bytes, which would
        // feed compressed data to the parsers below.
        is = zipin;
    } else {
        // else uncompressed data ...
        log.info(" - uncompressed source: {}", name);
    }
    String mediaType;
    if (settings.getTestDataMediaType() != null) {
        mediaType = settings.getTestDataMediaType();
    } else {
        //parse based on extension
        String ext = FilenameUtils.getExtension(name);
        if ("txt".equalsIgnoreCase(ext)) {
            mediaType = TEXT_PLAIN;
        } else if ("rdf".equalsIgnoreCase(ext)) {
            mediaType = SupportedFormat.RDF_XML;
        } else if ("xml".equalsIgnoreCase(ext)) {
            mediaType = SupportedFormat.RDF_XML;
        } else if ("ttl".equalsIgnoreCase(ext)) {
            mediaType = SupportedFormat.TURTLE;
        } else if ("n3".equalsIgnoreCase(ext)) {
            mediaType = SupportedFormat.N3;
        } else if ("nt".equalsIgnoreCase(ext)) {
            mediaType = SupportedFormat.N_TRIPLE;
        } else if ("json".equalsIgnoreCase(ext)) {
            mediaType = SupportedFormat.RDF_JSON;
        } else if (name.indexOf('.') < 0) {
            //no extension -> try plain text
            mediaType = TEXT_PLAIN;
        } else {
            log.info("Unkown File Extension {} for resource name {}", ext, name);
            mediaType = null;
        }
    }
    Assert.assertNotNull("Unable to detect MediaType for RDFTerm '" + name + "'. Please use the property '" + PROPERTY_TEST_DATA_TYPE + "' to manually parse the MediaType!", mediaType);
    log.info(" - Media-Type: {}", mediaType);
    //now init the iterator for the test data
    return TEXT_PLAIN.equalsIgnoreCase(mediaType) ? createTextDataIterator(is, mediaType) : createRdfDataIterator(is, mediaType, settings.getContentProperty());
}
use of org.apache.commons.compress.archivers.zip.ZipArchiveInputStream in project tika by apache.
The class IWorkPackageParser, method parse.
/**
 * Parses an iWork (Keynote/Numbers/Pages) package: walks the ZIP entries,
 * finds the known iWork content entries, sniffs the concrete document type,
 * and streams each recognized entry through a type-specific content handler.
 *
 * @param stream   the raw iWork package (a ZIP) to parse
 * @param handler  SAX handler receiving the extracted XHTML content
 * @param metadata populated with the detected iWork content type
 * @param context  supplies the SAX parser used for the entry XML
 * @throws IOException   on read errors from the underlying stream
 * @throws SAXException  on content-handler failures
 * @throws TikaException if the detected iWork document type is unhandled
 */
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
ZipArchiveInputStream zip = new ZipArchiveInputStream(stream);
ZipArchiveEntry entry = zip.getNextZipEntry();
while (entry != null) {
// Skip entries that are not known iWork content documents.
if (!IWORK_CONTENT_ENTRIES.contains(entry.getName())) {
entry = zip.getNextZipEntry();
continue;
}
// Buffer the shared zip stream so we can peek at the entry's head for type
// detection; mark(4096) matches the buffer size, so detectType must not
// read past 4096 bytes or reset() below would fail.
InputStream entryStream = new BufferedInputStream(zip, 4096);
entryStream.mark(4096);
IWORKDocumentType type = IWORKDocumentType.detectType(entryStream);
// Rewind so the chosen handler sees the entry from its first byte.
entryStream.reset();
if (type != null) {
XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
ContentHandler contentHandler;
switch(type) {
case KEYNOTE:
contentHandler = new KeynoteContentHandler(xhtml, metadata);
break;
case NUMBERS:
contentHandler = new NumbersContentHandler(xhtml, metadata);
break;
case PAGES:
contentHandler = new PagesContentHandler(xhtml, metadata);
break;
case ENCRYPTED:
// We can't do anything for the file right now
contentHandler = null;
break;
default:
throw new TikaException("Unhandled iWorks file " + type);
}
// Content type is recorded even for ENCRYPTED (null handler) documents.
metadata.add(Metadata.CONTENT_TYPE, type.getType().toString());
xhtml.startDocument();
if (contentHandler != null) {
// CloseShieldInputStream keeps the SAX parser from closing the shared
// zip stream when it finishes this entry.
context.getSAXParser().parse(new CloseShieldInputStream(entryStream), new OfflineContentHandler(contentHandler));
}
xhtml.endDocument();
}
entry = zip.getNextZipEntry();
}
// Don't close the zip InputStream (TIKA-1117).
}
Aggregations