Use of org.apache.tika.parser.PasswordProvider in the Apache Tika project.
The method parse of the class OfficeParser.
/**
 * Extracts metadata and text from an already opened OLE2 directory tree.
 *
 * <p>Summary entries are parsed into {@code metadata} first. The document
 * type is then detected from {@code root}; when it is known, it is recorded
 * via {@code setType} and the extractor matching that type writes its output
 * to {@code xhtml}. Types without a dedicated extractor contribute metadata
 * only.
 *
 * <p>For encrypted documents the password is taken from a
 * {@link PasswordProvider} found in {@code context}, falling back to
 * {@code Decryptor.DEFAULT_PASSWORD} when no provider is present or the
 * provider returns {@code null}. The decrypted stream is delegated to an
 * {@code OOXMLParser} and closed afterwards.
 *
 * @param root     root directory node of the OLE2 document
 * @param context  parse context; consulted for a {@link PasswordProvider}
 *                 and a {@link Locale}
 * @param metadata metadata object that is populated during parsing
 * @param xhtml    handler that receives the extracted content
 * @throws IOException   if reading the document or closing the decrypted
 *                       stream fails
 * @throws SAXException  if the content handler reports an error
 * @throws TikaException if the document cannot be parsed; an
 *                       {@code EncryptedDocumentException} is thrown when the
 *                       password cannot be verified or decryption fails
 */
protected void parse(DirectoryNode root, ParseContext context, Metadata metadata, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException {
    // Parse summary entries first, to make metadata available early
    new SummaryExtractor(metadata).parseSummaries(root);

    // Parse remaining document entries
    POIFSDocumentType type = POIFSDocumentType.detectType(root);
    if (type != POIFSDocumentType.UNKNOWN) {
        setType(metadata, type.getType());
    }

    switch (type) {
        case SOLIDWORKS_PART:
        case SOLIDWORKS_ASSEMBLY:
        case SOLIDWORKS_DRAWING:
            // No text extraction is performed for these types
            break;
        case PUBLISHER:
            PublisherTextExtractor publisherTextExtractor = new PublisherTextExtractor(root);
            xhtml.element("p", publisherTextExtractor.getText());
            break;
        case WORDDOCUMENT:
            new WordExtractor(context, metadata).parse(root, xhtml);
            break;
        case POWERPOINT:
            new HSLFExtractor(context, metadata).parse(root, xhtml);
            break;
        case WORKBOOK:
        case XLR:
            // Use the caller-supplied locale if there is one, else the JVM default
            Locale locale = context.get(Locale.class, Locale.getDefault());
            new ExcelExtractor(context, metadata).parse(root, xhtml, locale);
            break;
        case PROJECT:
            // We currently can't do anything beyond the metadata
            break;
        case VISIO:
            VisioTextExtractor visioTextExtractor = new VisioTextExtractor(root);
            for (String text : visioTextExtractor.getAllText()) {
                xhtml.element("p", text);
            }
            break;
        case OUTLOOK:
            OutlookExtractor extractor = new OutlookExtractor(root, context);
            extractor.parse(xhtml, metadata);
            break;
        case ENCRYPTED:
            EncryptionInfo info = new EncryptionInfo(root);
            Decryptor d = Decryptor.getInstance(info);
            try {
                // By default, use the default Office Password
                String password = Decryptor.DEFAULT_PASSWORD;

                // If they supplied a Password Provider, ask that for the password,
                // and use the provider given one if available (stick with default if not)
                PasswordProvider passwordProvider = context.get(PasswordProvider.class);
                if (passwordProvider != null) {
                    String suppliedPassword = passwordProvider.getPassword(metadata);
                    if (suppliedPassword != null) {
                        password = suppliedPassword;
                    }
                }

                // Check if we've the right password or not
                if (!d.verifyPassword(password)) {
                    throw new EncryptedDocumentException();
                }

                // Decrypt the OLE2 stream, and delegate the resulting OOXML
                // file to the regular OOXML parser for normal handling
                OOXMLParser parser = new OOXMLParser();
                java.io.InputStream decrypted = d.getDataStream(root);
                try {
                    parser.parse(decrypted, new EmbeddedContentHandler(new BodyContentHandler(xhtml)), metadata, context);
                } finally {
                    // We opened the decrypted stream here, so we must close it;
                    // the delegate parser is not expected to do so
                    decrypted.close();
                }
            } catch (GeneralSecurityException ex) {
                throw new EncryptedDocumentException(ex);
            }
            // Explicit break: do not rely on falling through into default
            break;
        default:
            // For unsupported / unhandled types, just the metadata
            // is extracted, which happened above
            break;
    }
}
Aggregations