use of org.apache.tika.exception.EncryptedDocumentException in project tika by apache.
the class SXWPFExtractorTest method testEncrypted.
/**
 * Exercises password handling for an encrypted .docx with the SAX-based docx extractor
 * switched on via {@link OfficeParserConfig#setUseSAXDocxExtractor(boolean)}.
 *
 * <p>First parses each document with a context that carries a {@link PasswordProvider}
 * returning "tika" and checks the expected text is present, then parses again with the
 * {@code parseContext} defined outside this method and expects an
 * {@link EncryptedDocumentException}.
 *
 * @throws Exception if parsing fails for a reason other than the expected encryption error
 */
@Test
public void testEncrypted() throws Exception {
    // file name -> text that must appear in the extracted XML once decrypted
    Map<String, String> tests = new HashMap<String, String>();
    tests.put("testWORD_protected_passtika.docx", "This is an encrypted Word 2007 File");

    PasswordProvider passwordProvider = new PasswordProvider() {
        @Override
        public String getPassword(Metadata metadata) {
            return "tika";
        }
    };
    OfficeParserConfig opc = new OfficeParserConfig();
    opc.setUseSAXDocxExtractor(true);
    ParseContext passwordContext = new ParseContext();
    passwordContext.set(org.apache.tika.parser.PasswordProvider.class, passwordProvider);
    passwordContext.set(OfficeParserConfig.class, opc);

    for (Map.Entry<String, String> e : tests.entrySet()) {
        assertContains(e.getValue(), getXML(e.getKey(), passwordContext).xml);
    }

    //now try with no password
    // NOTE(review): parseContext is declared outside this method; presumably it has no
    // PasswordProvider registered -- confirm against the test class setup.
    for (Map.Entry<String, String> e : tests.entrySet()) {
        boolean exc = false;
        try {
            getXML(e.getKey(), parseContext);
        } catch (EncryptedDocumentException ex) {
            exc = true;
        }
        assertTrue(exc);
    }
}
use of org.apache.tika.exception.EncryptedDocumentException in project tika by apache.
the class OOXMLParserTest method testEncrypted.
/**
 * Exercises password handling for encrypted OOXML documents (.docx, .pptx, .xlsx) through
 * {@link AutoDetectParser}.
 *
 * <p>Each document is first parsed with a context that carries a {@link PasswordProvider}
 * returning "tika" and the extracted text is checked; it is then parsed again with an empty
 * {@link ParseContext} and an {@link EncryptedDocumentException} is expected.
 *
 * <p>A new {@link Metadata} is used for every parse so that values written while parsing one
 * document cannot be carried over as input to the next one.
 *
 * @throws Exception if parsing fails for a reason other than the expected encryption error
 */
@Test
public void testEncrypted() throws Exception {
    // file name -> text that must appear in the extracted body once decrypted
    Map<String, String> tests = new HashMap<String, String>();
    tests.put("testWORD_protected_passtika.docx", "This is an encrypted Word 2007 File");
    tests.put("testPPT_protected_passtika.pptx", "This is an encrypted PowerPoint 2007 slide.");
    tests.put("testEXCEL_protected_passtika.xlsx", "This is an Encrypted Excel spreadsheet.");

    Parser parser = new AutoDetectParser();
    PasswordProvider passwordProvider = new PasswordProvider() {
        @Override
        public String getPassword(Metadata metadata) {
            return "tika";
        }
    };
    ParseContext passwordContext = new ParseContext();
    passwordContext.set(org.apache.tika.parser.PasswordProvider.class, passwordProvider);

    for (Map.Entry<String, String> e : tests.entrySet()) {
        try (InputStream is = getTestDocument(e.getKey())) {
            ContentHandler handler = new BodyContentHandler();
            // fresh Metadata per document: no state shared between parses
            parser.parse(is, handler, new Metadata(), passwordContext);
            assertContains(e.getValue(), handler.toString());
        }
    }

    ParseContext context = new ParseContext();
    //now try with no password
    for (Map.Entry<String, String> e : tests.entrySet()) {
        boolean exc = false;
        try (InputStream is = getTestDocument(e.getKey())) {
            ContentHandler handler = new BodyContentHandler();
            parser.parse(is, handler, new Metadata(), context);
        } catch (EncryptedDocumentException ex) {
            exc = true;
        }
        assertTrue(exc);
    }
}
use of org.apache.tika.exception.EncryptedDocumentException in project tika by apache.
the class ExcelParserTest method testExcelParserPassword.
/**
 * Parses the password-protected .xls test document twice with {@link OfficeParser}:
 * once without a {@link PasswordProvider}, expecting an {@link EncryptedDocumentException},
 * and once with a provider returning "tika", checking the resulting metadata and text.
 *
 * @throws Exception if parsing fails for a reason other than the expected encryption error
 */
@Test
public void testExcelParserPassword() throws Exception {
    final String resource = "/test-documents/testEXCEL_protected_passtika.xls";

    // Attempt 1: no password available, so the parser must refuse the document.
    try (InputStream in = ExcelParserTest.class.getResourceAsStream(resource)) {
        ParseContext noPasswordContext = new ParseContext();
        noPasswordContext.set(Locale.class, Locale.US);
        ContentHandler sink = new BodyContentHandler();
        Metadata scratch = new Metadata();
        new OfficeParser().parse(in, sink, scratch, noPasswordContext);
        fail("Document is encrypted, shouldn't parse");
    } catch (EncryptedDocumentException expected) {
        // Good
    }

    // Attempt 2: same document, this time with the password supplied through the context.
    PasswordProvider tikaPassword = new PasswordProvider() {
        @Override
        public String getPassword(Metadata metadata) {
            return "tika";
        }
    };
    try (InputStream in = ExcelParserTest.class.getResourceAsStream(resource)) {
        ParseContext withPassword = new ParseContext();
        withPassword.set(Locale.class, Locale.US);
        withPassword.set(PasswordProvider.class, tikaPassword);

        Metadata parsed = new Metadata();
        ContentHandler text = new BodyContentHandler();
        new OfficeParser().parse(in, text, parsed, withPassword);

        assertEquals("application/vnd.ms-excel", parsed.get(Metadata.CONTENT_TYPE));
        assertEquals(null, parsed.get(TikaCoreProperties.TITLE));
        assertEquals("Antoni", parsed.get(TikaCoreProperties.CREATOR));
        assertEquals("2011-11-25T09:52:48Z", parsed.get(TikaCoreProperties.CREATED));

        String extracted = text.toString();
        assertContains("This is an Encrypted Excel spreadsheet", extracted);
        assertNotContained("9.0", extracted);
    }
}
use of org.apache.tika.exception.EncryptedDocumentException in project tika by apache.
the class JackcessParser method parse.
/**
 * Opens the incoming stream as a read-only database and hands it to a
 * {@link JackcessExtractor}, which writes into an XHTML wrapper around {@code handler}.
 *
 * <p>If the {@code context} holds a {@link PasswordProvider}, the password it returns for
 * {@code metadata} is passed to the {@link CryptCodecProvider}. The database is opened from
 * {@code tis.getFile()}; any temporary file created for that purpose is registered with a
 * {@code TemporaryResources} instance and released when parsing finishes, after the database
 * itself has been closed.
 *
 * @param stream   the document to parse
 * @param handler  receives the XHTML SAX events
 * @param metadata passed to the password provider and to the extractor
 * @param context  parse context; consulted for a {@link PasswordProvider}
 * @throws IOException   if reading the stream or releasing temporary resources fails
 * @throws SAXException  if the content handler rejects an event
 * @throws TikaException an {@link EncryptedDocumentException} when opening or extracting
 *                       fails with an {@link IllegalStateException} whose message contains
 *                       "Incorrect password"
 */
@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
    xhtml.startDocument();

    String password = null;
    PasswordProvider passwordProvider = context.get(PasswordProvider.class);
    if (passwordProvider != null) {
        password = passwordProvider.getPassword(metadata);
    }

    // Track any temp file created by getFile() so it is deleted when we are done; if the
    // caller already passed a TikaInputStream, its own resources stay under the caller's control.
    try (org.apache.tika.io.TemporaryResources tmp = new org.apache.tika.io.TemporaryResources()) {
        TikaInputStream tis = TikaInputStream.get(stream, tmp);
        Database db = null;
        try {
            // Always install a CryptCodecProvider, even without a password, to ensure an
            // encryption/wrong password exception vs. the more generic
            // "need right codec" error message.
            CryptCodecProvider codecProvider = (password == null)
                    ? new CryptCodecProvider()
                    : new CryptCodecProvider(password);
            db = new DatabaseBuilder(tis.getFile())
                    .setCodecProvider(codecProvider)
                    .setReadOnly(true)
                    .open();
            //just in case
            db.setLinkResolver(IGNORE_LINK_RESOLVER);
            JackcessExtractor ex = new JackcessExtractor(metadata, context, locale);
            ex.parse(db, xhtml);
        } catch (IllegalStateException e) {
            if (e.getMessage() != null && e.getMessage().contains("Incorrect password")) {
                throw new EncryptedDocumentException(e);
            }
            throw e;
        } finally {
            // Close the database before tmp is closed so the backing file is no longer in use.
            if (db != null) {
                try {
                    db.close();
                } catch (IOException e) {
                    //swallow = silent close
                }
            }
        }
    }
    xhtml.endDocument();
}
use of org.apache.tika.exception.EncryptedDocumentException in project tika by apache.
the class RarParser method parse.
/**
 * Opens the incoming stream as a RAR {@link Archive} and passes every non-directory entry
 * to the {@link EmbeddedDocumentExtractor} obtained from the {@code context}.
 *
 * <p>The archive is opened from {@code tis.getFile()}, with temporary resources tracked by a
 * {@link TemporaryResources} instance. The archive is closed before those temporary
 * resources are released, so the backing file is no longer held open when it is cleaned up.
 * Iteration over entries stops early if the current thread is interrupted.
 *
 * @param stream   the document to parse
 * @param handler  receives the XHTML SAX events and is passed on for embedded entries
 * @param metadata metadata for the container document
 * @param context  parse context; used to look up the embedded document extractor
 * @throws IOException   if reading the stream, closing the archive or releasing temporary
 *                       resources fails
 * @throws SAXException  if the content handler rejects an event
 * @throws TikaException an {@link EncryptedDocumentException} if the archive reports itself
 *                       as encrypted, or a wrapper around any {@link RarException}
 */
@Override
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException {
    XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
    xhtml.startDocument();
    EmbeddedDocumentExtractor extractor = EmbeddedDocumentUtil.getEmbeddedDocumentExtractor(context);
    try (TemporaryResources tmp = new TemporaryResources()) {
        TikaInputStream tis = TikaInputStream.get(stream, tmp);
        Archive rar = new Archive(tis.getFile());
        // Inner try/finally: a finally attached to the try-with-resources itself would only
        // run after tmp has been closed, i.e. after the file backing the archive is gone.
        try {
            if (rar.isEncrypted()) {
                throw new EncryptedDocumentException();
            }
            //Without this BodyContentHandler does not work
            xhtml.element("div", " ");
            FileHeader header = rar.nextFileHeader();
            while (header != null && !Thread.currentThread().isInterrupted()) {
                if (!header.isDirectory()) {
                    try (InputStream subFile = rar.getInputStream(header)) {
                        // Use the wide-character name unless it is empty, then fall back
                        // to the plain string name.
                        String entryName = "".equals(header.getFileNameW())
                                ? header.getFileNameString()
                                : header.getFileNameW();
                        Metadata entrydata = PackageParser.handleEntryMetadata(entryName, header.getCTime(), header.getMTime(), header.getFullUnpackSize(), xhtml);
                        if (extractor.shouldParseEmbedded(entrydata)) {
                            extractor.parseEmbedded(subFile, handler, entrydata, true);
                        }
                    }
                }
                header = rar.nextFileHeader();
            }
        } finally {
            rar.close();
        }
    } catch (RarException e) {
        throw new TikaException("RarParser Exception", e);
    }
    xhtml.endDocument();
}
Aggregations