use of org.xml.sax.ContentHandler in project tika by apache.
the class OOXMLParserTest method testEncrypted.
/**
 * Verifies that password-protected OOXML documents are parsed when the password is supplied
 * through a {@link PasswordProvider} registered in the {@link ParseContext}, and that parsing
 * the same documents without a password raises {@link EncryptedDocumentException}.
 *
 * @throws Exception if a test document cannot be opened or parsing fails unexpectedly
 */
@Test
public void testEncrypted() throws Exception {
    // test document name -> text expected in the extracted body
    Map<String, String> tests = new HashMap<String, String>();
    tests.put("testWORD_protected_passtika.docx", "This is an encrypted Word 2007 File");
    tests.put("testPPT_protected_passtika.pptx", "This is an encrypted PowerPoint 2007 slide.");
    tests.put("testEXCEL_protected_passtika.xlsx", "This is an Encrypted Excel spreadsheet.");
    Parser parser = new AutoDetectParser();
    PasswordProvider passwordProvider = new PasswordProvider() {
        @Override
        public String getPassword(Metadata metadata) {
            return "tika";
        }
    };
    ParseContext passwordContext = new ParseContext();
    passwordContext.set(org.apache.tika.parser.PasswordProvider.class, passwordProvider);
    for (Map.Entry<String, String> e : tests.entrySet()) {
        try (InputStream is = getTestDocument(e.getKey())) {
            ContentHandler handler = new BodyContentHandler();
            // Use a fresh Metadata for every parse so that values recorded for one
            // document are never carried into the parse of the next one.
            parser.parse(is, handler, new Metadata(), passwordContext);
            assertContains(e.getValue(), handler.toString());
        }
    }
    ParseContext context = new ParseContext();
    //now try with no password
    for (Map.Entry<String, String> e : tests.entrySet()) {
        boolean exc = false;
        try (InputStream is = getTestDocument(e.getKey())) {
            ContentHandler handler = new BodyContentHandler();
            // Fresh Metadata here as well, for the same reason as above.
            parser.parse(is, handler, new Metadata(), context);
        } catch (EncryptedDocumentException ex) {
            exc = true;
        }
        // every document in the map must be rejected when no password is available
        assertTrue(exc);
    }
}
use of org.xml.sax.ContentHandler in project tika by apache.
the class OOXMLParserTest method testWordArt.
/**
 * Parses the {@code testWordArt.pptx} classpath resource with an {@link AutoDetectParser}
 * and checks that the extracted body text contains the WordArt string.
 *
 * @throws Exception if the resource cannot be read or parsing fails
 */
@Test
public void testWordArt() throws Exception {
    Metadata meta = new Metadata();
    ContentHandler bodyHandler = new BodyContentHandler();
    AutoDetectParser autoDetect = new AutoDetectParser();
    ParseContext parseContext = new ParseContext();
    try (InputStream in = OOXMLParserTest.class.getResourceAsStream("/test-documents/testWordArt.pptx")) {
        autoDetect.parse(in, bodyHandler, meta, parseContext);
    }
    // the handler is only inspected after the stream has been closed
    assertContains("Here is some red word Art", bodyHandler.toString());
}
use of org.xml.sax.ContentHandler in project tika by apache.
the class OOXMLParserTest method testWordFootnote.
/**
 * Parses {@code footnotes.docx} and checks two things: the content type recorded in the
 * metadata is the WordprocessingML document type, and the extracted text contains the
 * string "snoska".
 *
 * @throws Exception if the test document cannot be opened or parsing fails
 */
@Test
public void testWordFootnote() throws Exception {
    ContentHandler bodyHandler = new BodyContentHandler();
    Metadata meta = new Metadata();
    ParseContext parseContext = new ParseContext();
    try (InputStream docStream = getTestDocument("footnotes.docx")) {
        parser.parse(docStream, bodyHandler, meta, parseContext);

        // content type written into the metadata during the parse
        assertEquals("application/vnd.openxmlformats-officedocument.wordprocessingml.document", meta.get(Metadata.CONTENT_TYPE));

        // extracted body text must contain the expected marker string
        String extracted = bodyHandler.toString();
        assertTrue(extracted.contains("snoska"));
    }
}
use of org.xml.sax.ContentHandler in project tika by apache.
the class OOXMLParserTest method testWordMissingOOXMLBeans.
//TIKA-792; with room for future missing bean tests
/**
 * Parses each listed document while {@code System.err} is redirected to an in-memory
 * buffer and asserts that nothing was written to it during the parse.
 *
 * <p>The original {@code System.err} is restored in a {@code finally} block and the input
 * stream is closed by try-with-resources, so a failing parse neither leaves stderr
 * redirected for later tests nor leaks the stream.
 *
 * @throws Exception if a test document cannot be opened or parsing fails
 */
@Test
public void testWordMissingOOXMLBeans() throws Exception {
    //If a bean is missing, POI prints stack trace to stderr
    String[] fileNames = new String[] { //TIKA-792
            "testWORD_missing_ooxml_bean1.docx" };
    PrintStream origErr = System.err;
    for (String fileName : fileNames) {
        Metadata metadata = new Metadata();
        ContentHandler handler = new BodyContentHandler();
        ParseContext context = new ParseContext();
        ByteArrayOutputStream errContent = new ByteArrayOutputStream();
        // open the document before redirecting, so only output from parse() is captured
        try (InputStream input = getTestDocument(fileName)) {
            //grab stderr
            System.setErr(new PrintStream(errContent, true, UTF_8.name()));
            try {
                parser.parse(input, handler, metadata, context);
            } finally {
                //return stderr, even if parse() threw
                System.setErr(origErr);
            }
        }
        String err = errContent.toString(UTF_8.name());
        assertTrue(err.length() == 0);
    }
}
use of org.xml.sax.ContentHandler in project tika by apache.
the class OOXMLParserTest method testProtectedExcelFile.
/**
 * Parses {@code protectedFile.xlsx}, a password-protected Excel document (see TIKA-437),
 * with a default {@link ParseContext}. Checks that the content type is reported as a
 * SpreadsheetML sheet, that the {@code PROTECTED} metadata value is "true", and that the
 * extracted text contains "Office".
 *
 * @throws Exception if the test document cannot be opened or parsing fails
 */
@Test
public void testProtectedExcelFile() throws Exception {
    ContentHandler bodyHandler = new BodyContentHandler();
    Metadata meta = new Metadata();
    ParseContext parseContext = new ParseContext();
    Parser autoDetect = new AutoDetectParser();
    try (InputStream xlsx = getTestDocument("protectedFile.xlsx")) {
        autoDetect.parse(xlsx, bodyHandler, meta, parseContext);

        // metadata recorded during the parse
        assertEquals("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", meta.get(Metadata.CONTENT_TYPE));
        assertEquals("true", meta.get(TikaMetadataKeys.PROTECTED));

        // extracted body text
        assertContains("Office", bodyHandler.toString());
    }
}
Aggregations