use of org.apache.tika.parser.EmptyParser in project tika by apache.
the class ParsingExample method parseNoEmbeddedExample.
/**
 * Parses the bundled {@code test_recursive_embedded.docx} resource while
 * leaving out the content of embedded documents.
 * <p>
 * To skip embedded documents, pass a
 * {@link org.apache.tika.parser.ParseContext} in which an
 * {@link EmptyParser} is registered under {@code Parser.class}.
 *
 * @return the text collected by the body content handler.
 * @throws IOException   if the resource stream cannot be read or closed
 * @throws SAXException  if the content handler reports a SAX error
 * @throws TikaException if the parser fails to process the document
 */
public String parseNoEmbeddedExample() throws IOException, SAXException, TikaException {
    AutoDetectParser autoDetect = new AutoDetectParser();
    BodyContentHandler bodyText = new BodyContentHandler();

    // Register EmptyParser as the context's Parser so embedded documents
    // contribute no content.
    ParseContext context = new ParseContext();
    context.set(Parser.class, new EmptyParser());

    try (InputStream in =
            ParsingExample.class.getResourceAsStream("test_recursive_embedded.docx")) {
        autoDetect.parse(in, bodyText, new Metadata(), context);
    }
    return bodyText.toString();
}
use of org.apache.tika.parser.EmptyParser in project tika by apache.
the class ForkParserIntegrationTest method testForkedPDFParsing.
/**
 * TIKA-808 - Ensure that parsing of our test PDFs works under
 * the Fork Parser, to ensure that complex parsing behaves correctly.
 * <p>
 * An {@link EmptyParser} is registered in the {@link ParseContext} under
 * {@code Parser.class}; presumably this keeps embedded documents from being
 * parsed so only the PDF's own text is checked (confirm against Tika docs).
 *
 * @throws Exception if parsing fails or the test document cannot be read
 */
@Test
public void testForkedPDFParsing() throws Exception {
    ForkParser parser = new ForkParser(ForkParserIntegrationTest.class.getClassLoader(), tika.getParser());
    try {
        ContentHandler output = new BodyContentHandler();
        ParseContext context = new ParseContext();
        context.set(Parser.class, new EmptyParser());

        // The parser consumes but does not close the stream, so close it
        // here rather than leaking the resource handle.
        try (InputStream stream =
                ForkParserIntegrationTest.class.getResourceAsStream("/test-documents/testPDF.pdf")) {
            parser.parse(stream, output, new Metadata(), context);
        }

        String content = output.toString();
        assertContains("Apache Tika", content);
        assertContains("Tika - Content Analysis Toolkit", content);
        assertContains("incubator", content);
        assertContains("Apache Software Foundation", content);
    } finally {
        // Always release the fork parser, even when an assertion fails.
        parser.close();
    }
}
Aggregations