use of org.apache.tika.fork.ForkParser in project tika by apache.
the class ForkParserIntegrationTest method testForkedPDFParsing.
/**
 * TIKA-808 - Ensure that parsing of our test PDFs works under
 * the Fork Parser, to ensure that complex parsing behaves
 * correctly when run through the fork mechanism.
 *
 * @throws Exception if the document cannot be parsed or a resource
 *         cannot be closed
 */
@Test
public void testForkedPDFParsing() throws Exception {
    ForkParser parser = new ForkParser(ForkParserIntegrationTest.class.getClassLoader(), tika.getParser());
    try {
        ContentHandler output = new BodyContentHandler();
        ParseContext context = new ParseContext();
        // NOTE(review): registering an EmptyParser as the context Parser presumably
        // stops embedded documents from being parsed recursively - confirm.
        context.set(Parser.class, new EmptyParser());

        InputStream stream = ForkParserIntegrationTest.class.getResourceAsStream("/test-documents/testPDF.pdf");
        try {
            parser.parse(stream, output, new Metadata(), context);
        } finally {
            // The parser does not close the stream it is handed, so the test
            // must release it itself, whether or not parsing succeeded.
            if (stream != null) {
                stream.close();
            }
        }

        // The extracted body text must contain these known phrases from the PDF.
        String content = output.toString();
        assertContains("Apache Tika", content);
        assertContains("Tika - Content Analysis Toolkit", content);
        assertContains("incubator", content);
        assertContains("Apache Software Foundation", content);
    } finally {
        // Always release the ForkParser, even when parsing or an assertion fails.
        parser.close();
    }
}
use of org.apache.tika.fork.ForkParser in project tika by apache.
the class ForkParserIntegrationTest method testForkedTextParsing.
/**
 * Simple text parsing: runs a plain-text test document through the
 * Fork Parser and checks that the expected phrases appear in the
 * extracted body content.
 *
 * @throws Exception if the document cannot be parsed or a resource
 *         cannot be closed
 */
@Test
public void testForkedTextParsing() throws Exception {
    ForkParser parser = new ForkParser(ForkParserIntegrationTest.class.getClassLoader(), tika.getParser());
    try {
        ContentHandler output = new BodyContentHandler();
        ParseContext context = new ParseContext();

        InputStream stream = ForkParserIntegrationTest.class.getResourceAsStream("/test-documents/testTXT.txt");
        try {
            parser.parse(stream, output, new Metadata(), context);
        } finally {
            // The parser does not close the stream it is handed, so the test
            // must release it itself, whether or not parsing succeeded.
            if (stream != null) {
                stream.close();
            }
        }

        // The extracted body text must contain these known phrases from the file.
        String content = output.toString();
        assertContains("Test d'indexation", content);
        assertContains("http://www.apache.org", content);
    } finally {
        // Always release the ForkParser, even when parsing or an assertion fails.
        parser.close();
    }
}
Aggregations