Use of org.ontoware.rdf2go.model.node.impl.URIImpl in the Apache Stanbol project.
Class TestMetaxaCore, method testHtmlExtraction.
/**
 * Tests the HTML extraction.
 * <p>
 * Extracts a model from the {@code test.html} resource, compares the text
 * obtained via {@code MetaxaCore.getText} with the expected text stored in
 * {@code html-res.txt} (both passed through {@code cleanup}), and checks the
 * value returned by {@code printTriples} for the extracted model.
 *
 * @throws Exception if a resource cannot be read or the extraction fails
 */
@Test
public void testHtmlExtraction() throws Exception {
    String testFile = "test.html";
    String testResultFile = "html-res.txt";

    // extract text from html
    InputStream in = getResourceAsStream(testFile);
    assertNotNull("failed to load resource " + testFile, in);
    try {
        Model m = extractor.extract(in, new URIImpl("file://" + testFile), "text/html");
        String text = MetaxaCore.getText(m);

        // get expected result
        InputStream in2 = getResourceAsStream(testResultFile);
        assertNotNull("failed to load resource " + testResultFile, in2);
        String expectedText;
        try {
            expectedText = IOUtils.toString(in2, "utf-8");
        } finally {
            // release the expected-result stream even if reading fails
            IOUtils.closeQuietly(in2);
        }

        // test
        assertEquals(cleanup(expectedText), cleanup(text));

        // show triples
        int tripleCounter = this.printTriples(m);
        assertEquals(28, tripleCounter);
    } finally {
        // The source stream is kept open until the model is no longer used,
        // because it is not visible here whether the model reads lazily.
        // closeQuietly avoids masking an assertion failure with a close error.
        IOUtils.closeQuietly(in);
    }
}
Use of org.ontoware.rdf2go.model.node.impl.URIImpl in the Apache Stanbol project.
Class TestMetaxaCore, method testMailExtraction.
/**
 * Tests the mail extraction.
 * <p>
 * Extracts a model from the {@code mail-multipart-test.eml} resource using the
 * {@code message/rfc822} MIME type and checks that the model contains at least
 * one statement with the predicate {@code NMO.plainTextMessageContent}.
 *
 * @throws Exception if the resource cannot be read or the extraction fails
 */
@Test
public void testMailExtraction() throws Exception {
    String testFile = "mail-multipart-test.eml";
    InputStream in = getResourceAsStream(testFile);
    assertNotNull("failed to load resource " + testFile, in);
    try {
        Model m = extractor.extract(in, new URIImpl("file://" + testFile), "message/rfc822");
        // any subject / any object: only the presence of the predicate matters
        boolean textContained = m.contains(Variable.ANY, NMO.plainTextMessageContent, Variable.ANY);
        assertTrue("no plain text message content extracted from " + testFile, textContained);
    } finally {
        // The stream is kept open until the model has been queried, because
        // it is not visible here whether the model reads lazily.
        // closeQuietly avoids masking an assertion failure with a close error.
        IOUtils.closeQuietly(in);
    }
}
Aggregations