use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.
the class TikaEngineTest method testOdt.
@Test
public void testOdt() throws EngineException, IOException {
log.info(">>> testOdt <<<");
ContentItem ci = createContentItem("test.odt", "application/vnd.oasis.opendocument.text");
assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
engine.computeEnhancements(ci);
Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, singleton("text/plain"));
assertNotNull(contentPart);
Blob plainTextBlob = contentPart.getValue();
assertNotNull(plainTextBlob);
assertContentRegexp(plainTextBlob, "The Apache Stanbol Enhancer", "The Stanbol enhancer can detect famous cities such as Paris and people such as Bob Marley.");
//validate XHTML results
contentPart = ContentItemHelper.getBlob(ci, singleton("application/xhtml+xml"));
assertNotNull(contentPart);
Blob xhtmlBlob = contentPart.getValue();
assertNotNull(xhtmlBlob);
assertContentRegexp(xhtmlBlob, "<html xmlns=\"http://www.w3.org/1999/xhtml\">", "<head>", "<meta name=", "<title>", "The Apache Stanbol Enhancer", "The Stanbol enhancer can detect famous cities", "</body></html>");
}
use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.
the class TikaEngineTest method testMp3.
@Test
public void testMp3() throws EngineException, IOException, ParseException {
log.info(">>> testMp3 <<<");
ContentItem ci = createContentItem("testMP3id3v24.mp3", "audio/mpeg");
assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
engine.computeEnhancements(ci);
Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, singleton("text/plain"));
assertNotNull(contentPart);
Blob plainTextBlob = contentPart.getValue();
assertNotNull(plainTextBlob);
assertContentRegexp(plainTextBlob, "Test Title", "Test Artist", "Test Album");
//validate XHTML results
contentPart = ContentItemHelper.getBlob(ci, singleton("application/xhtml+xml"));
assertNotNull(contentPart);
Blob xhtmlBlob = contentPart.getValue();
assertNotNull(xhtmlBlob);
//Test AudioTrack metadata
BlankNodeOrIRI audioTrack = verifyBlankNodeOrIRI(ci, new IRI(NamespaceEnum.media + "hasTrack"));
//types
verifyValues(ci, audioTrack, RDF.type, new IRI(NamespaceEnum.media + "MediaFragment"), new IRI(NamespaceEnum.media + "Track"), new IRI(NamespaceEnum.media + "AudioTrack"));
//properties
verifyValue(ci, audioTrack, new IRI(NamespaceEnum.media + "hasFormat"), XSD.string, "Mono");
verifyValue(ci, audioTrack, new IRI(NamespaceEnum.media + "samplingRate"), XSD.int_, "44100");
verifyValue(ci, audioTrack, new IRI(NamespaceEnum.media + "hasCompression"), XSD.string, "MP3");
}
use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.
the class TikaEngineTest method testMsWord.
@Test
public void testMsWord() throws EngineException, IOException {
log.info(">>> testMsWord <<<");
ContentItem ci = createContentItem("test.doc", "application/msword");
assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
engine.computeEnhancements(ci);
Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, singleton("text/plain"));
assertNotNull(contentPart);
Blob plainTextBlob = contentPart.getValue();
assertNotNull(plainTextBlob);
assertContentRegexp(plainTextBlob, "The Apache Stanbol Enhancer", "The Stanbol enhancer can detect famous cities such as Paris and people such as Bob Marley.");
//validate XHTML results
contentPart = ContentItemHelper.getBlob(ci, singleton("application/xhtml+xml"));
assertNotNull(contentPart);
Blob xhtmlBlob = contentPart.getValue();
assertNotNull(xhtmlBlob);
assertContentRegexp(xhtmlBlob, "<html xmlns=\"http://www.w3.org/1999/xhtml\">", "<head>", "<meta name=", "<title>", "The Apache Stanbol Enhancer", "The Stanbol enhancer can detect famous cities", "</body></html>");
}
use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.
the class TikaEngineTest method testHtml.
@Test
public void testHtml() throws EngineException, IOException {
log.info(">>> testHtml <<<");
ContentItem ci = createContentItem("test.html", "text/html; charset=UTF-8");
assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
engine.computeEnhancements(ci);
Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, singleton("text/plain"));
assertNotNull(contentPart);
Blob plainTextBlob = contentPart.getValue();
assertNotNull(plainTextBlob);
assertContentRegexp(plainTextBlob, "The Apache Stanbol Enhancer", "The Stanbol enhancer can detect famous cities such as Paris and people such as Bob Marley.");
//validate XHTML results
contentPart = ContentItemHelper.getBlob(ci, singleton("application/xhtml+xml"));
assertNotNull(contentPart);
Blob xhtmlBlob = contentPart.getValue();
assertNotNull(xhtmlBlob);
assertContentRegexp(xhtmlBlob, "<html xmlns=\"http://www.w3.org/1999/xhtml\">", "<head>", "<meta name=", "<title>The Apache Stanbol Enhancer</title>", "The Apache Stanbol Enhancer", "The Stanbol enhancer can detect famous cities", "</body></html>");
}
use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.
the class TestOpenCalaisEngine method testCalaisConnection.
@Test
public void testCalaisConnection() throws IOException, EngineException {
Assume.assumeNotNull(calaisExtractor.getLicenseKey());
ContentItem ci = wrapAsContentItem(TEST_TEXT);
ci.getMetadata().add(new TripleImpl(ci.getUri(), Properties.DC_LANGUAGE, LiteralFactory.getInstance().createTypedLiteral("en")));
Graph model;
try {
model = calaisExtractor.getCalaisAnalysis(TEST_TEXT, "text/plain");
} catch (EngineException e) {
RemoteServiceHelper.checkServiceUnavailable(e);
return;
}
Assert.assertNotNull("No model", model);
Collection<CalaisEntityOccurrence> entities;
try {
entities = calaisExtractor.queryModel(model);
} catch (EngineException e) {
RemoteServiceHelper.checkServiceUnavailable(e);
return;
}
LOG.info("Found entities: {}", entities.size());
LOG.debug("Entities:\n{}", entities);
Assert.assertFalse("No entities found!", entities.isEmpty());
}
Aggregations