use of org.apache.stanbol.enhancer.servicesapi.Blob in project stanbol by apache.
the class TikaEngineTest method testOdt.
/**
 * Runs the engine over the {@code test.odt} fixture and checks the
 * {@code text/plain} and {@code application/xhtml+xml} blobs that can be
 * retrieved from the content item afterwards.
 */
@Test
public void testOdt() throws EngineException, IOException {
    log.info(">>> testOdt <<<");
    final ContentItem item = createContentItem("test.odt", "application/vnd.oasis.opendocument.text");
    assertFalse(CANNOT_ENHANCE == engine.canEnhance(item));
    engine.computeEnhancements(item);

    // plain text variant
    final Entry<IRI, Blob> textPart = ContentItemHelper.getBlob(item, singleton("text/plain"));
    assertNotNull(textPart);
    final Blob textBlob = textPart.getValue();
    assertNotNull(textBlob);
    assertContentRegexp(textBlob,
            "The Apache Stanbol Enhancer",
            "The Stanbol enhancer can detect famous cities such as Paris and people such as Bob Marley.");

    // XHTML variant
    final Entry<IRI, Blob> markupPart = ContentItemHelper.getBlob(item, singleton("application/xhtml+xml"));
    assertNotNull(markupPart);
    final Blob markupBlob = markupPart.getValue();
    assertNotNull(markupBlob);
    assertContentRegexp(markupBlob,
            "<html xmlns=\"http://www.w3.org/1999/xhtml\">",
            "<head>",
            "<meta name=",
            "<title>",
            "The Apache Stanbol Enhancer",
            "The Stanbol enhancer can detect famous cities",
            "</body></html>");
}
use of org.apache.stanbol.enhancer.servicesapi.Blob in project stanbol by apache.
the class TikaEngineTest method testMp3.
/**
 * Runs the engine over the {@code testMP3id3v24.mp3} fixture, checks the
 * {@code text/plain} blob for the expected title/artist/album strings,
 * checks that an {@code application/xhtml+xml} blob is present, and
 * verifies the audio track metadata reachable via the media
 * {@code hasTrack} property.
 */
@Test
public void testMp3() throws EngineException, IOException, ParseException {
    log.info(">>> testMp3 <<<");
    final ContentItem item = createContentItem("testMP3id3v24.mp3", "audio/mpeg");
    assertFalse(CANNOT_ENHANCE == engine.canEnhance(item));
    engine.computeEnhancements(item);

    // plain text variant
    final Entry<IRI, Blob> textPart = ContentItemHelper.getBlob(item, singleton("text/plain"));
    assertNotNull(textPart);
    final Blob textBlob = textPart.getValue();
    assertNotNull(textBlob);
    assertContentRegexp(textBlob, "Test Title", "Test Artist", "Test Album");

    // XHTML variant: only its presence is checked here
    final Entry<IRI, Blob> markupPart = ContentItemHelper.getBlob(item, singleton("application/xhtml+xml"));
    assertNotNull(markupPart);
    final Blob markupBlob = markupPart.getValue();
    assertNotNull(markupBlob);

    // audio track metadata; every property below lives in the media namespace
    final String mediaNs = String.valueOf(NamespaceEnum.media);
    final BlankNodeOrIRI track = verifyBlankNodeOrIRI(item, new IRI(mediaNs + "hasTrack"));

    // rdf:type values
    final IRI fragmentType = new IRI(mediaNs + "MediaFragment");
    final IRI trackType = new IRI(mediaNs + "Track");
    final IRI audioTrackType = new IRI(mediaNs + "AudioTrack");
    verifyValues(item, track, RDF.type, fragmentType, trackType, audioTrackType);

    // literal properties
    verifyValue(item, track, new IRI(mediaNs + "hasFormat"), XSD.string, "Mono");
    verifyValue(item, track, new IRI(mediaNs + "samplingRate"), XSD.int_, "44100");
    verifyValue(item, track, new IRI(mediaNs + "hasCompression"), XSD.string, "MP3");
}
use of org.apache.stanbol.enhancer.servicesapi.Blob in project stanbol by apache.
the class TikaEngineTest method testMsWord.
/**
 * Runs the engine over the {@code test.doc} fixture and checks the
 * {@code text/plain} and {@code application/xhtml+xml} blobs that can be
 * retrieved from the content item afterwards.
 */
@Test
public void testMsWord() throws EngineException, IOException {
    log.info(">>> testMsWord <<<");
    final ContentItem item = createContentItem("test.doc", "application/msword");
    assertFalse(CANNOT_ENHANCE == engine.canEnhance(item));
    engine.computeEnhancements(item);

    // plain text variant
    final Entry<IRI, Blob> textPart = ContentItemHelper.getBlob(item, singleton("text/plain"));
    assertNotNull(textPart);
    final Blob textBlob = textPart.getValue();
    assertNotNull(textBlob);
    assertContentRegexp(textBlob,
            "The Apache Stanbol Enhancer",
            "The Stanbol enhancer can detect famous cities such as Paris and people such as Bob Marley.");

    // XHTML variant
    final Entry<IRI, Blob> markupPart = ContentItemHelper.getBlob(item, singleton("application/xhtml+xml"));
    assertNotNull(markupPart);
    final Blob markupBlob = markupPart.getValue();
    assertNotNull(markupBlob);
    assertContentRegexp(markupBlob,
            "<html xmlns=\"http://www.w3.org/1999/xhtml\">",
            "<head>",
            "<meta name=",
            "<title>",
            "The Apache Stanbol Enhancer",
            "The Stanbol enhancer can detect famous cities",
            "</body></html>");
}
use of org.apache.stanbol.enhancer.servicesapi.Blob in project stanbol by apache.
the class TikaEngineTest method testHtml.
/**
 * Runs the engine over the {@code test.html} fixture (declared as
 * {@code text/html; charset=UTF-8}) and checks the {@code text/plain} and
 * {@code application/xhtml+xml} blobs that can be retrieved from the
 * content item afterwards, including the full {@code <title>} element.
 */
@Test
public void testHtml() throws EngineException, IOException {
    log.info(">>> testHtml <<<");
    final ContentItem item = createContentItem("test.html", "text/html; charset=UTF-8");
    assertFalse(CANNOT_ENHANCE == engine.canEnhance(item));
    engine.computeEnhancements(item);

    // plain text variant
    final Entry<IRI, Blob> textPart = ContentItemHelper.getBlob(item, singleton("text/plain"));
    assertNotNull(textPart);
    final Blob textBlob = textPart.getValue();
    assertNotNull(textBlob);
    assertContentRegexp(textBlob,
            "The Apache Stanbol Enhancer",
            "The Stanbol enhancer can detect famous cities such as Paris and people such as Bob Marley.");

    // XHTML variant
    final Entry<IRI, Blob> markupPart = ContentItemHelper.getBlob(item, singleton("application/xhtml+xml"));
    assertNotNull(markupPart);
    final Blob markupBlob = markupPart.getValue();
    assertNotNull(markupBlob);
    assertContentRegexp(markupBlob,
            "<html xmlns=\"http://www.w3.org/1999/xhtml\">",
            "<head>",
            "<meta name=",
            "<title>The Apache Stanbol Enhancer</title>",
            "The Apache Stanbol Enhancer",
            "The Stanbol enhancer can detect famous cities",
            "</body></html>");
}
use of org.apache.stanbol.enhancer.servicesapi.Blob in project stanbol by apache.
the class OpenCalaisEngine method computeEnhancements.
/**
 * Sends the text of the content item's supported content part to the
 * Calais analysis and, if a model is returned, adds the resulting
 * enhancements to the content item while holding its write lock.
 *
 * @param ci the content item to enhance
 * @throws IllegalStateException if the content item has no content part
 *         with one of the {@code SUPPORTED_MIMETYPES}
 * @throws InvalidContentException if the text of the content part cannot
 *         be read
 * @throws EngineException declared by the signature; presumably raised by
 *         the analysis/enhancement helpers -- verify against their
 *         definitions
 */
public void computeEnhancements(ContentItem ci) throws EngineException {
    Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES);
    if (contentPart == null) {
        throw new IllegalStateException("No ContentPart with an supported Mimetype '" + SUPPORTED_MIMETYPES + "' found for ContentItem " + ci.getUri() + ": This is also checked in the canEnhance method! -> This " + "indicated an Bug in the implementation of the " + "EnhancementJobManager!");
    }
    String text;
    try {
        text = ContentItemHelper.getText(contentPart.getValue());
    } catch (IOException e) {
        throw new InvalidContentException(this, ci, e);
    }
    Graph calaisModel = getCalaisAnalysis(text, contentPart.getValue().getMimeType());
    if (calaisModel == null) {
        // no analysis result: nothing to add
        return;
    }
    // Acquire a write lock on the ContentItem when adding the enhancements
    ci.getLock().writeLock().lock();
    try {
        createEnhancements(queryModel(calaisModel), ci);
        if (log.isDebugEnabled()) {
            Serializer serializer = Serializer.getInstance();
            ByteArrayOutputStream debugStream = new ByteArrayOutputStream();
            serializer.serialize(debugStream, ci.getMetadata(), "application/rdf+xml");
            try {
                log.debug("Calais Enhancements:\n{}", debugStream.toString("UTF-8"));
            } catch (UnsupportedEncodingException e) {
                // Report through the logger rather than stderr; this only
                // affects the debug dump, so the enhancement itself proceeds.
                log.warn("Unable to decode the serialized Calais enhancements as UTF-8 for debug logging", e);
            }
        }
    } finally {
        // always release the lock, even if creating the enhancements failed
        ci.getLock().writeLock().unlock();
    }
}
Aggregations