use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.
the class TikaEngineTest method testMetadata.
/**
 * Checks the metadata written for the audio resource
 * {@code testMP3id3v24.mp3} (parsed as {@code audio/mpeg}).
 * <p>
 * After the engine processed the content item, the expected values must be
 * present for properties in both the {@code NamespaceEnum.dc} and the
 * {@code NamespaceEnum.media} namespace.
 * <p>
 * NOTE: the {@code @Test} annotation is required; without it JUnit 4 does
 * not execute this method and all assertions below are silently skipped.
 *
 * @throws EngineException if raised while the test runs (fails the test)
 * @throws ParseException if raised while the test runs (fails the test)
 * @throws IOException if raised while the test runs (fails the test)
 */
@Test
public void testMetadata() throws EngineException, ParseException, IOException {
    log.info(">>> testMetadata <<<");
    ContentItem ci = createContentItem("testMP3id3v24.mp3", "audio/mpeg");
    // the engine must not reject the content item
    assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
    engine.computeEnhancements(ci);

    // properties in the "dc" namespace
    // (third argument is presumably the expected literal datatype, null = not checked -- confirm in verifyValue)
    verifyValue(ci, new IRI(NamespaceEnum.dc + "creator"), null, "Test Artist");
    verifyValue(ci, new IRI(NamespaceEnum.dc + "title"), null, "Test Album");
    verifyValue(ci, new IRI(NamespaceEnum.dc + "format"), null, "audio/mpeg");

    // properties in the "media" namespace
    verifyValue(ci, new IRI(NamespaceEnum.media + "hasFormat"), null, "audio/mpeg");
    verifyValue(ci, new IRI(NamespaceEnum.media + "mainOriginalTitle"), null, "Test Album");
    verifyValue(ci, new IRI(NamespaceEnum.media + "hasContributor"), null, "Test Artist");
    // the release date is the only value verified against an explicit type (XSD.string)
    verifyValue(ci, new IRI(NamespaceEnum.media + "releaseDate"), XSD.string, "2008");
    verifyValue(ci, new IRI(NamespaceEnum.media + "hasGenre"), null, "Rock");
    verifyValue(ci, new IRI(NamespaceEnum.media + "hasCreator"), null, "Test Artist");
}
use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.
the class TikaEngineTest method testContentTypeDetection.
/**
 * Submits {@code test.pdf} with the generic {@code OCTET_STREAM} media type
 * and checks that the engine still adds a {@code text/plain} and an
 * {@code application/xhtml+xml} content part with the expected content.
 *
 * @throws EngineException if raised while the test runs (fails the test)
 * @throws IOException if raised while the test runs (fails the test)
 */
@Test
public void testContentTypeDetection() throws EngineException, IOException {
    log.info(">>> testContentTypeDetection <<<");
    ContentItem untypedPdf = createContentItem("test.pdf", OCTET_STREAM.toString());
    assertFalse(CANNOT_ENHANCE == engine.canEnhance(untypedPdf));
    engine.computeEnhancements(untypedPdf);

    // validate plain text results
    Entry<IRI, Blob> textPart = ContentItemHelper.getBlob(untypedPdf, singleton("text/plain"));
    assertNotNull(textPart);
    Blob textBlob = textPart.getValue();
    assertNotNull(textBlob);
    assertContentRegexp(textBlob,
        "The Apache Stanbol Enhancer",
        "The Stanbol enhancer can detect famous cities");

    // validate XHTML results
    Entry<IRI, Blob> xhtmlPart = ContentItemHelper.getBlob(untypedPdf, singleton("application/xhtml+xml"));
    assertNotNull(xhtmlPart);
    Blob xhtmlBlob = xhtmlPart.getValue();
    assertNotNull(xhtmlBlob);
    assertContentRegexp(xhtmlBlob,
        "<html xmlns=\"http://www.w3.org/1999/xhtml\">",
        "<head>",
        "<meta name=",
        "<div class=\"page\">",
        "The Apache Stanbol Enhancer",
        "The Stanbol enhancer can detect famous cities",
        "</body></html>");
}
use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.
the class TikaEngineTest method testPdf.
/**
 * Runs the engine on two PDF resources ({@code test.pdf} and
 * {@code test2.pdf}, both submitted as {@code application/pdf}) and checks
 * for each that a {@code text/plain} and an {@code application/xhtml+xml}
 * content part with the expected content was added.
 *
 * @throws EngineException if raised while the test runs (fails the test)
 * @throws IOException if raised while the test runs (fails the test)
 */
@Test
public void testPdf() throws EngineException, IOException {
    log.info(">>> testPdf <<<");

    // ---- first document: PDF created by Apple Pages ----
    ContentItem pagesPdf = createContentItem("test.pdf", "application/pdf");
    assertFalse(CANNOT_ENHANCE == engine.canEnhance(pagesPdf));
    engine.computeEnhancements(pagesPdf);

    // validate plain text results
    Entry<IRI, Blob> pagesTextPart = ContentItemHelper.getBlob(pagesPdf, singleton("text/plain"));
    assertNotNull(pagesTextPart);
    Blob pagesTextBlob = pagesTextPart.getValue();
    assertNotNull(pagesTextBlob);
    // NOTE: the second pattern of this call ends with a blank
    assertContentRegexp(pagesTextBlob,
        "The Apache Stanbol Enhancer",
        "The Stanbol enhancer can detect famous cities ");

    // validate XHTML results
    Entry<IRI, Blob> pagesXhtmlPart = ContentItemHelper.getBlob(pagesPdf, singleton("application/xhtml+xml"));
    assertNotNull(pagesXhtmlPart);
    Blob pagesXhtmlBlob = pagesXhtmlPart.getValue();
    assertNotNull(pagesXhtmlBlob);
    assertContentRegexp(pagesXhtmlBlob,
        "<html xmlns=\"http://www.w3.org/1999/xhtml\">",
        "<head>",
        "<meta name=",
        "<div class=\"page\">",
        "The Apache Stanbol Enhancer",
        "The Stanbol enhancer can detect famous cities",
        "</body></html>");

    // ---- second document: PDF created by OpenOffice ----
    ContentItem openOfficePdf = createContentItem("test2.pdf", "application/pdf");
    assertFalse(CANNOT_ENHANCE == engine.canEnhance(openOfficePdf));
    engine.computeEnhancements(openOfficePdf);

    // validate plain text results
    Entry<IRI, Blob> ooTextPart = ContentItemHelper.getBlob(openOfficePdf, singleton("text/plain"));
    assertNotNull(ooTextPart);
    Blob ooTextBlob = ooTextPart.getValue();
    assertNotNull(ooTextBlob);
    assertContentRegexp(ooTextBlob,
        "The Apache Stanbol Enhancer",
        "The Stanbol enhancer can detect famous cities");

    // validate XHTML results
    Entry<IRI, Blob> ooXhtmlPart = ContentItemHelper.getBlob(openOfficePdf, singleton("application/xhtml+xml"));
    assertNotNull(ooXhtmlPart);
    Blob ooXhtmlBlob = ooXhtmlPart.getValue();
    assertNotNull(ooXhtmlBlob);
    assertContentRegexp(ooXhtmlBlob,
        "<html xmlns=\"http://www.w3.org/1999/xhtml\">",
        "<head>",
        "<meta name=",
        "<div class=\"page\">",
        "The Apache Stanbol Enhancer",
        "The Stanbol enhancer can detect famous cities",
        "</body></html>");
}
use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.
the class TikaEngineTest method testUnmappedProperties.
/**
 * Tests unmapped properties as added by
 * <a href="https://issues.apache.org/jira/browse/STANBOL-947">STANBOL-947</a>.
 * <p>
 * The audio resource {@code testMP4.m4a} is processed and the value of the
 * property {@code urn:tika.apache.org:tika:xmpDM:logComment} is verified.
 *
 * @throws EngineException if raised while the test runs (fails the test)
 * @throws IOException if raised while the test runs (fails the test)
 * @throws ParseException if raised while the test runs (fails the test)
 */
@Test
public void testUnmappedProperties() throws EngineException, IOException, ParseException {
    log.info(">>> testUnmappedProperties <<<");
    // uses the MP4 audio test resource
    ContentItem m4aItem = createContentItem("testMP4.m4a", "audio/mp4");
    assertFalse(CANNOT_ENHANCE == engine.canEnhance(m4aItem));
    engine.computeEnhancements(m4aItem);

    // test that the "xmpDM:logComment" is present
    IRI logComment = new IRI("urn:tika.apache.org:tika:xmpDM:logComment");
    verifyValue(m4aItem, logComment, null, "Test Comments");
}
use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.
the class TikaEngineTest method testUnsupported.
/**
 * Processes {@code test.pages} (media type
 * {@code application/x-iwork-pages-sffpages}) and checks that the engine
 * neither fails nor adds a {@code text/plain} content part.
 *
 * @throws EngineException if raised while the test runs (fails the test)
 * @throws IOException if raised while the test runs (fails the test)
 */
@Test
public void testUnsupported() throws EngineException, IOException {
    log.info(">>> testUnsupported <<<");
    ContentItem pagesItem = createContentItem("test.pages", "application/x-iwork-pages-sffpages");
    assertFalse(CANNOT_ENHANCE == engine.canEnhance(pagesItem));
    engine.computeEnhancements(pagesItem);

    // processing MUST NOT raise an error, but no plain text part may be added either
    Entry<IRI, Blob> plainTextPart = ContentItemHelper.getBlob(pagesItem, singleton("text/plain"));
    assertNull(plainTextPart);

    // the original content is expected to be the one and only Blob part
    assertEquals(1, ContentItemHelper.getContentParts(pagesItem, Blob.class).size());
}
Aggregations