Search in sources :

Example 36 with IRI

use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.

the class TikaEngineTest method testMetadata.

/**
 * Runs the engine on the {@code testMP3id3v24.mp3} sample and verifies the
 * values written for properties in the {@code NamespaceEnum.dc} and
 * {@code NamespaceEnum.media} namespaces.
 * <p>
 * NOTE(review): no {@code @Test} annotation is visible on this method while the
 * sibling tests carry one - confirm whether it is intentionally not executed.
 */
public void testMetadata() throws EngineException, ParseException, IOException {
    log.info(">>> testMetadata <<<");
    // expected values shared by several of the checks below
    final String mimeType = "audio/mpeg";
    final String artist = "Test Artist";
    final String album = "Test Album";

    ContentItem item = createContentItem("testMP3id3v24.mp3", mimeType);
    assertFalse(CANNOT_ENHANCE == engine.canEnhance(item));
    engine.computeEnhancements(item);

    // properties in the NamespaceEnum.dc namespace
    IRI dcCreator = new IRI(NamespaceEnum.dc + "creator");
    verifyValue(item, dcCreator, null, artist);
    IRI dcTitle = new IRI(NamespaceEnum.dc + "title");
    verifyValue(item, dcTitle, null, album);
    IRI dcFormat = new IRI(NamespaceEnum.dc + "format");
    verifyValue(item, dcFormat, null, mimeType);

    // properties in the NamespaceEnum.media namespace
    IRI mediaFormat = new IRI(NamespaceEnum.media + "hasFormat");
    verifyValue(item, mediaFormat, null, mimeType);
    IRI mediaTitle = new IRI(NamespaceEnum.media + "mainOriginalTitle");
    verifyValue(item, mediaTitle, null, album);
    IRI mediaContributor = new IRI(NamespaceEnum.media + "hasContributor");
    verifyValue(item, mediaContributor, null, artist);
    // the release date is the only value checked with an explicit datatype
    IRI mediaReleaseDate = new IRI(NamespaceEnum.media + "releaseDate");
    verifyValue(item, mediaReleaseDate, XSD.string, "2008");
    IRI mediaGenre = new IRI(NamespaceEnum.media + "hasGenre");
    verifyValue(item, mediaGenre, null, "Rock");
    IRI mediaCreator = new IRI(NamespaceEnum.media + "hasCreator");
    verifyValue(item, mediaCreator, null, artist);
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem)

Example 37 with IRI

use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.

the class TikaEngineTest method testContentTypeDetection.

/**
 * Submits {@code test.pdf} with the {@code OCTET_STREAM} media type and checks
 * that the engine still adds a plain text and an XHTML content part holding
 * the expected text.
 */
@Test
public void testContentTypeDetection() throws EngineException, IOException {
    log.info(">>> testContentTypeDetection <<<");
    ContentItem item = createContentItem("test.pdf", OCTET_STREAM.toString());
    assertFalse(CANNOT_ENHANCE == engine.canEnhance(item));
    engine.computeEnhancements(item);

    // validate plain text results
    Entry<IRI, Blob> textPart = ContentItemHelper.getBlob(item, singleton("text/plain"));
    assertNotNull(textPart);
    Blob textBlob = textPart.getValue();
    assertNotNull(textBlob);
    assertContentRegexp(textBlob,
        "The Apache Stanbol Enhancer",
        "The Stanbol enhancer can detect famous cities");

    // validate XHTML results
    Entry<IRI, Blob> xhtmlPart = ContentItemHelper.getBlob(item, singleton("application/xhtml+xml"));
    assertNotNull(xhtmlPart);
    Blob xhtmlBlob = xhtmlPart.getValue();
    assertNotNull(xhtmlBlob);
    assertContentRegexp(xhtmlBlob,
        "<html xmlns=\"http://www.w3.org/1999/xhtml\">",
        "<head>",
        "<meta name=",
        "<div class=\"page\">",
        "The Apache Stanbol Enhancer",
        "The Stanbol enhancer can detect famous cities",
        "</body></html>");
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) Blob(org.apache.stanbol.enhancer.servicesapi.Blob) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Example 38 with IRI

use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.

the class TikaEngineTest method testPdf.

/**
 * Enhances two PDF fixtures ({@code test.pdf} and {@code test2.pdf}) and, for
 * each of them, checks that a plain text and an XHTML content part exist and
 * contain the expected text.
 */
@Test
public void testPdf() throws EngineException, IOException {
    log.info(">>> testPdf <<<");

    // ---- first fixture: PDF created by Apple Pages ----
    ContentItem pagesPdf = createContentItem("test.pdf", "application/pdf");
    assertFalse(CANNOT_ENHANCE == engine.canEnhance(pagesPdf));
    engine.computeEnhancements(pagesPdf);

    // validate plain text results
    Entry<IRI, Blob> pagesTextPart = ContentItemHelper.getBlob(pagesPdf, singleton("text/plain"));
    assertNotNull(pagesTextPart);
    Blob pagesText = pagesTextPart.getValue();
    assertNotNull(pagesText);
    // NOTE: the second pattern ends with a blank, unlike the one used for test2.pdf
    assertContentRegexp(pagesText,
        "The Apache Stanbol Enhancer",
        "The Stanbol enhancer can detect famous cities ");

    // validate XHTML results
    Entry<IRI, Blob> pagesXhtmlPart = ContentItemHelper.getBlob(pagesPdf, singleton("application/xhtml+xml"));
    assertNotNull(pagesXhtmlPart);
    Blob pagesXhtml = pagesXhtmlPart.getValue();
    assertNotNull(pagesXhtml);
    assertContentRegexp(pagesXhtml,
        "<html xmlns=\"http://www.w3.org/1999/xhtml\">",
        "<head>",
        "<meta name=",
        "<div class=\"page\">",
        "The Apache Stanbol Enhancer",
        "The Stanbol enhancer can detect famous cities",
        "</body></html>");

    // ---- second fixture: PDF created by OpenOffice ----
    ContentItem officePdf = createContentItem("test2.pdf", "application/pdf");
    assertFalse(CANNOT_ENHANCE == engine.canEnhance(officePdf));
    engine.computeEnhancements(officePdf);

    // validate plain text results
    Entry<IRI, Blob> officeTextPart = ContentItemHelper.getBlob(officePdf, singleton("text/plain"));
    assertNotNull(officeTextPart);
    Blob officeText = officeTextPart.getValue();
    assertNotNull(officeText);
    assertContentRegexp(officeText,
        "The Apache Stanbol Enhancer",
        "The Stanbol enhancer can detect famous cities");

    // validate XHTML results
    Entry<IRI, Blob> officeXhtmlPart = ContentItemHelper.getBlob(officePdf, singleton("application/xhtml+xml"));
    assertNotNull(officeXhtmlPart);
    Blob officeXhtml = officeXhtmlPart.getValue();
    assertNotNull(officeXhtml);
    assertContentRegexp(officeXhtml,
        "<html xmlns=\"http://www.w3.org/1999/xhtml\">",
        "<head>",
        "<meta name=",
        "<div class=\"page\">",
        "The Apache Stanbol Enhancer",
        "The Stanbol enhancer can detect famous cities",
        "</body></html>");
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) Blob(org.apache.stanbol.enhancer.servicesapi.Blob) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Example 39 with IRI

use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.

the class TikaEngineTest method testUnmappedProperties.

/**
 * Checks the handling of unmapped properties introduced with
 * <a href="https://issues.apache.org/jira/browse/STANBOL-947">STANBOL-947</a>:
 * after enhancing the {@code testMP4.m4a} sample, the value
 * {@code "Test Comments"} must be present for the
 * {@code urn:tika.apache.org:tika:xmpDM:logComment} property.
 *
 * @throws EngineException declared for the engine calls made by this test
 * @throws IOException declared by this test; the raising call is not visible here
 * @throws ParseException declared by this test; the raising call is not visible here
 */
@Test
public void testUnmappedProperties() throws EngineException, IOException, ParseException {
    log.info(">>> testUnmappedProperties <<<");
    // uses the MP4 audio sample (submitted as "audio/mp4")
    ContentItem item = createContentItem("testMP4.m4a", "audio/mp4");
    assertFalse(CANNOT_ENHANCE == engine.canEnhance(item));
    engine.computeEnhancements(item);

    // the "xmpDM:logComment" property has to carry the expected comment
    IRI logComment = new IRI("urn:tika.apache.org:tika:xmpDM:logComment");
    verifyValue(item, logComment, null, "Test Comments");
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Example 40 with IRI

use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.

the class TikaEngineTest method testUnsupported.

/**
 * Enhances {@code test.pages} and checks that the engine neither fails nor
 * adds a {@code text/plain} content part, leaving a single {@link Blob}
 * content part on the content item.
 */
@Test
public void testUnsupported() throws EngineException, IOException {
    log.info(">>> testUnsupported <<<");
    ContentItem pagesItem = createContentItem("test.pages", "application/x-iwork-pages-sffpages");
    assertFalse(CANNOT_ENHANCE == engine.canEnhance(pagesItem));
    engine.computeEnhancements(pagesItem);

    // processing MUST NOT raise an error, but no plain text part may be added either
    Entry<IRI, Blob> textPart = ContentItemHelper.getBlob(pagesItem, singleton("text/plain"));
    assertNull(textPart);

    // the original content has to be the only Blob content part
    assertEquals(1, ContentItemHelper.getContentParts(pagesItem, Blob.class).size());
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) Blob(org.apache.stanbol.enhancer.servicesapi.Blob) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Aggregations

- IRI (org.apache.clerezza.commons.rdf.IRI): 346
- BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI): 113
- Graph (org.apache.clerezza.commons.rdf.Graph): 109
- TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl): 104
- Triple (org.apache.clerezza.commons.rdf.Triple): 88
- RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm): 84
- Test (org.junit.Test): 78
- PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl): 58
- HashSet (java.util.HashSet): 50
- ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem): 46
- EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException): 39
- HashMap (java.util.HashMap): 38
- IOException (java.io.IOException): 37
- ArrayList (java.util.ArrayList): 37
- Blob (org.apache.stanbol.enhancer.servicesapi.Blob): 36
- Literal (org.apache.clerezza.commons.rdf.Literal): 35
- SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph): 31
- IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph): 29
- Recipe (org.apache.stanbol.rules.base.api.Recipe): 29
- Language (org.apache.clerezza.commons.rdf.Language): 24