Search in sources :

Example 41 with IRI

use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.

the class TikaEngineTest method testMp4.

/**
 * Exercises the engine on an MP4 audio file ({@code testMP4.m4a}); the
 * Mp4 metadata extraction was added with Tika 1.1 (STANBOL-627).
 * <p>
 * Checks that a plain text and an XHTML content part are produced and
 * that the audio track metadata is mapped to the media namespace.
 * @throws EngineException
 * @throws IOException
 * @throws ParseException
 */
@Test
public void testMp4() throws EngineException, IOException, ParseException {
    log.info(">>> testMp4 <<<");
    ContentItem ci = createContentItem("testMP4.m4a", "audio/mp4");
    assertFalse(CANNOT_ENHANCE == engine.canEnhance(ci));
    engine.computeEnhancements(ci);

    // the extracted plain text has to mention title, artist and album
    Entry<IRI, Blob> textPart = ContentItemHelper.getBlob(ci, singleton("text/plain"));
    assertNotNull(textPart);
    Blob textBlob = textPart.getValue();
    assertNotNull(textBlob);
    assertContentRegexp(textBlob, "Test Title", "Test Artist", "Test Album");

    // the XHTML version only needs to be present
    Entry<IRI, Blob> xhtmlPart = ContentItemHelper.getBlob(ci, singleton("application/xhtml+xml"));
    assertNotNull(xhtmlPart);
    Blob xhtmlBlob = xhtmlPart.getValue();
    assertNotNull(xhtmlBlob);

    // audio track metadata: every IRI below lives in the media namespace
    String mediaNs = NamespaceEnum.media.toString();
    BlankNodeOrIRI track = verifyBlankNodeOrIRI(ci, new IRI(mediaNs + "hasTrack"));

    // expected rdf:type values of the track
    IRI mediaFragmentType = new IRI(mediaNs + "MediaFragment");
    IRI trackType = new IRI(mediaNs + "Track");
    IRI audioTrackType = new IRI(mediaNs + "AudioTrack");
    verifyValues(ci, track, RDF.type, mediaFragmentType, trackType, audioTrackType);

    // expected literal properties of the track
    IRI hasFormat = new IRI(mediaNs + "hasFormat");
    IRI samplingRate = new IRI(mediaNs + "samplingRate");
    IRI hasCompression = new IRI(mediaNs + "hasCompression");
    verifyValue(ci, track, hasFormat, XSD.string, "Stereo");
    verifyValue(ci, track, samplingRate, XSD.int_, "44100");
    verifyValue(ci, track, hasCompression, XSD.string, "M4A");
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) Blob(org.apache.stanbol.enhancer.servicesapi.Blob) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Example 42 with IRI

use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.

the class TikaEngineTest method testGEOMetadata.

/**
 * Runs the engine on a geo-tagged JPEG ({@code testJPEG_GEO.jpg}, submitted
 * as octet stream) and checks that the coordinates show up twice: as a
 * location resource in the media namespace and as lat/long values in the
 * geo namespace.
 */
@Test
public void testGEOMetadata() throws EngineException, IOException, ParseException {
    log.info(">>> testGEOMetadata <<<");
    final String expectedLatitude = "12.54321";
    final String expectedLongitude = "-54.1234";

    ContentItem ci = createContentItem("testJPEG_GEO.jpg", OCTET_STREAM.toString());
    assertFalse(CANNOT_ENHANCE == engine.canEnhance(ci));
    engine.computeEnhancements(ci);

    // Part 1: media namespace mappings
    // (locationAltitude is not verified by this test)
    String mediaNs = NamespaceEnum.media.toString();
    IRI hasLocation = new IRI(mediaNs + "hasLocation");
    IRI locationLatitude = new IRI(mediaNs + "locationLatitude");
    IRI locationLongitude = new IRI(mediaNs + "locationLongitude");

    // exactly one hasLocation value is expected and it must be a resource
    Iterator<Triple> locationTriples = ci.getMetadata().filter(ci.getUri(), hasLocation, null);
    assertTrue(locationTriples.hasNext());
    RDFTerm locationTerm = locationTriples.next().getObject();
    assertFalse(locationTriples.hasNext());
    assertTrue(locationTerm instanceof BlankNodeOrIRI);

    BlankNodeOrIRI location = verifyBlankNodeOrIRI(ci, hasLocation);
    verifyValue(ci, location, locationLatitude, XSD.double_, expectedLatitude);
    verifyValue(ci, location, locationLongitude, XSD.double_, expectedLongitude);

    // Part 2: geo namespace mappings on the content item
    String geoNs = NamespaceEnum.geo.toString();
    IRI geoLat = new IRI(geoNs + "lat");
    IRI geoLong = new IRI(geoNs + "long");
    verifyValue(ci, geoLat, XSD.double_, expectedLatitude);
    verifyValue(ci, geoLong, XSD.double_, expectedLongitude);
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Example 43 with IRI

use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.

the class TikaEngineTest method testExifMetadata.

/**
 * Runs the engine on a JPEG carrying Exif data ({@code testJPEG_EXIF.jpg})
 * and checks the extracted values under three vocabularies: the nexif
 * namespace, the media namespace and the dc namespace.
 */
@Test
public void testExifMetadata() throws EngineException, ParseException, IOException {
    log.info(">>> testExifMetadata <<<");
    final String nexifNs = "http://www.semanticdesktop.org/ontologies/2007/05/10/nexif#";
    final String mediaNs = NamespaceEnum.media.toString();
    final String dcNs = NamespaceEnum.dc.toString();
    // expected values that are asserted under more than one property
    final String mimeType = "image/jpeg";
    final String created = "2009-08-11T09:09:45";

    ContentItem ci = createContentItem("testJPEG_EXIF.jpg", mimeType);
    assertFalse(CANNOT_ENHANCE == engine.canEnhance(ci));
    engine.computeEnhancements(ci);

    // --- nexif properties ---
    verifyValue(ci, new IRI(nexifNs + "make"), null, "Canon");
    verifyValue(ci, new IRI(nexifNs + "software"), null, "Adobe Photoshop CS3 Macintosh");
    verifyValue(ci, new IRI(nexifNs + "dateTimeOriginal"), XSD.dateTime, created);
    verifyValue(ci, new IRI(nexifNs + "relatedImageWidth"), XSD.int_, "100");
    verifyValue(ci, new IRI(nexifNs + "fNumber"), XSD.double_, "5.6");
    verifyValue(ci, new IRI(nexifNs + "model"), null, "Canon EOS 40D");
    verifyValue(ci, new IRI(nexifNs + "isoSpeedRatings"), XSD.int_, "400");
    verifyValue(ci, new IRI(nexifNs + "xResolution"), XSD.double_, "240.0");
    verifyValue(ci, new IRI(nexifNs + "flash"), XSD.boolean_, "false");
    verifyValue(ci, new IRI(nexifNs + "exposureTime"), XSD.double_, "6.25E-4");
    verifyValue(ci, new IRI(nexifNs + "yResolution"), XSD.double_, "240.0");
    verifyValue(ci, new IRI(nexifNs + "resolutionUnit"), XSD.string, "Inch");
    verifyValue(ci, new IRI(nexifNs + "focalLength"), XSD.double_, "194.0");
    verifyValue(ci, new IRI(nexifNs + "relatedImageLength"), XSD.int_, "68");
    verifyValue(ci, new IRI(nexifNs + "bitsPerSample"), XSD.int_, "8");

    // --- media namespace mappings of the Exif data ---
    verifyValue(ci, new IRI(mediaNs + "frameHeight"), XSD.int_, "68");
    verifyValue(ci, new IRI(mediaNs + "frameWidth"), XSD.int_, "100");
    verifyValue(ci, new IRI(mediaNs + "hasFormat"), null, mimeType);
    verifyValue(ci, new IRI(mediaNs + "creationDate"), XSD.dateTime, created);
    verifyValues(ci, new IRI(mediaNs + "hasKeyword"), null, "serbor", "moscow-birds", "canon-55-250");

    // --- dc namespace mappings ---
    verifyValue(ci, new IRI(dcNs + "format"), null, mimeType);
    verifyValue(ci, new IRI(dcNs + "created"), XSD.dateTime, created);
    verifyValue(ci, new IRI(dcNs + "modified"), XSD.dateTime, "2009-10-02T23:02:49");
    verifyValues(ci, new IRI(dcNs + "subject"), null, "serbor", "moscow-birds", "canon-55-250");
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Example 44 with IRI

use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.

the class TikaEngineTest method testRtf.

/**
 * Runs the engine on an RTF document ({@code test.rtf}) and checks that
 * both a plain text and an XHTML content part are created and contain
 * the expected text fragments.
 */
@Test
public void testRtf() throws EngineException, IOException {
    log.info(">>> testRtf <<<");
    final String heading = "The Apache Stanbol Enhancer";

    ContentItem ci = createContentItem("test.rtf", "application/rtf");
    assertFalse(CANNOT_ENHANCE == engine.canEnhance(ci));
    engine.computeEnhancements(ci);

    // plain text version: heading plus the full example sentence
    Entry<IRI, Blob> textPart = ContentItemHelper.getBlob(ci, singleton("text/plain"));
    assertNotNull(textPart);
    Blob textBlob = textPart.getValue();
    assertNotNull(textBlob);
    String sentence = "The Stanbol enhancer can detect famous cities such as Paris and people such as Bob Marley.";
    assertContentRegexp(textBlob, heading, sentence);

    // XHTML version: document skeleton plus heading and start of the sentence
    Entry<IRI, Blob> xhtmlPart = ContentItemHelper.getBlob(ci, singleton("application/xhtml+xml"));
    assertNotNull(xhtmlPart);
    Blob xhtmlBlob = xhtmlPart.getValue();
    assertNotNull(xhtmlBlob);
    String htmlOpen = "<html xmlns=\"http://www.w3.org/1999/xhtml\">";
    String sentenceStart = "The Stanbol enhancer can detect famous cities";
    String htmlClose = "</body></html>";
    assertContentRegexp(xhtmlBlob, htmlOpen, "<head>", "<meta name=", "<title>", heading, sentenceStart, htmlClose);
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) Blob(org.apache.stanbol.enhancer.servicesapi.Blob) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Example 45 with IRI

use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.

the class SentimentSummarizationEngine method writeSentiment.

/**
 * Writes the aggregated sentiment of a section to the metadata of the
 * content item.
 * <p>
 * The positive and the negative sentiment values of the phrases are each
 * aggregated as root mean square (the negative one is written with a
 * negative sign). The overall sentiment is the sum of both aggregates.
 * Nothing is written if the section is {@code null} or if there are no
 * phrases.
 *
 * @param ci the content item whose metadata graph receives the triples
 * @param section the section the sentiment is written for
 * @param sectionPhrases the sentiment phrases of the section
 */
private void writeSentiment(ContentItem ci, Section section, List<SentimentPhrase> sectionPhrases) {
    boolean nothingToWrite = section == null || sectionPhrases == null || sectionPhrases.isEmpty();
    if (nothingToWrite) {
        return;
    }
    IRI enh = createTextEnhancement(ci, this);
    Graph metadata = ci.getMetadata();

    // TODO use the fise:TextAnnotation new model for
    // start/end positions are only written for sentences
    if (section.getType() == SpanTypeEnum.Sentence) {
        metadata.add(new TripleImpl(enh, ENHANCER_START, lf.createTypedLiteral(section.getStart())));
        metadata.add(new TripleImpl(enh, ENHANCER_END, lf.createTypedLiteral(section.getEnd())));
    }

    // TODO: Summarize the sentiments of this section
    // collect the sums of squares; phrases without a value are not counted
    double negativeSquares = 0.0;
    int negativeCount = 0;
    double positiveSquares = 0.0;
    int positiveCount = 0;
    for (SentimentPhrase phrase : sectionPhrases) {
        Double negative = phrase.getNegativeSentiment();
        if (negative != null) {
            double neg = negative;
            negativeSquares += neg * neg;
            negativeCount++;
        }
        Double positive = phrase.getPositiveSentiment();
        if (positive != null) {
            double pos = positive;
            positiveSquares += pos * pos;
            positiveCount++;
        }
    }

    // root mean squares; a side without any value stays at 0.0
    double positiveSent = 0.0;
    if (positiveCount > 0) {
        positiveSent = Math.sqrt(positiveSquares / positiveCount);
        metadata.add(new TripleImpl(enh, POSITIVE_SENTIMENT_PROPERTY, lf.createTypedLiteral(Double.valueOf(positiveSent))));
    }
    double negativeSent = 0.0;
    if (negativeCount > 0) {
        negativeSent = -Math.sqrt(negativeSquares / negativeCount);
        metadata.add(new TripleImpl(enh, NEGATIVE_SENTIMENT_PROPERTY, lf.createTypedLiteral(Double.valueOf(negativeSent))));
    }
    // the overall sentiment is always written
    metadata.add(new TripleImpl(enh, SENTIMENT_PROPERTY, lf.createTypedLiteral(Double.valueOf(negativeSent + positiveSent))));

    // dc:type values: the sentiment type, the SSO type registered for the
    // span type (if any) and the document sentiment type for Text sections
    metadata.add(new TripleImpl(enh, DC_TYPE, SENTIMENT_TYPE));
    IRI ssoType = NIFHelper.SPAN_TYPE_TO_SSO_TYPE.get(section.getType());
    if (ssoType != null) {
        metadata.add(new TripleImpl(enh, DC_TYPE, ssoType));
    }
    if (section.getType() == SpanTypeEnum.Text) {
        metadata.add(new TripleImpl(enh, DC_TYPE, DOCUMENT_SENTIMENT_TYPE));
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Graph(org.apache.clerezza.commons.rdf.Graph) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Aggregations

IRI (org.apache.clerezza.commons.rdf.IRI): 346, BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI): 113, Graph (org.apache.clerezza.commons.rdf.Graph): 109, TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl): 104, Triple (org.apache.clerezza.commons.rdf.Triple): 88, RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm): 84, Test (org.junit.Test): 78, PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl): 58, HashSet (java.util.HashSet): 50, ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem): 46, EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException): 39, HashMap (java.util.HashMap): 38, IOException (java.io.IOException): 37, ArrayList (java.util.ArrayList): 37, Blob (org.apache.stanbol.enhancer.servicesapi.Blob): 36, Literal (org.apache.clerezza.commons.rdf.Literal): 35, SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph): 31, IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph): 29, Recipe (org.apache.stanbol.rules.base.api.Recipe): 29, Language (org.apache.clerezza.commons.rdf.Language): 24