Search in sources :

Example 71 with BlankNodeOrIRI

use of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in project stanbol by apache.

the class TextAnnotationsNewModelEngine method computeEnhancements.

/**
 * Computes the enhancements on the provided ContentItem.
 * <p>
 * For every resource typed as {@code ENHANCER_TEXTANNOTATION} in the metadata
 * of the content item this adds - where missing and derivable from the
 * {@code ENHANCER_START}/{@code ENHANCER_END} offsets and the text of the blob -
 * the selection prefix, the selection suffix and either the selected text
 * (plus a selection context if none is present) or, for selections longer than
 * three times {@code prefixSuffixSize}, the selection head and tail.
 * <p>
 * Offsets that do not fit the text (negative, beyond the end of the text, or
 * end before start) are logged and ignored, so no triple is derived from them.
 * <p>
 * The metadata are only read while holding the read lock of the content item;
 * the collected triples are added afterwards while holding the write lock.
 *
 * @param contentItem the content item to process; nothing is done if
 *        {@code getBlob(..)} finds no blob for the supported mime types
 * @throws EngineException if the text of the blob can not be read
 */
@Override
public void computeEnhancements(ContentItem contentItem) throws EngineException {
    Entry<IRI, Blob> textBlob = getBlob(contentItem, supportedMimeTypes);
    if (textBlob == null) {
        return;
    }
    String language = EnhancementEngineHelper.getLanguage(contentItem);
    Language lang = language == null ? null : new Language(language);
    String text;
    try {
        text = ContentItemHelper.getText(textBlob.getValue());
    } catch (IOException e) {
        throw new EngineException(this, contentItem, "Unable to read Plain Text Blob", e);
    }
    Set<Triple> addedTriples = new HashSet<Triple>();
    Graph metadata = contentItem.getMetadata();
    //extract all the necessary information within a read lock
    contentItem.getLock().readLock().lock();
    try {
        Iterator<Triple> it = metadata.filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
        while (it.hasNext()) {
            BlankNodeOrIRI ta = it.next().getSubject();
            boolean hasPrefix = metadata.filter(ta, ENHANCER_SELECTION_PREFIX, null).hasNext();
            boolean hasSuffix = metadata.filter(ta, ENHANCER_SELECTION_SUFFIX, null).hasNext();
            boolean hasSelected = metadata.filter(ta, ENHANCER_SELECTED_TEXT, null).hasNext();
            if (hasPrefix && hasSuffix && hasSelected) {
                //this TextAnnotation already uses the new model
                continue;
            }
            //NOTE(review): the start/end offsets are only read if the prefix/suffix
            //is missing. A TextAnnotation that already has a prefix (or suffix) but
            //no selected text is therefore not completed - confirm this is intended.
            Integer start;
            if (!hasPrefix) {
                start = EnhancementEngineHelper.get(metadata, ta, ENHANCER_START, Integer.class, lf);
                if (start == null) {
                    log.debug("unable to add fise:selection-prefix to TextAnnotation {} " + "because fise:start is not present", ta);
                } else if (start < 0) {
                    log.warn("fise:start {} of TextAnnotation {} < 0! " + "Will not transform this TextAnnotation", start, ta);
                    //ignore the invalid offset (as announced by the log message)
                    //instead of silently treating it as the start of the text
                    start = null;
                } else if (start > text.length()) {
                    //would otherwise cause a StringIndexOutOfBoundsException below
                    log.warn("fise:start {} of TextAnnotation {} > as the content length {}! " + "Will not transform this TextAnnotation", start, ta, text.length());
                    start = null;
                }
            } else {
                start = null;
            }
            Integer end;
            if (!hasSuffix) {
                end = EnhancementEngineHelper.get(metadata, ta, ENHANCER_END, Integer.class, lf);
                if (end == null) {
                    log.debug("unable to add fise:selection-suffix to TextAnnotation {} " + "because fise:end is not present", ta);
                } else if (end < 0) {
                    //needs an explicit check as 'end < start' is not evaluated
                    //when no start offset is available
                    log.warn("fise:end {} of TextAnnotation {} < 0! " + "Will not transform this TextAnnotation", end, ta);
                    end = null;
                } else if (end > text.length()) {
                    log.warn("fise:end {} of TextAnnotation {} > as the content length {}! " + "Will not transform this TextAnnotation", end, ta, text.length());
                    end = null;
                } else if (start != null && end < start) {
                    log.warn("fise:end {} < fise:start {} of TextAnnotation {}! " + "Will not transform this TextAnnotation", end, start, ta);
                    end = null;
                    start = null;
                }
            } else {
                end = null;
            }
            //from here on: 0 <= start <= end <= text.length() for all non-null offsets
            if (!hasPrefix && start != null) {
                addedTriples.add(new TripleImpl(ta, ENHANCER_SELECTION_PREFIX, new PlainLiteralImpl(text.substring(Math.max(0, start - prefixSuffixSize), start), lang)));
            }
            if (!hasSuffix && end != null) {
                addedTriples.add(new TripleImpl(ta, ENHANCER_SELECTION_SUFFIX, new PlainLiteralImpl(text.substring(end, Math.min(text.length(), end + prefixSuffixSize)), lang)));
            }
            if (!hasSelected && start != null && end != null) {
                //This adds missing fise:selected or fise:head/fise:tail if the selected text is too long
                int length = end - start;
                if (length > 3 * prefixSuffixSize) {
                    //selection is long: add head/tail instead of the whole selected text
                    addedTriples.add(new TripleImpl(ta, ENHANCER_SELECTION_HEAD, new PlainLiteralImpl(text.substring(start, start + prefixSuffixSize), lang)));
                    addedTriples.add(new TripleImpl(ta, ENHANCER_SELECTION_TAIL, new PlainLiteralImpl(text.substring(end - prefixSuffixSize, end), lang)));
                } else {
                    //add missing fise:selected
                    String selection = text.substring(start, end);
                    addedTriples.add(new TripleImpl(ta, ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(selection, lang)));
                    //check if we should also add a selection context
                    if (!metadata.filter(ta, ENHANCER_SELECTION_CONTEXT, null).hasNext()) {
                        addedTriples.add(new TripleImpl(ta, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(EnhancementEngineHelper.getSelectionContext(text, selection, start), lang)));
                    }
                }
            }
        }
    } finally {
        contentItem.getLock().readLock().unlock();
    }
    //finally write the collected triples within a write lock
    if (!addedTriples.isEmpty()) {
        contentItem.getLock().writeLock().lock();
        try {
            metadata.addAll(addedTriples);
        } finally {
            contentItem.getLock().writeLock().unlock();
        }
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) ContentItemHelper.getBlob(org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper.getBlob) Blob(org.apache.stanbol.enhancer.servicesapi.Blob) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) IOException(java.io.IOException) Triple(org.apache.clerezza.commons.rdf.Triple) Graph(org.apache.clerezza.commons.rdf.Graph) Language(org.apache.clerezza.commons.rdf.Language) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) HashSet(java.util.HashSet)

Example 72 with BlankNodeOrIRI

use of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in project stanbol by apache.

the class TextAnnotationNewModelEngineTest method testTextAnnotationNewModel.

/**
 * Runs the engine on the test content item and checks every TextAnnotation
 * found in the resulting metadata with
 * {@code EnhancementStructureHelper.validateTextAnnotation(..)}.
 */
@Test
public void testTextAnnotationNewModel() throws EngineException {
    Assert.assertEquals(EnhancementEngine.ENHANCE_ASYNC, engine.canEnhance(contentItem));
    engine.computeEnhancements(contentItem);

    // inspect the metadata written by the engine
    final Graph metadata = contentItem.getMetadata();
    final Iterator<Triple> annotations = metadata.filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
    // at least one TextAnnotation has to be present
    Assert.assertTrue(annotations.hasNext());
    while (annotations.hasNext()) {
        final BlankNodeOrIRI subject = annotations.next().getSubject();
        Assert.assertTrue(subject instanceof IRI);
        final IRI textAnnotation = (IRI) subject;

        // values the annotation is required to have
        final Map<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
        expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, contentItem.getUri());

        EnhancementStructureHelper.validateTextAnnotation(metadata, textAnnotation, SINGLE_SENTENCE, expectedValues, true);
    }
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) Graph(org.apache.clerezza.commons.rdf.Graph) HashMap(java.util.HashMap) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) Test(org.junit.Test)

Example 73 with BlankNodeOrIRI

use of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in project stanbol by apache.

the class TikaEngineTest method verifyValues.

/**
 * Asserts that the given subject has at least one value for the property and
 * that every value is a {@link BlankNodeOrIRI} contained in the expected
 * references.
 * <p>
 * NOTE(review): expected references that are never found do not cause a
 * failure - confirm whether that is intended.
 *
 * @param ci the content item providing the metadata
 * @param subject the subject of the triples to check
 * @param property the property of the triples to check
 * @param references the values allowed for the property
 * @return the values found for the property
 */
private static Set<BlankNodeOrIRI> verifyValues(ContentItem ci, BlankNodeOrIRI subject, IRI property, BlankNodeOrIRI... references) {
    final Iterator<Triple> triples = ci.getMetadata().filter(subject, property, null);
    // at least one value is required
    assertTrue(triples.hasNext());
    final Set<BlankNodeOrIRI> remaining = new HashSet<BlankNodeOrIRI>(Arrays.asList(references));
    final Set<BlankNodeOrIRI> matched = new HashSet<BlankNodeOrIRI>(remaining.size());
    do {
        final RDFTerm value = triples.next().getObject();
        assertTrue(value instanceof BlankNodeOrIRI);
        // each expected reference may be matched only once
        assertTrue(remaining.remove(value));
        matched.add((BlankNodeOrIRI) value);
    } while (triples.hasNext());
    return matched;
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) HashSet(java.util.HashSet)

Example 74 with BlankNodeOrIRI

use of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in project stanbol by apache.

the class TikaEngineTest method testMp3.

/**
 * Enhances the {@code testMP3id3v24.mp3} test file and checks the plain text
 * and XHTML blobs as well as the audio track metadata added for it.
 */
@Test
public void testMp3() throws EngineException, IOException, ParseException {
    log.info(">>> testMp3 <<<");
    final ContentItem item = createContentItem("testMP3id3v24.mp3", "audio/mpeg");
    assertFalse(engine.canEnhance(item) == CANNOT_ENHANCE);
    engine.computeEnhancements(item);

    // validate the plain text results
    final Entry<IRI, Blob> plainTextPart = ContentItemHelper.getBlob(item, singleton("text/plain"));
    assertNotNull(plainTextPart);
    final Blob plainTextBlob = plainTextPart.getValue();
    assertNotNull(plainTextBlob);
    assertContentRegexp(plainTextBlob, "Test Title", "Test Artist", "Test Album");

    // validate XHTML results
    final Entry<IRI, Blob> xhtmlPart = ContentItemHelper.getBlob(item, singleton("application/xhtml+xml"));
    assertNotNull(xhtmlPart);
    final Blob xhtmlBlob = xhtmlPart.getValue();
    assertNotNull(xhtmlBlob);

    // Test AudioTrack metadata
    final String mediaNs = String.valueOf(NamespaceEnum.media);
    final BlankNodeOrIRI track = verifyBlankNodeOrIRI(item, new IRI(mediaNs + "hasTrack"));

    // types
    verifyValues(item, track, RDF.type,
        new IRI(mediaNs + "MediaFragment"),
        new IRI(mediaNs + "Track"),
        new IRI(mediaNs + "AudioTrack"));

    // properties
    verifyValue(item, track, new IRI(mediaNs + "hasFormat"), XSD.string, "Mono");
    verifyValue(item, track, new IRI(mediaNs + "samplingRate"), XSD.int_, "44100");
    verifyValue(item, track, new IRI(mediaNs + "hasCompression"), XSD.string, "MP3");
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) Blob(org.apache.stanbol.enhancer.servicesapi.Blob) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Example 75 with BlankNodeOrIRI

use of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in project stanbol by apache.

the class TikaEngineTest method verifyValue.

/**
 * Asserts that the given subject has exactly one value for the property and
 * that this value is an {@link IRI} equal to the expected one.
 *
 * @param ci the content item providing the metadata
 * @param subject the subject of the triple to check
 * @param property the property of the triple to check
 * @param value the expected value
 * @return the value found for the property
 */
private static IRI verifyValue(ContentItem ci, BlankNodeOrIRI subject, IRI property, IRI value) {
    final Iterator<Triple> triples = ci.getMetadata().filter(subject, property, null);
    // a value must exist ...
    assertTrue(triples.hasNext());
    final Triple only = triples.next();
    final RDFTerm actual = only.getObject();
    // ... and it must be the only one
    assertFalse(triples.hasNext());
    assertTrue(actual instanceof IRI);
    assertEquals(value, actual);
    final IRI result = (IRI) actual;
    return result;
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm)

Aggregations

BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI): 89, Triple (org.apache.clerezza.commons.rdf.Triple): 52, IRI (org.apache.clerezza.commons.rdf.IRI): 41, TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl): 30, RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm): 27, HashSet (java.util.HashSet): 24, Graph (org.apache.clerezza.commons.rdf.Graph): 22, HashMap (java.util.HashMap): 17, ArrayList (java.util.ArrayList): 14, PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl): 14, Literal (org.apache.clerezza.commons.rdf.Literal): 13, SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph): 12, Lock (java.util.concurrent.locks.Lock): 10, BlankNode (org.apache.clerezza.commons.rdf.BlankNode): 10, EnhancementEngineHelper.getString (org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.getString): 8, Test (org.junit.Test): 8, Collection (java.util.Collection): 7, IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph): 7, Language (org.apache.clerezza.commons.rdf.Language): 6, EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException): 6