Search in sources :

Example 41 with Literal

use of org.apache.clerezza.commons.rdf.Literal in project stanbol by apache.

the class EnhancementStructureHelper method validateEntityAnnotation.

/**
 * Checks if a fise:EntityAnnotation is valid. NOTE that this also validates
 * all fise:Enhancement related requirements by calling
 * {@link #validateEnhancement(Graph, IRI, Map)}.
 * <p>
 * The following checks are performed, in this order:
 * <ul>
 * <li>at least one dc:relation value is present and every value is an IRI
 *     that is either typed as fise:TextAnnotation, or typed as
 *     fise:EntityAnnotation and additionally linked via dc:requires
 *     (STANBOL-766)</li>
 * <li>at least one fise:entity-reference is present and all values are IRIs</li>
 * <li>at least one fise:entity-label is present and all values are Literals</li>
 * <li>all (optional) fise:entity-type values are IRIs</li>
 * </ul>
 * For fise:entity-reference, fise:entity-label and fise:entity-type an
 * expected value (if present in <code>expectedValues</code>) must be equal to
 * at least one of the values found in the graph.
 *
 * @param enhancements the enhancements graph
 * @param entityAnnotation the entity annotation to validate
 * @param expectedValues expected values (properties for the values are used as keys)
 */
public static void validateEntityAnnotation(Graph enhancements, IRI entityAnnotation, Map<IRI, RDFTerm> expectedValues) {
    Iterator<Triple> relationToTextAnnotationIterator = enhancements.filter(entityAnnotation, DC_RELATION, null);
    // check if the relation to the text annotation is set
    assertTrue(relationToTextAnnotationIterator.hasNext());
    while (relationToTextAnnotationIterator.hasNext()) {
        // test if the referred annotations are text annotations or
        // the referenced annotations is a fise:EntityAnnotation AND also a
        // dc:requires link is defined (STANBOL-766)
        RDFTerm relatedResource = relationToTextAnnotationIterator.next().getObject();
        // fail with a descriptive assertion instead of a ClassCastException
        // when the dc:relation value is not an IRI
        assertTrue("dc:relation values of fise:EntityAnnotations MUST BE URIs (EntityAnnotation: '" + entityAnnotation + "', value: '" + relatedResource + "')", relatedResource instanceof IRI);
        IRI referredTextAnnotation = (IRI) relatedResource;
        assertTrue("fise:EntityAnnotations MUST BE dc:related to a fise:TextAnnotation OR dc:requires and dc:related to the same fise:EntityAnnotation", enhancements.filter(referredTextAnnotation, RDF_TYPE, ENHANCER_TEXTANNOTATION).hasNext() || (enhancements.filter(referredTextAnnotation, RDF_TYPE, ENHANCER_ENTITYANNOTATION).hasNext() && enhancements.filter(entityAnnotation, Properties.DC_REQUIRES, referredTextAnnotation).hasNext()));
    }
    // test if an entity is referred
    Iterator<Triple> entityReferenceIterator = enhancements.filter(entityAnnotation, ENHANCER_ENTITY_REFERENCE, null);
    assertTrue("fise:entity-reference MUST BE present! (EntityAnnotation: '" + entityAnnotation + "')", entityReferenceIterator.hasNext());
    // the expected value is reset to null as soon as it is found, so a
    // non-null value after the loop means that it was missing
    RDFTerm expectedReferencedEntity = expectedValues.get(ENHANCER_ENTITY_REFERENCE);
    while (entityReferenceIterator.hasNext()) {
        //check possible multiple references
        RDFTerm entityReferenceResource = entityReferenceIterator.next().getObject();
        // test if the reference is an URI
        assertTrue("fise:entity-reference value MUST BE of URIs", entityReferenceResource instanceof IRI);
        if (expectedReferencedEntity != null && expectedReferencedEntity.equals(entityReferenceResource)) {
            //found
            expectedReferencedEntity = null;
        }
    }
    assertNull("EntityAnnotation " + entityAnnotation + " fise:entity-reference has not the expected value " + expectedReferencedEntity + "!", expectedReferencedEntity);
    //test if the entity label is set
    Iterator<Triple> entityLabelIterator = enhancements.filter(entityAnnotation, ENHANCER_ENTITY_LABEL, null);
    assertTrue(entityLabelIterator.hasNext());
    RDFTerm expectedEntityLabel = expectedValues.get(ENHANCER_ENTITY_LABEL);
    while (entityLabelIterator.hasNext()) {
        RDFTerm entityLabelResource = entityLabelIterator.next().getObject();
        assertTrue("fise:entity-label values MUST BE PlainLiterals (EntityAnnotation: " + entityAnnotation + ")!", entityLabelResource instanceof Literal);
        if (expectedEntityLabel != null && expectedEntityLabel.equals(entityLabelResource)) {
            expectedEntityLabel = null;
        }
    }
    assertNull("The expected EntityLabel " + expectedEntityLabel + " was not found", expectedEntityLabel);
    //test the optional entity types
    Iterator<Triple> entityTypeIterator = enhancements.filter(entityAnnotation, Properties.ENHANCER_ENTITY_TYPE, null);
    RDFTerm expectedEntityType = expectedValues.get(Properties.ENHANCER_ENTITY_TYPE);
    // iterate over ALL entity types: an annotation may define several types
    // and the expected one is not necessarily returned first
    while (entityTypeIterator.hasNext()) {
        RDFTerm entityTypeResource = entityTypeIterator.next().getObject();
        assertTrue("fise:entity-type values MUST BE URIs", entityTypeResource instanceof IRI);
        if (expectedEntityType != null && expectedEntityType.equals(entityTypeResource)) {
            //found
            expectedEntityType = null;
        }
    }
    assertNull("The expected fise:entity-type value " + expectedEntityType + " was not found!", expectedEntityType);
    //test all properties required by fise:Enhancement
    validateEnhancement(enhancements, entityAnnotation, expectedValues);
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) Literal(org.apache.clerezza.commons.rdf.Literal) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm)

Example 42 with Literal

use of org.apache.clerezza.commons.rdf.Literal in project stanbol by apache.

the class EnhancementStructureHelper method validateLanguageAnnotations.

/**
 * Validates fise:TextAnnotations that annotate the language of the text as
 * defined by
 * <a href="https://issues.apache.org/jira/browse/STANBOL-613">STANBOL-613</a><p>
 * A TextAnnotation with a dc:language value must have exactly one such value
 * (a Literal with at least two characters), exactly one dc:type value equal
 * to dc:LinguisticSystem, and must be returned by
 * <code>EnhancementEngineHelper.getLanguageAnnotations(..)</code>. A
 * TextAnnotation without a dc:language value must not use the dc:type
 * dc:LinguisticSystem.<p>
 * Called by {@link #validateTextAnnotation(Graph, IRI, String, Map)}
 * @param enhancements the graph holding the enhancements
 * @param textAnnotation the fise:TextAnnotation to validate
 */
private static void validateLanguageAnnotations(Graph enhancements, IRI textAnnotation) {
    Iterator<Triple> languages = enhancements.filter(textAnnotation, DC_LANGUAGE, null);
    if (!languages.hasNext()) {
        // not a language annotation: none of the dc:type values may be
        // dc:LinguisticSystem
        Iterator<Triple> types = enhancements.filter(textAnnotation, DC_TYPE, null);
        while (types.hasNext()) {
            RDFTerm type = types.next().getObject();
            assertFalse("Only fise:TextAnnotations without a dc:language value MUST NOT use the dc:type value dc:LinguisticSystem (uri " + textAnnotation + ")", DCTERMS_LINGUISTIC_SYSTEM.equals(type));
        }
        return;
    }

    // a language annotation: validate the single dc:language value
    RDFTerm language = languages.next().getObject();
    assertTrue("The dc:language value MUST BE a PlainLiteral", language instanceof Literal);
    int languageLength = ((Literal) language).getLexicalForm().length();
    assertTrue("The dc:language value '" + language + "'MUST BE at least two chars long", languageLength >= 2);
    boolean hasFurtherLanguage = languages.hasNext();
    assertFalse("TextAnnotations with the dc:language property MUST only have a single dc:language value (uri " + textAnnotation + ")", hasFurtherLanguage);

    // exactly one dc:type value is required and it has to be dc:LinguisticSystem
    String linguisticSystemRequired = "TextAnnotations with the dc:language property MUST use dc:type dc:LinguisticSystem (uri " + textAnnotation + ")";
    Iterator<Triple> types = enhancements.filter(textAnnotation, DC_TYPE, null);
    assertTrue(linguisticSystemRequired, types.hasNext());
    assertEquals(linguisticSystemRequired, DCTERMS_LINGUISTIC_SYSTEM, types.next().getObject());
    assertFalse("TextAnnotations with the dc:language property MUST only have a single dc:type value (uri " + textAnnotation + ")", types.hasNext());

    // the annotation also needs to be found by the EnhancementEngineHelper
    List<BlankNodeOrIRI> knownLanguageAnnotations = EnhancementEngineHelper.getLanguageAnnotations(enhancements);
    assertTrue("Language annotation " + textAnnotation + " was not returned by EnhancementEngineHelper.getLanguageAnnotations(..)!", knownLanguageAnnotations.contains(textAnnotation));
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) Literal(org.apache.clerezza.commons.rdf.Literal) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm)

Example 43 with Literal

use of org.apache.clerezza.commons.rdf.Literal in project stanbol by apache.

the class ContentItemBackendTest method testContentWithAdditionalMetadata.

/**
 * Adds an additional metadata graph as content part to a ContentItem and
 * checks that a triple of this graph can be queried via LDPath by using a
 * {@link ContentItemBackend} created with the flag <code>true</code>.
 */
@Test
public void testContentWithAdditionalMetadata() throws IOException, LDPathParseException {
    // encode explicitly as UTF-8 so that the bytes match the charset declared
    // in the media type regardless of the platform default charset
    byte[] content = "text content".getBytes(java.nio.charset.StandardCharsets.UTF_8);
    IRI uri = ContentItemHelper.makeDefaultUrn(content);
    ContentItem contentItem = ciFactory.createContentItem(uri, new ByteArraySource(content, "text/plain; charset=UTF-8"));
    // additional metadata: a single foaf:givenName triple
    Graph tc = new SimpleGraph();
    Literal literal = LiteralFactory.getInstance().createTypedLiteral("Michael Jackson");
    IRI subject = new IRI("dummyUri");
    tc.add(new TripleImpl(subject, new IRI("http://xmlns.com/foaf/0.1/givenName"), literal));
    contentItem.addPart(new IRI(uri.getUnicodeString() + "_additionalMetadata"), tc);
    ContentItemBackend ciBackend = new ContentItemBackend(contentItem, true);
    LDPath<RDFTerm> ldPath = new LDPath<RDFTerm>(ciBackend, EnhancerLDPath.getConfig());
    Collection<RDFTerm> result = ldPath.pathQuery(subject, "foaf:givenName", null);
    assertTrue("Additional metadata cannot be found", result.contains(literal));
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) LDPath(org.apache.marmotta.ldpath.LDPath) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) Graph(org.apache.clerezza.commons.rdf.Graph) Literal(org.apache.clerezza.commons.rdf.Literal) ContentItemBackend(org.apache.stanbol.enhancer.ldpath.backend.ContentItemBackend) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) ByteArraySource(org.apache.stanbol.enhancer.servicesapi.impl.ByteArraySource) Test(org.junit.Test)

Example 44 with Literal

use of org.apache.clerezza.commons.rdf.Literal in project stanbol by apache.

the class ContentItemBackendTest method testTextAnnotationFunction.

/**
 * Tests the <code>fn:textAnnotation(..)</code> LDPath function: first without
 * a filter (two selected texts are expected) and than with a filter on the
 * dc:type (only a single selected text is expected).
 */
@Test
public void testTextAnnotationFunction() throws LDPathParseException {
    String path = "fn:textAnnotation(.)/fise:selected-text";
    Collection<RDFTerm> result = ldpath.pathQuery(ci.getUri(), path, null);
    assertNotNull(result);
    assertFalse(result.isEmpty());
    // assertEquals reports the actual size if the assertion fails
    assertEquals(2, result.size());
    Set<String> expectedValues = new HashSet<String>(Arrays.asList("Bob Marley", "Paris"));
    for (RDFTerm r : result) {
        assertTrue(r instanceof Literal);
        // every result needs to be one of the (not yet seen) expected values
        assertTrue(expectedValues.remove(((Literal) r).getLexicalForm()));
    }
    assertTrue(expectedValues.isEmpty());
    //test with a filter for the type
    //same as the 1st example but rather using an ld-path construct for
    //filtering for TextAnnotations representing persons
    path = "fn:textAnnotation(.)[dc:type is dbpedia-ont:Person]/fise:selected-text";
    result = ldpath.pathQuery(ci.getUri(), path, null);
    assertNotNull(result);
    assertFalse(result.isEmpty());
    assertEquals(1, result.size());
    RDFTerm r = result.iterator().next();
    assertTrue(r instanceof Literal);
    // expected value first, actual value second
    assertEquals("Bob Marley", ((Literal) r).getLexicalForm());
}
Also used : Literal(org.apache.clerezza.commons.rdf.Literal) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 45 with Literal

use of org.apache.clerezza.commons.rdf.Literal in project stanbol by apache.

the class ContentItemBackendTest method testEnhancementsWithoutParsedContext.

/**
 * Tests the <code>fn:enhancement()</code> LDPath function when called
 * without a parsed context: without a filter, with a filter on the rdf:type
 * and with a path selecting the dc:language of the enhancements.
 */
@Test
public void testEnhancementsWithoutParsedContext() throws LDPathParseException {
    String path = "fn:enhancement()";
    Collection<RDFTerm> result = ldpath.pathQuery(ci.getUri(), path, null);
    assertNotNull(result);
    assertFalse(result.isEmpty());
    // assertEquals reports the actual size if the assertion fails
    assertEquals(7, result.size());
    for (RDFTerm r : result) {
        assertTrue(r instanceof IRI);
        log.info("Entity: {}", r);
    }
    //and with a filter
    path = "fn:enhancement()[rdf:type is fise:TextAnnotation]";
    result = ldpath.pathQuery(ci.getUri(), path, null);
    assertNotNull(result);
    assertFalse(result.isEmpty());
    assertEquals(3, result.size());
    //        assertTrue(result.contains(new IRI("http://dbpedia.org/resource/Bob_Marley")));
    path = "fn:enhancement()/dc:language";
    result = ldpath.pathQuery(ci.getUri(), path, null);
    assertNotNull(result);
    assertFalse(result.isEmpty());
    assertEquals(1, result.size());
    RDFTerm r = result.iterator().next();
    assertTrue(r instanceof Literal);
    assertEquals("en", ((Literal) r).getLexicalForm());
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Literal(org.apache.clerezza.commons.rdf.Literal) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) Test(org.junit.Test)

Aggregations

Literal (org.apache.clerezza.commons.rdf.Literal) 71; IRI (org.apache.clerezza.commons.rdf.IRI) 35; RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm) 35; Triple (org.apache.clerezza.commons.rdf.Triple) 30; BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI) 22; TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) 20; ArrayList (java.util.ArrayList) 16; PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) 16; Language (org.apache.clerezza.commons.rdf.Language) 12; Graph (org.apache.clerezza.commons.rdf.Graph) 11; Test (org.junit.Test) 10; HashSet (java.util.HashSet) 9; Date (java.util.Date) 8; Lock (java.util.concurrent.locks.Lock) 6; Entity (org.apache.stanbol.enhancer.engines.entitylinking.Entity) 5; HashMap (java.util.HashMap) 4; SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) 4; NoConvertorException (org.apache.clerezza.rdf.core.NoConvertorException) 4; Representation (org.apache.stanbol.entityhub.servicesapi.model.Representation) 4; Collection (java.util.Collection) 3