Search in sources :

Example 11 with RDFTerm

use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.

the class ContentItemBackendTest method testSuggestedEntityWithoutParsedContext.

/**
 * Exercises the two ways the {@code fn:suggestedEntity} LDPath function is queried
 * in this test: applied as a path step after {@code fn:textAnnotation()}, and with
 * {@code fn:textAnnotation()} passed as its first argument.
 *
 * @throws LDPathParseException declared for the LDPath queries executed by this test
 */
@Test
public void testSuggestedEntityWithoutParsedContext() throws LDPathParseException {
    //The suggestedEntity function can be used for two use cases
    //(1) get the {limit} top rated linked Entities per parsed context
    //    In this example we parse all TextAnnotations
    //NOTE(review): an earlier comment here said '.' MUST BE used as first
    //      argument, but this path deliberately passes no context (see the
    //      test name) - confirm against the function's documentation
    String path = "fn:textAnnotation()/fn:suggestedEntity(\"1\")";
    Collection<RDFTerm> result = ldpath.pathQuery(ci.getUri(), path, null);
    assertNotNull(result);
    assertFalse(result.isEmpty());
    //assertEquals reports the actual size on failure, unlike assertTrue(a == b)
    assertEquals(2, result.size());
    Set<IRI> expectedValues = new HashSet<IRI>(Arrays.asList(new IRI("http://dbpedia.org/resource/Paris"), new IRI("http://dbpedia.org/resource/Bob_Marley")));
    for (RDFTerm r : result) {
        assertTrue(r instanceof IRI);
        log.info("Entity: {}", r);
        //every returned entity must be one of the (not yet seen) expected values
        assertTrue(expectedValues.remove(r));
    }
    //all expected entities must have been returned
    assertTrue(expectedValues.isEmpty());
    //(2) get the {limit} top rated Entities for all Annotations parsed
    //    as the first argument
    //NOTE: the selector parsing all Annotations MUST BE used as first
    //      argument
    path = "fn:suggestedEntity(fn:textAnnotation(),\"1\")";
    result = ldpath.pathQuery(ci.getUri(), path, null);
    assertNotNull(result);
    assertFalse(result.isEmpty());
    assertEquals(1, result.size());
    assertEquals(new IRI("http://dbpedia.org/resource/Paris"), result.iterator().next());
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 12 with RDFTerm

use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.

the class UsageExamples method exampleExtractedPlaces.

/**
 * This provides some examples on how to select places extracted from
 * a contentItem.
 *
 * @throws LDPathParseException declared by the signature; presumably raised when
 *         the LDPath program assembled here cannot be parsed
 */
@Test
public void exampleExtractedPlaces() throws LDPathParseException {
    StringBuilder program = new StringBuilder();
    //the selected text of the TextAnnotations typed as dbpedia-ont:Place
    program.append("locationMentions = fn:textAnnotation()" + "[dc:type is dbpedia-ont:Place]/fise:selected-text :: xsd:string;");
    //this uses the labels of suggested places with the highest confidence
    //but also the selected-text as fallback if no entity is suggested.
    program.append("locationNames = fn:textAnnotation()" + "[dc:type is dbpedia-ont:Place]/fn:first(fn:suggestion(\"1\")/fise:entity-label,fise:selected-text) :: xsd:string;");
    program.append("linkedPlaces = fn:textAnnotation()" + "[dc:type is dbpedia-ont:Place]/fn:suggestedEntity(\"1\") :: xsd:anyURI;");
    //this selects only linked Countries
    program.append("linkedCountries = fn:textAnnotation()" + "[dc:type is dbpedia-ont:Place]/fn:suggestion()" + "[fise:entity-type is dbpedia-ont:Country]/fise:entity-reference :: xsd:anyURI;");
    Program<RDFTerm> placeProgram = ldpath.parseProgram(new StringReader(program.toString()));
    log.info("- - - - - - - - - - - - -");
    log.info("Places Indexing Examples");
    Map<String, Collection<?>> result = execute(placeProgram);
    assertNotNull(result);
    assertFalse(result.isEmpty());
    logResults(result);
}
Also used : StringReader(java.io.StringReader) Collection(java.util.Collection) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) Test(org.junit.Test)

Example 13 with RDFTerm

use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.

the class TikaEngineTest method verifyBlankNodeOrIRI.

/**
 * Asserts that the metadata of the content item holds exactly one triple for the
 * given subject and property, and that its object is a {@link BlankNodeOrIRI}.
 *
 * @param ci the content item whose metadata is inspected
 * @param subject the subject to filter for
 * @param property the property to filter for
 * @return the object of the single matching triple
 */
private static BlankNodeOrIRI verifyBlankNodeOrIRI(ContentItem ci, IRI subject, IRI property) {
    Iterator<Triple> matches = ci.getMetadata().filter(subject, property, null);
    //at least one value is required ...
    assertTrue(matches.hasNext());
    Triple first = matches.next();
    RDFTerm value = first.getObject();
    //... and no second one is allowed
    assertFalse(matches.hasNext());
    assertTrue(value instanceof BlankNodeOrIRI);
    return (BlankNodeOrIRI) value;
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm)

Example 14 with RDFTerm

use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.

the class TestNamedEntityExtractionEnhancementEngine method testCustomModel.

/**
 * Runs the NER engine with the default model types cleared and a custom name
 * finder model registered for "en", then validates the created TextAnnotations
 * against the expected property values and their expected count.
 *
 * @throws EngineException declared by the signature
 * @throws IOException declared by the signature
 */
@Test
public void testCustomModel() throws EngineException, IOException {
    //URI the "DNA" type is mapped to; also the expected dc:type value
    final String dnaTypeUri = "http://www.bootstrep.eu/ontology/GRO#DNA";
    ContentItem ci = wrapAsContentItem("urn:test:content-item:single:sentence", EHEALTH, "en");

    //no default models for this test ...
    nerEngine.config.getDefaultModelTypes().clear();
    //... only the custom model shipped with the test data
    nerEngine.config.addCustomNameFinderModel("en", "bionlp2004-DNA-en.bin");
    nerEngine.config.setMappedType("DNA", new IRI(dnaTypeUri));
    nerEngine.computeEnhancements(ci);

    Map<IRI, RDFTerm> expected = new HashMap<IRI, RDFTerm>();
    expected.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
    RDFTerm creator = LiteralFactory.getInstance().createTypedLiteral(nerEngine.getClass().getName());
    expected.put(Properties.DC_CREATOR, creator);
    //a null expected value marks confidence as a required property
    expected.put(Properties.ENHANCER_CONFIDENCE, null);
    //dc:type values MUST be the URI set as mapped type
    expected.put(Properties.DC_TYPE, new IRI(dnaTypeUri));

    int annotationCount = validateAllTextAnnotations(ci.getMetadata(), EHEALTH, expected);
    assertEquals(7, annotationCount);
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Graph(org.apache.clerezza.commons.rdf.Graph) HashMap(java.util.HashMap) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Example 15 with RDFTerm

use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.

the class TestMetaxaCore method printTriples.

/**
 * This prints out (to the debug log) the Stanbol Enhancer triples that would be
 * created for the metadata contained in the given model. The triples are only
 * built and logged by this method; they are not stored anywhere.
 *
 * @param m a {@link Model}
 *
 * @return an {@code int} with the number of triples that could be built, i.e. the
 *         number of statements whose subject, predicate and object were all
 *         converted to non-null values
 */
private int printTriples(Model m) {
    int tripleCounter = 0;
    //shared across all conversions of this model and handed to every
    //asClerezzaResource call
    HashMap<BlankNode, BlankNode> blankNodeMap = new HashMap<BlankNode, BlankNode>();
    ClosableIterator<Statement> it = m.iterator();
    try {
        while (it.hasNext()) {
            Statement oneStmt = it.next();
            BlankNodeOrIRI subject = (BlankNodeOrIRI) MetaxaEngine.asClerezzaResource(oneStmt.getSubject(), blankNodeMap);
            IRI predicate = (IRI) MetaxaEngine.asClerezzaResource(oneStmt.getPredicate(), blankNodeMap);
            RDFTerm object = MetaxaEngine.asClerezzaResource(oneStmt.getObject(), blankNodeMap);
            if (null != subject && null != predicate && null != object) {
                Triple t = new TripleImpl(subject, predicate, object);
                LOG.debug("adding " + t);
                tripleCounter++;
            } else {
                //at least one node could not be converted
                LOG.debug("skipped " + oneStmt.toString());
            }
        }
    } finally {
        //release the iterator even if a conversion or cast above throws
        it.close();
    }
    return tripleCounter;
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) HashMap(java.util.HashMap) Statement(org.ontoware.rdf2go.model.Statement) BlankNode(org.apache.clerezza.commons.rdf.BlankNode) BlankNode(org.ontoware.rdf2go.model.node.BlankNode) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Aggregations

RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)126 IRI (org.apache.clerezza.commons.rdf.IRI)84 Triple (org.apache.clerezza.commons.rdf.Triple)70 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)48 Literal (org.apache.clerezza.commons.rdf.Literal)35 Test (org.junit.Test)35 HashSet (java.util.HashSet)30 HashMap (java.util.HashMap)28 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)26 Graph (org.apache.clerezza.commons.rdf.Graph)24 ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem)18 ArrayList (java.util.ArrayList)17 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)16 EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException)13 OWLOntologyID (org.semanticweb.owlapi.model.OWLOntologyID)13 SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph)12 Collection (java.util.Collection)10 IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph)10 Lock (java.util.concurrent.locks.Lock)9 IOException (java.io.IOException)5