Search in sources :

Example 76 with RDFTerm

use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.

the class SuggestionFunction method apply.

/**
 * Resolves the contexts to process and delegates to {@code processAnnotations}.
 * <p>
 * If the first argument is present and contains at least one term the backend
 * reports as a URI, those URI terms are used as contexts (non-URI terms of the
 * first argument are ignored) and the remaining parameters are read starting at
 * index 1. Otherwise the current {@code context} is the only context and the
 * parameters are read starting at index 0.
 *
 * @param backend the backend used to test terms and parse the parameters
 * @param context the current context; used when no URI context is parsed
 * @param args optional arguments: [contexts], limit, missing-confidence mode
 * @return the list handed to {@code processAnnotations} (presumably filled
 *         by that call)
 * @throws IllegalArgumentException declared by the function contract; the
 *         parameter parsing helpers are the likely source
 */
@Override
public Collection<RDFTerm> apply(final RDFBackend<RDFTerm> backend, RDFTerm context, Collection<RDFTerm>... args) throws IllegalArgumentException {
    // gather every URI term of the (optional) first argument
    final List<RDFTerm> parsedContexts = new ArrayList<RDFTerm>();
    final boolean firstArgPresent = args != null && args.length > 0 && args[0] != null;
    if (firstArgPresent) {
        for (RDFTerm candidate : args[0]) {
            if (backend.isURI(candidate)) {
                parsedContexts.add(candidate);
            }
        }
    }
    final Collection<RDFTerm> contexts;
    final int paramIndex;
    if (parsedContexts.isEmpty()) {
        // no contexts parsed as first param ... use the current context
        contexts = Collections.singleton(context);
        paramIndex = 0;
    } else {
        // the first argument was consumed as contexts
        contexts = parsedContexts;
        paramIndex = 1;
    }
    final Integer limit = parseParamLimit(backend, args, paramIndex);
    final int missingConfidenceMode = parseParamMissingConfidenceMode(backend, args, paramIndex + 1);
    final List<RDFTerm> suggestions = new ArrayList<RDFTerm>();
    processAnnotations(backend, contexts, limit, missingConfidenceMode, suggestions);
    return suggestions;
}
Also used : ArrayList(java.util.ArrayList) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm)

Example 77 with RDFTerm

use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.

the class UsageExamples method readTestData.

/**
 * Builds the content item shared by the tests of this class.
 * <p>
 * Parses the RDF test data from {@code example.rdf.zip}, determines the
 * content item ID from the objects of the {@code ENHANCER_EXTRACTED_FROM}
 * triples, reads {@code example.txt} as the content and finally adds the
 * parsed RDF data to the metadata of the created content item.
 *
 * @throws IOException if reading the test resources fails
 */
@BeforeClass
public static void readTestData() throws IOException {
    //add the metadata
    ParsingProvider parser = new JenaParserProvider();
    Graph rdfData = parseRdfData(parser, "example.rdf.zip");
    //all extracted-from triples are expected to reference the same content item
    IRI contentItemId = null;
    Iterator<Triple> it = rdfData.filter(null, Properties.ENHANCER_EXTRACTED_FROM, null);
    while (it.hasNext()) {
        RDFTerm r = it.next().getObject();
        if (contentItemId == null) {
            if (r instanceof IRI) {
                contentItemId = (IRI) r;
            }
        } else {
            assertEquals("multiple ContentItems IDs contained in the RDF test data", contentItemId, r);
        }
    }
    assertNotNull("RDF data doe not contain an Enhancement extracted form " + "the content item", contentItemId);
    //create the content item with the example content
    InputStream in = getTestResource("example.txt");
    assertNotNull("Example Plain text content not found", in);
    byte[] textData;
    try {
        textData = IOUtils.toByteArray(in);
    } finally {
        //close the stream even if reading the content fails
        IOUtils.closeQuietly(in);
    }
    //NOTE(review): the resource is named example.txt but is registered as
    //      text/html - confirm that this media type is intended
    ci = ciFactory.createContentItem(contentItemId, new ByteArraySource(textData, "text/html; charset=UTF-8"));
    ci.getMetadata().addAll(rdfData);
}
Also used : JenaParserProvider(org.apache.clerezza.rdf.jena.parser.JenaParserProvider) Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) ParsingProvider(org.apache.clerezza.rdf.core.serializedform.ParsingProvider) Graph(org.apache.clerezza.commons.rdf.Graph) InputStream(java.io.InputStream) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) ByteArraySource(org.apache.stanbol.enhancer.servicesapi.impl.ByteArraySource) BeforeClass(org.junit.BeforeClass)

Example 78 with RDFTerm

use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.

the class ContentItemBackendTest method testEntitySuggestionsWithoutParsedContext.

/**
 * Checks {@code fn:suggestion()} without a parsed context: first that all
 * three suggestions for the place annotation are returned, then that a limit
 * of 2 returns a subset that no longer contains the suggestion with the
 * lowest confidence.
 *
 * @throws LDPathParseException if one of the LDPath expressions is invalid
 */
@Test
public void testEntitySuggestionsWithoutParsedContext() throws LDPathParseException {
    //NOTE: Sorting, while supported by fn:suggestion, is currently not
    //      supported by LDPath. Therefore the sort of fn:suggestion can
    //      currently only ensure that the top most {limit} entities are
    //      selected if the "limit" parameter is set.
    // Because of this the test first checks that all three suggestions for
    // Paris are returned and later that a limit of 2 only returns the two
    // top most.
    String path = "fn:textAnnotation()[dc:type is dbpedia-ont:Place]/fn:suggestion()";
    Collection<RDFTerm> result = ldpath.pathQuery(ci.getUri(), path, null);
    assertNotNull(result);
    assertTrue("expected 3 suggestions but got " + result.size(), result.size() == 3);
    Double lowestConfidence = null;
    //stores the lowest confidence suggestion for the 2nd part of this test
    IRI lowestConfidenceSuggestion = null;
    path = "fise:confidence :: xsd:double";
    for (RDFTerm r : result) {
        assertTrue("suggestion is not an IRI: " + r, r instanceof IRI);
        Collection<?> confidences = ldpath.pathTransform(r, path, null);
        assertNotNull(confidences);
        //fail with a clear message instead of a NoSuchElementException
        assertFalse("no fise:confidence value for suggestion " + r, confidences.isEmpty());
        Double current = (Double) confidences.iterator().next();
        assertNotNull(current);
        log.info("suggestion: {} | confidence: {}", r, current);
        if (lowestConfidence == null || lowestConfidence > current) {
            lowestConfidence = current;
            lowestConfidenceSuggestion = (IRI) r;
        }
    }
    assertNotNull(lowestConfidenceSuggestion);
    path = "fn:textAnnotation()[dc:type is dbpedia-ont:Place]/fn:suggestion(\"2\")";
    Collection<RDFTerm> result2 = ldpath.pathQuery(ci.getUri(), path, null);
    assertNotNull(result2);
    assertTrue("expected 2 suggestions but got " + result2.size(), result2.size() == 2);
    //first check that all results of the 2nd query are also part of the first
    assertTrue(result.containsAll(result2));
    //secondly check that the lowest confidence suggestion is now missing
    assertFalse(result2.contains(lowestConfidenceSuggestion));
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) Test(org.junit.Test)

Example 79 with RDFTerm

use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.

the class ContentItemBackendTest method testTextAnnotationFunction.

/**
 * Checks {@code fn:textAnnotation(.)}: the selected texts of all text
 * annotations must be exactly "Bob Marley" and "Paris", and filtering the
 * annotations by the person type must leave only "Bob Marley".
 *
 * @throws LDPathParseException if one of the LDPath expressions is invalid
 */
@Test
public void testTextAnnotationFunction() throws LDPathParseException {
    String path = "fn:textAnnotation(.)/fise:selected-text";
    Collection<RDFTerm> result = ldpath.pathQuery(ci.getUri(), path, null);
    assertNotNull(result);
    assertEquals(2, result.size());
    Set<String> expectedValues = new HashSet<String>(Arrays.asList("Bob Marley", "Paris"));
    for (RDFTerm r : result) {
        assertTrue(r instanceof Literal);
        //remove(..) returns false for unexpected as well as duplicate values
        assertTrue(expectedValues.remove(((Literal) r).getLexicalForm()));
    }
    assertTrue(expectedValues.isEmpty());
    //test with a filter for the type
    //same as the 1st example but rather using an ld-path construct for
    //filtering for TextAnnotations representing persons
    path = "fn:textAnnotation(.)[dc:type is dbpedia-ont:Person]/fise:selected-text";
    result = ldpath.pathQuery(ci.getUri(), path, null);
    assertNotNull(result);
    assertEquals(1, result.size());
    RDFTerm r = result.iterator().next();
    assertTrue(r instanceof Literal);
    //expected value first, actual value second
    assertEquals("Bob Marley", ((Literal) r).getLexicalForm());
}
Also used : Literal(org.apache.clerezza.commons.rdf.Literal) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 80 with RDFTerm

use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.

the class ContentItemBackendTest method testEnhancementsWithoutParsedContext.

/**
 * Checks {@code fn:enhancement()} without a parsed context: the unfiltered
 * query must return seven IRIs, the filter for text annotations three
 * results, and following {@code dc:language} a single literal "en".
 *
 * @throws LDPathParseException if one of the LDPath expressions is invalid
 */
@Test
public void testEnhancementsWithoutParsedContext() throws LDPathParseException {
    String path = "fn:enhancement()";
    Collection<RDFTerm> result = ldpath.pathQuery(ci.getUri(), path, null);
    assertNotNull(result);
    assertEquals(7, result.size());
    for (RDFTerm r : result) {
        assertTrue(r instanceof IRI);
        log.info("Entity: {}", r);
    }
    //and with a filter
    path = "fn:enhancement()[rdf:type is fise:TextAnnotation]";
    result = ldpath.pathQuery(ci.getUri(), path, null);
    assertNotNull(result);
    assertEquals(3, result.size());
    //disabled check kept from the original (reason not visible here):
    //        assertTrue(result.contains(new IRI("http://dbpedia.org/resource/Bob_Marley")));
    path = "fn:enhancement()/dc:language";
    result = ldpath.pathQuery(ci.getUri(), path, null);
    assertNotNull(result);
    assertEquals(1, result.size());
    RDFTerm r = result.iterator().next();
    assertTrue(r instanceof Literal);
    assertEquals("en", ((Literal) r).getLexicalForm());
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Literal(org.apache.clerezza.commons.rdf.Literal) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) Test(org.junit.Test)

Aggregations

RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm): 126, IRI (org.apache.clerezza.commons.rdf.IRI): 84, Triple (org.apache.clerezza.commons.rdf.Triple): 70, BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI): 48, Literal (org.apache.clerezza.commons.rdf.Literal): 35, Test (org.junit.Test): 35, HashSet (java.util.HashSet): 30, HashMap (java.util.HashMap): 28, TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl): 26, Graph (org.apache.clerezza.commons.rdf.Graph): 24, ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem): 18, ArrayList (java.util.ArrayList): 17, PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl): 16, EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException): 13, OWLOntologyID (org.semanticweb.owlapi.model.OWLOntologyID): 13, SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph): 12, Collection (java.util.Collection): 10, IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph): 10, Lock (java.util.concurrent.locks.Lock): 9, IOException (java.io.IOException): 5