Search in sources :

Example 81 with RDFTerm

use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.

the class ContentItemBackendTest method readTestData.

/**
 * Builds the ContentItem fixture shared by the tests of this class.
 * <p>
 * Parses the RDF metadata from "metadata.rdf.zip", takes the content item ID
 * from the objects of the ENHANCER_EXTRACTED_FROM triples, creates the content
 * item from "content.html", attaches "content.txt" as an additional part and
 * finally adds the parsed triples to the metadata of the content item.
 *
 * @throws IOException if reading one of the test resources fails
 */
@BeforeClass
public static void readTestData() throws IOException {
    //parse the metadata first, because it provides the ID of the content item
    ParsingProvider parser = new JenaParserProvider();
    Graph rdfData = parseRdfData(parser, "metadata.rdf.zip");
    IRI contentItemId = null;
    Iterator<Triple> it = rdfData.filter(null, Properties.ENHANCER_EXTRACTED_FROM, null);
    while (it.hasNext()) {
        RDFTerm r = it.next().getObject();
        if (contentItemId == null) {
            if (r instanceof IRI) {
                contentItemId = (IRI) r;
            }
        } else {
            //every further triple must reference the very same content item
            assertEquals("multiple ContentItems IDs contained in the RDF test data", contentItemId, r);
        }
    }
    assertNotNull("RDF data doe not contain an Enhancement extracted form " + "the content item", contentItemId);
    //create the content Item with the HTML content
    InputStream in = getTestResource("content.html");
    assertNotNull("HTML content not found", in);
    byte[] htmlData;
    try {
        htmlData = IOUtils.toByteArray(in);
    } finally {
        //close the stream even if reading fails
        IOUtils.closeQuietly(in);
    }
    ci = ciFactory.createContentItem(contentItemId, new ByteArraySource(htmlData, "text/html; charset=UTF-8"));
    htmlContent = new String(htmlData, UTF8);
    //create a Blob with the text content
    in = getTestResource("content.txt");
    //check the resource BEFORE reading it, so that a missing file is reported
    //by this assertion and not by a NullPointerException while reading
    assertNotNull("Plain text content not found", in);
    byte[] textData;
    try {
        textData = IOUtils.toByteArray(in);
    } finally {
        IOUtils.closeQuietly(in);
    }
    ci.addPart(new IRI(ci.getUri().getUnicodeString() + "_text"), ciFactory.createBlob(new ByteArraySource(textData, "text/plain; charset=UTF-8")));
    textContent = new String(textData, UTF8);
    //add the metadata
    ci.getMetadata().addAll(rdfData);
}
Also used : JenaParserProvider(org.apache.clerezza.rdf.jena.parser.JenaParserProvider) Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) ParsingProvider(org.apache.clerezza.rdf.core.serializedform.ParsingProvider) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) Graph(org.apache.clerezza.commons.rdf.Graph) BufferedInputStream(java.io.BufferedInputStream) ZipInputStream(java.util.zip.ZipInputStream) FilterInputStream(java.io.FilterInputStream) InputStream(java.io.InputStream) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) ByteArraySource(org.apache.stanbol.enhancer.servicesapi.impl.ByteArraySource) BeforeClass(org.junit.BeforeClass)

Example 82 with RDFTerm

use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.

the class ContentItemBackendTest method testSuggestedEntity.

/**
 * Exercises the two ways of calling the fn:suggestedEntity LDPath function
 * against the content item of this test class: once per TextAnnotation
 * context and once over all TextAnnotations passed as the first argument.
 *
 * @throws LDPathParseException declared for the LDPath queries executed below
 */
@Test
public void testSuggestedEntity() throws LDPathParseException {
    //The suggestedEntity function can be used for two use cases
    //(1) get the {limit} top rated linked Entities per parsed context
    //    In this example we parse all TextAnnotations
    //NOTE: '.' MUST BE used as first argument in this case
    String path = "fn:textAnnotation(.)/fn:suggestedEntity(.,\"1\")";
    Collection<RDFTerm> result = ldpath.pathQuery(ci.getUri(), path, null);
    assertNotNull(result);
    assertFalse(result.isEmpty());
    //assertEquals reports the actual size if the expectation is not met
    assertEquals(2, result.size());
    Set<IRI> expectedValues = new HashSet<IRI>(Arrays.asList(new IRI("http://dbpedia.org/resource/Paris"), new IRI("http://dbpedia.org/resource/Bob_Marley")));
    for (RDFTerm r : result) {
        assertTrue(r instanceof IRI);
        log.info("Entity: {}", r);
        //each result must be one of the expected entities and must occur only once
        assertTrue(expectedValues.remove(r));
    }
    assertTrue(expectedValues.isEmpty());
    //(2) get the {limit} top rated Entities for all Annotations parsed
    //    as the first argument
    //NOTE: the selector parsing all Annotations MUST BE used as first
    //      argument
    path = "fn:suggestedEntity(fn:textAnnotation(.),\"1\")";
    result = ldpath.pathQuery(ci.getUri(), path, null);
    assertNotNull(result);
    assertFalse(result.isEmpty());
    assertEquals(1, result.size());
    assertEquals(new IRI("http://dbpedia.org/resource/Paris"), result.iterator().next());
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 83 with RDFTerm

use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.

the class UsageExamples method exampleExtractedPersons.

/**
 * Example LDPath program that selects the persons extracted from a
 * contentItem: their mentions, their names, the linked entities and the
 * subset of linked artists.
 *
 * @throws LDPathParseException propagated from the LDPath calls below
 */
@Test
public void exampleExtractedPersons() throws LDPathParseException {
    //the text selected by the TextAnnotations typed as Person
    String mentionsField = "personMentions = fn:textAnnotation()"
            + "[dc:type is dbpedia-ont:Person]/fise:selected-text :: xsd:string;";
    //label of the suggestion selected by fn:suggestion("1"); the selected-text
    //is the second argument of fn:first and acts as fallback
    String namesField = "personNames = fn:textAnnotation()"
            + "[dc:type is dbpedia-ont:Person]/fn:first(fn:suggestion(\"1\")/fise:entity-label,fise:selected-text) :: xsd:string;";
    String linkedField = "linkedPersons = fn:textAnnotation()"
            + "[dc:type is dbpedia-ont:Person]/fn:suggestedEntity(\"1\") :: xsd:anyURI;";
    //only suggestions that are additionally typed as Artist
    String artistsField = "linkedArtists = fn:textAnnotation()"
            + "[dc:type is dbpedia-ont:Person]/fn:suggestion()"
            + "[fise:entity-type is dbpedia-ont:Artist]/fise:entity-reference :: xsd:anyURI;";
    String source = mentionsField + namesField + linkedField + artistsField;
    Program<RDFTerm> parsed = ldpath.parseProgram(new StringReader(source));
    log.info("- - - - - - - - - - - - - ");
    log.info("Person Indexing Examples");
    Map<String, Collection<?>> fields = execute(parsed);
    assertNotNull(fields);
    assertFalse(fields.isEmpty());
    logResults(fields);
}
Also used : StringReader(java.io.StringReader) Collection(java.util.Collection) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) Test(org.junit.Test)

Example 84 with RDFTerm

use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.

the class UsageExamples method exampleExtractedOrganization.

/**
 * This provides some example on how to select organisations extracted from
 * a contentItem
 *
 * @throws LDPathParseException propagated from the LDPath calls below
 */
@Test
public void exampleExtractedOrganization() throws LDPathParseException {
    StringBuilder program = new StringBuilder();
    program.append("orgMentions = fn:textAnnotation()" + "[dc:type is dbpedia-ont:Organisation]/fise:selected-text :: xsd:string;");
    //this uses the labels of suggested organisations with the highest confidence
    //but also the selected-text as fallback if no entity is suggested.
    program.append("orgNames = fn:textAnnotation()" + "[dc:type is dbpedia-ont:Organisation]/fn:first(fn:suggestion(\"1\")/fise:entity-label,fise:selected-text) :: xsd:string;");
    program.append("linkedOrgs = fn:textAnnotation()" + "[dc:type is dbpedia-ont:Organisation]/fn:suggestedEntity(\"1\") :: xsd:anyURI;");
    //this selects only linked education organisations
    //NOTE: this does not use a limit on suggestion(.)!
    program.append("linkedEducationOrg = fn:textAnnotation()" + "[dc:type is dbpedia-ont:Organisation]/fn:suggestion()" + "[fise:entity-type is dbpedia-ont:EducationalInstitution]/fise:entity-reference :: xsd:anyURI;");
    Program<RDFTerm> orgProgram = ldpath.parseProgram(new StringReader(program.toString()));
    log.info("- - - - - - - - - - - - -");
    //header names this example (it used to say "Places", a copy-paste leftover)
    log.info("Organization Indexing Examples");
    Map<String, Collection<?>> result = execute(orgProgram);
    assertNotNull(result);
    assertFalse(result.isEmpty());
    logResults(result);
}
Also used : StringReader(java.io.StringReader) Collection(java.util.Collection) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) Test(org.junit.Test)

Example 85 with RDFTerm

use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.

the class UsageExamples method exampleExtractedConcepts.

/**
 * Example LDPath program that selects the concepts (EntityAnnotations with
 * the entity-type skos:Concept) extracted from a contentItem: their labels
 * and the referenced entities.
 *
 * @throws LDPathParseException propagated from the LDPath calls below
 */
@Test
public void exampleExtractedConcepts() throws LDPathParseException {
    //labels of all EntityAnnotations typed as skos:Concept
    //NOTE(review): the label field is declared as xsd:anyURI while the sibling
    //examples declare label fields as xsd:string - confirm this is intended
    String namesField = "conceptNames = fn:entityAnnotation()"
            + "[fise:entity-type is skos:Concept]/fise:entity-label :: xsd:anyURI;";
    //references to the entities of those EntityAnnotations
    String linkedField = "linkedConcepts = fn:entityAnnotation()"
            + "[fise:entity-type is skos:Concept]/fise:entity-reference :: xsd:anyURI;";
    String source = namesField + linkedField;
    Program<RDFTerm> parsed = ldpath.parseProgram(new StringReader(source));
    log.info("- - - - - - - - - - - - -");
    log.info("Concept Indexing Examples");
    Map<String, Collection<?>> fields = execute(parsed);
    assertNotNull(fields);
    assertFalse(fields.isEmpty());
    logResults(fields);
}
Also used : StringReader(java.io.StringReader) Collection(java.util.Collection) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) Test(org.junit.Test)

Aggregations

RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)126 IRI (org.apache.clerezza.commons.rdf.IRI)84 Triple (org.apache.clerezza.commons.rdf.Triple)70 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)48 Literal (org.apache.clerezza.commons.rdf.Literal)35 Test (org.junit.Test)35 HashSet (java.util.HashSet)30 HashMap (java.util.HashMap)28 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)26 Graph (org.apache.clerezza.commons.rdf.Graph)24 ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem)18 ArrayList (java.util.ArrayList)17 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)16 EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException)13 OWLOntologyID (org.semanticweb.owlapi.model.OWLOntologyID)13 SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph)12 Collection (java.util.Collection)10 IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph)10 Lock (java.util.concurrent.locks.Lock)9 IOException (java.io.IOException)5