Search in sources :

Example 16 with Triple

use of org.apache.clerezza.commons.rdf.Triple in project stanbol by apache.

the class CeliLemmatizerEnhancementEngineTest method testCompleteMorphoAnalysis.

/**
 * Runs the lemmatizer engine created by {@code initEngine(true)} on
 * {@code TERM} (language statically set to "it") and validates every
 * TextAnnotation found in the content item metadata. No EntityAnnotations
 * are expected.
 * <p>
 * If the engine fails with an {@link EngineException} the decision is
 * delegated to {@code RemoteServiceHelper.checkServiceUnavailable(e)} and the
 * test ends early (presumably that helper only returns normally when the
 * remote service is unreachable - confirm against its implementation).
 * <p>
 * The engine is shut down in a {@code finally} block so that it is released
 * on the early return and on assertion failures as well.
 */
@Test
public void testCompleteMorphoAnalysis() throws Exception {
    ContentItem ci = wrapAsContentItem(TERM);
    //add a simple triple to statically define the language of the test
    //content
    ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("it")));
    CeliLemmatizerEnhancementEngine morphoAnalysisEngine = initEngine(true);
    try {
        try {
            morphoAnalysisEngine.computeEnhancements(ci);
        } catch (EngineException e) {
            RemoteServiceHelper.checkServiceUnavailable(e);
            return;
        }
        TestUtils.logEnhancements(ci);
        //validate enhancements
        HashMap<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
        expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
        expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(morphoAnalysisEngine.getClass().getName()));
        Iterator<Triple> textAnnotationIterator = ci.getMetadata().filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
        // NOTE: the presence of a TextAnnotation is intentionally not asserted
        // here; zero TextAnnotations is accepted and only reported via the
        // counter logged below.
        int textAnnotationCount = 0;
        while (textAnnotationIterator.hasNext()) {
            IRI textAnnotation = (IRI) textAnnotationIterator.next().getSubject();
            // test if selected Text is added
            validateTextAnnotation(ci.getMetadata(), textAnnotation, TERM, expectedValues);
            textAnnotationCount++;
            //perform additional tests for "hasMorphologicalFeature" and "hasLemmaForm"
            validateMorphoFeatureProperty(ci.getMetadata(), textAnnotation);
        }
        log.info("{} TextAnnotations found and validated ...", textAnnotationCount);
        int entityAnnoNum = validateAllEntityAnnotations(ci.getMetadata(), expectedValues);
        //no EntityAnnotations expected
        Assert.assertEquals("No EntityAnnotations expected by this test", 0, entityAnnoNum);
    } finally {
        // always release the engine, also on early return or failed assertions
        shutdownEngine(morphoAnalysisEngine);
    }
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) HashMap(java.util.HashMap) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Example 17 with Triple

use of org.apache.clerezza.commons.rdf.Triple in project stanbol by apache.

the class CeliLemmatizerEnhancementEngineTest method testEngine.

/**
 * Runs the lemmatizer engine created by {@code initEngine(false)} on
 * {@code TEXT} (language statically set to "it") and expects exactly one
 * TextAnnotation, identified by an IRI, carrying exactly one lemma form.
 * <p>
 * If the engine fails with an {@link EngineException} the decision is
 * delegated to {@code RemoteServiceHelper.checkServiceUnavailable(e)} and the
 * test ends early (presumably that helper only returns normally when the
 * remote service is unreachable - confirm against its implementation).
 * <p>
 * The engine is shut down in a {@code finally} block so that it is released
 * on the early return and on assertion failures as well.
 */
@Test
public void testEngine() throws Exception {
    ContentItem ci = wrapAsContentItem(TEXT);
    //add a simple triple to statically define the language of the test
    //content
    ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("it")));
    //unit test should not depend on each other (if possible)
    //CeliLanguageIdentifierEnhancementEngineTest.addEnanchements(ci);
    CeliLemmatizerEnhancementEngine morphoAnalysisEngine = initEngine(false);
    try {
        try {
            morphoAnalysisEngine.computeEnhancements(ci);
        } catch (EngineException e) {
            RemoteServiceHelper.checkServiceUnavailable(e);
            return;
        }
        TestUtils.logEnhancements(ci);
        //validate enhancement
        HashMap<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
        expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
        expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(morphoAnalysisEngine.getClass().getName()));
        Iterator<Triple> lemmaTextAnnotationIterator = ci.getMetadata().filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
        assertTrue("A TextAnnotation is expected by this Test", lemmaTextAnnotationIterator.hasNext());
        BlankNodeOrIRI lemmaTextAnnotation = lemmaTextAnnotationIterator.next().getSubject();
        // checked before the cast below so a blank node fails with a clear message
        assertTrue("TextAnnoations MUST BE IRIs!", lemmaTextAnnotation instanceof IRI);
        assertFalse("Only a single TextAnnotation is expected by this Test", lemmaTextAnnotationIterator.hasNext());
        //validate the enhancement metadata
        validateEnhancement(ci.getMetadata(), (IRI) lemmaTextAnnotation, expectedValues);
        //validate the lemma form TextAnnotation
        int lemmaForms = validateLemmaFormProperty(ci.getMetadata(), lemmaTextAnnotation, "it");
        assertTrue("Only a single LemmaForm property is expected if '" + MORPHOLOGICAL_ANALYSIS + "=false'", lemmaForms == 1);
    } finally {
        // always release the engine, also on early return or failed assertions
        shutdownEngine(morphoAnalysisEngine);
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) HashMap(java.util.HashMap) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) Triple(org.apache.clerezza.commons.rdf.Triple) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Example 18 with Triple

use of org.apache.clerezza.commons.rdf.Triple in project stanbol by apache.

the class DereferenceEngineTest method validateDereferencedEntities.

/**
 * Asserts that the metadata contains the {@code testData} triples of every
 * entity referenced through one of the given fields, and none of the
 * remaining {@code testData} triples.
 *
 * @param metadata the graph to validate
 * @param entityReferenceFields the properties whose object values are
 *        treated as references to entities (each object is cast to
 *        {@link IRI})
 */
private void validateDereferencedEntities(Graph metadata, IRI... entityReferenceFields) {
    // collect the test data triples of all entities referenced by the metadata
    Graph expected = new IndexedGraph();
    for (IRI field : entityReferenceFields) {
        for (Iterator<Triple> refs = metadata.filter(null, field, null); refs.hasNext(); ) {
            IRI entity = (IRI) refs.next().getObject();
            for (Iterator<Triple> data = testData.filter(entity, null, null); data.hasNext(); ) {
                expected.add(data.next());
            }
        }
    }
    // everything in the test data that is NOT expected must be absent
    Graph notExpected = new IndexedGraph(testData);
    notExpected.removeAll(expected);
    Assert.assertTrue(metadata.containsAll(expected));
    Assert.assertTrue(Collections.disjoint(metadata, notExpected));
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) Graph(org.apache.clerezza.commons.rdf.Graph) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph)

Example 19 with Triple

use of org.apache.clerezza.commons.rdf.Triple in project stanbol by apache.

the class DBPSpotlightDisambiguateEnhancementEngine method getSpottedXml.

/**
 * Builds an {@code <annotation>} XML document for the given text that
 * contains one {@code <surfaceForm>} element for every TextAnnotation in the
 * graph that has a selected text.
 * <p>
 * As a side effect {@code textAnnotationsMap} is replaced by a fresh map from
 * surface form to the IRI of its TextAnnotation; if several TextAnnotations
 * share a surface form the one visited last wins.
 * <p>
 * The text and the surface forms are XML-escaped before being written into
 * attribute values, so characters such as {@code "}, {@code <} and
 * {@code &} no longer produce malformed XML.
 *
 * @param text the text written to the {@code text} attribute
 * @param graph the graph holding the TextAnnotations
 * @return the XML document; if reading the graph fails the error is logged
 *         and the document contains only the surface forms collected so far
 */
private String getSpottedXml(String text, Graph graph) {
    StringBuilder xml = new StringBuilder();
    textAnnotationsMap = new Hashtable<String, IRI>();
    xml.append(String.format("<annotation text=\"%s\">", escapeXmlAttribute(text)));
    try {
        for (Iterator<Triple> it = graph.filter(null, RDF_TYPE, TechnicalClasses.ENHANCER_TEXTANNOTATION); it.hasNext(); ) {
            IRI uri = (IRI) it.next().getSubject();
            String surfaceForm = EnhancementEngineHelper.getString(graph, uri, ENHANCER_SELECTED_TEXT);
            if (surfaceForm != null) {
                String offset = EnhancementEngineHelper.getString(graph, uri, ENHANCER_START);
                // keyed by the raw (unescaped) surface form
                textAnnotationsMap.put(surfaceForm, uri);
                xml.append(String.format("<surfaceForm name=\"%s\" offset=\"%s\"/>", escapeXmlAttribute(surfaceForm), offset));
            }
        }
    } catch (Exception e) {
        // best effort: keep what was collected so far, but do not lose the stack trace
        log.error(e.getMessage(), e);
    }
    return xml.append("</annotation>").toString();
}

/**
 * Escapes the characters that are not allowed verbatim inside a
 * double-quoted XML attribute value.
 *
 * @param value the raw value, may be {@code null}
 * @return the escaped value, or {@code null} if the value was {@code null}
 */
private static String escapeXmlAttribute(String value) {
    if (value == null) {
        return null;
    }
    StringBuilder escaped = new StringBuilder(value.length() + 16);
    for (int i = 0; i < value.length(); i++) {
        char c = value.charAt(i);
        switch (c) {
            case '&':
                escaped.append("&amp;");
                break;
            case '<':
                escaped.append("&lt;");
                break;
            case '>':
                escaped.append("&gt;");
                break;
            case '"':
                escaped.append("&quot;");
                break;
            case '\'':
                escaped.append("&apos;");
                break;
            default:
                escaped.append(c);
        }
    }
    return escaped.toString();
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) ConfigurationException(org.osgi.service.cm.ConfigurationException) SAXException(org.xml.sax.SAXException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) IOException(java.io.IOException)

Example 20 with Triple

use of org.apache.clerezza.commons.rdf.Triple in project stanbol by apache.

the class TestMetaxaCore method printTriples.

/**
 * Logs (at debug level) the Stanbol Enhancer triples that would be created
 * for the statements contained in the given model. No triple is stored
 * anywhere; each one is only built in order to be logged and counted.
 * <p>
 * The model iterator is closed in a {@code finally} block so that it is
 * released even if the conversion of a statement throws.
 *
 * @param m a {@link Model}
 *
 * @return an {@code int} with the number of statements that could be
 *         converted to a triple (statements with an unconvertible subject,
 *         predicate or object are logged as skipped and not counted)
 */
private int printTriples(Model m) {
    int tripleCounter = 0;
    HashMap<BlankNode, BlankNode> blankNodeMap = new HashMap<BlankNode, BlankNode>();
    ClosableIterator<Statement> it = m.iterator();
    try {
        while (it.hasNext()) {
            Statement oneStmt = it.next();
            BlankNodeOrIRI subject = (BlankNodeOrIRI) MetaxaEngine.asClerezzaResource(oneStmt.getSubject(), blankNodeMap);
            IRI predicate = (IRI) MetaxaEngine.asClerezzaResource(oneStmt.getPredicate(), blankNodeMap);
            RDFTerm object = MetaxaEngine.asClerezzaResource(oneStmt.getObject(), blankNodeMap);
            if (null != subject && null != predicate && null != object) {
                Triple t = new TripleImpl(subject, predicate, object);
                LOG.debug("adding " + t);
                tripleCounter++;
            } else {
                LOG.debug("skipped " + oneStmt.toString());
            }
        }
    } finally {
        // release the underlying model iterator on every exit path
        it.close();
    }
    return tripleCounter;
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) HashMap(java.util.HashMap) Statement(org.ontoware.rdf2go.model.Statement) BlankNode(org.apache.clerezza.commons.rdf.BlankNode) BlankNode(org.ontoware.rdf2go.model.node.BlankNode) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Aggregations

Triple (org.apache.clerezza.commons.rdf.Triple)151 IRI (org.apache.clerezza.commons.rdf.IRI)88 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)84 RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)70 Graph (org.apache.clerezza.commons.rdf.Graph)45 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)41 HashSet (java.util.HashSet)34 Literal (org.apache.clerezza.commons.rdf.Literal)30 ArrayList (java.util.ArrayList)27 Lock (java.util.concurrent.locks.Lock)21 HashMap (java.util.HashMap)20 SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph)19 OWLOntologyID (org.semanticweb.owlapi.model.OWLOntologyID)19 IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph)15 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)12 Test (org.junit.Test)12 EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException)10 ImmutableGraph (org.apache.clerezza.commons.rdf.ImmutableGraph)9 GraphNode (org.apache.clerezza.rdf.utils.GraphNode)8 IOException (java.io.IOException)7