Search in sources :

Example 61 with RDFTerm

use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.

the class CeliLemmatizerEnhancementEngineTest method testCompleteMorphoAnalysis.

@Test
public void testCompleteMorphoAnalysis() throws Exception {
    ContentItem ci = wrapAsContentItem(TERM);
    //add a simple triple to statically define the language of the test
    //content
    ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("it")));
    CeliLemmatizerEnhancementEngine morphoAnalysisEngine = initEngine(true);
    try {
        morphoAnalysisEngine.computeEnhancements(ci);
    } catch (EngineException e) {
        RemoteServiceHelper.checkServiceUnavailable(e);
        return;
    }
    TestUtils.logEnhancements(ci);
    //validate enhancements
    HashMap<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
    expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
    expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(morphoAnalysisEngine.getClass().getName()));
    Iterator<Triple> textAnnotationIterator = ci.getMetadata().filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
    // test if a textAnnotation is present
    //assertTrue(textAnnotationIterator.hasNext()); 
    //  -> this might be used to test that there are no TextAnnotations
    int textAnnotationCount = 0;
    while (textAnnotationIterator.hasNext()) {
        IRI textAnnotation = (IRI) textAnnotationIterator.next().getSubject();
        // test if selected Text is added
        validateTextAnnotation(ci.getMetadata(), textAnnotation, TERM, expectedValues);
        textAnnotationCount++;
        //perform additional tests for "hasMorphologicalFeature" and "hasLemmaForm"
        validateMorphoFeatureProperty(ci.getMetadata(), textAnnotation);
    }
    log.info("{} TextAnnotations found and validated ...", textAnnotationCount);
    int entityAnnoNum = validateAllEntityAnnotations(ci.getMetadata(), expectedValues);
    //no EntityAnnotations expected
    Assert.assertEquals("No EntityAnnotations expected by this test", 0, entityAnnoNum);
    shutdownEngine(morphoAnalysisEngine);
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) HashMap(java.util.HashMap) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Example 62 with RDFTerm

use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.

the class CeliLemmatizerEnhancementEngineTest method testEngine.

@Test
public void testEngine() throws Exception {
    ContentItem ci = wrapAsContentItem(TEXT);
    //add a simple triple to statically define the language of the test
    //content
    ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("it")));
    //unit test should not depend on each other (if possible)
    //CeliLanguageIdentifierEnhancementEngineTest.addEnanchements(ci);
    CeliLemmatizerEnhancementEngine morphoAnalysisEngine = initEngine(false);
    try {
        morphoAnalysisEngine.computeEnhancements(ci);
    } catch (EngineException e) {
        RemoteServiceHelper.checkServiceUnavailable(e);
        return;
    }
    TestUtils.logEnhancements(ci);
    //validate enhancement
    HashMap<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
    expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
    expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(morphoAnalysisEngine.getClass().getName()));
    Iterator<Triple> lemmaTextAnnotationIterator = ci.getMetadata().filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
    assertTrue("A TextAnnotation is expected by this Test", lemmaTextAnnotationIterator.hasNext());
    BlankNodeOrIRI lemmaTextAnnotation = lemmaTextAnnotationIterator.next().getSubject();
    assertTrue("TextAnnoations MUST BE IRIs!", lemmaTextAnnotation instanceof IRI);
    assertFalse("Only a single TextAnnotation is expected by this Test", lemmaTextAnnotationIterator.hasNext());
    //validate the enhancement metadata
    validateEnhancement(ci.getMetadata(), (IRI) lemmaTextAnnotation, expectedValues);
    //validate the lemma form TextAnnotation
    int lemmaForms = validateLemmaFormProperty(ci.getMetadata(), lemmaTextAnnotation, "it");
    assertTrue("Only a single LemmaForm property is expected if '" + MORPHOLOGICAL_ANALYSIS + "=false'", lemmaForms == 1);
    shutdownEngine(morphoAnalysisEngine);
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) HashMap(java.util.HashMap) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) Triple(org.apache.clerezza.commons.rdf.Triple) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Example 63 with RDFTerm

use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.

the class CeliNamedEntityExtractionEnhancementEngineTest method testInput.

private void testInput(String txt, String lang) throws EngineException, IOException {
    ContentItem ci = wrapAsContentItem(txt);
    try {
        //add a simple triple to statically define the language of the test content
        ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl(lang)));
        nerEngine.computeEnhancements(ci);
        TestUtils.logEnhancements(ci);
        HashMap<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
        expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
        expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(nerEngine.getClass().getName()));
        int textAnnoNum = validateAllTextAnnotations(ci.getMetadata(), txt, expectedValues);
        log.info(textAnnoNum + " TextAnnotations found ...");
        int entityAnnoNum = EnhancementStructureHelper.validateAllEntityAnnotations(ci.getMetadata(), expectedValues);
        log.info(entityAnnoNum + " EntityAnnotations found ...");
    } catch (EngineException e) {
        RemoteServiceHelper.checkServiceUnavailable(e);
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) HashMap(java.util.HashMap) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem)

Example 64 with RDFTerm

use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.

the class DBPSpotlightDisambiguateEnhancementEngine method createEnhancements.

/**
	 * The method adds the returned DBpedia Spotlight annotations to the content
	 * item's metadata. For each DBpedia resource an EntityAnnotation is created
	 * and linked to the according TextAnnotation.
	 * 
	 * @param occs
	 *            a Collection of entity information
	 * @param ci
	 *            the content item
	 */
public void createEnhancements(Collection<Annotation> occs, ContentItem ci, Language language) {
    HashMap<RDFTerm, IRI> entityAnnotationMap = new HashMap<RDFTerm, IRI>();
    for (Annotation occ : occs) {
        if (textAnnotationsMap.get(occ.surfaceForm) != null) {
            IRI textAnnotation = textAnnotationsMap.get(occ.surfaceForm);
            Graph model = ci.getMetadata();
            IRI entityAnnotation = EnhancementEngineHelper.createEntityEnhancement(ci, this);
            entityAnnotationMap.put(occ.uri, entityAnnotation);
            Literal label = new PlainLiteralImpl(occ.surfaceForm.name, language);
            model.add(new TripleImpl(entityAnnotation, DC_RELATION, textAnnotation));
            model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_LABEL, label));
            Collection<String> t = occ.getTypeNames();
            if (t != null) {
                Iterator<String> it = t.iterator();
                while (it.hasNext()) model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_TYPE, new IRI(it.next())));
            }
            model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_REFERENCE, occ.uri));
        }
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Graph(org.apache.clerezza.commons.rdf.Graph) HashMap(java.util.HashMap) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) Literal(org.apache.clerezza.commons.rdf.Literal) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) Annotation(org.apache.stanbol.enhancer.engines.dbpspotlight.model.Annotation)

Example 65 with RDFTerm

use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.

the class TestLocationEnhancementEngine method testLocationEnhancementEngine.

@Test
public void testLocationEnhancementEngine() throws IOException, EngineException {
    //create a content item
    ContentItem ci = getContentItem("urn:org.apache:stanbol.enhancer:text:content-item:person", CONTEXT);
    //add three text annotations to be consumed by this test
    getTextAnnotation(ci, PERSON, CONTEXT, DBPEDIA_PERSON);
    getTextAnnotation(ci, ORGANISATION, CONTEXT, DBPEDIA_ORGANISATION);
    getTextAnnotation(ci, PLACE, CONTEXT, DBPEDIA_PLACE);
    //perform the computation of the enhancements
    try {
        locationEnhancementEngine.computeEnhancements(ci);
    } catch (EngineException e) {
        RemoteServiceHelper.checkServiceUnavailable(e, "overloaded with requests");
        return;
    }
    Map<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
    expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
    expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(locationEnhancementEngine.getClass().getName()));
    //adding null as expected for confidence makes it a required property
    expectedValues.put(Properties.ENHANCER_CONFIDENCE, null);
    /*
         * Note:
         *  - Expected results depend on the geonames.org data. So if the test
         *    fails it may also mean that the data provided by geonames.org have
         *    changed
         */
    int entityAnnotationCount = validateAllEntityAnnotations(ci.getMetadata(), expectedValues);
//two suggestions for New Zealand and one hierarchy entry for the first
//suggestion
//NOTE 2012-10-10: changed expected value back to "3" as geonames.org
//   again returns "Oceania" as parent for "New Zealand"
//NOTE: 2012-11-12: deactivated this check, because this the fact that
//   "Oceania" is returned as parent for "New Zealand" changes every
//   every view weeks
//assertEquals(3, entityAnnotationCount);
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) HashMap(java.util.HashMap) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Aggregations

RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)126 IRI (org.apache.clerezza.commons.rdf.IRI)84 Triple (org.apache.clerezza.commons.rdf.Triple)70 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)48 Literal (org.apache.clerezza.commons.rdf.Literal)35 Test (org.junit.Test)35 HashSet (java.util.HashSet)30 HashMap (java.util.HashMap)28 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)26 Graph (org.apache.clerezza.commons.rdf.Graph)24 ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem)18 ArrayList (java.util.ArrayList)17 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)16 EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException)13 OWLOntologyID (org.semanticweb.owlapi.model.OWLOntologyID)13 SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph)12 Collection (java.util.Collection)10 IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph)10 Lock (java.util.concurrent.locks.Lock)9 IOException (java.io.IOException)5