
Example 1 with TripleImpl

Use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in project stanbol by apache.

From the class IndexedGraphTest, the method bNodeConsitency:

@Test
public void bNodeConsitency() {
    Graph mGraph = getEmptyGraph();
    // two distinct BlankNode instances that compare equal (same hashCode, equals any BlankNode)
    final BlankNode bNode = new BlankNode() {

        @Override
        public int hashCode() {
            return -1;
        }

        @Override
        public boolean equals(Object o) {
            return o instanceof BlankNode;
        }
    };
    final BlankNode bNodeClone = new BlankNode() {

        @Override
        public int hashCode() {
            return -1;
        }

        @Override
        public boolean equals(Object o) {
            return o instanceof BlankNode;
        }
    };
    mGraph.add(new TripleImpl(bNode, uriRef1, uriRef2));
    mGraph.add(new TripleImpl(bNodeClone, uriRef2, uriRef3));
    // the graph must hand back a subject that is still equal to the bnode that was added
    BlankNodeOrIRI bNodeBack = mGraph.filter(null, uriRef1, uriRef2).next().getSubject();
    Assert.assertEquals("The bnode we get back is not equal to the one we added", bNode, bNodeBack);
    BlankNodeOrIRI bNodeBack2 = mGraph.filter(null, uriRef2, uriRef3).next().getSubject();
    Assert.assertEquals("The returned bnodes are no longer equal", bNodeBack, bNodeBack2);
    Assert.assertTrue("Not finding a triple when searching with an equal bNode", mGraph.filter(bNodeBack, uriRef2, null).hasNext());
}
Also used : SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) Graph(org.apache.clerezza.commons.rdf.Graph) BlankNode(org.apache.clerezza.commons.rdf.BlankNode) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) Test(org.junit.Test) GraphTest(org.apache.clerezza.rdf.core.test.GraphTest)

Example 2 with TripleImpl

Use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in project stanbol by apache.

From the class CeliAnalyzedTextLemmatizerEngineTest, the method testEngineDe:

@Test
public void testEngineDe() throws IOException, EngineException {
    ContentItem ci = ciFactory.createContentItem(new StringSource(de_text));
    Assert.assertNotNull(ci);
    AnalysedText at = atFactory.createAnalysedText(ci, ci.getBlob());
    Assert.assertNotNull(at);
    ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("de")));
    Assert.assertEquals("de", EnhancementEngineHelper.getLanguage(ci));
    // Add some Tokens with POS annotations to test the usage of
    // existing POS annotations by the lemmatizer
    Token verbrachten = at.addToken(de_verbStart, de_verbStart + de_verb.length());
    verbrachten.addAnnotation(POS_ANNOTATION, Value.value(new PosTag("V", LexicalCategory.Verb), de_verbProb));
    Token schonen = at.addToken(de_adjectiveStart, de_adjectiveStart + de_adjective.length());
    schonen.addAnnotation(POS_ANNOTATION, Value.value(new PosTag("ADJ", LexicalCategory.Adjective), de_adjectiveProb));
    Token urlaub = at.addToken(de_nounStart, de_nounStart + de_noun.length());
    urlaub.addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NC", LexicalCategory.Noun), de_nounProb));
    Assert.assertEquals("Can not enhance Test ContentItem", EnhancementEngine.ENHANCE_ASYNC, engine.canEnhance(ci));
    // compute the enhancements
    try {
        engine.computeEnhancements(ci);
    } catch (EngineException e) {
        RemoteServiceHelper.checkServiceUnavailable(e);
        // deactivate test
        return;
    }
    // now validate the enhancements
    boolean foundVerb = false;
    boolean foundAdjective = false;
    boolean foundNoun = false;
    for (Iterator<Token> tokens = at.getTokens(); tokens.hasNext(); ) {
        Token token = tokens.next();
        log.info("Token: {}", token);
        List<Value<MorphoFeatures>> mfs = token.getAnnotations(NlpAnnotations.MORPHO_ANNOTATION);
        if (de_verb.equals(token.getSpan())) {
            foundVerb = !mfs.isEmpty();
            validateMorphFeatureProbability(mfs, LexicalCategory.Verb, de_verbProb);
        } else if (de_adjective.equals(token.getSpan())) {
            foundAdjective = !mfs.isEmpty();
            validateMorphFeatureProbability(mfs, LexicalCategory.Adjective, de_adjectiveProb);
        } else if (de_noun.equals(token.getSpan())) {
            foundNoun = !mfs.isEmpty();
            validateMorphFeatureProbability(mfs, LexicalCategory.Noun, de_nounProb);
        }
        for (Value<MorphoFeatures> mf : mfs) {
            log.info("  - {}", mf);
            Assert.assertNotNull(mf.value().getLemma());
        }
    }
    Assert.assertTrue("No MorphoFeatures found for '" + de_verb + "'!", foundVerb);
    Assert.assertTrue("No MorphoFeatures found for '" + de_adjective + "'!", foundAdjective);
    Assert.assertTrue("No MorphoFeatures found for '" + de_noun + "'!", foundNoun);
}
Also used : PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) Token(org.apache.stanbol.enhancer.nlp.model.Token) AnalysedText(org.apache.stanbol.enhancer.nlp.model.AnalysedText) PosTag(org.apache.stanbol.enhancer.nlp.pos.PosTag) Value(org.apache.stanbol.enhancer.nlp.model.annotation.Value) StringSource(org.apache.stanbol.enhancer.servicesapi.impl.StringSource) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) MorphoFeatures(org.apache.stanbol.enhancer.nlp.morpho.MorphoFeatures) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Example 3 with TripleImpl

Use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in project stanbol by apache.

From the class CeliNamedEntityExtractionEnhancementEngineTest, the method testInput:

private void testInput(String txt, String lang) throws EngineException, IOException {
    ContentItem ci = wrapAsContentItem(txt);
    try {
        // add a simple triple to statically define the language of the test content
        ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl(lang)));
        nerEngine.computeEnhancements(ci);
        TestUtils.logEnhancements(ci);
        HashMap<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
        expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
        expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(nerEngine.getClass().getName()));
        int textAnnoNum = validateAllTextAnnotations(ci.getMetadata(), txt, expectedValues);
        log.info(textAnnoNum + " TextAnnotations found ...");
        int entityAnnoNum = EnhancementStructureHelper.validateAllEntityAnnotations(ci.getMetadata(), expectedValues);
        log.info(entityAnnoNum + " EntityAnnotations found ...");
    } catch (EngineException e) {
        RemoteServiceHelper.checkServiceUnavailable(e);
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) HashMap(java.util.HashMap) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem)

Example 4 with TripleImpl

Use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in project stanbol by apache.

From the class CeliSentimentAnalysisEngineTest, the method testInput:

private void testInput(String txt, String lang) throws EngineException, IOException {
    ContentItem ci = wrapAsContentItem(txt);
    try {
        // add a simple triple to statically define the language of the test content
        ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl(lang)));
        sentimentAnalysisEngine.computeEnhancements(ci);
        TestUtils.logEnhancements(ci);
        HashMap<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
        expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
        expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(sentimentAnalysisEngine.getClass().getName()));
        expectedValues.put(DC_TYPE, CeliConstants.SENTIMENT_EXPRESSION);
        int textAnnoNum = validateAllTextAnnotations(ci.getMetadata(), txt, expectedValues);
        log.info(textAnnoNum + " TextAnnotations found ...");
        assertTrue("2 sentiment expressions should be recognized in: " + txt, textAnnoNum == 2);
        int entityAnnoNum = EnhancementStructureHelper.validateAllEntityAnnotations(ci.getMetadata(), expectedValues);
        assertTrue("0 entity annotations should be recognized in: " + txt, entityAnnoNum == 0);
    } catch (EngineException e) {
        RemoteServiceHelper.checkServiceUnavailable(e);
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) HashMap(java.util.HashMap) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem)

Example 5 with TripleImpl

Use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in project stanbol by apache.

From the class DBPSpotlightDisambiguateEnhancementEngine, the method createEnhancements:

/**
 * The method adds the returned DBpedia Spotlight annotations to the content
 * item's metadata. For each DBpedia resource an EntityAnnotation is created
 * and linked to the corresponding TextAnnotation.
 *
 * @param occs
 *            a Collection of entity information
 * @param ci
 *            the content item
 */
public void createEnhancements(Collection<Annotation> occs, ContentItem ci, Language language) {
    HashMap<RDFTerm, IRI> entityAnnotationMap = new HashMap<RDFTerm, IRI>();
    for (Annotation occ : occs) {
        if (textAnnotationsMap.get(occ.surfaceForm) != null) {
            IRI textAnnotation = textAnnotationsMap.get(occ.surfaceForm);
            Graph model = ci.getMetadata();
            IRI entityAnnotation = EnhancementEngineHelper.createEntityEnhancement(ci, this);
            entityAnnotationMap.put(occ.uri, entityAnnotation);
            Literal label = new PlainLiteralImpl(occ.surfaceForm.name, language);
            model.add(new TripleImpl(entityAnnotation, DC_RELATION, textAnnotation));
            model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_LABEL, label));
            Collection<String> t = occ.getTypeNames();
            if (t != null) {
                Iterator<String> it = t.iterator();
                while (it.hasNext()) {
                    model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_TYPE, new IRI(it.next())));
                }
            }
            model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_REFERENCE, occ.uri));
        }
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Graph(org.apache.clerezza.commons.rdf.Graph) HashMap(java.util.HashMap) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) Literal(org.apache.clerezza.commons.rdf.Literal) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) Annotation(org.apache.stanbol.enhancer.engines.dbpspotlight.model.Annotation)

Aggregations

TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl): 143 usages
IRI (org.apache.clerezza.commons.rdf.IRI): 104 usages
PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl): 69 usages
Graph (org.apache.clerezza.commons.rdf.Graph): 66 usages
BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI): 49 usages
Triple (org.apache.clerezza.commons.rdf.Triple): 41 usages
RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm): 26 usages
EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException): 23 usages
HashMap (java.util.HashMap): 20 usages
Language (org.apache.clerezza.commons.rdf.Language): 20 usages
Literal (org.apache.clerezza.commons.rdf.Literal): 20 usages
LiteralFactory (org.apache.clerezza.rdf.core.LiteralFactory): 20 usages
IOException (java.io.IOException): 18 usages
SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph): 17 usages
Test (org.junit.Test): 16 usages
ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem): 15 usages
IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph): 14 usages
HashSet (java.util.HashSet): 13 usages
StringSource (org.apache.stanbol.enhancer.servicesapi.impl.StringSource): 13 usages
BlankNode (org.apache.clerezza.commons.rdf.BlankNode): 11 usages
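
To round off the examples above, here is a minimal, self-contained sketch of the pattern they all share: build a TripleImpl from a subject, predicate, and object, add it to a Graph, and query it back with filter(), where null acts as a wildcard. It uses SimpleGraph and PlainLiteralImpl from the aggregated imports listed above; the class name TripleImplSketch and the example IRIs are made up purely for illustration and do not come from the Stanbol code base.

import java.util.Iterator;

import org.apache.clerezza.commons.rdf.Graph;
import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.commons.rdf.Triple;
import org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl;
import org.apache.clerezza.commons.rdf.impl.utils.TripleImpl;
import org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph;

public class TripleImplSketch {

    public static void main(String[] args) {
        // in-memory, mutable graph; add() is used the same way in the examples above
        Graph graph = new SimpleGraph();

        // illustrative IRIs only (not constants from the Stanbol project)
        IRI subject = new IRI("http://example.org/content/item1");
        IRI dcLanguage = new IRI("http://purl.org/dc/terms/language");

        // a TripleImpl is simply (subject, predicate, object)
        graph.add(new TripleImpl(subject, dcLanguage, new PlainLiteralImpl("de")));

        // filter(subject, predicate, object): null behaves as a wildcard
        Iterator<Triple> it = graph.filter(subject, dcLanguage, null);
        while (it.hasNext()) {
            // prints the language literal added above ("de")
            System.out.println(it.next().getObject());
        }
    }
}

The same three-step shape, construct, add, filter, is what every example on this page exercises, whether the subject is a BlankNode, the object a PlainLiteralImpl, or the predicate a Stanbol ontology constant such as DC_LANGUAGE.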