Search in sources :

Example 1 with SimpleLabelTokenizer

Use of org.apache.stanbol.enhancer.engines.entitylinking.labeltokenizer.SimpleLabelTokenizer in the Apache Stanbol project.

The method setUpServices of the class EntityLinkingEngineTest.

/**
 * Builds the class-wide fixtures: a {@code TestSearcherImpl} using a
 * {@code SimpleLabelTokenizer} and holding nine test entities, plus the two
 * analysed texts {@code TEST_ANALYSED_TEXT} and {@code TEST_ANALYSED_TEXT_WO}.
 *
 * <p>All entity triples are written into one shared {@code IndexedGraph}; each
 * {@code Entity} is created from its own IRI together with that graph. Both
 * analysed texts receive a noun-phrase chunk over their first 20 characters and
 * two proper-noun tokens inside that chunk.
 *
 * @throws IOException declared by the signature; presumably raised while the
 *         blobs / analysed texts are created (not visible from this method)
 */
@BeforeClass
public static void setUpServices() throws IOException {
    searcher = new TestSearcherImpl(TEST_REFERENCED_SITE_NAME, NAME, new SimpleLabelTokenizer());

    // Every entity below stores its triples in this one graph.
    Graph entityData = new IndexedGraph();
    String skosConceptUri = NamespaceEnum.skos + "Concept";
    String geologistRedirectUri = "urn:test:redirect:Geologist";

    // Person
    IRI patrickMarshall = new IRI("urn:test:PatrickMarshall");
    entityData.add(new TripleImpl(patrickMarshall, NAME, new PlainLiteralImpl("Patrick Marshall")));
    entityData.add(new TripleImpl(patrickMarshall, TYPE, OntologicalClasses.DBPEDIA_PERSON));
    searcher.addEntity(new Entity(patrickMarshall, entityData));

    // Concept that points to a redirect target
    IRI geologist = new IRI("urn:test:Geologist");
    entityData.add(new TripleImpl(geologist, NAME, new PlainLiteralImpl("Geologist")));
    entityData.add(new TripleImpl(geologist, TYPE, new IRI(skosConceptUri)));
    entityData.add(new TripleImpl(geologist, REDIRECT, new IRI(geologistRedirectUri)));
    searcher.addEntity(new Entity(geologist, entityData));

    // The redirect target itself
    IRI geologistRedirect = new IRI(geologistRedirectUri);
    entityData.add(new TripleImpl(geologistRedirect, NAME, new PlainLiteralImpl("Geologe (redirect)")));
    entityData.add(new TripleImpl(geologistRedirect, TYPE, new IRI(skosConceptUri)));
    searcher.addEntity(new Entity(geologistRedirect, entityData));

    // Place
    IRI newZealand = new IRI("urn:test:NewZealand");
    entityData.add(new TripleImpl(newZealand, NAME, new PlainLiteralImpl("New Zealand")));
    entityData.add(new TripleImpl(newZealand, TYPE, OntologicalClasses.DBPEDIA_PLACE));
    searcher.addEntity(new Entity(newZealand, entityData));

    // Organisation
    IRI universityOfOtago = new IRI("urn:test:UniversityOfOtago");
    entityData.add(new TripleImpl(universityOfOtago, NAME, new PlainLiteralImpl("University of Otago")));
    entityData.add(new TripleImpl(universityOfOtago, TYPE, OntologicalClasses.DBPEDIA_ORGANISATION));
    searcher.addEntity(new Entity(universityOfOtago, entityData));

    // Concept whose label is a single word of the organisation label above
    IRI university = new IRI("urn:test:University");
    entityData.add(new TripleImpl(university, NAME, new PlainLiteralImpl("University")));
    entityData.add(new TripleImpl(university, TYPE, new IRI(skosConceptUri)));
    searcher.addEntity(new Entity(university, entityData));

    // Place
    IRI otago = new IRI("urn:test:Otago");
    entityData.add(new TripleImpl(otago, NAME, new PlainLiteralImpl("Otago")));
    entityData.add(new TripleImpl(otago, TYPE, OntologicalClasses.DBPEDIA_PLACE));
    searcher.addEntity(new Entity(otago, entityData));

    // A 2nd Otago (place and university); the place carries two labels
    IRI otagoTexas = new IRI("urn:test:Otago_Texas");
    entityData.add(new TripleImpl(otagoTexas, NAME, new PlainLiteralImpl("Otago (Texas)")));
    entityData.add(new TripleImpl(otagoTexas, NAME, new PlainLiteralImpl("Otago")));
    entityData.add(new TripleImpl(otagoTexas, TYPE, OntologicalClasses.DBPEDIA_PLACE));
    searcher.addEntity(new Entity(otagoTexas, entityData));

    IRI universityOfOtagoTexas = new IRI("urn:test:UniversityOfOtago_Texas");
    entityData.add(new TripleImpl(universityOfOtagoTexas, NAME,
            new PlainLiteralImpl("University of Otago (Texas)")));
    entityData.add(new TripleImpl(universityOfOtagoTexas, TYPE, OntologicalClasses.DBPEDIA_ORGANISATION));
    searcher.addEntity(new Entity(universityOfOtagoTexas, entityData));

    // Create both analysed texts before annotating either of them.
    TEST_ANALYSED_TEXT = AnalysedTextFactory.getDefaultInstance()
            .createAnalysedText(ciFactory.createBlob(new StringSource(TEST_TEXT)));
    TEST_ANALYSED_TEXT_WO = AnalysedTextFactory.getDefaultInstance()
            .createAnalysedText(ciFactory.createBlob(new StringSource(TEST_TEXT_WO)));

    // First text: noun phrase over "Dr. Patrick Marshall", tokens [4,11) and [12,20).
    initAnalyzedText(TEST_ANALYSED_TEXT);
    int phraseEnd = "Dr. Patrick Marshall".length();
    TEST_ANALYSED_TEXT.addChunk(0, phraseEnd)
            .addAnnotation(PHRASE_ANNOTATION, NOUN_PHRASE);
    TEST_ANALYSED_TEXT.addToken(4, 11)
            .addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NP", Pos.ProperNoun), 1d));
    TEST_ANALYSED_TEXT.addToken(12, 20)
            .addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NP", Pos.ProperNoun), 1d));

    // Second text: the two name parts are swapped, so the token boundaries shift to [4,12) and [13,20).
    initAnalyzedText(TEST_ANALYSED_TEXT_WO);
    int swappedPhraseEnd = "Dr. Marshall Patrick".length();
    TEST_ANALYSED_TEXT_WO.addChunk(0, swappedPhraseEnd)
            .addAnnotation(PHRASE_ANNOTATION, NOUN_PHRASE);
    TEST_ANALYSED_TEXT_WO.addToken(4, 12)
            .addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NP", Pos.ProperNoun), 1d));
    TEST_ANALYSED_TEXT_WO.addToken(13, 20)
            .addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NP", Pos.ProperNoun), 1d));
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) LinkedEntity(org.apache.stanbol.enhancer.engines.entitylinking.impl.LinkedEntity) Entity(org.apache.stanbol.enhancer.engines.entitylinking.Entity) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) Graph(org.apache.clerezza.commons.rdf.Graph) PosTag(org.apache.stanbol.enhancer.nlp.pos.PosTag) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) SimpleLabelTokenizer(org.apache.stanbol.enhancer.engines.entitylinking.labeltokenizer.SimpleLabelTokenizer) TestSearcherImpl(org.apache.stanbol.enhancer.engines.entitylinking.impl.TestSearcherImpl) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) StringSource(org.apache.stanbol.enhancer.servicesapi.impl.StringSource) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) BeforeClass(org.junit.BeforeClass)

Aggregations

Graph (org.apache.clerezza.commons.rdf.Graph)1 IRI (org.apache.clerezza.commons.rdf.IRI)1 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)1 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)1 IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph)1 Entity (org.apache.stanbol.enhancer.engines.entitylinking.Entity)1 LinkedEntity (org.apache.stanbol.enhancer.engines.entitylinking.impl.LinkedEntity)1 TestSearcherImpl (org.apache.stanbol.enhancer.engines.entitylinking.impl.TestSearcherImpl)1 SimpleLabelTokenizer (org.apache.stanbol.enhancer.engines.entitylinking.labeltokenizer.SimpleLabelTokenizer)1 PosTag (org.apache.stanbol.enhancer.nlp.pos.PosTag)1 StringSource (org.apache.stanbol.enhancer.servicesapi.impl.StringSource)1 BeforeClass (org.junit.BeforeClass)1