Search in sources :

Example 16 with StringSource

use of org.apache.stanbol.enhancer.servicesapi.impl.StringSource in project stanbol by apache.

the class LangIdEngineTest method testEngine.

/**
 * Runs the language identification engine over the test text and validates
 * the enhancements it writes to the content item.
 * @throws EngineException
 * @throws IOException
 * @throws ConfigurationException
 */
@Test
public void testEngine() throws EngineException, IOException, ConfigurationException {
    // activate a fresh engine instance registered under the name "langid"
    LangIdEnhancementEngine engine = new LangIdEnhancementEngine();
    ComponentContext ctx = new MockComponentContext();
    ctx.getProperties().put(EnhancementEngine.PROPERTY_NAME, "langid");
    engine.activate(ctx);

    // let the engine process a content item built from the test text
    ContentItem item = ciFactory.createContentItem(new StringSource(text));
    engine.computeEnhancements(item);

    // every enhancement must point back to the content item and name the
    // engine class as its creator
    String creator = engine.getClass().getName();
    HashMap<IRI, RDFTerm> expected = new HashMap<IRI, RDFTerm>();
    expected.put(Properties.ENHANCER_EXTRACTED_FROM, item.getUri());
    expected.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(creator));

    int numTextAnnotations = validateAllTextAnnotations(item.getMetadata(), text, expected);
    assertEquals("A single TextAnnotation is expected", 1, numTextAnnotations);

    // this test checks the integration of the engine rather than the quality
    // of the detection; still, "en" is expected for the test text
    String detected = EnhancementEngineHelper.getLanguage(item);
    assertEquals("The detected language for text '" + text + "' MUST BE 'en'", "en", detected);

    int numEntityAnnotations = validateAllEntityAnnotations(item.getMetadata(), expected);
    assertEquals("No EntityAnnotations are expected", 0, numEntityAnnotations);
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) LangIdEnhancementEngine(org.apache.stanbol.enhancer.engines.langid.LangIdEnhancementEngine) ComponentContext(org.osgi.service.component.ComponentContext) HashMap(java.util.HashMap) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) StringSource(org.apache.stanbol.enhancer.servicesapi.impl.StringSource) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Example 17 with StringSource

use of org.apache.stanbol.enhancer.servicesapi.impl.StringSource in project stanbol by apache.

the class CustomDirFileContentItemFactoryTest method testCustomDir.

/**
 * Tests that the specified directory is actually used: creating a Blob must
 * add exactly one entry to the custom directory.
 * @throws IOException declared by the Blob creation call
 */
@Test
public void testCustomDir() throws IOException {
    assertTrue("The custom dir '" + customDir + "'MUST exist", customDir.exists());
    assertTrue("The custom dir '" + customDir + "'MUST be an directory", customDir.isDirectory());
    // number of entries before the Blob is created
    int numFilesBefore = customDir.list().length;
    Blob blob = contentItemFactory.createBlob(new StringSource("ensure a file exist"));
    assertNotNull(blob);
    // compare real counts (expected = before + 1, actual = now) so that a
    // failure reports the actual number of entries instead of shifted values
    Assert.assertEquals("Creating a new Blob has not increased the " + "number of files by one!", numFilesBefore + 1, customDir.list().length);
}
Also used : Blob(org.apache.stanbol.enhancer.servicesapi.Blob) StringSource(org.apache.stanbol.enhancer.servicesapi.impl.StringSource) Test(org.junit.Test) ContentItemFactoryTest(org.apache.stanbol.enhancer.test.ContentItemFactoryTest)

Example 18 with StringSource

use of org.apache.stanbol.enhancer.servicesapi.impl.StringSource in project stanbol by apache.

the class DBPSpotlightSpotEnhancementTest method initTest.

/**
 * Prepares the content item used by the tests: creates it from the test
 * text, looks up its plain text blob and sets "en" as its language.
 * @throws IOException declared by the content item creation / blob lookup
 */
@Before
public void initTest() throws IOException {
    // fresh content item for every test
    ci = ciFactory.createContentItem(new StringSource(TEST_TEXT));
    assertNotNull(ci);

    // the "text/plain" blob of the content item
    textContentPart = ContentItemHelper.getBlob(ci, Collections.singleton("text/plain"));
    assertNotNull(textContentPart);

    // state the language of the text in the metadata and read it back
    final String lang = "en";
    TripleImpl languageTriple = new TripleImpl(ci.getUri(), Properties.DC_LANGUAGE, new PlainLiteralImpl(lang));
    ci.getMetadata().add(languageTriple);
    assertEquals(lang, EnhancementEngineHelper.getLanguage(ci));
}
Also used : PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) StringSource(org.apache.stanbol.enhancer.servicesapi.impl.StringSource) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) Before(org.junit.Before)

Example 19 with StringSource

use of org.apache.stanbol.enhancer.servicesapi.impl.StringSource in project stanbol by apache.

the class DBPSpotlightDisambiguateEnhancementTest method initTest.

/**
 * Prepares the content item used by the tests: creates it from the test
 * text, sets "en" as its language and adds a fise:TextAnnotation selecting
 * "Angela Merkel" so that disambiguation has something to work on.
 * @throws IOException declared by the content item creation / blob lookup
 */
@Before
public void initTest() throws IOException {
    // fresh content item for every test
    ci = ciFactory.createContentItem(new StringSource(TEST_TEXT));
    assertNotNull(ci);
    textContentPart = ContentItemHelper.getBlob(ci, Collections.singleton("text/plain"));
    assertNotNull(textContentPart);

    // state the language of the text in the metadata and read it back
    ci.getMetadata().add(new TripleImpl(ci.getUri(), Properties.DC_LANGUAGE, new PlainLiteralImpl("en")));
    assertEquals("en", EnhancementEngineHelper.getLanguage(ci));

    // disambiguation needs an existing fise:TextAnnotation: create one that
    // selects the mention within the test text
    String mention = "Angela Merkel";
    int start = TEST_TEXT.indexOf(mention);
    int end = start + mention.length();
    Language english = new Language("en");
    IRI annotation = EnhancementEngineHelper.createTextEnhancement(ci, new DBPSpotlightSpotEnhancementEngine());

    Graph metadata = ci.getMetadata();
    LiteralFactory literals = LiteralFactory.getInstance();
    metadata.add(new TripleImpl(annotation, Properties.ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(mention, english)));
    metadata.add(new TripleImpl(annotation, Properties.ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(TEST_TEXT, english)));
    metadata.add(new TripleImpl(annotation, Properties.ENHANCER_START, literals.createTypedLiteral(start)));
    metadata.add(new TripleImpl(annotation, Properties.ENHANCER_END, literals.createTypedLiteral(end)));
    metadata.add(new TripleImpl(annotation, Properties.DC_TYPE, OntologicalClasses.DBPEDIA_PERSON));

    // test the test: the hand-made TextAnnotation must itself be valid
    EnhancementStructureHelper.validateAllTextAnnotations(metadata, TEST_TEXT, null);
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) DBPSpotlightSpotEnhancementEngine(org.apache.stanbol.enhancer.engines.dbpspotlight.spot.DBPSpotlightSpotEnhancementEngine) Graph(org.apache.clerezza.commons.rdf.Graph) Language(org.apache.clerezza.commons.rdf.Language) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) StringSource(org.apache.stanbol.enhancer.servicesapi.impl.StringSource) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) LiteralFactory(org.apache.clerezza.rdf.core.LiteralFactory) Before(org.junit.Before)

Example 20 with StringSource

use of org.apache.stanbol.enhancer.servicesapi.impl.StringSource in project stanbol by apache.

the class DereferenceEngineTest method getContentItem.

/**
 * Creates a content item with the given id and copies all test metadata
 * into its metadata. The textual content is only a placeholder.
 * @param id the string used to build the IRI of the content item
 * @return the created content item
 * @throws IOException declared by the content item creation
 */
public static ContentItem getContentItem(final String id) throws IOException {
    IRI uri = new IRI(id);
    ContentItem item = ciFactory.createContentItem(uri, new StringSource("Not used"));
    item.getMetadata().addAll(testMetadata);
    return item;
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) StringSource(org.apache.stanbol.enhancer.servicesapi.impl.StringSource) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem)

Aggregations

StringSource (org.apache.stanbol.enhancer.servicesapi.impl.StringSource)26 Test (org.junit.Test)14 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)13 ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem)13 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)12 IRI (org.apache.clerezza.commons.rdf.IRI)10 Before (org.junit.Before)5 HashMap (java.util.HashMap)4 Graph (org.apache.clerezza.commons.rdf.Graph)4 RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)4 Token (org.apache.stanbol.enhancer.nlp.model.Token)4 PosTag (org.apache.stanbol.enhancer.nlp.pos.PosTag)4 Charset (java.nio.charset.Charset)3 AnalysedText (org.apache.stanbol.enhancer.nlp.model.AnalysedText)3 Blob (org.apache.stanbol.enhancer.servicesapi.Blob)3 ContentSource (org.apache.stanbol.enhancer.servicesapi.ContentSource)3 BeforeClass (org.junit.BeforeClass)3 Chunk (org.apache.stanbol.enhancer.nlp.model.Chunk)2 Sentence (org.apache.stanbol.enhancer.nlp.model.Sentence)2 Value (org.apache.stanbol.enhancer.nlp.model.annotation.Value)2