Search in sources :

Example 26 with ContentItem

use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.

the class FstLinkingEngineTest method setupTest.

/**
 * Initialises the {@link #ci} and {@link #content} fields for tests.
 * It creates a ContentItem containing a '<code>text/plain</code>'
 * {@link Blob} for the {@value #TEST_TEXT_FILE} and an {@link AnalysedText}
 * filled with the NLP analysis results stored in
 * {@link #TEST_TEXT_NLP_FILE}. The language of the ContentItem is set by
 * adding a {@code DC_LANGUAGE} triple to its metadata.
 * <p>
 * Both classpath streams are closed in a <code>finally</code> block so
 * that they are released even if creating the ContentItem or parsing the
 * NLP results fails.
 *
 * @throws IOException on any IO related error while reading the test files
 */
@Before
public void setupTest() throws IOException {
    ClassLoader cl = FstLinkingEngineTest.class.getClassLoader();
    //create a contentItem for the plain text used for testing
    InputStream is = cl.getResourceAsStream(TEST_TEXT_FILE);
    Assert.assertNotNull("Unable to load '" + TEST_TEXT_FILE + "' via classpath", is);
    ContentItem ci;
    AnalysedText at;
    try {
        ci = cif.createContentItem(new StreamSource(is, "text/plain"));
        at = atf.createAnalysedText(ci, ci.getBlob());
    } finally {
        //release the stream regardless of the outcome
        is.close();
    }
    //parse the prepared NLP results and add it to the ContentItem
    is = cl.getResourceAsStream(TEST_TEXT_NLP_FILE);
    Assert.assertNotNull("Unable to load '" + TEST_TEXT_NLP_FILE + "' via classpath", is);
    try {
        AnalyzedTextParser.getDefaultInstance().parse(is, Charset.forName("UTF-8"), at);
    } finally {
        is.close();
    }
    //set the language of the contentItem
    ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, EN_LANGUAGE));
    //set the contentItem and also the content
    this.ci = ci;
    this.content = at.getText().toString();
}
Also used : AnalysedText(org.apache.stanbol.enhancer.nlp.model.AnalysedText) InputStream(java.io.InputStream) StreamSource(org.apache.stanbol.enhancer.servicesapi.impl.StreamSource) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Before(org.junit.Before)

Example 27 with ContentItem

use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.

the class TestOpenCalaisEngine method testEntityExtraction.

/**
 * Reads the stored result file <code>calaisresult.owl</code> from the
 * classpath, queries the resulting model for entity occurrences and
 * validates the enhancements created from them for {@code TEST_TEXT}.
 * <p>
 * If querying the model fails with an {@link EngineException} the
 * exception is handed to
 * {@code RemoteServiceHelper.checkServiceUnavailable(..)} and the test
 * returns without further checks.
 *
 * @throws IOException on any error while closing the test resource
 * @throws EngineException if thrown while processing the test data
 */
@Test
public void testEntityExtraction() throws IOException, EngineException {
    String testFile = "calaisresult.owl";
    String format = "application/rdf+xml";
    InputStream in = this.getClass().getClassLoader().getResourceAsStream(testFile);
    Assert.assertNotNull("failed to load resource " + testFile, in);
    Graph model;
    try {
        model = calaisExtractor.readModel(in, format);
    } finally {
        //the stream was previously never closed; this assumes readModel
        //has consumed it completely by the time it returns
        in.close();
    }
    Assert.assertNotNull("model reader failed with format: " + format, model);
    Collection<CalaisEntityOccurrence> entities;
    try {
        entities = calaisExtractor.queryModel(model);
    } catch (EngineException e) {
        RemoteServiceHelper.checkServiceUnavailable(e);
        return;
    }
    LOG.info("Found entities: {}", entities.size());
    LOG.debug("Entities:\n{}", entities);
    Assert.assertFalse("No entities found!", entities.isEmpty());
    //test the generation of the Enhancements
    ContentItem ci = wrapAsContentItem(TEST_TEXT);
    calaisExtractor.createEnhancements(entities, ci);
    Map<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
    expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
    expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(calaisExtractor.getClass().getName()));
    //adding null as expected for confidence makes it a required property
    expectedValues.put(Properties.ENHANCER_CONFIDENCE, null);
    validateAllTextAnnotations(ci.getMetadata(), TEST_TEXT, expectedValues);
    validateAllEntityAnnotations(ci.getMetadata(), expectedValues);
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Graph(org.apache.clerezza.commons.rdf.Graph) HashMap(java.util.HashMap) InputStream(java.io.InputStream) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Example 28 with ContentItem

use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.

the class BenchmarkImpl method getGraph.

/**
 * {@inheritDoc}
 * <p>
 * The graph is computed on the first call only: a ContentItem is created
 * from the input text, enhanced via the parsed job manager (using the
 * configured chain if there is one) and an immutable copy of its metadata
 * is stored in the {@code graph} field. Later calls return that stored
 * graph.
 *
 * @param jobManager used to enhance the ContentItem
 * @param ciFactory used to create the ContentItem for the input text
 * @return the immutable graph holding the enhancement results
 * @throws EnhancementException if thrown while enhancing the content
 * @throws IllegalStateException if the ContentItem can not be created
 */
public ImmutableGraph getGraph(EnhancementJobManager jobManager, ContentItemFactory ciFactory) throws EnhancementException {
    if (graph == null) {
        ContentItem ci;
        try {
            ci = ciFactory.createContentItem(new StringSource(inputText));
        } catch (IOException e) {
            //NOTE: the original message was missing the space before 'using'
            throw new IllegalStateException("Unable to create a ContentItem using '" + ciFactory.getClass().getSimpleName() + "'!", e);
        }
        if (chain == null) {
            jobManager.enhanceContent(ci);
        } else {
            //passing null as chain does not work, hence the separate call above
            jobManager.enhanceContent(ci, chain);
        }
        graph = ci.getMetadata().getImmutableGraph();
    }
    return graph;
}
Also used : StringSource(org.apache.stanbol.enhancer.servicesapi.impl.StringSource) IOException(java.io.IOException) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem)

Example 29 with ContentItem

use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.

the class TestEntityLinkingEnhancementEngine method testEntityLinkingEnhancementEngine.

/**
 * Runs an engine initialised with all three flags set to
 * <code>true</code> on a fresh ContentItem and expects exactly three
 * validated entity annotations.
 */
@Test
public void testEntityLinkingEnhancementEngine() throws Exception {
    final ContentItem contentItem = initContentItem();
    final NamedEntityTaggingEngine engine = initEngine(true, true, true);
    //let the engine add its enhancements to the content item
    engine.computeEnhancements(contentItem);
    //validate the results and check the number of entity annotations
    assertEquals(3, validateAllEntityAnnotations(engine, contentItem));
}
Also used : ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Example 30 with ContentItem

use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.

the class TestEntityLinkingEnhancementEngine method testPersonLinking.

/**
 * Runs an engine initialised with only the first flag set to
 * <code>true</code> on a fresh ContentItem and expects exactly one
 * validated entity annotation.
 */
@Test
public void testPersonLinking() throws Exception {
    final ContentItem contentItem = initContentItem();
    //NOTE(review): presumably the first flag enables person linking - confirm against initEngine
    final NamedEntityTaggingEngine engine = initEngine(true, false, false);
    //let the engine add its enhancements to the content item
    engine.computeEnhancements(contentItem);
    //validate the results and check the number of entity annotations
    assertEquals(1, validateAllEntityAnnotations(engine, contentItem));
}
Also used : ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Aggregations

ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem) 73, Test (org.junit.Test) 62, IRI (org.apache.clerezza.commons.rdf.IRI) 46, BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI) 18, RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm) 18, HashMap (java.util.HashMap) 15, TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) 15, Blob (org.apache.stanbol.enhancer.servicesapi.Blob) 15, StringSource (org.apache.stanbol.enhancer.servicesapi.impl.StringSource) 13, EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException) 12, PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) 11, Graph (org.apache.clerezza.commons.rdf.Graph) 8, Date (java.util.Date) 6, SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) 6, Hashtable (java.util.Hashtable) 5, AnalysedText (org.apache.stanbol.enhancer.nlp.model.AnalysedText) 4, IOException (java.io.IOException) 3, InputStream (java.io.InputStream) 3, MediaType (javax.ws.rs.core.MediaType) 3, Triple (org.apache.clerezza.commons.rdf.Triple) 3