use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.
the class FstLinkingEngineTest method setupTest.
/**
 * Initialises the {@link #ci} and {@link #content} fields for tests.
 * It creates a ContentItem containing a '<code>text/plain</code>'
 * {@link Blob} for the {@value #TEST_TEXT_FILE} and an {@link AnalysedText}
 * filled with the NLP analysis results stored in
 * {@link #TEST_TEXT_NLP_FILE}. The language of the ContentItem is set by
 * adding a {@code DC_LANGUAGE} triple with the value {@code EN_LANGUAGE}
 * to its metadata.
 * @throws IOException on any IO related error while reading the test files
 */
@Before
public void setupTest() throws IOException {
    ClassLoader cl = FstLinkingEngineTest.class.getClassLoader();
    //create a contentItem for the plain text used for testing
    InputStream is = cl.getResourceAsStream(TEST_TEXT_FILE);
    Assert.assertNotNull("Unable to load '" + TEST_TEXT_FILE + "' via classpath", is);
    ContentItem ci;
    AnalysedText at;
    try {
        ci = cif.createContentItem(new StreamSource(is, "text/plain"));
        at = atf.createAnalysedText(ci, ci.getBlob());
    } finally {
        //release the stream even if the creation of the ContentItem fails
        is.close();
    }
    //parse the prepared NLP results and add it to the ContentItem
    is = cl.getResourceAsStream(TEST_TEXT_NLP_FILE);
    Assert.assertNotNull("Unable to load '" + TEST_TEXT_NLP_FILE + "' via classpath", is);
    try {
        AnalyzedTextParser.getDefaultInstance().parse(is, Charset.forName("UTF-8"), at);
    } finally {
        //release the stream even if parsing the NLP results fails
        is.close();
    }
    //set the language of the contentItem
    ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, EN_LANGUAGE));
    //set the contentItem and also the content
    this.ci = ci;
    this.content = at.getText().toString();
}
use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.
the class TestOpenCalaisEngine method testEntityExtraction.
/**
 * Reads a stored RDF/XML result from the classpath, extracts the entity
 * occurrences from it and validates the TextAnnotations and
 * EntityAnnotations created for them on a ContentItem wrapping
 * {@code TEST_TEXT}.
 * @throws IOException on any error while reading or closing the test resource
 * @throws EngineException if thrown while reading the model or creating the
 * enhancements
 */
@Test
public void testEntityExtraction() throws IOException, EngineException {
    String testFile = "calaisresult.owl";
    String format = "application/rdf+xml";
    InputStream in = this.getClass().getClassLoader().getResourceAsStream(testFile);
    Assert.assertNotNull("failed to load resource " + testFile, in);
    Graph model;
    try {
        model = calaisExtractor.readModel(in, format);
    } finally {
        //the stream is no longer needed once the model was read (or reading failed)
        in.close();
    }
    Assert.assertNotNull("model reader failed with format: " + format, model);
    Collection<CalaisEntityOccurrence> entities;
    try {
        entities = calaisExtractor.queryModel(model);
    } catch (EngineException e) {
        //NOTE(review): presumably rethrows/fails unless the exception indicates
        //an unavailable remote service - confirm against RemoteServiceHelper
        RemoteServiceHelper.checkServiceUnavailable(e);
        return;
    }
    LOG.info("Found entities: {}", entities.size());
    LOG.debug("Entities:\n{}", entities);
    Assert.assertFalse("No entities found!", entities.isEmpty());
    //test the generation of the Enhancements
    ContentItem ci = wrapAsContentItem(TEST_TEXT);
    calaisExtractor.createEnhancements(entities, ci);
    Map<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
    expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
    expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(calaisExtractor.getClass().getName()));
    //adding null as expected for confidence makes it a required property
    expectedValues.put(Properties.ENHANCER_CONFIDENCE, null);
    validateAllTextAnnotations(ci.getMetadata(), TEST_TEXT, expectedValues);
    validateAllEntityAnnotations(ci.getMetadata(), expectedValues);
}
use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.
the class BenchmarkImpl method getGraph.
/**
 * {@inheritDoc}
 * <p>
 * The graph is computed on the first call by enhancing a ContentItem created
 * from the input text and is kept in the {@code graph} field; later calls
 * return that stored graph without enhancing again.
 * @throws IllegalStateException if the parsed factory fails to create the
 * ContentItem for the input text
 */
public ImmutableGraph getGraph(EnhancementJobManager jobManager, ContentItemFactory ciFactory) throws EnhancementException {
    if (graph == null) {
        ContentItem ci;
        try {
            ci = ciFactory.createContentItem(new StringSource(inputText));
        } catch (IOException e) {
            throw new IllegalStateException("Unable to create a ContentItem using '" + ciFactory.getClass().getSimpleName() + "'!", e);
        }
        if (chain == null) {
            jobManager.enhanceContent(ci);
        } else {
            //parsing null as chain does not work!
            jobManager.enhanceContent(ci, chain);
        }
        graph = ci.getMetadata().getImmutableGraph();
    }
    return graph;
}
use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.
the class TestEntityLinkingEnhancementEngine method testEntityLinkingEnhancementEngine.
/**
 * Runs an engine initialised with {@code initEngine(true, true, true)} over
 * the test content item and expects three validated entity annotations.
 */
@Test
public void testEntityLinkingEnhancementEngine() throws Exception {
    //prepare the content item and the engine under test
    ContentItem contentItem = initContentItem();
    NamedEntityTaggingEngine engine = initEngine(true, true, true);
    //let the engine compute the enhancements
    engine.computeEnhancements(contentItem);
    //validate the created entity annotations and check their number
    assertEquals(3, validateAllEntityAnnotations(engine, contentItem));
}
use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.
the class TestEntityLinkingEnhancementEngine method testPersonLinking.
/**
 * Runs an engine initialised with {@code initEngine(true, false, false)} over
 * the test content item and expects a single validated entity annotation.
 */
@Test
public void testPersonLinking() throws Exception {
    //prepare the content item and the engine under test
    ContentItem contentItem = initContentItem();
    NamedEntityTaggingEngine engine = initEngine(true, false, false);
    //let the engine compute the enhancements
    engine.computeEnhancements(contentItem);
    //validate the created entity annotations and check their number
    assertEquals(1, validateAllEntityAnnotations(engine, contentItem));
}
Aggregations