use of org.apache.stanbol.enhancer.servicesapi.impl.StringSource in project stanbol by apache.
the class KeywordLinkingEngineTest method testEngine.
/**
 * Checks that the enhancements written by the engine conform to the rules
 * defined for the Stanbol Enhancement Structure.
 * <p>
 * The engine is created with redirect processing set to
 * {@link RedirectProcessingMode#FOLLOW} and run over {@code TEST_TEXT},
 * which is flagged as English. The test then expects four
 * fise:TextAnnotations and five fise:EntityAnnotations.
 *
 * @throws IOException declared by the test signature
 * @throws EngineException declared by the test signature
 */
@Test
public void testEngine() throws IOException, EngineException {
    // configure the linker so that redirects are followed
    EntityLinkerConfig redirectFollowingConfig = new EntityLinkerConfig();
    redirectFollowingConfig.setRedirectProcessingMode(RedirectProcessingMode.FOLLOW);
    TextAnalyzerConfig analyzerConfig = new TextAnalyzerConfig();
    KeywordLinkingEngine engine = KeywordLinkingEngine.createInstance(
            openNLP, searcher, analyzerConfig, redirectFollowingConfig);
    engine.referencedSiteName = TEST_REFERENCED_SITE_NAME;

    StringSource textSource = new StringSource(TEST_TEXT);
    ContentItem ci = ciFactory.createContentItem(textSource);
    // the language annotation tells the engine that the text is English
    PlainLiteralImpl english = new PlainLiteralImpl("en");
    ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, english));

    // run the engine
    engine.computeEnhancements(ci);

    // property values every created enhancement is validated against
    String engineClassName = engine.getClass().getName();
    LiteralFactory literals = LiteralFactory.getInstance();
    Map<IRI, RDFTerm> expected = new HashMap<IRI, RDFTerm>();
    expected.put(ENHANCER_EXTRACTED_FROM, ci.getUri());
    expected.put(DC_CREATOR, literals.createTypedLiteral(engineClassName));
    // a null value marks the confidence as a required property
    expected.put(Properties.ENHANCER_CONFIDENCE, null);

    // check the fise:TextAnnotations
    int textAnnotationCount = validateAllTextAnnotations(ci.getMetadata(), TEST_TEXT, expected);
    assertEquals("Four fise:TextAnnotations are expected by this Test", 4, textAnnotationCount);

    // check the fise:EntityAnnotations
    int entityAnnotationCount = validateAllEntityAnnotations(ci, expected);
    assertEquals("Five fise:EntityAnnotations are expected by this Test", 5, entityAnnotationCount);
}
use of org.apache.stanbol.enhancer.servicesapi.impl.StringSource in project stanbol by apache.
the class TestKuromojiNlpEngine method setUpServices.
/**
 * Creates and activates a fresh {@link KuromojiNlpEngine} and builds the
 * content item used by the tests, annotated with the language "ja".
 *
 * @throws IOException declared by the signature
 * @throws ConfigurationException declared by the signature
 */
@Before
public void setUpServices() throws IOException, ConfigurationException {
    // component configuration handed to the engine on activation
    Dictionary<String, Object> engineConfig = new Hashtable<String, Object>();
    engineConfig.put(EnhancementEngine.PROPERTY_NAME, "gosen-nlp");

    engine = new KuromojiNlpEngine();
    // these fields would otherwise be injected by the container
    engine.parentResourceLoader = new DataFileResourceLoader(dataFileProvider);
    engine.analysedTextFactory = AnalysedTextFactory.getDefaultInstance();
    engine.activate(new MockComponentContext(engineConfig));

    StringSource textSource = new StringSource(text);
    contentItem = contentItemFactory.createContentItem(id, textSource);
    // annotate the content item as Japanese
    PlainLiteralImpl japanese = new PlainLiteralImpl("ja");
    TripleImpl languageTriple = new TripleImpl(id, Properties.DC_LANGUAGE, japanese);
    contentItem.getMetadata().add(languageTriple);
}
use of org.apache.stanbol.enhancer.servicesapi.impl.StringSource in project stanbol by apache.
the class LanguageDetectionEngineTest method testNonTextContent.
/**
 * Activates a {@link LanguageDetectionEnhancementEngine} named "langdetect"
 * and runs it on a content item whose whole text is {@code "123"}.
 * The method contains no assertions; it passes when no exception escapes.
 */
@Test
public void testNonTextContent() throws EngineException, ConfigurationException, LangDetectException, IOException {
    LanguageDetectionEnhancementEngine detector = new LanguageDetectionEnhancementEngine();

    // activate the engine under the name "langdetect"
    ComponentContext componentContext = new MockComponentContext();
    componentContext.getProperties().put(EnhancementEngine.PROPERTY_NAME, "langdetect");
    detector.activate(componentContext);

    // content made up of digits only
    StringSource digitsOnly = new StringSource("123");
    ContentItem digitsItem = ciFactory.createContentItem(digitsOnly);
    detector.computeEnhancements(digitsItem);
}
use of org.apache.stanbol.enhancer.servicesapi.impl.StringSource in project stanbol by apache.
the class ContentItemReaderWriterTest method createTestContentItem.
/**
 * Builds the shared fixtures of this test class: a content item with an
 * HTML main content and an additional plain-text part, its request
 * properties and execution metadata, and the {@code ciWriter} /
 * {@code ciReader} instances.
 *
 * @throws IOException declared by the signature
 */
@BeforeClass
public static void createTestContentItem() throws IOException {
    // main content: a small HTML document
    String html = "<html>\n" + " <body>\n" + " This is a <b>ContentItem</b> to <i>Mime Multipart</i> test!\n" + " </body>\n" + "</html>";
    IRI itemUri = new IRI("urn:test");
    StringSource htmlSource = new StringSource(html, "text/html");
    contentItem = ciFactory.createContentItem(itemUri, htmlSource);

    RuntimeDelegate.setInstance(new RuntimeDelegateImpl());

    // additional part holding a plain-text variant of the content
    StringSource plainTextSource = new StringSource("This is a ContentItem to Mime Multipart test!");
    contentItem.addPart(new IRI("run:text:text"), ciFactory.createBlob(plainTextSource));

    // type the content item as a document in its metadata
    TripleImpl typeTriple = new TripleImpl(itemUri, RDF.type, new IRI("urn:types:Document"));
    contentItem.getMetadata().add(typeTriple);

    // request properties: mark the main content as parsed and ask for
    // all contents and content parts to be included
    Map<String, Object> requestProperties = initRequestPropertiesContentPart(contentItem);
    String mainPartUri = contentItem.getPartUri(0).getUnicodeString();
    requestProperties.put(PARSED_CONTENT_URIS, Collections.singleton(mainPartUri));
    requestProperties.put(OUTPUT_CONTENT, Collections.singleton("*/*"));
    requestProperties.put(OUTPUT_CONTENT_PART, Collections.singleton("*"));
    requestProperties.put(RDF_FORMAT, "application/rdf+xml");

    // execution metadata for a chain "testChain" with one node "testEngine"
    Graph executionMetadata = initExecutionMetadataContentPart(contentItem);
    BlankNodeOrIRI executionPlan = createExecutionPlan(executionMetadata, "testChain", null);
    writeExecutionNode(executionMetadata, executionPlan, "testEngine", true, null, null);
    initExecutionMetadata(executionMetadata, executionMetadata, contentItem.getUri(), "testChain", false);

    ciWriter = new ContentItemWriter(Serializer.getInstance());
    // reader wired to the default parser and the factory used by this test
    ciReader = new ContentItemReader() {

        @Override
        protected ContentItemFactory getContentItemFactory() {
            return ciFactory;
        }

        @Override
        protected Parser getParser() {
            return Parser.getInstance();
        }
    };
}
use of org.apache.stanbol.enhancer.servicesapi.impl.StringSource in project stanbol by apache.
the class TikaEngineTest method testText.
/**
 * Tests that text is not processed: a content item created from a plain
 * string is expected to hold exactly one {@link Blob} content part.
 */
@Test
public void testText() throws EngineException, IOException {
    log.info(">>> testText <<<");
    String plainText = "The Stanbol enhancer can detect famous cities such as "
            + "Paris and people such as Bob Marley.";
    StringSource plainTextSource = new StringSource(plainText);
    ContentItem ci = ciFactory.createContentItem(plainTextSource);
    // only the single Blob of the text content must be present
    int blobCount = ContentItemHelper.getContentParts(ci, Blob.class).size();
    Assert.assertEquals(1, blobCount);
}
Aggregations