use of org.apache.stanbol.enhancer.servicesapi.impl.StringSource in project stanbol by apache.
the class LanguageDetectionEngineTest method testEngine.
/**
 * Runs the language detection engine on the first test resource and
 * validates the enhancements written to the content item.
 * <p>
 * The test expects at least one TextAnnotation, {@code "en"} as the detected
 * language and no EntityAnnotations.
 *
 * @throws EngineException propagated from the calls made by this test
 * @throws ConfigurationException propagated from the calls made by this test
 * @throws LangDetectException propagated from the calls made by this test
 * @throws IOException if the test resource cannot be read or closed
 */
@Test
public void testEngine() throws EngineException, ConfigurationException, LangDetectException, IOException {
    LOG.info("Testing engine: {}", TEST_FILE_NAMES[0]);
    String text;
    // try-with-resources guarantees the stream is closed even if reading fails
    try (InputStream in = LanguageDetectionEngineTest.class.getClassLoader().getResourceAsStream(TEST_FILE_NAMES[0])) {
        assertNotNull("failed to load resource " + TEST_FILE_NAMES[0], in);
        text = IOUtils.toString(in, "UTF-8");
    }
    // set up and activate the engine under the name "langdetect"
    LanguageDetectionEnhancementEngine langIdEngine = new LanguageDetectionEnhancementEngine();
    ComponentContext context = new MockComponentContext();
    context.getProperties().put(EnhancementEngine.PROPERTY_NAME, "langdetect");
    langIdEngine.activate(context);
    ContentItem ci = ciFactory.createContentItem(new StringSource(text));
    langIdEngine.computeEnhancements(ci);
    // values every created annotation is validated against
    HashMap<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
    expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
    expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(langIdEngine.getClass().getName()));
    int textAnnotationCount = validateAllTextAnnotations(ci.getMetadata(), text, expectedValues);
    assertTrue("A TextAnnotation is expected", textAnnotationCount > 0);
    // even though this test does not validate detection quality,
    // "en" is expected to be the best guess for the parsed text
    assertEquals("The detected language for text '" + text + "' MUST BE 'en'", "en", EnhancementEngineHelper.getLanguage(ci));
    int entityAnnoNum = validateAllEntityAnnotations(ci.getMetadata(), expectedValues);
    assertEquals("No EntityAnnotations are expected", 0, entityAnnoNum);
}
use of org.apache.stanbol.enhancer.servicesapi.impl.StringSource in project stanbol by apache.
the class GenericEnhancerUiResource method enhanceFromForm.
/**
 * Form-based OpenCalais-compatible interface.
 * <p>
 * When {@code buildAjaxview} is {@code false} the request is forwarded to
 * {@code enhanceFromData}. Otherwise the content is enhanced here and an
 * HTML view of the content item is returned; in that mode an
 * {@link EnhancementException} is caught and handed to the view instead of
 * being thrown.
 * <p>
 * TODO: should we parse the OpenCalais paramsXML and find the closest Stanbol
 * Enhancer semantics too?
 * <p>
 * Note: the format parameter is not part of the official API
 *
 * @param content the text to enhance, taken from the "content" form parameter
 * @param format the RDF serialization format set on the AJAX view
 * @param buildAjaxview if {@code true} the AJAX (HTML) view is built
 * @param headers the HTTP headers of the request
 * @return the enhancement response or the AJAX view
 * @throws WebApplicationException with status 415 (Unsupported Media Type)
 *         if the "content" form parameter is missing
 * @throws EnhancementException
 *             if the content is somehow corrupted (non-AJAX mode only)
 * @throws IOException
 */
@POST
@Consumes(APPLICATION_FORM_URLENCODED)
public Response enhanceFromForm(@FormParam("content") String content, @FormParam("format") String format, @FormParam("ajax") boolean buildAjaxview, @Context HttpHeaders headers) throws EnhancementException, IOException {
    log.info("enhance from Form: " + content);
    if (content == null) {
        // no "content" form parameter: the body was not sent as form content
        throw new WebApplicationException(Response.status(Response.Status.UNSUPPORTED_MEDIA_TYPE).entity("Parsing Content as 'application/x-www-form-urlencoded' is not supported! " + "Please directly POST the content and set the 'Content-Type' " + "header to the media type of the parsed content. 'application/" + "octet-stream' SHOULD BE used if the media type of the parsed " + "content is not known.\n").build());
    }
    ContentItem ci = ciFactory.createContentItem(new StringSource(content));
    if (!buildAjaxview) {
        //rewrite to a normal EnhancementRequest
        return enhanceFromData(ci, false, null, false, null, false, null, headers);
    } else {
        //enhance and build the AJAX response
        EnhancementException enhancementException;
        try {
            enhance(ci, null);
            enhancementException = null;
        } catch (EnhancementException e) {
            // not rethrown: the failure is passed on to the view below
            enhancementException = e;
        }
        ContentItemResource contentItemResource = new ContentItemResource(null, ci, getUriInfo(), "", serializer, getLayoutConfiguration(), enhancementException);
        contentItemResource.setRdfSerializationFormat(format);
        Viewable ajaxView = new Viewable("/ajax/contentitem", contentItemResource, ContentItemResource.class);
        ResponseBuilder rb = Response.ok(ajaxView);
        rb.header(HttpHeaders.CONTENT_TYPE, TEXT_HTML + "; charset=UTF-8");
        //addCORSOrigin(servletContext, rb, headers);
        return rb.build();
    }
}
use of org.apache.stanbol.enhancer.servicesapi.impl.StringSource in project stanbol by apache.
the class EntityLinkingEngineTest method testEngine.
/**
 * Verifies that the enhancements produced by the engine conform to the
 * rules defined for the Stanbol Enhancement Structure.
 * <p>
 * Four fise:TextAnnotations and five fise:EntityAnnotations are expected
 * for the test text.
 *
 * @throws IOException
 * @throws EngineException
 */
@Test
public void testEngine() throws IOException, EngineException {
    // linker configuration: follow redirects and require two matching
    // tokens (the latter is assumed by the expected counts below)
    final EntityLinkerConfig config = new EntityLinkerConfig();
    config.setRedirectProcessingMode(RedirectProcessingMode.FOLLOW);
    config.setMinFoundTokens(2);
    final TextProcessingConfig textConfig = new TextProcessingConfig();
    final EntityLinkingEngine linkingEngine =
            new EntityLinkingEngine("dummy", searcher, textConfig, config, labelTokenizer);

    // content item: marked as English and carrying the prepared AnalysedText
    final ContentItem ci = ciFactory.createContentItem(new StringSource(TEST_TEXT));
    ci.getMetadata().add(
            new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("en")));
    ci.addPart(AnalysedText.ANALYSED_TEXT_URI, TEST_ANALYSED_TEXT);

    // run the engine
    linkingEngine.computeEnhancements(ci);

    // properties each annotation is checked against; a null value marks
    // the property (here: confidence) as required without fixing its value
    final Map<IRI, RDFTerm> expected = new HashMap<IRI, RDFTerm>();
    expected.put(ENHANCER_EXTRACTED_FROM, ci.getUri());
    expected.put(DC_CREATOR,
            LiteralFactory.getInstance().createTypedLiteral(linkingEngine.getClass().getName()));
    expected.put(Properties.ENHANCER_CONFIDENCE, null);

    // check the created fise:TextAnnotations
    final int textAnnotations = validateAllTextAnnotations(ci.getMetadata(), TEST_TEXT, expected);
    assertEquals("Four fise:TextAnnotations are expected by this Test", 4, textAnnotations);

    // check the created fise:EntityAnnotations
    final int entityAnnotations = validateAllEntityAnnotations(ci, expected);
    assertEquals("Five fise:EntityAnnotations are expected by this Test", 5, entityAnnotations);
}
use of org.apache.stanbol.enhancer.servicesapi.impl.StringSource in project stanbol by apache.
the class BlobTest method testStringWithCustomCharset.
/**
 * Tests that a custom charset is reported as the "charset" parameter of the
 * created Blob, regardless of whether the content is provided as a
 * {@link StringSource}, a {@link ByteArraySource} or a {@link StreamSource}.
 * In every case the media type is expected to be "text/plain".
 *
 * @throws IOException
 */
@Test
public void testStringWithCustomCharset() throws IOException {
    String test = "Exámplê";
    Charset charset = Charset.forName("ISO-8859-4");
    String mediaType = "text/plain; charset=" + charset.name();
    //first via a StringSource
    ContentSource cs = new StringSource(test, charset, "text/plain");
    assertPlainTextWithCharset(createBlob(cs), charset);
    //2nd via a ByteArray
    byte[] data = test.getBytes(charset);
    cs = new ByteArraySource(data, mediaType);
    assertPlainTextWithCharset(createBlob(cs), charset);
    //3rd as Stream
    cs = new StreamSource(new ByteArrayInputStream(data), mediaType);
    assertPlainTextWithCharset(createBlob(cs), charset);
}

/**
 * Asserts that the Blob has the media type "text/plain" and a "charset"
 * parameter equal to the name of the expected charset.
 *
 * @param blob the Blob to check
 * @param expected the charset the Blob is expected to report
 */
private static void assertPlainTextWithCharset(Blob blob, Charset expected) {
    Assert.assertEquals("text/plain", blob.getMimeType());
    Assert.assertTrue(blob.getParameter().containsKey("charset"));
    Assert.assertEquals(expected.name(), blob.getParameter().get("charset"));
}
use of org.apache.stanbol.enhancer.servicesapi.impl.StringSource in project stanbol by apache.
the class BlobTest method testString.
/**
 * Checks that UTF-8 is used as the charset when a {@link StringSource} is
 * created without an explicit one: the Blob must report "text/plain" with
 * a UTF-8 "charset" parameter, and its bytes must decode back to the
 * original text.
 *
 * @throws IOException
 */
@Test
public void testString() throws IOException {
    final String expected = "Exámplê";
    // create the Blob from a StringSource using the default charset
    final ContentSource source = new StringSource(expected);
    final Blob blob = createBlob(source);

    // media type and charset parameter
    Assert.assertEquals("text/plain", blob.getMimeType());
    Assert.assertTrue(blob.getParameter().containsKey("charset"));
    Assert.assertEquals(UTF8.name(), blob.getParameter().get("charset"));

    // the content decoded as UTF-8 must round-trip
    final byte[] raw = IOUtils.toByteArray(blob.getStream());
    final String decoded = new String(raw, UTF8);
    Assert.assertEquals(expected, decoded);
}
Aggregations