use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.
the class TikaEngineTest method testText.
/**
 * Tests that text is not processed: after the engine has been given the chance
 * to enhance a plain text item, the item must still expose exactly one
 * {@link Blob} content part.
 *
 * @throws EngineException if the engine fails while computing enhancements
 * @throws IOException if the content item can not be created
 */
@Test
public void testText() throws EngineException, IOException {
    log.info(">>> testText <<<");
    String text = "The Stanbol enhancer can detect famous cities such as " + "Paris and people such as Bob Marley.";
    ContentItem ci = ciFactory.createContentItem(new StringSource(text));
    // Actually run the engine; without this call the assertion below would only
    // inspect the freshly created ContentItem and could never fail.
    // canEnhance is consulted first so computeEnhancements is only called when
    // the engine itself reports that it accepts the item.
    if (engine.canEnhance(ci) != CANNOT_ENHANCE) {
        engine.computeEnhancements(ci);
    }
    Assert.assertEquals(1, ContentItemHelper.getContentParts(ci, Blob.class).size());
}
use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.
the class TikaEngineTest method testGEOMetadata.
/**
 * Enhances {@code testJPEG_GEO.jpg}, submitted with the octet-stream media type,
 * and verifies the extracted coordinates: first via the location properties of
 * the media namespace, then via the lat/long properties of the geo namespace.
 */
@Test
public void testGEOMetadata() throws EngineException, IOException, ParseException {
    log.info(">>> testGEOMetadata <<<");
    final String expectedLatitude = "12.54321";
    final String expectedLongitude = "-54.1234";

    // properties of the media namespace
    IRI mediaHasLocation = new IRI(NamespaceEnum.media + "hasLocation");
    IRI mediaLatitude = new IRI(NamespaceEnum.media + "locationLatitude");
    IRI mediaLongitude = new IRI(NamespaceEnum.media + "locationLongitude");

    ContentItem ci = createContentItem("testJPEG_GEO.jpg", OCTET_STREAM.toString());
    assertTrue(engine.canEnhance(ci) != CANNOT_ENHANCE);
    engine.computeEnhancements(ci);

    // exactly one hasLocation value is expected and it has to be a resource
    Iterator<Triple> locationTriples = ci.getMetadata().filter(ci.getUri(), mediaHasLocation, null);
    assertTrue(locationTriples.hasNext());
    RDFTerm locationTerm = locationTriples.next().getObject();
    assertFalse(locationTriples.hasNext());
    assertTrue(locationTerm instanceof BlankNodeOrIRI);

    // coordinates attached to the location resource
    BlankNodeOrIRI location = verifyBlankNodeOrIRI(ci, mediaHasLocation);
    verifyValue(ci, location, mediaLatitude, XSD.double_, expectedLatitude);
    verifyValue(ci, location, mediaLongitude, XSD.double_, expectedLongitude);

    // the same coordinates expressed with the geo namespace
    IRI geoLat = new IRI(NamespaceEnum.geo + "lat");
    IRI geoLong = new IRI(NamespaceEnum.geo + "long");
    verifyValue(ci, geoLat, XSD.double_, expectedLatitude);
    verifyValue(ci, geoLong, XSD.double_, expectedLongitude);
}
use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.
the class TikaEngineTest method testMetadata.
/**
 * Enhances {@code testMP3id3v24.mp3} (media type {@code audio/mpeg}) and verifies
 * the values written for a set of dc and media namespace properties.
 *
 * @throws EngineException if the engine fails while computing enhancements
 * @throws ParseException declared for the value verification helpers
 * @throws IOException if the test resource can not be read
 */
// The @Test annotation was missing, so a JUnit 4 runner silently skipped this method.
@Test
public void testMetadata() throws EngineException, ParseException, IOException {
    log.info(">>> testMetadata <<<");
    ContentItem ci = createContentItem("testMP3id3v24.mp3", "audio/mpeg");
    assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
    engine.computeEnhancements(ci);
    // dc namespace
    verifyValue(ci, new IRI(NamespaceEnum.dc + "creator"), null, "Test Artist");
    verifyValue(ci, new IRI(NamespaceEnum.dc + "title"), null, "Test Album");
    verifyValue(ci, new IRI(NamespaceEnum.dc + "format"), null, "audio/mpeg");
    // media namespace
    verifyValue(ci, new IRI(NamespaceEnum.media + "hasFormat"), null, "audio/mpeg");
    verifyValue(ci, new IRI(NamespaceEnum.media + "mainOriginalTitle"), null, "Test Album");
    verifyValue(ci, new IRI(NamespaceEnum.media + "hasContributor"), null, "Test Artist");
    verifyValue(ci, new IRI(NamespaceEnum.media + "releaseDate"), XSD.string, "2008");
    verifyValue(ci, new IRI(NamespaceEnum.media + "hasGenre"), null, "Rock");
    verifyValue(ci, new IRI(NamespaceEnum.media + "hasCreator"), null, "Test Artist");
}
use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.
the class CeliAnalyzedTextSentimentAnalysisEngineTest method testEngine.
/**
 * Runs the engine over the test text, declared as language {@code "it"}, and
 * checks that exactly two tokens of the {@link AnalysedText} carry a sentiment
 * annotation.
 * <p>
 * If computing the enhancements throws an {@link EngineException}, it is handed
 * to {@code RemoteServiceHelper.checkServiceUnavailable(..)} and the test ends
 * without validating anything (presumably that helper rethrows unless the remote
 * service is merely unreachable - confirm against RemoteServiceHelper).
 *
 * @throws IOException if the content item can not be created
 * @throws EngineException if the engine fails for a reason the helper does not tolerate
 */
@Test
public void testEngine() throws IOException, EngineException {
    ContentItem ci = ciFactory.createContentItem(new StringSource(text));
    Assert.assertNotNull(ci);
    AnalysedText at = atFactory.createAnalysedText(ci, ci.getBlob());
    Assert.assertNotNull(at);
    // statically declare the language of the test content
    ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("it")));
    Assert.assertEquals("it", EnhancementEngineHelper.getLanguage(ci));
    Assert.assertEquals("Can not enhance Test ContentItem", EnhancementEngine.ENHANCE_ASYNC, engine.canEnhance(ci));
    //compute the enhancements
    try {
        engine.computeEnhancements(ci);
    } catch (EngineException e) {
        RemoteServiceHelper.checkServiceUnavailable(e);
        //deactivate test
        return;
    }
    //now validate the enhancements
    int sentimentExpressionCnt = 0;
    for (Iterator<Token> tokens = at.getTokens(); tokens.hasNext(); ) {
        Token token = tokens.next();
        log.info("Token: {}", token);
        List<Value<Double>> sentimentExpressionsList = token.getAnnotations(NlpAnnotations.SENTIMENT_ANNOTATION);
        if (sentimentExpressionsList != null && !sentimentExpressionsList.isEmpty()) {
            sentimentExpressionCnt++;
        }
    }
    // assertEquals (rather than assertTrue on a comparison) reports the actual
    // count when the expectation is not met
    Assert.assertEquals("2 sentiment expressions should be recognized in: " + text, 2, sentimentExpressionCnt);
}
use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.
the class CeliSentimentAnalysisEngineTest method testInput.
/**
 * Sends {@code txt} through the sentiment analysis engine with the content
 * language fixed to {@code lang}, then validates the produced annotations:
 * two text annotations and no entity annotations are expected.
 * <p>
 * An {@link EngineException} raised inside the try block is passed to
 * {@code RemoteServiceHelper.checkServiceUnavailable(..)}.
 *
 * @param txt the text to analyse
 * @param lang the language code stored as the dc:language of the content item
 */
private void testInput(String txt, String lang) throws EngineException, IOException {
    ContentItem contentItem = wrapAsContentItem(txt);
    try {
        // the language of the test content is defined statically with a single triple
        contentItem.getMetadata().add(new TripleImpl(contentItem.getUri(), DC_LANGUAGE, new PlainLiteralImpl(lang)));
        sentimentAnalysisEngine.computeEnhancements(contentItem);
        TestUtils.logEnhancements(contentItem);

        // property values every validated annotation is checked against
        String engineClassName = sentimentAnalysisEngine.getClass().getName();
        HashMap<IRI, RDFTerm> expected = new HashMap<IRI, RDFTerm>();
        expected.put(DC_TYPE, CeliConstants.SENTIMENT_EXPRESSION);
        expected.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(engineClassName));
        expected.put(Properties.ENHANCER_EXTRACTED_FROM, contentItem.getUri());

        int textAnnotationCount = validateAllTextAnnotations(contentItem.getMetadata(), txt, expected);
        log.info(textAnnotationCount + " TextAnnotations found ...");
        assertTrue("2 sentiment expressions should be recognized in: " + txt, 2 == textAnnotationCount);

        int entityAnnotationCount = EnhancementStructureHelper.validateAllEntityAnnotations(contentItem.getMetadata(), expected);
        assertTrue("0 entity annotations should be recognized in: " + txt, 0 == entityAnnotationCount);
    } catch (EngineException e) {
        RemoteServiceHelper.checkServiceUnavailable(e);
    }
}
Aggregations