use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.
the class DereferenceEngineTest method testOfflineMode.
/**
 * Tests the {@link OfflineMode} handling of the {@link EntityDereferenceEngine}.
 * <p>
 * The engine is created with a dereferencer whose
 * <code>supportsOfflineMode()</code> returns <code>false</code>. The test
 * asserts that the engine does not answer
 * {@link EnhancementEngine#CANNOT_ENHANCE} before offline mode is set and
 * that it does answer {@link EnhancementEngine#CANNOT_ENHANCE} afterwards.
 *
 * @throws Exception on any failure while creating the content item or the engine
 */
@Test
public void testOfflineMode() throws Exception {
    ContentItem ci = getContentItem("urn:test:testOfflineMode");
    // dereferencer that explicitly reports that it can not work offline
    EntityDereferencer onlineDereferencer = new TestDereferencer(null) {
        @Override
        public boolean supportsOfflineMode() {
            return false;
        }
    };
    Dictionary<String, Object> dict = new Hashtable<String, Object>();
    dict.put(EnhancementEngine.PROPERTY_NAME, "online");
    dict.put(FILTER_CONTENT_LANGUAGES, false);
    dict.put(FILTER_ACCEPT_LANGUAGES, false);
    EntityDereferenceEngine engine = new EntityDereferenceEngine(onlineDereferencer, new DereferenceEngineConfig(dict, null));
    //engine in online mode
    //(JUnit expects the reference value first and the actual value second,
    // so that failure messages are reported the right way round)
    Assert.assertNotEquals(EnhancementEngine.CANNOT_ENHANCE, engine.canEnhance(ci));
    //set engine in offline mode
    engine.setOfflineMode(true);
    Assert.assertEquals(EnhancementEngine.CANNOT_ENHANCE, engine.canEnhance(ci));
}
use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.
the class TestLocationEnhancementEngine method testLocationEnhancementEngine.
/**
 * Runs <code>locationEnhancementEngine</code> on a content item that carries
 * three text annotations and validates all entity annotations found in the
 * resulting metadata.
 * <p>
 * If the engine fails with an {@link EngineException} the failure is handed
 * to {@link RemoteServiceHelper} and, if that call returns, the test ends
 * without validating anything.
 *
 * @throws IOException declared by the test setup helpers
 * @throws EngineException declared for failures of the engine
 */
@Test
public void testLocationEnhancementEngine() throws IOException, EngineException {
    // the content item under test
    ContentItem ci = getContentItem("urn:org.apache:stanbol.enhancer:text:content-item:person", CONTEXT);
    // the three text annotations this test feeds into the engine
    getTextAnnotation(ci, PERSON, CONTEXT, DBPEDIA_PERSON);
    getTextAnnotation(ci, ORGANISATION, CONTEXT, DBPEDIA_ORGANISATION);
    getTextAnnotation(ci, PLACE, CONTEXT, DBPEDIA_PLACE);
    // let the engine compute the enhancements
    try {
        locationEnhancementEngine.computeEnhancements(ci);
    } catch (EngineException ex) {
        // presumably rethrows unless the remote service was merely
        // unavailable/overloaded -- confirm against RemoteServiceHelper
        RemoteServiceHelper.checkServiceUnavailable(ex, "overloaded with requests");
        return;
    }
    Map<IRI, RDFTerm> expected = new HashMap<IRI, RDFTerm>();
    expected.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
    String engineClassName = locationEnhancementEngine.getClass().getName();
    expected.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(engineClassName));
    // a null value marks the confidence as a required property
    expected.put(Properties.ENHANCER_CONFIDENCE, null);
    /*
     * Note:
     * - The results depend on the data served by geonames.org. A failing
     *   test may therefore also mean that the geonames.org data have
     *   changed.
     */
    int validatedEntityAnnotations = validateAllEntityAnnotations(ci.getMetadata(), expected);
    // Two suggestions for New Zealand plus one hierarchy entry for the
    // first suggestion.
    // NOTE 2012-10-10: expected value changed back to "3" as geonames.org
    //   again returns "Oceania" as parent for "New Zealand"
    // NOTE 2012-11-12: check deactivated, because whether "Oceania" is
    //   returned as parent for "New Zealand" changes every few weeks
    //assertEquals(3, validatedEntityAnnotations);
}
use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.
the class LanguageDetectionEngineTest method testEngine.
/**
 * Tests the language detection engine and validates the created enhancements.
 * <p>
 * The first entry of <code>TEST_FILE_NAMES</code> is loaded from the
 * classpath, processed by a freshly activated
 * {@link LanguageDetectionEnhancementEngine} and the resulting metadata are
 * checked: at least one TextAnnotation is expected, the detected language
 * must be "en" and no EntityAnnotations are expected.
 *
 * @throws EngineException declared for failures of the engine
 * @throws IOException on any error while reading the test resource
 * @throws ConfigurationException declared for failures while activating the engine
 * @throws LangDetectException declared for failures of the language detection
 */
@Test
public void testEngine() throws EngineException, ConfigurationException, LangDetectException, IOException {
    LOG.info("Testing engine: {}", TEST_FILE_NAMES[0]);
    InputStream in = LanguageDetectionEngineTest.class.getClassLoader().getResourceAsStream(TEST_FILE_NAMES[0]);
    assertNotNull("failed to load resource " + TEST_FILE_NAMES[0], in);
    String text;
    try {
        text = IOUtils.toString(in, "UTF-8");
    } finally {
        //release the resource stream even if reading the text fails
        in.close();
    }
    LanguageDetectionEnhancementEngine langIdEngine = new LanguageDetectionEnhancementEngine();
    ComponentContext context = new MockComponentContext();
    context.getProperties().put(EnhancementEngine.PROPERTY_NAME, "langdetect");
    langIdEngine.activate(context);
    ContentItem ci = ciFactory.createContentItem(new StringSource(text));
    langIdEngine.computeEnhancements(ci);
    HashMap<IRI, RDFTerm> expectedValues = new HashMap<IRI, RDFTerm>();
    expectedValues.put(Properties.ENHANCER_EXTRACTED_FROM, ci.getUri());
    expectedValues.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(langIdEngine.getClass().getName()));
    int textAnnotationCount = validateAllTextAnnotations(ci.getMetadata(), text, expectedValues);
    assertTrue("A TextAnnotation is expected", textAnnotationCount > 0);
    //even though these tests do not validate the detection quality
    //we expect that "en" is detected as best guess for the parsed text
    assertEquals("The detected language for text '" + text + "' MUST BE 'en'", "en", EnhancementEngineHelper.getLanguage(ci));
    int entityAnnoNum = validateAllEntityAnnotations(ci.getMetadata(), expectedValues);
    assertEquals("No EntityAnnotations are expected", 0, entityAnnoNum);
}
use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.
the class ContentItemReaderWriterTest method testReader.
/**
 * Serializes the test <code>contentItem</code>, reads it back with
 * <code>ciReader</code> and compares the read ContentItem with the original:
 * URI, metadata, main Blob, further Blob content parts, execution metadata
 * and the request properties content part.
 *
 * @throws Exception on any failure while serializing or reading the ContentItem
 */
@Test
public void testReader() throws Exception {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    MediaType contentType = serializeContentItem(out);
    ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
    ContentItem ci = ciReader.readFrom(ContentItem.class, null, null, contentType, null, in);
    //assert ID
    assertEquals(contentItem.getUri(), ci.getUri());
    //assert metadata: removing the read triples from a copy of the original
    //metadata must change the copy and must leave it empty
    Graph copy = new SimpleGraph();
    copy.addAll(contentItem.getMetadata());
    assertTrue(copy.removeAll(ci.getMetadata()));
    assertTrue(copy.isEmpty());
    //assert Blob
    assertEquals(contentItem.getBlob().getMimeType(), ci.getBlob().getMimeType());
    String content = IOUtils.toString(contentItem.getStream(), "UTF-8");
    String readContent = IOUtils.toString(ci.getStream(), "UTF-8");
    assertEquals(content, readContent);
    Iterator<Entry<IRI, Blob>> contentItemBlobsIt = ContentItemHelper.getContentParts(contentItem, Blob.class).entrySet().iterator();
    Iterator<Entry<IRI, Blob>> ciBlobsIt = ContentItemHelper.getContentParts(ci, Blob.class).entrySet().iterator();
    //later used to validate enhancementMetadata
    Set<String> expectedParsedContentIds = new HashSet<String>();
    //NOTE(review): this loop ends as soon as one of the two iterators is
    //  exhausted, so a different number of Blob parts is not detected here
    while (contentItemBlobsIt.hasNext() && ciBlobsIt.hasNext()) {
        Entry<IRI, Blob> contentItemBlobPart = contentItemBlobsIt.next();
        Entry<IRI, Blob> ciBlobPart = ciBlobsIt.next();
        expectedParsedContentIds.add(ciBlobPart.getKey().getUnicodeString());
        assertEquals(contentItemBlobPart.getKey(), ciBlobPart.getKey());
        String partContentType = contentItemBlobPart.getValue().getMimeType();
        String readPartContentType = ciBlobPart.getValue().getMimeType();
        assertEquals(partContentType, readPartContentType);
        String partContent = IOUtils.toString(contentItemBlobPart.getValue().getStream(), "UTF-8");
        String readPartContent = IOUtils.toString(ciBlobPart.getValue().getStream(), "UTF-8");
        assertEquals(partContent, readPartContent);
    }
    //validate ExecutionMetadata
    Graph executionMetadata = contentItem.getPart(ExecutionMetadata.CHAIN_EXECUTION, Graph.class);
    Graph readExecutionMetadata = ci.getPart(ExecutionMetadata.CHAIN_EXECUTION, Graph.class);
    assertNotNull(executionMetadata);
    assertNotNull(readExecutionMetadata);
    assertEquals(executionMetadata.size(), readExecutionMetadata.size());
    //validate the request properties
    Map<String, Object> reqProp = ContentItemHelper.getRequestPropertiesContentPart(ci);
    assertNotNull(reqProp);
    //the parsed value MUST BE overridden by the two content parts parsed
    assertEquals(expectedParsedContentIds, getParsedContentURIs(reqProp));
    Collection<String> outputContent = getOutputContent(reqProp);
    assertEquals(1, outputContent.size());
    //expected value first, actual value second
    assertEquals("*/*", outputContent.iterator().next());
    //NOTE(review): outputContentPart is created locally and is not read from
    //  reqProp, so the two assertions below can not fail. Confirm whether a
    //  lookup on reqProp was intended here.
    Collection<String> outputContentPart = Collections.singleton("*");
    assertEquals(1, outputContentPart.size());
    assertEquals("*", outputContentPart.iterator().next());
}
use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.
the class ContentItemReader method createContentItem.
/**
 * Creates a ContentItem
 * @param id the ID or <code>null</code> if not known
 * @param metadata the metadata or <code>null</code> if not parsed. NOTE that
 * if <code>id == null</code> also <code>metadata == null</code> and
 * <code>id != null</code> also <code>metadata != null</code>.
 * @param content the {@link FileItemStream} of the MIME part representing
 * the content. If {@link FileItemStream#getContentType()} is compatible with
 * "multipart/*" then this will further parse for multiple parsed content
 * versions: the first part becomes the main content of the created
 * {@link ContentItem} and every further part is added as an additional
 * {@link Blob} content part. In any other case the contents of the parsed
 * {@link FileItemStream} will be directly added as content for the
 * {@link ContentItem} created by this method.
 * @param parsedContentParts used to add the IDs of parsed contentParts,
 * including the URI of the main content
 * @return the created content item
 * @throws IOException on any error while accessing the contents of the parsed
 * {@link FileItemStream}
 * @throws FileUploadException if the parsed contents are not correctly
 * encoded Multipart MIME or if a multipart content does not contain any part
 */
private ContentItem createContentItem(IRI id, Graph metadata, FileItemStream content, Set<String> parsedContentParts) throws IOException, FileUploadException {
    MediaType partContentType = MediaType.valueOf(content.getContentType());
    ContentItem contentItem = null;
    ContentItemFactory ciFactory = getContentItemFactory();
    if (MULTIPART.isCompatible(partContentType)) {
        log.debug(" - multiple (alternate) ContentParts");
        //multiple contentParts are parsed
        FileItemIterator contentPartIterator = fu.getItemIterator(new MessageBodyReaderContext(content.openStream(), partContentType));
        while (contentPartIterator.hasNext()) {
            FileItemStream fis = contentPartIterator.next();
            if (contentItem == null) {
                //the first part is used to create the ContentItem
                log.debug(" - create ContentItem {} for content (type:{})", id, fis.getContentType());
                contentItem = ciFactory.createContentItem(id, new StreamSource(fis.openStream(), fis.getContentType()), metadata);
            } else {
                //all further parts are added as Blobs to the ContentItem
                log.debug(" - create Blob for content (type:{})", fis.getContentType());
                Blob blob = ciFactory.createBlob(new StreamSource(fis.openStream(), fis.getContentType()));
                IRI contentPartId;
                if (fis.getFieldName() != null && !fis.getFieldName().isEmpty()) {
                    contentPartId = new IRI(fis.getFieldName());
                } else {
                    //generating a random ID might break metadata
                    //TODO maybe we should throw an exception instead
                    contentPartId = new IRI("urn:contentpart:" + randomUUID());
                }
                log.debug(" ... add Blob {} to ContentItem {} with content (type:{})", new Object[] { contentPartId, id, fis.getContentType() });
                contentItem.addPart(contentPartId, blob);
                parsedContentParts.add(contentPartId.getUnicodeString());
            }
        }
        if (contentItem == null) {
            //no part was contained in the multipart content: report this with
            //the declared exception instead of failing with a
            //NullPointerException when accessing the main content below
            throw new FileUploadException("The multipart content (type: " + partContentType + ") parsed for ContentItem " + id + " does not contain any content part");
        }
    } else {
        log.debug(" - create ContentItem {} for content (type:{})", id, content.getContentType());
        contentItem = ciFactory.createContentItem(id, new StreamSource(content.openStream(), content.getContentType()), metadata);
    }
    //add the URI of the main content to the parsed contentParts
    parsedContentParts.add(contentItem.getPartUri(0).getUnicodeString());
    return contentItem;
}
Aggregations