use of org.apache.stanbol.enhancer.servicesapi.ContentItemFactory in project stanbol by apache.
the class ContentItemReaderWriterTest method createTestContentItem.
/**
 * Prepares the shared fixtures of this test class: the {@code contentItem}
 * holding an HTML main content plus one additional plain text part, and the
 * {@code ciWriter} / {@code ciReader} instances.
 *
 * @throws IOException declared because creating the content item and the
 *         additional blob may fail while reading the provided sources
 */
@BeforeClass
public static void createTestContentItem() throws IOException {
    final IRI itemUri = new IRI("urn:test");
    final String html = "<html>\n" + " <body>\n" + " This is a <b>ContentItem</b> to <i>Mime Multipart</i> test!\n" + " </body>\n" + "</html>";
    contentItem = ciFactory.createContentItem(itemUri, new StringSource(html, "text/html"));

    // register the JAX-RS runtime delegate before further parts are added
    RuntimeDelegate.setInstance(new RuntimeDelegateImpl());

    // additional plain text version of the content
    final Blob plainText = ciFactory.createBlob(new StringSource("This is a ContentItem to Mime Multipart test!"));
    contentItem.addPart(new IRI("run:text:text"), plainText);

    // a single triple typing the content item as document
    contentItem.getMetadata().add(new TripleImpl(itemUri, RDF.type, new IRI("urn:types:Document")));

    // Request properties: flag the main content as parsed and request that
    // every content and every content part is included in the output.
    final String mainContentUri = contentItem.getPartUri(0).getUnicodeString();
    final Map<String, Object> requestProps = initRequestPropertiesContentPart(contentItem);
    requestProps.put(PARSED_CONTENT_URIS, Collections.singleton(mainContentUri));
    requestProps.put(OUTPUT_CONTENT, Collections.singleton("*/*"));
    requestProps.put(OUTPUT_CONTENT_PART, Collections.singleton("*"));
    requestProps.put(RDF_FORMAT, "application/rdf+xml");

    // Execution metadata: a plan for "testChain" with the single node "testEngine"
    final Graph executionMetadata = initExecutionMetadataContentPart(contentItem);
    final BlankNodeOrIRI executionPlan = createExecutionPlan(executionMetadata, "testChain", null);
    writeExecutionNode(executionMetadata, executionPlan, "testEngine", true, null, null);
    initExecutionMetadata(executionMetadata, executionMetadata, contentItem.getUri(), "testChain", false);

    ciWriter = new ContentItemWriter(Serializer.getInstance());
    // the reader under test is wired to the test's own parser and factory
    ciReader = new ContentItemReader() {
        @Override
        protected ContentItemFactory getContentItemFactory() {
            return ciFactory;
        }

        @Override
        protected Parser getParser() {
            return Parser.getInstance();
        }
    };
}
use of org.apache.stanbol.enhancer.servicesapi.ContentItemFactory in project stanbol by apache.
the class ContentItemReader method readFrom.
/**
 * Reads a {@link ContentItem} from the entity of a request.
 * <p>
 * If the media type is compatible with {@code MULTIPART} the MIME parts are
 * processed in the order they appear:
 * <ul>
 * <li><code>metadata</code>: RDF parsed as metadata of the content item. It
 * MUST appear before the <code>content</code> part. If no content item ID was
 * provided otherwise, the name of this part is used as ID.
 * <li><code>content</code>: the content (see {@code createContentItem}).
 * <li><code>properties</code> (or the request properties URI): a JSON object
 * with request properties. MUST appear after <code>content</code>.
 * <li>any other part: RDF added as additional content part using the field
 * name of the MIME part as URI. MUST appear after <code>content</code>.
 * </ul>
 * For any other media type the whole entity is used as main content.
 * <p>
 * Violations of those rules are reported as {@link WebApplicationException}
 * with status <code>BAD_REQUEST</code>. This includes MIME parts that do not
 * define a field name.
 *
 * @param type ignored by this implementation
 * @param genericType ignored by this implementation
 * @param annotations ignored by this implementation
 * @param mediaType the media type of the entity
 * @param httpHeaders the request headers (only used for TRACE logging here)
 * @param entityStream the stream with the entity
 * @return the parsed content item
 * @throws IOException on any error while reading from the entity stream
 * @throws WebApplicationException with status <code>BAD_REQUEST</code> if the
 *         entity is not a valid serialisation of a content item
 */
@Override
public ContentItem readFrom(Class<ContentItem> type, Type genericType, Annotation[] annotations, MediaType mediaType, MultivaluedMap<String, String> httpHeaders, InputStream entityStream) throws IOException, WebApplicationException {
    ContentItem contentItem = null;
    IRI contentItemId = getContentItemId();
    if (log.isTraceEnabled()) {
        // NOTE: enabling TRACE level logging will copy the parsed content
        // into a BYTE array
        log.trace("Parse ContentItem from");
        log.trace(" - MediaType: {}", mediaType);
        log.trace(" - Headers:");
        for (Entry<String, List<String>> header : httpHeaders.entrySet()) {
            log.trace(" {}: {}", header.getKey(), header.getValue());
        }
        byte[] content = IOUtils.toByteArray(entityStream);
        log.trace("content: \n{}", new String(content, "UTF-8"));
        IOUtils.closeQuietly(entityStream);
        // continue parsing from the in-memory copy of the entity
        entityStream = new ByteArrayInputStream(content);
    }
    Set<String> parsedContentIds = new HashSet<String>();
    if (mediaType.isCompatible(MULTIPART)) {
        log.debug(" - parse Multipart MIME ContentItem");
        // try to read ContentItem from "multipart/from-data"
        Graph metadata = null;
        try {
            FileItemIterator fileItemIterator = fu.getItemIterator(new MessageBodyReaderContext(entityStream, mediaType));
            while (fileItemIterator.hasNext()) {
                FileItemStream fis = fileItemIterator.next();
                // The field name may be missing (null). All comparisons
                // below are null-safe so that such parts end up in the
                // final else branch and are rejected with BAD_REQUEST
                // instead of causing a NullPointerException.
                String fieldName = fis.getFieldName();
                if ("metadata".equals(fieldName)) {
                    if (contentItem != null) {
                        throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).entity("The Multipart MIME part with the 'metadata' " + "MUST BE before the MIME part containing the " + "'content'!").build());
                    }
                    // only used if not parsed as query param
                    if (contentItemId == null && fis.getName() != null && !fis.getName().isEmpty()) {
                        contentItemId = new IRI(fis.getName());
                    }
                    metadata = new IndexedGraph();
                    try {
                        getParser().parse(metadata, fis.openStream(), fis.getContentType());
                    } catch (Exception e) {
                        throw new WebApplicationException(e, Response.status(Response.Status.BAD_REQUEST).entity(String.format("Unable to parse Metadata " + "from Multipart MIME part '%s' (" + "contentItem: %s| contentType: %s)", fieldName, fis.getName(), fis.getContentType())).build());
                    }
                } else if ("content".equals(fieldName)) {
                    contentItem = createContentItem(contentItemId, metadata, fis, parsedContentIds);
                } else if ("properties".equals(fieldName) || REQUEST_PROPERTIES_URI.getUnicodeString().equals(fieldName)) {
                    // parse the RequestProperties
                    if (contentItem == null) {
                        throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).entity("Multipart MIME parts for " + "Request Properties MUST BE after the " + "MIME parts for 'metadata' AND 'content'").build());
                    }
                    MediaType propMediaType = MediaType.valueOf(fis.getContentType());
                    if (!APPLICATION_JSON_TYPE.isCompatible(propMediaType)) {
                        throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).entity("Request Properties (Multipart MIME parts " + "with the name '" + fieldName + "') MUST " + "BE encoded as 'application/json' (encountered: '" + fis.getContentType() + "')!").build());
                    }
                    // use the charset of the MIME part; UTF-8 if none is defined
                    String propCharset = propMediaType.getParameters().get("charset");
                    if (propCharset == null) {
                        propCharset = "UTF-8";
                    }
                    Map<String, Object> reqProp = ContentItemHelper.initRequestPropertiesContentPart(contentItem);
                    try {
                        reqProp.putAll(toMap(new JSONObject(IOUtils.toString(fis.openStream(), propCharset))));
                    } catch (JSONException e) {
                        throw new WebApplicationException(e, Response.status(Response.Status.BAD_REQUEST).entity("Unable to parse Request Properties from " + "Multipart MIME parts with the name 'properties'!").build());
                    }
                } else {
                    // additional metadata as serialised RDF
                    if (contentItem == null) {
                        throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).entity("Multipart MIME parts for additional " + "contentParts MUST BE after the MIME " + "parts for 'metadata' AND 'content'").build());
                    }
                    if (fieldName == null || fieldName.isEmpty()) {
                        throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).entity("Multipart MIME parts representing " + "ContentParts for additional RDF metadata " + "MUST define the contentParts URI as " + "'name' of the MIME part!").build());
                    }
                    Graph graph = new IndexedGraph();
                    try {
                        getParser().parse(graph, fis.openStream(), fis.getContentType());
                    } catch (Exception e) {
                        throw new WebApplicationException(e, Response.status(Response.Status.BAD_REQUEST).entity(String.format("Unable to parse RDF " + "for ContentPart '%s' ( contentType: %s)", fis.getName(), fis.getContentType())).build());
                    }
                    // the field name of the MIME part is the URI of the content part
                    IRI contentPartId = new IRI(fieldName);
                    contentItem.addPart(contentPartId, graph);
                }
            }
            if (contentItem == null) {
                throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).entity("The parsed multipart content item does not contain " + "any content. The content is expected to be contained " + "in a MIME part with the name 'content'. This part can " + " be also a 'multipart/alternate' if multiple content " + "parts need to be included in requests.").build());
            }
        } catch (FileUploadException e) {
            throw new WebApplicationException(e, Response.Status.BAD_REQUEST);
        }
    } else {
        // normal content
        ContentItemFactory ciFactory = getContentItemFactory();
        contentItem = ciFactory.createContentItem(contentItemId, new StreamSource(entityStream, mediaType.toString()));
        // add the URI of the main content
        parsedContentIds.add(contentItem.getPartUri(0).getUnicodeString());
    }
    // set the parsed contentIDs to the EnhancementProperties
    Map<String, Object> ep = ContentItemHelper.initRequestPropertiesContentPart(contentItem);
    parseEnhancementPropertiesFromParameters(ep);
    ep.put(PARSED_CONTENT_URIS, Collections.unmodifiableSet(parsedContentIds));
    // STANBOL-660: set the language of the content if explicitly parsed in the request
    String contentLanguage = getContentLanguage();
    if (!StringUtils.isBlank(contentLanguage)) {
        // language codes are case insensitive ... so we convert to lower case
        contentLanguage = contentLanguage.toLowerCase(Locale.ROOT);
        // STANBOL-1417: previously only the dc:language property was set on
        // the contentItem. As this is only used as fallback if no Language
        // annotation is present, but users that explicitly parse a language
        // expect it to be used, a language annotation is created instead.
        createParsedLanguageAnnotation(contentItem, contentLanguage);
    }
    return contentItem;
}
use of org.apache.stanbol.enhancer.servicesapi.ContentItemFactory in project stanbol by apache.
the class ContentItemReader method createContentItem.
/**
 * Creates a ContentItem for the MIME part holding the content.
 * <p>
 * If the content type of the parsed part is compatible with "multipart/*"
 * the nested parts are treated as alternate versions of the content: the
 * first nested part becomes the main content of the created
 * {@link ContentItem}; every further nested part is added as additional
 * {@link Blob} using its field name as content part URI (or a random
 * <code>urn:contentpart:</code> URI if no field name is defined). In any
 * other case the contents of the parsed {@link FileItemStream} are directly
 * used as content of the created {@link ContentItem}.
 *
 * @param id the ID or <code>null</code> if not known
 * @param metadata the metadata or <code>null</code> if not parsed. NOTE that
 * if <code>id == null</code> also <code>metadata == null</code> and
 * <code>id != null</code> also <code>metadata != null</code>.
 * @param content the {@link FileItemStream} of the MIME part representing
 * the content
 * @param parsedContentParts used to add the IDs of parsed contentParts. The
 * URI of the main content as well as the URIs of all additional Blobs are
 * added to this set.
 * @return the created content item (never <code>null</code>)
 * @throws IOException on any error while accessing the contents of the parsed
 * {@link FileItemStream}
 * @throws FileUploadException if the parsed contents are not correctly
 * encoded Multipart MIME or if a "multipart/*" content does not contain any
 * nested part
 */
private ContentItem createContentItem(IRI id, Graph metadata, FileItemStream content, Set<String> parsedContentParts) throws IOException, FileUploadException {
    MediaType partContentType = MediaType.valueOf(content.getContentType());
    ContentItem contentItem = null;
    ContentItemFactory ciFactory = getContentItemFactory();
    if (MULTIPART.isCompatible(partContentType)) {
        log.debug(" - multiple (alternate) ContentParts");
        // multiple contentParts are parsed
        FileItemIterator contentPartIterator = fu.getItemIterator(new MessageBodyReaderContext(content.openStream(), partContentType));
        while (contentPartIterator.hasNext()) {
            FileItemStream fis = contentPartIterator.next();
            if (contentItem == null) {
                // the first nested part is the main content
                log.debug(" - create ContentItem {} for content (type:{})", id, fis.getContentType());
                contentItem = ciFactory.createContentItem(id, new StreamSource(fis.openStream(), fis.getContentType()), metadata);
            } else {
                log.debug(" - create Blob for content (type:{})", fis.getContentType());
                Blob blob = ciFactory.createBlob(new StreamSource(fis.openStream(), fis.getContentType()));
                IRI contentPartId = null;
                if (fis.getFieldName() != null && !fis.getFieldName().isEmpty()) {
                    contentPartId = new IRI(fis.getFieldName());
                } else {
                    // generating a random ID might break metadata
                    // TODO maybe we should throw an exception instead
                    contentPartId = new IRI("urn:contentpart:" + randomUUID());
                }
                log.debug(" ... add Blob {} to ContentItem {} with content (type:{})", new Object[] { contentPartId, id, fis.getContentType() });
                contentItem.addPart(contentPartId, blob);
                parsedContentParts.add(contentPartId.getUnicodeString());
            }
        }
        if (contentItem == null) {
            // A multipart content without any nested part: without this check
            // the access to the main content below fails with a
            // NullPointerException.
            throw new FileUploadException("The Multipart MIME part '" + content.getFieldName() + "' (contentType: " + content.getContentType() + ") does not contain any content part!");
        }
    } else {
        log.debug(" - create ContentItem {} for content (type:{})", id, content.getContentType());
        contentItem = ciFactory.createContentItem(id, new StreamSource(content.openStream(), content.getContentType()), metadata);
    }
    // add the URI of the main content to the parsed contentParts
    parsedContentParts.add(contentItem.getPartUri(0).getUnicodeString());
    return contentItem;
}
Aggregations