Search in sources :

Example 1 with ContentItemFactory

Use of org.apache.stanbol.enhancer.servicesapi.ContentItemFactory in the Apache Stanbol project.

Class ContentItemReaderWriterTest, method createTestContentItem.

/**
 * Prepares the fixtures shared by the tests of this class: a content item with
 * an HTML main content, one additional plain text part, a single metadata
 * triple, request properties and execution metadata, as well as the
 * {@code ciWriter} and {@code ciReader} instances.
 *
 * @throws IOException propagated from the calls that create the content item
 * and the additional blob
 */
@BeforeClass
public static void createTestContentItem() throws IOException {
    // main content: a small HTML document
    final String html = "<html>\n"
            + "  <body>\n"
            + "    This is a <b>ContentItem</b> to <i>Mime Multipart</i> test!\n"
            + "  </body>\n"
            + "</html>";
    final IRI itemUri = new IRI("urn:test");
    contentItem = ciFactory.createContentItem(itemUri, new StringSource(html, "text/html"));

    RuntimeDelegate.setInstance(new RuntimeDelegateImpl());

    // additional content part holding a plain text version
    // NOTE(review): the "run:" prefix looks like it could be a typo of "urn:" -
    // kept unchanged, confirm before touching it
    final StringSource plainText = new StringSource("This is a ContentItem to Mime Multipart test!");
    contentItem.addPart(new IRI("run:text:text"), ciFactory.createBlob(plainText));

    // a single triple typing the content item
    contentItem.getMetadata().add(new TripleImpl(itemUri, RDF.type, new IRI("urn:types:Document")));

    // request properties: flag the main content as parsed and ask for all
    // contents and content parts to be included
    final Map<String, Object> requestProperties = initRequestPropertiesContentPart(contentItem);
    final String mainContentUri = contentItem.getPartUri(0).getUnicodeString();
    requestProperties.put(PARSED_CONTENT_URIS, Collections.singleton(mainContentUri));
    requestProperties.put(OUTPUT_CONTENT, Collections.singleton("*/*"));
    requestProperties.put(OUTPUT_CONTENT_PART, Collections.singleton("*"));
    requestProperties.put(RDF_FORMAT, "application/rdf+xml");

    // execution metadata with a plan for "testChain" containing "testEngine"
    final Graph executionMetadata = initExecutionMetadataContentPart(contentItem);
    final BlankNodeOrIRI executionPlan = createExecutionPlan(executionMetadata, "testChain", null);
    writeExecutionNode(executionMetadata, executionPlan, "testEngine", true, null, null);
    initExecutionMetadata(executionMetadata, executionMetadata, contentItem.getUri(), "testChain", false);

    // writer and reader under test; the reader is wired to the shared factory
    ciWriter = new ContentItemWriter(Serializer.getInstance());
    ciReader = new ContentItemReader() {

        @Override
        protected ContentItemFactory getContentItemFactory() {
            return ciFactory;
        }

        @Override
        protected Parser getParser() {
            return Parser.getInstance();
        }
    };
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) ContentItemFactory(org.apache.stanbol.enhancer.servicesapi.ContentItemFactory) InMemoryContentItemFactory(org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory) RuntimeDelegateImpl(org.glassfish.jersey.internal.RuntimeDelegateImpl) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) Parser(org.apache.clerezza.rdf.core.serializedform.Parser) ContentItemReader(org.apache.stanbol.enhancer.jersey.reader.ContentItemReader) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) Graph(org.apache.clerezza.commons.rdf.Graph) StringSource(org.apache.stanbol.enhancer.servicesapi.impl.StringSource) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) ContentItemWriter(org.apache.stanbol.enhancer.jersey.writers.ContentItemWriter) BeforeClass(org.junit.BeforeClass)

Example 2 with ContentItemFactory

Use of org.apache.stanbol.enhancer.servicesapi.ContentItemFactory in the Apache Stanbol project.

Class ContentItemReader, method readFrom.

/**
 * Reads a {@link ContentItem} from the request entity.
 * <p>
 * If the media type is compatible with {@code MULTIPART} the entity is
 * processed part by part, dispatching on the field name of each MIME part:
 * <ul>
 * <li>{@code metadata}: RDF metadata; must precede the {@code content} part
 * <li>{@code content}: the content (see {@code createContentItem(..)})
 * <li>{@code properties} (or the request properties URI): JSON encoded
 * request properties; must follow the {@code content} part
 * <li>any other name: RDF added as content part using the field name as URI;
 * must follow the {@code content} part
 * </ul>
 * Any other media type is treated as plain content of a new content item.
 * Violations of these rules are reported as {@link WebApplicationException}
 * with status {@code BAD_REQUEST}.
 *
 * @param type not used by this method
 * @param genericType not used by this method
 * @param annotations not used by this method
 * @param mediaType the media type of the request entity
 * @param httpHeaders the request headers (only used for trace logging)
 * @param entityStream the stream to read the entity from
 * @return the content item read from the request
 * @throws IOException on any error while reading from the entity stream
 * @throws WebApplicationException if the request entity is not valid
 */
@Override
public ContentItem readFrom(Class<ContentItem> type, Type genericType, Annotation[] annotations, MediaType mediaType, MultivaluedMap<String, String> httpHeaders, InputStream entityStream) throws IOException, WebApplicationException {
    ContentItem contentItem = null;
    IRI contentItemId = getContentItemId();
    if (log.isTraceEnabled()) {
        // NOTE: enabling TRACE level logging will copy the parsed content
        // into a BYTE array
        log.trace("Parse ContentItem from");
        log.trace("  - MediaType: {}", mediaType);
        log.trace("  - Headers:");
        for (Entry<String, List<String>> header : httpHeaders.entrySet()) {
            log.trace("      {}: {}", header.getKey(), header.getValue());
        }
        byte[] content = IOUtils.toByteArray(entityStream);
        log.trace("content: \n{}", new String(content, "UTF-8"));
        IOUtils.closeQuietly(entityStream);
        // the original stream is consumed: continue with the buffered copy
        entityStream = new ByteArrayInputStream(content);
    }
    Set<String> parsedContentIds = new HashSet<String>();
    if (mediaType.isCompatible(MULTIPART)) {
        log.debug(" - parse Multipart MIME ContentItem");
        // try to read ContentItem from "multipart/form-data"
        Graph metadata = null;
        try {
            FileItemIterator fileItemIterator = fu.getItemIterator(new MessageBodyReaderContext(entityStream, mediaType));
            while (fileItemIterator.hasNext()) {
                FileItemStream fis = fileItemIterator.next();
                // may be null for MIME parts without a name: all comparisons
                // below are written null-safe so that such parts reach the
                // explicit BAD_REQUEST check of the last branch
                final String fieldName = fis.getFieldName();
                if ("metadata".equals(fieldName)) {
                    if (contentItem != null) {
                        throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).entity("The Multipart MIME part with the 'metadata' " + "MUST BE before the MIME part containing the " + "'content'!").build());
                    }
                    // only used if not parsed as query param
                    if (contentItemId == null && fis.getName() != null && !fis.getName().isEmpty()) {
                        contentItemId = new IRI(fis.getName());
                    }
                    metadata = new IndexedGraph();
                    try {
                        getParser().parse(metadata, fis.openStream(), fis.getContentType());
                    } catch (Exception e) {
                        throw new WebApplicationException(e, Response.status(Response.Status.BAD_REQUEST).entity(String.format("Unable to parse Metadata " + "from Multipart MIME part '%s' (" + "contentItem: %s| contentType: %s)", fieldName, fis.getName(), fis.getContentType())).build());
                    }
                } else if ("content".equals(fieldName)) {
                    contentItem = createContentItem(contentItemId, metadata, fis, parsedContentIds);
                } else if ("properties".equals(fieldName) || REQUEST_PROPERTIES_URI.getUnicodeString().equals(fieldName)) {
                    // parse the RequestProperties
                    if (contentItem == null) {
                        throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).entity("Multipart MIME parts for " + "Request Properties MUST BE after the " + "MIME parts for 'metadata' AND 'content'").build());
                    }
                    final MediaType propMediaType;
                    try {
                        propMediaType = MediaType.valueOf(fis.getContentType());
                    } catch (IllegalArgumentException e) {
                        // missing or malformed Content-Type is a client error
                        throw new WebApplicationException(e, Response.status(Response.Status.BAD_REQUEST).entity("Unable to parse the Content-Type '" + fis.getContentType() + "' of the Request Properties " + "(Multipart MIME part with the name '" + fieldName + "')!").build());
                    }
                    if (!APPLICATION_JSON_TYPE.isCompatible(propMediaType)) {
                        throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).entity("Request Properties (Multipart MIME parts " + "with the name '" + fieldName + "') MUST " + "BE encoded as 'application/json' (encountered: '" + fis.getContentType() + "')!").build());
                    }
                    String propCharset = propMediaType.getParameters().get("charset");
                    if (propCharset == null) {
                        propCharset = "UTF-8";
                    }
                    Map<String, Object> reqProp = ContentItemHelper.initRequestPropertiesContentPart(contentItem);
                    try {
                        reqProp.putAll(toMap(new JSONObject(IOUtils.toString(fis.openStream(), propCharset))));
                    } catch (JSONException e) {
                        throw new WebApplicationException(e, Response.status(Response.Status.BAD_REQUEST).entity("Unable to parse Request Properties from " + "Multipart MIME parts with the name 'properties'!").build());
                    }
                } else {
                    // additional metadata as serialised RDF
                    if (contentItem == null) {
                        throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).entity("Multipart MIME parts for additional " + "contentParts MUST BE after the MIME " + "parts for 'metadata' AND 'content'").build());
                    }
                    if (fieldName == null || fieldName.isEmpty()) {
                        throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).entity("Multipart MIME parts representing " + "ContentParts for additional RDF metadata " + "MUST define the contentParts URI as " + "'name' of the MIME part!").build());
                    }
                    Graph graph = new IndexedGraph();
                    try {
                        getParser().parse(graph, fis.openStream(), fis.getContentType());
                    } catch (Exception e) {
                        // report the field name as this is the ID of the content part
                        throw new WebApplicationException(e, Response.status(Response.Status.BAD_REQUEST).entity(String.format("Unable to parse RDF " + "for ContentPart '%s' ( contentType: %s)", fieldName, fis.getContentType())).build());
                    }
                    IRI contentPartId = new IRI(fieldName);
                    contentItem.addPart(contentPartId, graph);
                }
            }
            if (contentItem == null) {
                throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).entity("The parsed multipart content item does not contain " + "any content. The content is expected to be contained " + "in a MIME part with the name 'content'. This part can " + "be also a 'multipart/alternate' if multiple content " + "parts need to be included in requests.").build());
            }
        } catch (FileUploadException e) {
            throw new WebApplicationException(e, Response.Status.BAD_REQUEST);
        }
    } else {
        // normal content
        ContentItemFactory ciFactory = getContentItemFactory();
        contentItem = ciFactory.createContentItem(contentItemId, new StreamSource(entityStream, mediaType.toString()));
        // add the URI of the main content
        parsedContentIds.add(contentItem.getPartUri(0).getUnicodeString());
    }
    // set the parsed contentIDs to the EnhancementProperties
    Map<String, Object> ep = ContentItemHelper.initRequestPropertiesContentPart(contentItem);
    parseEnhancementPropertiesFromParameters(ep);
    ep.put(PARSED_CONTENT_URIS, Collections.unmodifiableSet(parsedContentIds));
    // STANBOL-660: set the language of the content if explicitly parsed in the request
    String contentLanguage = getContentLanguage();
    if (!StringUtils.isBlank(contentLanguage)) {
        // language codes are case insensitive ... so we convert to lower case
        contentLanguage = contentLanguage.toLowerCase(Locale.ROOT);
        // STANBOL-1417: an explicitly parsed language is added as language
        // annotation. Previously only dc:language was set on the content item,
        // but that is only used as fallback if no language annotation exists.
        createParsedLanguageAnnotation(contentItem, contentLanguage);
    }
    return contentItem;
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) ContentItemFactory(org.apache.stanbol.enhancer.servicesapi.ContentItemFactory) WebApplicationException(javax.ws.rs.WebApplicationException) StreamSource(org.apache.stanbol.enhancer.servicesapi.impl.StreamSource) JSONException(org.codehaus.jettison.json.JSONException) URISyntaxException(java.net.URISyntaxException) WebApplicationException(javax.ws.rs.WebApplicationException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) IOException(java.io.IOException) JSONException(org.codehaus.jettison.json.JSONException) FileUploadException(org.apache.commons.fileupload.FileUploadException) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) Graph(org.apache.clerezza.commons.rdf.Graph) JSONObject(org.codehaus.jettison.json.JSONObject) ByteArrayInputStream(java.io.ByteArrayInputStream) FileItemStream(org.apache.commons.fileupload.FileItemStream) MediaType(javax.ws.rs.core.MediaType) List(java.util.List) ArrayList(java.util.ArrayList) JSONObject(org.codehaus.jettison.json.JSONObject) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) FileItemIterator(org.apache.commons.fileupload.FileItemIterator) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) FileUploadException(org.apache.commons.fileupload.FileUploadException) HashSet(java.util.HashSet)

Example 3 with ContentItemFactory

Use of org.apache.stanbol.enhancer.servicesapi.ContentItemFactory in the Apache Stanbol project.

Class ContentItemReader, method createContentItem.

/**
 * Creates a {@link ContentItem} for the MIME part holding the content.
 * <p>
 * If the content type of the parsed part is compatible with
 * {@code MULTIPART} the part is parsed for multiple content versions: the
 * first nested part becomes the main content of the created content item and
 * every further nested part is added as additional {@link Blob}. In any other
 * case the contents of the parsed {@link FileItemStream} are directly used as
 * content of the created content item.
 *
 * @param id the ID or <code>null</code> if not known
 * @param metadata the metadata or <code>null</code> if not parsed.
 * NOTE(review): the original documentation stated that <code>id</code> and
 * <code>metadata</code> are always either both <code>null</code> or both set;
 * this method does not rely on that - confirm against the callers.
 * @param content the {@link FileItemStream} of the MIME part representing
 * the content
 * @param parsedContentParts used to add the IDs of parsed contentParts
 * (including the URI of the main content)
 * @return the created content item (never <code>null</code>)
 * @throws IOException on any error while accessing the contents of the parsed
 * {@link FileItemStream}
 * @throws FileUploadException if the parsed contents are not correctly
 * encoded Multipart MIME or if a multipart content does not contain any part
 */
private ContentItem createContentItem(IRI id, Graph metadata, FileItemStream content, Set<String> parsedContentParts) throws IOException, FileUploadException {
    MediaType partContentType = MediaType.valueOf(content.getContentType());
    ContentItem contentItem = null;
    ContentItemFactory ciFactory = getContentItemFactory();
    if (MULTIPART.isCompatible(partContentType)) {
        log.debug("  - multiple (alternate) ContentParts");
        // multiple contentParts are parsed
        FileItemIterator contentPartIterator = fu.getItemIterator(new MessageBodyReaderContext(content.openStream(), partContentType));
        while (contentPartIterator.hasNext()) {
            FileItemStream fis = contentPartIterator.next();
            if (contentItem == null) {
                // the first part is used as main content of the content item
                log.debug("  - create ContentItem {} for content (type:{})", id, fis.getContentType());
                contentItem = ciFactory.createContentItem(id, new StreamSource(fis.openStream(), fis.getContentType()), metadata);
            } else {
                log.debug("  - create Blob for content (type:{})", fis.getContentType());
                Blob blob = ciFactory.createBlob(new StreamSource(fis.openStream(), fis.getContentType()));
                final IRI contentPartId;
                if (fis.getFieldName() != null && !fis.getFieldName().isEmpty()) {
                    contentPartId = new IRI(fis.getFieldName());
                } else {
                    // generating a random ID might break metadata
                    // TODO maybe we should throw an exception instead
                    contentPartId = new IRI("urn:contentpart:" + randomUUID());
                }
                log.debug("    ... add Blob {} to ContentItem {} with content (type:{})", new Object[] { contentPartId, id, fis.getContentType() });
                contentItem.addPart(contentPartId, blob);
                parsedContentParts.add(contentPartId.getUnicodeString());
            }
        }
        if (contentItem == null) {
            // a multipart content without any part: fail with the declared
            // exception instead of a NullPointerException further down
            throw new FileUploadException("The multipart MIME part '" + content.getFieldName() + "' (contentType: " + content.getContentType() + ") does not contain any content part!");
        }
    } else {
        log.debug("  - create ContentItem {} for content (type:{})", id, content.getContentType());
        contentItem = ciFactory.createContentItem(id, new StreamSource(content.openStream(), content.getContentType()), metadata);
    }
    // add the URI of the main content to the parsed contentParts
    parsedContentParts.add(contentItem.getPartUri(0).getUnicodeString());
    return contentItem;
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Blob(org.apache.stanbol.enhancer.servicesapi.Blob) ContentItemFactory(org.apache.stanbol.enhancer.servicesapi.ContentItemFactory) FileItemStream(org.apache.commons.fileupload.FileItemStream) StreamSource(org.apache.stanbol.enhancer.servicesapi.impl.StreamSource) MediaType(javax.ws.rs.core.MediaType) FileItemIterator(org.apache.commons.fileupload.FileItemIterator) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem)

Aggregations

IRI (org.apache.clerezza.commons.rdf.IRI)3 ContentItemFactory (org.apache.stanbol.enhancer.servicesapi.ContentItemFactory)3 MediaType (javax.ws.rs.core.MediaType)2 Graph (org.apache.clerezza.commons.rdf.Graph)2 FileItemIterator (org.apache.commons.fileupload.FileItemIterator)2 FileItemStream (org.apache.commons.fileupload.FileItemStream)2 ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem)2 StreamSource (org.apache.stanbol.enhancer.servicesapi.impl.StreamSource)2 ByteArrayInputStream (java.io.ByteArrayInputStream)1 IOException (java.io.IOException)1 UnsupportedEncodingException (java.io.UnsupportedEncodingException)1 URISyntaxException (java.net.URISyntaxException)1 ArrayList (java.util.ArrayList)1 HashSet (java.util.HashSet)1 List (java.util.List)1 WebApplicationException (javax.ws.rs.WebApplicationException)1 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)1 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)1 SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph)1 Parser (org.apache.clerezza.rdf.core.serializedform.Parser)1