Search in sources :

Example 91 with Graph

use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.

the class ContentItemWriter method writeTo.

/**
 * Writes the parsed {@link ContentItem} to the response entity stream.
 * <p>
 * Three response shapes are produced, depending on the requested media type
 * and the request properties of the content item:
 * <ol>
 * <li>media type not compatible with multipart and metadata not omitted:
 *     the metadata graph is serialised (defaulting to JSON-LD for wildcard,
 *     text/plain and application/octet-stream requests);
 * <li>media type not compatible with multipart and metadata omitted: the
 *     single Blob matching the requested media type is streamed, transcoding
 *     it when the requested charset differs from the charset of the Blob;
 * <li>multipart media type: a multipart entity is built from the metadata
 *     (unless omitted), the filtered Blobs, the request properties and the
 *     additional RDF content parts.
 * </ol>
 *
 * @param ci the content item to serialise
 * @param type not used by this implementation
 * @param genericType not used by this implementation
 * @param annotations not used by this implementation
 * @param mediaType the media type requested for the response
 * @param httpHeaders the response headers; the Content-Type header is set here
 * @param entityStream the stream the response entity is written to
 * @throws IOException on any error while copying or writing the entity
 * @throws WebApplicationException if the metadata cannot be serialised in the
 *         requested media type, no Blob with the requested media type exists,
 *         the configured RDF format is not a valid MIME type or the request
 *         properties cannot be converted to JSON
 */
@Override
public void writeTo(ContentItem ci, Class<?> type, Type genericType, Annotation[] annotations, MediaType mediaType, MultivaluedMap<String, Object> httpHeaders, OutputStream entityStream) throws IOException, WebApplicationException {
    //(0) handle default dataType
    Map<String, Object> reqProp = ContentItemHelper.getRequestPropertiesContentPart(ci);
    boolean omitMetadata = isOmitMetadata(reqProp);
    if (!MULTIPART.isCompatible(mediaType)) {
        //two possible cases
        if (!omitMetadata) {
            //  (1) just return the RDF data
            //(1.a) Backward support for default dataType if no Accept header is set
            StringBuilder ctb = new StringBuilder();
            if (mediaType.isWildcardType() || TEXT_PLAIN_TYPE.isCompatible(mediaType) || APPLICATION_OCTET_STREAM_TYPE.isCompatible(mediaType)) {
                ctb.append(APPLICATION_LD_JSON);
            } else {
                ctb.append(mediaType.getType()).append('/').append(mediaType.getSubtype());
            }
            ctb.append(";charset=").append(UTF8.name());
            String contentType = ctb.toString();
            httpHeaders.putSingle(HttpHeaders.CONTENT_TYPE, contentType);
            try {
                serializer.serialize(entityStream, ci.getMetadata(), contentType);
            } catch (UnsupportedSerializationFormatException e) {
                throw new WebApplicationException("The enhancement results " + "cannot be serialized in the requested media type: " + mediaType.toString(), Response.Status.NOT_ACCEPTABLE);
            }
        } else {
            //  (2) return a single content part
            Entry<IRI, Blob> contentPart = getBlob(ci, Collections.singleton(mediaType.toString()));
            if (contentPart == null) {
                //no alternate content with the requested media type
                throw new WebApplicationException("The requested enhancement chain has not created an " + "version of the parsed content in the reuqest media type " + mediaType.toString(), Response.Status.UNSUPPORTED_MEDIA_TYPE);
            } else {
                //found -> stream the content to the client
                //NOTE: This assumes that the presence of a charset
                //      implies reading/writing character streams
                String requestedCharset = mediaType.getParameters().get("charset");
                String blobCharset = contentPart.getValue().getParameter().get("charset");
                Charset readerCharset = blobCharset == null ? UTF8 : Charset.forName(blobCharset);
                Charset writerCharset = requestedCharset == null ? null : Charset.forName(requestedCharset);
                if (writerCharset != null && !writerCharset.equals(readerCharset)) {
                    //we need to transcode
                    Reader reader = new InputStreamReader(contentPart.getValue().getStream(), readerCharset);
                    try {
                        Writer writer = new OutputStreamWriter(entityStream, writerCharset);
                        IOUtils.copy(reader, writer);
                        //the writer buffers encoded characters: flush them to
                        //the entity stream (the stream itself is not closed here)
                        writer.flush();
                    } finally {
                        //release the Blob stream even if copying fails
                        IOUtils.closeQuietly(reader);
                    }
                } else {
                    //no transcoding
                    if (requestedCharset == null && blobCharset != null) {
                        httpHeaders.putSingle(HttpHeaders.CONTENT_TYPE, mediaType.toString() + "; charset=" + blobCharset);
                    }
                    InputStream in = contentPart.getValue().getStream();
                    try {
                        IOUtils.copy(in, entityStream);
                    } finally {
                        //release the Blob stream even if copying fails
                        IOUtils.closeQuietly(in);
                    }
                }
            }
        }
    } else {
        // multipart mime requested!
        final String charsetName = mediaType.getParameters().get("charset");
        final Charset charset = charsetName != null ? Charset.forName(charsetName) : UTF8;
        MediaType rdfFormat;
        String rdfFormatString = getRdfFormat(reqProp);
        if (rdfFormatString == null || rdfFormatString.isEmpty()) {
            rdfFormat = DEFAULT_RDF_FORMAT;
        } else {
            try {
                rdfFormat = MediaType.valueOf(rdfFormatString);
                if (rdfFormat.getParameters().get("charset") == null) {
                    //use the charset of the default RDF format
                    rdfFormat = new MediaType(rdfFormat.getType(), rdfFormat.getSubtype(), DEFAULT_RDF_FORMAT.getParameters());
                }
            } catch (IllegalArgumentException e) {
                throw new WebApplicationException("The specified RDF format '" + rdfFormatString + "' (used to serialize all RDF parts of " + "multipart MIME responses) is not a well formated MIME type", Response.Status.BAD_REQUEST);
            }
        }
        //(1) setting the correct header
        String contentType = String.format("%s/%s; charset=%s; boundary=%s", mediaType.getType(), mediaType.getSubtype(), charset.toString(), CONTENT_ITEM_BOUNDARY);
        httpHeaders.putSingle(HttpHeaders.CONTENT_TYPE, contentType);
        MultipartEntityBuilder entityBuilder = MultipartEntityBuilder.create();
        entityBuilder.setBoundary(CONTENT_ITEM_BOUNDARY);
        //(2) serialising the metadata
        if (!omitMetadata) {
            entityBuilder.addPart("metadata", new ClerezzaContentBody(ci.getUri().getUnicodeString(), ci.getMetadata(), rdfFormat));
        }
        //(3) serialising the Content (Blobs)
        //(3.a) Filter based on parameter
        List<Entry<IRI, Blob>> includedBlobs = filterBlobs(ci, reqProp);
        //(3.b) Serialise the filtered
        if (!includedBlobs.isEmpty()) {
            //LinkedHashMap: keep the parts in the order of the filtered Blobs
            Map<String, ContentBody> contentParts = new LinkedHashMap<String, ContentBody>();
            for (Entry<IRI, Blob> entry : includedBlobs) {
                Blob blob = entry.getValue();
                ContentType ct = ContentType.create(blob.getMimeType());
                String cs = blob.getParameter().get("charset");
                if (StringUtils.isNotBlank(cs)) {
                    ct = ct.withCharset(cs);
                }
                contentParts.put(entry.getKey().getUnicodeString(), new InputStreamBody(blob.getStream(), ct));
            }
            //add all the blobs
            entityBuilder.addPart("content", new MultipartContentBody(contentParts, CONTENT_PARTS_BOUNDERY, MULTIPART_ALTERNATE));
        }
        //else no content to include
        Set<String> includeContentParts = getIncludedContentPartURIs(reqProp);
        if (includeContentParts != null) {
            //an empty set means that all content parts are included
            //(4) serialise the Request Properties
            if (includeContentParts.isEmpty() || includeContentParts.contains(REQUEST_PROPERTIES_URI.getUnicodeString())) {
                JSONObject object;
                try {
                    object = toJson(reqProp);
                } catch (JSONException e) {
                    String message = "Unable to convert Request Properties " + "to JSON (values : " + reqProp + ")!";
                    log.error(message, e);
                    throw new WebApplicationException(message, Response.Status.INTERNAL_SERVER_ERROR);
                }
                entityBuilder.addTextBody(REQUEST_PROPERTIES_URI.getUnicodeString(), object.toString(), ContentType.APPLICATION_JSON.withCharset(UTF8));
            }
            //(5) additional RDF metadata stored in contentParts
            for (Entry<IRI, Graph> entry : getContentParts(ci, Graph.class).entrySet()) {
                //the set holds URI strings, so compare against the string
                //form of the IRI (an IRI instance would never be contained)
                String partUri = entry.getKey().getUnicodeString();
                if (includeContentParts.isEmpty() || includeContentParts.contains(partUri)) {
                    //no file name
                    entityBuilder.addPart(partUri, new ClerezzaContentBody(null, entry.getValue(), rdfFormat));
                }
                // else ignore this content part
            }
        }
        entityBuilder.build().writeTo(entityStream);
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) MultipartEntityBuilder(org.apache.http.entity.mime.MultipartEntityBuilder) WebApplicationException(javax.ws.rs.WebApplicationException) ContentType(org.apache.http.entity.ContentType) Reader(java.io.Reader) InputStreamReader(java.io.InputStreamReader) LinkedHashMap(java.util.LinkedHashMap) Entry(java.util.Map.Entry) MediaType(javax.ws.rs.core.MediaType) InputStreamBody(org.apache.http.entity.mime.content.InputStreamBody) Blob(org.apache.stanbol.enhancer.servicesapi.Blob) ContentItemHelper.getBlob(org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper.getBlob) InputStreamReader(java.io.InputStreamReader) InputStream(java.io.InputStream) Charset(java.nio.charset.Charset) JSONException(org.codehaus.jettison.json.JSONException) UnsupportedSerializationFormatException(org.apache.clerezza.rdf.core.serializedform.UnsupportedSerializationFormatException) Graph(org.apache.clerezza.commons.rdf.Graph) AbstractContentBody(org.apache.http.entity.mime.content.AbstractContentBody) ContentBody(org.apache.http.entity.mime.content.ContentBody) JSONObject(org.codehaus.jettison.json.JSONObject) JSONObject(org.codehaus.jettison.json.JSONObject) OutputStreamWriter(java.io.OutputStreamWriter) MessageBodyWriter(javax.ws.rs.ext.MessageBodyWriter) Writer(java.io.Writer) OutputStreamWriter(java.io.OutputStreamWriter)

Example 92 with Graph

use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.

the class ContentItemReaderWriterTest method testReader.

/**
 * Serialises the test content item, reads it back with the reader under test
 * and compares id, metadata, Blobs, execution metadata and request properties
 * of the result with those of the original content item.
 */
@Test
public void testReader() throws Exception {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    MediaType contentType = serializeContentItem(out);
    ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
    ContentItem ci = ciReader.readFrom(ContentItem.class, null, null, contentType, null, in);
    //assert ID
    assertEquals(contentItem.getUri(), ci.getUri());
    //assert metadata: removing the read triples from a copy of the original
    //metadata must change the copy and leave it empty
    Graph copy = new SimpleGraph();
    copy.addAll(contentItem.getMetadata());
    assertTrue(copy.removeAll(ci.getMetadata()));
    assertTrue(copy.isEmpty());
    //assert Blob
    assertEquals(contentItem.getBlob().getMimeType(), ci.getBlob().getMimeType());
    String content = IOUtils.toString(contentItem.getStream(), "UTF-8");
    String readContent = IOUtils.toString(ci.getStream(), "UTF-8");
    assertEquals(content, readContent);
    Iterator<Entry<IRI, Blob>> contentItemBlobsIt = ContentItemHelper.getContentParts(contentItem, Blob.class).entrySet().iterator();
    Iterator<Entry<IRI, Blob>> ciBlobsIt = ContentItemHelper.getContentParts(ci, Blob.class).entrySet().iterator();
    //later used to validate enhancementMetadata
    Set<String> expectedParsedContentIds = new HashSet<String>();
    while (contentItemBlobsIt.hasNext() && ciBlobsIt.hasNext()) {
        Entry<IRI, Blob> contentItemBlobPart = contentItemBlobsIt.next();
        Entry<IRI, Blob> ciBlobPart = ciBlobsIt.next();
        expectedParsedContentIds.add(ciBlobPart.getKey().getUnicodeString());
        assertEquals(contentItemBlobPart.getKey(), ciBlobPart.getKey());
        String partContentType = contentItemBlobPart.getValue().getMimeType();
        String readPartContentType = ciBlobPart.getValue().getMimeType();
        assertEquals(partContentType, readPartContentType);
        String partContent = IOUtils.toString(contentItemBlobPart.getValue().getStream(), "UTF-8");
        String readPartContent = IOUtils.toString(ciBlobPart.getValue().getStream(), "UTF-8");
        assertEquals(partContent, readPartContent);
    }
    //the loop stops as soon as one iterator is exhausted: make sure that
    //neither content item has Blob parts the other one is missing
    assertTrue("Blob content parts are missing in the read ContentItem", !contentItemBlobsIt.hasNext());
    assertTrue("The read ContentItem contains additional Blob content parts", !ciBlobsIt.hasNext());
    //validate ExecutionMetadata
    Graph executionMetadata = contentItem.getPart(ExecutionMetadata.CHAIN_EXECUTION, Graph.class);
    Graph readExecutionMetadata = ci.getPart(ExecutionMetadata.CHAIN_EXECUTION, Graph.class);
    assertNotNull(executionMetadata);
    assertNotNull(readExecutionMetadata);
    assertEquals(executionMetadata.size(), readExecutionMetadata.size());
    //validate the request properties
    Map<String, Object> reqProp = ContentItemHelper.getRequestPropertiesContentPart(ci);
    assertNotNull(reqProp);
    //the parsed value MUST BE overridden by the two content parts parsed
    assertEquals(expectedParsedContentIds, getParsedContentURIs(reqProp));
    Collection<String> outputContent = getOutputContent(reqProp);
    assertEquals(1, outputContent.size());
    assertEquals("*/*", outputContent.iterator().next());
    //NOTE(review): the collection below is built locally, so the two
    //  assertions on it can never fail. Presumably the value was meant to
    //  be read from reqProp - confirm and replace with the real accessor.
    Collection<String> outputContentPart = Collections.singleton("*");
    assertEquals(1, outputContentPart.size());
    assertEquals("*", outputContentPart.iterator().next());
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) Blob(org.apache.stanbol.enhancer.servicesapi.Blob) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Entry(java.util.Map.Entry) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) Graph(org.apache.clerezza.commons.rdf.Graph) ByteArrayInputStream(java.io.ByteArrayInputStream) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) MediaType(javax.ws.rs.core.MediaType) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 93 with Graph

use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.

the class ContentItemReader method createParsedLanguageAnnotation.

/**
 * Adds a fise:TextAnnotation for the explicitly parsed Content-Language
 * header to the metadata of the parsed content item. The confidence of the
 * annotation is <code>1.0</code> (see
 * <a href="https://issues.apache.org/jira/browse/STANBOL-1417">STANBOL-1417</a>).
 * @param ci the {@link ContentItem} to add the language annotation to
 * @param lang the parsed language
 */
private void createParsedLanguageAnnotation(ContentItem ci, String lang) {
    final Graph metadata = ci.getMetadata();
    // every call creates an annotation with a new random "urn:enhancement-" id
    final IRI annotation = new IRI("urn:enhancement-" + EnhancementEngineHelper.randomUUID());

    // generic fise:Enhancement part: types, source content item, creation info
    metadata.add(new TripleImpl(annotation, RDF_TYPE, ENHANCER_ENHANCEMENT));
    metadata.add(new TripleImpl(annotation, RDF_TYPE, ENHANCER_TEXTANNOTATION));
    metadata.add(new TripleImpl(annotation, ENHANCER_EXTRACTED_FROM, ci.getUri()));
    metadata.add(new TripleImpl(annotation, DC_CREATED, lf.createTypedLiteral(new Date())));
    metadata.add(new TripleImpl(annotation, DC_CREATOR, lf.createTypedLiteral("Content-Language Header of the request")));

    // language specific part of the fise:TextAnnotation
    metadata.add(new TripleImpl(annotation, DC_TYPE, DCTERMS_LINGUISTIC_SYSTEM));
    metadata.add(new TripleImpl(annotation, DC_LANGUAGE, new PlainLiteralImpl(lang)));

    // confidence is a Double, so the literal is typed from a double value
    metadata.add(new TripleImpl(annotation, ENHANCER_CONFIDENCE, lf.createTypedLiteral(Double.valueOf(1.0d))));
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) Graph(org.apache.clerezza.commons.rdf.Graph) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) Date(java.util.Date)

Example 94 with Graph

use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.

the class ContentItemBackendTest method testContentWithAdditionalMetadata.

/**
 * Adds an additional RDF graph as content part to a content item and checks
 * that an LDPath query over a {@link ContentItemBackend} created for this
 * content item returns the literal stored in that graph.
 */
@Test
public void testContentWithAdditionalMetadata() throws IOException, LDPathParseException {
    //encode explicitly as UTF-8 so that the bytes match the charset declared
    //for the ByteArraySource regardless of the platform default charset
    byte[] content = "text content".getBytes("UTF-8");
    IRI uri = ContentItemHelper.makeDefaultUrn(content);
    ContentItem contentItem = ciFactory.createContentItem(uri, new ByteArraySource(content, "text/plain; charset=UTF-8"));
    //the additional metadata: a single foaf:givenName statement
    Graph tc = new SimpleGraph();
    Literal literal = LiteralFactory.getInstance().createTypedLiteral("Michael Jackson");
    IRI subject = new IRI("dummyUri");
    tc.add(new TripleImpl(subject, new IRI("http://xmlns.com/foaf/0.1/givenName"), literal));
    contentItem.addPart(new IRI(uri.getUnicodeString() + "_additionalMetadata"), tc);
    //NOTE(review): the 'true' flag presumably makes the backend include RDF
    //  content parts in addition to the metadata - confirm against the
    //  ContentItemBackend constructor
    ContentItemBackend ciBackend = new ContentItemBackend(contentItem, true);
    LDPath<RDFTerm> ldPath = new LDPath<RDFTerm>(ciBackend, EnhancerLDPath.getConfig());
    Collection<RDFTerm> result = ldPath.pathQuery(subject, "foaf:givenName", null);
    assertTrue("Additional metadata cannot be found", result.contains(literal));
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) LDPath(org.apache.marmotta.ldpath.LDPath) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) Graph(org.apache.clerezza.commons.rdf.Graph) Literal(org.apache.clerezza.commons.rdf.Literal) ContentItemBackend(org.apache.stanbol.enhancer.ldpath.backend.ContentItemBackend) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) ByteArraySource(org.apache.stanbol.enhancer.servicesapi.impl.ByteArraySource) Test(org.junit.Test)

Example 95 with Graph

use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.

the class UsageExamples method readTestData.

/**
 * Initialises the shared test content item: parses the RDF test data,
 * determines the content item id from the objects of the
 * {@code ENHANCER_EXTRACTED_FROM} statements, reads the example text and
 * creates the content item with the parsed RDF data as metadata.
 *
 * @throws IOException on any error while reading the test resources
 */
@BeforeClass
public static void readTestData() throws IOException {
    //parse the RDF test data
    ParsingProvider parser = new JenaParserProvider();
    Graph rdfData = parseRdfData(parser, "example.rdf.zip");
    //determine the id of the content item: the first IRI found is used and
    //all following values MUST be equal to it
    IRI contentItemId = null;
    Iterator<Triple> it = rdfData.filter(null, Properties.ENHANCER_EXTRACTED_FROM, null);
    while (it.hasNext()) {
        RDFTerm r = it.next().getObject();
        if (contentItemId == null) {
            if (r instanceof IRI) {
                contentItemId = (IRI) r;
            }
        } else {
            assertEquals("multiple ContentItems IDs contained in the RDF test data", contentItemId, r);
        }
    }
    assertNotNull("RDF data doe not contain an Enhancement extracted form " + "the content item", contentItemId);
    //read the content
    InputStream in = getTestResource("example.txt");
    assertNotNull("Example Plain text content not found", in);
    byte[] textData;
    try {
        textData = IOUtils.toByteArray(in);
    } finally {
        //close the resource stream even if reading fails
        IOUtils.closeQuietly(in);
    }
    //create the content item and add the metadata
    //NOTE(review): the data read from 'example.txt' is registered as
    //  'text/html' - confirm that this media type is intended
    ci = ciFactory.createContentItem(contentItemId, new ByteArraySource(textData, "text/html; charset=UTF-8"));
    ci.getMetadata().addAll(rdfData);
}
Also used : JenaParserProvider(org.apache.clerezza.rdf.jena.parser.JenaParserProvider) Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) ParsingProvider(org.apache.clerezza.rdf.core.serializedform.ParsingProvider) Graph(org.apache.clerezza.commons.rdf.Graph) InputStream(java.io.InputStream) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) ByteArraySource(org.apache.stanbol.enhancer.servicesapi.impl.ByteArraySource) BeforeClass(org.junit.BeforeClass)

Aggregations

Graph (org.apache.clerezza.commons.rdf.Graph)172 IRI (org.apache.clerezza.commons.rdf.IRI)110 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)66 SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph)57 Triple (org.apache.clerezza.commons.rdf.Triple)45 IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph)43 Test (org.junit.Test)38 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)36 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)34 IOException (java.io.IOException)27 ImmutableGraph (org.apache.clerezza.commons.rdf.ImmutableGraph)26 HashSet (java.util.HashSet)24 RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)24 EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException)24 InputStream (java.io.InputStream)21 HashMap (java.util.HashMap)20 Language (org.apache.clerezza.commons.rdf.Language)17 Blob (org.apache.stanbol.enhancer.servicesapi.Blob)17 ArrayList (java.util.ArrayList)16 LiteralFactory (org.apache.clerezza.rdf.core.LiteralFactory)15