use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.
The class ContentItemWriter, method writeTo.
/**
 * Serialises the parsed {@link ContentItem} to the entity stream.
 * <p>
 * Two modes are supported, selected by the requested media type:
 * <ul>
 *   <li><b>single part</b>: either the RDF metadata (serialised in the
 *       requested RDF format) or — if metadata are omitted via the request
 *       properties — a single {@link Blob} matching the requested type.</li>
 *   <li><b>multipart MIME</b>: metadata, content blob(s), request properties
 *       and additional RDF content parts, each as its own body part.</li>
 * </ul>
 *
 * @param ci the ContentItem to serialise
 * @param type the raw type (unused, required by the MessageBodyWriter contract)
 * @param genericType the generic type (unused)
 * @param annotations annotations on the resource method (unused)
 * @param mediaType the media type requested by the client
 * @param httpHeaders mutable response headers; the Content-Type is set here
 * @param entityStream the container managed response stream (never closed here)
 * @throws IOException on errors writing to the entity stream
 * @throws WebApplicationException with NOT_ACCEPTABLE, UNSUPPORTED_MEDIA_TYPE,
 *         BAD_REQUEST or INTERNAL_SERVER_ERROR for the respective error cases
 */
@Override
public void writeTo(ContentItem ci, Class<?> type, Type genericType, Annotation[] annotations,
        MediaType mediaType, MultivaluedMap<String, Object> httpHeaders,
        OutputStream entityStream) throws IOException, WebApplicationException {
    //(0) handle default dataType
    Map<String, Object> reqProp = ContentItemHelper.getRequestPropertiesContentPart(ci);
    boolean omitMetadata = isOmitMetadata(reqProp);
    if (!MULTIPART.isCompatible(mediaType)) {
        //two possible cases
        if (!omitMetadata) {
            // (1) just return the RDF data
            //(1.a) Backward support for default dataType if no Accept header is set
            StringBuilder ctb = new StringBuilder();
            if (mediaType.isWildcardType()
                    || TEXT_PLAIN_TYPE.isCompatible(mediaType)
                    || APPLICATION_OCTET_STREAM_TYPE.isCompatible(mediaType)) {
                ctb.append(APPLICATION_LD_JSON);
            } else {
                ctb.append(mediaType.getType()).append('/').append(mediaType.getSubtype());
            }
            ctb.append(";charset=").append(UTF8.name());
            String contentType = ctb.toString();
            httpHeaders.putSingle(HttpHeaders.CONTENT_TYPE, contentType);
            try {
                serializer.serialize(entityStream, ci.getMetadata(), contentType);
            } catch (UnsupportedSerializationFormatException e) {
                throw new WebApplicationException("The enhancement results "
                        + "cannot be serialized in the requested media type: "
                        + mediaType.toString(), Response.Status.NOT_ACCEPTABLE);
            }
        } else {
            // (2) return a single content part
            Entry<IRI, Blob> contentPart = getBlob(ci, Collections.singleton(mediaType.toString()));
            if (contentPart == null) {
                //no alternate content with the requested media type
                throw new WebApplicationException("The requested enhancement chain has not created a "
                        + "version of the parsed content in the request media type "
                        + mediaType.toString(), Response.Status.UNSUPPORTED_MEDIA_TYPE);
            } else {
                //found -> stream the content to the client
                //NOTE: This assumes that the presence of a charset
                //      implies reading/writing character streams
                String requestedCharset = mediaType.getParameters().get("charset");
                String blobCharset = contentPart.getValue().getParameter().get("charset");
                Charset readerCharset = blobCharset == null ? UTF8 : Charset.forName(blobCharset);
                Charset writerCharset = requestedCharset == null ? null : Charset.forName(requestedCharset);
                if (writerCharset != null && !writerCharset.equals(readerCharset)) {
                    //we need to transcode
                    Reader reader = new InputStreamReader(contentPart.getValue().getStream(), readerCharset);
                    Writer writer = new OutputStreamWriter(entityStream, writerCharset);
                    IOUtils.copy(reader, writer);
                    //flush (but do NOT close) the writer: closing would also close
                    //the container managed entityStream, while skipping the flush
                    //would drop the characters still buffered by the writer
                    writer.flush();
                    IOUtils.closeQuietly(reader);
                } else {
                    //no transcoding
                    if (requestedCharset == null && blobCharset != null) {
                        //expose the charset of the blob to the client
                        httpHeaders.putSingle(HttpHeaders.CONTENT_TYPE,
                                mediaType.toString() + "; charset=" + blobCharset);
                    }
                    InputStream in = contentPart.getValue().getStream();
                    IOUtils.copy(in, entityStream);
                    IOUtils.closeQuietly(in);
                }
            }
        }
    } else {
        // multipart mime requested!
        final String charsetName = mediaType.getParameters().get("charset");
        final Charset charset = charsetName != null ? Charset.forName(charsetName) : UTF8;
        MediaType rdfFormat;
        String rdfFormatString = getRdfFormat(reqProp);
        if (rdfFormatString == null || rdfFormatString.isEmpty()) {
            rdfFormat = DEFAULT_RDF_FORMAT;
        } else {
            try {
                rdfFormat = MediaType.valueOf(rdfFormatString);
                if (rdfFormat.getParameters().get("charset") == null) {
                    //use the charset of the default RDF format
                    rdfFormat = new MediaType(rdfFormat.getType(), rdfFormat.getSubtype(),
                            DEFAULT_RDF_FORMAT.getParameters());
                }
            } catch (IllegalArgumentException e) {
                throw new WebApplicationException("The specified RDF format '" + rdfFormatString
                        + "' (used to serialize all RDF parts of "
                        + "multipart MIME responses) is not a well formated MIME type",
                        Response.Status.BAD_REQUEST);
            }
        }
        //(1) setting the correct header
        String contentType = String.format("%s/%s; charset=%s; boundary=%s",
                mediaType.getType(), mediaType.getSubtype(), charset.toString(), CONTENT_ITEM_BOUNDARY);
        httpHeaders.putSingle(HttpHeaders.CONTENT_TYPE, contentType);
        MultipartEntityBuilder entityBuilder = MultipartEntityBuilder.create();
        entityBuilder.setBoundary(CONTENT_ITEM_BOUNDARY);
        //(2) serialising the metadata
        if (!omitMetadata) { //reuse the value computed in step (0)
            entityBuilder.addPart("metadata", new ClerezzaContentBody(
                    ci.getUri().getUnicodeString(), ci.getMetadata(), rdfFormat));
        }
        //(3) serialising the Content (Blobs)
        //(3.a) Filter based on parameter
        List<Entry<IRI, Blob>> includedBlobs = filterBlobs(ci, reqProp);
        //(3.b) Serialise the filtered
        if (!includedBlobs.isEmpty()) {
            Map<String, ContentBody> contentParts = new LinkedHashMap<String, ContentBody>();
            for (Entry<IRI, Blob> entry : includedBlobs) {
                Blob blob = entry.getValue();
                ContentType ct = ContentType.create(blob.getMimeType());
                String cs = blob.getParameter().get("charset");
                if (StringUtils.isNotBlank(cs)) {
                    ct = ct.withCharset(cs);
                }
                contentParts.put(entry.getKey().getUnicodeString(),
                        new InputStreamBody(blob.getStream(), ct));
            }
            //add all the blobs as a nested multipart/alternate part
            entityBuilder.addPart("content", new MultipartContentBody(contentParts,
                    CONTENT_PARTS_BOUNDERY, MULTIPART_ALTERNATE));
        }
        //else no content to include
        Set<String> includeContentParts = getIncludedContentPartURIs(reqProp);
        if (includeContentParts != null) {
            //(4) serialise the Request Properties
            if (includeContentParts.isEmpty()
                    || includeContentParts.contains(REQUEST_PROPERTIES_URI.getUnicodeString())) {
                JSONObject object;
                try {
                    object = toJson(reqProp);
                } catch (JSONException e) {
                    String message = "Unable to convert Request Properties "
                            + "to JSON (values : " + reqProp + ")!";
                    log.error(message, e);
                    throw new WebApplicationException(message, Response.Status.INTERNAL_SERVER_ERROR);
                }
                entityBuilder.addTextBody(REQUEST_PROPERTIES_URI.getUnicodeString(),
                        object.toString(), ContentType.APPLICATION_JSON.withCharset(UTF8));
            }
            //(5) additional RDF metadata stored in contentParts
            for (Entry<IRI, Graph> entry : getContentParts(ci, Graph.class).entrySet()) {
                if (includeContentParts.isEmpty() || includeContentParts.contains(entry.getKey())) {
                    entityBuilder.addPart(entry.getKey().getUnicodeString(),
                            new ClerezzaContentBody(null, //no file name
                                    entry.getValue(), rdfFormat));
                }
                // else ignore this content part
            }
        }
        entityBuilder.build().writeTo(entityStream);
    }
}
use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.
The class ContentItemReaderWriterTest, method testReader.
@Test
public void testReader() throws Exception {
//round-trip: serialise the test ContentItem and read it back through ciReader
ByteArrayOutputStream out = new ByteArrayOutputStream();
MediaType contentType = serializeContentItem(out);
ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
ContentItem ci = ciReader.readFrom(ContentItem.class, null, null, contentType, null, in);
//assert ID
assertEquals(contentItem.getUri(), ci.getUri());
//assert metadata
//copy the original metadata and remove all read triples: an empty copy
//proves the two graphs contain exactly the same triples
Graph copy = new SimpleGraph();
copy.addAll(contentItem.getMetadata());
assertTrue(copy.removeAll(ci.getMetadata()));
assertTrue(copy.isEmpty());
//assert Blob
assertEquals(contentItem.getBlob().getMimeType(), ci.getBlob().getMimeType());
String content = IOUtils.toString(contentItem.getStream(), "UTF-8");
String readContent = IOUtils.toString(ci.getStream(), "UTF-8");
assertEquals(content, readContent);
//compare all Blob content parts pairwise (both maps preserve part order)
Iterator<Entry<IRI, Blob>> contentItemBlobsIt = ContentItemHelper.getContentParts(contentItem, Blob.class).entrySet().iterator();
Iterator<Entry<IRI, Blob>> ciBlobsIt = ContentItemHelper.getContentParts(ci, Blob.class).entrySet().iterator();
//later used to validate enhancementMetadata
Set<String> expectedParsedContentIds = new HashSet<String>();
while (contentItemBlobsIt.hasNext() && ciBlobsIt.hasNext()) {
Entry<IRI, Blob> contentItemBlobPart = contentItemBlobsIt.next();
Entry<IRI, Blob> ciBlobPart = ciBlobsIt.next();
expectedParsedContentIds.add(ciBlobPart.getKey().getUnicodeString());
assertEquals(contentItemBlobPart.getKey(), ciBlobPart.getKey());
String partContentType = contentItemBlobPart.getValue().getMimeType();
String readPartContentType = ciBlobPart.getValue().getMimeType();
assertEquals(partContentType, readPartContentType);
String partContent = IOUtils.toString(contentItemBlobPart.getValue().getStream(), "UTF-8");
String readPartContent = IOUtils.toString(ciBlobPart.getValue().getStream(), "UTF-8");
assertEquals(partContent, readPartContent);
}
//validate ExecutionMetadata
Graph executionMetadata = contentItem.getPart(ExecutionMetadata.CHAIN_EXECUTION, Graph.class);
Graph readExecutionMetadata = ci.getPart(ExecutionMetadata.CHAIN_EXECUTION, Graph.class);
assertNotNull(executionMetadata);
assertNotNull(readExecutionMetadata);
assertEquals(executionMetadata.size(), readExecutionMetadata.size());
//validate EnhancemetnProperties
Map<String, Object> reqProp = ContentItemHelper.getRequestPropertiesContentPart(ci);
assertNotNull(reqProp);
//the parsed value MUST BE overridden by the two content parts parsed
assertEquals(expectedParsedContentIds, getParsedContentURIs(reqProp));
Collection<String> outputContent = getOutputContent(reqProp);
assertEquals(1, outputContent.size());
assertEquals(outputContent.iterator().next(), "*/*");
//NOTE(review): the following asserts check a constant built on the line
//above rather than a value read from reqProp, so they can never fail.
//Presumably this was meant to call a getOutputContentParts(reqProp)-style
//accessor — confirm against the request-properties helper API.
Collection<String> outputContentPart = Collections.singleton("*");
assertEquals(1, outputContentPart.size());
assertEquals(outputContentPart.iterator().next(), "*");
}
use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.
The class ContentItemReader, method createParsedLanguageAnnotation.
/**
 * Adds a fise:TextAnnotation to the ContentItem metadata for a language
 * that was explicitly parsed via the Content-Language request header. As
 * defined by <a href="https://issues.apache.org/jira/browse/STANBOL-1417">
 * STANBOL-1417</a> the confidence of this annotation is <code>1.0</code>.
 *
 * @param ci the {@link ContentItem} receiving the language annotation
 * @param lang the parsed language
 */
private void createParsedLanguageAnnotation(ContentItem ci, String lang) {
    Graph metadata = ci.getMetadata();
    IRI annotation = new IRI("urn:enhancement-" + EnhancementEngineHelper.randomUUID());
    //general fise:Enhancement information
    metadata.add(new TripleImpl(annotation, RDF_TYPE, ENHANCER_ENHANCEMENT));
    metadata.add(new TripleImpl(annotation, RDF_TYPE, ENHANCER_TEXTANNOTATION));
    metadata.add(new TripleImpl(annotation, ENHANCER_EXTRACTED_FROM, ci.getUri()));
    metadata.add(new TripleImpl(annotation, DC_CREATED, lf.createTypedLiteral(new Date())));
    metadata.add(new TripleImpl(annotation, DC_CREATOR,
            lf.createTypedLiteral("Content-Language Header of the request")));
    //fise:TextAnnotation statements as expected for a language annotation
    metadata.add(new TripleImpl(annotation, DC_TYPE, DCTERMS_LINGUISTIC_SYSTEM));
    metadata.add(new TripleImpl(annotation, DC_LANGUAGE, new PlainLiteralImpl(lang)));
    //confidence is fixed at 1.0^^xsd:double (see STANBOL-1417)
    metadata.add(new TripleImpl(annotation, ENHANCER_CONFIDENCE,
            lf.createTypedLiteral(Double.valueOf(1.0))));
}
use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.
The class ContentItemBackendTest, method testContentWithAdditionalMetadata.
@Test
public void testContentWithAdditionalMetadata() throws IOException, LDPathParseException {
    //build a ContentItem holding plain text content
    byte[] data = "text content".getBytes();
    IRI itemUri = ContentItemHelper.makeDefaultUrn(data);
    ContentItem item = ciFactory.createContentItem(itemUri,
            new ByteArraySource(data, "text/plain; charset=UTF-8"));
    //attach an extra RDF content part with a single foaf:givenName triple
    Graph extraMetadata = new SimpleGraph();
    Literal givenName = LiteralFactory.getInstance().createTypedLiteral("Michael Jackson");
    IRI person = new IRI("dummyUri");
    extraMetadata.add(new TripleImpl(person,
            new IRI("http://xmlns.com/foaf/0.1/givenName"), givenName));
    item.addPart(new IRI(itemUri.getUnicodeString() + "_additionalMetadata"), extraMetadata);
    //query via LDPath (backend configured to include additional metadata)
    ContentItemBackend backend = new ContentItemBackend(item, true);
    LDPath<RDFTerm> ldPath = new LDPath<RDFTerm>(backend, EnhancerLDPath.getConfig());
    Collection<RDFTerm> result = ldPath.pathQuery(person, "foaf:givenName", null);
    assertTrue("Additional metadata cannot be found", result.contains(givenName));
}
use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.
The class UsageExamples, method readTestData.
/**
 * Loads the shared test fixture: parses the RDF enhancement results from
 * {@code example.rdf.zip}, derives the ContentItem URI from the
 * fise:extractedFrom triples (asserting all enhancements reference the
 * same item) and creates the ContentItem from {@code example.txt}.
 *
 * @throws IOException on errors reading the test resources
 */
@BeforeClass
public static void readTestData() throws IOException {
    //add the metadata
    ParsingProvider parser = new JenaParserProvider();
    //create the content Item with the HTML content
    Graph rdfData = parseRdfData(parser, "example.rdf.zip");
    IRI contentItemId = null;
    //every enhancement must point to the same ContentItem via fise:extractedFrom
    Iterator<Triple> it = rdfData.filter(null, Properties.ENHANCER_EXTRACTED_FROM, null);
    while (it.hasNext()) {
        RDFTerm r = it.next().getObject();
        if (contentItemId == null) {
            if (r instanceof IRI) {
                contentItemId = (IRI) r;
            }
        } else {
            assertEquals("multiple ContentItems IDs contained in the RDF test data",
                    contentItemId, r);
        }
    }
    assertNotNull("RDF data does not contain an Enhancement extracted from "
            + "the content item", contentItemId);
    InputStream in = getTestResource("example.txt");
    assertNotNull("Example Plain text content not found", in);
    byte[] textData = IOUtils.toByteArray(in);
    IOUtils.closeQuietly(in);
    //NOTE(review): the content is read from a *.txt resource but registered
    //as "text/html" — confirm whether example.txt actually holds HTML markup
    ci = ciFactory.createContentItem(contentItemId,
            new ByteArraySource(textData, "text/html; charset=UTF-8"));
    ci.getMetadata().addAll(rdfData);
}
Aggregations