Use of org.apache.stanbol.commons.indexedgraph.IndexedGraph in project stanbol by apache.
The class ContentItemReader, method readFrom.
@Override
public ContentItem readFrom(Class<ContentItem> type, Type genericType, Annotation[] annotations, MediaType mediaType, MultivaluedMap<String, String> httpHeaders, InputStream entityStream) throws IOException, WebApplicationException {
//boolean withMetadata = withMetadata(httpHeaders);
ContentItem contentItem = null;
IRI contentItemId = getContentItemId();
if (log.isTraceEnabled()) {
//NOTE: enabling TRACE level logging will copy the parsed content
// into a BYTE array
log.trace("Parse ContentItem from");
log.trace(" - MediaType: {}", mediaType);
log.trace(" - Headers:");
for (Entry<String, List<String>> header : httpHeaders.entrySet()) {
log.trace(" {}: {}", header.getKey(), header.getValue());
}
byte[] content = IOUtils.toByteArray(entityStream);
log.trace("content: \n{}", new String(content, "UTF-8"));
IOUtils.closeQuietly(entityStream);
entityStream = new ByteArrayInputStream(content);
}
Set<String> parsedContentIds = new HashSet<String>();
if (mediaType.isCompatible(MULTIPART)) {
log.debug(" - parse Multipart MIME ContentItem");
//try to read the ContentItem from "multipart/form-data"
Graph metadata = null;
FileItemIterator fileItemIterator;
try {
fileItemIterator = fu.getItemIterator(new MessageBodyReaderContext(entityStream, mediaType));
while (fileItemIterator.hasNext()) {
FileItemStream fis = fileItemIterator.next();
if (fis.getFieldName().equals("metadata")) {
if (contentItem != null) {
throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).entity("The Multipart MIME part with the 'metadata' " + "MUST BE before the MIME part containing the " + "'content'!").build());
}
//only used if not parsed as query param
if (contentItemId == null && fis.getName() != null && !fis.getName().isEmpty()) {
contentItemId = new IRI(fis.getName());
}
metadata = new IndexedGraph();
try {
getParser().parse(metadata, fis.openStream(), fis.getContentType());
} catch (Exception e) {
throw new WebApplicationException(e, Response.status(Response.Status.BAD_REQUEST).entity(String.format("Unable to parse Metadata " + "from Multipart MIME part '%s' (" + "contentItem: %s | contentType: %s)", fis.getFieldName(), fis.getName(), fis.getContentType())).build());
}
} else if (fis.getFieldName().equals("content")) {
contentItem = createContentItem(contentItemId, metadata, fis, parsedContentIds);
} else if (fis.getFieldName().equals("properties") || fis.getFieldName().equals(REQUEST_PROPERTIES_URI.getUnicodeString())) {
//parse the RequestProperties
if (contentItem == null) {
throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).entity("Multipart MIME parts for " + "Request Properties MUST BE after the " + "MIME parts for 'metadata' AND 'content'").build());
}
MediaType propMediaType = MediaType.valueOf(fis.getContentType());
if (!APPLICATION_JSON_TYPE.isCompatible(propMediaType)) {
throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).entity("Request Properties (Multipart MIME parts " + "with the name '" + fis.getFieldName() + "') MUST " + "BE encoded as 'application/json' (encountered: '" + fis.getContentType() + "')!").build());
}
String propCharset = propMediaType.getParameters().get("charset");
if (propCharset == null) {
propCharset = "UTF-8";
}
Map<String, Object> reqProp = ContentItemHelper.initRequestPropertiesContentPart(contentItem);
try {
reqProp.putAll(toMap(new JSONObject(IOUtils.toString(fis.openStream(), propCharset))));
} catch (JSONException e) {
throw new WebApplicationException(e, Response.status(Response.Status.BAD_REQUEST).entity("Unable to parse Request Properties from " + "Multipart MIME parts with the name 'properties'!").build());
}
} else {
//additional metadata as serialised RDF
if (contentItem == null) {
throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).entity("Multipart MIME parts for additional " + "contentParts MUST BE after the MIME " + "parts for 'metadata' AND 'content'").build());
}
if (fis.getFieldName() == null || fis.getFieldName().isEmpty()) {
throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).entity("Multipart MIME parts representing " + "ContentParts for additional RDF metadata " + "MUST define the contentPart's URI as the " + "'name' of the MIME part!").build());
}
Graph graph = new IndexedGraph();
try {
getParser().parse(graph, fis.openStream(), fis.getContentType());
} catch (Exception e) {
throw new WebApplicationException(e, Response.status(Response.Status.BAD_REQUEST).entity(String.format("Unable to parse RDF " + "for ContentPart '%s' (contentType: %s)", fis.getName(), fis.getContentType())).build());
}
IRI contentPartId = new IRI(fis.getFieldName());
contentItem.addPart(contentPartId, graph);
}
}
if (contentItem == null) {
throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).entity("The parsed multipart content item does not contain " + "any content. The content is expected to be contained " + "in a MIME part with the name 'content'. This part can " + "also be a 'multipart/alternate' if multiple content " + "parts need to be included in requests.").build());
}
} catch (FileUploadException e) {
throw new WebApplicationException(e, Response.Status.BAD_REQUEST);
}
} else {
//normal content
ContentItemFactory ciFactory = getContentItemFactory();
contentItem = ciFactory.createContentItem(contentItemId, new StreamSource(entityStream, mediaType.toString()));
//add the URI of the main content
parsedContentIds.add(contentItem.getPartUri(0).getUnicodeString());
}
//set the parsed contentIDs to the EnhancementProperties
Map<String, Object> ep = ContentItemHelper.initRequestPropertiesContentPart(contentItem);
parseEnhancementPropertiesFromParameters(ep);
ep.put(PARSED_CONTENT_URIS, Collections.unmodifiableSet(parsedContentIds));
//STANBOL-660: set the language of the content if explicitly parsed in the request
String contentLanguage = getContentLanguage();
if (!StringUtils.isBlank(contentLanguage)) {
//language codes are case insensitive ... so we convert to lower case
contentLanguage = contentLanguage.toLowerCase(Locale.ROOT);
createParsedLanguageAnnotation(contentItem, contentLanguage);
// previously only the dc:language property was set on the contentItem. However this
// information is only used as a fallback if no language annotation is present. If a
// user explicitly parses the language he expects this language to be used,
// so this was changed with STANBOL-1417
// EnhancementEngineHelper.set(contentItem.getMetadata(), contentItem.getUri(),
// DC_LANGUAGE, new PlainLiteralImpl(contentLanguage));
}
return contentItem;
}
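For context, the multipart layout this reader expects can be exercised from the client side: the 'metadata' part must precede the 'content' part, and the file name of the metadata part carries the ContentItem URI (see the fis.getName() handling above). Below is a minimal, hedged sketch using java.net.http (Java 11+); the endpoint URL and the part contents are placeholders, not taken from the Stanbol source.

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.charset.StandardCharsets;

public class MultipartContentItemClient {

    public static void main(String[] args) throws Exception {
        String boundary = "contentItemBoundary";
        String crlf = "\r\n";
        //the 'metadata' part MUST come before the 'content' part,
        //as enforced by ContentItemReader#readFrom(..)
        String body =
              "--" + boundary + crlf
            + "Content-Disposition: form-data; name=\"metadata\"; filename=\"urn:test:item1\"" + crlf
            + "Content-Type: application/rdf+xml" + crlf + crlf
            + "<rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"/>" + crlf
            + "--" + boundary + crlf
            + "Content-Disposition: form-data; name=\"content\"" + crlf
            + "Content-Type: text/plain" + crlf + crlf
            + "Paris is the capital of France." + crlf
            + "--" + boundary + "--" + crlf;
        HttpRequest request = HttpRequest.newBuilder()
            //hypothetical enhancer endpoint, adapt to the actual deployment
            .uri(URI.create("http://localhost:8080/enhancer"))
            .header("Content-Type", "multipart/form-data; boundary=" + boundary)
            .POST(HttpRequest.BodyPublishers.ofString(body, StandardCharsets.UTF_8))
            .build();
        HttpResponse<String> response = HttpClient.newHttpClient()
            .send(request, HttpResponse.BodyHandlers.ofString());
        System.out.println(response.statusCode());
    }
}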
Use of org.apache.stanbol.commons.indexedgraph.IndexedGraph in project stanbol by apache.
The class ContentItemResource, method getPlacesAsJSON.
/**
* @return an RDF/JSON description of places for the word map widget
*/
public String getPlacesAsJSON() throws ParseException, UnsupportedEncodingException {
Graph g = new IndexedGraph();
LiteralFactory lf = LiteralFactory.getInstance();
Graph metadata = contentItem.getMetadata();
for (EntityExtractionSummary p : getPlaceOccurrences()) {
EntitySuggestion bestGuess = p.getBestGuess();
if (bestGuess == null) {
continue;
}
IRI uri = new IRI(bestGuess.getUri());
Iterator<Triple> latitudes = metadata.filter(uri, GEO_LAT, null);
if (latitudes.hasNext()) {
g.add(latitudes.next());
}
Iterator<Triple> longitudes = metadata.filter(uri, GEO_LONG, null);
if (longitudes.hasNext()) {
g.add(longitudes.next());
g.add(new TripleImpl(uri, Properties.RDFS_LABEL, lf.createTypedLiteral(bestGuess.getLabel())));
}
}
ByteArrayOutputStream out = new ByteArrayOutputStream();
serializer.serialize(out, g, SupportedFormat.RDF_JSON);
String rdfString = out.toString("utf-8");
return rdfString;
}
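The serialized result uses the RDF/JSON format: a map from subject URI to predicate URIs, each holding an array of value objects. A hedged illustration of the shape of the returned string for a single place follows; the entity URI and coordinate values are made-up examples, not output from the source.

{
  "http://dbpedia.org/resource/Paris": {
    "http://www.w3.org/2003/01/geo/wgs84_pos#lat": [
      {"type": "literal", "value": "48.8567", "datatype": "http://www.w3.org/2001/XMLSchema#double"}
    ],
    "http://www.w3.org/2003/01/geo/wgs84_pos#long": [
      {"type": "literal", "value": "2.3508", "datatype": "http://www.w3.org/2001/XMLSchema#double"}
    ],
    "http://www.w3.org/2000/01/rdf-schema#label": [
      {"type": "literal", "value": "Paris", "datatype": "http://www.w3.org/2001/XMLSchema#string"}
    ]
  }
}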
Use of org.apache.stanbol.commons.indexedgraph.IndexedGraph in project stanbol by apache.
The class SparqlSearcher, method find.
@Override
public final QueryResultList<Representation> find(FieldQuery parsedQuery) throws IOException {
long start = System.currentTimeMillis();
final SparqlFieldQuery query = SparqlFieldQueryFactory.getSparqlFieldQuery(parsedQuery);
String sparqlQuery = query.toSparqlConstruct();
long initEnd = System.currentTimeMillis();
log.debug(" > InitTime: " + (initEnd - start));
log.debug(" > SPARQL query:\n" + sparqlQuery);
InputStream in = SparqlEndpointUtils.sendSparqlRequest(getQueryUri(), sparqlQuery, DEFAULT_RDF_CONTENT_TYPE);
long queryEnd = System.currentTimeMillis();
log.debug(" > QueryTime: " + (queryEnd - initEnd));
if (in != null) {
Graph graph;
Graph rdfData = parser.parse(in, DEFAULT_RDF_CONTENT_TYPE, new IRI(getBaseUri()));
if (rdfData instanceof Graph) {
graph = (Graph) rdfData;
} else {
graph = new IndexedGraph(rdfData);
}
long parseEnd = System.currentTimeMillis();
log.debug(" > ParseTime: " + (parseEnd - queryEnd));
return new RdfQueryResultList(query, graph);
} else {
return null;
}
}
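A hedged usage sketch of find(..): the field URI, the text value, and the already configured searcher instance are assumptions for illustration, while FieldQuery, TextConstraint, and DefaultQueryFactory are part of the Entityhub query API. Note that find(..) may return null when the endpoint sends no data, hence the null check.

import org.apache.stanbol.entityhub.core.query.DefaultQueryFactory;
import org.apache.stanbol.entityhub.servicesapi.model.Representation;
import org.apache.stanbol.entityhub.servicesapi.query.FieldQuery;
import org.apache.stanbol.entityhub.servicesapi.query.QueryResultList;
import org.apache.stanbol.entityhub.servicesapi.query.TextConstraint;
import java.io.IOException;

public class SparqlSearcherExample {

    //assumes 'searcher' is an already configured SparqlSearcher instance
    static void findByLabel(SparqlSearcher searcher) throws IOException {
        FieldQuery query = DefaultQueryFactory.getInstance().createFieldQuery();
        //constrain on rdfs:label and select it for the results
        query.setConstraint("http://www.w3.org/2000/01/rdf-schema#label",
                new TextConstraint("Paris"));
        query.addSelectedField("http://www.w3.org/2000/01/rdf-schema#label");
        query.setLimit(10);
        QueryResultList<Representation> results = searcher.find(query);
        if (results != null) {
            for (Representation r : results) {
                System.out.println(r.getId());
            }
        }
    }
}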
Use of org.apache.stanbol.commons.indexedgraph.IndexedGraph in project stanbol by apache.
The class RepresentationReader, method parseFromContent.
public Map<String, Representation> parseFromContent(RequestData content, MediaType acceptedMediaType) {
// (3) Parse the Representation(s) from the entity stream
if (content.getMediaType().isCompatible(MediaType.APPLICATION_JSON_TYPE)) {
//parse from json
throw new UnsupportedOperationException("Parsing of JSON not yet implemented :(");
} else if (isSupported(content.getMediaType())) {
//from RDF serialisation
RdfValueFactory valueFactory = RdfValueFactory.getInstance();
Map<String, Representation> representations = new HashMap<String, Representation>();
Set<BlankNodeOrIRI> processed = new HashSet<BlankNodeOrIRI>();
Graph graph = new IndexedGraph();
try {
parser.parse(graph, content.getEntityStream(), content.getMediaType().toString());
} catch (UnsupportedParsingFormatException e) {
//String acceptedMediaType = httpHeaders.getFirst("Accept");
//throw an internal server error, because we check in
//isReadable(..) for supported types and still got an
//unsupported format here -> therefore it looks like a configuration
//error on the server (e.g. a missing bundle providing the required parser)
String message = "Unable to create the Parser for the supported format " + content.getMediaType() + " (" + e + ")";
log.error(message, e);
throw new WebApplicationException(Response.status(Status.INTERNAL_SERVER_ERROR).entity(message).header(HttpHeaders.ACCEPT, acceptedMediaType).build());
} catch (RuntimeException e) {
//NOTE: Clerezza seems not to provide specific exceptions on
// parsing errors. Hence the catch-all for RuntimeException
String message = "Unable to parse the provided RDF data (format: " + content.getMediaType() + ", message: " + e.getMessage() + ")";
log.error(message, e);
throw new WebApplicationException(Response.status(Status.BAD_REQUEST).entity(message).header(HttpHeaders.ACCEPT, acceptedMediaType).build());
}
for (Iterator<Triple> st = graph.iterator(); st.hasNext(); ) {
BlankNodeOrIRI resource = st.next().getSubject();
if (resource instanceof IRI && processed.add(resource)) {
//build a new representation
representations.put(((IRI) resource).getUnicodeString(), valueFactory.createRdfRepresentation((IRI) resource, graph));
}
}
return representations;
} else {
//unsupported media type
String message = String.format("Parsed Content-Type '%s' is not one of the supported %s", content.getMediaType(), supportedMediaTypes);
log.info("Bad Request: {}", message);
throw new WebApplicationException(Response.status(Status.BAD_REQUEST).entity(message).header(HttpHeaders.ACCEPT, acceptedMediaType).build());
}
}
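To illustrate the extraction loop above: one Representation is created per distinct IRI subject in the parsed graph, keyed by its unicode string. A minimal sketch against the same Clerezza commons-rdf API; the subject URI and label value are made-up examples.

import org.apache.clerezza.commons.rdf.Graph;
import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl;
import org.apache.clerezza.commons.rdf.impl.utils.TripleImpl;
import org.apache.stanbol.commons.indexedgraph.IndexedGraph;
import org.apache.stanbol.entityhub.model.clerezza.RdfValueFactory;
import org.apache.stanbol.entityhub.servicesapi.model.Representation;

public class RepresentationExtractionExample {

    public static void main(String[] args) {
        Graph graph = new IndexedGraph();
        IRI subject = new IRI("http://example.org/entity1"); //made-up URI
        graph.add(new TripleImpl(subject,
                new IRI("http://www.w3.org/2000/01/rdf-schema#label"),
                new PlainLiteralImpl("Entity One")));
        //parseFromContent(..) would map "http://example.org/entity1" to a
        //Representation backed by this graph
        Representation rep = RdfValueFactory.getInstance()
                .createRdfRepresentation(subject, graph);
        System.out.println(rep.getFirst("http://www.w3.org/2000/01/rdf-schema#label"));
    }
}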
Use of org.apache.stanbol.commons.indexedgraph.IndexedGraph in project stanbol by apache.
The class RdfResultListTest, method testRdfResultSorting.
/**
* Providing a sorted iteration over query results stored in an RDF
* graph is not trivial. Therefore this test checks that the result
* list iterates Representations in descending score order.
*/
@Test
public void testRdfResultSorting() {
SortedMap<Double, RdfRepresentation> sorted = new TreeMap<Double, RdfRepresentation>();
Graph resultGraph = new IndexedGraph();
RdfValueFactory vf = new RdfValueFactory(resultGraph);
IRI resultListNode = new IRI(RdfResourceEnum.QueryResultSet.getUri());
IRI resultProperty = new IRI(RdfResourceEnum.queryResult.getUri());
for (int i = 0; i < 100; i++) {
Double rank;
do {
//avoid duplicate keys
rank = Math.random();
} while (sorted.containsKey(rank));
RdfRepresentation r = vf.createRepresentation("urn:sortTest:rep." + i);
//link the representation with the query result set
resultGraph.add(new TripleImpl(resultListNode, resultProperty, r.getNode()));
r.set(RdfResourceEnum.resultScore.getUri(), rank);
sorted.put(rank, r);
}
RdfQueryResultList resultList = new RdfQueryResultList(new FieldQueryImpl(), resultGraph);
if (log.isDebugEnabled()) {
log.debug("---DEBUG Sorting ---");
for (Iterator<Representation> it = resultList.iterator(); it.hasNext(); ) {
Representation r = it.next();
log.debug("{}: {}", r.getFirst(RdfResourceEnum.resultScore.getUri()), r.getId());
}
}
log.debug("---ASSERT Sorting ---");
for (Iterator<Representation> it = resultList.iterator(); it.hasNext(); ) {
Representation r = it.next();
Double lastkey = sorted.lastKey();
Representation last = sorted.get(lastkey);
Assert.assertEquals("score: " + r.getFirst(RdfResourceEnum.resultScore.getUri()) + " of Representation " + r.getId() + " is not as expected " + last.getFirst(RdfResourceEnum.resultScore.getUri()) + " of Representation " + last.getId() + "!", r, last);
sorted.remove(lastkey);
}
Assert.assertTrue(sorted.isEmpty());
}