use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.
the class TestHtmlExtractor method testRootExtraction.
/** This tests the merging of disconnected graphs under a single root
 *
 * @throws Exception
 */
@Test
public void testRootExtraction() throws Exception {
    HtmlExtractor extractor = new HtmlExtractor(registry, parser);
    Graph model = new SimpleGraph();
    String testFile = "test-MultiRoot.html";
    // extract text from RDFa annotated html
    InputStream in = getResourceAsStream(testFile);
    assertNotNull("failed to load resource " + testFile, in);
    extractor.extract("file://" + testFile, in, null, "text/html", model);
    // show triples
    int tripleCounter = model.size();
    LOG.debug("Triples: {}", tripleCounter);
    printTriples(model);
    Set<BlankNodeOrIRI> roots = ClerezzaRDFUtils.findRoots(model);
    assertTrue(roots.size() > 1);
    ClerezzaRDFUtils.makeConnected(model, new IRI("file://" + testFile), new IRI(NIE_NS + "contains"));
    roots = ClerezzaRDFUtils.findRoots(model);
    assertEquals(1, roots.size());
}
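The root-merging idiom is independent of HTML extraction. A minimal sketch on a hand-built graph, assuming the same Clerezza types imported by the test (the IRIs and triples are illustrative, not from the Stanbol test suite):

Graph g = new SimpleGraph();
// two disconnected subgraphs, hence two roots (urn:a and urn:c)
g.add(new TripleImpl(new IRI("urn:a"), new IRI("urn:p"), new IRI("urn:b")));
g.add(new TripleImpl(new IRI("urn:c"), new IRI("urn:p"), new IRI("urn:d")));
assert ClerezzaRDFUtils.findRoots(g).size() == 2;
// link every root under a single synthetic root via the given predicate
ClerezzaRDFUtils.makeConnected(g, new IRI("urn:root"), new IRI("urn:contains"));
assert ClerezzaRDFUtils.findRoots(g).size() == 1;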
use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.
the class ClerezzaRDFUtils method urifyBlankNodes.
public static void urifyBlankNodes(Graph model) {
    HashMap<BlankNode, IRI> blankNodeMap = new HashMap<BlankNode, IRI>();
    Graph remove = new SimpleGraph();
    Graph add = new SimpleGraph();
    for (Triple t : model) {
        BlankNodeOrIRI subj = t.getSubject();
        RDFTerm obj = t.getObject();
        IRI pred = t.getPredicate();
        boolean match = false;
        if (subj instanceof BlankNode) {
            match = true;
            IRI ru = blankNodeMap.get(subj);
            if (ru == null) {
                ru = createRandomUri();
                blankNodeMap.put((BlankNode) subj, ru);
            }
            subj = ru;
        }
        if (obj instanceof BlankNode) {
            match = true;
            IRI ru = blankNodeMap.get(obj);
            if (ru == null) {
                ru = createRandomUri();
                blankNodeMap.put((BlankNode) obj, ru);
            }
            obj = ru;
        }
        if (match) {
            remove.add(t);
            add.add(new TripleImpl(subj, pred, obj));
        }
    }
    model.removeAll(remove);
    model.addAll(add);
}
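A hedged usage sketch, assuming the Clerezza commons-rdf types shown above (the triple content is illustrative): after the call, every blank node has been replaced by a freshly minted IRI, and the same blank node always maps to the same IRI.

Graph model = new SimpleGraph();
BlankNode bnode = new BlankNode();
model.add(new TripleImpl(bnode, new IRI("urn:p"), new PlainLiteralImpl("v")));
ClerezzaRDFUtils.urifyBlankNodes(model);
for (Triple t : model) {
    // every former blank node is now an IRI
    assert !(t.getSubject() instanceof BlankNode);
    assert !(t.getObject() instanceof BlankNode);
}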
use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.
the class RdfSerializingWriter method getRecipe.
private GraphNode getRecipe(String templatePath) {
    Graph rg = recipesGraphProvider.getRecipesGraph();
    GraphNode literalNode = new GraphNode(new PlainLiteralImpl(templatePath), rg);
    Iterator<GraphNode> recipes = literalNode.getSubjectNodes(RECIPES.recipeDomain);
    if (recipes.hasNext()) {
        return recipes.next();
    } else {
        return null;
    }
}
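Note that getSubjectNodes traverses the graph in reverse: starting from the literal, it finds the subjects that point to it via the given predicate. A sketch of the same inverse-traversal idiom on a self-contained graph (the predicate IRI stands in for RECIPES.recipeDomain and is an assumption):

Graph g = new SimpleGraph();
IRI recipe = new IRI("urn:recipe1");
IRI recipeDomain = new IRI("urn:recipeDomain"); // illustrative stand-in for RECIPES.recipeDomain
g.add(new TripleImpl(recipe, recipeDomain, new PlainLiteralImpl("/templates/page")));
// start from the literal and follow the predicate backwards to its subject
GraphNode node = new GraphNode(new PlainLiteralImpl("/templates/page"), g);
Iterator<GraphNode> subjects = node.getSubjectNodes(recipeDomain);
assert subjects.hasNext() && subjects.next().getNode().equals(recipe);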
use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.
the class SparqlEndpointResource method sparql.
//TODO re-enable
/*@OPTIONS
public Response handleCorsPreflight(@Context HttpHeaders headers) {
    ResponseBuilder res = Response.ok();
    enableCORS(servletContext, res, headers);
    return res.build();
}*/
/**
 * HTTP GET service to execute SPARQL queries on {@link Graph}s registered to the OSGi environment.
 * If a <code>null</code> query is passed, it is assumed that the request is coming from the HTML
 * interface of the SPARQL endpoint. Otherwise the query is executed on the triple collection
 * specified by <code>graphUri</code>; if no graph URI is passed, the triple collection with the
 * highest service.ranking value is chosen.
 *
 * The type of the result is determined by the type of the query: if the specified query is
 * either a <b>describe query</b> or a <b>construct query</b>, results are returned in
 * <b>application/rdf+xml</b> format, otherwise in <b>application/sparql-results+xml</b> format.
 *
 * @param graphUri
 *            the URI of the graph on which the SPARQL query will be executed
 * @param sparqlQuery
 *            SPARQL query to be executed
 * @param headers
 *            HTTP request headers
 * @throws InvalidSyntaxException Invalid SPARQL Syntax Exception
 * @return HTTP Response
 */
@GET
@Consumes(APPLICATION_FORM_URLENCODED)
@Produces({ TEXT_HTML + ";qs=2", "application/sparql-results+xml", "application/rdf+xml" })
public Response sparql(@QueryParam(value = "graphuri") String graphUri,
                       @QueryParam(value = "query") String sparqlQuery,
                       @Context HttpHeaders headers) throws InvalidSyntaxException {
    if (sparqlQuery == null) {
        populateGraphList(getServices(null));
        return Response.ok(new Viewable("index", this), TEXT_HTML).build();
    }
    String mediaType = "application/sparql-results+xml";
    Graph tripleCollection = getGraph(graphUri);
    ResponseBuilder rb;
    if (tripleCollection != null) {
        Object result;
        try {
            result = tcManager.executeSparqlQuery(sparqlQuery, tripleCollection);
            if (result instanceof Graph) {
                mediaType = "application/rdf+xml";
            }
            rb = Response.ok(result, mediaType);
        } catch (ParseException e) {
            rb = Response.status(Status.BAD_REQUEST).entity(e.getMessage());
        }
    } else {
        rb = Response.status(Status.NOT_FOUND).entity(
                String.format("There is no registered graph with given uri: %s", graphUri));
    }
    //addCORSOrigin(servletContext, rb, headers);
    return rb.build();
}
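A minimal client sketch against this endpoint, assuming it is mounted at http://localhost:8080/sparql (the URL is illustrative; the Accept header selects the SPARQL results format documented above):

import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLEncoder;

public class SparqlClient {
    public static void main(String[] args) throws Exception {
        String query = "SELECT ?s WHERE { ?s ?p ?o } LIMIT 10";
        // endpoint URL is an assumption; adjust to the actual Stanbol mount point
        URL url = new URL("http://localhost:8080/sparql?query="
                + URLEncoder.encode(query, "UTF-8"));
        HttpURLConnection con = (HttpURLConnection) url.openConnection();
        con.setRequestProperty("Accept", "application/sparql-results+xml");
        try (InputStream in = con.getInputStream()) {
            // readAllBytes requires Java 9+
            System.out.println(new String(in.readAllBytes(), "UTF-8"));
        }
    }
}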
use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.
the class ContentItemReader method readFrom.
@Override
public ContentItem readFrom(Class<ContentItem> type, Type genericType, Annotation[] annotations,
        MediaType mediaType, MultivaluedMap<String, String> httpHeaders,
        InputStream entityStream) throws IOException, WebApplicationException {
    //boolean withMetadata = withMetadata(httpHeaders);
    ContentItem contentItem = null;
    IRI contentItemId = getContentItemId();
    if (log.isTraceEnabled()) {
        //NOTE: enabling TRACE level logging will copy the parsed content
        //      into a byte array
        log.trace("Parse ContentItem from");
        log.trace("  - MediaType: {}", mediaType);
        log.trace("  - Headers:");
        for (Entry<String, List<String>> header : httpHeaders.entrySet()) {
            log.trace("      {}: {}", header.getKey(), header.getValue());
        }
        byte[] content = IOUtils.toByteArray(entityStream);
        log.trace("content: \n{}", new String(content, "UTF-8"));
        IOUtils.closeQuietly(entityStream);
        entityStream = new ByteArrayInputStream(content);
    }
    Set<String> parsedContentIds = new HashSet<String>();
    if (mediaType.isCompatible(MULTIPART)) {
        log.debug(" - parse Multipart MIME ContentItem");
        //try to read ContentItem from "multipart/form-data"
        Graph metadata = null;
        FileItemIterator fileItemIterator;
        try {
            fileItemIterator = fu.getItemIterator(new MessageBodyReaderContext(entityStream, mediaType));
            while (fileItemIterator.hasNext()) {
                FileItemStream fis = fileItemIterator.next();
                if (fis.getFieldName().equals("metadata")) {
                    if (contentItem != null) {
                        throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST)
                                .entity("The Multipart MIME part with the 'metadata' "
                                        + "MUST BE before the MIME part containing the "
                                        + "'content'!").build());
                    }
                    //only used if not parsed as query param
                    if (contentItemId == null && fis.getName() != null && !fis.getName().isEmpty()) {
                        contentItemId = new IRI(fis.getName());
                    }
                    metadata = new IndexedGraph();
                    try {
                        getParser().parse(metadata, fis.openStream(), fis.getContentType());
                    } catch (Exception e) {
                        throw new WebApplicationException(e, Response.status(Response.Status.BAD_REQUEST)
                                .entity(String.format("Unable to parse Metadata "
                                        + "from Multipart MIME part '%s' ("
                                        + "contentItem: %s | contentType: %s)",
                                        fis.getFieldName(), fis.getName(), fis.getContentType())).build());
                    }
                } else if (fis.getFieldName().equals("content")) {
                    contentItem = createContentItem(contentItemId, metadata, fis, parsedContentIds);
                } else if (fis.getFieldName().equals("properties")
                        || fis.getFieldName().equals(REQUEST_PROPERTIES_URI.getUnicodeString())) {
                    //parse the RequestProperties
                    if (contentItem == null) {
                        throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST)
                                .entity("Multipart MIME parts for "
                                        + "Request Properties MUST BE after the "
                                        + "MIME parts for 'metadata' AND 'content'").build());
                    }
                    MediaType propMediaType = MediaType.valueOf(fis.getContentType());
                    if (!APPLICATION_JSON_TYPE.isCompatible(propMediaType)) {
                        throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST)
                                .entity("Request Properties (Multipart MIME parts "
                                        + "with the name '" + fis.getFieldName() + "') MUST "
                                        + "BE encoded as 'application/json' (encountered: '"
                                        + fis.getContentType() + "')!").build());
                    }
                    String propCharset = propMediaType.getParameters().get("charset");
                    if (propCharset == null) {
                        propCharset = "UTF-8";
                    }
                    Map<String, Object> reqProp = ContentItemHelper.initRequestPropertiesContentPart(contentItem);
                    try {
                        reqProp.putAll(toMap(new JSONObject(IOUtils.toString(fis.openStream(), propCharset))));
                    } catch (JSONException e) {
                        throw new WebApplicationException(e, Response.status(Response.Status.BAD_REQUEST)
                                .entity("Unable to parse Request Properties from "
                                        + "Multipart MIME parts with the name 'properties'!").build());
                    }
                } else {
                    //additional metadata as serialised RDF
                    if (contentItem == null) {
                        throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST)
                                .entity("Multipart MIME parts for additional "
                                        + "contentParts MUST BE after the MIME "
                                        + "parts for 'metadata' AND 'content'").build());
                    }
                    if (fis.getFieldName() == null || fis.getFieldName().isEmpty()) {
                        throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST)
                                .entity("Multipart MIME parts representing "
                                        + "ContentParts for additional RDF metadata "
                                        + "MUST define the contentPart's URI as "
                                        + "'name' of the MIME part!").build());
                    }
                    Graph graph = new IndexedGraph();
                    try {
                        getParser().parse(graph, fis.openStream(), fis.getContentType());
                    } catch (Exception e) {
                        throw new WebApplicationException(e, Response.status(Response.Status.BAD_REQUEST)
                                .entity(String.format("Unable to parse RDF "
                                        + "for ContentPart '%s' (contentType: %s)",
                                        fis.getName(), fis.getContentType())).build());
                    }
                    IRI contentPartId = new IRI(fis.getFieldName());
                    contentItem.addPart(contentPartId, graph);
                }
            }
            if (contentItem == null) {
                throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST)
                        .entity("The parsed multipart content item does not contain "
                                + "any content. The content is expected to be contained "
                                + "in a MIME part with the name 'content'. This part can "
                                + "also be a 'multipart/alternate' if multiple content "
                                + "parts need to be included in requests.").build());
            }
        } catch (FileUploadException e) {
            throw new WebApplicationException(e, Response.Status.BAD_REQUEST);
        }
    } else {
        //normal content
        ContentItemFactory ciFactory = getContentItemFactory();
        contentItem = ciFactory.createContentItem(contentItemId,
                new StreamSource(entityStream, mediaType.toString()));
        //add the URI of the main content
        parsedContentIds.add(contentItem.getPartUri(0).getUnicodeString());
    }
    //set the parsed contentIDs to the EnhancementProperties
    Map<String, Object> ep = ContentItemHelper.initRequestPropertiesContentPart(contentItem);
    parseEnhancementPropertiesFromParameters(ep);
    ep.put(PARSED_CONTENT_URIS, Collections.unmodifiableSet(parsedContentIds));
    //STANBOL-660: set the language of the content if explicitly parsed in the request
    String contentLanguage = getContentLanguage();
    if (!StringUtils.isBlank(contentLanguage)) {
        //language codes are case insensitive ... so we convert to lower case
        contentLanguage = contentLanguage.toLowerCase(Locale.ROOT);
        createParsedLanguageAnnotation(contentItem, contentLanguage);
        // Previously only the dc:language property was set on the contentItem. That
        // information is only used as a fallback if no Language annotation is present.
        // However, if a user explicitly parses the language, they expect this language
        // to be used, so this was changed with STANBOL-1417.
        // EnhancementEngineHelper.set(contentItem.getMetadata(), contentItem.getUri(),
        //     DC_LANGUAGE, new PlainLiteralImpl(contentLanguage));
    }
    return contentItem;
}
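A hedged client sketch that builds the multipart request this reader expects, assuming Apache HttpClient's httpmime module (the endpoint URL and part contents are illustrative). Note the ordering constraint enforced above: the 'metadata' part must precede the 'content' part, and the filename of the 'metadata' part carries the ContentItem URI.

import org.apache.http.HttpEntity;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.mime.MultipartEntityBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;

public class MultipartContentItemClient {
    public static void main(String[] args) throws Exception {
        String rdf = "<rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"/>";
        // part order matters: 'metadata' MUST come before 'content' (see readFrom above);
        // the filename of the 'metadata' part carries the ContentItem URI
        HttpEntity entity = MultipartEntityBuilder.create()
                .addBinaryBody("metadata", rdf.getBytes("UTF-8"),
                        ContentType.create("application/rdf+xml"), "urn:content-item-1")
                .addTextBody("content", "Text to enhance", ContentType.TEXT_PLAIN)
                .build();
        // endpoint URL is an assumption; adjust to the actual Stanbol mount point
        HttpPost post = new HttpPost("http://localhost:8080/enhancer");
        post.setEntity(entity);
        try (CloseableHttpClient client = HttpClients.createDefault()) {
            System.out.println(client.execute(post).getStatusLine());
        }
    }
}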