Search in sources :

Example 6 with Graph

use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.

the class TestHtmlExtractor method testRootExtraction.

/**
 * Checks that the disconnected sub-graphs extracted from a multi-root HTML
 * document can be merged under a single root node.
 *
 * @throws Exception on any failure while loading or extracting the test resource
 */
@Test
public void testRootExtraction() throws Exception {
    final String testFile = "test-MultiRoot.html";
    final String documentUri = "file://" + testFile;
    HtmlExtractor extractor = new HtmlExtractor(registry, parser);
    Graph extracted = new SimpleGraph();

    // extract the RDFa annotations of the test document into the graph
    InputStream html = getResourceAsStream(testFile);
    assertNotNull("failed to load resource " + testFile, html);
    extractor.extract(documentUri, html, null, "text/html", extracted);

    // show what was extracted
    LOG.debug("Triples: {}", extracted.size());
    printTriples(extracted);

    // before connecting, the extracted graph has several roots
    Set<BlankNodeOrIRI> rootsBefore = ClerezzaRDFUtils.findRoots(extracted);
    assertTrue(rootsBefore.size() > 1);

    // link everything to the document IRI and verify a single root remains
    IRI rootNode = new IRI(documentUri);
    IRI linkProperty = new IRI(NIE_NS + "contains");
    ClerezzaRDFUtils.makeConnected(extracted, rootNode, linkProperty);
    Set<BlankNodeOrIRI> rootsAfter = ClerezzaRDFUtils.findRoots(extracted);
    assertEquals(1, rootsAfter.size());
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) Graph(org.apache.clerezza.commons.rdf.Graph) InputStream(java.io.InputStream) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) HtmlExtractor(org.apache.stanbol.enhancer.engines.htmlextractor.impl.HtmlExtractor) Test(org.junit.Test)

Example 7 with Graph

use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.

the class ClerezzaRDFUtils method urifyBlankNodes.

/**
 * Replaces every blank node used as subject or object in the given graph
 * by an IRI obtained from {@code createRandomUri()}. The same blank node is
 * always mapped to the same IRI within one invocation.
 *
 * @param model the graph to rewrite in place
 */
public static void urifyBlankNodes(Graph model) {
    HashMap<BlankNode, IRI> replacements = new HashMap<BlankNode, IRI>();
    // Changes are collected first and applied after the loop, so the graph
    // is not modified while it is being iterated.
    Graph obsolete = new SimpleGraph();
    Graph rewritten = new SimpleGraph();
    for (Triple triple : model) {
        BlankNodeOrIRI subject = triple.getSubject();
        RDFTerm object = triple.getObject();
        boolean blankSubject = subject instanceof BlankNode;
        boolean blankObject = object instanceof BlankNode;
        if (!blankSubject && !blankObject) {
            // nothing to replace in this triple
            continue;
        }
        if (blankSubject) {
            BlankNode node = (BlankNode) subject;
            IRI iri = replacements.get(node);
            if (iri == null) {
                iri = createRandomUri();
                replacements.put(node, iri);
            }
            subject = iri;
        }
        if (blankObject) {
            BlankNode node = (BlankNode) object;
            IRI iri = replacements.get(node);
            if (iri == null) {
                iri = createRandomUri();
                replacements.put(node, iri);
            }
            object = iri;
        }
        obsolete.add(triple);
        rewritten.add(new TripleImpl(subject, triple.getPredicate(), object));
    }
    model.removeAll(obsolete);
    model.addAll(rewritten);
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) Graph(org.apache.clerezza.commons.rdf.Graph) HashMap(java.util.HashMap) BlankNode(org.apache.clerezza.commons.rdf.BlankNode) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Example 8 with Graph

use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.

the class RdfSerializingWriter method getRecipe.

/**
 * Looks up the recipe whose {@code RECIPES.recipeDomain} value is the
 * given template path (as a plain literal) in the recipes graph.
 *
 * @param templatePath the template path used as the literal to search for
 * @return the first matching recipe node, or {@code null} if there is none
 */
private GraphNode getRecipe(String templatePath) {
    Graph recipesGraph = recipesGraphProvider.getRecipesGraph();
    GraphNode pathLiteral = new GraphNode(new PlainLiteralImpl(templatePath), recipesGraph);
    Iterator<GraphNode> matches = pathLiteral.getSubjectNodes(RECIPES.recipeDomain);
    return matches.hasNext() ? matches.next() : null;
}
Also used : SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) Graph(org.apache.clerezza.commons.rdf.Graph) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) GraphNode(org.apache.clerezza.rdf.utils.GraphNode)

Example 9 with Graph

use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.

the class SparqlEndpointResource method sparql.

//TODO re-enable
/*@OPTIONS
    public Response handleCorsPreflight(@Context HttpHeaders headers) {
        ResponseBuilder res = Response.ok();
        enableCORS(servletContext, res, headers);
        return res.build();
    }*/
/**
 * HTTP GET service that executes a SPARQL query on a registered {@link Graph}.
 * <p>
 * If <code>sparqlQuery</code> is <code>null</code>, the request is assumed to come from the
 * HTML interface of the SPARQL endpoint: the list of graphs is populated and the "index" view
 * is returned. Otherwise the graph is resolved by <code>getGraph(graphUri)</code> and the query
 * is executed on it. According to the original documentation, the graph with the highest
 * <code>service.ranking</code> is chosen when no graph URI is passed; that selection happens
 * inside <code>getGraph</code>, which is not shown here.
 * <p>
 * The media type of the result depends on the query result: if it is a {@link Graph}
 * (<b>describe</b> and <b>construct</b> queries, per the original documentation) the response
 * is <b>application/rdf+xml</b>, otherwise <b>application/sparql-results+xml</b>.
 * A query that cannot be parsed yields a 400 response carrying the parser message; an
 * unresolvable graph yields a 404 response.
 *
 * @param graphUri
 *            the URI of the graph on which the SPARQL query will be executed
 * @param sparqlQuery
 *            the SPARQL query to be executed, or <code>null</code> to render the HTML view
 * @param headers
 *            HTTP request headers (currently unused, CORS support is disabled)
 * @throws InvalidSyntaxException
 *             declared for the service lookup done when rendering the HTML view
 *             (presumably raised by <code>getServices</code> - to be confirmed)
 * @return the HTTP response
 */
@GET
@Consumes(APPLICATION_FORM_URLENCODED)
@Produces({ TEXT_HTML + ";qs=2", "application/sparql-results+xml", "application/rdf+xml" })
public Response sparql(@QueryParam(value = "graphuri") String graphUri, @QueryParam(value = "query") String sparqlQuery, @Context HttpHeaders headers) throws InvalidSyntaxException {
    // no query: render the HTML interface
    if (sparqlQuery == null) {
        populateGraphList(getServices(null));
        return Response.ok(new Viewable("index", this), TEXT_HTML).build();
    }

    Graph target = getGraph(graphUri);
    ResponseBuilder response;
    if (target == null) {
        String notFound = String.format("There is no registered graph with given uri: %s", graphUri);
        response = Response.status(Status.NOT_FOUND).entity(notFound);
    } else {
        try {
            Object queryResult = tcManager.executeSparqlQuery(sparqlQuery, target);
            // graph results are serialized as RDF, everything else as SPARQL results
            String resultType = (queryResult instanceof Graph)
                    ? "application/rdf+xml"
                    : "application/sparql-results+xml";
            response = Response.ok(queryResult, resultType);
        } catch (ParseException e) {
            response = Response.status(Status.BAD_REQUEST).entity(e.getMessage());
        }
    }
    //addCORSOrigin(servletContext, response, headers);
    return response.build();
}
Also used : Graph(org.apache.clerezza.commons.rdf.Graph) Viewable(org.apache.stanbol.commons.web.viewable.Viewable) ParseException(org.apache.clerezza.rdf.core.sparql.ParseException) ResponseBuilder(javax.ws.rs.core.Response.ResponseBuilder) Consumes(javax.ws.rs.Consumes) Produces(javax.ws.rs.Produces) GET(javax.ws.rs.GET)

Example 10 with Graph

use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.

the class ContentItemReader method readFrom.

/**
 * Reads a {@link ContentItem} from the request entity.
 * <p>
 * For multipart requests the MIME parts are processed in the order they are sent:
 * <ul>
 * <li><code>metadata</code>: RDF parsed into the metadata graph; must precede 'content'</li>
 * <li><code>content</code>: used to create the ContentItem</li>
 * <li><code>properties</code> (or the request properties URI): JSON encoded request
 *     properties; must follow 'content'</li>
 * <li>any other name: additional RDF content part, the field name being used as the URI of
 *     the content part; must follow 'content'</li>
 * </ul>
 * For all other media types the entity stream is used as the single content of the item.
 * Finally the request properties are initialised with the enhancement properties parsed from
 * the request parameters and the URIs of the parsed content, and an explicitly requested
 * content language is added as a language annotation.
 *
 * @param type the requested type (unused)
 * @param genericType the generic type (unused)
 * @param annotations the annotations (unused)
 * @param mediaType the media type of the request entity
 * @param httpHeaders the request headers (only used for TRACE logging)
 * @param entityStream the request entity
 * @return the parsed ContentItem
 * @throws IOException on errors while reading the entity stream
 * @throws WebApplicationException with status 400 if the multipart request is malformed
 */
@Override
public ContentItem readFrom(Class<ContentItem> type, Type genericType, Annotation[] annotations, MediaType mediaType, MultivaluedMap<String, String> httpHeaders, InputStream entityStream) throws IOException, WebApplicationException {
    //boolean withMetadata = withMetadata(httpHeaders);
    ContentItem contentItem = null;
    IRI contentItemId = getContentItemId();
    if (log.isTraceEnabled()) {
        //NOTE: enabling TRACE level logging will copy the parsed content
        //      into a BYTE array
        log.trace("Parse ContentItem from");
        log.trace("  - MediaType: {}", mediaType);
        log.trace("  - Headers:");
        for (Entry<String, List<String>> header : httpHeaders.entrySet()) {
            log.trace("      {}: {}", header.getKey(), header.getValue());
        }
        byte[] content = IOUtils.toByteArray(entityStream);
        log.trace("content: \n{}", new String(content, "UTF-8"));
        IOUtils.closeQuietly(entityStream);
        // the original stream is consumed; continue with a copy of its content
        entityStream = new ByteArrayInputStream(content);
    }
    Set<String> parsedContentIds = new HashSet<String>();
    if (mediaType.isCompatible(MULTIPART)) {
        log.debug(" - parse Multipart MIME ContentItem");
        //try to read ContentItem from "multipart/form-data"
        Graph metadata = null;
        FileItemIterator fileItemIterator;
        try {
            fileItemIterator = fu.getItemIterator(new MessageBodyReaderContext(entityStream, mediaType));
            while (fileItemIterator.hasNext()) {
                FileItemStream fis = fileItemIterator.next();
                // read once and compare null-safe, so that a part without a name ends
                // up in the explicit check of the last branch instead of causing a
                // NullPointerException
                String fieldName = fis.getFieldName();
                if ("metadata".equals(fieldName)) {
                    if (contentItem != null) {
                        throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).entity("The Multipart MIME part with the 'metadata' " + "MUST BE before the MIME part containing the " + "'content'!").build());
                    }
                    //only used if not parsed as query param
                    if (contentItemId == null && fis.getName() != null && !fis.getName().isEmpty()) {
                        contentItemId = new IRI(fis.getName());
                    }
                    metadata = new IndexedGraph();
                    try {
                        getParser().parse(metadata, fis.openStream(), fis.getContentType());
                    } catch (Exception e) {
                        throw new WebApplicationException(e, Response.status(Response.Status.BAD_REQUEST).entity(String.format("Unable to parse Metadata " + "from Multipart MIME part '%s' (" + "contentItem: %s| contentType: %s)", fieldName, fis.getName(), fis.getContentType())).build());
                    }
                } else if ("content".equals(fieldName)) {
                    contentItem = createContentItem(contentItemId, metadata, fis, parsedContentIds);
                } else if ("properties".equals(fieldName) || REQUEST_PROPERTIES_URI.getUnicodeString().equals(fieldName)) {
                    //parse the RequestProperties
                    if (contentItem == null) {
                        throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).entity("Multipart MIME parts for " + "Request Properties MUST BE after the " + "MIME parts for 'metadata' AND 'content'").build());
                    }
                    MediaType propMediaType;
                    try {
                        propMediaType = MediaType.valueOf(fis.getContentType());
                    } catch (IllegalArgumentException e) {
                        // a missing or malformed Content-Type is a client error
                        throw new WebApplicationException(e, Response.status(Response.Status.BAD_REQUEST).entity(String.format("Unable to parse the content type '%s' of the " + "Request Properties Multipart MIME part '%s'!", fis.getContentType(), fieldName)).build());
                    }
                    if (!APPLICATION_JSON_TYPE.isCompatible(propMediaType)) {
                        throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).entity("Request Properties (Multipart MIME parts " + "with the name '" + fieldName + "') MUST " + "BE encoded as 'application/json' (encountered: '" + fis.getContentType() + "')!").build());
                    }
                    String propCharset = propMediaType.getParameters().get("charset");
                    if (propCharset == null) {
                        propCharset = "UTF-8";
                    }
                    Map<String, Object> reqProp = ContentItemHelper.initRequestPropertiesContentPart(contentItem);
                    try {
                        reqProp.putAll(toMap(new JSONObject(IOUtils.toString(fis.openStream(), propCharset))));
                    } catch (JSONException e) {
                        throw new WebApplicationException(e, Response.status(Response.Status.BAD_REQUEST).entity("Unable to parse Request Properties from " + "Multipart MIME parts with the name 'properties'!").build());
                    }
                } else {
                    //additional metadata as serialised RDF
                    if (contentItem == null) {
                        throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).entity("Multipart MIME parts for additional " + "contentParts MUST BE after the MIME " + "parts for 'metadata' AND 'content'").build());
                    }
                    if (fieldName == null || fieldName.isEmpty()) {
                        throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).entity("Multipart MIME parts representing " + "ContentParts for additional RDF metadata " + "MUST define the contentParts URI as " + "'name' of the MIME part!").build());
                    }
                    Graph graph = new IndexedGraph();
                    try {
                        getParser().parse(graph, fis.openStream(), fis.getContentType());
                    } catch (Exception e) {
                        throw new WebApplicationException(e, Response.status(Response.Status.BAD_REQUEST).entity(String.format("Unable to parse RDF " + "for ContentPart '%s' ( contentType: %s)", fis.getName(), fis.getContentType())).build());
                    }
                    // the field name of the MIME part is the URI of the content part
                    IRI contentPartId = new IRI(fieldName);
                    contentItem.addPart(contentPartId, graph);
                }
            }
            if (contentItem == null) {
                throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).entity("The parsed multipart content item does not contain " + "any content. The content is expected to be contained " + "in a MIME part with the name 'content'. This part can " + " be also a 'multipart/alternate' if multiple content " + "parts need to be included in requests.").build());
            }
        } catch (FileUploadException e) {
            throw new WebApplicationException(e, Response.Status.BAD_REQUEST);
        }
    } else {
        //normal content
        ContentItemFactory ciFactory = getContentItemFactory();
        contentItem = ciFactory.createContentItem(contentItemId, new StreamSource(entityStream, mediaType.toString()));
        //add the URI of the main content
        parsedContentIds.add(contentItem.getPartUri(0).getUnicodeString());
    }
    //set the parsed contentIDs to the EnhancementProperties
    Map<String, Object> ep = ContentItemHelper.initRequestPropertiesContentPart(contentItem);
    parseEnhancementPropertiesFromParameters(ep);
    ep.put(PARSED_CONTENT_URIS, Collections.unmodifiableSet(parsedContentIds));
    //STANBOL-660: set the language of the content if explicitly parsed in the request
    String contentLanguage = getContentLanguage();
    if (!StringUtils.isBlank(contentLanguage)) {
        //language codes are case insensitive ... so we convert to lower case
        contentLanguage = contentLanguage.toLowerCase(Locale.ROOT);
        createParsedLanguageAnnotation(contentItem, contentLanguage);
    // previously only the dc:language property was set to the contentItem. However this
    // information is only used as fallback if no Language annotation is present. However
    // if a user explicitly parses the language he expects this language to be used
    // so this was changed with STANBOL-1417
    //            EnhancementEngineHelper.set(contentItem.getMetadata(), contentItem.getUri(), 
    //                DC_LANGUAGE, new PlainLiteralImpl(contentLanguage));
    }
    return contentItem;
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) ContentItemFactory(org.apache.stanbol.enhancer.servicesapi.ContentItemFactory) WebApplicationException(javax.ws.rs.WebApplicationException) StreamSource(org.apache.stanbol.enhancer.servicesapi.impl.StreamSource) JSONException(org.codehaus.jettison.json.JSONException) URISyntaxException(java.net.URISyntaxException) WebApplicationException(javax.ws.rs.WebApplicationException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) IOException(java.io.IOException) JSONException(org.codehaus.jettison.json.JSONException) FileUploadException(org.apache.commons.fileupload.FileUploadException) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) Graph(org.apache.clerezza.commons.rdf.Graph) JSONObject(org.codehaus.jettison.json.JSONObject) ByteArrayInputStream(java.io.ByteArrayInputStream) FileItemStream(org.apache.commons.fileupload.FileItemStream) MediaType(javax.ws.rs.core.MediaType) List(java.util.List) ArrayList(java.util.ArrayList) JSONObject(org.codehaus.jettison.json.JSONObject) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) FileItemIterator(org.apache.commons.fileupload.FileItemIterator) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) FileUploadException(org.apache.commons.fileupload.FileUploadException) HashSet(java.util.HashSet)

Aggregations

Graph (org.apache.clerezza.commons.rdf.Graph)172 IRI (org.apache.clerezza.commons.rdf.IRI)110 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)66 SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph)57 Triple (org.apache.clerezza.commons.rdf.Triple)45 IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph)43 Test (org.junit.Test)38 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)36 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)34 IOException (java.io.IOException)27 ImmutableGraph (org.apache.clerezza.commons.rdf.ImmutableGraph)26 HashSet (java.util.HashSet)24 RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)24 EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException)24 InputStream (java.io.InputStream)21 HashMap (java.util.HashMap)20 Language (org.apache.clerezza.commons.rdf.Language)17 Blob (org.apache.stanbol.enhancer.servicesapi.Blob)17 ArrayList (java.util.ArrayList)16 LiteralFactory (org.apache.clerezza.rdf.core.LiteralFactory)15