Search in sources :

Example 1 with IndexedGraph

use of org.apache.stanbol.commons.indexedgraph.IndexedGraph in project stanbol by apache.

the class ContentItemReader method readFrom.

@Override
public ContentItem readFrom(Class<ContentItem> type, Type genericType, Annotation[] annotations, MediaType mediaType, MultivaluedMap<String, String> httpHeaders, InputStream entityStream) throws IOException, WebApplicationException {
    //boolean withMetadata = withMetadata(httpHeaders);
    ContentItem contentItem = null;
    IRI contentItemId = getContentItemId();
    if (log.isTraceEnabled()) {
        //NOTE: enabling TRACE level logging will copy the parsed content
        //      into a BYTE array
        log.trace("Parse ContentItem from");
        log.trace("  - MediaType: {}", mediaType);
        log.trace("  - Headers:");
        for (Entry<String, List<String>> header : httpHeaders.entrySet()) {
            log.trace("      {}: {}", header.getKey(), header.getValue());
        }
        byte[] content = IOUtils.toByteArray(entityStream);
        log.trace("content: \n{}", new String(content, "UTF-8"));
        IOUtils.closeQuietly(entityStream);
        entityStream = new ByteArrayInputStream(content);
    }
    Set<String> parsedContentIds = new HashSet<String>();
    if (mediaType.isCompatible(MULTIPART)) {
        log.debug(" - parse Multipart MIME ContentItem");
        //try to read ContentItem from "multipart/from-data"
        Graph metadata = null;
        FileItemIterator fileItemIterator;
        try {
            fileItemIterator = fu.getItemIterator(new MessageBodyReaderContext(entityStream, mediaType));
            while (fileItemIterator.hasNext()) {
                FileItemStream fis = fileItemIterator.next();
                if (fis.getFieldName().equals("metadata")) {
                    if (contentItem != null) {
                        throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).entity("The Multipart MIME part with the 'metadata' " + "MUST BE before the MIME part containing the " + "'content'!").build());
                    }
                    //only used if not parsed as query param
                    if (contentItemId == null && fis.getName() != null && !fis.getName().isEmpty()) {
                        contentItemId = new IRI(fis.getName());
                    }
                    metadata = new IndexedGraph();
                    try {
                        getParser().parse(metadata, fis.openStream(), fis.getContentType());
                    } catch (Exception e) {
                        throw new WebApplicationException(e, Response.status(Response.Status.BAD_REQUEST).entity(String.format("Unable to parse Metadata " + "from Multipart MIME part '%s' (" + "contentItem: %s| contentType: %s)", fis.getFieldName(), fis.getName(), fis.getContentType())).build());
                    }
                } else if (fis.getFieldName().equals("content")) {
                    contentItem = createContentItem(contentItemId, metadata, fis, parsedContentIds);
                } else if (fis.getFieldName().equals("properties") || fis.getFieldName().equals(REQUEST_PROPERTIES_URI.getUnicodeString())) {
                    //parse the RequestProperties
                    if (contentItem == null) {
                        throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).entity("Multipart MIME parts for " + "Request Properties MUST BE after the " + "MIME parts for 'metadata' AND 'content'").build());
                    }
                    MediaType propMediaType = MediaType.valueOf(fis.getContentType());
                    if (!APPLICATION_JSON_TYPE.isCompatible(propMediaType)) {
                        throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).entity("Request Properties (Multipart MIME parts" + "with the name '" + fis.getFieldName() + "') MUST " + "BE encoded as 'appicaltion/json' (encountered: '" + fis.getContentType() + "')!").build());
                    }
                    String propCharset = propMediaType.getParameters().get("charset");
                    if (propCharset == null) {
                        propCharset = "UTF-8";
                    }
                    Map<String, Object> reqProp = ContentItemHelper.initRequestPropertiesContentPart(contentItem);
                    try {
                        reqProp.putAll(toMap(new JSONObject(IOUtils.toString(fis.openStream(), propCharset))));
                    } catch (JSONException e) {
                        throw new WebApplicationException(e, Response.status(Response.Status.BAD_REQUEST).entity("Unable to parse Request Properties from" + "Multipart MIME parts with the name 'properties'!").build());
                    }
                } else {
                    //additional metadata as serialised RDF
                    if (contentItem == null) {
                        throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).entity("Multipart MIME parts for additional " + "contentParts MUST BE after the MIME " + "parts for 'metadata' AND 'content'").build());
                    }
                    if (fis.getFieldName() == null || fis.getFieldName().isEmpty()) {
                        throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).entity("Multipart MIME parts representing " + "ContentParts for additional RDF metadata" + "MUST define the contentParts URI as" + "'name' of the MIME part!").build());
                    }
                    Graph graph = new IndexedGraph();
                    try {
                        getParser().parse(graph, fis.openStream(), fis.getContentType());
                    } catch (Exception e) {
                        throw new WebApplicationException(e, Response.status(Response.Status.BAD_REQUEST).entity(String.format("Unable to parse RDF " + "for ContentPart '%s' ( contentType: %s)", fis.getName(), fis.getContentType())).build());
                    }
                    IRI contentPartId = new IRI(fis.getFieldName());
                    contentItem.addPart(contentPartId, graph);
                }
            }
            if (contentItem == null) {
                throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).entity("The parsed multipart content item does not contain " + "any content. The content is expected to be contained " + "in a MIME part with the name 'content'. This part can " + " be also a 'multipart/alternate' if multiple content " + "parts need to be included in requests.").build());
            }
        } catch (FileUploadException e) {
            throw new WebApplicationException(e, Response.Status.BAD_REQUEST);
        }
    } else {
        //normal content
        ContentItemFactory ciFactory = getContentItemFactory();
        contentItem = ciFactory.createContentItem(contentItemId, new StreamSource(entityStream, mediaType.toString()));
        //add the URI of the main content
        parsedContentIds.add(contentItem.getPartUri(0).getUnicodeString());
    }
    //set the parsed contentIDs to the EnhancementProperties
    Map<String, Object> ep = ContentItemHelper.initRequestPropertiesContentPart(contentItem);
    parseEnhancementPropertiesFromParameters(ep);
    ep.put(PARSED_CONTENT_URIS, Collections.unmodifiableSet(parsedContentIds));
    //STANBOL-660: set the language of the content if explicitly parsed in the request
    String contentLanguage = getContentLanguage();
    if (!StringUtils.isBlank(contentLanguage)) {
        //language codes are case insensitive ... so we convert to lower case
        contentLanguage = contentLanguage.toLowerCase(Locale.ROOT);
        createParsedLanguageAnnotation(contentItem, contentLanguage);
    // previously only the dc:language property was set to the contentItem. However this
    // information is only used as fallback if no Language annotation is present. However
    // if a user explicitly parses the language he expects this language to be used
    // so this was change with STANBOL-1417
    //            EnhancementEngineHelper.set(contentItem.getMetadata(), contentItem.getUri(), 
    //                DC_LANGUAGE, new PlainLiteralImpl(contentLanguage));
    }
    return contentItem;
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) ContentItemFactory(org.apache.stanbol.enhancer.servicesapi.ContentItemFactory) WebApplicationException(javax.ws.rs.WebApplicationException) StreamSource(org.apache.stanbol.enhancer.servicesapi.impl.StreamSource) JSONException(org.codehaus.jettison.json.JSONException) URISyntaxException(java.net.URISyntaxException) WebApplicationException(javax.ws.rs.WebApplicationException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) IOException(java.io.IOException) JSONException(org.codehaus.jettison.json.JSONException) FileUploadException(org.apache.commons.fileupload.FileUploadException) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) Graph(org.apache.clerezza.commons.rdf.Graph) JSONObject(org.codehaus.jettison.json.JSONObject) ByteArrayInputStream(java.io.ByteArrayInputStream) FileItemStream(org.apache.commons.fileupload.FileItemStream) MediaType(javax.ws.rs.core.MediaType) List(java.util.List) ArrayList(java.util.ArrayList) JSONObject(org.codehaus.jettison.json.JSONObject) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) FileItemIterator(org.apache.commons.fileupload.FileItemIterator) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) FileUploadException(org.apache.commons.fileupload.FileUploadException) HashSet(java.util.HashSet)

Example 2 with IndexedGraph

use of org.apache.stanbol.commons.indexedgraph.IndexedGraph in project stanbol by apache.

the class ContentItemResource method getPlacesAsJSON.

/**
     * @return an RDF/JSON descriptions of places for the word map widget
     */
public String getPlacesAsJSON() throws ParseException, UnsupportedEncodingException {
    Graph g = new IndexedGraph();
    LiteralFactory lf = LiteralFactory.getInstance();
    Graph metadata = contentItem.getMetadata();
    for (EntityExtractionSummary p : getPlaceOccurrences()) {
        EntitySuggestion bestGuess = p.getBestGuess();
        if (bestGuess == null) {
            continue;
        }
        IRI uri = new IRI(bestGuess.getUri());
        Iterator<Triple> latitudes = metadata.filter(uri, GEO_LAT, null);
        if (latitudes.hasNext()) {
            g.add(latitudes.next());
        }
        Iterator<Triple> longitutes = metadata.filter(uri, GEO_LONG, null);
        if (longitutes.hasNext()) {
            g.add(longitutes.next());
            g.add(new TripleImpl(uri, Properties.RDFS_LABEL, lf.createTypedLiteral(bestGuess.getLabel())));
        }
    }
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    serializer.serialize(out, g, SupportedFormat.RDF_JSON);
    String rdfString = out.toString("utf-8");
    return rdfString;
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) Graph(org.apache.clerezza.commons.rdf.Graph) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) ByteArrayOutputStream(java.io.ByteArrayOutputStream) EnhancementEngineHelper.getString(org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.getString) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) LiteralFactory(org.apache.clerezza.rdf.core.LiteralFactory)

Example 3 with IndexedGraph

use of org.apache.stanbol.commons.indexedgraph.IndexedGraph in project stanbol by apache.

the class SparqlSearcher method find.

@Override
public final QueryResultList<Representation> find(FieldQuery parsedQuery) throws IOException {
    long start = System.currentTimeMillis();
    final SparqlFieldQuery query = SparqlFieldQueryFactory.getSparqlFieldQuery(parsedQuery);
    String sparqlQuery = query.toSparqlConstruct();
    long initEnd = System.currentTimeMillis();
    log.debug("  > InitTime: " + (initEnd - start));
    log.debug("  > SPARQL query:\n" + sparqlQuery);
    InputStream in = SparqlEndpointUtils.sendSparqlRequest(getQueryUri(), sparqlQuery, DEFAULT_RDF_CONTENT_TYPE);
    long queryEnd = System.currentTimeMillis();
    log.debug("  > QueryTime: " + (queryEnd - initEnd));
    if (in != null) {
        Graph graph;
        Graph rdfData = parser.parse(in, DEFAULT_RDF_CONTENT_TYPE, new IRI(getBaseUri()));
        if (rdfData instanceof Graph) {
            graph = (Graph) rdfData;
        } else {
            graph = new IndexedGraph(rdfData);
        }
        long parseEnd = System.currentTimeMillis();
        log.debug("  > ParseTime: " + (parseEnd - queryEnd));
        return new RdfQueryResultList(query, graph);
    } else {
        return null;
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) Graph(org.apache.clerezza.commons.rdf.Graph) RdfQueryResultList(org.apache.stanbol.entityhub.query.clerezza.RdfQueryResultList) InputStream(java.io.InputStream) SparqlFieldQuery(org.apache.stanbol.entityhub.query.sparql.SparqlFieldQuery) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph)

Example 4 with IndexedGraph

use of org.apache.stanbol.commons.indexedgraph.IndexedGraph in project stanbol by apache.

the class RepresentationReader method parseFromContent.

public Map<String, Representation> parseFromContent(RequestData content, MediaType acceptedMediaType) {
    // (3) Parse the Representtion(s) form the entity stream
    if (content.getMediaType().isCompatible(MediaType.APPLICATION_JSON_TYPE)) {
        //parse from json
        throw new UnsupportedOperationException("Parsing of JSON not yet implemented :(");
    } else if (isSupported(content.getMediaType())) {
        //from RDF serialisation
        RdfValueFactory valueFactory = RdfValueFactory.getInstance();
        Map<String, Representation> representations = new HashMap<String, Representation>();
        Set<BlankNodeOrIRI> processed = new HashSet<BlankNodeOrIRI>();
        Graph graph = new IndexedGraph();
        try {
            parser.parse(graph, content.getEntityStream(), content.getMediaType().toString());
        } catch (UnsupportedParsingFormatException e) {
            //String acceptedMediaType = httpHeaders.getFirst("Accept");
            //throw an internal server Error, because we check in
            //isReadable(..) for supported types and still we get here a
            //unsupported format -> therefore it looks like an configuration
            //error the server (e.g. a missing Bundle with the required bundle)
            String message = "Unable to create the Parser for the supported format" + content.getMediaType() + " (" + e + ")";
            log.error(message, e);
            throw new WebApplicationException(Response.status(Status.INTERNAL_SERVER_ERROR).entity(message).header(HttpHeaders.ACCEPT, acceptedMediaType).build());
        } catch (RuntimeException e) {
            //NOTE: Clerezza seams not to provide specific exceptions on
            //      parsing errors. Hence the catch for all RuntimeException
            String message = "Unable to parse the provided RDF data (format: " + content.getMediaType() + ", message: " + e.getMessage() + ")";
            log.error(message, e);
            throw new WebApplicationException(Response.status(Status.BAD_REQUEST).entity(message).header(HttpHeaders.ACCEPT, acceptedMediaType).build());
        }
        for (Iterator<Triple> st = graph.iterator(); st.hasNext(); ) {
            BlankNodeOrIRI resource = st.next().getSubject();
            if (resource instanceof IRI && processed.add(resource)) {
                //build a new representation
                representations.put(((IRI) resource).getUnicodeString(), valueFactory.createRdfRepresentation((IRI) resource, graph));
            }
        }
        return representations;
    } else {
        //unsupported media type
        String message = String.format("Parsed Content-Type '%s' is not one of the supported %s", content.getMediaType(), supportedMediaTypes);
        log.info("Bad Request: {}", message);
        throw new WebApplicationException(Response.status(Status.BAD_REQUEST).entity(message).header(HttpHeaders.ACCEPT, acceptedMediaType).build());
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) HashSet(java.util.HashSet) Set(java.util.Set) WebApplicationException(javax.ws.rs.WebApplicationException) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) Representation(org.apache.stanbol.entityhub.servicesapi.model.Representation) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) Graph(org.apache.clerezza.commons.rdf.Graph) Iterator(java.util.Iterator) RdfValueFactory(org.apache.stanbol.entityhub.model.clerezza.RdfValueFactory) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) HashMap(java.util.HashMap) Map(java.util.Map) MultivaluedMap(javax.ws.rs.core.MultivaluedMap) UnsupportedParsingFormatException(org.apache.clerezza.rdf.core.serializedform.UnsupportedParsingFormatException)

Example 5 with IndexedGraph

use of org.apache.stanbol.commons.indexedgraph.IndexedGraph in project stanbol by apache.

the class RdfResultListTest method testRdfResultSorting.

/**
     * Providing a sorted Iteration over query results stored in an RDF
     * graph is not something trivial. Therefore this test
     */
@Test
public void testRdfResultSorting() {
    SortedMap<Double, RdfRepresentation> sorted = new TreeMap<Double, RdfRepresentation>();
    Graph resultGraph = new IndexedGraph();
    RdfValueFactory vf = new RdfValueFactory(resultGraph);
    IRI resultListNode = new IRI(RdfResourceEnum.QueryResultSet.getUri());
    IRI resultProperty = new IRI(RdfResourceEnum.queryResult.getUri());
    for (int i = 0; i < 100; i++) {
        Double rank;
        do {
            //avoid duplicate keys
            rank = Math.random();
        } while (sorted.containsKey(rank));
        RdfRepresentation r = vf.createRepresentation("urn:sortTest:rep." + i);
        //link the representation with the query result set
        resultGraph.add(new TripleImpl(resultListNode, resultProperty, r.getNode()));
        r.set(RdfResourceEnum.resultScore.getUri(), rank);
        sorted.put(rank, r);
    }
    RdfQueryResultList resultList = new RdfQueryResultList(new FieldQueryImpl(), resultGraph);
    if (log.isDebugEnabled()) {
        log.debug("---DEBUG Sorting ---");
        for (Iterator<Representation> it = resultList.iterator(); it.hasNext(); ) {
            Representation r = it.next();
            log.debug("{}: {}", r.getFirst(RdfResourceEnum.resultScore.getUri()), r.getId());
        }
    }
    log.debug("---ASSERT Sorting ---");
    for (Iterator<Representation> it = resultList.iterator(); it.hasNext(); ) {
        Representation r = it.next();
        Double lastkey = sorted.lastKey();
        Representation last = sorted.get(lastkey);
        Assert.assertEquals("score: " + r.getFirst(RdfResourceEnum.resultScore.getUri()) + " of Representation " + r.getId() + " is not as expected " + last.getFirst(RdfResourceEnum.resultScore.getUri()) + " of Representation " + last.getId() + "!", r, last);
        sorted.remove(lastkey);
    }
    Assert.assertTrue(sorted.isEmpty());
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) RdfRepresentation(org.apache.stanbol.entityhub.model.clerezza.RdfRepresentation) Representation(org.apache.stanbol.entityhub.servicesapi.model.Representation) TreeMap(java.util.TreeMap) FieldQueryImpl(org.apache.stanbol.entityhub.core.query.FieldQueryImpl) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) Graph(org.apache.clerezza.commons.rdf.Graph) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) RdfRepresentation(org.apache.stanbol.entityhub.model.clerezza.RdfRepresentation) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) RdfValueFactory(org.apache.stanbol.entityhub.model.clerezza.RdfValueFactory) Test(org.junit.Test)

Aggregations

IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph)34 Graph (org.apache.clerezza.commons.rdf.Graph)27 IRI (org.apache.clerezza.commons.rdf.IRI)20 HashSet (java.util.HashSet)9 InputStream (java.io.InputStream)8 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)8 RdfValueFactory (org.apache.stanbol.entityhub.model.clerezza.RdfValueFactory)8 Representation (org.apache.stanbol.entityhub.servicesapi.model.Representation)8 Triple (org.apache.clerezza.commons.rdf.Triple)7 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)6 SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph)5 RdfQueryResultList (org.apache.stanbol.entityhub.query.clerezza.RdfQueryResultList)5 Test (org.junit.Test)5 EntityhubLDPath (org.apache.stanbol.entityhub.ldpath.EntityhubLDPath)4 RdfRepresentation (org.apache.stanbol.entityhub.model.clerezza.RdfRepresentation)4 Entity (org.apache.stanbol.entityhub.servicesapi.model.Entity)4 BeforeClass (org.junit.BeforeClass)4 ByteArrayInputStream (java.io.ByteArrayInputStream)3 IOException (java.io.IOException)3 ResponseBuilder (javax.ws.rs.core.Response.ResponseBuilder)3