Search in sources :

Example 96 with Graph

use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.

the class Nif20MetadataEngine method computeEnhancements.

/**
 * Writes an RDF description of the {@link AnalysedText} of the parsed ContentItem to the
 * metadata of that ContentItem, using the {@code Nif20} vocabulary.
 * <p>
 * The whole text is written first and serves as the reference context. Every enclosed
 * sentence, chunk and token is then written as its own resource that references this
 * context. Depending on the {@code writePrevNext} and {@code writeHierary} settings,
 * previous/next links and sentence/phrase containment links are added. Finally the
 * phrase, POS and sentiment annotations of each span are written.
 * <p>
 * All triples are added while holding the write lock of the ContentItem.
 *
 * @param ci the ContentItem to process
 * @throws EngineException declared by the engine contract; presumably raised by
 *         {@code getAnalysedText} if no AnalysedText is available (TODO confirm)
 */
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    AnalysedText at = getAnalysedText(this, ci, true);
    String lang = EnhancementEngineHelper.getLanguage(ci);
    Language language = lang == null ? null : new Language(lang);
    //now iterate over the AnalysedText data and create the RDF representation
    //TODO: make configureable
    boolean sentences = true;
    boolean phrases = true;
    boolean words = true;
    EnumSet<SpanTypeEnum> activeTypes = EnumSet.noneOf(SpanTypeEnum.class);
    if (sentences) {
        activeTypes.add(SpanTypeEnum.Sentence);
    }
    if (phrases) {
        activeTypes.add(SpanTypeEnum.Chunk);
    }
    if (words) {
        activeTypes.add(SpanTypeEnum.Token);
    }
    Graph metadata = ci.getMetadata();
    IRI base = ci.getUri();
    ci.getLock().writeLock().lock();
    try {
        //write the context
        IRI text = writeSpan(metadata, base, at, language, at);
        metadata.add(new TripleImpl(text, Nif20.sourceUrl.getUri(), ci.getUri()));
        Iterator<Span> spans = at.getEnclosed(activeTypes);
        IRI sentence = null;
        IRI phrase = null;
        //the previous token (used for the previous/next word links)
        IRI word = null;
        //the last token seen since the current sentence started; null while the
        //current sentence has no token yet. Tracked separately from 'word' so that
        //a sentence never gets a token of an earlier sentence as its last word.
        IRI lastWordInSentence = null;
        boolean firstWordInSentence = true;
        while (spans.hasNext()) {
            Span span = spans.next();
            //TODO: filter Spans based on additional requirements
            //(1) write generic information about the span
            IRI current = writeSpan(metadata, base, at, language, span);
            //write the context
            metadata.add(new TripleImpl(current, Nif20.referenceContext.getUri(), text));
            //(2) add the relations between the different spans
            switch(span.getType()) {
                case Sentence:
                    if (sentence != null) {
                        if (writePrevNext) {
                            metadata.add(new TripleImpl(sentence, Nif20.nextSentence.getUri(), current));
                            metadata.add(new TripleImpl(current, Nif20.previousSentence.getUri(), sentence));
                        }
                        //a new sentence starts: close the previous one by writing its last word
                        if (lastWordInSentence != null) {
                            metadata.add(new TripleImpl(sentence, Nif20.lastWord.getUri(), lastWordInSentence));
                        }
                    }
                    sentence = current;
                    lastWordInSentence = null;
                    firstWordInSentence = true;
                    break;
                case Chunk:
                    if (sentence != null && writeHierary) {
                        metadata.add(new TripleImpl(current, Nif20.superString.getUri(), sentence));
                    }
                    phrase = current;
                    break;
                case Token:
                    if (sentence != null) {
                        if (writeHierary) {
                            metadata.add(new TripleImpl(current, Nif20.sentence.getUri(), sentence));
                        }
                        //metadata.add(new TripleImpl(sentence, Nif20.word.getUri(), current));
                        if (firstWordInSentence) {
                            metadata.add(new TripleImpl(sentence, Nif20.firstWord.getUri(), current));
                            firstWordInSentence = false;
                        }
                        lastWordInSentence = current;
                    }
                    //NOTE(review): 'phrase' is the most recently seen chunk; this does not
                    //check that the token actually lies within that chunk - confirm intended
                    if (writeHierary && phrase != null && !phrase.equals(current)) {
                        metadata.add(new TripleImpl(current, Nif20.subString.getUri(), phrase));
                    }
                    if (word != null && writePrevNext) {
                        metadata.add(new TripleImpl(word, Nif20.nextWord.getUri(), current));
                        metadata.add(new TripleImpl(current, Nif20.previousWord.getUri(), word));
                    }
                    word = current;
                    break;
                default:
                    break;
            }
            //(3) add specific information such as POS, chunk type ...
            Nif20Helper.writePhrase(metadata, span, current);
            Nif20Helper.writePos(metadata, span, current);
            //TODO: sentiment support
            Value<Double> sentiment = span.getAnnotation(NlpAnnotations.SENTIMENT_ANNOTATION);
            if (sentiment != null && sentiment.value() != null) {
                metadata.add(new TripleImpl(current, SENTIMENT_PROPERTY, lf.createTypedLiteral(sentiment.value())));
            }
        }
        //the final sentence is not followed by another sentence span, so its last
        //word needs to be written after the iteration has finished
        if (sentence != null && lastWordInSentence != null) {
            metadata.add(new TripleImpl(sentence, Nif20.lastWord.getUri(), lastWordInSentence));
        }
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) SpanTypeEnum(org.apache.stanbol.enhancer.nlp.model.SpanTypeEnum) Span(org.apache.stanbol.enhancer.nlp.model.Span) NlpEngineHelper.getAnalysedText(org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getAnalysedText) AnalysedText(org.apache.stanbol.enhancer.nlp.model.AnalysedText) Graph(org.apache.clerezza.commons.rdf.Graph) Language(org.apache.clerezza.commons.rdf.Language) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Example 97 with Graph

use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.

the class MetaxaEngine method computeEnhancements.

/**
 * Runs the configured extractor on the content of the parsed ContentItem and stores the
 * result in the ContentItem.
 * <p>
 * The content stream is read while holding the read lock. The statements of the extracted
 * model are first collected in a temporary graph and then added to the metadata of the
 * ContentItem while holding the write lock. The value of the
 * {@code NIE_PLAINTEXT_PROPERTY} of the document is written to a "text/plain" content sink;
 * if any text was written, the resulting blob is added as a content part with a
 * {@code urn:metaxa:plain-text:} URI. The plain text statement itself is only copied to
 * the metadata if {@code includeText} is set.
 *
 * @param ci the ContentItem to process
 * @throws EngineException if the extraction fails, the content sink cannot be created or
 *         the extracted plain text cannot be written to the sink
 */
public void computeEnhancements(ContentItem ci) throws EngineException {
    // get model from the extraction
    URIImpl docId;
    Model m = null;
    ci.getLock().readLock().lock();
    try {
        docId = new URIImpl(ci.getUri().getUnicodeString());
        m = this.extractor.extract(ci.getStream(), docId, ci.getMimeType());
    } catch (ExtractorException e) {
        throw new EngineException("Error while processing ContentItem " + ci.getUri() + " with Metaxa", e);
    } catch (IOException e) {
        throw new EngineException("Error while processing ContentItem " + ci.getUri() + " with Metaxa", e);
    } finally {
        ci.getLock().readLock().unlock();
    }
    // the extracted plain text from the model
    if (null == m) {
        log.debug("Unable to preocess ContentItem {} (mime type {}) with Metaxa", ci.getUri(), ci.getMimeType());
        return;
    }
    ContentSink plainTextSink;
    try {
        plainTextSink = ciFactory.createContentSink("text/plain");
    } catch (IOException e) {
        m.close();
        throw new EngineException("Unable to initialise Blob for storing the plain text content", e);
    }
    HashMap<BlankNode, BlankNode> blankNodeMap = new HashMap<BlankNode, BlankNode>();
    // declared outside of the try block so that the finally block can release them;
    // created inside so that the model is closed even if one of the creations fails
    ClosableIterator<Statement> it = null;
    BufferedWriter out = null;
    //used to detect if some text was extracted
    boolean textExtracted = false;
    try {
        RDF2GoUtils.urifyBlankNodes(m);
        it = m.iterator();
        out = new BufferedWriter(new OutputStreamWriter(plainTextSink.getOutputStream(), UTF8));
        //first add to a temporary graph
        Graph g = new SimpleGraph();
        while (it.hasNext()) {
            Statement oneStmt = it.next();
            //the plain text Blob!
            if (oneStmt.getSubject().equals(docId) && oneStmt.getPredicate().equals(NIE_PLAINTEXT_PROPERTY)) {
                String text = oneStmt.getObject().toString();
                if (text != null && !text.isEmpty()) {
                    try {
                        out.write(text);
                    } catch (IOException e) {
                        throw new EngineException("Unable to write extracted plain text to Blob (blob impl: " + plainTextSink.getBlob().getClass() + ")", e);
                    }
                    textExtracted = true;
                    if (includeText) {
                        BlankNodeOrIRI subject = (BlankNodeOrIRI) asClerezzaResource(oneStmt.getSubject(), blankNodeMap);
                        IRI predicate = (IRI) asClerezzaResource(oneStmt.getPredicate(), blankNodeMap);
                        RDFTerm object = asClerezzaResource(oneStmt.getObject(), blankNodeMap);
                        // same guard as for the other statements: skip nodes that could not be converted
                        if (null != subject && null != predicate && null != object) {
                            g.add(new TripleImpl(subject, predicate, object));
                        }
                    }
                }
            } else {
                //add metadata to the metadata of the contentItem
                BlankNodeOrIRI subject = (BlankNodeOrIRI) asClerezzaResource(oneStmt.getSubject(), blankNodeMap);
                IRI predicate = (IRI) asClerezzaResource(oneStmt.getPredicate(), blankNodeMap);
                RDFTerm object = asClerezzaResource(oneStmt.getObject(), blankNodeMap);
                if (null != subject && null != predicate && null != object) {
                    Triple t = new TripleImpl(subject, predicate, object);
                    g.add(t);
                    log.debug("added " + t.toString());
                }
            }
        }
        if (textExtracted) {
            // flush explicitly: the quiet close in the finally block would otherwise
            // swallow a failure to write the buffered text to the sink
            try {
                out.flush();
            } catch (IOException e) {
                throw new EngineException("Unable to write extracted plain text to Blob (blob impl: " + plainTextSink.getBlob().getClass() + ")", e);
            }
        }
        //add the extracted triples to the metadata of the ContentItem
        ci.getLock().writeLock().lock();
        try {
            ci.getMetadata().addAll(g);
        } finally {
            ci.getLock().writeLock().unlock();
        }
    } finally {
        if (it != null) {
            it.close();
        }
        m.close();
        IOUtils.closeQuietly(out);
    }
    if (textExtracted) {
        //add plain text to the content item
        IRI blobUri = new IRI("urn:metaxa:plain-text:" + randomUUID());
        ci.addPart(blobUri, plainTextSink.getBlob());
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) HashMap(java.util.HashMap) Statement(org.ontoware.rdf2go.model.Statement) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) BlankNode(org.apache.clerezza.commons.rdf.BlankNode) BlankNode(org.ontoware.rdf2go.model.node.BlankNode) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) URIImpl(org.ontoware.rdf2go.model.node.impl.URIImpl) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) IOException(java.io.IOException) BufferedWriter(java.io.BufferedWriter) Triple(org.apache.clerezza.commons.rdf.Triple) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) Graph(org.apache.clerezza.commons.rdf.Graph) Model(org.ontoware.rdf2go.model.Model) ExtractorException(org.semanticdesktop.aperture.extractor.ExtractorException) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) OutputStreamWriter(java.io.OutputStreamWriter) ContentSink(org.apache.stanbol.enhancer.servicesapi.ContentSink) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Example 98 with Graph

use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.

the class RootResource method getGraph.

/**
 * Retrieves the ontology with the given ID as a Clerezza graph prepared for export.
 * <p>
 * The ontology is looked up in the ontology provider first and, if it is not found there,
 * in the smallest registry library that references it. The returned graph is a copy of the
 * stored one in which the targets of the {@code owl:imports} statements are rewritten
 * relative to the request URI and, if the ontology is named but has no version IRI, a
 * version IRI statement pointing to {@code requestUri} is added.
 *
 * @param ontologyId the ontology ID as it appears in the request
 * @param merged passed to the ontology provider when retrieving the stored ontology
 * @param requestUri the URI used as version IRI when the ontology has none
 * @return the graph to export, or {@code null} if the ontology could not be found
 */
private Graph getGraph(String ontologyId, boolean merged, URI requestUri) {
    long before = System.currentTimeMillis();
    OWLOntologyID key = OntologyUtils.decode(ontologyId);
    log.debug("Will try to retrieve ontology {} from provider.", key);
    /*
         * Export directly to Graph since the OWLOntologyWriter uses (de-)serializing converters for the
         * other formats.
         * 
         * Use oTemp for the "real" graph and o for the graph that will be exported. This is due to the fact
         * that in o we want to change import statements, but we do not want these changes to be stored
         * permanently.
         */
    Graph o = null, oTemp = null;
    try {
        oTemp = ontologyProvider.getStoredOntology(key, Graph.class, merged);
    } catch (Exception ex) {
        log.warn("Retrieval of ontology with ID " + key + " failed.", ex);
    }
    if (oTemp == null) {
        log.debug("Ontology {} missing from provider. Trying libraries...", key);
        // TODO remove once registry supports OWLOntologyID as public key.
        IRI iri = URIUtils.sanitize(IRI.create(ontologyId));
        // See if we can touch a library. TODO: replace with event model on the ontology provider.
        int minSize = -1;
        IRI smallest = null;
        for (Library lib : registryManager.getLibraries(iri)) {
            int size = lib.getChildren().length;
            if (minSize < 1 || size < minSize) {
                smallest = lib.getIRI();
                minSize = size;
            }
        }
        if (smallest != null) {
            log.debug("Selected library for ontology {} is {} .", iri, smallest);
            try {
                oTemp = registryManager.getLibrary(smallest).getOntology(iri, Graph.class);
            } catch (RegistryContentException e) {
                log.warn("The content of library " + smallest + " could not be accessed.", e);
            }
        }
    }
    // Work on a copy so that the rewrites below are not stored permanently. No need for a more
    // resource-intensive IndexedGraph, since both o and oTemp will be GC'ed after serialization.
    if (oTemp != null) {
        o = new SimpleGraph(oTemp);
    }
    if (o == null) {
        log.debug("Ontology {} not found in any ontology provider or library.", ontologyId);
        return null;
    }
    log.debug("Retrieved ontology {} .", ontologyId);
    // Rewrite imports
    // NOTE(review): this reads the uriInfo of the resource rather than the requestUri parameter,
    // and lastIndexOf() returns -1 if the (decoded) ontologyId does not literally occur in the
    // request URI, which makes substring() fail - confirm both are intended.
    String uri = uriInfo.getRequestUri().toString();
    URI base = URI.create(uri.substring(0, uri.lastIndexOf(ontologyId) - 1));
    // Rewrite import statements
    /*
         * TODO manage import rewrites better once the container ID is fully configurable (i.e. instead of
         * going upOne() add "session" or "ontology" if needed).
         */
    // Collect the statements first, as the graph is modified while rewriting them.
    Iterator<Triple> imports = o.filter(null, OWL.imports, null);
    Set<Triple> oldImports = new HashSet<Triple>();
    while (imports.hasNext()) {
        oldImports.add(imports.next());
    }
    for (Triple t : oldImports) {
        if (!(t.getObject() instanceof org.apache.clerezza.commons.rdf.IRI)) {
            // an import target that is not an IRI cannot be rewritten; keep the statement as it is
            log.warn("Import target {} of ontology {} is not an IRI. The import statement is left unchanged.", t.getObject(), ontologyId);
            continue;
        }
        // construct new statement
        String s = ((org.apache.clerezza.commons.rdf.IRI) t.getObject()).getUnicodeString();
        if (s.contains("::")) {
            s = s.substring(s.indexOf("::") + 2, s.length());
        }
        org.apache.clerezza.commons.rdf.IRI target = new org.apache.clerezza.commons.rdf.IRI(base + "/" + s);
        // remove the old statement before adding the new one: if both happen to be equal,
        // removing afterwards would drop the import altogether
        o.remove(t);
        o.add(new TripleImpl(t.getSubject(), OWL.imports, target));
    }
    // Versioning.
    OWLOntologyID id = OWLUtils.extractOntologyID(o);
    if (id != null && !id.isAnonymous() && id.getVersionIRI() == null) {
        org.apache.clerezza.commons.rdf.IRI viri = new org.apache.clerezza.commons.rdf.IRI(requestUri.toString());
        log.debug("Setting version IRI for export : {}", viri);
        o.add(new TripleImpl(new org.apache.clerezza.commons.rdf.IRI(id.getOntologyIRI().toString()), new org.apache.clerezza.commons.rdf.IRI(OWL2Constants.OWL_VERSION_IRI), viri));
    }
    log.debug("Exported as Clerezza ImmutableGraph in {} ms. Handing over to writer.", System.currentTimeMillis() - before);
    return o;
}
Also used : IRI(org.semanticweb.owlapi.model.IRI) URI(java.net.URI) UnsupportedFormatException(org.apache.clerezza.rdf.core.serializedform.UnsupportedFormatException) WebApplicationException(javax.ws.rs.WebApplicationException) ConcurrentModificationException(java.util.ConcurrentModificationException) IOException(java.io.IOException) OntologyLoadingException(org.apache.stanbol.ontologymanager.servicesapi.ontology.OntologyLoadingException) OWLOntologyCreationException(org.semanticweb.owlapi.model.OWLOntologyCreationException) OntologyHandleException(org.apache.stanbol.ontologymanager.servicesapi.ontology.OntologyHandleException) RegistryContentException(org.apache.stanbol.ontologymanager.registry.api.RegistryContentException) OrphanOntologyKeyException(org.apache.stanbol.ontologymanager.servicesapi.ontology.OrphanOntologyKeyException) Triple(org.apache.clerezza.commons.rdf.Triple) ImmutableGraph(org.apache.clerezza.commons.rdf.ImmutableGraph) Graph(org.apache.clerezza.commons.rdf.Graph) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) OWLOntologyID(org.semanticweb.owlapi.model.OWLOntologyID) RegistryContentException(org.apache.stanbol.ontologymanager.registry.api.RegistryContentException) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) Library(org.apache.stanbol.ontologymanager.registry.api.model.Library) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) HashSet(java.util.HashSet)

Example 99 with Graph

use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.

the class RootResource method getStandaloneGraph.

/**
 * Serves the ontology with the given ID, or its metadata if {@code meta} is set.
 * <p>
 * Responds with {@code BAD_REQUEST} if the ontology ID is missing or empty, with
 * {@code NO_CONTENT} if the decoded key is listed as an orphan by the ontology provider,
 * with {@code NOT_FOUND} if no graph could be obtained for the ID, and with the graph
 * itself otherwise.
 *
 * @param ontologyId the ontology ID taken from the request path
 * @param meta if {@code true} the result of {@code getMetadata} is returned instead
 * @param merged passed on to {@code getGraph}
 * @param uriInfo the request URI information
 * @param headers the request headers
 * @return the response as described above
 */
@GET
@Path("/{ontologyId:.+}")
@Produces(value = { APPLICATION_JSON, N3, N_TRIPLE, RDF_JSON })
public Response getStandaloneGraph(@PathParam("ontologyId") String ontologyId, @DefaultValue("false") @QueryParam("meta") boolean meta, @DefaultValue("false") @QueryParam("merge") boolean merged, @Context UriInfo uriInfo, @Context HttpHeaders headers) {
    if (meta) {
        return getMetadata(ontologyId, uriInfo, headers);
    }
    if (ontologyId == null || ontologyId.isEmpty()) {
        // return right away: continuing would overwrite the status and try to
        // decode a missing ontology ID
        return Response.status(BAD_REQUEST).build();
    }
    ResponseBuilder rb;
    OWLOntologyID key = OntologyUtils.decode(ontologyId);
    if (ontologyProvider.listOrphans().contains(key)) {
        rb = Response.status(NO_CONTENT);
    } else {
        Graph o = getGraph(ontologyId, merged, uriInfo.getRequestUri());
        rb = o == null ? Response.status(NOT_FOUND) : Response.ok(o);
    }
    // addCORSOrigin(servletContext, rb, headers);
    return rb.build();
}
Also used : ImmutableGraph(org.apache.clerezza.commons.rdf.ImmutableGraph) Graph(org.apache.clerezza.commons.rdf.Graph) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) OWLOntologyID(org.semanticweb.owlapi.model.OWLOntologyID) ResponseBuilder(javax.ws.rs.core.Response.ResponseBuilder) Path(javax.ws.rs.Path) Produces(javax.ws.rs.Produces) GET(javax.ws.rs.GET)

Example 100 with Graph

use of org.apache.clerezza.commons.rdf.Graph in project stanbol by apache.

the class RootResource method getMetadata.

/**
 * Builds a response listing the aliases of the ontology with the given ID.
 * <p>
 * The response entity is a graph holding one {@code owl:sameAs} statement per alias, each
 * pointing from the "ontonet/" URI of the alias to the "ontonet/" URI of the requested
 * ontology (both below the public base URI).
 *
 * @param ontologyId the ontology ID taken from the request path
 * @param uriInfo the request URI information (not used by this method)
 * @param headers the request headers (not used by this method)
 * @return an OK response with the alias graph as entity
 */
public Response getMetadata(@PathParam("ontologyId") String ontologyId, @Context UriInfo uriInfo, @Context HttpHeaders headers) {
    // the resource all aliases are declared to be the same as
    final org.apache.clerezza.commons.rdf.IRI self =
            new org.apache.clerezza.commons.rdf.IRI(getPublicBaseUri() + "ontonet/" + ontologyId);
    final Graph aliasGraph = new SimpleGraph();
    for (String alias : getAliases(OntologyUtils.decode(ontologyId))) {
        org.apache.clerezza.commons.rdf.IRI aliasIri =
                new org.apache.clerezza.commons.rdf.IRI(getPublicBaseUri() + "ontonet/" + alias);
        aliasGraph.add(new TripleImpl(aliasIri, OWL.sameAs, self));
    }
    // addCORSOrigin(servletContext, rb, headers);
    return Response.ok(aliasGraph).build();
}
Also used : IRI(org.semanticweb.owlapi.model.IRI) ImmutableGraph(org.apache.clerezza.commons.rdf.ImmutableGraph) Graph(org.apache.clerezza.commons.rdf.Graph) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) ResponseBuilder(javax.ws.rs.core.Response.ResponseBuilder)

Aggregations

Graph (org.apache.clerezza.commons.rdf.Graph)172 IRI (org.apache.clerezza.commons.rdf.IRI)110 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)66 SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph)57 Triple (org.apache.clerezza.commons.rdf.Triple)45 IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph)43 Test (org.junit.Test)38 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)36 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)34 IOException (java.io.IOException)27 ImmutableGraph (org.apache.clerezza.commons.rdf.ImmutableGraph)26 HashSet (java.util.HashSet)24 RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)24 EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException)24 InputStream (java.io.InputStream)21 HashMap (java.util.HashMap)20 Language (org.apache.clerezza.commons.rdf.Language)17 Blob (org.apache.stanbol.enhancer.servicesapi.Blob)17 ArrayList (java.util.ArrayList)16 LiteralFactory (org.apache.clerezza.rdf.core.LiteralFactory)15