Search in sources :

Example 41 with SimpleGraph

use of org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph in project stanbol by apache.

In class MetaxaEngine, method computeEnhancements:

/**
 * Runs the Metaxa extractor over the content item and stores the results.
 * <p>
 * The extracted RDF2Go statements are converted to Clerezza triples, collected in a
 * temporary graph and then added to the metadata of the content item under its write
 * lock. Statements that carry the plain text of the document (subject is the document
 * id, predicate is {@code NIE_PLAINTEXT_PROPERTY}) are written to a "text/plain"
 * content sink; if any text was written, the resulting blob is added as a new part of
 * the content item. Such statements are only copied to the metadata when
 * {@code includeText} is set.
 *
 * @param ci the content item to process
 * @throws EngineException if the extraction fails, the content sink cannot be created,
 *         or the extracted text cannot be written to the sink
 */
public void computeEnhancements(ContentItem ci) throws EngineException {
    // get model from the extraction
    URIImpl docId;
    Model m = null;
    ci.getLock().readLock().lock();
    try {
        docId = new URIImpl(ci.getUri().getUnicodeString());
        m = this.extractor.extract(ci.getStream(), docId, ci.getMimeType());
    } catch (ExtractorException e) {
        throw new EngineException("Error while processing ContentItem " + ci.getUri() + " with Metaxa", e);
    } catch (IOException e) {
        throw new EngineException("Error while processing ContentItem " + ci.getUri() + " with Metaxa", e);
    } finally {
        ci.getLock().readLock().unlock();
    }
    // nothing was extracted: leave the content item untouched
    if (null == m) {
        log.debug("Unable to process ContentItem {} (mime type {}) with Metaxa", ci.getUri(), ci.getMimeType());
        return;
    }
    ContentSink plainTextSink;
    try {
        plainTextSink = ciFactory.createContentSink("text/plain");
    } catch (IOException e) {
        m.close();
        throw new EngineException("Unable to initialise Blob for storing the plain text content", e);
    }
    HashMap<BlankNode, BlankNode> blankNodeMap = new HashMap<BlankNode, BlankNode>();
    ClosableIterator<Statement> it = null;
    BufferedWriter out = null;
    //used to detect if some text was extracted
    boolean textExtracted = false;
    try {
        // These calls are inside the try so that the model is closed by the finally
        // block even if one of them fails.
        RDF2GoUtils.urifyBlankNodes(m);
        it = m.iterator();
        out = new BufferedWriter(new OutputStreamWriter(plainTextSink.getOutputStream(), UTF8));
        //first add to a temporary graph
        Graph g = new SimpleGraph();
        while (it.hasNext()) {
            Statement oneStmt = it.next();
            //the plain text Blob!
            if (oneStmt.getSubject().equals(docId) && oneStmt.getPredicate().equals(NIE_PLAINTEXT_PROPERTY)) {
                String text = oneStmt.getObject().toString();
                if (text != null && !text.isEmpty()) {
                    try {
                        out.write(text);
                    } catch (IOException e) {
                        throw new EngineException("Unable to write extracted plain text to Blob (blob impl: " + plainTextSink.getBlob().getClass() + ")", e);
                    }
                    textExtracted = true;
                    if (includeText) {
                        BlankNodeOrIRI subject = (BlankNodeOrIRI) asClerezzaResource(oneStmt.getSubject(), blankNodeMap);
                        IRI predicate = (IRI) asClerezzaResource(oneStmt.getPredicate(), blankNodeMap);
                        RDFTerm object = asClerezzaResource(oneStmt.getObject(), blankNodeMap);
                        g.add(new TripleImpl(subject, predicate, object));
                    }
                }
            } else {
                //add metadata to the metadata of the contentItem
                BlankNodeOrIRI subject = (BlankNodeOrIRI) asClerezzaResource(oneStmt.getSubject(), blankNodeMap);
                IRI predicate = (IRI) asClerezzaResource(oneStmt.getPredicate(), blankNodeMap);
                RDFTerm object = asClerezzaResource(oneStmt.getObject(), blankNodeMap);
                // skip statements with a node that has no Clerezza counterpart
                if (null != subject && null != predicate && null != object) {
                    Triple t = new TripleImpl(subject, predicate, object);
                    g.add(t);
                    log.debug("added {}", t);
                }
            }
        }
        //add the extracted triples to the metadata of the ContentItem
        ci.getLock().writeLock().lock();
        try {
            ci.getMetadata().addAll(g);
        } finally {
            ci.getLock().writeLock().unlock();
        }
    } finally {
        if (it != null) {
            it.close();
        }
        m.close();
        // closeQuietly accepts null, so this is safe if the writer was never created
        IOUtils.closeQuietly(out);
    }
    if (textExtracted) {
        //add plain text to the content item
        IRI blobUri = new IRI("urn:metaxa:plain-text:" + randomUUID());
        ci.addPart(blobUri, plainTextSink.getBlob());
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) HashMap(java.util.HashMap) Statement(org.ontoware.rdf2go.model.Statement) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) BlankNode(org.apache.clerezza.commons.rdf.BlankNode) BlankNode(org.ontoware.rdf2go.model.node.BlankNode) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) URIImpl(org.ontoware.rdf2go.model.node.impl.URIImpl) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) IOException(java.io.IOException) BufferedWriter(java.io.BufferedWriter) Triple(org.apache.clerezza.commons.rdf.Triple) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) Graph(org.apache.clerezza.commons.rdf.Graph) Model(org.ontoware.rdf2go.model.Model) ExtractorException(org.semanticdesktop.aperture.extractor.ExtractorException) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) OutputStreamWriter(java.io.OutputStreamWriter) ContentSink(org.apache.stanbol.enhancer.servicesapi.ContentSink) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Example 42 with SimpleGraph

use of org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph in project stanbol by apache.

In class RootResource, method getGraph:

/**
 * Builds an exportable copy of the ontology with the given identifier.
 * <p>
 * The ontology is looked up in the ontology provider first and, if missing there, in
 * the registry library that reports the fewest children for it. The result is a copy,
 * so the rewritten import statements and the added version IRI are not written back to
 * the stored graph.
 *
 * @param ontologyId the encoded ontology identifier
 * @param merged passed on to the ontology provider when fetching the stored ontology
 * @param requestUri used as the version IRI when the ontology declares none
 * @return the graph to export, or {@code null} if the ontology could not be found
 */
private Graph getGraph(String ontologyId, boolean merged, URI requestUri) {
    final long startMillis = System.currentTimeMillis();
    final OWLOntologyID key = OntologyUtils.decode(ontologyId);
    log.debug("Will try to retrieve ontology {} from provider.", key);

    // "stored" is the graph as held by the provider or library; it is never modified here.
    Graph stored = null;
    try {
        stored = ontologyProvider.getStoredOntology(key, Graph.class, merged);
    } catch (Exception ex) {
        log.warn("Retrieval of ontology with ID " + key + " failed.", ex);
    }

    if (stored == null) {
        log.debug("Ontology {} missing from provider. Trying libraries...", key);
        // TODO remove once registry supports OWLOntologyID as public key.
        final IRI iri = URIUtils.sanitize(IRI.create(ontologyId));
        // See if we can touch a library. TODO: replace with event model on the ontology provider.
        IRI smallest = null;
        int minSize = -1;
        for (Library lib : registryManager.getLibraries(iri)) {
            final int size = lib.getChildren().length;
            if (minSize >= 1 && size >= minSize) {
                // the current candidate is already at least as small
                continue;
            }
            smallest = lib.getIRI();
            minSize = size;
        }
        if (smallest != null) {
            log.debug("Selected library for ontology {} is {} .", iri, smallest);
            try {
                stored = registryManager.getLibrary(smallest).getOntology(iri, Graph.class);
            } catch (RegistryContentException e) {
                log.warn("The content of library " + smallest + " could not be accessed.", e);
            }
        }
    }

    if (stored == null) {
        log.debug("Ontology {} not found in any ontology provider or library.", ontologyId);
        return null;
    }
    // Work on a SimpleGraph copy: the import statements below are changed for the export
    // only and must not be stored permanently.
    final Graph export = new SimpleGraph(stored);
    log.debug("Retrieved ontology {} .", ontologyId);

    // Base for the rewritten imports: the request URI up to (and excluding) the
    // character preceding the last occurrence of the ontology ID.
    final String requestString = uriInfo.getRequestUri().toString();
    final URI base = URI.create(requestString.substring(0, requestString.lastIndexOf(ontologyId) - 1));

    /*
     * Rewrite import statements.
     * TODO manage import rewrites better once the container ID is fully configurable (i.e. instead of
     * going upOne() add "session" or "ontology" if needed).
     */
    // Snapshot the import triples first so the graph is not modified while it is iterated.
    final Set<Triple> oldImports = new HashSet<Triple>();
    for (Iterator<Triple> imports = export.filter(null, OWL.imports, null); imports.hasNext();) {
        oldImports.add(imports.next());
    }
    for (Triple oldImport : oldImports) {
        String importedId = ((org.apache.clerezza.commons.rdf.IRI) oldImport.getObject()).getUnicodeString();
        final int separator = importedId.indexOf("::");
        if (separator >= 0) {
            // keep only the part after the first "::"
            importedId = importedId.substring(separator + 2);
        }
        final org.apache.clerezza.commons.rdf.IRI target =
                new org.apache.clerezza.commons.rdf.IRI(base + "/" + importedId);
        // add the rewritten statement, then drop the old one
        export.add(new TripleImpl(oldImport.getSubject(), OWL.imports, target));
        export.remove(oldImport);
    }

    // Versioning: a named ontology without a version IRI gets the request URI as one.
    final OWLOntologyID id = OWLUtils.extractOntologyID(export);
    final boolean needsVersionIri = id != null && !id.isAnonymous() && id.getVersionIRI() == null;
    if (needsVersionIri) {
        final org.apache.clerezza.commons.rdf.IRI viri =
                new org.apache.clerezza.commons.rdf.IRI(requestUri.toString());
        log.debug("Setting version IRI for export : {}", viri);
        final org.apache.clerezza.commons.rdf.IRI ontologyIri =
                new org.apache.clerezza.commons.rdf.IRI(id.getOntologyIRI().toString());
        final org.apache.clerezza.commons.rdf.IRI versionProperty =
                new org.apache.clerezza.commons.rdf.IRI(OWL2Constants.OWL_VERSION_IRI);
        export.add(new TripleImpl(ontologyIri, versionProperty, viri));
    }

    log.debug("Exported as Clerezza ImmutableGraph in {} ms. Handing over to writer.", System.currentTimeMillis() - startMillis);
    return export;
}
Also used : IRI(org.semanticweb.owlapi.model.IRI) URI(java.net.URI) UnsupportedFormatException(org.apache.clerezza.rdf.core.serializedform.UnsupportedFormatException) WebApplicationException(javax.ws.rs.WebApplicationException) ConcurrentModificationException(java.util.ConcurrentModificationException) IOException(java.io.IOException) OntologyLoadingException(org.apache.stanbol.ontologymanager.servicesapi.ontology.OntologyLoadingException) OWLOntologyCreationException(org.semanticweb.owlapi.model.OWLOntologyCreationException) OntologyHandleException(org.apache.stanbol.ontologymanager.servicesapi.ontology.OntologyHandleException) RegistryContentException(org.apache.stanbol.ontologymanager.registry.api.RegistryContentException) OrphanOntologyKeyException(org.apache.stanbol.ontologymanager.servicesapi.ontology.OrphanOntologyKeyException) Triple(org.apache.clerezza.commons.rdf.Triple) ImmutableGraph(org.apache.clerezza.commons.rdf.ImmutableGraph) Graph(org.apache.clerezza.commons.rdf.Graph) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) OWLOntologyID(org.semanticweb.owlapi.model.OWLOntologyID) RegistryContentException(org.apache.stanbol.ontologymanager.registry.api.RegistryContentException) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) Library(org.apache.stanbol.ontologymanager.registry.api.model.Library) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) HashSet(java.util.HashSet)

Example 43 with SimpleGraph

use of org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph in project stanbol by apache.

In class RootResource, method getMetadata:

/**
 * Returns a graph that links every alias of the given ontology to the ontology itself.
 * <p>
 * For each alias an {@code owl:sameAs} triple is created whose subject is the alias IRI
 * and whose object is the IRI of the requested ontology; both IRIs are built from the
 * public base URI followed by "ontonet/".
 *
 * @param ontologyId the encoded ontology identifier taken from the request path
 * @param uriInfo request URI information (not used by this method)
 * @param headers request headers (not used by this method)
 * @return an OK response whose entity is the alias graph
 */
public Response getMetadata(@PathParam("ontologyId") String ontologyId, @Context UriInfo uriInfo, @Context HttpHeaders headers) {
    final org.apache.clerezza.commons.rdf.IRI self =
            new org.apache.clerezza.commons.rdf.IRI(getPublicBaseUri() + "ontonet/" + ontologyId);
    final Graph aliasGraph = new SimpleGraph();
    for (String alias : getAliases(OntologyUtils.decode(ontologyId))) {
        final org.apache.clerezza.commons.rdf.IRI aliasIri =
                new org.apache.clerezza.commons.rdf.IRI(getPublicBaseUri() + "ontonet/" + alias);
        aliasGraph.add(new TripleImpl(aliasIri, OWL.sameAs, self));
    }
    // No CORS headers are added here; the addCORSOrigin call is disabled.
    return Response.ok(aliasGraph).build();
}
Also used : IRI(org.semanticweb.owlapi.model.IRI) ImmutableGraph(org.apache.clerezza.commons.rdf.ImmutableGraph) Graph(org.apache.clerezza.commons.rdf.Graph) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) ResponseBuilder(javax.ws.rs.core.Response.ResponseBuilder)

Example 44 with SimpleGraph

use of org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph in project stanbol by apache.

In class RdfSerializingWriter, method getExpandedContext:

/**
 * Collects the context of a node and of the resources the recipe asks to expand.
 * <p>
 * Starting from the context of {@code node}, the method repeatedly asks
 * {@code getAdditionalExpansionResources} which resources should be expanded, adds the
 * context of each one not handled before, and stops once no new resource is reported.
 *
 * @param node the node whose context is expanded
 * @param recipe passed on to {@code getAdditionalExpansionResources}
 * @return a new graph holding the accumulated contexts
 */
private Graph getExpandedContext(GraphNode node, GraphNode recipe) {
    final Graph context = new SimpleGraph(node.getNodeContext());
    final Set<RDFTerm> alreadyExpanded = new HashSet<RDFTerm>();
    alreadyExpanded.add(node.getNode());

    Set<RDFTerm> pending = getAdditionalExpansionResources(context, recipe);
    pending.removeAll(alreadyExpanded);
    while (!pending.isEmpty()) {
        for (RDFTerm resource : pending) {
            // look the resource up in the graph of the original node
            final GraphNode expansion = new GraphNode(resource, node.getGraph());
            context.addAll(expansion.getNodeContext());
            alreadyExpanded.add(resource);
        }
        // the grown context may reference further resources to expand
        pending = getAdditionalExpansionResources(context, recipe);
        pending.removeAll(alreadyExpanded);
    }
    return context;
}
Also used : SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) Graph(org.apache.clerezza.commons.rdf.Graph) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) GraphNode(org.apache.clerezza.rdf.utils.GraphNode) HashSet(java.util.HashSet)

Example 45 with SimpleGraph

use of org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph in project stanbol by apache.

In class TripleMatcherGroupImplTest, method createGraph:

/**
 * Creates the test graph before each test: six triples that all use predicate "P1",
 * with subjects "S1" to "S4" and objects "01" or "02".
 */
@Before
public void createGraph() {
    graph = new SimpleGraph();
    // {subject, object} pairs, in insertion order
    final String[][] subjectObjectPairs = {
        {"S1", "01"},
        {"S1", "02"},
        {"S2", "01"},
        {"S2", "02"},
        {"S3", "01"},
        {"S4", "02"},
    };
    for (String[] pair : subjectObjectPairs) {
        graph.add(TripleUtil.uriTriple(pair[0], "P1", pair[1]));
    }
}
Also used : SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) Before(org.junit.Before)

Aggregations

SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph)46 Graph (org.apache.clerezza.commons.rdf.Graph)34 IRI (org.apache.clerezza.commons.rdf.IRI)24 Test (org.junit.Test)17 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)15 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)12 ImmutableGraph (org.apache.clerezza.commons.rdf.ImmutableGraph)11 RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)10 Triple (org.apache.clerezza.commons.rdf.Triple)10 HashSet (java.util.HashSet)9 ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem)6 ByteArrayOutputStream (java.io.ByteArrayOutputStream)5 InputStream (java.io.InputStream)5 HtmlExtractor (org.apache.stanbol.enhancer.engines.htmlextractor.impl.HtmlExtractor)5 RdfEntityFactory (org.apache.stanbol.enhancer.rdfentities.RdfEntityFactory)5 BlankNode (org.apache.clerezza.commons.rdf.BlankNode)4 ByteArrayInputStream (java.io.ByteArrayInputStream)3 IOException (java.io.IOException)3 ResponseBuilder (javax.ws.rs.core.Response.ResponseBuilder)3 JenaParserProvider (org.apache.clerezza.rdf.jena.parser.JenaParserProvider)3