Use of org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph in project stanbol by apache.
From the class MetaxaEngine, method computeEnhancements:
public void computeEnhancements(ContentItem ci) throws EngineException {
    // get model from the extraction
    URIImpl docId;
    Model m = null;
    ci.getLock().readLock().lock();
    try {
        docId = new URIImpl(ci.getUri().getUnicodeString());
        m = this.extractor.extract(ci.getStream(), docId, ci.getMimeType());
    } catch (ExtractorException e) {
        throw new EngineException("Error while processing ContentItem " + ci.getUri() + " with Metaxa", e);
    } catch (IOException e) {
        throw new EngineException("Error while processing ContentItem " + ci.getUri() + " with Metaxa", e);
    } finally {
        ci.getLock().readLock().unlock();
    }
    // the extracted plain text from the model
    if (null == m) {
        log.debug("Unable to process ContentItem {} (mime type {}) with Metaxa", ci.getUri(), ci.getMimeType());
        return;
    }
    ContentSink plainTextSink;
    try {
        plainTextSink = ciFactory.createContentSink("text/plain");
    } catch (IOException e) {
        m.close();
        throw new EngineException("Unable to initialise Blob for storing " + "the plain text content", e);
    }
    HashMap<BlankNode, BlankNode> blankNodeMap = new HashMap<BlankNode, BlankNode>();
    RDF2GoUtils.urifyBlankNodes(m);
    ClosableIterator<Statement> it = m.iterator();
    BufferedWriter out = new BufferedWriter(new OutputStreamWriter(plainTextSink.getOutputStream(), UTF8));
    // used to detect if some text was extracted
    boolean textExtracted = false;
    try {
        // first add to a temporary graph
        Graph g = new SimpleGraph();
        while (it.hasNext()) {
            Statement oneStmt = it.next();
            // the plain text Blob!
            if (oneStmt.getSubject().equals(docId) && oneStmt.getPredicate().equals(NIE_PLAINTEXT_PROPERTY)) {
                String text = oneStmt.getObject().toString();
                if (text != null && !text.isEmpty()) {
                    try {
                        out.write(oneStmt.getObject().toString());
                    } catch (IOException e) {
                        throw new EngineException("Unable to write extracted " + "plain text to Blob (blob impl: " + plainTextSink.getBlob().getClass() + ")", e);
                    }
                    textExtracted = true;
                    if (includeText) {
                        BlankNodeOrIRI subject = (BlankNodeOrIRI) asClerezzaResource(oneStmt.getSubject(), blankNodeMap);
                        IRI predicate = (IRI) asClerezzaResource(oneStmt.getPredicate(), blankNodeMap);
                        RDFTerm object = asClerezzaResource(oneStmt.getObject(), blankNodeMap);
                        g.add(new TripleImpl(subject, predicate, object));
                    }
                }
            } else {
                // add metadata to the metadata of the contentItem
                BlankNodeOrIRI subject = (BlankNodeOrIRI) asClerezzaResource(oneStmt.getSubject(), blankNodeMap);
                IRI predicate = (IRI) asClerezzaResource(oneStmt.getPredicate(), blankNodeMap);
                RDFTerm object = asClerezzaResource(oneStmt.getObject(), blankNodeMap);
                if (null != subject && null != predicate && null != object) {
                    Triple t = new TripleImpl(subject, predicate, object);
                    g.add(t);
                    log.debug("added " + t.toString());
                }
            }
        }
        // add the extracted triples to the metadata of the ContentItem
        ci.getLock().writeLock().lock();
        try {
            ci.getMetadata().addAll(g);
            g = null;
        } finally {
            ci.getLock().writeLock().unlock();
        }
    } finally {
        it.close();
        m.close();
        IOUtils.closeQuietly(out);
    }
    if (textExtracted) {
        // add plain text to the content item
        IRI blobUri = new IRI("urn:metaxa:plain-text:" + randomUUID());
        ci.addPart(blobUri, plainTextSink.getBlob());
    }
}
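A point worth pulling out of the snippet above: the engine accumulates converted triples in a temporary SimpleGraph and only copies them into ci.getMetadata() at the end, while holding the write lock, which keeps the locked section short. The following is a minimal, self-contained sketch of that staging idiom, not Stanbol code; the document IRI, predicate and target graph are made-up examples.

import org.apache.clerezza.commons.rdf.Graph;
import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.commons.rdf.impl.utils.TripleImpl;
import org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph;

public class StagingGraphSketch {

    public static void main(String[] args) {
        // Temporary graph used as a staging buffer (the role of 'g' above).
        Graph staging = new SimpleGraph();
        IRI doc = new IRI("urn:example:doc1");                   // illustrative document IRI
        IRI source = new IRI("http://purl.org/dc/terms/source"); // illustrative predicate
        staging.add(new TripleImpl(doc, source, new IRI("urn:example:upload/1")));

        // Target graph standing in for ci.getMetadata(); in the engine the
        // addAll happens inside ci.getLock().writeLock().
        Graph metadata = new SimpleGraph();
        metadata.addAll(staging);
        System.out.println("metadata triples: " + metadata.size());
    }
}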
Use of org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph in project stanbol by apache.
From the class RootResource, method getGraph:
private Graph getGraph(String ontologyId, boolean merged, URI requestUri) {
    long before = System.currentTimeMillis();
    OWLOntologyID key = OntologyUtils.decode(ontologyId);
    log.debug("Will try to retrieve ontology {} from provider.", key);
    /*
     * Export directly to Graph since the OWLOntologyWriter uses (de-)serializing converters for the
     * other formats.
     *
     * Use oTemp for the "real" graph and o for the graph that will be exported. This is due to the fact
     * that in o we want to change import statements, but we do not want these changes to be stored
     * permanently.
     */
    Graph o = null, oTemp = null;
    try {
        oTemp = ontologyProvider.getStoredOntology(key, Graph.class, merged);
    } catch (Exception ex) {
        log.warn("Retrieval of ontology with ID " + key + " failed.", ex);
    }
    if (oTemp == null) {
        log.debug("Ontology {} missing from provider. Trying libraries...", key);
        // TODO remove once registry supports OWLOntologyID as public key.
        IRI iri = URIUtils.sanitize(IRI.create(ontologyId));
        // See if we can touch a library. TODO: replace with event model on the ontology provider.
        int minSize = -1;
        IRI smallest = null;
        for (Library lib : registryManager.getLibraries(iri)) {
            int size = lib.getChildren().length;
            if (minSize < 1 || size < minSize) {
                smallest = lib.getIRI();
                minSize = size;
            }
        }
        if (smallest != null) {
            log.debug("Selected library for ontology {} is {} .", iri, smallest);
            try {
                oTemp = registryManager.getLibrary(smallest).getOntology(iri, Graph.class);
            } catch (RegistryContentException e) {
                log.warn("The content of library " + smallest + " could not be accessed.", e);
            }
        }
    }
    // A plain SimpleGraph is sufficient here (rather than a more resource-intensive IndexedGraph), since both o and oTemp will be GC'ed after serialization.
    if (oTemp != null) {
        o = new SimpleGraph(oTemp);
    }
    if (o == null) {
        log.debug("Ontology {} not found in any ontology provider or library.", ontologyId);
        return null;
    }
    log.debug("Retrieved ontology {} .", ontologyId);
    // Rewrite imports
    String uri = uriInfo.getRequestUri().toString();
    URI base = URI.create(uri.substring(0, uri.lastIndexOf(ontologyId) - 1));
    // Rewrite import statements
    /*
     * TODO manage import rewrites better once the container ID is fully configurable (i.e. instead of
     * going upOne() add "session" or "ontology" if needed).
     */
    Iterator<Triple> imports = o.filter(null, OWL.imports, null);
    Set<Triple> oldImports = new HashSet<Triple>();
    while (imports.hasNext()) {
        oldImports.add(imports.next());
    }
    for (Triple t : oldImports) {
        // construct new statement
        String s = ((org.apache.clerezza.commons.rdf.IRI) t.getObject()).getUnicodeString();
        if (s.contains("::")) {
            s = s.substring(s.indexOf("::") + 2, s.length());
        }
        org.apache.clerezza.commons.rdf.IRI target = new org.apache.clerezza.commons.rdf.IRI(base + "/" + s);
        o.add(new TripleImpl(t.getSubject(), OWL.imports, target));
        // remove old statement
        o.remove(t);
    }
    // Versioning.
    OWLOntologyID id = OWLUtils.extractOntologyID(o);
    if (id != null && !id.isAnonymous() && id.getVersionIRI() == null) {
        org.apache.clerezza.commons.rdf.IRI viri = new org.apache.clerezza.commons.rdf.IRI(requestUri.toString());
        log.debug("Setting version IRI for export : {}", viri);
        o.add(new TripleImpl(new org.apache.clerezza.commons.rdf.IRI(id.getOntologyIRI().toString()), new org.apache.clerezza.commons.rdf.IRI(OWL2Constants.OWL_VERSION_IRI), viri));
    }
    log.debug("Exported as Clerezza ImmutableGraph in {} ms. Handing over to writer.", System.currentTimeMillis() - before);
    return o;
}
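The copy into a fresh SimpleGraph is what lets the import rewriting above mutate the exported graph without touching the stored ontology. A stripped-down sketch of that copy-then-rewrite idiom follows; it assumes the OWL vocabulary class used in the snippet is Clerezza's org.apache.clerezza.rdf.ontologies.OWL, and it deliberately omits the "::" prefix handling of the original.

import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;

import org.apache.clerezza.commons.rdf.Graph;
import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.commons.rdf.Triple;
import org.apache.clerezza.commons.rdf.impl.utils.TripleImpl;
import org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph;
import org.apache.clerezza.rdf.ontologies.OWL;

public class ImportRewriteSketch {

    /** Copies 'stored' and rewrites every owl:imports object against 'base'. */
    public static Graph rewriteImports(Graph stored, String base) {
        // Work on a copy so the stored graph is never modified.
        Graph copy = new SimpleGraph(stored);
        // Collect the matches first; removing while iterating over filter() is unsafe.
        Set<Triple> oldImports = new HashSet<Triple>();
        Iterator<Triple> it = copy.filter(null, OWL.imports, null);
        while (it.hasNext()) {
            oldImports.add(it.next());
        }
        for (Triple t : oldImports) {
            String local = ((IRI) t.getObject()).getUnicodeString();
            copy.add(new TripleImpl(t.getSubject(), OWL.imports, new IRI(base + "/" + local)));
            copy.remove(t);
        }
        return copy;
    }
}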
Use of org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph in project stanbol by apache.
From the class RootResource, method getMetadata:
public Response getMetadata(@PathParam("ontologyId") String ontologyId, @Context UriInfo uriInfo, @Context HttpHeaders headers) {
    ResponseBuilder rb;
    org.apache.clerezza.commons.rdf.IRI me = new org.apache.clerezza.commons.rdf.IRI(getPublicBaseUri() + "ontonet/" + ontologyId);
    Graph mImmutableGraph = new SimpleGraph();
    for (String alias : getAliases(OntologyUtils.decode(ontologyId))) {
        mImmutableGraph.add(new TripleImpl(new org.apache.clerezza.commons.rdf.IRI(getPublicBaseUri() + "ontonet/" + alias), OWL.sameAs, me));
    }
    rb = Response.ok(mImmutableGraph);
    // addCORSOrigin(servletContext, rb, headers);
    return rb.build();
}
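Here the freshly built SimpleGraph is handed directly to Response.ok(...), so serialization is delegated to whichever JAX-RS MessageBodyWriter is registered for Graph entities in the deployment. The graph-building step itself can be isolated from the JAX-RS plumbing; the helper below is an illustrative sketch (its name and signature are not Stanbol API), again assuming Clerezza's OWL vocabulary class.

import org.apache.clerezza.commons.rdf.Graph;
import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.commons.rdf.impl.utils.TripleImpl;
import org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph;
import org.apache.clerezza.rdf.ontologies.OWL;

public class AliasGraphSketch {

    /** Builds a graph stating that each alias IRI is owl:sameAs the canonical IRI. */
    public static Graph aliasGraph(IRI canonical, Iterable<IRI> aliases) {
        Graph g = new SimpleGraph();
        for (IRI alias : aliases) {
            g.add(new TripleImpl(alias, OWL.sameAs, canonical));
        }
        return g;
    }
}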
Use of org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph in project stanbol by apache.
From the class RdfSerializingWriter, method getExpandedContext:
private Graph getExpandedContext(GraphNode node, GraphNode recipe) {
    final Graph result = new SimpleGraph(node.getNodeContext());
    final Set<RDFTerm> expandedResources = new HashSet<RDFTerm>();
    expandedResources.add(node.getNode());
    while (true) {
        Set<RDFTerm> additionalExpansionRes = getAdditionalExpansionResources(result, recipe);
        additionalExpansionRes.removeAll(expandedResources);
        if (additionalExpansionRes.size() == 0) {
            return result;
        }
        for (RDFTerm resource : additionalExpansionRes) {
            final GraphNode additionalNode = new GraphNode(resource, node.getGraph());
            result.addAll(additionalNode.getNodeContext());
            expandedResources.add(resource);
        }
    }
}
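In this writer the SimpleGraph is seeded from a GraphNode context and then grown inside the loop; copying the context into a detached graph is what allows the expansion to add triples without modifying the source. A compact sketch of seeding a SimpleGraph from a node context, under the assumption that GraphNode is Clerezza's org.apache.clerezza.rdf.utils.GraphNode; the resources are made up.

import org.apache.clerezza.commons.rdf.Graph;
import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.commons.rdf.impl.utils.TripleImpl;
import org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph;
import org.apache.clerezza.rdf.utils.GraphNode;

public class NodeContextSketch {

    public static void main(String[] args) {
        Graph data = new SimpleGraph();
        IRI alice = new IRI("urn:example:alice"); // made-up resources
        IRI bob = new IRI("urn:example:bob");
        IRI knows = new IRI("http://xmlns.com/foaf/0.1/knows");
        data.add(new TripleImpl(alice, knows, bob));
        data.add(new TripleImpl(bob, knows, alice));

        // getNodeContext() yields the triples surrounding 'alice'; copying them
        // into a detached SimpleGraph lets the caller keep adding triples (as the
        // writer above does while expanding) without touching 'data'.
        GraphNode node = new GraphNode(alice, data);
        Graph expanded = new SimpleGraph(node.getNodeContext());
        System.out.println("context triples: " + expanded.size());
    }
}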
Use of org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph in project stanbol by apache.
From the class TripleMatcherGroupImplTest, method createGraph:
@Before
public void createGraph() {
    graph = new SimpleGraph();
    graph.add(TripleUtil.uriTriple("S1", "P1", "01"));
    graph.add(TripleUtil.uriTriple("S1", "P1", "02"));
    graph.add(TripleUtil.uriTriple("S2", "P1", "01"));
    graph.add(TripleUtil.uriTriple("S2", "P1", "02"));
    graph.add(TripleUtil.uriTriple("S3", "P1", "01"));
    graph.add(TripleUtil.uriTriple("S4", "P1", "02"));
}
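For readers without Stanbol's TripleUtil test helper, a similar fixture can be written directly against the Clerezza API. The stand-in below assumes uriTriple simply wraps its three string arguments into IRI terms (an assumption about the helper, not its documented behavior), and shows how such a graph is then queried with filter().

import java.util.Iterator;

import org.apache.clerezza.commons.rdf.Graph;
import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.commons.rdf.Triple;
import org.apache.clerezza.commons.rdf.impl.utils.TripleImpl;
import org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph;

public class UriTripleSketch {

    // Hypothetical stand-in for TripleUtil.uriTriple: treats each argument as an IRI string.
    static Triple uriTriple(String s, String p, String o) {
        return new TripleImpl(new IRI(s), new IRI(p), new IRI(o));
    }

    public static void main(String[] args) {
        Graph graph = new SimpleGraph();
        graph.add(uriTriple("S1", "P1", "01"));
        graph.add(uriTriple("S2", "P1", "01"));
        // Find all statements relating any subject to object "01" via predicate "P1".
        Iterator<Triple> it = graph.filter(null, new IRI("P1"), new IRI("01"));
        int matches = 0;
        while (it.hasNext()) {
            it.next();
            matches++;
        }
        System.out.println("matches: " + matches); // prints 2
    }
}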