use of org.ontoware.rdf2go.model.Statement in project stanbol by apache.
the class TestMetaxaCore method printTriples.
/**
 * Logs, at debug level, the Stanbol Enhancer triples that would be created for the
 * metadata contained in the given model, and counts them. No triple is stored anywhere;
 * each one is only built and logged.
 * <p>
 * A statement is skipped (and logged as such) when its subject, predicate or object
 * converts to {@code null}.
 *
 * @param m a {@link Model}
 *
 * @return an {@code int} with the number of statements that could be converted to triples
 */
private int printTriples(Model m) {
    int tripleCounter = 0;
    HashMap<BlankNode, BlankNode> blankNodeMap = new HashMap<BlankNode, BlankNode>();
    ClosableIterator<Statement> it = m.iterator();
    try {
        while (it.hasNext()) {
            Statement oneStmt = it.next();
            BlankNodeOrIRI subject = (BlankNodeOrIRI) MetaxaEngine.asClerezzaResource(oneStmt.getSubject(), blankNodeMap);
            IRI predicate = (IRI) MetaxaEngine.asClerezzaResource(oneStmt.getPredicate(), blankNodeMap);
            RDFTerm object = MetaxaEngine.asClerezzaResource(oneStmt.getObject(), blankNodeMap);
            if (null != subject && null != predicate && null != object) {
                Triple t = new TripleImpl(subject, predicate, object);
                LOG.debug("adding " + t);
                tripleCounter++;
            } else {
                LOG.debug("skipped " + oneStmt.toString());
            }
        }
    } finally {
        // release the iterator even when a conversion throws (e.g. a failing cast)
        it.close();
    }
    return tripleCounter;
}
use of org.ontoware.rdf2go.model.Statement in project stanbol by apache.
the class RDF2GoUtils method urifyBlankNodes.
/**
 * Replaces every blank node that occurs as subject or object of a statement in the given
 * model with a newly generated URI. Within one call the same blank node is always mapped
 * to the same URI.
 * <p>
 * The model is not modified while it is being iterated: affected statements and their
 * rewritten counterparts are collected in two temporary models and applied afterwards as
 * a single diff.
 *
 * @param model the model whose blank nodes are replaced in place
 */
public static void urifyBlankNodes(Model model) {
    HashMap<BlankNode, URI> nodeMap = new HashMap<BlankNode, URI>();
    Model add = RDF2Go.getModelFactory().createModel();
    add.open();
    try {
        Model remove = RDF2Go.getModelFactory().createModel();
        remove.open();
        try {
            for (Statement stmt : model) {
                RDFTerm subj = stmt.getSubject();
                URI pred = stmt.getPredicate();
                Node obj = stmt.getObject();
                boolean match = false;
                if (subj instanceof BlankNode) {
                    match = true;
                    subj = uriForBlankNode(nodeMap, subj.asBlankNode());
                }
                if (obj instanceof BlankNode) {
                    match = true;
                    obj = uriForBlankNode(nodeMap, obj.asBlankNode());
                }
                if (match) {
                    remove.addStatement(stmt);
                    add.addStatement(subj, pred, obj);
                }
            }
            ClosableIterator<Statement> addIt = add.iterator();
            try {
                ClosableIterator<Statement> removeIt = remove.iterator();
                try {
                    model.update(new DiffImpl(addIt, removeIt));
                } finally {
                    removeIt.close();
                }
            } finally {
                addIt.close();
            }
        } finally {
            // the temporary models are released even if the scan or the update fails
            remove.close();
        }
    } finally {
        add.close();
    }
}

/**
 * Returns the URI assigned to the given blank node, generating and remembering a new
 * random unique URI on first encounter.
 */
private static URI uriForBlankNode(HashMap<BlankNode, URI> nodeMap, BlankNode blankNode) {
    URI uri = nodeMap.get(blankNode);
    if (uri == null) {
        uri = URIGenerator.createNewRandomUniqueURI();
        nodeMap.put(blankNode, uri);
    }
    return uri;
}
use of org.ontoware.rdf2go.model.Statement in project stanbol by apache.
the class MetaxaEngine method computeEnhancements.
/**
 * Extracts metadata from the given content item and adds it to the item's metadata graph.
 * <p>
 * Steps, in order:
 * <ol>
 * <li>under the content item's read lock, run the extractor on the item's stream;</li>
 * <li>replace blank nodes in the extracted model with URIs;</li>
 * <li>write the value of every non-empty plain text statement about the document to a
 * {@code text/plain} content sink (the statement itself is only kept as a triple when
 * {@code includeText} is set);</li>
 * <li>convert all other statements to triples, skipping any whose subject, predicate or
 * object converts to {@code null};</li>
 * <li>under the content item's write lock, add the collected triples to its metadata;</li>
 * <li>if any plain text was written, attach the blob as a new part of the content item.</li>
 * </ol>
 * If the extractor returns no model, the method logs that and returns without changes.
 *
 * @param ci the content item to process
 * @throws EngineException if extraction fails, the plain text sink cannot be created, or
 *         the plain text cannot be written to it
 */
public void computeEnhancements(ContentItem ci) throws EngineException {
    // get model from the extraction
    URIImpl docId;
    Model m = null;
    ci.getLock().readLock().lock();
    try {
        docId = new URIImpl(ci.getUri().getUnicodeString());
        m = this.extractor.extract(ci.getStream(), docId, ci.getMimeType());
    } catch (ExtractorException e) {
        throw new EngineException("Error while processing ContentItem " + ci.getUri() + " with Metaxa", e);
    } catch (IOException e) {
        throw new EngineException("Error while processing ContentItem " + ci.getUri() + " with Metaxa", e);
    } finally {
        ci.getLock().readLock().unlock();
    }
    // the extracted plain text from the model
    if (null == m) {
        log.debug("Unable to process ContentItem {} (mime type {}) with Metaxa", ci.getUri(), ci.getMimeType());
        return;
    }
    ContentSink plainTextSink;
    try {
        plainTextSink = ciFactory.createContentSink("text/plain");
    } catch (IOException e) {
        m.close();
        throw new EngineException("Unable to initialise Blob for storing " + "the plain text content", e);
    }
    HashMap<BlankNode, BlankNode> blankNodeMap = new HashMap<BlankNode, BlankNode>();
    ClosableIterator<Statement> it = null;
    BufferedWriter out = null;
    //used to detect if some text was extracted
    boolean textExtracted = false;
    try {
        // everything that can fail after the model exists runs inside this try,
        // so that the model is always closed
        RDF2GoUtils.urifyBlankNodes(m);
        it = m.iterator();
        out = new BufferedWriter(new OutputStreamWriter(plainTextSink.getOutputStream(), UTF8));
        //first add to a temporary graph
        Graph g = new SimpleGraph();
        while (it.hasNext()) {
            Statement oneStmt = it.next();
            //the plain text Blob!
            if (oneStmt.getSubject().equals(docId) && oneStmt.getPredicate().equals(NIE_PLAINTEXT_PROPERTY)) {
                String text = oneStmt.getObject().toString();
                if (text != null && !text.isEmpty()) {
                    try {
                        out.write(text);
                    } catch (IOException e) {
                        throw new EngineException("Unable to write extracted " + "plain text to Blob (blob impl: " + plainTextSink.getBlob().getClass() + ")", e);
                    }
                    textExtracted = true;
                    if (includeText) {
                        BlankNodeOrIRI subject = (BlankNodeOrIRI) asClerezzaResource(oneStmt.getSubject(), blankNodeMap);
                        IRI predicate = (IRI) asClerezzaResource(oneStmt.getPredicate(), blankNodeMap);
                        RDFTerm object = asClerezzaResource(oneStmt.getObject(), blankNodeMap);
                        // same guard as for the metadata statements below
                        if (null != subject && null != predicate && null != object) {
                            g.add(new TripleImpl(subject, predicate, object));
                        }
                    }
                }
            } else {
                //add metadata to the metadata of the contentItem
                BlankNodeOrIRI subject = (BlankNodeOrIRI) asClerezzaResource(oneStmt.getSubject(), blankNodeMap);
                IRI predicate = (IRI) asClerezzaResource(oneStmt.getPredicate(), blankNodeMap);
                RDFTerm object = asClerezzaResource(oneStmt.getObject(), blankNodeMap);
                if (null != subject && null != predicate && null != object) {
                    Triple t = new TripleImpl(subject, predicate, object);
                    g.add(t);
                    log.debug("added {}", t);
                }
            }
        }
        // push buffered text to the sink now: the quiet close in the finally block
        // would otherwise hide a failed write and leave a truncated blob
        try {
            out.flush();
        } catch (IOException e) {
            throw new EngineException("Unable to write extracted " + "plain text to Blob (blob impl: " + plainTextSink.getBlob().getClass() + ")", e);
        }
        //add the extracted triples to the metadata of the ContentItem
        ci.getLock().writeLock().lock();
        try {
            ci.getMetadata().addAll(g);
        } finally {
            ci.getLock().writeLock().unlock();
        }
    } finally {
        if (it != null) {
            it.close();
        }
        m.close();
        IOUtils.closeQuietly(out);
    }
    if (textExtracted) {
        //add plain text to the content item
        IRI blobUri = new IRI("urn:metaxa:plain-text:" + randomUUID());
        ci.addPart(blobUri, plainTextSink.getBlob());
    }
}
use of org.ontoware.rdf2go.model.Statement in project stanbol by apache.
the class MetaxaCore method getText.
/**
 * Collects the plain text of a document from its extracted metadata.
 * <p>
 * The values of all literal objects of {@code NIE.plainTextContent} statements are
 * concatenated in iteration order and the result is trimmed.
 *
 * @param model
 *            a {@link Model} with the extracted metadata
 * @return a {@link String} with the trimmed plain text content, or {@code null} if
 *         the metadata holds no plain text (or only whitespace)
 */
public static String getText(Model model) {
    ClosableIterator<Statement> matches = null;
    try {
        matches = model.findStatements(Variable.ANY, NIE.plainTextContent, Variable.ANY);
        StringBuilder buffer = new StringBuilder(10000);
        while (matches.hasNext()) {
            Node object = matches.next().getObject();
            if (!(object instanceof Literal)) {
                // only literal values contribute text
                continue;
            }
            Literal literal = (Literal) object;
            buffer.append(literal.getValue());
        }
        String trimmed = buffer.toString().trim();
        return trimmed.length() == 0 ? null : trimmed;
    } finally {
        if (matches != null) {
            matches.close();
        }
    }
}
Aggregations