Use of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in project stanbol by apache.
The class UserResource, method hasPermission.
private boolean hasPermission(GraphNode userNode, String permissionString) {
    boolean has = false;
    Lock readLock = systemGraph.getLock().readLock();
    readLock.lock();
    try {
        // create the iterator while holding the read lock, so concurrent writes
        // cannot invalidate it; then check if the user already has this permission
        Iterator<Triple> existingPermissions = systemGraph.filter((BlankNodeOrIRI) userNode.getNode(), PERMISSION.hasPermission, null);
        while (existingPermissions.hasNext()) {
            BlankNodeOrIRI permissionNode = (BlankNodeOrIRI) existingPermissions.next().getObject();
            Iterator<Triple> permissionTriples = systemGraph.filter(permissionNode, PERMISSION.javaPermissionEntry, null);
            while (permissionTriples.hasNext()) {
                Literal permission = (Literal) permissionTriples.next().getObject();
                if (permissionString.equals(permission.getLexicalForm())) {
                    has = true;
                }
            }
        }
    } finally {
        readLock.unlock();
    }
    return has;
}
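As a usage illustration, here is a minimal, self-contained sketch of the same two-step filter pattern against an in-memory SimpleGraph. The user and permission IRIs and the permission-entry string are invented for the example; the real code runs against the Stanbol system graph and the PERMISSION ontology, and holds the graph's read lock as shown above.

import java.util.Iterator;

import org.apache.clerezza.commons.rdf.BlankNodeOrIRI;
import org.apache.clerezza.commons.rdf.Graph;
import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.commons.rdf.Literal;
import org.apache.clerezza.commons.rdf.Triple;
import org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl;
import org.apache.clerezza.commons.rdf.impl.utils.TripleImpl;
import org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph;

public class PermissionLookupSketch {

    // hypothetical property IRIs standing in for the PERMISSION ontology
    static final IRI HAS_PERMISSION = new IRI("urn:ex:hasPermission");
    static final IRI JAVA_PERMISSION_ENTRY = new IRI("urn:ex:javaPermissionEntry");

    public static void main(String[] args) {
        Graph graph = new SimpleGraph();
        BlankNodeOrIRI user = new IRI("urn:ex:user/alice");
        BlankNodeOrIRI permission = new IRI("urn:ex:permission/1");
        String entryString = "(java.security.AllPermission \"\" \"\")";
        graph.add(new TripleImpl(user, HAS_PERMISSION, permission));
        graph.add(new TripleImpl(permission, JAVA_PERMISSION_ENTRY, new PlainLiteralImpl(entryString)));

        // same two-step filter as in hasPermission: user -> permission node -> literal entry
        boolean has = false;
        Iterator<Triple> permissions = graph.filter(user, HAS_PERMISSION, null);
        while (permissions.hasNext()) {
            BlankNodeOrIRI permissionNode = (BlankNodeOrIRI) permissions.next().getObject();
            Iterator<Triple> entries = graph.filter(permissionNode, JAVA_PERMISSION_ENTRY, null);
            while (entries.hasNext()) {
                Literal entry = (Literal) entries.next().getObject();
                if (entryString.equals(entry.getLexicalForm())) {
                    has = true;
                }
            }
        }
        System.out.println("user has permission: " + has);
    }
}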
Use of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in project stanbol by apache.
The class MetaxaEngine, method computeEnhancements.
public void computeEnhancements(ContentItem ci) throws EngineException {
    // get the model from the extraction
    URIImpl docId;
    Model m = null;
    ci.getLock().readLock().lock();
    try {
        docId = new URIImpl(ci.getUri().getUnicodeString());
        m = this.extractor.extract(ci.getStream(), docId, ci.getMimeType());
    } catch (ExtractorException | IOException e) {
        throw new EngineException("Error while processing ContentItem " + ci.getUri() + " with Metaxa", e);
    } finally {
        ci.getLock().readLock().unlock();
    }
    // nothing to do if the extraction did not produce a model
    if (null == m) {
        log.debug("Unable to process ContentItem {} (mime type {}) with Metaxa", ci.getUri(), ci.getMimeType());
        return;
    }
    ContentSink plainTextSink;
    try {
        plainTextSink = ciFactory.createContentSink("text/plain");
    } catch (IOException e) {
        m.close();
        throw new EngineException("Unable to initialise Blob for storing the plain text content", e);
    }
    HashMap<BlankNode, BlankNode> blankNodeMap = new HashMap<BlankNode, BlankNode>();
    RDF2GoUtils.urifyBlankNodes(m);
    ClosableIterator<Statement> it = m.iterator();
    BufferedWriter out = new BufferedWriter(new OutputStreamWriter(plainTextSink.getOutputStream(), UTF8));
    // used to detect whether some text was extracted
    boolean textExtracted = false;
    try {
        // first add to a temporary graph
        Graph g = new SimpleGraph();
        while (it.hasNext()) {
            Statement oneStmt = it.next();
            // the plain text Blob!
            if (oneStmt.getSubject().equals(docId) && oneStmt.getPredicate().equals(NIE_PLAINTEXT_PROPERTY)) {
                String text = oneStmt.getObject().toString();
                if (text != null && !text.isEmpty()) {
                    try {
                        out.write(text);
                    } catch (IOException e) {
                        throw new EngineException("Unable to write extracted plain text to Blob (blob impl: " + plainTextSink.getBlob().getClass() + ")", e);
                    }
                    textExtracted = true;
                    if (includeText) {
                        BlankNodeOrIRI subject = (BlankNodeOrIRI) asClerezzaResource(oneStmt.getSubject(), blankNodeMap);
                        IRI predicate = (IRI) asClerezzaResource(oneStmt.getPredicate(), blankNodeMap);
                        RDFTerm object = asClerezzaResource(oneStmt.getObject(), blankNodeMap);
                        g.add(new TripleImpl(subject, predicate, object));
                    }
                }
            } else {
                // add metadata to the metadata of the ContentItem
                BlankNodeOrIRI subject = (BlankNodeOrIRI) asClerezzaResource(oneStmt.getSubject(), blankNodeMap);
                IRI predicate = (IRI) asClerezzaResource(oneStmt.getPredicate(), blankNodeMap);
                RDFTerm object = asClerezzaResource(oneStmt.getObject(), blankNodeMap);
                if (null != subject && null != predicate && null != object) {
                    Triple t = new TripleImpl(subject, predicate, object);
                    g.add(t);
                    log.debug("added " + t.toString());
                }
            }
        }
        // add the extracted triples to the metadata of the ContentItem
        ci.getLock().writeLock().lock();
        try {
            ci.getMetadata().addAll(g);
            g = null;
        } finally {
            ci.getLock().writeLock().unlock();
        }
    } finally {
        it.close();
        m.close();
        IOUtils.closeQuietly(out);
    }
    if (textExtracted) {
        // register the plain text as an additional part of the content item
        IRI blobUri = new IRI("urn:metaxa:plain-text:" + randomUUID());
        ci.addPart(blobUri, plainTextSink.getBlob());
    }
}
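One detail worth isolating is the locking discipline: the method collects all extracted triples in a temporary SimpleGraph and only takes the ContentItem's write lock for the final addAll. A minimal stand-alone sketch of that pattern, with a plain ReentrantReadWriteLock and a SimpleGraph standing in for ci.getLock() and ci.getMetadata():

import java.util.concurrent.locks.ReentrantReadWriteLock;

import org.apache.clerezza.commons.rdf.Graph;
import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.commons.rdf.impl.utils.TripleImpl;
import org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph;

public class BufferedMetadataSketch {

    public static void main(String[] args) {
        Graph metadata = new SimpleGraph(); // stands in for ci.getMetadata()
        ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); // stands in for ci.getLock()

        // collect extraction results in a buffer, without holding any lock
        Graph buffer = new SimpleGraph();
        buffer.add(new TripleImpl(new IRI("urn:ex:doc"), new IRI("urn:ex:title"),
                new IRI("urn:ex:aTitle")));

        // hold the write lock only for the short, final merge
        lock.writeLock().lock();
        try {
            metadata.addAll(buffer);
        } finally {
            lock.writeLock().unlock();
        }
        System.out.println("metadata size: " + metadata.size());
    }
}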
Use of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in project stanbol by apache.
The class ZemantaEnhancementEngine, method processRecognition.
/**
 * Processes all Zemanta recognitions and converts them to the
 * corresponding FISE enhancements.
 *
 * @param results the results of the Zemanta enhancement process
 * @param enhancements the graph containing the current Stanbol Enhancer
 * enhancements
 * @param text the content of the content item as string
 * @param ciId the ID of the content item
 */
protected void processRecognition(Graph results, Graph enhancements, String text, IRI ciId) {
    Iterator<Triple> recognitions = results.filter(null, RDF_TYPE, ZemantaOntologyEnum.Recognition.getUri());
    while (recognitions.hasNext()) {
        BlankNodeOrIRI recognition = recognitions.next().getSubject();
        log.debug("process recognition " + recognition);
        // first get everything we need for the textAnnotations
        Double confidence = parseConfidence(results, recognition);
        log.debug(" > confidence :" + confidence);
        String anchor = EnhancementEngineHelper.getString(results, recognition, ZemantaOntologyEnum.anchor.getUri());
        log.debug(" > anchor :" + anchor);
        Collection<BlankNodeOrIRI> textAnnotations = processTextAnnotation(enhancements, text, ciId, anchor, confidence);
        log.debug(" > number of textAnnotations :" + textAnnotations.size());
        // second, create the EntityAnnotation that represents the recognition
        BlankNodeOrIRI object = EnhancementEngineHelper.getReference(results, recognition, ZemantaOntologyEnum.object.getUri());
        log.debug(" > object :" + object);
        // the targets represent the linked entities
        // ... and yes, there can be more than one of them!
        // TODO: can we create an EntityAnnotation with several referred entities?
        //       Should we use owl:sameAs to decide that?
        Set<IRI> sameAsSet = new HashSet<IRI>();
        Iterator<IRI> sameAs = getReferences(results, object, ZemantaOntologyEnum.owlSameAs.getUri());
        while (sameAs.hasNext()) {
            sameAsSet.add(sameAs.next());
        }
        log.debug(" > sameAs :" + sameAsSet);
        // now parse the targets and look whether there are others than the
        // ones merged by using sameAs
        Iterator<IRI> targets = EnhancementEngineHelper.getReferences(results, object, ZemantaOntologyEnum.target.getUri());
        String title = null;
        while (targets.hasNext()) {
            // the entityRef is the URL of the target
            IRI entity = targets.next();
            log.debug(" - target :" + entity);
            IRI targetType = EnhancementEngineHelper.getReference(results, entity, ZemantaOntologyEnum.targetType.getUri());
            log.debug(" o type :" + targetType);
            if (ZemantaOntologyEnum.targetType_RDF.getUri().equals(targetType)) {
                String targetTitle = EnhancementEngineHelper.getString(results, entity, ZemantaOntologyEnum.title.getUri());
                log.debug(" o title :" + targetTitle);
                if (sameAsSet.contains(entity)) {
                    if (title == null) {
                        title = targetTitle;
                    } else if (!title.equals(targetTitle)) {
                        log.warn("Entities marked with owl:sameAs use different labels '" + title + "' != '" + targetTitle + "'!");
                    }
                    // else both targets use the same label -> that's expected
                } else {
                    // maybe we should create a second entityEnhancement, but such
                    // a case should not happen, so only write a warning for now
                    log.warn("Found target with type RDF that is not linked with owl:sameAs to the others (this: '" + entity + " | sameAs: " + sameAsSet + ")");
                    log.warn(" - no Enhancement for " + entity + " will be created");
                }
            }
            // else -> do not process -> RDF entities only
            // TODO: targetTypes are not parsed by Zemanta, therefore we can not
            //       set any entity types!
        }
        // create the entityEnhancement
        IRI entityEnhancement = EnhancementEngineHelper.createEntityEnhancement(enhancements, this, ciId);
        if (confidence != null) {
            enhancements.add(new TripleImpl(entityEnhancement, ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(confidence)));
        }
        for (BlankNodeOrIRI relatedTextAnnotation : textAnnotations) {
            enhancements.add(new TripleImpl(entityEnhancement, DC_RELATION, relatedTextAnnotation));
        }
        for (IRI entity : sameAsSet) {
            enhancements.add(new TripleImpl(entityEnhancement, ENHANCER_ENTITY_REFERENCE, entity));
        }
        enhancements.add(new TripleImpl(entityEnhancement, ENHANCER_ENTITY_LABEL, new PlainLiteralImpl(title)));
    }
}
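The per-recognition loop boils down to a Graph.filter call with an rdf:type pattern, where each matching subject is a BlankNodeOrIRI. Below is a reduced, self-contained sketch; the Recognition type IRI is a made-up stand-in for ZemantaOntologyEnum.Recognition.getUri().

import java.util.Iterator;

import org.apache.clerezza.commons.rdf.BlankNodeOrIRI;
import org.apache.clerezza.commons.rdf.Graph;
import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.commons.rdf.Triple;
import org.apache.clerezza.commons.rdf.impl.utils.TripleImpl;
import org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph;

public class RecognitionLoopSketch {

    static final IRI RDF_TYPE = new IRI("http://www.w3.org/1999/02/22-rdf-syntax-ns#type");
    // illustrative stand-in for ZemantaOntologyEnum.Recognition.getUri()
    static final IRI RECOGNITION = new IRI("urn:ex:Recognition");

    public static void main(String[] args) {
        Graph results = new SimpleGraph();
        results.add(new TripleImpl(new IRI("urn:ex:rec/1"), RDF_TYPE, RECOGNITION));
        results.add(new TripleImpl(new IRI("urn:ex:rec/2"), RDF_TYPE, RECOGNITION));

        // subjects of triples are BlankNodeOrIRI by definition,
        // so getSubject() needs no cast
        Iterator<Triple> recognitions = results.filter(null, RDF_TYPE, RECOGNITION);
        while (recognitions.hasNext()) {
            BlankNodeOrIRI recognition = recognitions.next().getSubject();
            System.out.println("process recognition " + recognition);
        }
    }
}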
Use of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in project stanbol by apache.
The class ZemantaEnhancementEngine, method processTextAnnotation.
/**
 * Searches for, or creates, text annotations for the anchor points of
 * Zemanta extractions.
 * <p>
 * First this method searches for text annotations that use the anchor as
 * selected text. Second it searches for occurrences of the anchor within the
 * content of the content item and checks whether a text annotation exists for
 * that occurrence. If not, it creates a new one.
 *
 * @param enhancements the graph containing the metadata
 * @param text the content as string
 * @param ciId the ID of the content item
 * @param anchor the anchor text
 * @param confidence the confidence to be used for newly created text annotations
 *
 * @return a collection of all existing/created text annotations for the parsed anchor
 */
private Collection<BlankNodeOrIRI> processTextAnnotation(Graph enhancements, String text, IRI ciId, String anchor, Double confidence) {
    Collection<BlankNodeOrIRI> textAnnotations = new ArrayList<BlankNodeOrIRI>();
    int anchorLength = anchor.length();
    Literal anchorLiteral = new PlainLiteralImpl(anchor);
    // first search for existing TextAnnotations for the anchor
    Map<Integer, Collection<BlankNodeOrIRI>> existingTextAnnotationsMap = searchExistingTextAnnotations(enhancements, anchorLiteral);
    for (int current = text.indexOf(anchor); current >= 0; current = text.indexOf(anchor, current + 1)) {
        Collection<BlankNodeOrIRI> existingTextAnnotations = existingTextAnnotationsMap.get(current);
        if (existingTextAnnotations != null) {
            // use the existing ones
            textAnnotations.addAll(existingTextAnnotations);
        } else {
            // we need to create a new one!
            IRI textAnnotation = EnhancementEngineHelper.createTextEnhancement(enhancements, this, ciId);
            textAnnotations.add(textAnnotation);
            // write the selection
            enhancements.add(new TripleImpl(textAnnotation, ENHANCER_START, literalFactory.createTypedLiteral(current)));
            enhancements.add(new TripleImpl(textAnnotation, ENHANCER_END, literalFactory.createTypedLiteral(current + anchorLength)));
            enhancements.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT, anchorLiteral));
            // extract the selection context
            int beginPos;
            if (current <= SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE) {
                beginPos = 0;
            } else {
                int start = current - SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE;
                beginPos = text.indexOf(' ', start);
                if (beginPos < 0 || beginPos >= current) {
                    // no word boundary found -> begin within a word
                    beginPos = start;
                }
            }
            int endPos;
            if (current + anchorLength + SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE >= text.length()) {
                endPos = text.length();
            } else {
                int start = current + anchorLength + SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE;
                endPos = text.lastIndexOf(' ', start);
                if (endPos <= current + anchorLength) {
                    // no word boundary found -> end within a word
                    endPos = start;
                }
            }
            enhancements.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(text.substring(beginPos, endPos))));
            // set the confidence (if known); note that the confidence is
            // related to the annotated Entity rather than to the selected text
            if (confidence != null) {
                enhancements.add(new TripleImpl(textAnnotation, ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(confidence)));
            }
            // TODO: no idea about the type of the annotation, because we do not
            //       have a type for the entity! One would need to get the types
            //       from the referred source.
        }
    }
    return textAnnotations;
}
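The selection-context window computed above can be tested in isolation. The following sketch mirrors the begin/end logic with a fixed window size; the constant 50 is an assumption for the example, not necessarily the engine's actual SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE value.

public class SelectionContextSketch {

    // assumed window size; the engine uses SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE
    static final int CONTEXT = 50;

    /** Mirrors the begin/end logic above: snap to word boundaries where possible. */
    static String selectionContext(String text, String anchor, int current) {
        int beginPos;
        if (current <= CONTEXT) {
            beginPos = 0;
        } else {
            int start = current - CONTEXT;
            beginPos = text.indexOf(' ', start);
            if (beginPos < 0 || beginPos >= current) {
                beginPos = start; // no word boundary found -> begin within a word
            }
        }
        int endPos;
        if (current + anchor.length() + CONTEXT >= text.length()) {
            endPos = text.length();
        } else {
            int start = current + anchor.length() + CONTEXT;
            endPos = text.lastIndexOf(' ', start);
            if (endPos <= current + anchor.length()) {
                endPos = start; // no word boundary found -> end within a word
            }
        }
        return text.substring(beginPos, endPos);
    }

    public static void main(String[] args) {
        String text = "Apache Stanbol provides a set of reusable components for semantic content management.";
        String anchor = "Stanbol";
        System.out.println(selectionContext(text, anchor, text.indexOf(anchor)));
    }
}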