Search in sources:

Example 86 with BlankNodeOrIRI

use of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in project stanbol by apache.

From the class UserResource, the method hasPermission:

/**
 * Checks whether the given user already holds the given permission.
 *
 * @param userNode         graph node representing the user in the system graph
 * @param permissionString the java permission entry (lexical form) to look for
 * @return {@code true} if a permission with a matching lexical form is already
 *         assigned to the user, {@code false} otherwise
 */
private boolean hasPermission(GraphNode userNode, String permissionString) {
    Lock readLock = systemGraph.getLock().readLock();
    readLock.lock();
    try {
        // Obtain the iterator while holding the read lock: creating and
        // consuming a filter iterator outside the lock can race with
        // concurrent modifications of the system graph.
        Iterator<Triple> existingPermissions = systemGraph.filter((BlankNodeOrIRI) userNode.getNode(), PERMISSION.hasPermission, null);
        // check to see if the user already has this permission
        while (existingPermissions.hasNext()) {
            BlankNodeOrIRI permissionNode = (BlankNodeOrIRI) existingPermissions.next().getObject();
            Iterator<Triple> permissionTriples = systemGraph.filter(permissionNode, PERMISSION.javaPermissionEntry, null);
            while (permissionTriples.hasNext()) {
                Literal permission = (Literal) permissionTriples.next().getObject();
                if (permissionString.equals(permission.getLexicalForm())) {
                    // short-circuit: no need to scan remaining permissions
                    return true;
                }
            }
        }
        return false;
    } finally {
        readLock.unlock();
    }
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) Literal(org.apache.clerezza.commons.rdf.Literal) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) Lock(java.util.concurrent.locks.Lock)

Example 87 with BlankNodeOrIRI

use of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in project stanbol by apache.

From the class MetaxaEngine, the method computeEnhancements:

/**
 * Extracts metadata and plain text from the ContentItem with the Metaxa
 * extractor. Extracted RDF statements are merged into the item's metadata
 * graph; if the NIE plain-text property produced any text, that text is
 * written to a "text/plain" Blob and registered as an additional part of
 * the ContentItem.
 *
 * @param ci the content item to enhance
 * @throws EngineException if extraction fails, the item's stream cannot be
 *         read, the plain-text sink cannot be created, or the extracted
 *         text cannot be written to the sink
 */
public void computeEnhancements(ContentItem ci) throws EngineException {
    // get model from the extraction
    URIImpl docId;
    Model m = null;
    // hold the read lock while consuming the item's content stream
    ci.getLock().readLock().lock();
    try {
        docId = new URIImpl(ci.getUri().getUnicodeString());
        m = this.extractor.extract(ci.getStream(), docId, ci.getMimeType());
    } catch (ExtractorException e) {
        throw new EngineException("Error while processing ContentItem " + ci.getUri() + " with Metaxa", e);
    } catch (IOException e) {
        throw new EngineException("Error while processing ContentItem " + ci.getUri() + " with Metaxa", e);
    } finally {
        ci.getLock().readLock().unlock();
    }
    // the extracted plain text from the model
    // NOTE(review): "preocess" typo in the log message below — runtime string,
    // left unchanged here.
    if (null == m) {
        log.debug("Unable to preocess ContentItem {} (mime type {}) with Metaxa", ci.getUri(), ci.getMimeType());
        return;
    }
    ContentSink plainTextSink;
    try {
        plainTextSink = ciFactory.createContentSink("text/plain");
    } catch (IOException e) {
        // release the RDF2Go model before propagating the failure
        m.close();
        throw new EngineException("Unable to initialise Blob for storing" + "the plain text content", e);
    }
    // maps RDF2Go blank nodes to Clerezza blank nodes so each source bnode
    // is converted to the same target bnode across statements
    HashMap<BlankNode, BlankNode> blankNodeMap = new HashMap<BlankNode, BlankNode>();
    RDF2GoUtils.urifyBlankNodes(m);
    ClosableIterator<Statement> it = m.iterator();
    BufferedWriter out = new BufferedWriter(new OutputStreamWriter(plainTextSink.getOutputStream(), UTF8));
    //used to detect if some text was extracted
    boolean textExtracted = false;
    try {
        //first add to a temporary graph
        Graph g = new SimpleGraph();
        while (it.hasNext()) {
            Statement oneStmt = it.next();
            //the plain text Blob!
            if (oneStmt.getSubject().equals(docId) && oneStmt.getPredicate().equals(NIE_PLAINTEXT_PROPERTY)) {
                String text = oneStmt.getObject().toString();
                if (text != null && !text.isEmpty()) {
                    try {
                        out.write(oneStmt.getObject().toString());
                    } catch (IOException e) {
                        throw new EngineException("Unable to write extracted" + "plain text to Blob (blob impl: " + plainTextSink.getBlob().getClass() + ")", e);
                    }
                    textExtracted = true;
                    // optionally keep the plain-text triple in the metadata too
                    if (includeText) {
                        BlankNodeOrIRI subject = (BlankNodeOrIRI) asClerezzaResource(oneStmt.getSubject(), blankNodeMap);
                        IRI predicate = (IRI) asClerezzaResource(oneStmt.getPredicate(), blankNodeMap);
                        RDFTerm object = asClerezzaResource(oneStmt.getObject(), blankNodeMap);
                        g.add(new TripleImpl(subject, predicate, object));
                    }
                }
            } else {
                //add metadata to the metadata of the contentItem
                BlankNodeOrIRI subject = (BlankNodeOrIRI) asClerezzaResource(oneStmt.getSubject(), blankNodeMap);
                IRI predicate = (IRI) asClerezzaResource(oneStmt.getPredicate(), blankNodeMap);
                RDFTerm object = asClerezzaResource(oneStmt.getObject(), blankNodeMap);
                // asClerezzaResource may return null for unconvertible nodes;
                // skip incomplete triples
                if (null != subject && null != predicate && null != object) {
                    Triple t = new TripleImpl(subject, predicate, object);
                    g.add(t);
                    log.debug("added " + t.toString());
                }
            }
        }
        //add the extracted triples to the metadata of the ContentItem
        // write lock is taken only for the final merge to keep the locked
        // section short
        ci.getLock().writeLock().lock();
        try {
            ci.getMetadata().addAll(g);
            // allow the temporary graph to be garbage collected
            g = null;
        } finally {
            ci.getLock().writeLock().unlock();
        }
    } finally {
        // always release iterator, model and writer, even on failure
        it.close();
        m.close();
        IOUtils.closeQuietly(out);
    }
    if (textExtracted) {
        //add plain text to the content item
        IRI blobUri = new IRI("urn:metaxa:plain-text:" + randomUUID());
        ci.addPart(blobUri, plainTextSink.getBlob());
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) HashMap(java.util.HashMap) Statement(org.ontoware.rdf2go.model.Statement) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) BlankNode(org.apache.clerezza.commons.rdf.BlankNode) BlankNode(org.ontoware.rdf2go.model.node.BlankNode) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) URIImpl(org.ontoware.rdf2go.model.node.impl.URIImpl) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) IOException(java.io.IOException) BufferedWriter(java.io.BufferedWriter) Triple(org.apache.clerezza.commons.rdf.Triple) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) Graph(org.apache.clerezza.commons.rdf.Graph) Model(org.ontoware.rdf2go.model.Model) ExtractorException(org.semanticdesktop.aperture.extractor.ExtractorException) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) OutputStreamWriter(java.io.OutputStreamWriter) ContentSink(org.apache.stanbol.enhancer.servicesapi.ContentSink) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Example 88 with BlankNodeOrIRI

use of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in project stanbol by apache.

the class ZemantaEnhancementEngine method processRecognition.

/**
     * Processes all Zemanta Recognitions and converts them to the according
     * FISE enhancements
     *
     * @param results      the results of the Zemanta enhancement process
     * @param enhancements the graph containing the current Stanbol Enhancer
     *                     enhancements
     * @param text         the content of the content item as string
     */
/**
 * Processes all Zemanta Recognitions and converts them to the according
 * FISE enhancements.
 *
 * @param results      the results of the Zemanta enhancement process
 * @param enhancements the graph containing the current Stanbol Enhancer
 *                     enhancements
 * @param text         the content of the content item as string
 * @param ciId         the ID of the content item
 */
protected void processRecognition(Graph results, Graph enhancements, String text, IRI ciId) {
    Iterator<Triple> recognitions = results.filter(null, RDF_TYPE, ZemantaOntologyEnum.Recognition.getUri());
    while (recognitions.hasNext()) {
        BlankNodeOrIRI recognition = recognitions.next().getSubject();
        log.debug("process recognition " + recognition);
        //first get everything we need for the textAnnotations
        Double confidence = parseConfidence(results, recognition);
        log.debug(" > confidence :" + confidence);
        String anchor = EnhancementEngineHelper.getString(results, recognition, ZemantaOntologyEnum.anchor.getUri());
        log.debug(" > anchor :" + anchor);
        Collection<BlankNodeOrIRI> textAnnotations = processTextAnnotation(enhancements, text, ciId, anchor, confidence);
        log.debug(" > number of textAnnotations :" + textAnnotations.size());
        //second we need to create the EntityAnnotation that represent the
        //recognition
        BlankNodeOrIRI object = EnhancementEngineHelper.getReference(results, recognition, ZemantaOntologyEnum.object.getUri());
        log.debug(" > object :" + object);
        //The targets represent the linked entities
        //  ... and yes there can be more of them!
        //TODO: can we create an EntityAnnotation with several referred entities?
        //      Should we use the owl:sameAs to decide that!
        Set<IRI> sameAsSet = new HashSet<IRI>();
        for (Iterator<IRI> sameAs = getReferences(results, object, ZemantaOntologyEnum.owlSameAs.getUri()); sameAs.hasNext(); sameAsSet.add(sameAs.next())) ;
        log.debug(" > sameAs :" + sameAsSet);
        //now parse the targets and look if there are others than the one
        //merged by using sameAs
        Iterator<IRI> targets = EnhancementEngineHelper.getReferences(results, object, ZemantaOntologyEnum.target.getUri());
        String title = null;
        while (targets.hasNext()) {
            //the entityRef is the URL of the target
            IRI entity = targets.next();
            log.debug("    -  target :" + entity);
            IRI targetType = EnhancementEngineHelper.getReference(results, entity, ZemantaOntologyEnum.targetType.getUri());
            log.debug("       o type :" + targetType);
            if (ZemantaOntologyEnum.targetType_RDF.getUri().equals(targetType)) {
                String targetTitle = EnhancementEngineHelper.getString(results, entity, ZemantaOntologyEnum.title.getUri());
                log.debug("       o title :" + targetTitle);
                if (sameAsSet.contains(entity)) {
                    if (title == null) {
                        title = targetTitle;
                    } else if (!title.equals(targetTitle)) {
                        log.warn("Entities marked with owl:sameAs do use different labels '" + title + "' != '" + targetTitle + "'!");
                    }
                //else the same label used by both -> thats expected
                } else {
                    //maybe we should create an second entityEnhancement, but I think, that such a case should
                    //not happen. So write an warning for now
                    log.warn("Found Target with type RDF, that is not linked with owl:sameAs to the others (this: '" + entity + " | sameAs: " + sameAsSet + ")");
                    log.warn("  - no Enhancement for " + entity + " will be created");
                }
            }
        //else -> do not process -> RDF Entities only
        //TODO: targetTypes are not parsed by Zemanta, therefore we can not set
        //      any entity types!
        }
        //create the entityEnhancement
        IRI entityEnhancement = EnhancementEngineHelper.createEntityEnhancement(enhancements, this, ciId);
        if (confidence != null) {
            enhancements.add(new TripleImpl(entityEnhancement, ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(confidence)));
        }
        for (BlankNodeOrIRI relatedTextAnnotation : textAnnotations) {
            enhancements.add(new TripleImpl(entityEnhancement, DC_RELATION, relatedTextAnnotation));
        }
        for (IRI entity : sameAsSet) {
            enhancements.add(new TripleImpl(entityEnhancement, ENHANCER_ENTITY_REFERENCE, entity));
        }
        // title stays null when no RDF-typed target in the sameAs set provided
        // one; PlainLiteralImpl rejects a null lexical form, so only write the
        // label triple when a title was actually found
        if (title != null) {
            enhancements.add(new TripleImpl(entityEnhancement, ENHANCER_ENTITY_LABEL, new PlainLiteralImpl(title)));
        } else {
            log.warn("No label found for EntityAnnotation " + entityEnhancement + " (recognition: " + recognition + ")");
        }
    }
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) HashSet(java.util.HashSet)

Example 89 with BlankNodeOrIRI

use of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in project stanbol by apache.

the class ZemantaEnhancementEngine method processTextAnnotation.

/**
     * This Methods searches/creates text annotations for anchor points of Zemanta
     * extractions.
     * <p>
     * First this method searches for text annotations that do use the anchor as
     * selected text. Second it searches for occurrences of the anchor within the
     * content of the content and checks if there is an text annotation for that
     * occurrence. If not it creates an new one.
     *
     * @param enhancements the graph containing the meta data
     * @param text         the content as string
     * @param ciId         the ID of the content item
     * @param anchor       the anchor text
     * @param confidence   the confidence to be used for newly created text annotations
     *
     * @return a collection of all existing/created text annotations for the parsed anchor
     */
/**
 * Searches/creates text annotations for the anchor points of Zemanta
 * extractions.
 * <p>
 * Existing text annotations that select the anchor text are reused; for every
 * other occurrence of the anchor within the content a new text annotation is
 * created, including selection offsets, the selected text, a selection
 * context snippet and (when available) a confidence value.
 *
 * @param enhancements the graph containing the meta data
 * @param text         the content as string
 * @param ciId         the ID of the content item
 * @param anchor       the anchor text
 * @param confidence   the confidence to be used for newly created text annotations
 *
 * @return all existing/created text annotations for the parsed anchor
 */
private Collection<BlankNodeOrIRI> processTextAnnotation(Graph enhancements, String text, IRI ciId, String anchor, Double confidence) {
    Collection<BlankNodeOrIRI> annotations = new ArrayList<BlankNodeOrIRI>();
    Literal anchorLiteral = new PlainLiteralImpl(anchor);
    int anchorLen = anchor.length();
    // existing annotations that select this anchor, keyed by start offset
    Map<Integer, Collection<BlankNodeOrIRI>> existingByOffset = searchExistingTextAnnotations(enhancements, anchorLiteral);
    // scan every occurrence of the anchor within the content
    int offset = text.indexOf(anchor);
    while (offset >= 0) {
        Collection<BlankNodeOrIRI> reusable = existingByOffset.get(offset);
        if (reusable != null) {
            // annotations for this occurrence already exist -> reuse them
            annotations.addAll(reusable);
        } else {
            // no annotation yet for this occurrence -> create one
            IRI textAnnotation = EnhancementEngineHelper.createTextEnhancement(enhancements, this, ciId);
            annotations.add(textAnnotation);
            // selection offsets and selected text
            enhancements.add(new TripleImpl(textAnnotation, ENHANCER_START, literalFactory.createTypedLiteral(offset)));
            enhancements.add(new TripleImpl(textAnnotation, ENHANCER_END, literalFactory.createTypedLiteral(offset + anchorLen)));
            enhancements.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT, anchorLiteral));
            // selection context: snap to word boundaries where possible
            int ctxBegin;
            if (offset <= SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE) {
                ctxBegin = 0;
            } else {
                int rawBegin = offset - SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE;
                int space = text.indexOf(' ', rawBegin);
                // no space before the anchor -> context starts within a word
                ctxBegin = (space < 0 || space >= offset) ? rawBegin : space;
            }
            int afterAnchor = offset + anchorLen;
            int ctxEnd;
            if (afterAnchor + SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE >= text.length()) {
                ctxEnd = text.length();
            } else {
                int rawEnd = afterAnchor + SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE;
                int space = text.lastIndexOf(' ', rawEnd);
                // no space after the anchor -> context ends within a word
                ctxEnd = (space <= afterAnchor) ? rawEnd : space;
            }
            enhancements.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(text.substring(ctxBegin, ctxEnd))));
            //      related to the annotated Entity rather to the selected text.
            if (confidence != null) {
                enhancements.add(new TripleImpl(textAnnotation, ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(confidence)));
            }
        //TODO: No idea about the type of the Annotation, because we do not have an type of the entity!
        //      One would need to get the types from the referred Source
        }
        offset = text.indexOf(anchor, offset + 1);
    }
    return annotations;
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) Literal(org.apache.clerezza.commons.rdf.Literal) ArrayList(java.util.ArrayList) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) Collection(java.util.Collection) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Aggregations

BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)89 Triple (org.apache.clerezza.commons.rdf.Triple)52 IRI (org.apache.clerezza.commons.rdf.IRI)41 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)30 RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)27 HashSet (java.util.HashSet)24 Graph (org.apache.clerezza.commons.rdf.Graph)22 HashMap (java.util.HashMap)17 ArrayList (java.util.ArrayList)14 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)14 Literal (org.apache.clerezza.commons.rdf.Literal)13 SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph)12 Lock (java.util.concurrent.locks.Lock)10 BlankNode (org.apache.clerezza.commons.rdf.BlankNode)10 EnhancementEngineHelper.getString (org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.getString)8 Test (org.junit.Test)8 Collection (java.util.Collection)7 IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph)7 Language (org.apache.clerezza.commons.rdf.Language)6 EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException)6