Search in sources :

Example 1 with Triple

use of org.apache.clerezza.commons.rdf.Triple in project stanbol by apache.

the class FstLinkingEngine method writeEnhancements.

/**
 * Writes the Enhancements for the {@link LinkedEntity LinkedEntities}
 * extracted from the parsed ContentItem.
 * <p>
 * For every tag one text annotation is reused or created in the metadata
 * graph of the content item, and one entity annotation is created for every
 * suggestion of that tag. Each entity annotation is linked to the text
 * annotation of its tag via {@code Properties.DC_RELATION}.
 *
 * @param ci the content item; the triples are added to {@code ci.getMetadata()}
 * @param text the text handed to {@code getSelectionContext(..)} together
 *        with the anchor and start offset of a tag
 * @param tags the tags to write; each provides start, end, anchor, score
 *        and suggestions
 * @param language the language used for the plain literals written for the
 *        selection context and the selected text; {@code null} or an empty
 *        string means that no language is set on those literals
 * @param writeRankings if {@code true} the ranking of a suggestion is
 *        written as well, provided it is not {@code null}
 */
private void writeEnhancements(ContentItem ci, String text, Collection<Tag> tags, String language, boolean writeRankings) {
    Language languageObject = null;
    if (language != null && !language.isEmpty()) {
        languageObject = new Language(language);
    }
    Graph metadata = ci.getMetadata();
    for (Tag tag : tags) {
        // first create the TextAnnotations for the Occurrences
        Literal startLiteral = literalFactory.createTypedLiteral(tag.getStart());
        Literal endLiteral = literalFactory.createTypedLiteral(tag.getEnd());
        // search for an existing text annotation with the same start and end
        Iterator<Triple> it = metadata.filter(null, ENHANCER_START, startLiteral);
        IRI textAnnotation = null;
        while (it.hasNext()) {
            Triple t = it.next();
            // Only subjects that are IRIs can be reused. Checking the type
            // first keeps the cast below from failing with a
            // ClassCastException on a blank node subject.
            if (t.getSubject() instanceof IRI
                    && metadata.filter(t.getSubject(), ENHANCER_END, endLiteral).hasNext()
                    && metadata.filter(t.getSubject(), RDF_TYPE, ENHANCER_TEXTANNOTATION).hasNext()) {
                textAnnotation = (IRI) t.getSubject();
                break;
            }
        }
        if (textAnnotation == null) {
            // not found ... create a new one
            textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
            metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_START, startLiteral));
            metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_END, endLiteral));
            metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(getSelectionContext(text, tag.getAnchor(), tag.getStart()), languageObject)));
            metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(tag.getAnchor(), languageObject)));
            metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(tag.getScore())));
        } else {
            // if existing add this engine as contributor
            metadata.add(new TripleImpl(textAnnotation, DC_CONTRIBUTOR, new PlainLiteralImpl(this.getClass().getName())));
        }
        // add dc:types (even to existing)
        for (IRI dcType : getDcTypes(tag.getSuggestions())) {
            metadata.add(new TripleImpl(textAnnotation, Properties.DC_TYPE, dcType));
        }
        // now the EntityAnnotations for the Suggestions
        for (Match match : tag.getSuggestions()) {
            IRI entityAnnotation = EnhancementEngineHelper.createEntityEnhancement(ci, this);
            // should we use the label used for the match, or search the
            // representation for the best label ... currently its the matched one
            metadata.add(new TripleImpl(entityAnnotation, Properties.ENHANCER_ENTITY_LABEL, match.getMatchLabel()));
            metadata.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_REFERENCE, new IRI(match.getUri())));
            for (IRI type : match.getTypes()) {
                metadata.add(new TripleImpl(entityAnnotation, Properties.ENHANCER_ENTITY_TYPE, type));
            }
            metadata.add(new TripleImpl(entityAnnotation, Properties.ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(match.getScore())));
            // add the relation to the fise:TextAnnotation (the tag)
            metadata.add(new TripleImpl(entityAnnotation, Properties.DC_RELATION, textAnnotation));
            // write origin information (only if the index configuration defines one)
            if (indexConfig.getOrigin() != null) {
                metadata.add(new TripleImpl(entityAnnotation, FISE_ORIGIN, indexConfig.getOrigin()));
            }
            if (writeRankings) {
                Double ranking = match.getRanking();
                if (ranking != null) {
                    metadata.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_RANKING, literalFactory.createTypedLiteral(ranking)));
                }
            }
            // TODO: dereferencing
            // if(linkerConfig.isDereferenceEntitiesEnabled() &&
            // dereferencedEntitis.add(entity.getUri())){ //not yet dereferenced
            // //add all outgoing triples for this entity
            // //NOTE: do not add all triples as there might be other data in the graph
            // for(Iterator<Triple> triples = entity.getData().filter(entity.getUri(), null, null);
            // triples.hasNext();metadata.add(triples.next()));
            // }
        }
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) ArrayList(java.util.ArrayList) Triple(org.apache.clerezza.commons.rdf.Triple) Graph(org.apache.clerezza.commons.rdf.Graph) Language(org.apache.clerezza.commons.rdf.Language) NlpEngineHelper.getLanguage(org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getLanguage) Literal(org.apache.clerezza.commons.rdf.Literal) NerTag(org.apache.stanbol.enhancer.nlp.ner.NerTag) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Example 2 with Triple

use of org.apache.clerezza.commons.rdf.Triple in project stanbol by apache.

the class ClerezzaRDFUtils method urifyBlankNodes.

/**
 * Replaces the blank nodes used as subject or object in the triples of the
 * given graph with IRIs obtained from {@code createRandomUri()}.
 * <p>
 * The same blank node is always mapped to the same IRI within one call. The
 * graph is only modified after the iteration over its triples has finished:
 * the affected triples are removed and their rewritten counterparts added.
 *
 * @param model the graph to modify in place
 */
public static void urifyBlankNodes(Graph model) {
    final HashMap<BlankNode, IRI> replacements = new HashMap<BlankNode, IRI>();
    final Graph obsolete = new SimpleGraph();
    final Graph rewritten = new SimpleGraph();
    for (Triple triple : model) {
        final BlankNodeOrIRI subject = triple.getSubject();
        final RDFTerm object = triple.getObject();
        final IRI predicate = triple.getPredicate();
        final boolean blankSubject = subject instanceof BlankNode;
        final boolean blankObject = object instanceof BlankNode;
        if (!blankSubject && !blankObject) {
            // no blank node involved -> the triple stays untouched
            continue;
        }
        BlankNodeOrIRI newSubject = subject;
        if (blankSubject) {
            IRI iri = replacements.get(subject);
            if (iri == null) {
                // first occurrence of this blank node: assign an IRI to it
                iri = createRandomUri();
                replacements.put((BlankNode) subject, iri);
            }
            newSubject = iri;
        }
        RDFTerm newObject = object;
        if (blankObject) {
            IRI iri = replacements.get(object);
            if (iri == null) {
                iri = createRandomUri();
                replacements.put((BlankNode) object, iri);
            }
            newObject = iri;
        }
        obsolete.add(triple);
        rewritten.add(new TripleImpl(newSubject, predicate, newObject));
    }
    // apply the collected changes only now, after the iteration is done
    model.removeAll(obsolete);
    model.addAll(rewritten);
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) Graph(org.apache.clerezza.commons.rdf.Graph) HashMap(java.util.HashMap) BlankNode(org.apache.clerezza.commons.rdf.BlankNode) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Example 3 with Triple

use of org.apache.clerezza.commons.rdf.Triple in project stanbol by apache.

the class ClerezzaRDFUtils method findRoot.

/**
 * Recursively follows the triples that have the given node as object back
 * to their subjects and collects every node that is not the object of any
 * triple in the graph.
 *
 * @param model the graph to search
 * @param node the node to start from
 * @param roots receives the nodes that are not the object of any statement
 * @param visited the nodes already processed; used to process each node
 *        only once
 */
private static void findRoot(Graph model, BlankNodeOrIRI node, Set<BlankNodeOrIRI> roots, Set<BlankNodeOrIRI> visited) {
    // Set.add returns false if the node was already processed
    if (!visited.add(node)) {
        return;
    }
    Iterator<Triple> incoming = model.filter(null, null, node);
    if (incoming.hasNext()) {
        // the node is the object of at least one statement: continue the
        // search at the subject of every such statement
        do {
            findRoot(model, incoming.next().getSubject(), roots, visited);
        } while (incoming.hasNext());
    } else {
        // something that is not the object of some statement is a root
        roots.add(node);
        LOG.debug("Root found: {}", node);
    }
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI)

Example 4 with Triple

use of org.apache.clerezza.commons.rdf.Triple in project stanbol by apache.

the class ClerezzaModelWriter method toRDF.

/**
 * Converts the parsed query result list to an RDF graph.
 * <p>
 * For every result a triple
 * {@code QUERY_RESULT_LIST QUERY_RESULT <result id>} is part of the
 * returned graph. For results of type {@link Representation} or
 * {@link Entity} the data of the results are included as well.
 * <p>
 * NOTE: for an {@link RdfQueryResultList} the graph returned by
 * {@code getResultGraph()} is used (and, for {@link Entity} results,
 * modified) directly instead of creating a new graph.
 *
 * @param resultList the query results; the type reported by
 *        {@code getType()} must be assignable to {@link String},
 *        {@link Representation} or {@link Entity}
 * @return the graph holding the results
 * @throws IllegalArgumentException if the type of the result list is none
 *         of the supported ones
 */
private Graph toRDF(QueryResultList<?> resultList) {
    final Graph resultGraph;
    Class<?> type = resultList.getType();
    if (String.class.isAssignableFrom(type)) {
        // results are plain ids: create a new graph that only holds the
        // references to the results
        resultGraph = new IndexedGraph();
        for (Object result : resultList) {
            // add a triple to each reference in the result set
            resultGraph.add(new TripleImpl(QUERY_RESULT_LIST, QUERY_RESULT, new IRI(result.toString())));
        }
    } else {
        // first determine the type of the resultList
        final boolean isEntityType;
        if (Representation.class.isAssignableFrom(type)) {
            isEntityType = false;
        } else if (Entity.class.isAssignableFrom(type)) {
            // NOTE: this branch previously tested Representation a second
            // time, which made it unreachable and caused Entity results to
            // be rejected as unsupported.
            isEntityType = true;
        } else {
            // incompatible type -> throw an Exception
            throw new IllegalArgumentException("Parsed type " + type + " is not supported");
        }
        // special treatment for RdfQueryResultList for increased performance
        if (resultList instanceof RdfQueryResultList) {
            resultGraph = ((RdfQueryResultList) resultList).getResultGraph();
            if (isEntityType) {
                // if we build a ResultList for Entities we need to do more things:
                // first remove all triples representing results
                Iterator<Triple> resultTripleIt = resultGraph.filter(QUERY_RESULT_LIST, QUERY_RESULT, null);
                while (resultTripleIt.hasNext()) {
                    resultTripleIt.next();
                    resultTripleIt.remove();
                }
                // now add the Entity data and let the result triples point
                // to the Entity IDs
                for (Object result : resultList) {
                    IRI entityId = new IRI(((Entity) result).getId());
                    addEntityTriplesToGraph(resultGraph, (Entity) result);
                    resultGraph.add(new TripleImpl(QUERY_RESULT_LIST, QUERY_RESULT, entityId));
                }
            }
        } else {
            // any other implementation of the QueryResultList interface
            // create a new graph
            resultGraph = new IndexedGraph();
            // The type is known to be Representation or Entity at this
            // point, so no further type check is needed.
            for (Object result : resultList) {
                IRI resultId;
                if (!isEntityType) {
                    addRDFTo(resultGraph, (Representation) result);
                    resultId = new IRI(((Representation) result).getId());
                } else {
                    addRDFTo(resultGraph, (Entity) result);
                    resultId = new IRI(((Entity) result).getId());
                }
                // Note: In case of Representation this Triple points to
                // the representation. In case of Entities it points to
                // the entity.
                resultGraph.add(new TripleImpl(QUERY_RESULT_LIST, QUERY_RESULT, resultId));
            }
        }
    }
    return resultGraph;
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Entity(org.apache.stanbol.entityhub.servicesapi.model.Entity) RdfRepresentation(org.apache.stanbol.entityhub.model.clerezza.RdfRepresentation) Representation(org.apache.stanbol.entityhub.servicesapi.model.Representation) Triple(org.apache.clerezza.commons.rdf.Triple) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) Graph(org.apache.clerezza.commons.rdf.Graph) RdfQueryResultList(org.apache.stanbol.entityhub.query.clerezza.RdfQueryResultList) JSONObject(org.codehaus.jettison.json.JSONObject) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph)

Example 5 with Triple

use of org.apache.clerezza.commons.rdf.Triple in project stanbol by apache.

the class ClerezzaYard method remove.

/**
 * Removes all triples that have the resource with the parsed id as subject
 * from the graph. Nothing happens if no such triple exists.
 * <p>
 * The lock returned by {@code writeLockGraph()} is released when the
 * removal is finished, also in case of an exception.
 *
 * @param id the id of the resource to remove
 * @throws IllegalArgumentException if the parsed id is {@code null}
 */
@Override
public void remove(String id) throws YardException, IllegalArgumentException {
    if (id == null) {
        throw new IllegalArgumentException("The parsed Representation id MUST NOT be NULL!");
    }
    final IRI subject = new IRI(id);
    final Lock lock = writeLockGraph();
    try {
        // remove via the iterator so that the graph is not modified behind
        // the back of the running iteration
        for (Iterator<Triple> triples = graph.filter(subject, null, null); triples.hasNext(); ) {
            triples.next();
            triples.remove();
        }
    } finally {
        lock.unlock();
    }
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) Lock(java.util.concurrent.locks.Lock)

Aggregations

Triple (org.apache.clerezza.commons.rdf.Triple)151 IRI (org.apache.clerezza.commons.rdf.IRI)88 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)84 RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)70 Graph (org.apache.clerezza.commons.rdf.Graph)45 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)41 HashSet (java.util.HashSet)34 Literal (org.apache.clerezza.commons.rdf.Literal)30 ArrayList (java.util.ArrayList)27 Lock (java.util.concurrent.locks.Lock)21 HashMap (java.util.HashMap)20 SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph)19 OWLOntologyID (org.semanticweb.owlapi.model.OWLOntologyID)19 IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph)15 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)12 Test (org.junit.Test)12 EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException)10 ImmutableGraph (org.apache.clerezza.commons.rdf.ImmutableGraph)9 GraphNode (org.apache.clerezza.rdf.utils.GraphNode)8 IOException (java.io.IOException)7