Search in sources :

Example 61 with BlankNodeOrIRI

use of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in project stanbol by apache.

the class NamedEntity method createFromTextAnnotation.

/**
 * Builds a {@link NamedEntity} from the data that a
 * {@link TechnicalClasses#ENHANCER_TEXTANNOTATION} instance carries in the
 * parsed graph.
 * <p>
 * The selected text of the annotation is trimmed and then passed through
 * {@code cleanupKeywords(String)}; the outcome is used as name of the
 * entity. The {@code DC_TYPE} value of the annotation is used as its type.
 *
 * @param graph the graph with the information
 * @param textAnnotation the text annotation instance
 * @return the {@link NamedEntity} or <code>null</code> if the text
 * annotation has no selected text, if the selected text is empty after
 * trimming or after the keyword cleanup, or if no type is present
 */
public static NamedEntity createFromTextAnnotation(Graph graph, BlankNodeOrIRI textAnnotation) {
    final String selectedText = EnhancementEngineHelper.getString(graph, textAnnotation, ENHANCER_SELECTED_TEXT);
    if (selectedText == null) {
        log.debug("Unable to create NamedEntity for TextAnnotation {} because property {} is not present", textAnnotation, ENHANCER_SELECTED_TEXT);
        return null;
    }

    final String trimmed = selectedText.trim();
    if (trimmed.isEmpty()) {
        log.debug("Unable to process TextAnnotation {} because its selects an empty Stirng !", textAnnotation);
        return null;
    }

    // the cleaned-up form of the selection is what gets used as entity name
    final String entityName = cleanupKeywords(trimmed);
    if (entityName.isEmpty()) {
        // report the untouched selection so the reason for the rejection is visible
        log.debug("Unable to process TextAnnotation {} because its selects an stirng with punktations only (selected: {})!", textAnnotation, selectedText);
        return null;
    }

    final IRI entityType = EnhancementEngineHelper.getReference(graph, textAnnotation, DC_TYPE);
    if (entityType == null) {
        // a missing type is logged on WARN, unlike the checks above
        log.warn("Unable to process TextAnnotation {} because property {} is not present!", textAnnotation, DC_TYPE);
        return null;
    }
    return new NamedEntity(textAnnotation, entityName, entityType);
}
Also used : BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) IRI(org.apache.clerezza.commons.rdf.IRI)

Example 62 with BlankNodeOrIRI

use of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in project stanbol by apache.

the class NamedEntityTaggingEngine method computeEnhancements.

/**
 * Looks up entity suggestions for the text annotations present in the
 * metadata of the parsed content item and writes them back as entity
 * annotations.
 * <p>
 * The work is done in three phases:
 * <ol>
 * <li> holding the read lock: collect all text annotations that have no
 * {@code DC_RELATION} value, each together with the annotations that refer
 * to it via {@code DC_RELATION}
 * <li> holding no lock: compute the suggestions for the collected entities
 * <li> holding the write lock: write an entity annotation for every
 * suggestion and, if {@code dereferenceEntities} is set, add the
 * representations of the suggested entities to the metadata
 * </ol>
 * If a referenced site is configured but not available, or if it does not
 * support local mode while offline mode is active, a warning is logged and
 * the method returns without enhancing the content item.
 *
 * @param ci the content item to enhance
 * @throws EngineException wraps any {@link EntityhubException} thrown while
 * computing the suggestions
 */
public void computeEnhancements(ContentItem ci) throws EngineException {
    // stays null if no referenced site is configured; null indicates to use
    // the Entityhub to lookup Entities
    Site site = null;
    if (referencedSiteID != null) {
        site = siteManager.getSite(referencedSiteID);
        if (site == null) {
            // the configured site is not present: only warn, do not fail
            String msg = String.format("Unable to enhance %s because Referenced Site %s is currently not active!", ci.getUri().getUnicodeString(), referencedSiteID);
            log.warn(msg);
            return;
        }
        if (isOfflineMode() && !site.supportsLocalMode()) {
            log.warn("Unable to enhance ci {} because OfflineMode is not supported by ReferencedSite {}.", ci.getUri().getUnicodeString(), site.getId());
            return;
        }
    }

    Graph graph = ci.getMetadata();
    LiteralFactory literalFactory = LiteralFactory.getInstance();

    // Phase 1: read the existing text annotations (requires read lock)
    Map<NamedEntity, List<IRI>> textAnnotations = new HashMap<NamedEntity, List<IRI>>();
    // language of the parsed content, null if not available
    String contentLanguage;
    ci.getLock().readLock().lock();
    try {
        contentLanguage = EnhancementEngineHelper.getLanguage(ci);
        Iterator<Triple> annotationIt = graph.filter(null, RDF_TYPE, TechnicalClasses.ENHANCER_TEXTANNOTATION);
        while (annotationIt.hasNext()) {
            IRI annotation = (IRI) annotationIt.next().getSubject();
            if (graph.filter(annotation, Properties.DC_RELATION, null).hasNext()) {
                // annotations that define a dc:relation are skipped here; they
                // are collected below as "subsumed" by the annotation they refer to
                continue;
            }
            NamedEntity namedEntity = NamedEntity.createFromTextAnnotation(graph, annotation);
            if (namedEntity == null) {
                continue;
            }
            // first occurrence: gather every annotation that points to this one
            List<IRI> subsumed = new ArrayList<IRI>();
            Iterator<Triple> relatedIt = graph.filter(null, Properties.DC_RELATION, annotation);
            while (relatedIt.hasNext()) {
                subsumed.add((IRI) relatedIt.next().getSubject());
            }
            textAnnotations.put(namedEntity, subsumed);
        }
    } finally {
        ci.getLock().readLock().unlock();
    }

    // Phase 2: search the suggestions (no lock held)
    Map<NamedEntity, List<Suggestion>> suggestions = new HashMap<NamedEntity, List<Suggestion>>(textAnnotations.size());
    try {
        for (Entry<NamedEntity, List<IRI>> annotated : textAnnotations.entrySet()) {
            NamedEntity namedEntity = annotated.getKey();
            List<Suggestion> found = computeEntityRecommentations(site, namedEntity, annotated.getValue(), contentLanguage);
            // entities without any suggestion are not recorded
            if (found == null || found.isEmpty()) {
                continue;
            }
            suggestions.put(namedEntity, found);
        }
    } catch (EntityhubException e) {
        throw new EngineException(this, ci, e);
    }

    // Phase 3: write the results (requires write lock)
    ci.getLock().writeLock().lock();
    try {
        RdfValueFactory factory = RdfValueFactory.getInstance();
        // representations to add to the metadata, keyed by entity id;
        // stays empty if dereferenceEntities is false
        Map<String, Representation> entityData = new HashMap<String, Representation>();
        for (Entry<NamedEntity, List<Suggestion>> suggested : suggestions.entrySet()) {
            NamedEntity namedEntity = suggested.getKey();
            // relate the written annotations to the subsumed annotations
            // followed by the text annotation of the entity itself
            List<BlankNodeOrIRI> annotationsToRelate = new ArrayList<BlankNodeOrIRI>(textAnnotations.get(namedEntity));
            annotationsToRelate.add(namedEntity.getEntity());
            for (Suggestion suggestion : suggested.getValue()) {
                log.debug("Add Suggestion {} for {}", suggestion.getEntity().getId(), suggested.getKey());
                // DEFAULT_LANGUAGE is used when no content language is available
                EnhancementRDFUtils.writeEntityAnnotation(this, literalFactory, graph, ci.getUri(), annotationsToRelate, suggestion, nameField,
                        contentLanguage == null ? DEFAULT_LANGUAGE : contentLanguage);
                if (dereferenceEntities) {
                    entityData.put(suggestion.getEntity().getId(), suggestion.getEntity().getRepresentation());
                }
            }
        }
        for (Representation representation : entityData.values()) {
            graph.addAll(factory.toRdfRepresentation(representation).getRdfGraph());
        }
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
Also used : Site(org.apache.stanbol.entityhub.servicesapi.site.Site) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) List(java.util.List) ArrayList(java.util.ArrayList) QueryResultList(org.apache.stanbol.entityhub.servicesapi.query.QueryResultList) Representation(org.apache.stanbol.entityhub.servicesapi.model.Representation) LiteralFactory(org.apache.clerezza.rdf.core.LiteralFactory) Triple(org.apache.clerezza.commons.rdf.Triple) Graph(org.apache.clerezza.commons.rdf.Graph) EntityhubException(org.apache.stanbol.entityhub.servicesapi.EntityhubException) RdfValueFactory(org.apache.stanbol.entityhub.model.clerezza.RdfValueFactory)

Example 63 with BlankNodeOrIRI

use of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in project stanbol by apache.

the class ExecutionMetadataHelper method createChainExecutionNode.

/**
 * Creates a new {@link BlankNode} for the execution of a chain and adds the
 * triples describing it to the parsed graph: both types
 * ({@code EXECUTION} and {@code CHAIN_EXECUTION}), the links between the
 * node and the content item in both directions, the initial status
 * {@code STATUS_SCHEDULED}, the reference to the execution plan and the
 * default chain flag.
 *
 * @param graph the graph the triples are added to
 * @param executionPlan the node of the execution plan that gets executed
 * @param ciUri the URI of the content item that gets enhanced
 * @param defaultChain written as typed literal value of {@code IS_DEFAULT_CHAIN}
 * @return the newly created node
 */
public static BlankNodeOrIRI createChainExecutionNode(Graph graph, BlankNodeOrIRI executionPlan, IRI ciUri, boolean defaultChain) {
    final BlankNodeOrIRI chainExecution = new BlankNode();

    // rdf:type statements
    graph.add(new TripleImpl(chainExecution, RDF_TYPE, EXECUTION));
    graph.add(new TripleImpl(chainExecution, RDF_TYPE, CHAIN_EXECUTION));

    // link the execution and the content item in both directions
    graph.add(new TripleImpl(chainExecution, ENHANCES, ciUri));
    graph.add(new TripleImpl(ciUri, ENHANCED_BY, chainExecution));

    // a freshly created execution starts out as scheduled
    graph.add(new TripleImpl(chainExecution, STATUS, STATUS_SCHEDULED));

    graph.add(new TripleImpl(chainExecution, EXECUTION_PLAN, executionPlan));
    graph.add(new TripleImpl(chainExecution, IS_DEFAULT_CHAIN, lf.createTypedLiteral(defaultChain)));

    return chainExecution;
}
Also used : BlankNode(org.apache.clerezza.commons.rdf.BlankNode) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Example 64 with BlankNodeOrIRI

use of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in project stanbol by apache.

the class ExecutionPlanHelper method validateExecutionPlan.

/**
 * Utility that checks if the parsed graph contains a valid execution
 * plan. This method is intended to be used by components that need to
 * ensure that a parsed graph contains a valid execution plan.<p>
 * This especially checks for all {@link ExecutionPlan#EXECUTION_NODE}s: <ul>
 * <li> if they define exactly one, non-empty value for the
 * {@link ExecutionPlan#ENGINE} property and
 * <li> if all {@link ExecutionPlan#DEPENDS_ON} values are bNodes or IRIs
 * that do actually point to an other execution node in the parsed graph
 * </ul><p>
 * This method does not modify the parsed graph. Therefore it is safe
 * to parse a {@link ImmutableGraph} object.<p>
 * TODO: There is no check for cycles implemented yet.
 * @param executionPlan the graph to check
 * @return the engine names referenced by the validated execution plan.
 * @throws ChainException if an execution node defines no, multiple or an
 * empty engine name, if a {@link ExecutionPlan#DEPENDS_ON} value is a
 * literal, or if a dependency refers to a resource that is not an
 * execution node of the parsed graph
 */
public static Set<String> validateExecutionPlan(Graph executionPlan) throws ChainException {
    Iterator<Triple> executionNodeIt = executionPlan.filter(null, RDF_TYPE, EXECUTION_NODE);
    Set<String> engineNames = new HashSet<String>();
    Map<BlankNodeOrIRI, Collection<BlankNodeOrIRI>> nodeDependencies = new HashMap<BlankNodeOrIRI, Collection<BlankNodeOrIRI>>();
    //1. check the ExecutionNodes
    while (executionNodeIt.hasNext()) {
        BlankNodeOrIRI node = executionNodeIt.next().getSubject();
        Iterator<String> engines = EnhancementEngineHelper.getStrings(executionPlan, node, ENGINE);
        if (!engines.hasNext()) {
            throw new ChainException("Execution Node " + node + " does not define " + "the required property " + ENGINE + "!");
        }
        String engine = engines.next();
        if (engines.hasNext()) {
            // more than one engine name present: the property must be unary
            throw new ChainException("Execution Node " + node + " defines " + "multiple values for the property " + ENGINE + "!");
        }
        if (engine.isEmpty()) {
            throw new ChainException("Execution Node " + node + " defines " + "an empty String as engine name (property " + ENGINE + ")!");
        }
        engineNames.add(engine);
        Collection<BlankNodeOrIRI> dependsOn = new HashSet<BlankNodeOrIRI>();
        for (Iterator<Triple> t = executionPlan.filter(node, DEPENDS_ON, null); t.hasNext(); ) {
            RDFTerm o = t.next().getObject();
            if (o instanceof BlankNodeOrIRI) {
                dependsOn.add((BlankNodeOrIRI) o);
            } else {
                throw new ChainException("Execution Node " + node + " defines the literal '" + o + "' as value for the " + DEPENDS_ON + " property. However this " + "property requires values to be bNodes or URIs.");
            }
        }
        // always a (possibly empty) collection, never null
        nodeDependencies.put(node, dependsOn);
    }
    //2. now check the dependency graph: every dependency needs to be one of
    //   the execution nodes collected above
    for (Entry<BlankNodeOrIRI, Collection<BlankNodeOrIRI>> entry : nodeDependencies.entrySet()) {
        for (BlankNodeOrIRI dependent : entry.getValue()) {
            if (!nodeDependencies.containsKey(dependent)) {
                throw new ChainException("Execution Node " + entry.getKey() + " defines a dependency to an non existent ex:ExectutionNode " + dependent + "!");
            }
            //else the dependency is valid
        }
    }
    //done ... the parsed graph survived all consistency checks :)
    return engineNames;
}
Also used : HashMap(java.util.HashMap) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) EnhancementEngineHelper.getString(org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.getString) Triple(org.apache.clerezza.commons.rdf.Triple) Collection(java.util.Collection) ChainException(org.apache.stanbol.enhancer.servicesapi.ChainException) HashSet(java.util.HashSet)

Example 65 with BlankNodeOrIRI

use of org.apache.clerezza.commons.rdf.BlankNodeOrIRI in project stanbol by apache.

the class ExecutionPlanHelper method writeExecutionNode.

/**
 * Adds the triples describing a new ep:ExecutionNode to the parsed
 * {@link Graph}. The execution node itself is represented by a newly
 * created {@link BlankNode}.
 * @param graph the graph the triples are written to. MUST NOT be
 * <code>null</code>
 * @param epNode the BlankNodeOrIRI representing the ep:ExecutionPlan the
 * new node is added to. MUST NOT be <code>null</code>
 * @param engineName the name of the engine. MUST NOT be <code>null</code>
 * nor empty
 * @param optional if the execution of this node is optional or required
 * @param dependsOn the nodes this one depends on. Parse <code>null</code>
 * or an empty set if none. <code>null</code> elements are ignored.
 * @param enhProps the EnhancementProperties for this ExecutionNode or
 * <code>null</code> if none
 * @return the resource representing the added ep:ExecutionNode.
 * @throws IllegalArgumentException if the parsed graph, engine name or
 * execution plan node is <code>null</code>, or if the engine name is empty
 * @since 0.12.1
 */
public static BlankNodeOrIRI writeExecutionNode(Graph graph, BlankNodeOrIRI epNode, String engineName, boolean optional, Set<BlankNodeOrIRI> dependsOn, Map<String, Object> enhProps) {
    // validate the arguments before anything is written to the graph
    if (graph == null) {
        throw new IllegalArgumentException("The parsed Graph MUST NOT be NULL!");
    }
    if (engineName == null || engineName.isEmpty()) {
        throw new IllegalArgumentException("The parsed Engine name MUST NOT be NULL nor empty!");
    }
    if (epNode == null) {
        throw new IllegalArgumentException("The ep:ExecutionPlan instance MUST NOT be NULL!");
    }

    final BlankNodeOrIRI executionNode = new BlankNode();
    // attach the new node to its execution plan, then describe it
    graph.add(new TripleImpl(epNode, HAS_EXECUTION_NODE, executionNode));
    graph.add(new TripleImpl(executionNode, RDF_TYPE, EXECUTION_NODE));
    graph.add(new TripleImpl(executionNode, ENGINE, new PlainLiteralImpl(engineName)));

    if (dependsOn != null) {
        for (BlankNodeOrIRI dependency : dependsOn) {
            if (dependency == null) {
                // null elements are silently skipped
                continue;
            }
            graph.add(new TripleImpl(executionNode, DEPENDS_ON, dependency));
        }
    }

    graph.add(new TripleImpl(executionNode, OPTIONAL, lf.createTypedLiteral(optional)));
    writeEnhancementProperties(graph, executionNode, engineName, enhProps);
    return executionNode;
}
Also used : PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) BlankNode(org.apache.clerezza.commons.rdf.BlankNode) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Aggregations

BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)89 Triple (org.apache.clerezza.commons.rdf.Triple)52 IRI (org.apache.clerezza.commons.rdf.IRI)41 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)30 RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)27 HashSet (java.util.HashSet)24 Graph (org.apache.clerezza.commons.rdf.Graph)22 HashMap (java.util.HashMap)17 ArrayList (java.util.ArrayList)14 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)14 Literal (org.apache.clerezza.commons.rdf.Literal)13 SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph)12 Lock (java.util.concurrent.locks.Lock)10 BlankNode (org.apache.clerezza.commons.rdf.BlankNode)10 EnhancementEngineHelper.getString (org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.getString)8 Test (org.junit.Test)8 Collection (java.util.Collection)7 IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph)7 Language (org.apache.clerezza.commons.rdf.Language)6 EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException)6