Search in sources :

Example 96 with IRI

use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.

the class ZemantaEnhancementEngine method processCategories.

/**
 * Writes a topic enhancement for every Zemanta category in {@code results}
 * that has a target node, uses the DMOZ categorisation scheme and carries a
 * title. Categories failing one of these checks are skipped with a warning.
 * <p>
 * All topic enhancements are related (via {@code DC_RELATION}) to a single
 * text enhancement that is created lazily when the first valid category is
 * encountered.
 *
 * @param results      graph holding the Zemanta response
 * @param enhancements graph the enhancement triples are added to
 * @param ciId         id of the content item the enhancements are created for
 */
protected void processCategories(Graph results, Graph enhancements, IRI ciId) {
    // created together with the first topic enhancement and shared by all of them
    IRI rootAnnotation = null;
    for (Iterator<Triple> it = results.filter(null, RDF_TYPE, ZemantaOntologyEnum.Category.getUri()); it.hasNext();) {
        BlankNodeOrIRI category = it.next().getSubject();
        log.debug("process category " + category);
        Double confidence = parseConfidence(results, category);
        log.debug(" > confidence :" + confidence);

        // follow the target link of the category
        IRI target = EnhancementEngineHelper.getReference(results, category, ZemantaOntologyEnum.target.getUri());
        if (target == null) {
            log.warn("Unable to process category " + category + " because no target node was found");
            continue;
        }

        // only categories using the DMOZ categorisation are supported
        IRI scheme = EnhancementEngineHelper.getReference(results, target, ZemantaOntologyEnum.categorization.getUri());
        if (scheme == null || !scheme.equals(ZemantaOntologyEnum.categorization_DMOZ.getUri())) {
            log.warn("Unable to process category " + category + " because categorisation scheme != DMOZ (" + scheme + " != " + ZemantaOntologyEnum.categorization_DMOZ.getUri() + ")");
            continue;
        }

        String title = EnhancementEngineHelper.getString(results, target, ZemantaOntologyEnum.title.getUri());
        if (title == null) {
            log.warn("Unable to process category " + category + " because no title is present");
            continue;
        }

        if (rootAnnotation == null) {
            // first valid category: create the text enhancement all topic
            // enhancements are linked to
            rootAnnotation = createTextEnhancement(enhancements, this, ciId);
            enhancements.add(new TripleImpl(rootAnnotation, DC_TYPE, SKOS_CONCEPT));
        }

        IRI topic = createTopicEnhancement(enhancements, this, ciId);
        // link the topic to the shared text enhancement
        enhancements.add(new TripleImpl(topic, DC_RELATION, rootAnnotation));
        // label
        enhancements.add(new TripleImpl(topic, ENHANCER_ENTITY_LABEL, new PlainLiteralImpl(title)));
        // reference: only available when the title carries the Zemanta DMOZ prefix
        if (title.startsWith(ZEMANTA_DMOZ_PREFIX)) {
            String dmozPath = title.substring(ZEMANTA_DMOZ_PREFIX.length());
            enhancements.add(new TripleImpl(topic, ENHANCER_ENTITY_REFERENCE, new IRI(DMOZ_BASE_URL + dmozPath)));
        }
        // confidence (optional)
        if (confidence != null) {
            enhancements.add(new TripleImpl(topic, ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(confidence)));
        }
        // fise:entity-type: as of STANBOL-617 both skos:Concept AND zemanta:Category
        // are used. dc:type is no longer written here as it is only used by
        // fise:TextAnnotations, see
        // http://wiki.iks-project.eu/index.php/ZemantaEnhancementEngine#Mapping_of_Categories
        enhancements.add(new TripleImpl(topic, ENHANCER_ENTITY_TYPE, SKOS_CONCEPT));
        enhancements.add(new TripleImpl(topic, ENHANCER_ENTITY_TYPE, ZemantaOntologyEnum.Category.getUri()));
    }
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Example 97 with IRI

use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.

the class ZemantaEnhancementEngine method computeEnhancements.

/**
 * Sends the text of the content item to the Zemanta service and writes the
 * returned recognitions and categories into the metadata of the content item.
 * <p>
 * The remote call is made without holding the content item lock; the write
 * lock is only acquired while the results are converted and added to the
 * metadata graph.
 *
 * @param ci the content item to enhance
 * @throws IllegalStateException if the content item has no content part with
 *         one of the {@code SUPPORTED_MIMETYPES}
 * @throws EngineException if the text cannot be read from the content part
 *         (reported as {@link InvalidContentException}) or if the request to
 *         the Zemanta service fails
 */
public void computeEnhancements(ContentItem ci) throws EngineException {
    Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES);
    if (contentPart == null) {
        // the literals are concatenated, so every fragment needs its own
        // separating space to keep the message readable
        throw new IllegalStateException("No ContentPart with a supported Mime Type found for ContentItem "
                + ci.getUri() + " (supported: '" + SUPPORTED_MIMETYPES
                + "') -> this indicates that canEnhance was NOT called and indicates a bug"
                + " in the used EnhancementJobManager!");
    }
    String text;
    try {
        text = ContentItemHelper.getText(contentPart.getValue());
    } catch (IOException e) {
        throw new InvalidContentException(this, ci, e);
    }
    if (text.trim().isEmpty()) {
        log.warn("ContentPart {} of ContentItem {} does not contain any text to enhance", contentPart.getKey(), ci.getUri());
        return;
    }
    Graph graph = ci.getMetadata();
    IRI ciId = ci.getUri();
    // the Zemanta response is collected in a temporary graph first
    Graph results = new SimpleGraph();
    ZemantaAPIWrapper zemanta = new ZemantaAPIWrapper(key);
    try {
        results.addAll(zemanta.enhance(text));
    } catch (IOException e) {
        throw new EngineException("Unable to get Enhancement from remote Zemanta Service", e);
    }
    // convert the results into the Enhancer annotation structure while
    // holding the write lock of the content item
    ci.getLock().writeLock().lock();
    try {
        processRecognition(results, graph, text, ciId);
        processCategories(results, graph, ciId);
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) Blob(org.apache.stanbol.enhancer.servicesapi.Blob) InvalidContentException(org.apache.stanbol.enhancer.servicesapi.InvalidContentException) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) Graph(org.apache.clerezza.commons.rdf.Graph) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) IOException(java.io.IOException)

Example 98 with IRI

use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.

the class TripleMatcherGroupImpl method getMatchingSubjects.

/**
 * Returns the IRI subjects of {@code g} that are matched by every matcher
 * of this group, i.e. the intersection of the per-matcher subject sets.
 *
 * @param g the graph whose triples are tested
 * @return the subjects matched by all matchers; an empty set if this group
 *         has no matchers
 */
@Override
public Set<IRI> getMatchingSubjects(ImmutableGraph g) {
    if (matchers.isEmpty()) {
        return new HashSet<IRI>();
    }
    Set<IRI> common = null;
    for (TripleMatcher matcher : matchers) {
        // collect the subjects accepted by this matcher
        Set<IRI> matched = new HashSet<IRI>();
        for (Iterator<Triple> triples = g.iterator(); triples.hasNext();) {
            Triple triple = triples.next();
            if (!matcher.matches(triple)) {
                continue;
            }
            BlankNodeOrIRI subject = triple.getSubject();
            if (subject instanceof IRI) {
                matched.add((IRI) subject);
            }
            // TODO do we need to handle non-IRI subjects?
        }
        // the first matcher seeds the result, later ones narrow it down
        if (common == null) {
            common = matched;
        } else {
            common.retainAll(matched);
        }
    }
    return common;
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) IRI(org.apache.clerezza.commons.rdf.IRI) TripleMatcher(org.apache.stanbol.enhancer.benchmark.TripleMatcher) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) HashSet(java.util.HashSet)

Example 99 with IRI

use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.

the class JenaAdapter method main.

/**
 * Manual test driver: parses a rule file, adapts the rules to Jena rules,
 * runs a {@link GenericRuleReasoner} over a local RDF dataset, prints the
 * deductions and the result of a SPARQL query to {@code System.out}.
 * <p>
 * NOTE: the rule file and dataset locations are hard-coded absolute paths.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    RuleAdapter ruleAdapter = new JenaAdapter();
    try {
        KB kb;
        // the stream is opened here, so it is also closed here
        FileInputStream ruleStream = new FileInputStream("/Users/mac/Documents/CNR/SSSW2012/rules/exercise1");
        try {
            kb = RuleParserImpl.parse("http://sssw.org/2012/rules/", ruleStream);
        } finally {
            try {
                ruleStream.close();
            } catch (java.io.IOException ignored) {
                // nothing useful can be done if closing a read-only stream fails
            }
        }
        System.out.println("Rules: " + kb.getRuleList().size());
        Recipe recipe = new RecipeImpl(new IRI("http://sssw.org/2012/rules/"), "Recipe", kb.getRuleList());
        List<com.hp.hpl.jena.reasoner.rulesys.Rule> jenaRules = (List<com.hp.hpl.jena.reasoner.rulesys.Rule>) ruleAdapter.adaptTo(recipe, com.hp.hpl.jena.reasoner.rulesys.Rule.class);
        for (com.hp.hpl.jena.reasoner.rulesys.Rule jenaRule : jenaRules) {
            System.out.println(jenaRule.toString());
        }
        // NOTE(review): this configuration resource is built but not passed
        // to the reasoner created below
        Model m = ModelFactory.createDefaultModel();
        Resource configuration = m.createResource();
        configuration.addProperty(ReasonerVocabulary.PROPruleMode, "hybrid");
        Model model = FileManager.get().loadModel("/Users/mac/Documents/CNR/SSSW2012/datasets_new/Exercise1.rdf");
        GenericRuleReasoner reasoner = new GenericRuleReasoner(jenaRules);
        // not needed in RDFS case
        reasoner.setOWLTranslation(true);
        reasoner.setTransitiveClosureCaching(true);
        InfModel infModel = ModelFactory.createInfModel(reasoner, model);
        infModel.prepare();
        infModel.getDeductionsModel().write(System.out);
        String sparql = "select * where {?s a <http://www.mytravels.com/Itinerary/MadridItinerary> }";
        Query query = QueryFactory.create(sparql, Syntax.syntaxARQ);
        QueryExecution queryExecution = QueryExecutionFactory.create(query, infModel);
        try {
            com.hp.hpl.jena.query.ResultSet resultSet = queryExecution.execSelect();
            ResultSetFormatter.out(System.out, resultSet);
        } finally {
            // release the resources held by the query execution
            queryExecution.close();
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (RuleAtomCallExeption e) {
        e.printStackTrace();
    } catch (UnavailableRuleObjectException e) {
        e.printStackTrace();
    } catch (UnsupportedTypeForExportException e) {
        e.printStackTrace();
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Query(com.hp.hpl.jena.query.Query) Recipe(org.apache.stanbol.rules.base.api.Recipe) FileNotFoundException(java.io.FileNotFoundException) InfModel(com.hp.hpl.jena.rdf.model.InfModel) QueryExecution(com.hp.hpl.jena.query.QueryExecution) UnsupportedTypeForExportException(org.apache.stanbol.rules.base.api.UnsupportedTypeForExportException) KB(org.apache.stanbol.rules.manager.KB) List(java.util.List) ArrayList(java.util.ArrayList) AtomList(org.apache.stanbol.rules.base.api.util.AtomList) RuleList(org.apache.stanbol.rules.base.api.util.RuleList) RuleAtomCallExeption(org.apache.stanbol.rules.base.api.RuleAtomCallExeption) Resource(com.hp.hpl.jena.rdf.model.Resource) UnavailableRuleObjectException(org.apache.stanbol.rules.base.api.UnavailableRuleObjectException) FileInputStream(java.io.FileInputStream) RecipeImpl(org.apache.stanbol.rules.manager.RecipeImpl) InfModel(com.hp.hpl.jena.rdf.model.InfModel) Model(com.hp.hpl.jena.rdf.model.Model) GenericRuleReasoner(com.hp.hpl.jena.reasoner.rulesys.GenericRuleReasoner) Rule(org.apache.stanbol.rules.base.api.Rule) AbstractRuleAdapter(org.apache.stanbol.rules.adapters.AbstractRuleAdapter) RuleAdapter(org.apache.stanbol.rules.base.api.RuleAdapter)

Example 100 with IRI

use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.

the class IsBlankAtom method adapt.

/**
 * Adapts an {@code IsBlankAtom} rule atom to a {@link ClerezzaSparqlObject}
 * wrapping an {@code isBLANK} built-in call on the atom's resource.
 *
 * @param ruleAtom the atom to adapt; cast to
 *        {@code org.apache.stanbol.rules.manager.atoms.IsBlankAtom}
 * @return the {@link ClerezzaSparqlObject} holding the built-in call
 * @throws RuleAtomCallExeption if the adapted argument is neither a
 *         {@link Variable} nor an {@link IRI}
 */
@SuppressWarnings("unchecked")
@Override
public <T> T adapt(RuleAtom ruleAtom) throws RuleAtomCallExeption, UnavailableRuleObjectException, UnsupportedTypeForExportException {
    IObjectAtom resourceAtom = ((org.apache.stanbol.rules.manager.atoms.IsBlankAtom) ruleAtom).getUriResource();
    ClerezzaSparqlObject adapted = (ClerezzaSparqlObject) adapter.adaptTo(resourceAtom, ConstructQuery.class);
    Object operandObject = adapted.getClerezzaObject();

    // only variables and IRIs can be used as the argument of the call
    boolean isVariable = operandObject instanceof Variable;
    if (!isVariable && !(operandObject instanceof IRI)) {
        throw new RuleAtomCallExeption(getClass());
    }

    Expression operand;
    if (isVariable) {
        operand = (Variable) operandObject;
    } else {
        operand = new UriRefExpression((IRI) operandObject);
    }

    List<Expression> callArguments = new ArrayList<Expression>();
    callArguments.add(operand);
    BuiltInCall isBlankCall = new BuiltInCall("isBLANK", callArguments);
    return (T) new ClerezzaSparqlObject(isBlankCall);
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Variable(org.apache.clerezza.rdf.core.sparql.query.Variable) ArrayList(java.util.ArrayList) BuiltInCall(org.apache.clerezza.rdf.core.sparql.query.BuiltInCall) UriRefExpression(org.apache.clerezza.rdf.core.sparql.query.UriRefExpression) IObjectAtom(org.apache.stanbol.rules.manager.atoms.IObjectAtom) ConstructQuery(org.apache.clerezza.rdf.core.sparql.query.ConstructQuery) UriRefExpression(org.apache.clerezza.rdf.core.sparql.query.UriRefExpression) Expression(org.apache.clerezza.rdf.core.sparql.query.Expression) ClerezzaSparqlObject(org.apache.stanbol.rules.adapters.clerezza.ClerezzaSparqlObject) ClerezzaSparqlObject(org.apache.stanbol.rules.adapters.clerezza.ClerezzaSparqlObject) RuleAtomCallExeption(org.apache.stanbol.rules.base.api.RuleAtomCallExeption)

Aggregations

IRI (org.apache.clerezza.commons.rdf.IRI)346 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)113 Graph (org.apache.clerezza.commons.rdf.Graph)109 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)104 Triple (org.apache.clerezza.commons.rdf.Triple)88 RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)84 Test (org.junit.Test)78 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)58 HashSet (java.util.HashSet)50 ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem)46 EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException)39 HashMap (java.util.HashMap)38 IOException (java.io.IOException)37 ArrayList (java.util.ArrayList)37 Blob (org.apache.stanbol.enhancer.servicesapi.Blob)36 Literal (org.apache.clerezza.commons.rdf.Literal)35 SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph)31 IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph)29 Recipe (org.apache.stanbol.rules.base.api.Recipe)29 Language (org.apache.clerezza.commons.rdf.Language)24