Use of org.apache.clerezza.commons.rdf.IRI in the Apache Stanbol project.
Class ZemantaEnhancementEngine, method processCategories.
/**
 * Converts the Zemanta category results into fise:TopicAnnotations.
 * <p>
 * A single root fise:TextAnnotation (typed as skos:Concept) is created lazily
 * when the first usable category is found; every TopicAnnotation written for a
 * category is linked to it via dc:relation. Only categories using the DMOZ
 * categorisation scheme are processed; others are skipped with a warning.
 *
 * @param results      the raw RDF graph returned by the Zemanta service
 * @param enhancements the ContentItem metadata graph the annotations are written to
 * @param ciId         the IRI of the enhanced ContentItem
 */
protected void processCategories(Graph results, Graph enhancements, IRI ciId) {
    Iterator<Triple> categories = results.filter(null, RDF_TYPE, ZemantaOntologyEnum.Category.getUri());
    // add the root Text annotation as soon as the first TopicAnnotation is added
    IRI textAnnotation = null;
    while (categories.hasNext()) {
        BlankNodeOrIRI category = categories.next().getSubject();
        // use parameterized logging so the message is only built when enabled
        log.debug("process category {}", category);
        Double confidence = parseConfidence(results, category);
        log.debug(" > confidence :{}", confidence);
        // now we need to follow the Target link
        IRI target = EnhancementEngineHelper.getReference(results, category, ZemantaOntologyEnum.target.getUri());
        if (target != null) {
            // first check the used categorisation scheme
            IRI categorisationScheme = EnhancementEngineHelper.getReference(results, target, ZemantaOntologyEnum.categorization.getUri());
            if (categorisationScheme != null && categorisationScheme.equals(ZemantaOntologyEnum.categorization_DMOZ.getUri())) {
                String categoryTitle = EnhancementEngineHelper.getString(results, target, ZemantaOntologyEnum.title.getUri());
                if (categoryTitle != null) {
                    if (textAnnotation == null) {
                        // this is the first category ... create the TextAnnotation used
                        // to link all fise:TopicAnnotations
                        textAnnotation = createTextEnhancement(enhancements, this, ciId);
                        enhancements.add(new TripleImpl(textAnnotation, DC_TYPE, SKOS_CONCEPT));
                    }
                    // now write the TopicAnnotation
                    IRI categoryEnhancement = createTopicEnhancement(enhancements, this, ciId);
                    // make related to the EntityAnnotation
                    enhancements.add(new TripleImpl(categoryEnhancement, DC_RELATION, textAnnotation));
                    // write the title
                    enhancements.add(new TripleImpl(categoryEnhancement, ENHANCER_ENTITY_LABEL, new PlainLiteralImpl(categoryTitle)));
                    // write the reference: map the zemanta DMOZ prefix to a DMOZ URL
                    if (categoryTitle.startsWith(ZEMANTA_DMOZ_PREFIX)) {
                        enhancements.add(new TripleImpl(categoryEnhancement, ENHANCER_ENTITY_REFERENCE, new IRI(DMOZ_BASE_URL + categoryTitle.substring(ZEMANTA_DMOZ_PREFIX.length()))));
                    }
                    // write the confidence (may be absent in the Zemanta response)
                    if (confidence != null) {
                        enhancements.add(new TripleImpl(categoryEnhancement, ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(confidence)));
                    }
                    // we need to write the fise:entity-type
                    // as of STANBOL-617 we use now both the zemanta:Category AND the skos:Concept
                    // type. dc:type is no longer used as this is only used by fise:TextAnnotations
                    // see http://wiki.iks-project.eu/index.php/ZemantaEnhancementEngine#Mapping_of_Categories
                    // for more Information
                    enhancements.add(new TripleImpl(categoryEnhancement, ENHANCER_ENTITY_TYPE, SKOS_CONCEPT));
                    // Use also Zemanta Category as type for the referred Entity
                    enhancements.add(new TripleImpl(categoryEnhancement, ENHANCER_ENTITY_TYPE, ZemantaOntologyEnum.Category.getUri()));
                } else {
                    log.warn("Unable to process category {} because no title is present", category);
                }
            } else {
                log.warn("Unable to process category {} because categorisation scheme != DMOZ ({} != {})",
                    category, categorisationScheme, ZemantaOntologyEnum.categorization_DMOZ.getUri());
            }
        } else {
            log.warn("Unable to process category {} because no target node was found", category);
        }
    }
}
Use of org.apache.clerezza.commons.rdf.IRI in the Apache Stanbol project.
Class ZemantaEnhancementEngine, method computeEnhancements.
/**
 * Computes enhancements for the parsed ContentItem by sending its plain-text
 * content to the remote Zemanta service and converting the returned RDF into
 * the Stanbol Enhancer annotation structure.
 * <p>
 * The Zemanta results are first collected in a temporary in-memory graph and
 * only written to the ContentItem metadata under its write lock.
 *
 * @param ci the ContentItem to enhance
 * @throws EngineException if the remote Zemanta service cannot be reached
 * @throws IllegalStateException if no ContentPart with a supported mime type
 *         is present (indicates canEnhance was not called first)
 */
public void computeEnhancements(ContentItem ci) throws EngineException {
    Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMETYPES);
    if (contentPart == null) {
        // NOTE: the original concatenation was missing spaces ("Typefound", "wasNOT")
        throw new IllegalStateException("No ContentPart with a supported Mime Type "
            + "found for ContentItem " + ci.getUri() + " (supported: '"
            + SUPPORTED_MIMETYPES + "') -> this indicates that canEnhance was "
            + "NOT called and indicates a bug in the used EnhancementJobManager!");
    }
    String text;
    try {
        text = ContentItemHelper.getText(contentPart.getValue());
    } catch (IOException e) {
        throw new InvalidContentException(this, ci, e);
    }
    if (text.trim().isEmpty()) {
        log.warn("ContentPart {} of ContentItem {} does not contain any text to enhance", contentPart.getKey(), ci.getUri());
        return;
    }
    Graph graph = ci.getMetadata();
    IRI ciId = ci.getUri();
    // we need to store the results of Zemanta in a temp graph so that the
    // write lock on the ContentItem is only held while converting, not while
    // waiting on the remote service
    Graph results = new SimpleGraph();
    ZemantaAPIWrapper zemanta = new ZemantaAPIWrapper(key);
    try {
        results.addAll(zemanta.enhance(text));
    } catch (IOException e) {
        // preserve the cause so the remote failure is diagnosable
        throw new EngineException("Unable to get Enhancement from remote Zemanta Service", e);
    }
    // now we need to process the results and convert them into the Enhancer
    // annotation structure
    ci.getLock().writeLock().lock();
    try {
        processRecognition(results, graph, text, ciId);
        processCategories(results, graph, ciId);
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
Use of org.apache.clerezza.commons.rdf.IRI in the Apache Stanbol project.
Class TripleMatcherGroupImpl, method getMatchingSubjects.
@Override
public Set<IRI> getMatchingSubjects(ImmutableGraph g) {
    // with no matchers there is nothing to intersect: return an empty set,
    // never null (callers may mutate the result, so keep it a fresh HashSet)
    if (matchers.isEmpty()) {
        return new HashSet<IRI>();
    }
    // For each matcher, collect the set of IRI subjects of matching triples,
    // then compute the intersection of those sets across all matchers.
    Set<IRI> intersection = null;
    for (TripleMatcher m : matchers) {
        final Set<IRI> matched = new HashSet<IRI>();
        for (Triple t : g) {
            if (m.matches(t)) {
                final BlankNodeOrIRI subject = t.getSubject();
                if (subject instanceof IRI) {
                    matched.add((IRI) subject);
                }
                // TODO do we need to handle non-IRI (blank node) subjects?
            }
        }
        if (intersection == null) {
            intersection = matched;
        } else {
            intersection.retainAll(matched);
        }
        // once the intersection is empty it can never grow again;
        // skip scanning the graph for the remaining matchers
        if (intersection.isEmpty()) {
            break;
        }
    }
    return intersection;
}
Use of org.apache.clerezza.commons.rdf.IRI in the Apache Stanbol project.
Class JenaAdapter, method main.
/**
 * Manual smoke test: parses a local rule file, adapts the recipe to Jena
 * rules, runs a GenericRuleReasoner over a local RDF dataset and prints the
 * deductions plus the result of a SPARQL query over the inferred model.
 * <p>
 * NOTE(review): input paths are hard-coded to a developer machine — this is a
 * throwaway demo, not production code.
 */
public static void main(String[] args) {
    RuleAdapter ruleAdapter = new JenaAdapter();
    try {
        KB kb = RuleParserImpl.parse("http://sssw.org/2012/rules/", new FileInputStream("/Users/mac/Documents/CNR/SSSW2012/rules/exercise1"));
        System.out.println("Rules: " + kb.getRuleList().size());
        Recipe recipe = new RecipeImpl(new IRI("http://sssw.org/2012/rules/"), "Recipe", kb.getRuleList());
        List<com.hp.hpl.jena.reasoner.rulesys.Rule> jenaRules = (List<com.hp.hpl.jena.reasoner.rulesys.Rule>) ruleAdapter.adaptTo(recipe, com.hp.hpl.jena.reasoner.rulesys.Rule.class);
        // reference rule kept for comparison with the adapted output:
        // String rules = "[ Exercise1: (http://dbpedia.org/resource/Madrid http://dbpedia.org/ontology/locationOf ?location) (?location rdf:type http://dbpedia.org/ontology/Museum) (?location http://dbpedia.org/ontology/numberOfVisitors ?visitors) greaterThan(?visitors '2000000'^^http://www.w3.org/2001/XMLSchema#integer) -> (?location rdf:type http://www.mytravels.com/Itinerary/MadridItinerary) ]";
        for (com.hp.hpl.jena.reasoner.rulesys.Rule jenaRule : jenaRules) {
            System.out.println(jenaRule.toString());
        }
        Model m = ModelFactory.createDefaultModel();
        Resource configuration = m.createResource();
        configuration.addProperty(ReasonerVocabulary.PROPruleMode, "hybrid");
        Model model = FileManager.get().loadModel("/Users/mac/Documents/CNR/SSSW2012/datasets_new/Exercise1.rdf");
        GenericRuleReasoner reasoner = new GenericRuleReasoner(jenaRules);
        // not needed in RDFS case
        reasoner.setOWLTranslation(true);
        reasoner.setTransitiveClosureCaching(true);
        InfModel infModel = ModelFactory.createInfModel(reasoner, model);
        infModel.prepare();
        infModel.getDeductionsModel().write(System.out);
        String sparql = "select * where {?s a <http://www.mytravels.com/Itinerary/MadridItinerary> }";
        Query query = QueryFactory.create(sparql, Syntax.syntaxARQ);
        // close the QueryExecution to release its resources (the original leaked it)
        QueryExecution queryExecution = QueryExecutionFactory.create(query, infModel);
        try {
            com.hp.hpl.jena.query.ResultSet resultSet = queryExecution.execSelect();
            ResultSetFormatter.out(System.out, resultSet);
        } finally {
            queryExecution.close();
        }
    } catch (FileNotFoundException
            | RuleAtomCallExeption
            | UnavailableRuleObjectException
            | UnsupportedTypeForExportException e) {
        // demo code: just report the failure
        e.printStackTrace();
    }
}
Use of org.apache.clerezza.commons.rdf.IRI in the Apache Stanbol project.
Class IsBlankAtom, method adapt.
/**
 * Adapts an {@code IsBlankAtom} to a Clerezza SPARQL {@code isBLANK(...)}
 * built-in call.
 * <p>
 * The atom's URI resource is first adapted to a Clerezza object; only SPARQL
 * variables and IRIs are valid arguments for {@code isBLANK}.
 *
 * @param ruleAtom the atom to adapt; must be an
 *        {@link org.apache.stanbol.rules.manager.atoms.IsBlankAtom}
 * @return the wrapped {@code isBLANK} built-in call
 * @throws RuleAtomCallExeption if the adapted argument is neither a Variable nor an IRI
 */
@SuppressWarnings("unchecked")
@Override
public <T> T adapt(RuleAtom ruleAtom) throws RuleAtomCallExeption, UnavailableRuleObjectException, UnsupportedTypeForExportException {
    org.apache.stanbol.rules.manager.atoms.IsBlankAtom tmp = (org.apache.stanbol.rules.manager.atoms.IsBlankAtom) ruleAtom;
    IObjectAtom uriResource = tmp.getUriResource();
    ClerezzaSparqlObject argumentCSO = (ClerezzaSparqlObject) adapter.adaptTo(uriResource, ConstructQuery.class);
    Object arg = argumentCSO.getClerezzaObject();
    Expression argumentExpression;
    if (arg instanceof Variable) {
        argumentExpression = (Variable) arg;
    } else if (arg instanceof IRI) {
        argumentExpression = new UriRefExpression((IRI) arg);
    } else {
        // isBLANK only accepts variables or IRIs as its argument
        throw new RuleAtomCallExeption(getClass());
    }
    List<Expression> expressions = new ArrayList<Expression>();
    expressions.add(argumentExpression);
    return (T) new ClerezzaSparqlObject(new BuiltInCall("isBLANK", expressions));
}
Aggregations