Use of org.apache.clerezza.commons.rdf.Literal in project stanbol by apache.
The class ContentItemBackendTest, method testTextAnnotationFunctionWithoutParsedContext.
@Test
public void testTextAnnotationFunctionWithoutParsedContext() throws LDPathParseException {
    String path = "fn:textAnnotation()/fise:selected-text";
    Collection<RDFTerm> result = ldpath.pathQuery(ci.getUri(), path, null);
    assertNotNull(result);
    assertFalse(result.isEmpty());
    assertTrue(result.size() == 2);
    Set<String> expectedValues = new HashSet<String>(Arrays.asList("Bob Marley", "Paris"));
    for (RDFTerm r : result) {
        assertTrue(r instanceof Literal);
        assertTrue(expectedValues.remove(((Literal) r).getLexicalForm()));
    }
    assertTrue(expectedValues.isEmpty());
    //test with a filter for the type
    //same as the 1st example, but using an LDPath construct to filter
    //for TextAnnotations representing persons
    path = "fn:textAnnotation()[dc:type is dbpedia-ont:Person]/fise:selected-text";
    result = ldpath.pathQuery(ci.getUri(), path, null);
    assertNotNull(result);
    assertFalse(result.isEmpty());
    assertTrue(result.size() == 1);
    RDFTerm r = result.iterator().next();
    assertTrue(r instanceof Literal);
    assertEquals("Bob Marley", ((Literal) r).getLexicalForm());
}
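As a side note, the membership checks above can be factored into a small helper. The following is a hedged sketch (the class and method names are ours, not part of the Stanbol test suite) that collects the lexical forms of all Literal results, so value assertions become simple set comparisons:

import java.util.Collection;
import java.util.HashSet;
import java.util.Set;

import org.apache.clerezza.commons.rdf.Literal;
import org.apache.clerezza.commons.rdf.RDFTerm;

// Hypothetical helper: gathers the lexical forms of all Literal results;
// IRIs and blank nodes in the result are skipped.
public final class LiteralForms {

    private LiteralForms() {
    }

    public static Set<String> lexicalForms(Collection<? extends RDFTerm> result) {
        Set<String> forms = new HashSet<String>();
        for (RDFTerm r : result) {
            if (r instanceof Literal) {
                forms.add(((Literal) r).getLexicalForm());
            }
        }
        return forms;
    }
}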
Use of org.apache.clerezza.commons.rdf.Literal in project stanbol by apache.
The class OpenCalaisEngine, method queryModel.
/**
* Extracts the relevant entity information from the Calais RDF data.
* The entities and the related information are extracted by a SPARQL query.
*
* @param model the Graph representing the Calais data
*
* @return a Collection of entity information
* @throws EngineException on a {@link ParseException} while processing the
* SPARQL query.
*/
public Collection<CalaisEntityOccurrence> queryModel(Graph model) throws EngineException {
    //TODO extract also Geo info (latitude/longitude)?
    String query = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
            + "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> "
            + "PREFIX p: <http://s.opencalais.com/1/pred/> "
            + "PREFIX t: <http://s.opencalais.com/1/type/em/e/> "
            + "SELECT DISTINCT ?id ?did ?name ?type ?dtype ?offset ?length ?exact ?context ?score WHERE { "
            + "?id p:name ?name ."
            + "?id rdf:type ?type ."
            + "?y p:subject ?id ."
            + "?y p:offset ?offset ."
            + "?y p:length ?length ."
            + "?y p:exact ?exact ."
            + "?y p:detection ?context ."
            + " OPTIONAL { ?z p:subject ?id . ?z p:relevance ?score . } "
            // get disambiguated entity references if available
            + " OPTIONAL { ?did p:subject ?id . ?did p:name ?name . ?did rdf:type ?dtype . } "
            + "FILTER ("
            + "?type = t:Person || "
            + "?type = t:City || "
            + "?type = t:Continent || "
            + "?type = t:Country || "
            + "?type = t:ProvinceOrState || "
            + "?type = t:Region || "
            + "?type = t:Company || "
            + "?type = t:Facility || "
            + "?type = t:Organization "
            + ")"
            + "} ";
    Collection<CalaisEntityOccurrence> result = new ArrayList<CalaisEntityOccurrence>();
    try {
        SelectQuery sQuery = (SelectQuery) QueryParser.getInstance().parse(query);
        ResultSet rs = tcManager.executeSparqlQuery(sQuery, model);
        while (rs.hasNext()) {
            SolutionMapping row = rs.next();
            CalaisEntityOccurrence occ = new CalaisEntityOccurrence();
            RDFTerm disambiguated = row.get("did");
            occ.id = (disambiguated == null ? row.get("id") : disambiguated);
            if (onlyNERMode) {
                occ.type = row.get("type");
            } else {
                occ.type = (disambiguated == null ? row.get("type") : row.get("dtype"));
            }
            if (calaisTypeMap != null) {
                IRI mappedType = calaisTypeMap.get(occ.type);
                if (mappedType != null) {
                    occ.type = mappedType;
                }
            }
            occ.name = ((Literal) row.get("name")).getLexicalForm();
            occ.exact = ((Literal) row.get("exact")).getLexicalForm();
            //TODO for html the offsets might not be those of the original document but refer to a cleaned up version?
            occ.offset = Integer.valueOf(((Literal) row.get("offset")).getLexicalForm());
            // remove brackets
            occ.context = ((Literal) row.get("context")).getLexicalForm().replaceAll("[\\[\\]]", "");
            occ.length = Integer.valueOf(((Literal) row.get("length")).getLexicalForm());
            if (row.get("score") != null) {
                occ.relevance = Double.valueOf(((Literal) row.get("score")).getLexicalForm());
            }
            result.add(occ);
        }
    } catch (ParseException e) {
        throw new EngineException("Unable to parse SPARQL query for processing OpenCalais results", e);
    }
log.info("Found {} occurences", result.size());
return result;
}
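The repeated casts above all follow one pattern: each SPARQL binding is returned as an RDFTerm and must be cast to Literal before its lexical form can be read. A minimal sketch of a helper that centralizes the null check and the cast (hypothetical, not part of the engine; it relies on the SolutionMapping.get(String) call already used in queryModel above):

// Hypothetical helper: reads the lexical form of a literal-valued SPARQL
// binding, or returns null if the variable is unbound.
private static String lexical(SolutionMapping row, String var) {
    RDFTerm term = row.get(var);
    return (term instanceof Literal) ? ((Literal) term).getLexicalForm() : null;
}

With such a helper, occ.name = lexical(row, "name") would replace the explicit cast, and the score branch would collapse into a null check on the returned string.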
Use of org.apache.clerezza.commons.rdf.Literal in project stanbol by apache.
The class AbstractOntologyCollectorImpl, method exportToGraph.
/**
 * This method has no conversion calls, so it can be invoked by subclasses that wish to modify it
 * afterwards.
 *
 * @param merge if {@code true}, the contents of the managed ontologies are merged into the exported graph
 * @param prefix the base IRI used to mint the name of the exported graph
 * @return the exported graph
 */
protected Graph exportToGraph(boolean merge, org.semanticweb.owlapi.model.IRI prefix) {
    // if (merge) throw new UnsupportedOperationException(
    //     "Merge not implemented yet for Clerezza triple collections.");
    long before = System.currentTimeMillis();
    // No need to store, give it a name, or anything.
    Graph root = new SimpleGraph();
    IRI iri = new IRI(prefix + _id);
    // Add the import declarations for directly managed ontologies.
    if (root != null) {
        // Set the ontology ID
        root.add(new TripleImpl(iri, RDF.type, OWL.Ontology));
        if (merge) {
            log.warn("Merging of Clerezza triple collections is only implemented one level down. Import statements will be preserved for further levels.");
            Iterator<Triple> it;
            Set<RDFTerm> importTargets = new HashSet<RDFTerm>();
            for (OWLOntologyID ontologyId : managedOntologies) {
                ImmutableGraph g = getOntology(ontologyId, ImmutableGraph.class, false);
                root.addAll(g);
                it = g.filter(null, OWL.imports, null);
                while (it.hasNext()) {
                    org.semanticweb.owlapi.model.IRI tgt;
                    RDFTerm r = it.next().getObject();
                    try {
                        if (r instanceof IRI)
                            tgt = org.semanticweb.owlapi.model.IRI.create(((IRI) r).getUnicodeString());
                        else if (r instanceof Literal)
                            tgt = org.semanticweb.owlapi.model.IRI.create(((Literal) r).getLexicalForm());
                        else
                            tgt = org.semanticweb.owlapi.model.IRI.create(r.toString());
                        tgt = URIUtils.sanitize(tgt);
                        importTargets.add(new IRI(tgt.toString()));
                    } catch (Exception ex) {
                        log.error("FAILED to obtain import target from resource {}", r);
                        continue;
                    }
                }
                it = g.filter(null, RDF.type, OWL.Ontology);
                while (it.hasNext()) {
                    BlankNodeOrIRI ontology = it.next().getSubject();
                    log.debug("Removing all triples related to {} from {}", ontology, iri);
                    Iterator<Triple> it2 = g.filter(ontology, null, null);
                    while (it2.hasNext()) root.remove(it2.next());
                }
                /*
                 * Reinstate import statements, though. If imported ontologies were not merged earlier, we
                 * are not doing it now anyway.
                 */
                for (RDFTerm target : importTargets) root.add(new TripleImpl(iri, OWL.imports, target));
            }
        } else {
            String base = prefix + getID();
            for (int i = 0; i < backwardPathLength; i++) base = URIUtils.upOne(URI.create(base)).toString();
            base += "/";
            // The key set of managedOntologies contains the ontology IRIs, not their storage keys.
            for (OWLOntologyID ontologyId : managedOntologies) {
                org.semanticweb.owlapi.model.IRI physIRI = // .create(base + ontologyId.getVersionIRI()));
                        org.semanticweb.owlapi.model.IRI.create(base + OntologyUtils.encode(ontologyId));
                root.add(new TripleImpl(iri, OWL.imports, new IRI(physIRI.toString())));
            }
        }
        log.debug("Clerezza export of {} completed in {} ms.", getID(), System.currentTimeMillis() - before);
    }
    return root;
}
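To illustrate how the export can be consumed, here is a hedged snippet (hypothetical code inside a subclass of AbstractOntologyCollectorImpl, since the method is protected; the prefix IRI is an arbitrary example of ours) that lists the owl:imports targets recorded in the returned graph:

// Export without merging, then enumerate the recorded owl:imports targets.
Graph exported = exportToGraph(false,
        org.semanticweb.owlapi.model.IRI.create("http://example.org/ontonet/"));
Iterator<Triple> imports = exported.filter(null, OWL.imports, null);
while (imports.hasNext()) {
    RDFTerm target = imports.next().getObject();
    // each target is an IRI minted from the prefix and the encoded ontology ID
    System.out.println("imports " + target);
}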
Use of org.apache.clerezza.commons.rdf.Literal in project stanbol by apache.
The class MultiThreadedTestBase, method createRdfDataIterator.
/**
 * Iterator implementation that parses an RDF graph from the passed
 * {@link InputStream}. The RDF data are loaded in-memory. Because of this
 * only test data that fit in-memory can be used. <p>
 * Literal values (objects) of the {@link #PROPERTY_TEST_DATA_PROPERTY} are
 * used as data. If this property is not present {@link #DEFAULT_TEST_DATA_PROPERTY}
 * is used. If {@link #PROPERTY_TEST_DATA_PROPERTY} is set to '*' then all
 * Triples with Literal values are used.<p>
 * This supports all RDF formats supported by the {@link JenaParserProvider} and
 * {@link RdfJsonParsingProvider}. The charset is expected to be UTF-8.
 * @param is the input stream providing the RDF test data.
 * @param mediaType the Media-Type of the stream. MUST BE supported by
 * the Apache Clerezza RDF parsers.
 * @param propertyString the configured test data property, or '*' to use all literal values.
 */
private Iterator<String> createRdfDataIterator(InputStream is, String mediaType, final String propertyString) {
    final SimpleGraph graph = new SimpleGraph();
    try {
        rdfParser.parse(graph, is, mediaType);
    } catch (UnsupportedFormatException e) {
        Assert.fail("The MimeType '" + mediaType + "' of the parsed testData "
                + "is not supported. This utility supports plain text files "
                + "as well as the RDF formats " + rdfParser.getSupportedFormats()
                + ". If your test data uses one of those formats but it was not "
                + "correctly detected you can use the System property '"
                + PROPERTY_TEST_DATA_TYPE + "' to manually set the Media-Type!");
    }
    IOUtils.closeQuietly(is);
    return new Iterator<String>() {
        Iterator<Triple> it = null;
        String next = null;

        private String getNext() {
            if (it == null) {
                IRI property;
                if ("*".equals(propertyString)) {
                    //wildcard
                    property = null;
                    log.info("Iterate over values of all Triples");
                } else {
                    property = new IRI(NamespaceMappingUtils.getConfiguredUri(nsPrefixService, propertyString));
                    log.info("Iterate over values of property {}", property);
                }
                it = graph.filter(null, property, null);
            }
            while (it.hasNext()) {
                RDFTerm value = it.next().getObject();
                if (value instanceof Literal) {
                    return ((Literal) value).getLexicalForm();
                }
            }
            //no more data
            return null;
        }

        @Override
        public boolean hasNext() {
            if (next == null) {
                next = getNext();
            }
            return next != null;
        }

        @Override
        public String next() {
            if (next == null) {
                next = getNext();
            }
            if (next == null) {
                throw new NoSuchElementException("No further testData available");
            } else {
                String elem = next;
                next = null;
                return elem;
            }
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    };
}
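The core pattern here is independent of the test harness. Below is a standalone sketch, under the assumption that the Clerezza Parser facade is available on the classpath (the class name and the wildcard iteration are our choices), that parses a stream into an in-memory graph and prints every literal object:

import java.io.InputStream;
import java.util.Iterator;

import org.apache.clerezza.commons.rdf.Graph;
import org.apache.clerezza.commons.rdf.Literal;
import org.apache.clerezza.commons.rdf.RDFTerm;
import org.apache.clerezza.commons.rdf.Triple;
import org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph;
import org.apache.clerezza.rdf.core.serializedform.Parser;

// Hypothetical utility: dump the lexical form of every literal object in
// an RDF stream; filter(null, null, null) iterates over all triples.
public class LiteralDump {

    public static void dumpLiterals(InputStream in, String mediaType) {
        Graph graph = new SimpleGraph();
        // the media type must be supported by a registered parsing provider
        Parser.getInstance().parse(graph, in, mediaType);
        Iterator<Triple> it = graph.filter(null, null, null);
        while (it.hasNext()) {
            RDFTerm object = it.next().getObject();
            if (object instanceof Literal) {
                System.out.println(((Literal) object).getLexicalForm());
            }
        }
    }
}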
Use of org.apache.clerezza.commons.rdf.Literal in project stanbol by apache.
The class IndexedGraphTest, method createGraph.
private static void createGraph(Collection<Triple> tc, int triples, Long seed) {
    Random rnd = new Random();
    if (seed != null) {
        rnd.setSeed(seed);
    }
    LiteralFactory lf = LiteralFactory.getInstance();
    //randoms are in the range [0..3]
    //literal
    double l = 1.0;
    //int
    double i = l / 3;
    //double
    double d = l * 2 / 3;
    //bNode
    double b = 2.0;
    //create new bNode
    double nb = b - (l * 2 / 3);
    double random;
    BlankNodeOrIRI subject = null;
    IRI predicate = null;
    List<IRI> predicateList = new ArrayList<IRI>();
    predicateList.add(RDF.first);
    predicateList.add(RDF.rest);
    predicateList.add(RDF.type);
    predicateList.add(RDFS.label);
    predicateList.add(RDFS.comment);
    predicateList.add(RDFS.range);
    predicateList.add(RDFS.domain);
    predicateList.add(FOAF.name);
    predicateList.add(FOAF.nick);
    predicateList.add(FOAF.homepage);
    predicateList.add(FOAF.age);
    predicateList.add(FOAF.depiction);
    String URI_PREFIX = "http://www.test.org/bigGraph/ref";
    Language DE = new Language("de");
    Language EN = new Language("en");
    Iterator<IRI> predicates = predicateList.iterator();
    List<BlankNode> bNodes = new ArrayList<BlankNode>();
    bNodes.add(new BlankNode());
    for (int count = 0; tc.size() < triples; count++) {
        random = rnd.nextDouble() * 3;
        if (random >= 2.5 || count == 0) {
            if (random <= 2.75) {
                subject = new IRI(URI_PREFIX + count);
            } else {
                int rndIndex = (int) ((random - 2.75) * bNodes.size() / (3.0 - 2.75));
                subject = bNodes.get(rndIndex);
            }
        }
        if (random > 2.0 || count == 0) {
            if (!predicates.hasNext()) {
                Collections.shuffle(predicateList, rnd);
                predicates = predicateList.iterator();
            }
            predicate = predicates.next();
        }
        if (random <= l) {
            //literal
            if (random <= i) {
                tc.add(new TripleImpl(subject, predicate, lf.createTypedLiteral(count)));
            } else if (random <= d) {
                tc.add(new TripleImpl(subject, predicate, lf.createTypedLiteral(random)));
            } else {
                //partition the remaining (d..l] sub-range evenly across plain,
                //English and German text literals
                Literal text;
                double third = (l - d) / 3;
                if (random <= d + third) {
                    text = new PlainLiteralImpl("Literal for " + count);
                } else if (random <= d + 2 * third) {
                    text = new PlainLiteralImpl("An English literal for " + count, EN);
                } else {
                    text = new PlainLiteralImpl("Ein Deutsches Literal für " + count, DE);
                }
                tc.add(new TripleImpl(subject, predicate, text));
            }
        } else if (random <= b) {
            //bnode
            BlankNode bnode;
            if (random <= nb) {
                bnode = new BlankNode();
                bNodes.add(bnode);
            } else {
                //>nb <b
                int rndIndex = (int) ((random - nb) * bNodes.size() / (b - nb));
                bnode = bNodes.get(rndIndex);
            }
            tc.add(new TripleImpl(subject, predicate, bnode));
        } else {
            //IRI
            tc.add(new TripleImpl(subject, predicate, new IRI(URI_PREFIX + count * random)));
        }
    }
}
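A brief usage sketch (the graph size and seed are arbitrary values of ours, and IndexedGraph is the implementation under test in this class): fixing the seed makes the pseudo-random graph reproducible across runs.

// Fill an IndexedGraph with 1000 pseudo-random triples from a fixed seed.
Collection<Triple> tc = new IndexedGraph();
createGraph(tc, 1000, 42L);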