Search in sources :

Example 6 with Literal

Use of org.apache.clerezza.commons.rdf.Literal in project stanbol by apache.

The class ContentItemBackendTest, method testTextAnnotationFunctionWithoutParsedContext.

@Test
public void testTextAnnotationFunctionWithoutParsedContext() throws LDPathParseException {
    // fn:textAnnotation() without a parsed context selects all TextAnnotations
    // of the content item; fise:selected-text then extracts their selected text.
    String path = "fn:textAnnotation()/fise:selected-text";
    Collection<RDFTerm> result = ldpath.pathQuery(ci.getUri(), path, null);
    assertNotNull(result);
    // assertEquals reports expected vs. actual size on failure, unlike
    // assertTrue(result.size() == 2); it also subsumes the isEmpty check.
    assertEquals(2, result.size());
    Set<String> expectedValues = new HashSet<String>(Arrays.asList("Bob Marley", "Paris"));
    for (RDFTerm r : result) {
        assertTrue(r instanceof Literal);
        // remove() returns false on an unexpected value, failing the test
        assertTrue(expectedValues.remove(((Literal) r).getLexicalForm()));
    }
    assertTrue(expectedValues.isEmpty());
    //test with a filter for the type
    //same as the 1st example but rather using an ld-path construct for
    //filtering for TextAnnotations representing persons
    path = "fn:textAnnotation()[dc:type is dbpedia-ont:Person]/fise:selected-text";
    result = ldpath.pathQuery(ci.getUri(), path, null);
    assertNotNull(result);
    assertEquals(1, result.size());
    RDFTerm r = result.iterator().next();
    assertTrue(r instanceof Literal);
    // JUnit convention: expected value first, actual value second
    assertEquals("Bob Marley", ((Literal) r).getLexicalForm());
}
Also used : Literal(org.apache.clerezza.commons.rdf.Literal) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 7 with Literal

Use of org.apache.clerezza.commons.rdf.Literal in project stanbol by apache.

The class OpenCalaisEngine, method queryModel.

/**
     * Extracts the relevant entity information from the Calais RDF data.
     * The entities and the related information are extracted by a SPARQL query.
     *
     * @param model the Graph representing the Calais data
     *
     * @return a Collection of entity information
     * @throws EngineException on a {@link ParseException} while processing the
     * Sparql query.
     */
public Collection<CalaisEntityOccurrence> queryModel(Graph model) throws EngineException {
    //TODO extract also Geo info (latitude/longitude)?
    // Selects every recognised entity occurrence (id, name, type, mention
    // offset/length, exact surface form and surrounding context) together with
    // the optional relevance score and the optional disambiguated entity
    // reference (?did/?dtype). The FILTER restricts results to the entity
    // types the engine handles.
    String query = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> " + "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> " + "PREFIX p: <http://s.opencalais.com/1/pred/> " + "PREFIX t: <http://s.opencalais.com/1/type/em/e/> " + "SELECT DISTINCT ?id ?did ?name ?type ?dtype ?offset ?length ?exact ?context ?score WHERE { " + "?id p:name ?name ." + "?id rdf:type ?type ." + "?y p:subject ?id ." + "?y p:offset ?offset ." + "?y p:length ?length ." + "?y p:exact ?exact ." + "?y p:detection ?context ." + " OPTIONAL { ?z p:subject ?id . ?z p:relevance ?score . } " + // get disambiguated entity references if available
    " OPTIONAL { ?did p:subject ?id . ?did p:name ?name . ?did rdf:type ?dtype . } " + "FILTER (" + "?type = t:Person || " + "?type = t:City || " + "?type = t:Continent || " + "?type = t:Country || " + "?type = t:ProvinceOrState || " + "?type = t:Region || " + "?type = t:Company || " + "?type = t:Facility || " + "?type = t:Organization " + ")" + "} ";
    Collection<CalaisEntityOccurrence> result = new ArrayList<CalaisEntityOccurrence>();
    try {
        SelectQuery sQuery = (SelectQuery) QueryParser.getInstance().parse(query);
        ResultSet rs = tcManager.executeSparqlQuery(sQuery, model);
        while (rs.hasNext()) {
            SolutionMapping row = rs.next();
            CalaisEntityOccurrence occ = new CalaisEntityOccurrence();
            // prefer the disambiguated entity reference (?did) over the raw
            // Calais detection id when one is available
            RDFTerm disambiguated = row.get("did");
            occ.id = (disambiguated == null ? row.get("id") : disambiguated);
            if (onlyNERMode) {
                // NER-only mode: always keep the original Calais type
                occ.type = row.get("type");
            } else {
                occ.type = (disambiguated == null ? row.get("type") : row.get("dtype"));
            }
            if (calaisTypeMap != null) {
                // optionally map the Calais type onto a configured target type
                IRI mappedType = calaisTypeMap.get(occ.type);
                if (mappedType != null) {
                    occ.type = mappedType;
                }
            }
            occ.name = ((Literal) row.get("name")).getLexicalForm();
            occ.exact = ((Literal) row.get("exact")).getLexicalForm();
            //TODO for html the offsets might not be those of the original document but refer to a cleaned up version?
            occ.offset = Integer.valueOf(((Literal) row.get("offset")).getLexicalForm());
            // remove brackets
            occ.context = ((Literal) row.get("context")).getLexicalForm().replaceAll("[\\[\\]]", "");
            occ.length = Integer.valueOf(((Literal) row.get("length")).getLexicalForm());
            // relevance is bound via OPTIONAL, so the binding may be absent
            if (row.get("score") != null) {
                occ.relevance = Double.valueOf(((Literal) row.get("score")).getLexicalForm());
            }
            result.add(occ);
        }
    } catch (ParseException e) {
        throw new EngineException("Unable to parse SPARQL query for processing OpenCalais results", e);
    }
    log.info("Found {} occurences", result.size());
    return result;
}
Also used : SelectQuery(org.apache.clerezza.rdf.core.sparql.query.SelectQuery) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) SolutionMapping(org.apache.clerezza.rdf.core.sparql.SolutionMapping) Literal(org.apache.clerezza.commons.rdf.Literal) ArrayList(java.util.ArrayList) ResultSet(org.apache.clerezza.rdf.core.sparql.ResultSet) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) ParseException(org.apache.clerezza.rdf.core.sparql.ParseException)

Example 8 with Literal

Use of org.apache.clerezza.commons.rdf.Literal in project stanbol by apache.

The class AbstractOntologyCollectorImpl, method exportToGraph.

/**
     * This method has no conversion calls, so it can be invoked by subclasses that wish to modify it
     * afterwards.
     * 
     * @param merge if {@code true} the triples of all directly managed ontologies are copied into
     *        the exported graph (one level deep only; import statements of merged ontologies are
     *        preserved); if {@code false} only owl:imports statements pointing at the managed
     *        ontologies are generated.
     * @param prefix the namespace used to mint the exported ontology IRI and the physical IRIs
     *        of the generated import targets.
     * @return the exported graph
     */
protected Graph exportToGraph(boolean merge, org.semanticweb.owlapi.model.IRI prefix) {
    long before = System.currentTimeMillis();
    // No need to store, give it a name, or anything.
    Graph root = new SimpleGraph();
    IRI iri = new IRI(prefix + _id);
    // root was just constructed, so no null check is needed here.
    // Set the ontology ID
    root.add(new TripleImpl(iri, RDF.type, OWL.Ontology));
    if (merge) {
        log.warn("Merging of Clerezza triple collections is only implemented one level down. Import statements will be preserved for further levels.");
        Iterator<Triple> it;
        // NOTE(review): importTargets is shared across the loop below, so import
        // triples collected from earlier ontologies are re-added for every later
        // ontology. Harmless if Graph.add has set semantics - confirm.
        Set<RDFTerm> importTargets = new HashSet<RDFTerm>();
        for (OWLOntologyID ontologyId : managedOntologies) {
            ImmutableGraph g = getOntology(ontologyId, ImmutableGraph.class, false);
            root.addAll(g);
            // Collect the merged ontology's import targets so they can be
            // re-attached to the exported ontology afterwards.
            it = g.filter(null, OWL.imports, null);
            while (it.hasNext()) {
                org.semanticweb.owlapi.model.IRI tgt;
                RDFTerm r = it.next().getObject();
                try {
                    if (r instanceof IRI)
                        tgt = org.semanticweb.owlapi.model.IRI.create(((IRI) r).getUnicodeString());
                    else if (r instanceof Literal)
                        tgt = org.semanticweb.owlapi.model.IRI.create(((Literal) r).getLexicalForm());
                    else
                        tgt = org.semanticweb.owlapi.model.IRI.create(r.toString());
                    tgt = URIUtils.sanitize(tgt);
                    importTargets.add(new IRI(tgt.toString()));
                } catch (Exception ex) {
                    log.error("FAILED to obtain import target from resource {}", r);
                    continue;
                }
            }
            // Strip the merged ontology's own ontology-ID triples: the export
            // must carry a single ontology ID (iri) only.
            it = g.filter(null, RDF.type, OWL.Ontology);
            while (it.hasNext()) {
                BlankNodeOrIRI ontology = it.next().getSubject();
                log.debug("Removing all triples related to {} from {}", ontology, iri);
                Iterator<Triple> it2 = g.filter(ontology, null, null);
                while (it2.hasNext()) root.remove(it2.next());
            }
            /*
                 * Reinstate import statements, though. If imported ontologies were not merged earlier, we
                 * are not doing it now anyway.
                 */
            for (RDFTerm target : importTargets) root.add(new TripleImpl(iri, OWL.imports, target));
        }
    } else {
        String base = prefix + getID();
        // Walk up the configured number of path segments to compute the base URI.
        for (int i = 0; i < backwardPathLength; i++) base = URIUtils.upOne(URI.create(base)).toString();
        base += "/";
        // The key set of managedOntologies contains the ontology IRIs, not their storage keys.
        for (OWLOntologyID ontologyId : managedOntologies) {
            org.semanticweb.owlapi.model.IRI physIRI = // .create(base + ontologyId.getVersionIRI()));
            org.semanticweb.owlapi.model.IRI.create(base + OntologyUtils.encode(ontologyId));
            root.add(new TripleImpl(iri, OWL.imports, new IRI(physIRI.toString())));
        }
    }
    log.debug("Clerezza export of {} completed in {} ms.", getID(), System.currentTimeMillis() - before);
    return root;
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) OWLOntologyCreationException(org.semanticweb.owlapi.model.OWLOntologyCreationException) MissingOntologyException(org.apache.stanbol.ontologymanager.servicesapi.collector.MissingOntologyException) OWLOntologyAlreadyExistsException(org.semanticweb.owlapi.model.OWLOntologyAlreadyExistsException) UnmodifiableOntologyCollectorException(org.apache.stanbol.ontologymanager.servicesapi.collector.UnmodifiableOntologyCollectorException) OntologyCollectorModificationException(org.apache.stanbol.ontologymanager.servicesapi.collector.OntologyCollectorModificationException) Triple(org.apache.clerezza.commons.rdf.Triple) ImmutableGraph(org.apache.clerezza.commons.rdf.ImmutableGraph) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) Graph(org.apache.clerezza.commons.rdf.Graph) OWLOntologyID(org.semanticweb.owlapi.model.OWLOntologyID) Literal(org.apache.clerezza.commons.rdf.Literal) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) ImmutableGraph(org.apache.clerezza.commons.rdf.ImmutableGraph) HashSet(java.util.HashSet)

Example 9 with Literal

Use of org.apache.clerezza.commons.rdf.Literal in project stanbol by apache.

The class MultiThreadedTestBase, method createRdfDataIterator.

/**
     * Creates an iterator over test data parsed from an RDF stream. The graph
     * is held fully in-memory, so only test data that fit in-memory can be
     * used. <p>
     * The lexical forms of literal objects of the triples with the property
     * given by {@code propertyString} are returned; a value of '*' acts as a
     * wildcard matching all triples with literal objects. Non-literal objects
     * are skipped. <p>
     * All RDF formats supported by the registered Clerezza parsers can be
     * used. The charset is expected to be UTF-8.
     * @param is the input stream providing the RDF test data.
     * @param mediaType the Media-Type of the stream. MUST BE supported by
     * the Apache Clerezza RDF parsers.
     */
private Iterator<String> createRdfDataIterator(InputStream is, String mediaType, final String propertyString) {
    final SimpleGraph graph = new SimpleGraph();
    try {
        rdfParser.parse(graph, is, mediaType);
    } catch (UnsupportedFormatException e) {
        Assert.fail("The MimeType '" + mediaType + "' of the parsed testData " + "is not supported. This utility supports plain text files as " + "as well as the RDF formats " + rdfParser.getSupportedFormats() + "If your test data uses one of those formats but it was not " + "correctly detected you can use the System property '" + PROPERTY_TEST_DATA_TYPE + "' to manually parse the Media-Type!");
    }
    IOUtils.closeQuietly(is);
    return new Iterator<String>() {

        // lazily initialised iterator over the matching triples
        private Iterator<Triple> tripleIt = null;

        // pre-fetched literal value; null when not yet fetched or exhausted
        private String pending = null;

        /** Advances to the next literal value, or returns null when exhausted. */
        private String fetchNext() {
            if (tripleIt == null) {
                IRI property;
                if ("*".equals(propertyString)) {
                    //wildcard
                    property = null;
                    log.info("Iterate over values of all Triples");
                } else {
                    property = new IRI(NamespaceMappingUtils.getConfiguredUri(nsPrefixService, propertyString));
                    log.info("Iterate over values of property {}", property);
                }
                tripleIt = graph.filter(null, property, null);
            }
            while (tripleIt.hasNext()) {
                RDFTerm object = tripleIt.next().getObject();
                // only literal objects contribute test data
                if (object instanceof Literal) {
                    return ((Literal) object).getLexicalForm();
                }
            }
            //no more data
            return null;
        }

        @Override
        public boolean hasNext() {
            if (pending == null) {
                pending = fetchNext();
            }
            return pending != null;
        }

        @Override
        public String next() {
            // delegate the pre-fetch to hasNext()
            if (!hasNext()) {
                throw new NoSuchElementException("No further testData available");
            }
            String value = pending;
            pending = null;
            return value;
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    };
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) UnsupportedFormatException(org.apache.clerezza.rdf.core.serializedform.UnsupportedFormatException) Literal(org.apache.clerezza.commons.rdf.Literal) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) Iterator(java.util.Iterator) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) NoSuchElementException(java.util.NoSuchElementException)

Example 10 with Literal

Use of org.apache.clerezza.commons.rdf.Literal in project stanbol by apache.

The class IndexedGraphTest, method createGraph.

/**
 * Fills {@code tc} with {@code triples} pseudo-random triples. Subjects,
 * predicates and objects are drawn from a single random in [0..3): objects
 * are literals (typed int / typed double / plain literal in one of three
 * language variants), blank nodes, or IRIs; subjects and predicates are
 * reused across iterations to create a realistically connected graph.
 *
 * @param tc the collection the generated triples are added to
 * @param triples the number of triples to generate
 * @param seed optional random seed for reproducible graphs; may be null
 */
private static void createGraph(Collection<Triple> tc, int triples, Long seed) {
    Random rnd = new Random();
    if (seed != null) {
        rnd.setSeed(seed);
    }
    LiteralFactory lf = LiteralFactory.getInstance();
    //randoms are in the range [0..3]
    //literal
    double l = 1.0;
    //int
    double i = l / 3;
    //double
    double d = l * 2 / 3;
    //bNode
    double b = 2.0;
    //create new bNode
    double nb = b - (l * 2 / 3);
    double random;
    BlankNodeOrIRI subject = null;
    IRI predicate = null;
    List<IRI> predicateList = new ArrayList<IRI>();
    predicateList.add(RDF.first);
    predicateList.add(RDF.rest);
    predicateList.add(RDF.type);
    predicateList.add(RDFS.label);
    predicateList.add(RDFS.comment);
    predicateList.add(RDFS.range);
    predicateList.add(RDFS.domain);
    predicateList.add(FOAF.name);
    predicateList.add(FOAF.nick);
    predicateList.add(FOAF.homepage);
    predicateList.add(FOAF.age);
    predicateList.add(FOAF.depiction);
    String URI_PREFIX = "http://www.test.org/bigGraph/ref";
    Language DE = new Language("de");
    Language EN = new Language("en");
    Iterator<IRI> predicates = predicateList.iterator();
    List<BlankNode> bNodes = new ArrayList<BlankNode>();
    bNodes.add(new BlankNode());
    for (int count = 0; tc.size() < triples; count++) {
        random = rnd.nextDouble() * 3;
        // occasionally pick a new subject (always on the first iteration)
        if (random >= 2.5 || count == 0) {
            if (random <= 2.75) {
                subject = new IRI(URI_PREFIX + count);
            } else {
                int rndIndex = (int) ((random - 2.75) * bNodes.size() / (3.0 - 2.75));
                subject = bNodes.get(rndIndex);
            }
        }
        // occasionally advance to the next predicate (always on the first iteration)
        if (random > 2.0 || count == 0) {
            if (!predicates.hasNext()) {
                Collections.shuffle(predicateList, rnd);
                predicates = predicateList.iterator();
            }
            predicate = predicates.next();
        }
        if (random <= l) {
            //literal
            if (random <= i) {
                tc.add(new TripleImpl(subject, predicate, lf.createTypedLiteral(count)));
            } else if (random <= d) {
                tc.add(new TripleImpl(subject, predicate, lf.createTypedLiteral(random)));
            } else {
                // Plain literal: split the remaining (d..l] interval into three
                // equally likely language variants. (The original code re-tested
                // random <= i and random <= d here, which can never be true in
                // this branch, so only the German variant was ever produced.)
                Literal text;
                double langRnd = (random - d) / (l - d);
                if (langRnd <= 1.0 / 3.0) {
                    text = new PlainLiteralImpl("Literal for " + count);
                } else if (langRnd <= 2.0 / 3.0) {
                    text = new PlainLiteralImpl("An English literal for " + count, EN);
                } else {
                    text = new PlainLiteralImpl("Ein Deutsches Literal für " + count, DE);
                }
                tc.add(new TripleImpl(subject, predicate, text));
            }
        } else if (random <= b) {
            //bnode
            BlankNode bnode;
            if (random <= nb) {
                bnode = new BlankNode();
                bNodes.add(bnode);
            } else {
                //>nb <b
                int rndIndex = (int) ((random - nb) * bNodes.size() / (b - nb));
                bnode = bNodes.get(rndIndex);
            }
            tc.add(new TripleImpl(subject, predicate, bnode));
        } else {
            //IRI
            tc.add(new TripleImpl(subject, predicate, new IRI(URI_PREFIX + count * random)));
        }
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) ArrayList(java.util.ArrayList) BlankNode(org.apache.clerezza.commons.rdf.BlankNode) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) LiteralFactory(org.apache.clerezza.rdf.core.LiteralFactory) Random(java.util.Random) Language(org.apache.clerezza.commons.rdf.Language) Literal(org.apache.clerezza.commons.rdf.Literal) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Aggregations

Literal (org.apache.clerezza.commons.rdf.Literal)71 IRI (org.apache.clerezza.commons.rdf.IRI)35 RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)35 Triple (org.apache.clerezza.commons.rdf.Triple)30 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)22 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)20 ArrayList (java.util.ArrayList)16 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)16 Language (org.apache.clerezza.commons.rdf.Language)12 Graph (org.apache.clerezza.commons.rdf.Graph)11 Test (org.junit.Test)10 HashSet (java.util.HashSet)9 Date (java.util.Date)8 Lock (java.util.concurrent.locks.Lock)6 Entity (org.apache.stanbol.enhancer.engines.entitylinking.Entity)5 HashMap (java.util.HashMap)4 SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph)4 NoConvertorException (org.apache.clerezza.rdf.core.NoConvertorException)4 Representation (org.apache.stanbol.entityhub.servicesapi.model.Representation)4 Collection (java.util.Collection)3