Search in sources :

Example 21 with Query

use of org.apache.jena.query.Query in project legato by DOREMUS-ANR.

the class DocumentBuilder method getDocuments.

/**
 * Builds one text document per resource from the values of the selected properties.
 *
 * <p>The RDF model is loaded from {@code pathFile}. For every resource returned by
 * {@code CBDBuilder.getResources(modelSource, classResources)}, all {@code ?p ?o} pairs
 * of that resource are queried; the pairs whose property is listed in {@code selectedProp}
 * are copied into a scratch model, whose literals are then extracted and cleaned with
 * {@code StopWords.clean}. Non-blank results are stored in the returned map, written
 * through {@code FileManager.create}, and the document-name/URI association is
 * registered on the {@code LEGATO} singleton.
 *
 * @param pathFile       path of the RDF dataset, handed to {@code ModelManager.loadModel}
 * @param classResources handed to {@code CBDBuilder.getResources} to select the resources
 * @param selectedProp   string forms of the properties whose values are kept
 * @param dataset        {@code "source"} or {@code "target"}; decides whether the URI is
 *                       registered as a source or a target URI (any other value registers
 *                       nothing, but the document is still created)
 * @return map whose key is the generated document name and whose value is its content
 * @throws Exception declared by the original signature; propagated from the helpers used
 */
public static HashMap<String, String> getDocuments(String pathFile, List<String> classResources, List<String> selectedProp, String dataset) throws Exception {
    LEGATO legato = LEGATO.getInstance();
    // Load RDF model from the dataset
    Model modelSource = ModelManager.loadModel(pathFile);
    // 1st String = the docName. 2d String = its content
    HashMap<String, String> documents = new HashMap<String, String>();
    // Documents creation based on the selected properties for each resource
    for (Resource resource : CBDBuilder.getResources(modelSource, classResources)) {
        Model model = ModelFactory.createDefaultModel();
        String sparqlQueryString = "SELECT DISTINCT ?p ?o {<" + resource + "> ?p ?o }";
        Query query = QueryFactory.create(sparqlQueryString);
        // try-with-resources: the execution is released even if iterating the results throws.
        try (QueryExecution qexec = QueryExecutionFactory.create(query, modelSource)) {
            ResultSet queryResults = qexec.execSelect();
            while (queryResults.hasNext()) {
                QuerySolution qs = queryResults.nextSolution();
                Resource prop = qs.getResource("?p");
                if (selectedProp.contains(prop.toString())) {
                    // NOTE(review): Model.createResource(Resource) appears to create a new blank node
                    // typed by `resource` rather than reuse `resource` itself -- confirm this is intended.
                    model.createResource(resource).addProperty(model.createProperty(prop.toString()), qs.get("?o").toString());
                }
            }
        }
        String docName = generateUUID(resource.getURI());
        // Preprocessing before documents creation
        String docContent = StopWords.clean(CBDBuilder.getLiterals(model));
        // docContent = Stemmer.stem(docContent);
        // The null test must come first: equals(null) is always false and would not guard anything.
        if (docContent != null && !docContent.equals("") && !docContent.equals("\n") && !docContent.equals(" ")) {
            if (dataset.equals("source")) {
                legato.setSrcUri(docName, resource.getURI());
            } else if (dataset.equals("target")) {
                legato.setTgtUri(docName, resource.getURI());
            }
            // Construct a document for each resource
            documents.put(docName, docContent);
            FileManager.create(docName, docContent, dataset);
        }
    }
    return documents;
}
Also used : LEGATO(legato.LEGATO) Query(org.apache.jena.query.Query) HashMap(java.util.HashMap) QuerySolution(org.apache.jena.query.QuerySolution) Model(org.apache.jena.rdf.model.Model) Resource(org.apache.jena.rdf.model.Resource) ResultSet(org.apache.jena.query.ResultSet) File(java.io.File) QueryExecution(org.apache.jena.query.QueryExecution)

Example 22 with Query

use of org.apache.jena.query.Query in project rdf2neo by Rothamsted.

the class RdfDataManager method processSparql.

/**
 * Process a SPARQL query, by running it against our RDF source and passing each {@link QuerySolution} to
 * the action parameter. Works out operations like getting the proper handler from TDB query or
 * caching the SPARQL queries.
 *
 * The query runs inside a read transaction on the data set, and the query execution is closed
 * once all rows have been consumed (or if the action throws).
 *
 * @param logPrefix operation name, used for logging.
 * @param sparql the SELECT query text; when null, nothing is run and 0 is returned.
 * @param action invoked once for every result row.
 * @return the number of rows passed to the action.
 */
public long processSparql(String logPrefix, String sparql, Consumer<QuerySolution> action) {
    if (sparql == null) {
        log.debug("null SPARQL for {}, skipping", logPrefix);
        return 0;
    }
    ensureOpen();
    Dataset ds = this.dataSet;
    Model model = ds.getDefaultModel();
    Query query = queryCache.getUnchecked(sparql);
    // Single-element array so that the lambdas below can update the counter.
    long[] ctr = { 0L };
    Txn.executeRead(ds, () -> {
        // try-with-resources: the original never closed the execution.
        try (QueryExecution qx = QueryExecutionFactory.create(query, model)) {
            qx.execSelect().forEachRemaining(row -> {
                action.accept(row);
                if (++ctr[0] % 100000 == 0)
                    log.info("{}: {} SPARQL tuples read from RDF", logPrefix, ctr[0]);
            });
        }
    });
    return ctr[0];
}
Also used : Query(org.apache.jena.query.Query) Dataset(org.apache.jena.query.Dataset) Model(org.apache.jena.rdf.model.Model) QueryExecution(org.apache.jena.query.QueryExecution)

Example 23 with Query

use of org.apache.jena.query.Query in project rdf2neo by Rothamsted.

the class RdfDataManager method addCypherProps.

/**
 * Take an existing {@link CypherEntity} and adds the properties that can be mapped from the underlining TDB by means
 * of a property query, like {@link CyNodeLoadingHandler#getNodePropsSparql()}, or
 * {@link CyRelationLoadingHandler#getRelationPropsSparql()}.
 *
 * The query is run with the variable {@code iri} pre-bound to the entity's IRI; each result row must
 * provide a {@code name} and a {@code value}. If no transaction is active on the data set, a read
 * transaction is opened for the duration of the query and ended afterwards.
 *
 * It doesn't do anything (apart from checking that the manager is open) if the query is null.
 *
 * @param cyEnt the entity receiving the property values.
 * @param propsSparql the property query, may be null.
 * @throws IllegalArgumentException if a row yields a null property name.
 */
protected void addCypherProps(CypherEntity cyEnt, String propsSparql) {
    ensureOpen();
    // It may be omitted, if you don't have any property except the IRI.
    // Checked before any other work, so that the null case really is a no-op.
    if (propsSparql == null)
        return;
    Model model = this.dataSet.getDefaultModel();
    QuerySolutionMap params = new QuerySolutionMap();
    params.add("iri", model.getResource(cyEnt.getIri()));
    Query qry = this.queryCache.getUnchecked(propsSparql);
    Function<String, String> propIdConverter = this.getCyPropertyIdConverter();
    // Only manage the transaction ourselves if the caller has not opened one already.
    boolean wasInTnx = dataSet.isInTransaction();
    if (!wasInTnx)
        dataSet.begin(ReadWrite.READ);
    // try-with-resources: the original never closed the execution.
    try (QueryExecution qx = QueryExecutionFactory.create(qry, model, params)) {
        qx.execSelect().forEachRemaining(row -> {
            String propName = this.getCypherId(row.get("name"), propIdConverter);
            if (propName == null)
                throw new IllegalArgumentException("Null property name for " + cyEnt.getIri());
            String propValue = JENAUTILS.literal2Value(row.getLiteral("value")).get();
            cyEnt.addPropValue(propName, propValue);
        });
    } finally {
        // Runs after the execution has been closed.
        if (!wasInTnx && dataSet.isInTransaction())
            dataSet.end();
    }
}
Also used : Query(org.apache.jena.query.Query) Model(org.apache.jena.rdf.model.Model) QueryExecution(org.apache.jena.query.QueryExecution) QuerySolutionMap(org.apache.jena.query.QuerySolutionMap)

Example 24 with Query

use of org.apache.jena.query.Query in project jena by apache.

the class Service method exec.

/**
 * Executes a SERVICE operation against its remote endpoint over HTTP and returns the
 * materialized results as a {@link QueryIterator}.
 *
 * <p>The sub-operation has its scope renaming reversed before being turned back into a
 * query; when that changes the algebra, result variables are mapped back to the names
 * visible in the original operation.
 *
 * @param op      the SERVICE operation; its service node must be a URI
 * @param context execution context; {@code null} is replaced by {@code emptyContext}
 * @return an iterator over the rows returned by the remote service
 * @throws QueryExecException if SERVICE execution is disabled in the context, or the
 *                            service node is not a URI
 */
public static QueryIterator exec(OpService op, Context context) {
    if (context == null)
        context = emptyContext;
    // context was defaulted just above, so no further null test is needed here.
    if (context.isFalse(httpServiceAllowed))
        throw new QueryExecException("SERVICE execution disabled");
    // [QExec] Add getSubOpUnmodified();
    if (!op.getService().isURI())
        throw new QueryExecException("Service URI not bound: " + op.getService());
    String serviceURL = op.getService().getURI();
    Op opRemote = op.getSubOp();
    Query query;
    // Disabled alternative (never executes): build the query from the syntax element.
    if (false) {
        // ***** Interacts with substitution.
        Element el = op.getServiceElement().getElement();
        if (el instanceof ElementSubQuery)
            query = ((ElementSubQuery) el).getQuery();
        else {
            query = QueryFactory.create();
            query.setQueryPattern(el);
            query.setResultVars();
        }
    }
    // This relies on the observation that the query was originally correct,
    // so reversing the scope renaming is safe (it merely restores the
    // algebra expression).
    // 
    // Any variables that reappear should be internal ones that were hidden
    // by renaming in the first place.
    // 
    // Any substitution is also safe because it replaces variables by
    // values.
    // 
    // It is safer to rename/unrename than skipping SERVICE during rename
    // to avoid substituting hidden variables.
    Op opRestored = Rename.reverseVarRename(opRemote, true);
    query = OpAsQuery.asQuery(opRestored);
    // Transforming: Same object means "no change"
    boolean requiresRemapping = false;
    Map<Var, Var> varMapping = null;
    if (!opRestored.equals(opRemote)) {
        varMapping = new HashMap<>();
        Set<Var> originalVars = OpVars.visibleVars(op);
        Set<Var> remoteVars = OpVars.visibleVars(opRestored);
        for (Var v : originalVars) {
            if (v.getName().contains("/")) {
                // A variable which was scope renamed so has a different name
                String origName = v.getName().substring(v.getName().lastIndexOf('/') + 1);
                Var remoteVar = Var.alloc(origName);
                if (remoteVars.contains(remoteVar)) {
                    varMapping.put(remoteVar, v);
                    requiresRemapping = true;
                }
            } else {
                // A variable which does not have a different name
                if (remoteVars.contains(v))
                    varMapping.put(v, v);
            }
        }
    }
    // -- Setup
    // boolean withCompression = context.isTrueOrUndef(httpQueryCompression);
    long timeoutMillis = timeoutFromContext(context);
    // RegistryServiceModifier is applied by QueryExecHTTP
    Params serviceParams = getServiceParamsFromContext(serviceURL, context);
    HttpClient httpClient = chooseHttpClient(serviceURL, context);
    QuerySendMode querySendMode = chooseQuerySendMode(serviceURL, context, QuerySendMode.asGetWithLimitBody);
    // -- End setup
    // Build the execution
    QueryExecHTTP qExec = QueryExecHTTP.newBuilder().endpoint(serviceURL).timeout(timeoutMillis, TimeUnit.MILLISECONDS).query(query).params(serviceParams).context(context).httpClient(httpClient).sendMode(querySendMode).build();
    try {
        // Detach from the network stream.
        RowSet rowSet = qExec.select().materialize();
        QueryIterator qIter = QueryIterPlainWrapper.create(rowSet);
        // Only wrap when at least one variable actually needs its scoped name restored.
        if (requiresRemapping)
            qIter = QueryIter.map(qIter, varMapping);
        return qIter;
    } catch (HttpException ex) {
        throw QueryExceptionHTTP.rewrap(ex);
    }
}
Also used : Op(org.apache.jena.sparql.algebra.Op) ElementSubQuery(org.apache.jena.sparql.syntax.ElementSubQuery) OpAsQuery(org.apache.jena.sparql.algebra.OpAsQuery) Query(org.apache.jena.query.Query) Var(org.apache.jena.sparql.core.Var) Element(org.apache.jena.sparql.syntax.Element) RowSet(org.apache.jena.sparql.exec.RowSet) HttpParams(org.apache.jena.sparql.engine.http.HttpParams) QueryExecException(org.apache.jena.query.QueryExecException) ElementSubQuery(org.apache.jena.sparql.syntax.ElementSubQuery) QueryIterator(org.apache.jena.sparql.engine.QueryIterator) HttpClient(java.net.http.HttpClient) RegistryHttpClient(org.apache.jena.http.RegistryHttpClient) HttpException(org.apache.jena.atlas.web.HttpException)

Example 25 with Query

use of org.apache.jena.query.Query in project jena by apache.

the class UpdateEngineWorker method visit.

/**
 * Executes a modify update: the WHERE pattern is evaluated once, the resulting bindings
 * are buffered in a {@code DataBag}, and the buffered bindings are then replayed first
 * for the delete quads and then for the insert quads.
 *
 * <p>Every iterator is closed in a {@code finally} block so that it is released even
 * when a step throws; the bag itself is closed last.
 *
 * @param update the modify operation to execute
 */
@Override
public void visit(UpdateModify update) {
    Node withGraph = update.getWithIRI();
    Element elt = update.getWherePattern();
    // null or a dataset for USING clause.
    // USING/USING NAMED
    DatasetGraph dsg = processUsing(update);
    // USING overrides WITH
    if (dsg == null && withGraph != null) {
        // Subtle difference : WITH <uri>... WHERE {}
        // and an empty/unknown graph <uri>
        // rewrite with GRAPH -> no match.
        // redo as dataset with different default graph -> match
        // SPARQL is unclear about what happens when the graph does not exist.
        // but the rewrite with ElementNamedGraph is closer to SPARQL.
        // Better, treat as
        // WHERE { GRAPH <with> { ... } }
        // This is the SPARQL wording (which is a bit loose).
        elt = new ElementNamedGraph(withGraph, elt);
    }
    if (dsg == null)
        dsg = datasetGraph;
    Query query = elementToQuery(elt);
    ThresholdPolicy<Binding> policy = ThresholdPolicyFactory.policyFromContext(datasetGraph.getContext());
    DataBag<Binding> db = BagFactory.newDefaultBag(policy, SerializationFactoryFinder.bindingSerializationFactory());
    try {
        // Buffer all bindings first: the pattern is evaluated once, before any quad is changed.
        Iterator<Binding> bindings = evalBindings(query, dsg, inputBinding, context);
        try {
            db.addAll(bindings);
        } finally {
            Iter.close(bindings);
        }
        // Deletes are applied before inserts, each over a fresh pass of the buffered bindings.
        Iterator<Binding> deleteBindings = db.iterator();
        try {
            execDelete(datasetGraph, update.getDeleteQuads(), withGraph, deleteBindings);
        } finally {
            Iter.close(deleteBindings);
        }
        Iterator<Binding> insertBindings = db.iterator();
        try {
            execInsert(datasetGraph, update.getInsertQuads(), withGraph, insertBindings);
        } finally {
            Iter.close(insertBindings);
        }
    } finally {
        db.close();
    }
}
Also used : Binding(org.apache.jena.sparql.engine.binding.Binding) ElementNamedGraph(org.apache.jena.sparql.syntax.ElementNamedGraph) Query(org.apache.jena.query.Query) Node(org.apache.jena.graph.Node) Element(org.apache.jena.sparql.syntax.Element)

Aggregations

Query (org.apache.jena.query.Query)265 Test (org.junit.Test)78 ContractTest (org.xenei.junit.contract.ContractTest)65 QueryExecution (org.apache.jena.query.QueryExecution)63 ElementSubQuery (org.apache.jena.sparql.syntax.ElementSubQuery)49 WhereValidator (org.apache.jena.arq.querybuilder.WhereValidator)42 Var (org.apache.jena.sparql.core.Var)42 Triple (org.apache.jena.graph.Triple)38 ElementPathBlock (org.apache.jena.sparql.syntax.ElementPathBlock)32 ResultSet (org.apache.jena.query.ResultSet)29 Node (org.apache.jena.graph.Node)28 TriplePath (org.apache.jena.sparql.core.TriplePath)28 QuerySolution (org.apache.jena.query.QuerySolution)26 ExprVar (org.apache.jena.sparql.expr.ExprVar)24 Binding (org.apache.jena.sparql.engine.binding.Binding)22 Model (org.apache.jena.rdf.model.Model)20 HashMap (java.util.HashMap)19 Dataset (org.apache.jena.query.Dataset)15 Op (org.apache.jena.sparql.algebra.Op)15 SelectBuilder (org.apache.jena.arq.querybuilder.SelectBuilder)14