Search in sources :

Example 16 with SortCondition

use of org.apache.jena.query.SortCondition in project jena by apache.

the class TransformDistinctToReduced method isSafe.

/**
 * Checks whether the sort conditions of {@code opOrder} cover the projected
 * variables in a way that makes the optimization safe.
 * <p>
 * For the optimization to be safe all project variables must appear in the
 * ordering prior to any unprojected variables. Ordering by expressions is
 * fine provided they use only projected variables.
 *
 * @param projectVars the projected variables
 * @param opOrder     the order operator whose sort conditions are inspected
 * @return {@code true} if every projected variable has been seen before any
 *         sort condition is rejected by {@code isValidSortCondition};
 *         {@code false} otherwise
 */
protected boolean isSafe(Set<Var> projectVars, OpOrder opOrder) {
    Set<Var> seenVars = new HashSet<>();
    for (SortCondition cond : opOrder.getConditions()) {
        // A rejected condition makes the ordering unsafe. Answer immediately
        // rather than falling through to the size comparison: that comparison
        // could still succeed if the helper recorded variables in seenVars
        // before rejecting the condition, or if projectVars is empty.
        if (!isValidSortCondition(cond, projectVars, seenVars)) {
            return false;
        }
        // As soon as all project variables have been seen the ordering is
        // known to be safe and any further sort conditions are irrelevant.
        if (seenVars.size() == projectVars.size()) {
            return true;
        }
    }
    // The project vars must all have been seen.
    return seenVars.size() == projectVars.size();
}
Also used : SortCondition(org.apache.jena.query.SortCondition) Var(org.apache.jena.sparql.core.Var) HashSet(java.util.HashSet)

Example 17 with SortCondition

use of org.apache.jena.query.SortCondition in project jena by apache.

the class TransformEliminateAssignments method processConditions.

/**
 * Rewrites a list of sort conditions, substituting {@code var} in each
 * condition's expression with the expression returned by
 * {@code getAssignExpr(var)}.
 *
 * @param baseConditions      conditions used when {@code processedConditions} is {@code null}
 * @param processedConditions conditions to use in preference to the base ones; may be {@code null}
 * @param var                 the variable to substitute
 * @return a new list holding one rewritten condition per input condition,
 *         each keeping the direction of its source condition
 */
private List<SortCondition> processConditions(List<SortCondition> baseConditions, List<SortCondition> processedConditions, Var var) {
    List<SortCondition> source;
    if (processedConditions == null) {
        source = baseConditions;
    } else {
        source = processedConditions;
    }

    List<SortCondition> rewritten = new ArrayList<>();
    for (SortCondition original : source) {
        // Build the substitution per condition, exactly as many times as there are conditions.
        ExprTransformSubstitute substitution = new ExprTransformSubstitute(var, getAssignExpr(var));
        Expr replaced = ExprTransformer.transform(substitution, original.getExpression());
        SortCondition updated = new SortCondition(replaced, original.getDirection());
        rewritten.add(updated);
    }
    return rewritten;
}
Also used : SortCondition(org.apache.jena.query.SortCondition)

Example 18 with SortCondition

use of org.apache.jena.query.SortCondition in project jena by apache.

the class TestSortedDataBag method testSorting.

/**
 * Adds random bindings to a {@code SortedDataBag} built with the given
 * threshold and asserts that iterating the bag yields the same sequence as
 * sorting the input list with the same comparator.
 *
 * @param numBindings number of random bindings to generate
 * @param threshold   count passed to the bag's {@code ThresholdPolicyCount}
 */
private void testSorting(int numBindings, int threshold) {
    final List<Binding> input = randomBindings(numBindings);

    // Three sort keys: "8" and "1" ascending, then "0" descending.
    final List<SortCondition> sortKeys = new ArrayList<>();
    sortKeys.add(new SortCondition(new ExprVar("8"), Query.ORDER_ASCENDING));
    sortKeys.add(new SortCondition(new ExprVar("1"), Query.ORDER_ASCENDING));
    sortKeys.add(new SortCondition(new ExprVar("0"), Query.ORDER_DESCENDING));
    final BindingComparator ordering = new BindingComparator(sortKeys);

    final List<Binding> drained = new ArrayList<>();
    final SortedDataBag<Binding> bag = new SortedDataBag<>(new ThresholdPolicyCount<Binding>(threshold), SerializationFactoryFinder.bindingSerializationFactory(), ordering);
    try {
        bag.addAll(input);
        final Iterator<Binding> cursor = bag.iterator();
        for (; cursor.hasNext(); ) {
            drained.add(cursor.next());
        }
        Iter.close(cursor);
    } finally {
        bag.close();
    }

    // Sort the input in place only after it has been handed to the bag.
    Collections.sort(input, ordering);
    assertEquals(input, drained);
}
Also used : Binding(org.apache.jena.sparql.engine.binding.Binding) ExprVar(org.apache.jena.sparql.expr.ExprVar) SortCondition(org.apache.jena.query.SortCondition) BindingComparator(org.apache.jena.sparql.engine.binding.BindingComparator) SortedDataBag(org.apache.jena.atlas.data.SortedDataBag) ArrayList(java.util.ArrayList)

Example 19 with SortCondition

use of org.apache.jena.query.SortCondition in project jena by apache.

the class TestSortedDataBag method testTemporaryFilesAreCleanedUpAfterCompletion.

/**
 * Fills a {@code SortedDataBag} with 500 bindings using a threshold of 10,
 * asserts that 49 spill files exist while the bag is open, and asserts that
 * none of them exist after the bag has been closed.
 */
@Test
public void testTemporaryFilesAreCleanedUpAfterCompletion() {
    final List<Binding> input = randomBindings(500);

    final List<SortCondition> sortKeys = new ArrayList<>();
    sortKeys.add(new SortCondition(new ExprVar("8"), Query.ORDER_ASCENDING));
    final BindingComparator ordering = new BindingComparator(sortKeys);

    final SortedDataBag<Binding> bag = new SortedDataBag<>(new ThresholdPolicyCount<Binding>(10), SerializationFactoryFinder.bindingSerializationFactory(), ordering);
    final List<File> spilled = new ArrayList<>();
    try {
        bag.addAll(input);
        spilled.addAll(bag.getSpillFiles());

        // 500 bindings divided into 50 chunks (49 in files, and 1 in memory)
        assertEquals(49, countExistingFiles(spilled));

        // Walk the whole bag before closing it.
        final Iterator<Binding> cursor = bag.iterator();
        for (; cursor.hasNext(); ) {
            cursor.next();
        }
        Iter.close(cursor);
    } finally {
        bag.close();
    }

    // After close, none of the remembered spill files should remain.
    assertEquals(0, countExistingFiles(spilled));
}

/** Returns how many of the given files currently exist. */
private static int countExistingFiles(List<File> files) {
    int existing = 0;
    for (File candidate : files) {
        if (candidate.exists()) {
            existing++;
        }
    }
    return existing;
}
Also used : Binding(org.apache.jena.sparql.engine.binding.Binding) ExprVar(org.apache.jena.sparql.expr.ExprVar) SortCondition(org.apache.jena.query.SortCondition) BindingComparator(org.apache.jena.sparql.engine.binding.BindingComparator) ArrayList(java.util.ArrayList) SortedDataBag(org.apache.jena.atlas.data.SortedDataBag) File(java.io.File) Test(org.junit.Test)

Example 20 with SortCondition

use of org.apache.jena.query.SortCondition in project jena by apache.

the class TransformTopN method transform.

/*
 * For reference: from the algebra generation of a query, the order of operations is:
 *  Limit/Offset
 *   Distinct/reduce
 *     Project
 *       OrderBy
 *         Values
 *           Having
 *             Select Expressions
 *               Group
 * but note that a subquery can be used to create other orders.
 */
/**
 * Rewrites a slice over an (optionally distinct/reduced and/or projected)
 * order into a top-N form when the slice is small enough.
 *
 * @param opSlice  the slice operator being transformed
 * @param inSubOp  the (already transformed) sub-operator of the slice
 * @return the rewritten operator, or the result of
 *         {@code doNothing(opSlice, inSubOp)} when the rewrite does not apply
 */
@Override
public Op transform(final OpSlice opSlice, final Op inSubOp) {
    /*
     * This looks for all the following cases of slice with optionally
     * distinct and/or project followed by order. It is quicker to execute
     * by avoiding the full sort, just track the top items.
     *
     *  + slice-order                   => topN
     *  + slice-distinct|reduced-order  => top-distinct
     *  + slice-project-order           => project-top
     *  + slice distinct project order  => topN distinct project  (only some cases)
     *
     * If the slice has an offset, a (slice X _) is added. (slice 0 _) is a no-op and is not added.
     *
     * In detail:
     *
     * Case 1:
     *  (slice X N
     *   (order (cond) PATTERN) )
     * ==>
     * (slice X _
     *   (top (X+N cond) PATTERN) )
     *
     * Case 2:
     * (slice X N
     *   (distinct or reduced
     *     (order (cond) PATTERN) ))
     * ==>
     * (slice X _
     *   (top (X+N cond) (distinct PATTERN))
     *
     * Case 3:
     * (slice X N
     *   (project (vars)
     *     (order (cond)
     *         PATTERN ))))
     * ==>
     * (slice X _
     *   (project (vars)
     *     (top (X+N cond) PATTERN) ))
     *
     * Case 4:
     * (slice X N
     *   (distinct
     *     (project (vars)
     *       (order (cond) PATTERN) )))
     * ==>
     * If project-order can be swapped,
     * (slice X _
     *   (top (X+N cond)
     *    (distinct
     *      (project (vars)
     *         PATTERN) )))
     *
     * Care needed: because of the need to keep distinct, we can't
     * process like case 3. Reversing (order) and (project) can only
     * be done if the projection variables include all variables used
     * by the sort conditions.
     *
     * When there is no project, we can push the distinct under the topN,
     * but, when there is, the sort variables may include one projected away,
     * it's not possible to do this with project.  The key is that
     * distinct-project can change the number of rows in ways that mean
     * we can not predict the topN slice.
     */
    /* Algorithm:
     *    Test to see if the slice is small enough.
     *    Extract distinct/reduce, and projection details.
     *    Is it an (order)? If no - not applicable.
     *
     * If slice-project-order
     *   Treat as project-slice-order
     *   Output project-top
     * If slice-distinct-project-order
     *   Test to see if project and order can swap
     *   If they can, output top-distinct-project
     *   else no action.
     *
     * Add a slice if there was an OFFSET.
     * Distinct and reduce are treated as distinct.
     */
    if (opSlice.getLength() == Query.NOLIMIT)
        return doNothing(opSlice, inSubOp);
    long limit = opSlice.getLength();
    long offset = (opSlice.getStart() != Query.NOLIMIT) ? opSlice.getStart() : 0L;
    long N = limit + offset;
    int threshold = (Integer) ARQ.getContext().get(externalSortBufferSize, defaultTopNSortingThreshold);
    // A negative N can only come from long overflow of limit + offset (or
    // nonsensical inputs); it must not slip under the threshold and then be
    // narrowed to int below.
    if (N < 0 || N >= threshold)
        return doNothing(opSlice, inSubOp);

    Op subOp = inSubOp;

    // Extract any distinct/reduce. Reduced is deliberately treated as distinct.
    boolean distinct = false;
    if (subOp instanceof OpDistinct || subOp instanceof OpReduced) {
        distinct = true;
        subOp = ((Op1) subOp).getSubOp();
    }

    // Extract any projection.
    List<Var> projection = null;
    if (subOp instanceof OpProject) {
        OpProject opProject = (OpProject) subOp;
        projection = opProject.getVars();
        subOp = opProject.getSubOp();
    }

    if (!(subOp instanceof OpOrder))
        return doNothing(opSlice, inSubOp);

    // We have found an (order)
    OpOrder opOrder = (OpOrder) subOp;
    subOp = opOrder.getSubOp();

    // Case 4: distinct over project over order. The project may only move
    // below the top-N if it keeps every variable the sort conditions mention.
    final boolean distinctProject = distinct && projection != null;
    if (distinctProject) {
        List<SortCondition> sortConditions = opOrder.getConditions();
        Set<Var> orderVars = ExprVars.getVarsMentioned(sortConditions);
        if (!projection.containsAll(orderVars))
            return doNothing(opSlice, inSubOp);
    }

    // Rebuild bottom-up: [project] [distinct] top [project] [slice].
    Op newOp = subOp;
    if (distinctProject)
        newOp = new OpProject(newOp, projection);
    if (distinct)
        newOp = OpDistinct.create(newOp);
    // N is in [0, threshold) here, so the narrowing cast cannot truncate.
    newOp = new OpTopN(newOp, (int) N, opOrder.getConditions());
    // Case 3: without distinct the projection stays above the top-N.
    if (!distinct && projection != null)
        newOp = new OpProject(newOp, projection);
    // Keep the OFFSET; the LIMIT is now enforced by the top-N.
    if (opSlice.getStart() > 0)
        newOp = new OpSlice(newOp, opSlice.getStart(), Query.NOLIMIT);
    return newOp;
}
Also used : Op(org.apache.jena.sparql.algebra.Op) SortCondition(org.apache.jena.query.SortCondition) Var(org.apache.jena.sparql.core.Var)

Aggregations

SortCondition (org.apache.jena.query.SortCondition)23 Var (org.apache.jena.sparql.core.Var)10 ArrayList (java.util.ArrayList)5 Binding (org.apache.jena.sparql.engine.binding.Binding)4 BindingComparator (org.apache.jena.sparql.engine.binding.BindingComparator)4 ExprVar (org.apache.jena.sparql.expr.ExprVar)3 SortedDataBag (org.apache.jena.atlas.data.SortedDataBag)2 Op (org.apache.jena.sparql.algebra.Op)2 OpOrder (org.apache.jena.sparql.algebra.op.OpOrder)2 OpProject (org.apache.jena.sparql.algebra.op.OpProject)2 Expr (org.apache.jena.sparql.expr.Expr)2 File (java.io.File)1 HashSet (java.util.HashSet)1 Random (java.util.Random)1 ExprRewriter (org.apache.jena.arq.querybuilder.rewriters.ExprRewriter)1 Node (org.apache.jena.graph.Node)1 QueryCancelledException (org.apache.jena.query.QueryCancelledException)1 QueryExecException (org.apache.jena.query.QueryExecException)1 OpDistinct (org.apache.jena.sparql.algebra.op.OpDistinct)1 VarExprList (org.apache.jena.sparql.core.VarExprList)1