use of org.apache.jena.query.SortCondition in project jena by apache.
the class TransformDistinctToReduced method isSafe.
/**
 * Decides whether the ordering in {@code opOrder} is compatible with the
 * projected variables, i.e. whether every projected variable has been seen
 * by the time the scan over the sort conditions stops.
 * <p>
 * Each sort condition is checked with {@code isValidSortCondition}, which is
 * handed the running {@code seenVars} set (presumably it records the
 * variables it accepts there - confirm against that helper). Scanning stops
 * at the first condition the helper rejects.
 *
 * @param projectVars the projected variables
 * @param opOrder     the order operation whose sort conditions are inspected
 * @return {@code true} if the number of seen variables equals the number of
 *         projected variables when scanning stops, {@code false} otherwise
 */
protected boolean isSafe(Set<Var> projectVars, OpOrder opOrder) {
    Set<Var> seenVars = new HashSet<>();
    // For the optimization to be safe all project variables must appear in
    // the ordering prior to any unprojected variables
    // Ordering by expressions is fine provided they use only projected
    // variables
    for (SortCondition cond : opOrder.getConditions()) {
        if (!isValidSortCondition(cond, projectVars, seenVars)) {
            // Stop at the first invalid condition; the final size check
            // below decides the outcome.
            break;
        }
        // As soon as all project variables have been seen, any
        // further sort conditions are irrelevant
        if (seenVars.size() == projectVars.size()) {
            return true;
        }
    }
    // The projects vars must all have been seen.
    return seenVars.size() == projectVars.size();
}
use of org.apache.jena.query.SortCondition in project jena by apache.
the class TransformEliminateAssignments method processConditions.
/**
 * Builds a new list of sort conditions in which {@code var} has been
 * substituted by its assignment expression (as returned by
 * {@code getAssignExpr(var)}) in every condition's expression.
 *
 * @param baseConditions      the conditions to use when no processed
 *                            conditions are available
 * @param processedConditions previously processed conditions, or
 *                            {@code null} to start from {@code baseConditions}
 * @param var                 the variable to substitute
 * @return a fresh list of rewritten conditions, each keeping the direction
 *         of the condition it was derived from
 */
private List<SortCondition> processConditions(List<SortCondition> baseConditions, List<SortCondition> processedConditions, Var var) {
    // Continue from the already-processed conditions when there are any.
    List<SortCondition> source;
    if (processedConditions == null) {
        source = baseConditions;
    } else {
        source = processedConditions;
    }

    List<SortCondition> rewritten = new ArrayList<>();
    for (SortCondition original : source) {
        Expr expression = original.getExpression();
        ExprTransformSubstitute substitution = new ExprTransformSubstitute(var, getAssignExpr(var));
        Expr substituted = ExprTransformer.transform(substitution, expression);
        rewritten.add(new SortCondition(substituted, original.getDirection()));
    }
    return rewritten;
}
use of org.apache.jena.query.SortCondition in project jena by apache.
the class TestSortedDataBag method testSorting.
/**
 * Feeds random bindings through a {@code SortedDataBag} and checks that the
 * bag yields them in the same order as an in-memory sort using the same
 * comparator.
 *
 * @param numBindings number of random bindings to generate
 * @param threshold   count handed to the bag's {@code ThresholdPolicyCount}
 */
private void testSorting(int numBindings, int threshold) {
    List<Binding> unsorted = randomBindings(numBindings);

    // Sort keys: "8" ascending, then "1" ascending, then "0" descending.
    List<SortCondition> sortKeys = new ArrayList<>();
    sortKeys.add(new SortCondition(new ExprVar("8"), Query.ORDER_ASCENDING));
    sortKeys.add(new SortCondition(new ExprVar("1"), Query.ORDER_ASCENDING));
    sortKeys.add(new SortCondition(new ExprVar("0"), Query.ORDER_DESCENDING));
    BindingComparator ordering = new BindingComparator(sortKeys);

    SortedDataBag<Binding> bag = new SortedDataBag<>(
            new ThresholdPolicyCount<Binding>(threshold),
            SerializationFactoryFinder.bindingSerializationFactory(),
            ordering);
    List<Binding> drained = new ArrayList<>();
    try {
        bag.addAll(unsorted);
        // Drain the bag in the order it produces.
        Iterator<Binding> cursor = bag.iterator();
        for (; cursor.hasNext(); ) {
            drained.add(cursor.next());
        }
        Iter.close(cursor);
    } finally {
        bag.close();
    }

    // The reference result: the same input sorted in memory.
    Collections.sort(unsorted, ordering);
    assertEquals(unsorted, drained);
}
use of org.apache.jena.query.SortCondition in project jena by apache.
the class TestSortedDataBag method testTemporaryFilesAreCleanedUpAfterCompletion.
/**
 * Checks that the spill files reported by a {@code SortedDataBag} exist
 * while the bag is in use and no longer exist once the bag has been closed.
 */
@Test
public void testTemporaryFilesAreCleanedUpAfterCompletion() {
    List<Binding> unsorted = randomBindings(500);

    List<SortCondition> sortKeys = new ArrayList<>();
    sortKeys.add(new SortCondition(new ExprVar("8"), Query.ORDER_ASCENDING));
    BindingComparator ordering = new BindingComparator(sortKeys);

    SortedDataBag<Binding> bag = new SortedDataBag<>(
            new ThresholdPolicyCount<Binding>(10),
            SerializationFactoryFinder.bindingSerializationFactory(),
            ordering);
    List<File> spillFiles = new ArrayList<>();
    try {
        bag.addAll(unsorted);
        spillFiles.addAll(bag.getSpillFiles());

        // 500 bindings divided into 50 chunks (49 in files, and 1 in memory)
        assertEquals(49, countExistingFiles(spillFiles));

        // Read the bag to the end before closing it.
        Iterator<Binding> cursor = bag.iterator();
        for (; cursor.hasNext(); ) {
            cursor.next();
        }
        Iter.close(cursor);
    } finally {
        bag.close();
    }

    // After close() none of the recorded spill files may remain on disk.
    assertEquals(0, countExistingFiles(spillFiles));
}

/** Returns how many of the given files currently exist on disk. */
private static int countExistingFiles(List<File> files) {
    int existing = 0;
    for (File candidate : files) {
        if (candidate.exists()) {
            existing++;
        }
    }
    return existing;
}
use of org.apache.jena.query.SortCondition in project jena by apache.
the class TransformTopN method transform.
/* For reference: from the algebra generation of a query, the order of operations is:
* Limit/Offset
* Distinct/reduce
* Project
* OrderBy
* Values
* Having
* Select Expressions
* Group
* but note that a subquery can be used to create other orders.
*/
/**
 * Rewrites a slice over an (optionally distinct/reduced and/or projected)
 * order into a top-N operation, so that a full sort can be avoided.
 * <p>
 * Returns the result of {@code doNothing(opSlice, inSubOp)} whenever the
 * rewrite is not applicable: the slice has no limit, limit+offset is not
 * below the configured threshold (or overflows), there is no (order) under
 * the slice, or a distinct/reduced projection drops a variable used by the
 * sort conditions.
 *
 * @param opSlice the slice operation being transformed
 * @param inSubOp the (already transformed) sub-operation of the slice
 * @return the rewritten operation, or the {@code doNothing} result
 */
@Override
public Op transform(final OpSlice opSlice, final Op inSubOp) {
    /*
     * This looks for all the following cases of slice with optionally
     * distinct and/or project followed by order. It is quicker to execute
     * by avoiding the full sort, just track the top items.
     *
     * + slice-order => topN
     * + slice-distinct|reduced-order => top-distinct
     * + slice-project-order => project-top
     * + slice distinct project order => topN distinct project (only some cases)
     *
     * If the slice has an offset, a (slice X _) is added. (slice 0 _) is a no-op and is not added.
     *
     * In detail:
     *
     * Case 1:
     * (slice X N
     *   (order (cond) PATTERN) )
     * ==>
     * (slice X _
     *   (top (X+N cond) PATTERN) )
     *
     * Case 2:
     * (slice X N
     *   (distinct or reduced
     *     (order (cond) PATTERN) ))
     * ==>
     * (slice X _
     *   (top (X+N cond) (distinct PATTERN))
     *
     * Case 3:
     * (slice X N
     *   (project (vars)
     *     (order (cond)
     *       PATTERN ))))
     * ==>
     * (slice X _
     *   (project (vars)
     *     (top (X+N cond) PATTERN) ))
     *
     * Case 4:
     * (slice X N
     *   (distinct
     *     (project (vars)
     *       (order (cond) PATTERN) )))
     * ==>
     * If project-order can be swapped,
     * (slice X _
     *   (top (X+N) (cond)
     *     (distinct
     *       (project (vars)
     *         PATTERN) )))
     *
     * Care needed: because of the need to keep distinct, we can't
     * process like case 3. Reversing (order) and (project) can only
     * be done if the projection variables include all variables used
     * by the sort conditions.
     *
     * When there is no project, we can push the distinct under the topN,
     * but, when there is, the sort variables may include one projected away,
     * it's not possible to do this with project. The key is that
     * distinct-project can change the number of rows in ways that mean
     * we can not predict the topN slice.
     */
    /* Algorithm:
     * Test to see if the slice is small enough.
     * Extract distinct/reduce, and projection details.
     * Is it an (order)? If no - not applicable.
     *
     * If slice-project-order
     *   Treat as project-slice-order
     *   Output project-top
     * If slice-distinct-project-order
     *   Test to see if project and order can swap
     *   If they can, output top-distinct-project
     *   else no action.
     *
     * Add a slice if there was an OFFSET.
     * Distinct and reduce are treated as distinct.
     */
    Op subOp = inSubOp;
    if (opSlice.getLength() == Query.NOLIMIT) {
        return doNothing(opSlice, inSubOp);
    }
    long limit = opSlice.getLength();
    long offset = (opSlice.getStart() != Query.NOLIMIT) ? opSlice.getStart() : 0L;
    long N = limit + offset;
    int threshold = (Integer) ARQ.getContext().get(externalSortBufferSize, defaultTopNSortingThreshold);
    // A negative N means limit+offset overflowed the long range (or the
    // slice is malformed); such a value must not reach the (int) cast
    // below, so treat it like "too large" and leave the slice untouched.
    if (N < 0 || N >= threshold) {
        return doNothing(opSlice, inSubOp);
    }

    // Extract any distinct/reduce. Reduced is deliberately handled as
    // distinct from here on.
    boolean distinct = false;
    if (subOp instanceof OpDistinct || subOp instanceof OpReduced) {
        distinct = true;
        subOp = ((Op1) subOp).getSubOp();
    }

    // Extract any projection.
    List<Var> projection = null;
    if (subOp instanceof OpProject) {
        OpProject opProject = (OpProject) subOp;
        projection = opProject.getVars();
        subOp = opProject.getSubOp();
    }

    if (!(subOp instanceof OpOrder)) {
        return doNothing(opSlice, inSubOp);
    }
    // We have found an (order)
    OpOrder opOrder = (OpOrder) subOp;
    subOp = opOrder.getSubOp();

    if (distinct && projection != null) {
        // Case 4: project and order may only swap when every variable the
        // sort conditions mention survives the projection.
        List<SortCondition> sortConditions = opOrder.getConditions();
        Set<Var> orderVars = ExprVars.getVarsMentioned(sortConditions);
        if (!projection.containsAll(orderVars)) {
            return doNothing(opSlice, inSubOp);
        }
    }

    Op newOp = subOp;
    if (distinct) {
        // Cases 2 and 4: (project and) distinct go underneath the top-N.
        if (projection != null) {
            newOp = new OpProject(newOp, projection);
        }
        newOp = OpDistinct.create(newOp);
    }
    // Safe narrowing: 0 <= N < threshold, and threshold is an int.
    newOp = new OpTopN(newOp, (int) N, opOrder.getConditions());
    if (!distinct && projection != null) {
        // Case 3: the projection stays above the top-N.
        newOp = new OpProject(newOp, projection);
    }
    // Re-apply the OFFSET; the LIMIT is already enforced by the top-N.
    if (opSlice.getStart() > 0) {
        newOp = new OpSlice(newOp, opSlice.getStart(), Query.NOLIMIT);
    }
    return newOp;
}
Aggregations