Use of org.apache.rya.accumulo.pcj.iterators.PCJKeyToJoinBindingSetIterator in project incubator-rya by Apache.
The method evaluate of class AccumuloIndexSet.
/**
 * Core evaluation method used during query evaluation - given a collection
 * of binding set constraints, this method finds common binding labels
 * between the constraints and table, uses those to build a prefix scan of
 * the Accumulo table, and creates a solution binding set by iterating over
 * the scan results.
 * <p>
 * Each incoming BindingSet is handled in one of three ways:
 * <ul>
 * <li>a single, empty BindingSet triggers a column-family scan of the table;</li>
 * <li>a non-empty BindingSet that binds none of the PCJ's assured variables
 * is collected for a cross product;</li>
 * <li>every other BindingSet contributes a range and a hash key to the hash
 * join.</li>
 * </ul>
 * The returned iteration is the cross product iterator, the hash join
 * iterator, or an {@link IteratorCombiner} over both, depending on which
 * kinds of work were collected.
 * <p>
 * If anything fails after a scanner has been opened, the scanner is closed
 * before the failure is rethrown so that it is not leaked.
 *
 * @param bindingset - collection of {@link BindingSet}s to be joined with PCJ
 * @return - CloseableIteration over joined results; an empty iteration if
 *         bindingset is empty
 * @throws QueryEvaluationException wrapping any exception raised while
 *         building the ranges, scanners or result iterators
 */
@Override
public CloseableIteration<BindingSet, QueryEvaluationException> evaluate(final Collection<BindingSet> bindingset) throws QueryEvaluationException {
    if (bindingset.isEmpty()) {
        return new IteratorWrapper<BindingSet, QueryEvaluationException>(new HashSet<BindingSet>().iterator());
    }
    final List<BindingSet> crossProductBs = new ArrayList<>();
    final Map<String, org.openrdf.model.Value> constantConstraints = new HashMap<>();
    final Set<Range> hashJoinRanges = new HashSet<>();
    // Sentinel meaning "no cross product range computed yet"; it is compared
    // by identity below, so it must be this exact instance.
    final Range unsetRange = new Range("", true, "~", false);
    Range crossProductRange = unsetRange;
    // NOTE: this reads the varOrder field of the enclosing class, not the
    // per-BindingSet order computed inside the loop.
    String localityGroupOrder = varOrder.get(0);
    int maxPrefixLen = Integer.MIN_VALUE;
    int oldPrefixLen = 0;
    final Multimap<String, BindingSet> bindingSetHashMap = HashMultimap.create();
    HashJoinType joinType = HashJoinType.CONSTANT_JOIN_VAR;
    final Set<String> unAssuredVariables = Sets.difference(getTupleExpr().getBindingNames(), getTupleExpr().getAssuredBindingNames());
    boolean useColumnScan = false;
    final BindingSet constants = getConstantConstraints();
    final boolean containsConstantConstraints = constants.size() > 0;
    // Tracked outside the try block so they can be released if building the
    // result iterators fails part-way through.
    Scanner crossProductScanner = null;
    BatchScanner hashJoinScanner = null;
    try {
        for (final BindingSet bs : bindingset) {
            if (bindingset.size() == 1 && bs.size() == 0) {
                // in this case, only single, empty bindingset, pcj node is
                // first node in query plan - use full Range scan with
                // column family set
                useColumnScan = true;
            }
            // get common vars for PCJ - only use variables associated
            // with assured Bindings
            final QueryBindingSet commonVars = new QueryBindingSet();
            for (final String b : getTupleExpr().getAssuredBindingNames()) {
                final Binding v = bs.getBinding(b);
                if (v != null) {
                    commonVars.addBinding(v);
                }
            }
            // no common vars implies cross product
            final boolean isCrossProd = commonVars.size() == 0 && bs.size() != 0;
            if (isCrossProd) {
                crossProductBs.add(bs);
            }
            // get a varOrder from orders in PCJ table - use at least
            // one common variable
            final BindingSetVariableOrder bsVarOrder = getVarOrder(commonVars.getBindingNames(), constants.getBindingNames());
            // update constant constraints not used in varOrder and
            // update Bindings used to form range by removing unused
            // variables
            commonVars.addAll(constants);
            if (commonVars.size() > bsVarOrder.varOrderLen) {
                final Map<String, Value> valMap = getConstantValueMap();
                // iterate over a copy of the unused variable names
                for (final String s : new HashSet<String>(bsVarOrder.unusedVars)) {
                    if (valMap.containsKey(s) && !constantConstraints.containsKey(s)) {
                        constantConstraints.put(s, valMap.get(s));
                    }
                    commonVars.removeBinding(s);
                }
            }
            if (containsConstantConstraints && (useColumnScan || isCrossProd)) {
                // constant constraints - the cross product range and locality
                // group are taken from the first BindingSet that reaches here
                if (crossProductRange == unsetRange) {
                    crossProductRange = getRange(bsVarOrder.varOrder, commonVars);
                    localityGroupOrder = prefixToOrder(bsVarOrder.varOrder);
                }
            } else if (!useColumnScan && !isCrossProd) {
                // update ranges and add BindingSet to HashJoinMap if not a
                // cross product
                hashJoinRanges.add(getRange(bsVarOrder.varOrder, commonVars));
                final int prefixLen = bsVarOrder.varOrderLen;
                // switch the HashJoinType to VARIABLE_JOIN_VAR as soon as
                // two consecutive prefixes differ in length
                if (oldPrefixLen != 0 && oldPrefixLen != prefixLen) {
                    joinType = HashJoinType.VARIABLE_JOIN_VAR;
                }
                oldPrefixLen = prefixLen;
                // update max prefix len
                maxPrefixLen = Math.max(maxPrefixLen, prefixLen);
                final String key = getHashJoinKey(bsVarOrder.varOrder, commonVars);
                bindingSetHashMap.put(key, bs);
            }
        }

        final boolean hasCrossProduct = useColumnScan || crossProductBs.size() > 0;
        final boolean hasHashJoin = bindingSetHashMap.size() > 0;

        CloseableIteration<BindingSet, QueryEvaluationException> crossProductIter = null;
        if (hasCrossProduct) {
            // cross product (possibly with no cross product constraints)
            crossProductScanner = accCon.createScanner(tablename, auths);
            crossProductScanner.setRange(crossProductRange);
            crossProductScanner.fetchColumnFamily(new Text(localityGroupOrder));
            crossProductIter = new PCJKeyToCrossProductBindingSetIterator(crossProductScanner, crossProductBs, constantConstraints, unAssuredVariables, getTableVarMap());
            if (!hasHashJoin) {
                return crossProductIter;
            }
        }

        // hash join BindingSets exist (or nothing qualified as cross product)
        hashJoinScanner = accCon.createBatchScanner(tablename, auths, 10);
        hashJoinScanner.setRanges(hashJoinRanges);
        final PCJKeyToJoinBindingSetIterator joinKeyIter = new PCJKeyToJoinBindingSetIterator(hashJoinScanner, getTableVarMap(), maxPrefixLen);
        final CloseableIteration<BindingSet, QueryEvaluationException> hashJoinIter = new BindingSetHashJoinIterator(bindingSetHashMap, joinKeyIter, unAssuredVariables, joinType);
        if (crossProductIter == null) {
            return hashJoinIter;
        }

        // both hash join BindingSets and cross product BindingSets exist -
        // combine the two iterators, cross product first
        final List<CloseableIteration<BindingSet, QueryEvaluationException>> iteratorList = new ArrayList<>();
        iteratorList.add(crossProductIter);
        iteratorList.add(hashJoinIter);
        return new IteratorCombiner(iteratorList);
    } catch (final Exception e) {
        // no iterator is handed back to the caller on this path, so nothing
        // else will ever close the scanners opened above
        closeScannersAfterFailure(crossProductScanner, hashJoinScanner, e);
        throw new QueryEvaluationException(e);
    }
}

/**
 * Closes whichever scanners were opened before evaluation failed. A failure
 * while closing is attached to the original failure as a suppressed
 * exception so the original cause is not masked.
 *
 * @param crossProductScanner - scanner to close, or null if none was opened
 * @param hashJoinScanner - batch scanner to close, or null if none was opened
 * @param failure - the exception that aborted evaluation
 */
private void closeScannersAfterFailure(final Scanner crossProductScanner, final BatchScanner hashJoinScanner, final Exception failure) {
    try {
        if (crossProductScanner != null) {
            crossProductScanner.close();
        }
    } catch (final RuntimeException closeFailure) {
        failure.addSuppressed(closeFailure);
    }
    try {
        if (hashJoinScanner != null) {
            hashJoinScanner.close();
        }
    } catch (final RuntimeException closeFailure) {
        failure.addSuppressed(closeFailure);
    }
}
Aggregations