Search in sources :

Example 1 with BindingSetHashJoinIterator

Use of org.apache.rya.accumulo.pcj.iterators.BindingSetHashJoinIterator in the incubator-rya project by Apache.

The evaluate method of the class AccumuloIndexSet.

/**
 * Core evaluation method used during query evaluation - given a collection
 * of binding set constraints, this method finds common binding labels
 * between the constraints and table, uses those to build a prefix scan of
 * the Accumulo table, and creates a solution binding set by iterating over
 * the scan results.
 * <p>
 * Each incoming {@link BindingSet} is handled in one of three ways:
 * <ul>
 * <li>a single, empty binding set switches the method to a full-range scan
 * restricted to one column family ("column scan");</li>
 * <li>a non-empty binding set sharing no assured variable with the PCJ is
 * collected for a cross product with the scan results;</li>
 * <li>any other binding set contributes a range to a batch scan and is
 * stored in a hash map so it can be hash-joined with the scan results.</li>
 * </ul>
 * Depending on which kinds were seen, the result is a cross product
 * iterator, a hash join iterator, or a combination of both. Scanners opened
 * by this method are closed again if building the result iterator fails.
 *
 * @param bindingset - collection of {@link BindingSet}s to be joined with PCJ
 * @return - CloseableIteration over joined results; an empty iteration if
 *         {@code bindingset} is empty
 * @throws QueryEvaluationException wrapping any exception raised while
 *         building the ranges, scanners or iterators
 */
@Override
public CloseableIteration<BindingSet, QueryEvaluationException> evaluate(final Collection<BindingSet> bindingset) throws QueryEvaluationException {
    if (bindingset.isEmpty()) {
        return new IteratorWrapper<BindingSet, QueryEvaluationException>(new HashSet<BindingSet>().iterator());
    }
    final List<BindingSet> crossProductBs = new ArrayList<>();
    final Map<String, Value> constantConstraints = new HashMap<>();
    final Set<Range> hashJoinRanges = new HashSet<>();
    // Sentinel meaning "no constrained cross product range has been chosen
    // yet"; it is compared by identity below, so it must stay a single instance.
    final Range unsetRange = new Range("", true, "~", false);
    Range crossProductRange = unsetRange;
    String localityGroupOrder = varOrder.get(0);
    int maxPrefixLen = Integer.MIN_VALUE;
    int oldPrefixLen = 0;
    final Multimap<String, BindingSet> bindingSetHashMap = HashMultimap.create();
    HashJoinType joinType = HashJoinType.CONSTANT_JOIN_VAR;
    // Read the binding names once instead of once per incoming binding set.
    final Set<String> allBindingNames = getTupleExpr().getBindingNames();
    final Set<String> assuredBindingNames = getTupleExpr().getAssuredBindingNames();
    final Set<String> unAssuredVariables = Sets.difference(allBindingNames, assuredBindingNames);
    boolean useColumnScan = false;
    final BindingSet constants = getConstantConstraints();
    final boolean containsConstantConstraints = constants.size() > 0;
    try {
        for (final BindingSet bs : bindingset) {
            if (bindingset.size() == 1 && bs.size() == 0) {
                // in this case, only single, empty bindingset, pcj node is
                // first node in query plan - use full Range scan with
                // column family set
                useColumnScan = true;
            }
            // get common vars for PCJ - only use variables associated
            // with assured Bindings
            final QueryBindingSet commonVars = new QueryBindingSet();
            for (final String name : assuredBindingNames) {
                final Binding binding = bs.getBinding(name);
                if (binding != null) {
                    commonVars.addBinding(binding);
                }
            }
            // no common vars implies cross product
            final boolean isCrossProd = commonVars.size() == 0 && bs.size() != 0;
            if (isCrossProd) {
                crossProductBs.add(bs);
            }
            // get a varOrder from orders in PCJ table - use at least
            // one common variable (named so it does not hide the varOrder field)
            final BindingSetVariableOrder bsVarOrder = getVarOrder(commonVars.getBindingNames(), constants.getBindingNames());
            // update constant constraints not used in varOrder and
            // update Bindings used to form range by removing unused
            // variables
            commonVars.addAll(constants);
            if (commonVars.size() > bsVarOrder.varOrderLen) {
                final Map<String, Value> valMap = getConstantValueMap();
                // iterate over a copy of the unused variable names
                for (final String unused : new HashSet<String>(bsVarOrder.unusedVars)) {
                    if (valMap.containsKey(unused) && !constantConstraints.containsKey(unused)) {
                        constantConstraints.put(unused, valMap.get(unused));
                    }
                    commonVars.removeBinding(unused);
                }
            }
            if (containsConstantConstraints && (useColumnScan || isCrossProd)) {
                // constant constraints narrow the cross product scan; only
                // the first such binding set determines the range
                if (crossProductRange == unsetRange) {
                    crossProductRange = getRange(bsVarOrder.varOrder, commonVars);
                    localityGroupOrder = prefixToOrder(bsVarOrder.varOrder);
                }
            } else if (!useColumnScan && !isCrossProd) {
                // update ranges and add BindingSet to HashJoinMap if not a
                // cross product
                hashJoinRanges.add(getRange(bsVarOrder.varOrder, commonVars));
                final int prefixLen = bsVarOrder.varOrderLen;
                // once two binding sets disagree on prefix length the join
                // variables are no longer constant
                if (oldPrefixLen != 0 && oldPrefixLen != prefixLen) {
                    joinType = HashJoinType.VARIABLE_JOIN_VAR;
                }
                oldPrefixLen = prefixLen;
                // update max prefix len
                maxPrefixLen = Math.max(maxPrefixLen, prefixLen);
                final String key = getHashJoinKey(bsVarOrder.varOrder, commonVars);
                bindingSetHashMap.put(key, bs);
            }
        }

        final boolean hasCrossProduct = useColumnScan || !crossProductBs.isEmpty();
        final boolean hasHashJoin = !bindingSetHashMap.isEmpty();

        if (hasCrossProduct && !hasHashJoin) {
            // only cross product (or column scan) BindingSets exist
            final Scanner scanner = accCon.createScanner(tablename, auths);
            try {
                scanner.setRange(crossProductRange);
                scanner.fetchColumnFamily(new Text(localityGroupOrder));
                return new PCJKeyToCrossProductBindingSetIterator(scanner, crossProductBs, constantConstraints, unAssuredVariables, getTableVarMap());
            } catch (final Exception e) {
                // do not leak the scanner if the iterator could not be built
                scanner.close();
                throw e;
            }
        } else if (hasCrossProduct) {
            // in this case, both hash join BindingSets and cross product
            // BindingSets exist
            // create an iterator to evaluate cross product and an iterator
            // for hash join, then combine
            final Scanner crossProductScanner = accCon.createScanner(tablename, auths);
            BatchScanner hashJoinScanner = null;
            try {
                final List<CloseableIteration<BindingSet, QueryEvaluationException>> iteratorList = new ArrayList<>();
                // create cross product iterator
                crossProductScanner.setRange(crossProductRange);
                crossProductScanner.fetchColumnFamily(new Text(localityGroupOrder));
                iteratorList.add(new PCJKeyToCrossProductBindingSetIterator(crossProductScanner, crossProductBs, constantConstraints, unAssuredVariables, getTableVarMap()));
                // create hash join iterator
                hashJoinScanner = accCon.createBatchScanner(tablename, auths, 10);
                hashJoinScanner.setRanges(hashJoinRanges);
                final PCJKeyToJoinBindingSetIterator joinIterator = new PCJKeyToJoinBindingSetIterator(hashJoinScanner, getTableVarMap(), maxPrefixLen);
                iteratorList.add(new BindingSetHashJoinIterator(bindingSetHashMap, joinIterator, unAssuredVariables, joinType));
                // combine iterators
                return new IteratorCombiner(iteratorList);
            } catch (final Exception e) {
                // release whichever scanners were opened before the failure
                crossProductScanner.close();
                if (hashJoinScanner != null) {
                    hashJoinScanner.close();
                }
                throw e;
            }
        } else {
            // only hash join BindingSets exist
            final BatchScanner scanner = accCon.createBatchScanner(tablename, auths, 10);
            try {
                // only need to create hash join iterator
                scanner.setRanges(hashJoinRanges);
                final PCJKeyToJoinBindingSetIterator joinIterator = new PCJKeyToJoinBindingSetIterator(scanner, getTableVarMap(), maxPrefixLen);
                return new BindingSetHashJoinIterator(bindingSetHashMap, joinIterator, unAssuredVariables, joinType);
            } catch (final Exception e) {
                // do not leak the batch scanner if the iterator could not be built
                scanner.close();
                throw e;
            }
        }
    } catch (final Exception e) {
        throw new QueryEvaluationException(e);
    }
}
Also used : BatchScanner(org.apache.accumulo.core.client.BatchScanner) Scanner(org.apache.accumulo.core.client.Scanner) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) BatchScanner(org.apache.accumulo.core.client.BatchScanner) PCJKeyToJoinBindingSetIterator(org.apache.rya.accumulo.pcj.iterators.PCJKeyToJoinBindingSetIterator) List(java.util.List) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Binding(org.openrdf.query.Binding) QueryBindingSet(org.openrdf.query.algebra.evaluation.QueryBindingSet) BindingSet(org.openrdf.query.BindingSet) HashJoinType(org.apache.rya.accumulo.pcj.iterators.BindingSetHashJoinIterator.HashJoinType) PCJKeyToCrossProductBindingSetIterator(org.apache.rya.accumulo.pcj.iterators.PCJKeyToCrossProductBindingSetIterator) Text(org.apache.hadoop.io.Text) Range(org.apache.accumulo.core.data.Range) QueryBindingSet(org.openrdf.query.algebra.evaluation.QueryBindingSet) SailException(org.openrdf.sail.SailException) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) PcjException(org.apache.rya.indexing.pcj.storage.PcjException) BindingSetConversionException(org.apache.rya.indexing.pcj.storage.accumulo.BindingSetConverter.BindingSetConversionException) QueryEvaluationException(org.openrdf.query.QueryEvaluationException) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) NoSuchElementException(java.util.NoSuchElementException) MalformedQueryException(org.openrdf.query.MalformedQueryException) PCJStorageException(org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage.PCJStorageException) AccumuloException(org.apache.accumulo.core.client.AccumuloException) IteratorWrapper(org.apache.rya.api.utils.IteratorWrapper) QueryEvaluationException(org.openrdf.query.QueryEvaluationException) Value(org.openrdf.model.Value) IteratorCombiner(org.apache.rya.accumulo.pcj.iterators.IteratorCombiner) 
BindingSetHashJoinIterator(org.apache.rya.accumulo.pcj.iterators.BindingSetHashJoinIterator)

Aggregations

ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 List (java.util.List)1 NoSuchElementException (java.util.NoSuchElementException)1 AccumuloException (org.apache.accumulo.core.client.AccumuloException)1 AccumuloSecurityException (org.apache.accumulo.core.client.AccumuloSecurityException)1 BatchScanner (org.apache.accumulo.core.client.BatchScanner)1 Scanner (org.apache.accumulo.core.client.Scanner)1 TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException)1 Range (org.apache.accumulo.core.data.Range)1 Text (org.apache.hadoop.io.Text)1 BindingSetHashJoinIterator (org.apache.rya.accumulo.pcj.iterators.BindingSetHashJoinIterator)1 HashJoinType (org.apache.rya.accumulo.pcj.iterators.BindingSetHashJoinIterator.HashJoinType)1 IteratorCombiner (org.apache.rya.accumulo.pcj.iterators.IteratorCombiner)1 PCJKeyToCrossProductBindingSetIterator (org.apache.rya.accumulo.pcj.iterators.PCJKeyToCrossProductBindingSetIterator)1 PCJKeyToJoinBindingSetIterator (org.apache.rya.accumulo.pcj.iterators.PCJKeyToJoinBindingSetIterator)1 IteratorWrapper (org.apache.rya.api.utils.IteratorWrapper)1 PcjException (org.apache.rya.indexing.pcj.storage.PcjException)1 PCJStorageException (org.apache.rya.indexing.pcj.storage.PrecomputedJoinStorage.PCJStorageException)1