Search in sources :

Example 1 with Tuple2

use of datawave.query.util.Tuple2 in project datawave by NationalSecurityAgency.

the class AncestorUidIntersector method intersect.

/**
 * Intersects two sets of index matches by ancestry rather than by exact uid equality.
 * <p>
 * Matches are first grouped by the root pointer parsed from their uid. Within each group, every pair (one match from each input set) is examined: when
 * the two uids are equal, or one uid is a prefix of the other followed by {@link UIDConstants#DEFAULT_SEPARATOR}, an {@code AND} match is produced at the
 * descendant (longer) uid. The produced match carries the nodes of both inputs plus all delayed nodes and is folded into the result via
 * {@code reduce}.
 *
 * @param uids1
 *            the first set of matches
 * @param uids2
 *            the second set of matches
 * @param delayedNodes
 *            nodes that must be propagated onto every produced match
 * @return the set of matches as accumulated through {@code reduce}; empty when no pair is ancestrally related
 */
@Override
public Set<IndexMatch> intersect(Set<IndexMatch> uids1, Set<IndexMatch> uids2, List<JexlNode> delayedNodes) {
    /*
     * C) Both are small, so we have an easy case where we can prune much of this sub query. Must propagate delayed nodes, though.
     */
    // create a map of correlated UIDS mapped to the root uid. The values keep the two lists of uids separate
    Map<String, Tuple2<List<IndexMatch>, List<IndexMatch>>> correlatedUids = new HashMap<>();
    // put the first set of uids in the correlated list
    for (IndexMatch match1 : uids1) {
        String baseUid = TLD.parseRootPointerFromId(match1.getUid());
        correlatedUids.computeIfAbsent(baseUid, key -> new Tuple2<>(new ArrayList<>(), new ArrayList<>())).first().add(match1);
    }
    // put the second set of uids in the correlated list
    for (IndexMatch match2 : uids2) {
        String baseUid = TLD.parseRootPointerFromId(match2.getUid());
        correlatedUids.computeIfAbsent(baseUid, key -> new Tuple2<>(new ArrayList<>(), new ArrayList<>())).second().add(match2);
    }
    // now for each base uid, if we have uids in the two lists then remap them to the descendent furthest from the root
    Set<IndexMatch> matches = new HashSet<>();
    for (Tuple2<List<IndexMatch>, List<IndexMatch>> indexMatchLists : correlatedUids.values()) {
        if (indexMatchLists.first().isEmpty() || indexMatchLists.second().isEmpty()) {
            // this root uid was only seen on one side, so nothing can intersect
            continue;
        }
        for (IndexMatch uid1 : indexMatchLists.first()) {
            for (IndexMatch uid2 : indexMatchLists.second()) {
                // pick the uid of whichever match is the descendent; null means the two are unrelated
                String descendentUid = null;
                if (uid1.getUid().startsWith(uid2.getUid() + UIDConstants.DEFAULT_SEPARATOR) || uid1.getUid().equals(uid2.getUid())) {
                    // if uid1 starts with uid2 (or equals it), then uid1 is a descendent of uid2
                    descendentUid = uid1.getUid();
                } else if (uid2.getUid().startsWith(uid1.getUid() + UIDConstants.DEFAULT_SEPARATOR)) {
                    // if uid2 starts with uid1, then uid2 is a descendent of uid1
                    descendentUid = uid2.getUid();
                }
                if (descendentUid == null) {
                    continue;
                }
                JexlNodeSet nodeSet = new JexlNodeSet();
                nodeSet.add(uid1.getNode());
                nodeSet.add(uid2.getNode());
                nodeSet.addAll(delayedNodes);
                // both directions build the match from nodeSet.getNodes(); previously one branch passed the JexlNodeSet wrapper itself
                IndexMatch currentMatch = new IndexMatch(Sets.newHashSet(nodeSet.getNodes()), descendentUid, IndexMatchType.AND);
                matches = reduce(matches, currentMatch);
            }
        }
    }
    return matches;
}
Also used : HashMap(java.util.HashMap) Tuple2(datawave.query.util.Tuple2) IndexMatch(datawave.query.index.lookup.IndexMatch) ArrayList(java.util.ArrayList) JexlNodeSet(datawave.query.language.parser.jexl.JexlNodeSet) HashSet(java.util.HashSet)

Example 2 with Tuple2

use of datawave.query.util.Tuple2 in project datawave by NationalSecurityAgency.

the class DefaultQueryPlanner method getQueryRanges.

/**
 * Returns a {@code Tuple2<CloseableIterable<QueryPlan>,Boolean>} whose elements represent the query plans to use for querying the shard table and whether
 * or not this is a "full-table-scan" query.
 * <p>
 * If the query tree has already been pruned to {@code FALSE}, an empty iterable and {@code false} are returned immediately. Otherwise the tree is checked
 * for executability; when it is executable, plans are streamed from the range stream, and when a full table scan turns out to be required the plans
 * come from {@code getFullScanRange} instead (provided full table scans are enabled).
 *
 * @param scannerFactory
 *            handed to {@code initializeRangeStream} when building the range stream
 * @param metadataHelper
 *            used for the executability check and handed to {@code initializeRangeStream}
 * @param config
 *            the query configuration; note that this method may call {@code setCollapseUids(true)} on it when the term count reaches
 *            {@code pushdownThreshold}
 * @param queryTree
 *            the query to plan; must not be null
 * @return a tuple of the query plans and a flag that is {@code true} when a full table scan is being used
 * @throws DatawaveQueryException
 *             declared by this method; the body throws an {@code InvalidQueryException} when the executability state is {@code ERROR} and a
 *             {@code FullTableScansDisallowedException} when a full table scan is required but disabled. An unchecked
 *             {@code UnsupportedOperationException} is thrown when the term threshold is exceeded and the configuration cannot handle that.
 */
public Tuple2<CloseableIterable<QueryPlan>, Boolean> getQueryRanges(ScannerFactory scannerFactory, MetadataHelper metadataHelper, ShardQueryConfiguration config, JexlNode queryTree) throws DatawaveQueryException {
    Preconditions.checkNotNull(queryTree);
    boolean needsFullTable = false;
    CloseableIterable<QueryPlan> ranges = null;
    // if the query has already been reduced to false there is no reason to do more
    if (QueryPruningVisitor.getState(queryTree) == QueryPruningVisitor.TruthState.FALSE) {
        return new Tuple2<>(emptyCloseableIterator(), false);
    }
    // if we still have an unexecutable tree, then a full table scan is
    // required
    // the debug output list is only allocated when debug logging is on; otherwise null is passed to the visitor
    List<String> debugOutput = null;
    if (log.isDebugEnabled()) {
        debugOutput = new ArrayList<>(32);
    }
    STATE state = ExecutableDeterminationVisitor.getState(queryTree, config, metadataHelper, debugOutput);
    if (log.isDebugEnabled()) {
        logDebug(debugOutput, "ExecutableDeterminationVisitor at getQueryRanges:");
    }
    if (state != STATE.EXECUTABLE) {
        // an ERROR state is fatal; any other non-executable state falls back to a full table scan
        if (state == STATE.ERROR) {
            log.warn("After expanding the query, it is determined that the query cannot be executed due to index-only fields mixed with expressions that cannot be run against the index.");
            BadRequestQueryException qe = new BadRequestQueryException(DatawaveErrorCode.INDEX_ONLY_FIELDS_MIXED_INVALID_EXPRESSIONS);
            throw new InvalidQueryException(qe);
        }
        log.warn("After expanding the query, it is determined that the query cannot be executed against the field index and a full table scan is required");
        needsFullTable = true;
    }
    // if nothing so far has forced a full table
    // scan, then lets try to compute ranges
    if (!needsFullTable) {
        // count the terms
        int termCount = TermCountingVisitor.countTerms(queryTree);
        if (termCount >= pushdownThreshold) {
            if (log.isTraceEnabled()) {
                log.trace("pushing down query because it has " + termCount + " when our max is " + pushdownThreshold);
            }
            config.setCollapseUids(true);
        }
        TraceStopwatch stopwatch = config.getTimers().newStartedStopwatch("DefaultQueryPlanner - Begin stream of ranges from inverted index");
        RangeStream stream = initializeRangeStream(config, scannerFactory, metadataHelper);
        ranges = stream.streamPlans(queryTree);
        if (log.isTraceEnabled()) {
            log.trace("query stream is " + stream.context());
        }
        // if a term threshold is exceeded and we cannot handle that, then
        // throw unsupported
        // NOTE(review): the stopwatch started above is not stopped on this throw path — confirm whether that matters for timing output
        boolean thresholdExceeded = StreamContext.EXCEEDED_TERM_THRESHOLD.equals(stream.context());
        if (thresholdExceeded && !config.canHandleExceededTermThreshold()) {
            throw new UnsupportedOperationException(EXCEED_TERM_EXPANSION_ERROR);
        }
        if (StreamContext.UNINDEXED.equals(stream.context())) {
            log.debug("Needs full table scan because of unindexed fields");
            needsFullTable = true;
        } else if (StreamContext.DELAYED_FIELD.equals(stream.context())) {
            log.debug("Needs full table scan because query consists of only delayed expressions");
            needsFullTable = true;
        } else // if an ivarator is required but exceeded value thresholds cannot be handled, force a full table scan
        if (IvaratorRequiredVisitor.isIvaratorRequired(queryTree) && !config.canHandleExceededValueThreshold()) {
            log.debug("Needs full table scan because we exceeded the value threshold and config.canHandleExceededValueThreshold() is false");
            needsFullTable = true;
        }
        stopwatch.stop();
    }
    if (needsFullTable) {
        if (config.getFullTableScanEnabled()) {
            // NOTE(review): any plans obtained from the range stream above are replaced here without being closed — confirm whether they hold resources
            ranges = this.getFullScanRange(config, queryTree);
        } else {
            if (log.isTraceEnabled())
                log.trace("Full table scans are not enabled, query will not be run");
            QueryException qe = new QueryException(DatawaveErrorCode.FULL_TABLE_SCAN_REQUIRED_BUT_DISABLED);
            throw new FullTableScansDisallowedException(qe);
        }
        if (log.isTraceEnabled())
            log.trace("Ranges are " + ranges);
    }
    return new Tuple2<>(ranges, needsFullTable);
}
Also used : BadRequestQueryException(datawave.webservice.query.exception.BadRequestQueryException) STATE(datawave.query.jexl.visitors.ExecutableDeterminationVisitor.STATE) DatawaveQueryException(datawave.query.exceptions.DatawaveQueryException) DatawaveFatalQueryException(datawave.query.exceptions.DatawaveFatalQueryException) PreConditionFailedQueryException(datawave.webservice.query.exception.PreConditionFailedQueryException) DoNotPerformOptimizedQueryException(datawave.query.exceptions.DoNotPerformOptimizedQueryException) NotFoundQueryException(datawave.webservice.query.exception.NotFoundQueryException) QueryException(datawave.webservice.query.exception.QueryException) InvalidQueryException(datawave.query.exceptions.InvalidQueryException) BadRequestQueryException(datawave.webservice.query.exception.BadRequestQueryException) FullTableScansDisallowedException(datawave.query.exceptions.FullTableScansDisallowedException) Tuple2(datawave.query.util.Tuple2) TraceStopwatch(datawave.util.time.TraceStopwatch) RangeStream(datawave.query.index.lookup.RangeStream) InvalidQueryException(datawave.query.exceptions.InvalidQueryException)

Example 3 with Tuple2

use of datawave.query.util.Tuple2 in project datawave by NationalSecurityAgency.

the class FacetedQueryPlanner method getQueryRanges.

/**
 * Produces the query plans for a faceted query.
 * <p>
 * Without precomputed facets the plans come straight from {@code getFullScanRange}. With precomputed facets the executability check is bypassed on the
 * configuration and a {@link FacetQueryPlanVisitor}, after being applied to the query tree, is itself returned as the plan iterable. The second tuple
 * element is always {@code false}.
 *
 * @param scannerFactory
 *            not used by this implementation
 * @param metadataHelper
 *            passed to the facet plan visitor
 * @param config
 *            the query configuration; modified via {@code setBypassExecutabilityCheck()} when precomputed facets are used
 * @param queryTree
 *            the query to plan
 * @return a tuple of the query plans and {@code false}
 * @throws DatawaveQueryException
 *             declared by the overridden signature
 */
@Override
public Tuple2<CloseableIterable<QueryPlan>, Boolean> getQueryRanges(ScannerFactory scannerFactory, MetadataHelper metadataHelper, ShardQueryConfiguration config, JexlNode queryTree) throws DatawaveQueryException {
    // no precomputed facets: fall back to the full scan range
    if (!usePrecomputedFacets) {
        return new Tuple2<>(this.getFullScanRange(config, queryTree), false);
    }

    config.setBypassExecutabilityCheck();

    // walk the query tree with the facet visitor, then hand the visitor back as the plans
    FacetQueryPlanVisitor facetPlans = new FacetQueryPlanVisitor(config, facetedConfig, metadataHelper, facetedConfig.getFacetedFields());
    queryTree.jjtAccept(facetPlans, null);
    return new Tuple2<>(facetPlans, false);
}
Also used : Tuple2(datawave.query.util.Tuple2) FacetQueryPlanVisitor(datawave.query.tables.facets.FacetQueryPlanVisitor)

Example 4 with Tuple2

use of datawave.query.util.Tuple2 in project datawave by NationalSecurityAgency.

the class DelayedPredicatePushDown method visit.

/**
 * Rebuilds a top level AND node so that only one child is left as is and every other child is wrapped as a delayed predicate.
 * <p>
 * Children are paired with their estimated cost and ordered by {@code CostCompartor}. The first child in that order is attached to the new AND node
 * unchanged; each remaining child is wrapped with {@link ASTDelayedPredicate#create} before being attached. The new node takes the parent of the
 * original node. If there are no children, the original node is returned untouched.
 *
 * @param node
 *            the AND node being visited
 * @param data
 *            not used by this method
 * @return the rebuilt AND node, or {@code node} itself when it has no children
 */
@Override
public Object visit(ASTAndNode node, Object data) {
    // we are a top level And
    // NOTE(review): a sorted set may drop children that the comparator considers equal — confirm CostCompartor never returns 0 for distinct children
    SortedSet<Tuple2<JexlNode, Cost>> rankedChildren = new TreeSet<>(new CostCompartor());
    Preconditions.checkNotNull(costEstimator);

    int index = 0;
    while (index < node.jjtGetNumChildren()) {
        JexlNode candidate = node.jjtGetChild(index);
        rankedChildren.add(new Tuple2<>(candidate, costEstimator.computeCostForSubtree(candidate)));
        index++;
    }

    JexlNode rebuiltAnd = new ASTAndNode(ParserTreeConstants.JJTANDNODE);
    rebuiltAnd.jjtSetParent(node.jjtGetParent());

    Iterator<Tuple2<JexlNode, Cost>> ranked = rankedChildren.iterator();
    if (!ranked.hasNext()) {
        return node;
    }

    // the first child in comparator order is kept as a regular (non-delayed) child
    JexlNode leading = ranked.next().first();
    leading.jjtSetParent(rebuiltAnd);
    rebuiltAnd.jjtAddChild(leading, 0);

    // everything after the first child is wrapped as a delayed predicate
    for (int slot = 1; ranked.hasNext(); slot++) {
        JexlNode delayed = ASTDelayedPredicate.create(ranked.next().first());
        rebuiltAnd.jjtAddChild(delayed, slot);
        delayed.jjtSetParent(rebuiltAnd);
    }
    return rebuiltAnd;
}
Also used : Tuple2(datawave.query.util.Tuple2) TreeSet(java.util.TreeSet) JexlNode(org.apache.commons.jexl2.parser.JexlNode) ASTAndNode(org.apache.commons.jexl2.parser.ASTAndNode)

Example 5 with Tuple2

use of datawave.query.util.Tuple2 in project datawave by NationalSecurityAgency.

the class UnionTest method buildFullScannerStream.

/**
 * Build a ScannerStream specifically for testing the ability to seek through the stream.
 * <p>
 * One element is created per shard, in the iteration order of {@code shards}. Each element pairs the shard with a fresh {@code IndexInfo(-1)} that has had
 * an equality node for {@code field} and {@code value} applied to it.
 *
 * @param shards
 *            the shards to create elements for
 * @param field
 *            the field of the equality node
 * @param value
 *            the value of the equality node
 * @return a variable ScannerStream over the built elements
 */
private ScannerStream buildFullScannerStream(SortedSet<String> shards, String field, String value) {
    JexlNode eqNode = JexlNodeFactory.buildEQNode(field, value);
    List<Tuple2<String, IndexInfo>> shardEntries = new ArrayList<>();
    shards.forEach(shardId -> {
        IndexInfo shardInfo = new IndexInfo(-1);
        shardInfo.applyNode(eqNode);
        shardEntries.add(new Tuple2<>(shardId, shardInfo));
    });
    return ScannerStream.variable(shardEntries.iterator(), eqNode);
}
Also used : Tuple2(datawave.query.util.Tuple2) ArrayList(java.util.ArrayList) JexlNode(org.apache.commons.jexl2.parser.JexlNode)

Aggregations

Tuple2 (datawave.query.util.Tuple2)22 JexlNode (org.apache.commons.jexl2.parser.JexlNode)7 ArrayList (java.util.ArrayList)6 Entry (java.util.Map.Entry)3 Set (java.util.Set)3 TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException)3 Test (org.junit.Test)3 HashMultimap (com.google.common.collect.HashMultimap)2 LcNoDiacriticsType (datawave.data.type.LcNoDiacriticsType)2 NoOpType (datawave.data.type.NoOpType)2 NumberType (datawave.data.type.NumberType)2 Type (datawave.data.type.Type)2 DatawaveFatalQueryException (datawave.query.exceptions.DatawaveFatalQueryException)2 EntryParser (datawave.query.index.lookup.EntryParser)2 IndexInfo (datawave.query.index.lookup.IndexInfo)2 ScannerStream (datawave.query.index.lookup.ScannerStream)2 Tuple3 (datawave.query.util.Tuple3)2 QueryException (datawave.webservice.query.exception.QueryException)2 ByteSequence (org.apache.accumulo.core.data.ByteSequence)2 Key (org.apache.accumulo.core.data.Key)2