use of datawave.query.util.Tuple2 in project datawave by NationalSecurityAgency.
the class AncestorUidIntersector method intersect.
/**
 * Intersects two sets of index matches by ancestry: matches are grouped by the root pointer parsed from their uid
 * (via {@code TLD.parseRootPointerFromId}), and within each group every pair (one match from each input) in which
 * one uid equals, or is a descendant of, the other yields a new AND match keyed on the uid furthest from the root.
 * <p>
 * A uid is treated as a descendant of another when it starts with the other uid followed by
 * {@code UIDConstants.DEFAULT_SEPARATOR}. Each produced match carries the nodes of both source matches plus all
 * {@code delayedNodes}, and is folded into the running result through {@code reduce}.
 *
 * @param uids1
 *            the first set of matches
 * @param uids2
 *            the second set of matches
 * @param delayedNodes
 *            delayed nodes that are added to every produced match
 * @return the reduced set of intersected matches; empty when no pair is related
 */
@Override
public Set<IndexMatch> intersect(Set<IndexMatch> uids1, Set<IndexMatch> uids2, List<JexlNode> delayedNodes) {
    // Map of root uid -> (matches from uids1, matches from uids2). The two lists are kept separate so that
    // only cross-set pairs are compared below.
    Map<String, Tuple2<ArrayList<IndexMatch>, ArrayList<IndexMatch>>> correlatedUids = new HashMap<>();

    for (IndexMatch match1 : uids1) {
        String baseUid = TLD.parseRootPointerFromId(match1.getUid());
        correlatedUids.computeIfAbsent(baseUid, key -> new Tuple2<>(new ArrayList<IndexMatch>(), new ArrayList<IndexMatch>())).first().add(match1);
    }
    for (IndexMatch match2 : uids2) {
        String baseUid = TLD.parseRootPointerFromId(match2.getUid());
        correlatedUids.computeIfAbsent(baseUid, key -> new Tuple2<>(new ArrayList<IndexMatch>(), new ArrayList<IndexMatch>())).second().add(match2);
    }

    // For each root uid that has matches on both sides, remap related pairs to the descendant furthest from the root.
    Set<IndexMatch> matches = new HashSet<>();
    for (Tuple2<ArrayList<IndexMatch>, ArrayList<IndexMatch>> indexMatchLists : correlatedUids.values()) {
        if (indexMatchLists.first().isEmpty() || indexMatchLists.second().isEmpty()) {
            continue;
        }
        for (IndexMatch uid1 : indexMatchLists.first()) {
            String id1 = uid1.getUid();
            // invariant for the inner loop: the prefix any descendant of uid1 must start with
            String descendantPrefix1 = id1 + UIDConstants.DEFAULT_SEPARATOR;
            for (IndexMatch uid2 : indexMatchLists.second()) {
                String id2 = uid2.getUid();

                String deepestUid;
                if (id1.equals(id2) || id1.startsWith(id2 + UIDConstants.DEFAULT_SEPARATOR)) {
                    // uid1 is the same document as uid2, or a descendant of it
                    deepestUid = id1;
                } else if (id2.startsWith(descendantPrefix1)) {
                    // uid2 is a descendant of uid1
                    deepestUid = id2;
                } else {
                    // same root but neither is an ancestor of the other
                    continue;
                }

                JexlNodeSet nodeSet = new JexlNodeSet();
                nodeSet.add(uid1.getNode());
                nodeSet.add(uid2.getNode());
                nodeSet.addAll(delayedNodes);
                // both directions now build the node set the same way
                IndexMatch currentMatch = new IndexMatch(Sets.newHashSet(nodeSet.getNodes()), deepestUid, IndexMatchType.AND);
                matches = reduce(matches, currentMatch);
            }
        }
    }
    return matches;
}
use of datawave.query.util.Tuple2 in project datawave by NationalSecurityAgency.
the class DefaultQueryPlanner method getQueryRanges.
/**
 * Builds the query plans for the given query tree and reports whether a full table scan was required.
 * <p>
 * The returned tuple holds the {@code CloseableIterable<QueryPlan>} to execute as its first element and, as its
 * second element, {@code true} when the plans come from {@code getFullScanRange} rather than from the range stream.
 * A query tree that has already been pruned to {@code FALSE} yields an empty iterable and {@code false}.
 * <p>
 * An {@code InvalidQueryException} is thrown when the executability state is {@code ERROR}, a
 * {@code FullTableScansDisallowedException} when a full table scan is needed but disabled in the configuration, and
 * an {@code UnsupportedOperationException} when the term threshold is exceeded and the configuration cannot handle it.
 *
 * @param scannerFactory
 *            passed to {@code initializeRangeStream}
 * @param metadataHelper
 *            used for the executability check and passed to {@code initializeRangeStream}
 * @param config
 *            the query configuration; {@code setCollapseUids(true)} is called on it when the term count reaches the pushdown threshold
 * @param queryTree
 *            the query to plan; must not be null
 * @return the query plans paired with the full-table-scan flag
 * @throws DatawaveQueryException
 *             if the query cannot be planned
 */
public Tuple2<CloseableIterable<QueryPlan>, Boolean> getQueryRanges(ScannerFactory scannerFactory, MetadataHelper metadataHelper, ShardQueryConfiguration config, JexlNode queryTree) throws DatawaveQueryException {
    Preconditions.checkNotNull(queryTree);

    // if the query has already been reduced to false there is no reason to do more
    if (QueryPruningVisitor.getState(queryTree) == QueryPruningVisitor.TruthState.FALSE) {
        return new Tuple2<>(emptyCloseableIterator(), false);
    }

    // only collect the visitor's debug output when it will actually be logged
    List<String> debugOutput = log.isDebugEnabled() ? new ArrayList<String>(32) : null;
    STATE executability = ExecutableDeterminationVisitor.getState(queryTree, config, metadataHelper, debugOutput);
    if (log.isDebugEnabled()) {
        logDebug(debugOutput, "ExecutableDeterminationVisitor at getQueryRanges:");
    }

    if (executability == STATE.ERROR) {
        log.warn("After expanding the query, it is determined that the query cannot be executed due to index-only fields mixed with expressions that cannot be run against the index.");
        throw new InvalidQueryException(new BadRequestQueryException(DatawaveErrorCode.INDEX_ONLY_FIELDS_MIXED_INVALID_EXPRESSIONS));
    }

    // any remaining non-executable state means a full table scan is required
    boolean fullScanRequired = executability != STATE.EXECUTABLE;
    if (fullScanRequired) {
        log.warn("After expanding the query, it is determined that the query cannot be executed against the field index and a full table scan is required");
    }

    CloseableIterable<QueryPlan> plans = null;

    // the tree itself did not force a full table scan, so try to compute ranges from the index
    if (!fullScanRequired) {
        int numTerms = TermCountingVisitor.countTerms(queryTree);
        if (numTerms >= pushdownThreshold) {
            if (log.isTraceEnabled()) {
                log.trace("pushing down query because it has " + numTerms + " when our max is " + pushdownThreshold);
            }
            config.setCollapseUids(true);
        }

        TraceStopwatch timer = config.getTimers().newStartedStopwatch("DefaultQueryPlanner - Begin stream of ranges from inverted index");

        RangeStream rangeStream = initializeRangeStream(config, scannerFactory, metadataHelper);
        plans = rangeStream.streamPlans(queryTree);
        if (log.isTraceEnabled()) {
            log.trace("query stream is " + rangeStream.context());
        }

        // an exceeded term threshold that the configuration cannot handle is unsupported
        if (StreamContext.EXCEEDED_TERM_THRESHOLD.equals(rangeStream.context()) && !config.canHandleExceededTermThreshold()) {
            throw new UnsupportedOperationException(EXCEED_TERM_EXPANSION_ERROR);
        }

        // the stream context (or an unsupported ivarator requirement) may still force a full table scan
        if (StreamContext.UNINDEXED.equals(rangeStream.context())) {
            log.debug("Needs full table scan because of unindexed fields");
            fullScanRequired = true;
        } else if (StreamContext.DELAYED_FIELD.equals(rangeStream.context())) {
            log.debug("Needs full table scan because query consists of only delayed expressions");
            fullScanRequired = true;
        } else if (IvaratorRequiredVisitor.isIvaratorRequired(queryTree) && !config.canHandleExceededValueThreshold()) {
            log.debug("Needs full table scan because we exceeded the value threshold and config.canHandleExceededValueThreshold() is false");
            fullScanRequired = true;
        }

        timer.stop();
    }

    if (fullScanRequired) {
        if (!config.getFullTableScanEnabled()) {
            if (log.isTraceEnabled()) {
                log.trace("Full table scans are not enabled, query will not be run");
            }
            throw new FullTableScansDisallowedException(new QueryException(DatawaveErrorCode.FULL_TABLE_SCAN_REQUIRED_BUT_DISABLED));
        }
        plans = this.getFullScanRange(config, queryTree);
        if (log.isTraceEnabled()) {
            log.trace("Ranges are " + plans);
        }
    }

    return new Tuple2<>(plans, fullScanRequired);
}
use of datawave.query.util.Tuple2 in project datawave by NationalSecurityAgency.
the class FacetedQueryPlanner method getQueryRanges.
/**
 * Produces the query plans for a faceted query. The full-table-scan flag in the returned tuple is always
 * {@code false}, including when the plans come from {@code getFullScanRange}.
 *
 * @param scannerFactory
 *            not used by this implementation
 * @param metadataHelper
 *            passed to the {@code FacetQueryPlanVisitor} when precomputed facets are used
 * @param config
 *            the query configuration; the executability check is bypassed on it when precomputed facets are used
 * @param queryTree
 *            the query to plan
 * @return the query plans paired with {@code false}
 * @throws DatawaveQueryException
 *             if the query cannot be planned
 */
@Override
public Tuple2<CloseableIterable<QueryPlan>, Boolean> getQueryRanges(ScannerFactory scannerFactory, MetadataHelper metadataHelper, ShardQueryConfiguration config, JexlNode queryTree) throws DatawaveQueryException {
    // without precomputed facets the plans are simply the full scan range
    if (!usePrecomputedFacets) {
        return new Tuple2<>(this.getFullScanRange(config, queryTree), false);
    }

    config.setBypassExecutabilityCheck();

    // the visitor walks the query tree and is itself returned as the iterable of plans
    FacetQueryPlanVisitor planVisitor = new FacetQueryPlanVisitor(config, facetedConfig, metadataHelper, facetedConfig.getFacetedFields());
    queryTree.jjtAccept(planVisitor, null);
    return new Tuple2<>(planVisitor, false);
}
use of datawave.query.util.Tuple2 in project datawave by NationalSecurityAgency.
the class DelayedPredicatePushDown method visit.
/**
 * Rebuilds a top level AND node: its children are ordered by {@code CostCompartor} on their estimated cost, the
 * first child in that order is kept as is, and every other child is wrapped with {@code ASTDelayedPredicate.create}.
 *
 * @param node
 *            the AND node being visited
 * @param data
 *            not used by this method
 * @return a new AND node parented to {@code node}'s parent, or {@code node} itself when it has no children
 */
@Override
public Object visit(ASTAndNode node, Object data) {
    Preconditions.checkNotNull(costEstimator);

    // NOTE(review): a TreeSet drops elements its comparator reports as equal; if CostCompartor can return 0 for
    // two different children, one of them would be lost from the rebuilt AND - confirm against CostCompartor.
    SortedSet<Tuple2<JexlNode, Cost>> rankedChildren = new TreeSet<>(new CostCompartor());
    for (int idx = 0; idx < node.jjtGetNumChildren(); idx++) {
        JexlNode candidate = node.jjtGetChild(idx);
        rankedChildren.add(new Tuple2<>(candidate, costEstimator.computeCostForSubtree(candidate)));
    }

    // nothing to reorder
    if (rankedChildren.isEmpty()) {
        return node;
    }

    JexlNode rebuiltAnd = new ASTAndNode(ParserTreeConstants.JJTANDNODE);
    rebuiltAnd.jjtSetParent(node.jjtGetParent());

    int position = 0;
    for (Tuple2<JexlNode, Cost> ranked : rankedChildren) {
        if (position == 0) {
            // the first child in comparator order is left undelayed
            JexlNode leading = ranked.first();
            leading.jjtSetParent(rebuiltAnd);
            rebuiltAnd.jjtAddChild(leading, 0);
        } else {
            // all remaining children are delayed
            JexlNode delayed = ASTDelayedPredicate.create(ranked.first());
            rebuiltAnd.jjtAddChild(delayed, position);
            delayed.jjtSetParent(rebuiltAnd);
        }
        position++;
    }
    return rebuiltAnd;
}
use of datawave.query.util.Tuple2 in project datawave by NationalSecurityAgency.
the class UnionTest method buildFullScannerStream.
/**
 * Builds a ScannerStream for testing the ability to seek through the stream. One element is produced per shard,
 * each pairing the shard with a fresh {@code IndexInfo(-1)} to which the {@code field == value} node is applied.
 *
 * @param shards
 *            the shards to emit, in iteration order
 * @param field
 *            the field of the equality node
 * @param value
 *            the value of the equality node
 * @return a variable ScannerStream over the per-shard elements
 */
private ScannerStream buildFullScannerStream(SortedSet<String> shards, String field, String value) {
    final JexlNode eqNode = JexlNodeFactory.buildEQNode(field, value);
    final List<Tuple2<String, IndexInfo>> shardInfos = new ArrayList<>();
    shards.forEach(shard -> {
        // each shard gets its own IndexInfo instance
        IndexInfo indexInfo = new IndexInfo(-1);
        indexInfo.applyNode(eqNode);
        shardInfos.add(new Tuple2<>(shard, indexInfo));
    });
    return ScannerStream.variable(shardInfos.iterator(), eqNode);
}
Aggregations