use of datawave.query.exceptions.DatawaveQueryException in project datawave by NationalSecurityAgency.
The class DefaultQueryPlanner, method getQueryRanges.
/**
 * Returns a Tuple2<CloseableIterable<QueryPlan>,Boolean> whose elements represent the query plans (ranges) to use for querying the shard table
 * and whether or not this is a "full-table-scan" query.
 *
 * @param scannerFactory
 *            the scanner factory used to create scanners against the index tables
 * @param metadataHelper
 *            the metadata helper used to look up field and index metadata
 * @param config
 *            the shard query configuration
 * @param queryTree
 *            the planned JEXL query tree
 * @return a tuple of the query plans and a flag that is true when a full table scan is required
 * @throws DatawaveQueryException
 *             if the query cannot be planned or executed
 */
public Tuple2<CloseableIterable<QueryPlan>, Boolean> getQueryRanges(ScannerFactory scannerFactory, MetadataHelper metadataHelper, ShardQueryConfiguration config, JexlNode queryTree) throws DatawaveQueryException {
    Preconditions.checkNotNull(queryTree);
    boolean needsFullTable = false;
    CloseableIterable<QueryPlan> ranges = null;
    // if the query has already been reduced to false there is no reason to do more
    if (QueryPruningVisitor.getState(queryTree) == QueryPruningVisitor.TruthState.FALSE) {
        return new Tuple2<>(emptyCloseableIterator(), false);
    }
    // if we still have an unexecutable tree, then a full table scan is
    // required
    List<String> debugOutput = null;
    if (log.isDebugEnabled()) {
        debugOutput = new ArrayList<>(32);
    }
    STATE state = ExecutableDeterminationVisitor.getState(queryTree, config, metadataHelper, debugOutput);
    if (log.isDebugEnabled()) {
        logDebug(debugOutput, "ExecutableDeterminationVisitor at getQueryRanges:");
    }
    if (state != STATE.EXECUTABLE) {
        if (state == STATE.ERROR) {
            log.warn("After expanding the query, it is determined that the query cannot be executed due to index-only fields mixed with expressions that cannot be run against the index.");
            BadRequestQueryException qe = new BadRequestQueryException(DatawaveErrorCode.INDEX_ONLY_FIELDS_MIXED_INVALID_EXPRESSIONS);
            throw new InvalidQueryException(qe);
        }
        log.warn("After expanding the query, it is determined that the query cannot be executed against the field index and a full table scan is required");
        needsFullTable = true;
    }
    // if we are not already resorting to a full table scan, then lets try to compute ranges
    if (!needsFullTable) {
        // count the terms
        int termCount = TermCountingVisitor.countTerms(queryTree);
        if (termCount >= pushdownThreshold) {
            if (log.isTraceEnabled()) {
                log.trace("pushing down query because it has " + termCount + " when our max is " + pushdownThreshold);
            }
            config.setCollapseUids(true);
        }
        TraceStopwatch stopwatch = config.getTimers().newStartedStopwatch("DefaultQueryPlanner - Begin stream of ranges from inverted index");
        RangeStream stream = initializeRangeStream(config, scannerFactory, metadataHelper);
        ranges = stream.streamPlans(queryTree);
        if (log.isTraceEnabled()) {
            log.trace("query stream is " + stream.context());
        }
        // if a term threshold is exceeded and we cannot handle that, then
        // throw unsupported
        boolean thresholdExceeded = StreamContext.EXCEEDED_TERM_THRESHOLD.equals(stream.context());
        if (thresholdExceeded && !config.canHandleExceededTermThreshold()) {
            throw new UnsupportedOperationException(EXCEED_TERM_EXPANSION_ERROR);
        }
        if (StreamContext.UNINDEXED.equals(stream.context())) {
            log.debug("Needs full table scan because of unindexed fields");
            needsFullTable = true;
        } else if (StreamContext.DELAYED_FIELD.equals(stream.context())) {
            log.debug("Needs full table scan because query consists of only delayed expressions");
            needsFullTable = true;
        } else if (IvaratorRequiredVisitor.isIvaratorRequired(queryTree) && !config.canHandleExceededValueThreshold()) {
            // force a full table scan
            log.debug("Needs full table scan because we exceeded the value threshold and config.canHandleExceededValueThreshold() is false");
            needsFullTable = true;
        }
        stopwatch.stop();
    }
    if (needsFullTable) {
        if (config.getFullTableScanEnabled()) {
            ranges = this.getFullScanRange(config, queryTree);
        } else {
            if (log.isTraceEnabled())
                log.trace("Full table scans are not enabled, query will not be run");
            QueryException qe = new QueryException(DatawaveErrorCode.FULL_TABLE_SCAN_REQUIRED_BUT_DISABLED);
            throw new FullTableScansDisallowedException(qe);
        }
        if (log.isTraceEnabled())
            log.trace("Ranges are " + ranges);
    }
    return new Tuple2<>(ranges, needsFullTable);
}
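A minimal sketch of how a caller might consume the returned tuple, assuming Tuple2 exposes first()/second() accessors and that CloseableIterable is Closeable; the planner, scannerFactory, metadataHelper, config, queryTree and process() names are hypothetical scaffolding rather than code from the project:

// Hypothetical caller; only the shape of the returned tuple (plans plus full-table-scan flag)
// comes from getQueryRanges above.
Tuple2<CloseableIterable<QueryPlan>, Boolean> result = planner.getQueryRanges(scannerFactory, metadataHelper, config, queryTree);
if (result.second()) {
    log.warn("Query could not be satisfied from the field index; a full table scan was planned");
}
try (CloseableIterable<QueryPlan> plans = result.first()) {
    for (QueryPlan plan : plans) {
        process(plan); // hand each plan (ranges plus query settings) to the scan scheduler
    }
}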
use of datawave.query.exceptions.DatawaveQueryException in project datawave by NationalSecurityAgency.
The class ShardRangeStream, method streamPlans.
@Override
public CloseableIterable<QueryPlan> streamPlans(JexlNode node) {
    try {
        String queryString = JexlStringBuildingVisitor.buildQuery(node);
        int stackStart = config.getBaseIteratorPriority() + 40;
        BatchScanner scanner = scanners.newScanner(config.getShardTableName(), config.getAuthorizations(), config.getNumQueryThreads(), config.getQuery(), true);
        IteratorSetting cfg = new IteratorSetting(stackStart++, "query", FieldIndexOnlyQueryIterator.class);
        DefaultQueryPlanner.addOption(cfg, QueryOptions.QUERY_ID, config.getQuery().getId().toString(), false);
        DefaultQueryPlanner.addOption(cfg, QueryOptions.QUERY, queryString, false);
        try {
            DefaultQueryPlanner.addOption(cfg, QueryOptions.INDEX_ONLY_FIELDS, QueryOptions.buildFieldStringFromSet(metadataHelper.getIndexOnlyFields(config.getDatatypeFilter())), true);
            DefaultQueryPlanner.addOption(cfg, QueryOptions.INDEXED_FIELDS, QueryOptions.buildFieldStringFromSet(metadataHelper.getIndexedFields(config.getDatatypeFilter())), true);
        } catch (TableNotFoundException e) {
            throw new RuntimeException(e);
        }
        DefaultQueryPlanner.addOption(cfg, QueryOptions.START_TIME, Long.toString(config.getBeginDate().getTime()), false);
        DefaultQueryPlanner.addOption(cfg, QueryOptions.DATATYPE_FILTER, config.getDatatypeFilterAsString(), false);
        DefaultQueryPlanner.addOption(cfg, QueryOptions.END_TIME, Long.toString(config.getEndDate().getTime()), false);
        DefaultQueryPlanner.configureTypeMappings(config, cfg, metadataHelper, true);
        scanner.setRanges(Collections.singleton(rangeForTerm(null, null, config)));
        scanner.addScanIterator(cfg);
        Iterator<Entry<Key, Value>> kvIter = scanner.iterator();
        itr = Collections.emptyIterator();
        if (kvIter.hasNext()) {
            PeekingIterator<Entry<Key, Value>> peeking = new PeekingIterator<>(kvIter);
            Entry<Key, Value> peekKey = peeking.peek();
            ErrorKey errorKey = ErrorKey.getErrorKey(peekKey.getKey());
            if (errorKey != null) {
                switch (errorKey.getErrorType()) {
                    case UNINDEXED_FIELD:
                        this.context = StreamContext.UNINDEXED;
                        break;
                    case UNKNOWN:
                        this.context = StreamContext.ABSENT;
                }
            } else {
                itr = Iterators.transform(peeking, new FieldIndexParser(node));
                this.context = StreamContext.PRESENT;
            }
        } else {
            this.context = StreamContext.ABSENT;
        }
    } catch (TableNotFoundException | DatawaveQueryException e) {
        throw new RuntimeException(e);
    } finally {
        // shut down the executor as all threads have completed
        shutdownThreads();
    }
    return this;
}
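A sketch of how a caller might drive this stream, modeled on how DefaultQueryPlanner.getQueryRanges above consumes a RangeStream; the rangeStream variable and the handle() helper are hypothetical:

// Hypothetical caller; streamPlans returns "this", so the same object is both the
// CloseableIterable<QueryPlan> and the holder of the resulting StreamContext.
CloseableIterable<QueryPlan> plans = rangeStream.streamPlans(queryTree);
if (StreamContext.ABSENT.equals(rangeStream.context())) {
    log.debug("No field index entries matched the query terms");
} else if (StreamContext.UNINDEXED.equals(rangeStream.context())) {
    log.debug("Query contains unindexed fields; a full table scan may be required");
} else {
    for (QueryPlan plan : plans) {
        handle(plan); // each plan wraps ranges parsed from the field index entries
    }
}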
use of datawave.query.exceptions.DatawaveQueryException in project datawave by NationalSecurityAgency.
The class DefaultQueryPlanner, method capDateRange.
/**
 * If configured, cap the start of the date range. If configured, throw an exception if the start AND end dates are outside the valid date range.
 *
 * @param config
 *            the shard query configuration holding the begin/end dates and the beginDateCap
 * @throws DatawaveQueryException
 *             if failOutsideValidDateRange is set and both the begin and end dates fall before the capped start
 */
protected void capDateRange(ShardQueryConfiguration config) throws DatawaveQueryException {
    if (config.getBeginDateCap() > 0) {
        long minStartTime = System.currentTimeMillis() - config.getBeginDateCap();
        if (config.getBeginDate().getTime() < minStartTime) {
            if (config.isFailOutsideValidDateRange() && config.getEndDate().getTime() < minStartTime) {
                throw new DatawaveQueryException("This requested date range is outside of range of data on this system");
            } else {
                config.setBeginDate(new Date(minStartTime));
                log.info("Resetting begin date to the beginDateCap: " + config.getBeginDate());
                if (config.getEndDate().getTime() < minStartTime) {
                    // setting the end date to the same as the begin date will result in no ranges being created (@see
                    // GenericQueryConfiguration.canRunQuery())
                    config.setEndDate(new Date(minStartTime - 1));
                    log.info("Resetting end date to the beginDateCap: " + config.getEndDate());
                }
            }
        }
    }
}
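A small sketch of the capping arithmetic, invoked from inside the planner since capDateRange is protected; the thirty/sixty-day values and the setBeginDateCap / setFailOutsideValidDateRange setters are assumptions made for illustration (they mirror the getters used above) rather than verified project API:

// Hypothetical configuration: only the last 30 days are considered valid, and we prefer
// capping over failing.
long day = 24L * 60 * 60 * 1000;
long now = System.currentTimeMillis();
config.setBeginDateCap(30 * day);
config.setFailOutsideValidDateRange(false);
config.setBeginDate(new Date(now - 60 * day));
config.setEndDate(new Date(now - 45 * day));
capDateRange(config);
// beginDate is now roughly "now - 30 days"; endDate, which was also older than the cap,
// was reset to one millisecond before it, so no ranges will be created for the query.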
use of datawave.query.exceptions.DatawaveQueryException in project datawave by NationalSecurityAgency.
The class DefaultQueryPlanner, method configureTypeMappings.
public static void configureTypeMappings(ShardQueryConfiguration config, IteratorSetting cfg, MetadataHelper metadataHelper, boolean compressMappings) throws DatawaveQueryException {
    try {
        addOption(cfg, QueryOptions.QUERY_MAPPING_COMPRESS, Boolean.valueOf(compressMappings).toString(), false);
        // now lets filter the query field datatypes to those that are not
        // indexed
        Multimap<String, Type<?>> nonIndexedQueryFieldsDatatypes = HashMultimap.create(config.getQueryFieldsDatatypes());
        nonIndexedQueryFieldsDatatypes.keySet().removeAll(config.getIndexedFields());
        String nonIndexedTypes = QueryOptions.buildFieldNormalizerString(nonIndexedQueryFieldsDatatypes);
        String typeMetadataString = metadataHelper.getTypeMetadata(config.getDatatypeFilter()).toString();
        String requiredAuthsString = metadataHelper.getUsersMetadataAuthorizationSubset();
        if (compressMappings) {
            nonIndexedTypes = QueryOptions.compressOption(nonIndexedTypes, QueryOptions.UTF8);
            typeMetadataString = QueryOptions.compressOption(typeMetadataString, QueryOptions.UTF8);
            requiredAuthsString = QueryOptions.compressOption(requiredAuthsString, QueryOptions.UTF8);
        }
        addOption(cfg, QueryOptions.NON_INDEXED_DATATYPES, nonIndexedTypes, false);
        addOption(cfg, QueryOptions.TYPE_METADATA, typeMetadataString, false);
        addOption(cfg, QueryOptions.TYPE_METADATA_AUTHS, requiredAuthsString, false);
        addOption(cfg, QueryOptions.METADATA_TABLE_NAME, config.getMetadataTableName(), false);
    } catch (TableNotFoundException | IOException e) {
        QueryException qe = new QueryException(DatawaveErrorCode.TYPE_MAPPING_CONFIG_ERROR, e);
        throw new DatawaveQueryException(qe);
    }
}
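A sketch of the calling pattern, mirroring ShardRangeStream.streamPlans above; the scanner variable and the iterator priority/name here are illustrative assumptions:

// Hypothetical caller building a scan-time iterator configuration.
IteratorSetting cfg = new IteratorSetting(config.getBaseIteratorPriority() + 40, "query", FieldIndexOnlyQueryIterator.class);
try {
    // compressMappings = true: the type metadata, non-indexed datatype and auths strings are
    // compressed via QueryOptions.compressOption before being added as iterator options
    DefaultQueryPlanner.configureTypeMappings(config, cfg, metadataHelper, true);
} catch (DatawaveQueryException e) {
    // wraps TYPE_MAPPING_CONFIG_ERROR (missing metadata table or a failure serializing an option)
    throw new RuntimeException(e);
}
scanner.addScanIterator(cfg);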
use of datawave.query.exceptions.DatawaveQueryException in project datawave by NationalSecurityAgency.
The class DefaultQueryPlanner, method processTree.
protected ASTJexlScript processTree(final ASTJexlScript originalQueryTree, ShardQueryConfiguration config, Query settings, MetadataHelper metadataHelper, ScannerFactory scannerFactory, QueryData queryData, QueryStopwatch timers, QueryModel queryModel) throws DatawaveQueryException {
    config.setQueryTree(originalQueryTree);
    TraceStopwatch stopwatch = null;
    if (!disableWhindexFieldMappings) {
        // apply the value-specific field mappings for GeoWave functions
        config.setQueryTree(timedApplyWhindexFieldMappings(timers, config.getQueryTree(), config, metadataHelper, settings));
    }
    if (!disableExpandIndexFunction) {
        // expand the index queries for the functions
        config.setQueryTree(timedExpandIndexQueriesForFunctions(timers, config.getQueryTree(), config, metadataHelper));
    }
    // apply the node transform rules
    // running it here before any unfielded expansions to enable potentially pushing down terms before index lookups
    config.setQueryTree(timedApplyNodeTransformRules(timers, "Apply Node Transform Rules - Pre Unfielded Expansions", config.getQueryTree(), config, metadataHelper, getTransformRules()));
    // expand any unfielded (ANYFIELD) terms that were left as a regex
    if (!disableAnyFieldLookup) {
        config.setQueryTree(timedExpandAnyFieldRegexNodes(timers, config.getQueryTree(), config, metadataHelper, scannerFactory, settings.getQuery()));
    }
    if (reduceQuery) {
        config.setQueryTree(timedReduce(timers, "Reduce Query After ANYFIELD Expansions", config.getQueryTree()));
    }
    if (!disableTestNonExistentFields) {
        timedTestForNonExistentFields(timers, config.getQueryTree(), config, metadataHelper, queryModel, settings);
    }
    // apply the node transform rules
    // running it here before any regex or range expansions to enable potentially pushing down terms before index lookups
    config.setQueryTree(timedApplyNodeTransformRules(timers, "Apply Node Transform Rules - Pre Regex/Range Expansions", config.getQueryTree(), config, metadataHelper, getTransformRules()));
    timedFetchDatatypes(timers, "Fetch Required Datatypes", config.getQueryTree(), config);
    config.setQueryTree(timedFixUnindexedNumerics(timers, config.getQueryTree(), config));
    config.setQueryTree(timedExpandMultiNormalizedTerms(timers, config.getQueryTree(), config, metadataHelper));
    // if we have any index holes, then mark em
    if (!config.getIndexHoles().isEmpty()) {
        config.setQueryTree(timedMarkIndexHoles(timers, config.getQueryTree(), config, metadataHelper));
    }
    // lets precompute the indexed fields and index only fields for the specific datatype if needed below
    Set<String> indexedFields = null;
    Set<String> indexOnlyFields = null;
    Set<String> nonEventFields = null;
    if (config.getMinSelectivity() > 0 || !disableBoundedLookup) {
        try {
            indexedFields = metadataHelper.getIndexedFields(config.getDatatypeFilter());
            indexOnlyFields = metadataHelper.getIndexOnlyFields(config.getDatatypeFilter());
            nonEventFields = metadataHelper.getNonEventFields(config.getDatatypeFilter());
        } catch (TableNotFoundException te) {
            QueryException qe = new QueryException(DatawaveErrorCode.METADATA_ACCESS_ERROR, te);
            throw new DatawaveFatalQueryException(qe);
        }
    }
    // apply the node transform rules
    config.setQueryTree(timedApplyNodeTransformRules(timers, "Apply Node Transform Rules - Pre Pushdown/Pullup Expansions", config.getQueryTree(), config, metadataHelper, getTransformRules()));
    // push down terms that are over the min selectivity
    if (config.getMinSelectivity() > 0) {
        config.setQueryTree(timedPushdownLowSelectiveTerms(timers, config.getQueryTree(), config, indexedFields, indexOnlyFields, nonEventFields));
    }
    config.setQueryTree(timedForceFieldToFieldComparison(timers, config.getQueryTree()));
    if (!disableCompositeFields) {
        config.setQueryTree(timedExpandCompositeFields(timers, config.getQueryTree(), config));
    }
    if (!disableBoundedLookup) {
        stopwatch = timers.newStartedStopwatch("DefaultQueryPlanner - Expand bounded query ranges (total)");
        // Expand any bounded ranges into a conjunction of discrete terms
        try {
            Map<String, IndexLookup> indexLookupMap = new HashMap<>();
            // Check if there is any regex to expand.
            NodeTypeCount nodeCount = NodeTypeCountVisitor.countNodes(config.getQueryTree());
            if (nodeCount.hasAny(ASTNRNode.class, ASTERNode.class)) {
                config.setQueryTree(timedExpandRegex(timers, "Expand Regex", config.getQueryTree(), config, metadataHelper, scannerFactory, indexLookupMap));
            }
            // Check if there are any bounded ranges to expand.
            if (nodeCount.isPresent(BoundedRange.class)) {
                config.setQueryTree(timedExpandRanges(timers, "Expand Ranges", config.getQueryTree(), config, metadataHelper, scannerFactory));
            }
            // NOTE: GeoWavePruningVisitor should run before QueryPruningVisitor. If it runs after, there is a chance
            // that GeoWavePruningVisitor will prune all of the remaining indexed terms, which would leave a GeoWave
            // function without any indexed terms or ranges, which should evaluate to false. That case won't be handled
            // properly if we run GeoWavePruningVisitor after QueryPruningVisitor.
            config.setQueryTree(timedPruneGeoWaveTerms(timers, config.getQueryTree(), metadataHelper));
            if (reduceQuery) {
                config.setQueryTree(timedReduce(timers, "Reduce Query After Range Expansion", config.getQueryTree()));
            }
            // Check if there are functions that can be pushed into exceeded value ranges.
            if (nodeCount.hasAll(ASTFunctionNode.class, ExceededValueThresholdMarkerJexlNode.class)) {
                config.setQueryTree(timedPushFunctions(timers, config.getQueryTree(), config, metadataHelper));
            }
            if (executableExpansion) {
                config.setQueryTree(timedExecutableExpansion(timers, config.getQueryTree(), config, metadataHelper));
            }
            List<String> debugOutput = null;
            if (log.isDebugEnabled()) {
                debugOutput = new ArrayList<>(32);
            }
            // check whether the expanded query is executable as written, or whether we need to adjust the terms to be delayed
            if (!ExecutableDeterminationVisitor.isExecutable(config.getQueryTree(), config, indexedFields, indexOnlyFields, nonEventFields, debugOutput, metadataHelper)) {
                // if we now have an unexecutable tree because of delayed
                // predicates, then remove delayed predicates as needed and
                // reexpand
                config.setQueryTree(timedRemoveDelayedPredicates(timers, "Remove Delayed Predicates", config.getQueryTree(), config, metadataHelper, indexedFields, indexOnlyFields, nonEventFields, indexLookupMap, scannerFactory, metadataHelper, debugOutput));
            }
            // if we now have an unexecutable tree because of missing
            // delayed predicates, then add delayed predicates where
            // possible
            config.setQueryTree(timedAddDelayedPredicates(timers, "Add Delayed Predicates", config.getQueryTree(), config, metadataHelper, indexedFields, indexOnlyFields, nonEventFields, debugOutput));
        } catch (TableNotFoundException e) {
            stopwatch.stop();
            QueryException qe = new QueryException(DatawaveErrorCode.METADATA_ACCESS_ERROR, e);
            throw new DatawaveFatalQueryException(qe);
        } catch (CannotExpandUnfieldedTermFatalException e) {
            if (null != e.getCause() && e.getCause() instanceof DoNotPerformOptimizedQueryException)
                throw (DoNotPerformOptimizedQueryException) e.getCause();
            QueryException qe = new QueryException(DatawaveErrorCode.INDETERMINATE_INDEX_STATUS, e);
            throw new DatawaveFatalQueryException(qe);
        }
        stopwatch.stop();
    } else {
        if (log.isDebugEnabled()) {
            log.debug("Bounded range and regex conversion has been disabled");
        }
    }
    return config.getQueryTree();
}
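A sketch of the overall planning sequence implied by the two signatures above, assuming Tuple2 exposes a second() accessor; the surrounding objects (settings, queryData, timers, queryModel) are assumed to have been created earlier by the planner and are not shown:

// Hypothetical flow inside the planner: processTree rewrites the query, getQueryRanges plans it.
ASTJexlScript rewritten = processTree(originalQueryTree, config, settings, metadataHelper, scannerFactory, queryData, timers, queryModel);
Tuple2<CloseableIterable<QueryPlan>, Boolean> planned = getQueryRanges(scannerFactory, metadataHelper, config, rewritten);
if (planned.second()) {
    // only reached when full table scans are enabled; otherwise getQueryRanges
    // throws FullTableScansDisallowedException
    log.warn("Query will be executed as a full table scan");
}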