use of datawave.query.exceptions.InvalidQueryException in project datawave by NationalSecurityAgency.
the class VisitorFunction method apply.
/**
 * Builds a new {@link ScannerChunk} for the given chunk in which the query carried by each iterator setting has been rewritten so that it can be executed.
 * <p>
 * The returned chunk reuses the ranges and last known location of {@code input} and gets a copy of its session options. For every iterator setting that
 * carries a {@code QueryOptions.QUERY} option the following steps run, in order:
 * <ol>
 * <li>shard/day query hints are cleaned up when the configuration asks for it and the query was not evaluated previously;</li>
 * <li>the {@code WhindexVisitor} is applied unless whindex field mappings are disabled or the query was evaluated previously;</li>
 * <li>executability is checked, and non-executable queries have predicates pulled up and, if still not executable, pushed down;</li>
 * <li>the query iterator is serialized, or large fielded lists are pushed down when an HDFS configuration is present and the query is not batched;</li>
 * <li>the resulting query string is stored in {@code previouslyExpanded}, validated for size, and written into a replacement iterator setting.</li>
 * </ol>
 * Iterator settings without a query option are left untouched.
 *
 * @param input
 *            the chunk to rewrite; although annotated {@code @Nullable} it is dereferenced without a null check
 * @return a new chunk carrying the rewritten iterator settings
 * @throws InvalidQueryException
 *             if, after rewriting, the query is in the {@code ERROR} executability state
 * @throws DatawaveFatalQueryException
 *             if a full table scan is required but disabled, if the rewritten query is {@code null}, or if the query cannot be parsed
 */
@Override
@Nullable
public ScannerChunk apply(@Nullable ScannerChunk input) {
    SessionOptions options = input.getOptions();

    ScannerChunk newSettings = new ScannerChunk(null, input.getRanges(), input.getLastKnownLocation());
    SessionOptions newOptions = new SessionOptions(options);

    for (IteratorSetting setting : options.getIterators()) {
        final String query = setting.getOptions().get(QueryOptions.QUERY);
        if (null != query) {
            // work on a copy of the setting so the original options are never mutated
            IteratorSetting newIteratorSetting = new IteratorSetting(setting.getPriority(), setting.getName(), setting.getIteratorClass());
            newIteratorSetting.addOptions(setting.getOptions());
            try {
                // the script is parsed lazily: only the first step that actually needs the tree pays for parsing
                ASTJexlScript script = null;
                boolean evaluatedPreviously = previouslyExecutable(query);
                boolean madeChange = false;

                if (!evaluatedPreviously && config.isCleanupShardsAndDaysQueryHints()) {
                    script = JexlASTHelper.parseAndFlattenJexlQuery(query);
                    script = DateIndexCleanupVisitor.cleanup(script);
                    madeChange = true;
                }

                // start from the cached rewrite when this query string has been seen before
                String newQuery = evaluatedPreviously ? previouslyExpanded.get(query) : query;

                // debug statements are only collected when trace logging is on; every loop over 'debug' below is guarded the same way
                List<String> debug = null;
                if (log.isTraceEnabled())
                    debug = Lists.newArrayList();

                if (!config.isDisableWhindexFieldMappings() && !evaluatedPreviously) {
                    if (null == script)
                        script = JexlASTHelper.parseAndFlattenJexlQuery(query);

                    // apply the whindex using the shard date
                    ASTJexlScript rebuiltScript = WhindexVisitor.apply(script, config, getEarliestBeginDate(newSettings.getRanges()), metadataHelper);

                    // if the query changed, save it, and mark it as such
                    if (!TreeEqualityVisitor.isEqual(script, rebuiltScript)) {
                        // guard the message: building the query string is wasted work when debug logging is off
                        // NOTE(review): this prints the script as it was BEFORE the rewrite - confirm that is the intended form
                        if (log.isDebugEnabled()) {
                            log.debug("[" + config.getQuery().getId() + "] The WhindexVisitor updated the query: " + JexlStringBuildingVisitor.buildQuery(script));
                        }
                        script = rebuiltScript;
                        madeChange = true;
                    }
                }

                if (!config.isBypassExecutabilityCheck() || !evaluatedPreviously) {
                    if (null == script)
                        script = JexlASTHelper.parseAndFlattenJexlQuery(query);

                    if (!ExecutableDeterminationVisitor.isExecutable(script, config, indexedFields, indexOnlyFields, nonEventFields, true, debug, this.metadataHelper)) {
                        if (log.isTraceEnabled()) {
                            log.trace("Need to pull up non-executable query: " + JexlStringBuildingVisitor.buildQuery(script));
                            for (String debugStatement : debug) {
                                log.trace(debugStatement);
                            }
                            DefaultQueryPlanner.logQuery(script, "Failing query:");
                        }
                        script = (ASTJexlScript) PullupUnexecutableNodesVisitor.pullupDelayedPredicates(script, true, config, indexedFields, indexOnlyFields, nonEventFields, metadataHelper);
                        madeChange = true;

                        STATE state = ExecutableDeterminationVisitor.getState(script, config, indexedFields, indexOnlyFields, nonEventFields, true, debug, metadataHelper);

                        /*
                         * We could achieve better performance if we live with the small number of queries that error due to the full table scan exception.
                         *
                         * Either look at improving PushdownUnexecutableNodesVisitor or avoid the process altogether.
                         */
                        if (state != STATE.EXECUTABLE) {
                            if (log.isTraceEnabled()) {
                                log.trace("Need to push down non-executable query: " + JexlStringBuildingVisitor.buildQuery(script));
                                for (String debugStatement : debug) {
                                    log.trace(debugStatement);
                                }
                            }
                            script = (ASTJexlScript) PushdownUnexecutableNodesVisitor.pushdownPredicates(script, true, config, indexedFields, indexOnlyFields, nonEventFields, metadataHelper);
                        }

                        // re-evaluate after the (possible) pushdown to decide whether the query can run at all
                        state = ExecutableDeterminationVisitor.getState(script, config, indexedFields, indexOnlyFields, nonEventFields, true, debug, metadataHelper);

                        if (state != STATE.EXECUTABLE) {
                            if (state == STATE.ERROR) {
                                log.warn("After expanding the query, it is determined that the query cannot be executed due to index-only fields mixed with expressions that cannot be run against the index.");
                                BadRequestQueryException qe = new BadRequestQueryException(DatawaveErrorCode.INDEX_ONLY_FIELDS_MIXED_INVALID_EXPRESSIONS);
                                throw new InvalidQueryException(qe);
                            }
                            log.warn("After expanding the query, it is determined that the query cannot be executed against the field index and a full table scan is required");
                            if (!config.getFullTableScanEnabled()) {
                                if (log.isTraceEnabled()) {
                                    log.trace("Full Table fail of " + JexlStringBuildingVisitor.buildQuery(script));
                                    for (String debugStatement : debug) {
                                        log.trace(debugStatement);
                                    }
                                    DefaultQueryPlanner.logQuery(script, "Failing query:");
                                }
                                PreConditionFailedQueryException qe = new PreConditionFailedQueryException(DatawaveErrorCode.FULL_TABLE_SCAN_REQUIRED_BUT_DISABLED);
                                throw new DatawaveFatalQueryException(qe);
                            }
                        }

                        if (log.isTraceEnabled()) {
                            for (String debugStatement : debug) {
                                log.trace(debugStatement);
                            }
                            DefaultQueryPlanner.logQuery(script, "Query pushing down large fielded lists:");
                        }
                    }
                }

                if (config.getSerializeQueryIterator()) {
                    serializeQuery(newIteratorSetting);
                } else {
                    if (!evaluatedPreviously) {
                        // if we have an hdfs configuration, then we can pushdown large fielded lists to an ivarator
                        if (config.getHdfsSiteConfigURLs() != null && setting.getOptions().get(QueryOptions.BATCHED_QUERY) == null) {
                            if (null == script)
                                script = JexlASTHelper.parseAndFlattenJexlQuery(query);
                            try {
                                script = pushdownLargeFieldedLists(config, script);
                                madeChange = true;
                            } catch (IOException ioe) {
                                // best effort: the query stays in its expanded form when the pushdown fails
                                log.error("Unable to pushdown large fielded lists....leaving in expanded form", ioe);
                            }
                        }
                    }
                }

                // only recompile the script if changes were made to the query
                if (madeChange)
                    newQuery = JexlStringBuildingVisitor.buildQuery(script);

                try {
                    // presumably put() rejects a null value with a NullPointerException, which is what the catch reports
                    previouslyExpanded.put(query, newQuery);
                } catch (NullPointerException npe) {
                    throw new DatawaveFatalQueryException(String.format("New query is null! madeChange: %b, qid: %s", madeChange, setting.getOptions().get(QueryOptions.QUERY_ID)), npe);
                }

                // test the final script for thresholds
                // NOTE(review): script is still null here when no step above needed to parse the query - confirm validateQuerySize tolerates that
                DefaultQueryPlanner.validateQuerySize("VisitorFunction", script, config, false);

                // swap the original iterator setting for the one carrying the rewritten query
                newIteratorSetting.addOption(QueryOptions.QUERY, newQuery);
                newOptions.removeScanIterator(setting.getName());
                newOptions.addScanIterator(newIteratorSetting);

                if (log.isDebugEnabled()) {
                    log.debug("VisitorFunction result: " + newSettings.getRanges());
                }

                if (log.isTraceEnabled()) {
                    DefaultQueryPlanner.logTrace(PrintingVisitor.formattedQueryStringList(script), "VistorFunction::apply method");
                } else if (log.isDebugEnabled()) {
                    DefaultQueryPlanner.logDebug(PrintingVisitor.formattedQueryStringList(script, DefaultQueryPlanner.maxChildNodesToPrint), "VistorFunction::apply method");
                }
            } catch (ParseException e) {
                throw new DatawaveFatalQueryException(e);
            }
        }
    }

    newSettings.setOptions(newOptions);
    return newSettings;
}
use of datawave.query.exceptions.InvalidQueryException in project datawave by NationalSecurityAgency.
the class DefaultQueryPlanner method timedTestForNonExistentFields.
/**
 * Verifies, under a stopwatch, that the query and the configured unique fields only reference fields that are known, and fails the query otherwise.
 * <p>
 * Fields referenced by the query are checked by {@code FieldMissingFromSchemaVisitor}; the default datatype field, the any-field and no-field markers and
 * the configured evaluation-only fields are passed to it as special fields. The unique fields are upper-cased and compared against the data dictionary
 * fields, the query model's forward-mapping keys and reverse-mapping values (when a model is supplied), and the hit-term field.
 *
 * @param timers
 *            source of the stopwatch that times this check
 * @param script
 *            the query tree to inspect
 * @param config
 *            supplies the datatype filter, the evaluation-only fields and the unique fields
 * @param metadataHelper
 *            used to look up the fields of the data dictionary
 * @param queryModel
 *            optional query model; may be {@code null}
 * @param settings
 *            the query settings; its authorizations are included in the error message
 * @throws DatawaveQueryException
 *             if the data dictionary cannot be read, or (as an {@code InvalidQueryException}) if any referenced field does not exist
 */
protected void timedTestForNonExistentFields(QueryStopwatch timers, final ASTJexlScript script, ShardQueryConfiguration config, MetadataHelper metadataHelper, QueryModel queryModel, Query settings) throws DatawaveQueryException {
    TraceStopwatch stopwatch = timers.newStartedStopwatch("DefaultQueryPlanner - Test for Non-Existent Fields");
    try {
        // Verify that the query does not contain fields we've never seen before
        Set<String> specialFields = Sets.newHashSet(QueryOptions.DEFAULT_DATATYPE_FIELDNAME, Constants.ANY_FIELD, Constants.NO_FIELD);
        specialFields.addAll(config.getEvaluationOnlyFields());
        Set<String> nonexistentFields = FieldMissingFromSchemaVisitor.getNonExistentFields(metadataHelper, script, config.getDatatypeFilter(), specialFields);

        if (log.isDebugEnabled()) {
            log.debug("Testing for non-existent fields, found: " + nonexistentFields.size());
        }

        // ensure that all of the fields actually exist in the data dictionary
        Set<String> allFields;
        try {
            allFields = metadataHelper.getAllFields(config.getDatatypeFilter());
        } catch (TableNotFoundException e) {
            throw new DatawaveQueryException("Unable get get data dictionary", e);
        }

        // Fields in the data dictionary are always uppercase. Convert the unique fields to uppercase
        // so the comparisons are case insensitive
        List<String> fields = config.getUniqueFields().getFields().stream().map(String::toUpperCase).collect(Collectors.toList());

        // for the unique fields we need to also look for any model aliases (forward or reverse) and fields generated post evaluation (e.g. HIT_TERM)
        // this is because unique fields operate on the fields as returned to the user. We essentially leave all variants of the fields
        // in the unique field list to ensure we catch everything
        Set<String> uniqueFields = new HashSet<>(allFields);
        if (queryModel != null) {
            uniqueFields.addAll(queryModel.getForwardQueryMapping().keySet());
            uniqueFields.addAll(queryModel.getReverseQueryMapping().values());
        }
        uniqueFields.add(JexlEvaluation.HIT_TERM_FIELD);

        if (!uniqueFields.containsAll(fields)) {
            // compute the missing fields from the same upper-cased names used by the check above; starting from the
            // original-case names would report a lower-case field as missing even though its upper-case form exists
            Set<String> missingFields = Sets.newHashSet(fields);
            missingFields.removeAll(uniqueFields);
            nonexistentFields.addAll(missingFields);
        }

        if (!nonexistentFields.isEmpty()) {
            String datatypeFilterSet = (null == config.getDatatypeFilter()) ? "none" : config.getDatatypeFilter().toString();
            if (log.isTraceEnabled()) {
                // reuse the dictionary fetched above instead of querying the metadata helper again
                log.trace("current size of fields: " + allFields.size());
                log.trace("all fields: " + allFields);
                log.trace("QueryModel:" + (null == queryModel ? "null" : queryModel));
                log.trace("metadataHelper " + metadataHelper);
            }
            BadRequestQueryException qe = new BadRequestQueryException(DatawaveErrorCode.FIELDS_NOT_IN_DATA_DICTIONARY, MessageFormat.format("Datatype Filter: {0}, Missing Fields: {1}, Auths: {2}", datatypeFilterSet, nonexistentFields, settings.getQueryAuthorizations()));
            log.error(qe);
            throw new InvalidQueryException(qe);
        }
    } finally {
        // stop the timer on the failure paths as well, so it is never left running
        stopwatch.stop();
    }
}
use of datawave.query.exceptions.InvalidQueryException in project datawave by NationalSecurityAgency.
the class DefaultQueryPlanner method getQueryRanges.
/**
 * Computes the query plans to run against the shard table and reports whether the query has to be run as a full table scan.
 * <p>
 * If the query tree has already been pruned to {@code FALSE}, an empty iterable is returned and the full-table-scan flag is {@code false}. Otherwise the
 * executability of the tree is determined; an executable tree has its plans streamed from a {@code RangeStream}, and the stream context or an unsupported
 * ivarator requirement can still force a full table scan afterwards. When the number of terms reaches {@code pushdownThreshold}, uid collapsing is switched
 * on in {@code config} as a side effect.
 *
 * @param scannerFactory
 *            passed on to the range stream
 * @param metadataHelper
 *            used for the executability determination and passed on to the range stream
 * @param config
 *            the query configuration; read for thresholds and the full-table-scan setting, and updated when uids are collapsed
 * @param queryTree
 *            the query tree to plan; must not be {@code null}
 * @return a {@code Tuple2<CloseableIterable<QueryPlan>,Boolean>} holding the query plans and whether this is a "full-table-scan" query
 * @throws DatawaveQueryException
 *             as an {@code InvalidQueryException} when the executability state is {@code ERROR}, or as a {@code FullTableScansDisallowedException} when a
 *             full table scan is required but disabled
 * @throws UnsupportedOperationException
 *             if the term threshold was exceeded and the configuration cannot handle that
 */
public Tuple2<CloseableIterable<QueryPlan>, Boolean> getQueryRanges(ScannerFactory scannerFactory, MetadataHelper metadataHelper, ShardQueryConfiguration config, JexlNode queryTree) throws DatawaveQueryException {
    Preconditions.checkNotNull(queryTree);

    boolean needsFullTable = false;
    CloseableIterable<QueryPlan> ranges = null;

    // if the query has already been reduced to false there is no reason to do more
    if (QueryPruningVisitor.getState(queryTree) == QueryPruningVisitor.TruthState.FALSE) {
        return new Tuple2<>(emptyCloseableIterator(), false);
    }

    // if we still have an unexecutable tree, then a full table scan is
    // required
    List<String> debugOutput = null;
    if (log.isDebugEnabled()) {
        debugOutput = new ArrayList<>(32);
    }
    STATE state = ExecutableDeterminationVisitor.getState(queryTree, config, metadataHelper, debugOutput);
    if (log.isDebugEnabled()) {
        logDebug(debugOutput, "ExecutableDeterminationVisitor at getQueryRanges:");
    }

    if (state != STATE.EXECUTABLE) {
        if (state == STATE.ERROR) {
            log.warn("After expanding the query, it is determined that the query cannot be executed due to index-only fields mixed with expressions that cannot be run against the index.");
            BadRequestQueryException qe = new BadRequestQueryException(DatawaveErrorCode.INDEX_ONLY_FIELDS_MIXED_INVALID_EXPRESSIONS);
            throw new InvalidQueryException(qe);
        }
        log.warn("After expanding the query, it is determined that the query cannot be executed against the field index and a full table scan is required");
        needsFullTable = true;
    }

    // if we have not already determined that we need a full table
    // scan, then lets try to compute ranges
    if (!needsFullTable) {
        // count the terms
        int termCount = TermCountingVisitor.countTerms(queryTree);
        if (termCount >= pushdownThreshold) {
            if (log.isTraceEnabled()) {
                log.trace("pushing down query because it has " + termCount + " when our max is " + pushdownThreshold);
            }
            config.setCollapseUids(true);
        }

        TraceStopwatch stopwatch = config.getTimers().newStartedStopwatch("DefaultQueryPlanner - Begin stream of ranges from inverted index");
        try {
            RangeStream stream = initializeRangeStream(config, scannerFactory, metadataHelper);
            ranges = stream.streamPlans(queryTree);

            if (log.isTraceEnabled()) {
                log.trace("query stream is " + stream.context());
            }

            // if a term threshold is exceeded and we cannot handle that, then
            // throw unsupported
            boolean thresholdExceeded = StreamContext.EXCEEDED_TERM_THRESHOLD.equals(stream.context());
            if (thresholdExceeded && !config.canHandleExceededTermThreshold()) {
                throw new UnsupportedOperationException(EXCEED_TERM_EXPANSION_ERROR);
            }

            if (StreamContext.UNINDEXED.equals(stream.context())) {
                log.debug("Needs full table scan because of unindexed fields");
                needsFullTable = true;
            } else if (StreamContext.DELAYED_FIELD.equals(stream.context())) {
                log.debug("Needs full table scan because query consists of only delayed expressions");
                needsFullTable = true;
            } else if (IvaratorRequiredVisitor.isIvaratorRequired(queryTree) && !config.canHandleExceededValueThreshold()) {
                // an ivarator is required but the value threshold cannot be handled: force a full table scan
                log.debug("Needs full table scan because we exceeded the value threshold and config.canHandleExceededValueThreshold() is false");
                needsFullTable = true;
            }
        } finally {
            // stop the timer even when streaming the ranges fails, so it is never left running
            stopwatch.stop();
        }
    }

    if (needsFullTable) {
        if (config.getFullTableScanEnabled()) {
            ranges = this.getFullScanRange(config, queryTree);
        } else {
            if (log.isTraceEnabled())
                log.trace("Full table scans are not enabled, query will not be run");
            QueryException qe = new QueryException(DatawaveErrorCode.FULL_TABLE_SCAN_REQUIRED_BUT_DISABLED);
            throw new FullTableScansDisallowedException(qe);
        }
        if (log.isTraceEnabled())
            log.trace("Ranges are " + ranges);
    }

    return new Tuple2<>(ranges, needsFullTable);
}
Aggregations