
Example 1 with InvalidQueryException

use of datawave.query.exceptions.InvalidQueryException in project datawave by NationalSecurityAgency.

the class VisitorFunction method apply.

@Override
@Nullable
public ScannerChunk apply(@Nullable ScannerChunk input) {
    SessionOptions options = input.getOptions();
    ScannerChunk newSettings = new ScannerChunk(null, input.getRanges(), input.getLastKnownLocation());
    SessionOptions newOptions = new SessionOptions(options);
    for (IteratorSetting setting : options.getIterators()) {
        final String query = setting.getOptions().get(QueryOptions.QUERY);
        if (null != query) {
            IteratorSetting newIteratorSetting = new IteratorSetting(setting.getPriority(), setting.getName(), setting.getIteratorClass());
            newIteratorSetting.addOptions(setting.getOptions());
            try {
                ASTJexlScript script = null;
                boolean evaluatedPreviously = previouslyExecutable(query);
                boolean madeChange = false;
                if (!evaluatedPreviously && config.isCleanupShardsAndDaysQueryHints()) {
                    script = JexlASTHelper.parseAndFlattenJexlQuery(query);
                    script = DateIndexCleanupVisitor.cleanup(script);
                    madeChange = true;
                }
                String newQuery = evaluatedPreviously ? previouslyExpanded.get(query) : query;
                List<String> debug = null;
                if (log.isTraceEnabled())
                    debug = Lists.newArrayList();
                if (!config.isDisableWhindexFieldMappings() && !evaluatedPreviously) {
                    if (null == script)
                        script = JexlASTHelper.parseAndFlattenJexlQuery(query);
                    // apply the whindex using the shard date
                    ASTJexlScript rebuiltScript = WhindexVisitor.apply(script, config, getEarliestBeginDate(newSettings.getRanges()), metadataHelper);
                    // if the query changed, save it, and mark it as such
                    if (!TreeEqualityVisitor.isEqual(script, rebuiltScript)) {
                        log.debug("[" + config.getQuery().getId() + "] The WhindexVisitor updated the query: " + JexlStringBuildingVisitor.buildQuery(script));
                        script = rebuiltScript;
                        madeChange = true;
                    }
                }
                if (!config.isBypassExecutabilityCheck() || !evaluatedPreviously) {
                    if (null == script)
                        script = JexlASTHelper.parseAndFlattenJexlQuery(query);
                    if (!ExecutableDeterminationVisitor.isExecutable(script, config, indexedFields, indexOnlyFields, nonEventFields, true, debug, this.metadataHelper)) {
                        if (log.isTraceEnabled()) {
                            log.trace("Need to pull up non-executable query: " + JexlStringBuildingVisitor.buildQuery(script));
                            for (String debugStatement : debug) {
                                log.trace(debugStatement);
                            }
                            DefaultQueryPlanner.logQuery(script, "Failing query:");
                        }
                        script = (ASTJexlScript) PullupUnexecutableNodesVisitor.pullupDelayedPredicates(script, true, config, indexedFields, indexOnlyFields, nonEventFields, metadataHelper);
                        madeChange = true;
                        STATE state = ExecutableDeterminationVisitor.getState(script, config, indexedFields, indexOnlyFields, nonEventFields, true, debug, metadataHelper);
                        /**
                         * We could achieve better performance if we live with the small number of queries that error due to the full table scan exception.
                         *
                         * Either look at improving PushdownUnexecutableNodesVisitor or avoid the process altogether.
                         */
                        if (state != STATE.EXECUTABLE) {
                            if (log.isTraceEnabled()) {
                                log.trace("Need to push down non-executable query: " + JexlStringBuildingVisitor.buildQuery(script));
                                for (String debugStatement : debug) {
                                    log.trace(debugStatement);
                                }
                            }
                            script = (ASTJexlScript) PushdownUnexecutableNodesVisitor.pushdownPredicates(script, true, config, indexedFields, indexOnlyFields, nonEventFields, metadataHelper);
                        }
                        state = ExecutableDeterminationVisitor.getState(script, config, indexedFields, indexOnlyFields, nonEventFields, true, debug, metadataHelper);
                        if (state != STATE.EXECUTABLE) {
                            if (state == STATE.ERROR) {
                                log.warn("After expanding the query, it is determined that the query cannot be executed due to index-only fields mixed with expressions that cannot be run against the index.");
                                BadRequestQueryException qe = new BadRequestQueryException(DatawaveErrorCode.INDEX_ONLY_FIELDS_MIXED_INVALID_EXPRESSIONS);
                                throw new InvalidQueryException(qe);
                            }
                            log.warn("After expanding the query, it is determined that the query cannot be executed against the field index and a full table scan is required");
                            if (!config.getFullTableScanEnabled()) {
                                if (log.isTraceEnabled()) {
                                    log.trace("Full Table fail of " + JexlStringBuildingVisitor.buildQuery(script));
                                    for (String debugStatement : debug) {
                                        log.trace(debugStatement);
                                    }
                                    DefaultQueryPlanner.logQuery(script, "Failing query:");
                                }
                                PreConditionFailedQueryException qe = new PreConditionFailedQueryException(DatawaveErrorCode.FULL_TABLE_SCAN_REQUIRED_BUT_DISABLED);
                                throw new DatawaveFatalQueryException(qe);
                            }
                        }
                        if (log.isTraceEnabled()) {
                            for (String debugStatement : debug) {
                                log.trace(debugStatement);
                            }
                            DefaultQueryPlanner.logQuery(script, "Query pushing down large fielded lists:");
                        }
                    }
                }
                if (config.getSerializeQueryIterator()) {
                    serializeQuery(newIteratorSetting);
                } else {
                    if (!evaluatedPreviously) {
                        // if we have an hdfs configuration, then we can pushdown large fielded lists to an ivarator
                        if (config.getHdfsSiteConfigURLs() != null && setting.getOptions().get(QueryOptions.BATCHED_QUERY) == null) {
                            if (null == script)
                                script = JexlASTHelper.parseAndFlattenJexlQuery(query);
                            try {
                                script = pushdownLargeFieldedLists(config, script);
                                madeChange = true;
                            } catch (IOException ioe) {
                                log.error("Unable to pushdown large fielded lists....leaving in expanded form", ioe);
                            }
                        }
                    }
                }
                // only recompile the script if changes were made to the query
                if (madeChange)
                    newQuery = JexlStringBuildingVisitor.buildQuery(script);
                try {
                    previouslyExpanded.put(query, newQuery);
                } catch (NullPointerException npe) {
                    throw new DatawaveFatalQueryException(String.format("New query is null! madeChange: %b, qid: %s", madeChange, setting.getOptions().get(QueryOptions.QUERY_ID)), npe);
                }
                // test the final script for thresholds
                DefaultQueryPlanner.validateQuerySize("VisitorFunction", script, config, false);
                newIteratorSetting.addOption(QueryOptions.QUERY, newQuery);
                newOptions.removeScanIterator(setting.getName());
                newOptions.addScanIterator(newIteratorSetting);
                if (log.isDebugEnabled()) {
                    log.debug("VisitorFunction result: " + newSettings.getRanges());
                }
                if (log.isTraceEnabled()) {
                    DefaultQueryPlanner.logTrace(PrintingVisitor.formattedQueryStringList(script), "VisitorFunction::apply method");
                } else if (log.isDebugEnabled()) {
                    DefaultQueryPlanner.logDebug(PrintingVisitor.formattedQueryStringList(script, DefaultQueryPlanner.maxChildNodesToPrint), "VisitorFunction::apply method");
                }
            } catch (ParseException e) {
                throw new DatawaveFatalQueryException(e);
            }
        }
    }
    newSettings.setOptions(newOptions);
    return newSettings;
}
Also used : BadRequestQueryException(datawave.webservice.query.exception.BadRequestQueryException) PreConditionFailedQueryException(datawave.webservice.query.exception.PreConditionFailedQueryException) SessionOptions(datawave.query.tables.SessionOptions) ASTJexlScript(org.apache.commons.jexl2.parser.ASTJexlScript) STATE(datawave.query.jexl.visitors.ExecutableDeterminationVisitor.STATE) ScannerChunk(datawave.query.tables.async.ScannerChunk) IOException(java.io.IOException) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) DatawaveFatalQueryException(datawave.query.exceptions.DatawaveFatalQueryException) ParseException(org.apache.commons.jexl2.parser.ParseException) InvalidQueryException(datawave.query.exceptions.InvalidQueryException) Nullable(javax.annotation.Nullable)
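
The two failure branches above surface as different exception types: an InvalidQueryException (wrapping a coded BadRequestQueryException) when index-only fields are mixed with expressions that cannot run against the index, and a DatawaveFatalQueryException when a full table scan would be required but is disabled. Below is a minimal caller-side sketch of how those two outcomes could be separated; the ChunkPlanningExample wrapper and its mapping to IllegalArgumentException are hypothetical, and it assumes a VisitorFunction instance can be used through java.util.function.Function (recent Guava Function versions extend it).

import java.util.function.Function;

import datawave.query.exceptions.DatawaveFatalQueryException;
import datawave.query.exceptions.InvalidQueryException;
import datawave.query.tables.async.ScannerChunk;

// Hypothetical wrapper, not part of datawave: separates the two failure modes of VisitorFunction.apply.
public class ChunkPlanningExample {

    private final Function<ScannerChunk, ScannerChunk> visitorFunction;

    public ChunkPlanningExample(Function<ScannerChunk, ScannerChunk> visitorFunction) {
        this.visitorFunction = visitorFunction;
    }

    public ScannerChunk plan(ScannerChunk chunk) {
        try {
            // Rewrites the QUERY iterator option (whindex mapping, pullup/pushdown, ivarator pushdown).
            return visitorFunction.apply(chunk);
        } catch (InvalidQueryException e) {
            // Index-only fields mixed with expressions that cannot run against the index: treat as a client error.
            throw new IllegalArgumentException("Query cannot be executed as written", e);
        } catch (DatawaveFatalQueryException e) {
            // Parse failure, null rewritten query, or full table scan required but disabled: fail the query.
            throw e;
        }
    }
}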

Example 2 with InvalidQueryException

use of datawave.query.exceptions.InvalidQueryException in project datawave by NationalSecurityAgency.

the class DefaultQueryPlanner method timedTestForNonExistentFields.

protected void timedTestForNonExistentFields(QueryStopwatch timers, final ASTJexlScript script, ShardQueryConfiguration config, MetadataHelper metadataHelper, QueryModel queryModel, Query settings) throws DatawaveQueryException {
    TraceStopwatch stopwatch = timers.newStartedStopwatch("DefaultQueryPlanner - Test for Non-Existent Fields");
    // Verify that the query does not contain fields we've never seen
    // before
    Set<String> specialFields = Sets.newHashSet(QueryOptions.DEFAULT_DATATYPE_FIELDNAME, Constants.ANY_FIELD, Constants.NO_FIELD);
    specialFields.addAll(config.getEvaluationOnlyFields());
    Set<String> nonexistentFields = FieldMissingFromSchemaVisitor.getNonExistentFields(metadataHelper, script, config.getDatatypeFilter(), specialFields);
    if (log.isDebugEnabled()) {
        log.debug("Testing for non-existent fields, found: " + nonexistentFields.size());
    }
    // ensure that all of the fields actually exist in the data dictionary
    Set<String> allFields = null;
    try {
        allFields = metadataHelper.getAllFields(config.getDatatypeFilter());
    } catch (TableNotFoundException e) {
        throw new DatawaveQueryException("Unable get get data dictionary", e);
    }
    // Fields in the data dictionary are always uppercase. Convert the unique fields to uppercase
    // so the comparisons are case insensitive
    List<String> fields = config.getUniqueFields().getFields().stream().map(String::toUpperCase).collect(Collectors.toList());
    // for the unique fields we need to also look for any model aliases (forward or reverse) and fields generated post evaluation (e.g. HIT_TERM)
    // this is because unique fields operate on the fields as returned to the user. We essentially leave all variants of the fields
    // in the unique field list to ensure we catch everything
    Set<String> uniqueFields = new HashSet<>(allFields);
    if (queryModel != null) {
        uniqueFields.addAll(queryModel.getForwardQueryMapping().keySet());
        uniqueFields.addAll(queryModel.getReverseQueryMapping().values());
    }
    uniqueFields.add(JexlEvaluation.HIT_TERM_FIELD);
    if (!uniqueFields.containsAll(fields)) {
        Set<String> missingFields = Sets.newHashSet(config.getUniqueFields().getFields());
        missingFields.removeAll(uniqueFields);
        nonexistentFields.addAll(missingFields);
    }
    if (!nonexistentFields.isEmpty()) {
        String datatypeFilterSet = (null == config.getDatatypeFilter()) ? "none" : config.getDatatypeFilter().toString();
        if (log.isTraceEnabled()) {
            try {
                log.trace("current size of fields" + metadataHelper.getAllFields(config.getDatatypeFilter()));
                log.trace("all fields: " + metadataHelper.getAllFields(config.getDatatypeFilter()));
            } catch (TableNotFoundException e) {
                log.error("table not found when reading metadata", e);
            }
            log.trace("QueryModel:" + (null == queryModel ? "null" : queryModel));
            log.trace("metadataHelper " + metadataHelper);
        }
        log.trace("QueryModel:" + (null == queryModel ? "null" : queryModel));
        log.trace("metadataHelper " + metadataHelper);
        BadRequestQueryException qe = new BadRequestQueryException(DatawaveErrorCode.FIELDS_NOT_IN_DATA_DICTIONARY, MessageFormat.format("Datatype Filter: {0}, Missing Fields: {1}, Auths: {2}", datatypeFilterSet, nonexistentFields, settings.getQueryAuthorizations()));
        log.error(qe);
        throw new InvalidQueryException(qe);
    }
    stopwatch.stop();
}
Also used : TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) DatawaveQueryException(datawave.query.exceptions.DatawaveQueryException) BadRequestQueryException(datawave.webservice.query.exception.BadRequestQueryException) TraceStopwatch(datawave.util.time.TraceStopwatch) InvalidQueryException(datawave.query.exceptions.InvalidQueryException) HashSet(java.util.HashSet)
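
A hedged sketch of the same reject-unknown-fields pattern, reduced to its exception construction: the UnknownFieldCheck class and its parameters are hypothetical, and the DatawaveErrorCode import assumes that enum lives alongside the other datawave.webservice.query.exception types, which this page does not show.

import java.text.MessageFormat;
import java.util.Set;

import datawave.query.exceptions.InvalidQueryException;
import datawave.webservice.query.exception.BadRequestQueryException;
import datawave.webservice.query.exception.DatawaveErrorCode;

// Hypothetical helper, not part of datawave: rejects a query whose fields are missing from the data dictionary.
public final class UnknownFieldCheck {

    private UnknownFieldCheck() {
    }

    public static void rejectIfUnknown(Set<String> nonexistentFields, String datatypeFilter, String auths) throws InvalidQueryException {
        if (nonexistentFields.isEmpty()) {
            return;
        }
        // The error code plus the formatted detail tells the client which fields were missing,
        // under which datatype filter, and with which authorizations.
        BadRequestQueryException qe = new BadRequestQueryException(DatawaveErrorCode.FIELDS_NOT_IN_DATA_DICTIONARY,
                        MessageFormat.format("Datatype Filter: {0}, Missing Fields: {1}, Auths: {2}", datatypeFilter, nonexistentFields, auths));
        throw new InvalidQueryException(qe);
    }
}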

Example 3 with InvalidQueryException

use of datawave.query.exceptions.InvalidQueryException in project datawave by NationalSecurityAgency.

the class DefaultQueryPlanner method getQueryRanges.

/**
 * Returns a Tuple2&lt;CloseableIterable&lt;QueryPlan&gt;,Boolean&gt; whose elements represent the query plans to use against the shard table and whether or
 * not this is a "full-table-scan" query.
 *
 * @param scannerFactory
 *            factory for the scanners used to stream ranges from the index
 * @param metadataHelper
 *            helper providing field and type metadata
 * @param config
 *            the shard query configuration
 * @param queryTree
 *            the parsed JEXL query tree
 * @return a Tuple2 of the query plans and a flag indicating whether a full table scan is required
 * @throws DatawaveQueryException
 *             if the query cannot be planned
 */
public Tuple2<CloseableIterable<QueryPlan>, Boolean> getQueryRanges(ScannerFactory scannerFactory, MetadataHelper metadataHelper, ShardQueryConfiguration config, JexlNode queryTree) throws DatawaveQueryException {
    Preconditions.checkNotNull(queryTree);
    boolean needsFullTable = false;
    CloseableIterable<QueryPlan> ranges = null;
    // if the query has already been reduced to false there is no reason to do more
    if (QueryPruningVisitor.getState(queryTree) == QueryPruningVisitor.TruthState.FALSE) {
        return new Tuple2<>(emptyCloseableIterator(), false);
    }
    // if we still have an unexecutable tree, then a full table scan is
    // required
    List<String> debugOutput = null;
    if (log.isDebugEnabled()) {
        debugOutput = new ArrayList<>(32);
    }
    STATE state = ExecutableDeterminationVisitor.getState(queryTree, config, metadataHelper, debugOutput);
    if (log.isDebugEnabled()) {
        logDebug(debugOutput, "ExecutableDeterminationVisitor at getQueryRanges:");
    }
    if (state != STATE.EXECUTABLE) {
        if (state == STATE.ERROR) {
            log.warn("After expanding the query, it is determined that the query cannot be executed due to index-only fields mixed with expressions that cannot be run against the index.");
            BadRequestQueryException qe = new BadRequestQueryException(DatawaveErrorCode.INDEX_ONLY_FIELDS_MIXED_INVALID_EXPRESSIONS);
            throw new InvalidQueryException(qe);
        }
        log.warn("After expanding the query, it is determined that the query cannot be executed against the field index and a full table scan is required");
        needsFullTable = true;
    }
    // if we do not need a full table scan, then let's try to compute ranges
    if (!needsFullTable) {
        // count the terms
        int termCount = TermCountingVisitor.countTerms(queryTree);
        if (termCount >= pushdownThreshold) {
            if (log.isTraceEnabled()) {
                log.trace("pushing down query because it has " + termCount + " when our max is " + pushdownThreshold);
            }
            config.setCollapseUids(true);
        }
        TraceStopwatch stopwatch = config.getTimers().newStartedStopwatch("DefaultQueryPlanner - Begin stream of ranges from inverted index");
        RangeStream stream = initializeRangeStream(config, scannerFactory, metadataHelper);
        ranges = stream.streamPlans(queryTree);
        if (log.isTraceEnabled()) {
            log.trace("query stream is " + stream.context());
        }
        // if a term threshold is exceeded and we cannot handle that, then
        // throw unsupported
        boolean thresholdExceeded = StreamContext.EXCEEDED_TERM_THRESHOLD.equals(stream.context());
        if (thresholdExceeded && !config.canHandleExceededTermThreshold()) {
            throw new UnsupportedOperationException(EXCEED_TERM_EXPANSION_ERROR);
        }
        if (StreamContext.UNINDEXED.equals(stream.context())) {
            log.debug("Needs full table scan because of unindexed fields");
            needsFullTable = true;
        } else if (StreamContext.DELAYED_FIELD.equals(stream.context())) {
            log.debug("Needs full table scan because query consists of only delayed expressions");
            needsFullTable = true;
        } else if (IvaratorRequiredVisitor.isIvaratorRequired(queryTree) && !config.canHandleExceededValueThreshold()) {
            // force a full table scan
            log.debug("Needs full table scan because we exceeded the value threshold and config.canHandleExceededValueThreshold() is false");
            needsFullTable = true;
        }
        stopwatch.stop();
    }
    if (needsFullTable) {
        if (config.getFullTableScanEnabled()) {
            ranges = this.getFullScanRange(config, queryTree);
        } else {
            if (log.isTraceEnabled())
                log.trace("Full table scans are not enabled, query will not be run");
            QueryException qe = new QueryException(DatawaveErrorCode.FULL_TABLE_SCAN_REQUIRED_BUT_DISABLED);
            throw new FullTableScansDisallowedException(qe);
        }
        if (log.isTraceEnabled())
            log.trace("Ranges are " + ranges);
    }
    return new Tuple2<>(ranges, needsFullTable);
}
Also used : BadRequestQueryException(datawave.webservice.query.exception.BadRequestQueryException) STATE(datawave.query.jexl.visitors.ExecutableDeterminationVisitor.STATE) DatawaveQueryException(datawave.query.exceptions.DatawaveQueryException) DatawaveFatalQueryException(datawave.query.exceptions.DatawaveFatalQueryException) PreConditionFailedQueryException(datawave.webservice.query.exception.PreConditionFailedQueryException) DoNotPerformOptimizedQueryException(datawave.query.exceptions.DoNotPerformOptimizedQueryException) NotFoundQueryException(datawave.webservice.query.exception.NotFoundQueryException) QueryException(datawave.webservice.query.exception.QueryException) InvalidQueryException(datawave.query.exceptions.InvalidQueryException) FullTableScansDisallowedException(datawave.query.exceptions.FullTableScansDisallowedException) Tuple2(datawave.query.util.Tuple2) TraceStopwatch(datawave.util.time.TraceStopwatch) RangeStream(datawave.query.index.lookup.RangeStream)
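
A hedged sketch condensing how getQueryRanges maps executability state to an outcome. The RangePlanningOutcome class is hypothetical, the DatawaveErrorCode import assumes the enum sits in datawave.webservice.query.exception (not shown on this page), and the real method only throws FullTableScansDisallowedException after the range stream has also been consulted; this sketch keeps just the exception-selection logic.

import datawave.query.exceptions.FullTableScansDisallowedException;
import datawave.query.exceptions.InvalidQueryException;
import datawave.query.jexl.visitors.ExecutableDeterminationVisitor.STATE;
import datawave.webservice.query.exception.BadRequestQueryException;
import datawave.webservice.query.exception.DatawaveErrorCode;
import datawave.webservice.query.exception.QueryException;

// Hypothetical helper, not part of datawave: chooses between "bad request", "full scan disallowed",
// and "plan a full table scan" based on the executability state of the query tree.
public final class RangePlanningOutcome {

    private RangePlanningOutcome() {
    }

    /** Returns true when a full table scan must be planned; throws when planning has to stop. */
    public static boolean requiresFullTableScan(STATE state, boolean fullTableScanEnabled)
                    throws InvalidQueryException, FullTableScansDisallowedException {
        if (state == STATE.ERROR) {
            // Index-only fields mixed with expressions that cannot run against the index: client error.
            throw new InvalidQueryException(new BadRequestQueryException(DatawaveErrorCode.INDEX_ONLY_FIELDS_MIXED_INVALID_EXPRESSIONS));
        }
        if (state != STATE.EXECUTABLE) {
            if (!fullTableScanEnabled) {
                // A full table scan would be required, but the configuration disallows it.
                throw new FullTableScansDisallowedException(new QueryException(DatawaveErrorCode.FULL_TABLE_SCAN_REQUIRED_BUT_DISABLED));
            }
            return true;
        }
        return false;
    }
}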

Aggregations

InvalidQueryException (datawave.query.exceptions.InvalidQueryException)3 BadRequestQueryException (datawave.webservice.query.exception.BadRequestQueryException)3 DatawaveFatalQueryException (datawave.query.exceptions.DatawaveFatalQueryException)2 DatawaveQueryException (datawave.query.exceptions.DatawaveQueryException)2 STATE (datawave.query.jexl.visitors.ExecutableDeterminationVisitor.STATE)2 TraceStopwatch (datawave.util.time.TraceStopwatch)2 PreConditionFailedQueryException (datawave.webservice.query.exception.PreConditionFailedQueryException)2 DoNotPerformOptimizedQueryException (datawave.query.exceptions.DoNotPerformOptimizedQueryException)1 FullTableScansDisallowedException (datawave.query.exceptions.FullTableScansDisallowedException)1 RangeStream (datawave.query.index.lookup.RangeStream)1 SessionOptions (datawave.query.tables.SessionOptions)1 ScannerChunk (datawave.query.tables.async.ScannerChunk)1 Tuple2 (datawave.query.util.Tuple2)1 NotFoundQueryException (datawave.webservice.query.exception.NotFoundQueryException)1 QueryException (datawave.webservice.query.exception.QueryException)1 IOException (java.io.IOException)1 HashSet (java.util.HashSet)1 Nullable (javax.annotation.Nullable)1 IteratorSetting (org.apache.accumulo.core.client.IteratorSetting)1 TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException)1