Search in sources :

Example 1 with STATE

use of datawave.query.jexl.visitors.ExecutableDeterminationVisitor.STATE in project datawave by NationalSecurityAgency.

the class VisitorFunction method apply.

/**
 * Rewrites the query carried by each iterator setting of the given scanner chunk and returns a new chunk with the updated iterator settings.
 * <p>
 * For every iterator setting that has a {@code QueryOptions.QUERY} option, the query may be cleaned of shard/day hints, run through the WhindexVisitor,
 * checked for executability (pulling up and pushing down unexecutable nodes as needed) and have large fielded lists pushed down. The resulting query string
 * is stored in {@code previouslyExpanded} keyed by the original query string and set on a copy of the iterator setting. Iterator settings without a query
 * option are left as copied from the input options.
 * <p>
 * NOTE(review): {@code input} is annotated {@code @Nullable} but is dereferenced on the first line without a null check.
 *
 * @param input
 *            the scanner chunk whose options, ranges and last known location are used to build the result
 * @return a new ScannerChunk with the input's ranges and last known location and a copy of its options carrying the rewritten iterator settings
 * @throws InvalidQueryException
 *             if the executability state is still {@code STATE.ERROR} after pull-up/push-down
 * @throws DatawaveFatalQueryException
 *             if a full table scan is required but disabled, if the query cannot be parsed, or if storing the rewritten query raises a
 *             NullPointerException
 */
@Override
@Nullable
public ScannerChunk apply(@Nullable ScannerChunk input) {
    SessionOptions options = input.getOptions();
    // the new chunk starts without options; they are attached at the very end once all iterators have been processed
    ScannerChunk newSettings = new ScannerChunk(null, input.getRanges(), input.getLastKnownLocation());
    SessionOptions newOptions = new SessionOptions(options);
    for (IteratorSetting setting : options.getIterators()) {
        final String query = setting.getOptions().get(QueryOptions.QUERY);
        // only iterator settings that actually carry a query are rewritten
        if (null != query) {
            // work on a copy of the setting (same priority, name, class and options) so the input setting is not modified
            IteratorSetting newIteratorSetting = new IteratorSetting(setting.getPriority(), setting.getName(), setting.getIteratorClass());
            newIteratorSetting.addOptions(setting.getOptions());
            try {
                // the script is parsed lazily: it stays null until one of the steps below actually needs the tree
                ASTJexlScript script = null;
                // presumably true when this exact query string was already processed by an earlier call -- confirm against previouslyExecutable()
                boolean evaluatedPreviously = previouslyExecutable(query);
                // tracks whether the query string has to be rebuilt from the script at the end
                boolean madeChange = false;
                if (!evaluatedPreviously && config.isCleanupShardsAndDaysQueryHints()) {
                    script = JexlASTHelper.parseAndFlattenJexlQuery(query);
                    script = DateIndexCleanupVisitor.cleanup(script);
                    // marked as changed unconditionally, whether or not the cleanup altered the tree
                    madeChange = true;
                }
                // for a previously evaluated query, start from the stored rewritten form; otherwise start from the query as given
                String newQuery = evaluatedPreviously ? previouslyExpanded.get(query) : query;
                // debug statements are only collected when trace logging is enabled; the list is null otherwise
                List<String> debug = null;
                if (log.isTraceEnabled())
                    debug = Lists.newArrayList();
                if (!config.isDisableWhindexFieldMappings() && !evaluatedPreviously) {
                    if (null == script)
                        script = JexlASTHelper.parseAndFlattenJexlQuery(query);
                    // apply the whindex using the shard date
                    ASTJexlScript rebuiltScript = WhindexVisitor.apply(script, config, getEarliestBeginDate(newSettings.getRanges()), metadataHelper);
                    // if the query changed, save it, and mark it as such
                    if (!TreeEqualityVisitor.isEqual(script, rebuiltScript)) {
                        // NOTE(review): this logs the script as it was BEFORE the WhindexVisitor rewrite (script is reassigned on the next line) -- confirm
                        // that is the intended content of the message
                        log.debug("[" + config.getQuery().getId() + "] The WhindexVisitor updated the query: " + JexlStringBuildingVisitor.buildQuery(script));
                        script = rebuiltScript;
                        madeChange = true;
                    }
                }
                // the executability check is skipped only when the bypass flag is set AND the query was evaluated previously
                if (!config.isBypassExecutabilityCheck() || !evaluatedPreviously) {
                    if (null == script)
                        script = JexlASTHelper.parseAndFlattenJexlQuery(query);
                    if (!ExecutableDeterminationVisitor.isExecutable(script, config, indexedFields, indexOnlyFields, nonEventFields, true, debug, this.metadataHelper)) {
                        if (log.isTraceEnabled()) {
                            log.trace("Need to pull up non-executable query: " + JexlStringBuildingVisitor.buildQuery(script));
                            for (String debugStatement : debug) {
                                log.trace(debugStatement);
                            }
                            DefaultQueryPlanner.logQuery(script, "Failing query:");
                        }
                        // first attempt: pull up the delayed predicates, then re-evaluate the executability state
                        script = (ASTJexlScript) PullupUnexecutableNodesVisitor.pullupDelayedPredicates(script, true, config, indexedFields, indexOnlyFields, nonEventFields, metadataHelper);
                        madeChange = true;
                        STATE state = ExecutableDeterminationVisitor.getState(script, config, indexedFields, indexOnlyFields, nonEventFields, true, debug, metadataHelper);
                        /**
                         * We could achieve better performance if we live with the small number of queries that error due to the full table scan exception.
                         *
                         * Either look at improving PushdownUnexecutableNodesVisitor or avoid the process altogether.
                         */
                        if (state != STATE.EXECUTABLE) {
                            if (log.isTraceEnabled()) {
                                log.trace("Need to push down non-executable query: " + JexlStringBuildingVisitor.buildQuery(script));
                                for (String debugStatement : debug) {
                                    log.trace(debugStatement);
                                }
                            }
                            // second attempt: push the unexecutable predicates down
                            script = (ASTJexlScript) PushdownUnexecutableNodesVisitor.pushdownPredicates(script, true, config, indexedFields, indexOnlyFields, nonEventFields, metadataHelper);
                        }
                        // the state is recomputed here even when no push-down happened
                        state = ExecutableDeterminationVisitor.getState(script, config, indexedFields, indexOnlyFields, nonEventFields, true, debug, metadataHelper);
                        if (state != STATE.EXECUTABLE) {
                            // an ERROR state is rejected outright as an invalid query
                            if (state == STATE.ERROR) {
                                log.warn("After expanding the query, it is determined that the query cannot be executed due to index-only fields mixed with expressions that cannot be run against the index.");
                                BadRequestQueryException qe = new BadRequestQueryException(DatawaveErrorCode.INDEX_ONLY_FIELDS_MIXED_INVALID_EXPRESSIONS);
                                throw new InvalidQueryException(qe);
                            }
                            // any other non-executable state means a full table scan, which is fatal when full table scans are disabled
                            log.warn("After expanding the query, it is determined that the query cannot be executed against the field index and a full table scan is required");
                            if (!config.getFullTableScanEnabled()) {
                                if (log.isTraceEnabled()) {
                                    log.trace("Full Table fail of " + JexlStringBuildingVisitor.buildQuery(script));
                                    for (String debugStatement : debug) {
                                        log.trace(debugStatement);
                                    }
                                    DefaultQueryPlanner.logQuery(script, "Failing query:");
                                }
                                PreConditionFailedQueryException qe = new PreConditionFailedQueryException(DatawaveErrorCode.FULL_TABLE_SCAN_REQUIRED_BUT_DISABLED);
                                throw new DatawaveFatalQueryException(qe);
                            }
                        }
                        if (log.isTraceEnabled()) {
                            for (String debugStatement : debug) {
                                log.trace(debugStatement);
                            }
                            DefaultQueryPlanner.logQuery(script, "Query pushing down large fielded lists:");
                        }
                    }
                }
                if (config.getSerializeQueryIterator()) {
                    serializeQuery(newIteratorSetting);
                } else {
                    if (!evaluatedPreviously) {
                        // if we have an hdfs configuration, then we can pushdown large fielded lists to an ivarator
                        // (skipped when the setting carries a BATCHED_QUERY option)
                        if (config.getHdfsSiteConfigURLs() != null && setting.getOptions().get(QueryOptions.BATCHED_QUERY) == null) {
                            if (null == script)
                                script = JexlASTHelper.parseAndFlattenJexlQuery(query);
                            try {
                                script = pushdownLargeFieldedLists(config, script);
                                madeChange = true;
                            } catch (IOException ioe) {
                                // best effort: on failure the script is kept as it was before the pushdown attempt
                                log.error("Unable to pushdown large fielded lists....leaving in expanded form", ioe);
                            }
                        }
                    }
                }
                // only recompile the script if changes were made to the query
                if (madeChange)
                    newQuery = JexlStringBuildingVisitor.buildQuery(script);
                // remember the rewritten query under the original query string
                // NOTE(review): a NullPointerException from put() is used as the signal that newQuery is null; this presumably relies on
                // previouslyExpanded rejecting null values -- confirm against its declared type
                try {
                    previouslyExpanded.put(query, newQuery);
                } catch (NullPointerException npe) {
                    throw new DatawaveFatalQueryException(String.format("New query is null! madeChange: %b, qid: %s", madeChange, setting.getOptions().get(QueryOptions.QUERY_ID)), npe);
                }
                // test the final script for thresholds
                // NOTE(review): script is still null here when the query was evaluated previously and the executability check was bypassed, because
                // every parsing step above was skipped -- confirm validateQuerySize and the logging calls below accept a null script
                DefaultQueryPlanner.validateQuerySize("VisitorFunction", script, config, false);
                // swap the rewritten iterator setting in for the original one of the same name
                newIteratorSetting.addOption(QueryOptions.QUERY, newQuery);
                newOptions.removeScanIterator(setting.getName());
                newOptions.addScanIterator(newIteratorSetting);
                if (log.isDebugEnabled()) {
                    log.debug("VisitorFunction result: " + newSettings.getRanges());
                }
                // trace prints the formatted query via the single-argument overload; debug passes DefaultQueryPlanner.maxChildNodesToPrint
                if (log.isTraceEnabled()) {
                    DefaultQueryPlanner.logTrace(PrintingVisitor.formattedQueryStringList(script), "VistorFunction::apply method");
                } else if (log.isDebugEnabled()) {
                    DefaultQueryPlanner.logDebug(PrintingVisitor.formattedQueryStringList(script, DefaultQueryPlanner.maxChildNodesToPrint), "VistorFunction::apply method");
                }
            } catch (ParseException e) {
                // a query that cannot be parsed is fatal for this chunk
                throw new DatawaveFatalQueryException(e);
            }
        }
    }
    newSettings.setOptions(newOptions);
    return newSettings;
}
Also used : BadRequestQueryException(datawave.webservice.query.exception.BadRequestQueryException) PreConditionFailedQueryException(datawave.webservice.query.exception.PreConditionFailedQueryException) SessionOptions(datawave.query.tables.SessionOptions) ASTJexlScript(org.apache.commons.jexl2.parser.ASTJexlScript) STATE(datawave.query.jexl.visitors.ExecutableDeterminationVisitor.STATE) ScannerChunk(datawave.query.tables.async.ScannerChunk) IOException(java.io.IOException) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) DatawaveFatalQueryException(datawave.query.exceptions.DatawaveFatalQueryException) ParseException(org.apache.commons.jexl2.parser.ParseException) InvalidQueryException(datawave.query.exceptions.InvalidQueryException) Nullable(javax.annotation.Nullable)

Example 2 with STATE

use of datawave.query.jexl.visitors.ExecutableDeterminationVisitor.STATE in project datawave by NationalSecurityAgency.

the class DefaultQueryPlanner method getQueryRanges.

/**
 * Returns a Tuple2&lt;CloseableIterable&lt;QueryPlan&gt;,Boolean&gt; whose elements represent the query plans to use for querying the shard table and
 * whether or not this is a "full-table-scan" query.
 * <p>
 * A query tree that {@code QueryPruningVisitor} reports as {@code FALSE} yields an empty iterable immediately. Otherwise the tree is checked for
 * executability; an executable tree is streamed through a {@code RangeStream}, and the stream context may still force a full table scan. When a full table
 * scan is needed it is only performed if {@code config.getFullTableScanEnabled()} is true.
 *
 * @param scannerFactory
 *            passed to {@code initializeRangeStream} when the range stream is built
 * @param metadataHelper
 *            passed to {@code ExecutableDeterminationVisitor.getState} and {@code initializeRangeStream}
 * @param config
 *            the query configuration; note that {@code setCollapseUids(true)} is called on it when the term count reaches {@code pushdownThreshold}
 * @param queryTree
 *            the query tree to plan; must not be null
 * @return the query plans paired with {@code true} when they come from a full table scan, {@code false} otherwise
 * @throws DatawaveQueryException
 *             declared by this method; the visible code throws {@code InvalidQueryException} when the executability state is {@code STATE.ERROR} and
 *             {@code FullTableScansDisallowedException} when a full table scan is required but disabled
 * @throws UnsupportedOperationException
 *             if the range stream exceeded the term threshold and {@code config.canHandleExceededTermThreshold()} is false
 */
public Tuple2<CloseableIterable<QueryPlan>, Boolean> getQueryRanges(ScannerFactory scannerFactory, MetadataHelper metadataHelper, ShardQueryConfiguration config, JexlNode queryTree) throws DatawaveQueryException {
    Preconditions.checkNotNull(queryTree);
    boolean needsFullTable = false;
    CloseableIterable<QueryPlan> ranges = null;
    // if the query has already been reduced to false there is no reason to do more
    if (QueryPruningVisitor.getState(queryTree) == QueryPruningVisitor.TruthState.FALSE) {
        return new Tuple2<>(emptyCloseableIterator(), false);
    }
    // if we still have an unexecutable tree, then a full table scan is
    // required
    List<String> debugOutput = null;
    if (log.isDebugEnabled()) {
        debugOutput = new ArrayList<>(32);
    }
    STATE state = ExecutableDeterminationVisitor.getState(queryTree, config, metadataHelper, debugOutput);
    if (log.isDebugEnabled()) {
        logDebug(debugOutput, "ExecutableDeterminationVisitor at getQueryRanges:");
    }
    if (state != STATE.EXECUTABLE) {
        // an ERROR state is rejected outright as an invalid query
        if (state == STATE.ERROR) {
            log.warn("After expanding the query, it is determined that the query cannot be executed due to index-only fields mixed with expressions that cannot be run against the index.");
            BadRequestQueryException qe = new BadRequestQueryException(DatawaveErrorCode.INDEX_ONLY_FIELDS_MIXED_INVALID_EXPRESSIONS);
            throw new InvalidQueryException(qe);
        }
        log.warn("After expanding the query, it is determined that the query cannot be executed against the field index and a full table scan is required");
        needsFullTable = true;
    }
    // if we have an executable tree and do not (yet) need a full table
    // scan, then lets try to compute ranges
    if (!needsFullTable) {
        // count the terms
        int termCount = TermCountingVisitor.countTerms(queryTree);
        if (termCount >= pushdownThreshold) {
            if (log.isTraceEnabled()) {
                log.trace("pushing down query because it has " + termCount + " when our max is " + pushdownThreshold);
            }
            config.setCollapseUids(true);
        }
        TraceStopwatch stopwatch = config.getTimers().newStartedStopwatch("DefaultQueryPlanner - Begin stream of ranges from inverted index");
        try {
            RangeStream stream = initializeRangeStream(config, scannerFactory, metadataHelper);
            ranges = stream.streamPlans(queryTree);
            if (log.isTraceEnabled()) {
                log.trace("query stream is " + stream.context());
            }
            // if a term threshold is exceeded and we cannot handle that, then
            // throw unsupported
            boolean thresholdExceeded = StreamContext.EXCEEDED_TERM_THRESHOLD.equals(stream.context());
            if (thresholdExceeded && !config.canHandleExceededTermThreshold()) {
                throw new UnsupportedOperationException(EXCEED_TERM_EXPANSION_ERROR);
            }
            if (StreamContext.UNINDEXED.equals(stream.context())) {
                log.debug("Needs full table scan because of unindexed fields");
                needsFullTable = true;
            } else if (StreamContext.DELAYED_FIELD.equals(stream.context())) {
                log.debug("Needs full table scan because query consists of only delayed expressions");
                needsFullTable = true;
            } else if (IvaratorRequiredVisitor.isIvaratorRequired(queryTree) && !config.canHandleExceededValueThreshold()) {
                // force a full table scan
                log.debug("Needs full table scan because we exceeded the value threshold and config.canHandleExceededValueThreshold() is false");
                needsFullTable = true;
            }
        } finally {
            // stop the timer on every exit path, including when building/streaming the ranges throws or the term threshold check above fails,
            // so the stopwatch is never left running
            stopwatch.stop();
        }
    }
    if (needsFullTable) {
        if (config.getFullTableScanEnabled()) {
            // NOTE(review): any iterable produced by streamPlans above is replaced here without being closed -- confirm that is safe
            ranges = this.getFullScanRange(config, queryTree);
        } else {
            if (log.isTraceEnabled())
                log.trace("Full table scans are not enabled, query will not be run");
            QueryException qe = new QueryException(DatawaveErrorCode.FULL_TABLE_SCAN_REQUIRED_BUT_DISABLED);
            throw new FullTableScansDisallowedException(qe);
        }
        if (log.isTraceEnabled())
            log.trace("Ranges are " + ranges);
    }
    return new Tuple2<>(ranges, needsFullTable);
}
Also used : BadRequestQueryException(datawave.webservice.query.exception.BadRequestQueryException) STATE(datawave.query.jexl.visitors.ExecutableDeterminationVisitor.STATE) DatawaveQueryException(datawave.query.exceptions.DatawaveQueryException) DatawaveFatalQueryException(datawave.query.exceptions.DatawaveFatalQueryException) PreConditionFailedQueryException(datawave.webservice.query.exception.PreConditionFailedQueryException) DoNotPerformOptimizedQueryException(datawave.query.exceptions.DoNotPerformOptimizedQueryException) NotFoundQueryException(datawave.webservice.query.exception.NotFoundQueryException) QueryException(datawave.webservice.query.exception.QueryException) InvalidQueryException(datawave.query.exceptions.InvalidQueryException) BadRequestQueryException(datawave.webservice.query.exception.BadRequestQueryException) FullTableScansDisallowedException(datawave.query.exceptions.FullTableScansDisallowedException) Tuple2(datawave.query.util.Tuple2) TraceStopwatch(datawave.util.time.TraceStopwatch) RangeStream(datawave.query.index.lookup.RangeStream) InvalidQueryException(datawave.query.exceptions.InvalidQueryException)

Aggregations

DatawaveFatalQueryException (datawave.query.exceptions.DatawaveFatalQueryException)2 InvalidQueryException (datawave.query.exceptions.InvalidQueryException)2 STATE (datawave.query.jexl.visitors.ExecutableDeterminationVisitor.STATE)2 BadRequestQueryException (datawave.webservice.query.exception.BadRequestQueryException)2 PreConditionFailedQueryException (datawave.webservice.query.exception.PreConditionFailedQueryException)2 DatawaveQueryException (datawave.query.exceptions.DatawaveQueryException)1 DoNotPerformOptimizedQueryException (datawave.query.exceptions.DoNotPerformOptimizedQueryException)1 FullTableScansDisallowedException (datawave.query.exceptions.FullTableScansDisallowedException)1 RangeStream (datawave.query.index.lookup.RangeStream)1 SessionOptions (datawave.query.tables.SessionOptions)1 ScannerChunk (datawave.query.tables.async.ScannerChunk)1 Tuple2 (datawave.query.util.Tuple2)1 TraceStopwatch (datawave.util.time.TraceStopwatch)1 NotFoundQueryException (datawave.webservice.query.exception.NotFoundQueryException)1 QueryException (datawave.webservice.query.exception.QueryException)1 IOException (java.io.IOException)1 Nullable (javax.annotation.Nullable)1 IteratorSetting (org.apache.accumulo.core.client.IteratorSetting)1 ASTJexlScript (org.apache.commons.jexl2.parser.ASTJexlScript)1 ParseException (org.apache.commons.jexl2.parser.ParseException)1