Search in sources :

Example 1 with ASTJexlScript

use of org.apache.commons.jexl2.parser.ASTJexlScript in project datawave by NationalSecurityAgency.

the class QueryJexl method createNormalizedExpression.

// =================================
// private methods
/**
 * Parses the supplied query text into a JEXL script, hands the script to {@code normalizeScript} together with a fresh, empty node list, and wraps the
 * result in a {@code NormalizedExpression}.
 *
 * @param query
 *            the query text to parse
 * @return a {@code NormalizedExpression} constructed from the engine, the original query text, and the parsed script
 * @throws AssertionError
 *             wrapping the lexer ({@code TokenMgrError}) or parser ({@code ParseException}) failure raised while building the expression
 */
private Expression createNormalizedExpression(final String query) {
    try {
        final ASTJexlScript parsedScript = JexlASTHelper.parseJexlQuery(query);
        // normalizeScript receives its own empty working list on every call
        normalizeScript(parsedScript, new LinkedList<SimpleNode>());
        return new NormalizedExpression(jEngine, query, parsedScript);
    } catch (TokenMgrError | org.apache.commons.jexl2.parser.ParseException parseFailure) {
        // surface lexer/parser failures as assertion failures, keeping the original as the cause
        throw new AssertionError(parseFailure);
    }
}
Also used : ASTJexlScript(org.apache.commons.jexl2.parser.ASTJexlScript) TokenMgrError(org.apache.commons.jexl2.parser.TokenMgrError) ParseException(java.text.ParseException) LinkedList(java.util.LinkedList) SimpleNode(org.apache.commons.jexl2.parser.SimpleNode)

Example 2 with ASTJexlScript

use of org.apache.commons.jexl2.parser.ASTJexlScript in project datawave by NationalSecurityAgency.

the class QueryIterator method buildDocumentIterator.

/**
 * Build the document iterator.
 * <p>
 * When {@code batchedQueries >= 1}, each entry of {@code batchStack} (a range paired with its query string) is parsed, given its own key source via
 * {@code getOrSetKeySource}, seeked to its own range, and collected into a {@code NestedQueryIterator}. Otherwise a single key source is obtained for
 * {@code documentRange} and the class-level {@code script}. In both cases the resulting iterator is initialized before it is returned and
 * {@code initKeySource} is updated.
 *
 * @param documentRange
 *            the range passed to {@code getOrSetKeySource} when no batched queries are present
 * @param seekRange
 *            the range used to decide which batched queries can be skipped; the skip check is bypassed when this is null or has an infinite start key
 * @param columnFamilies
 *            the column families passed through to {@code seek}
 * @param inclusive
 *            the inclusive flag passed through to {@code seek}
 * @return the initialized document iterator
 * @throws IOException
 *             if a batched query cannot be parsed or evaluated into a {@code JexlEvaluation}
 * @throws ConfigException
 *             declared by this method; presumably propagated from {@code getOrSetKeySource} (not visible here)
 * @throws InstantiationException
 *             declared by this method; presumably propagated from {@code getOrSetKeySource} (not visible here)
 * @throws IllegalAccessException
 *             declared by this method; presumably propagated from {@code getOrSetKeySource} (not visible here)
 */
protected NestedIterator<Key> buildDocumentIterator(Range documentRange, Range seekRange, Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException, ConfigException, InstantiationException, IllegalAccessException {
    NestedIterator<Key> docIter;
    if (log.isTraceEnabled()) {
        log.trace("Batched queries is " + batchedQueries);
    }
    if (batchedQueries >= 1) {
        List<NestedQuery<Key>> nests = Lists.newArrayList();
        for (Entry<Range, String> queries : batchStack) {
            Range myRange = queries.getKey();
            String myQuery = queries.getValue();
            if (log.isTraceEnabled()) {
                log.trace("Adding " + myRange + " from seekrange " + seekRange);
            }
            /*
             * Only perform the following checks if a seek range is specified and its start key is not infinite
             */
            if (null != seekRange && !seekRange.isInfiniteStartKey()) {
                Key seekStartKey = seekRange.getStartKey();
                Key myStartKey = myRange.getStartKey();
                /*
                 * Skip this batched query when its start key is not contained in the seek range.
                 */
                if (null != myStartKey && null != seekStartKey && !seekRange.contains(myStartKey)) {
                    if (log.isTraceEnabled()) {
                        log.trace("skipping " + myRange);
                    }
                    continue;
                }
            }
            // Each batched query evaluates with its own arithmetic: a clone when the configured one is stateful, a fresh default otherwise
            JexlArithmetic myArithmetic;
            if (arithmetic instanceof StatefulArithmetic) {
                myArithmetic = ((StatefulArithmetic) arithmetic).clone();
            } else {
                myArithmetic = new DefaultArithmetic();
            }
            // Parse the query
            ASTJexlScript myScript;
            JexlEvaluation eval;
            try {
                myScript = JexlASTHelper.parseJexlQuery(myQuery);
                eval = new JexlEvaluation(myQuery, myArithmetic);
            } catch (Exception e) {
                // report the batched query that actually failed, not the overall query
                throw new IOException("Could not parse the JEXL query: '" + myQuery + "'", e);
            }
            // If we had an event-specific range previously, we need to
            // reset it back
            // to the source we created during init
            NestedIterator<Key> subDocIter = getOrSetKeySource(myRange, myScript);
            if (log.isTraceEnabled()) {
                log.trace("Using init()'ialized source: " + subDocIter.getClass().getName());
            }
            if (gatherTimingDetails()) {
                subDocIter = new EvaluationTrackingNestedIterator(QuerySpan.Stage.FieldIndexTree, trackingSpan, subDocIter, myEnvironment);
            }
            // Seek() the boolean logic stuff
            ((SeekableIterator) subDocIter).seek(myRange, columnFamilies, inclusive);
            NestedQuery<Key> nestedQueryObj = new NestedQuery<>();
            nestedQueryObj.setQuery(myQuery);
            nestedQueryObj.setIterator(subDocIter);
            nestedQueryObj.setQueryScript(myScript);
            nestedQueryObj.setEvaluation(eval);
            nestedQueryObj.setRange(myRange);
            nests.add(nestedQueryObj);
        }
        docIter = new NestedQueryIterator<>(nests);
        // now lets start off the nested iterator
        docIter.initialize();
        initKeySource = docIter;
    } else {
        // If we had an event-specific range previously, we need to reset it back
        // to the source we created during init
        docIter = getOrSetKeySource(documentRange, script);
        // the key source is recorded before any timing wrapper is applied
        initKeySource = docIter;
        if (log.isTraceEnabled()) {
            log.trace("Using init()'ialized source: " + this.initKeySource.getClass().getName());
        }
        if (gatherTimingDetails()) {
            docIter = new EvaluationTrackingNestedIterator(QuerySpan.Stage.FieldIndexTree, trackingSpan, docIter, myEnvironment);
        }
        // Seek() the boolean logic stuff
        // NOTE(review): this seeks with `range` (not a parameter of this method) rather than documentRange - confirm that is intended
        ((SeekableIterator) docIter).seek(range, columnFamilies, inclusive);
        // now lets start off the nested iterator
        docIter.initialize();
    }
    return docIter;
}
Also used : DefaultArithmetic(datawave.query.jexl.DefaultArithmetic) ASTJexlScript(org.apache.commons.jexl2.parser.ASTJexlScript) JexlEvaluation(datawave.query.function.JexlEvaluation) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) Range(org.apache.accumulo.core.data.Range) ConfigException(org.apache.zookeeper.server.quorum.QuorumPeerConfig.ConfigException) IterationInterruptedException(org.apache.accumulo.core.iterators.IterationInterruptedException) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) TabletClosedException(org.apache.accumulo.tserver.tablet.TabletClosedException) MalformedURLException(java.net.MalformedURLException) JexlArithmetic(org.apache.commons.jexl2.JexlArithmetic) StatefulArithmetic(datawave.query.jexl.StatefulArithmetic) EvaluationTrackingNestedIterator(datawave.query.iterator.profile.EvaluationTrackingNestedIterator) Key(org.apache.accumulo.core.data.Key)

Example 3 with ASTJexlScript

use of org.apache.commons.jexl2.parser.ASTJexlScript in project datawave by NationalSecurityAgency.

the class EdgeQueryLogic method applyQueryModel.

/**
 * Runs a JEXL query string through the edge query model.
 * <p>
 * The query is parsed and flattened into an ASTJexlScript, QueryModelVisitor applies the edge query model to that script using the model's internal field
 * names, and JexlStringBuildingVisitor renders the resulting script back into a query string.
 *
 * @param queryString
 *            the JEXL query string to translate
 * @return the query string built from the model-applied script
 * @throws IllegalStateException
 *             wrapping any failure (including errors) raised while parsing, applying the model, or rebuilding the query string
 */
protected String applyQueryModel(String queryString) {
    try {
        final ASTJexlScript parsed = JexlASTHelper.parseAndFlattenJexlQuery(queryString);
        // copy of the model's internal field names, handed to the visitor
        final HashSet<String> internalFieldNames = new HashSet<>(getEdgeQueryModel().getAllInternalFieldNames());
        final ASTJexlScript translated = QueryModelVisitor.applyModel(parsed, getEdgeQueryModel(), internalFieldNames);
        return JexlStringBuildingVisitor.buildQuery(translated);
    } catch (Throwable failure) {
        throw new IllegalStateException("Edge query model could not be applied", failure);
    }
}
Also used : ASTJexlScript(org.apache.commons.jexl2.parser.ASTJexlScript) HashSet(java.util.HashSet)

Example 4 with ASTJexlScript

use of org.apache.commons.jexl2.parser.ASTJexlScript in project datawave by NationalSecurityAgency.

the class VisitorFunction method apply.

/**
 * Produces a new {@link ScannerChunk} whose iterator settings carry a (possibly rewritten) query.
 * <p>
 * For every iterator setting on the input that has a {@code QueryOptions.QUERY} option, a replacement setting is built with the same priority, name, class
 * and options. Depending on configuration and on whether the query was handled before (see {@code previouslyExecutable} / {@code previouslyExpanded}), the
 * query may be passed through: shard/day hint cleanup, whindex field mapping, pull-up and push-down of non-executable nodes, and pushdown of large fielded
 * lists. The query string is only rebuilt from the script when one of those steps reported a change. The final query is recorded in
 * {@code previouslyExpanded}, size-validated, and stored on the replacement setting.
 *
 * @param input
 *            the scanner chunk whose options, ranges and last known location are read
 * @return a new scanner chunk with the same ranges and last known location and with the updated session options
 * @throws DatawaveFatalQueryException
 *             if the query cannot be parsed, if a full table scan is required but disabled, or if the final query string is null when it is cached
 * @throws InvalidQueryException
 *             if the executability state is {@code STATE.ERROR} after pull-up/push-down
 */
@Override
@Nullable
public ScannerChunk apply(@Nullable ScannerChunk input) {
    // NOTE(review): input is annotated @Nullable but is dereferenced here without a null check - confirm callers never pass null
    SessionOptions options = input.getOptions();
    ScannerChunk newSettings = new ScannerChunk(null, input.getRanges(), input.getLastKnownLocation());
    SessionOptions newOptions = new SessionOptions(options);
    for (IteratorSetting setting : options.getIterators()) {
        final String query = setting.getOptions().get(QueryOptions.QUERY);
        // iterator settings without a query option are left untouched
        if (null != query) {
            IteratorSetting newIteratorSetting = new IteratorSetting(setting.getPriority(), setting.getName(), setting.getIteratorClass());
            newIteratorSetting.addOptions(setting.getOptions());
            try {
                // script is parsed lazily: it stays null until the first step that actually needs the tree
                ASTJexlScript script = null;
                boolean evaluatedPreviously = previouslyExecutable(query);
                // tracks whether any step below rewrote the tree, so the query string is only rebuilt when needed
                boolean madeChange = false;
                if (!evaluatedPreviously && config.isCleanupShardsAndDaysQueryHints()) {
                    script = JexlASTHelper.parseAndFlattenJexlQuery(query);
                    script = DateIndexCleanupVisitor.cleanup(script);
                    madeChange = true;
                }
                // start from the cached expansion when this query was seen before, otherwise from the incoming query
                String newQuery = evaluatedPreviously ? previouslyExpanded.get(query) : query;
                // debug statements are only collected when trace logging is on; debug stays null otherwise
                List<String> debug = null;
                if (log.isTraceEnabled())
                    debug = Lists.newArrayList();
                if (!config.isDisableWhindexFieldMappings() && !evaluatedPreviously) {
                    if (null == script)
                        script = JexlASTHelper.parseAndFlattenJexlQuery(query);
                    // apply the whindex using the shard date
                    ASTJexlScript rebuiltScript = WhindexVisitor.apply(script, config, getEarliestBeginDate(newSettings.getRanges()), metadataHelper);
                    // if the query changed, save it, and mark it as such
                    if (!TreeEqualityVisitor.isEqual(script, rebuiltScript)) {
                        // NOTE(review): this logs `script` before it is replaced by rebuiltScript, i.e. the tree as it was before the WhindexVisitor ran - confirm which one is intended
                        log.debug("[" + config.getQuery().getId() + "] The WhindexVisitor updated the query: " + JexlStringBuildingVisitor.buildQuery(script));
                        script = rebuiltScript;
                        madeChange = true;
                    }
                }
                // the executability check is skipped only when it is bypassed by config AND the query was evaluated previously
                if (!config.isBypassExecutabilityCheck() || !evaluatedPreviously) {
                    if (null == script)
                        script = JexlASTHelper.parseAndFlattenJexlQuery(query);
                    if (!ExecutableDeterminationVisitor.isExecutable(script, config, indexedFields, indexOnlyFields, nonEventFields, true, debug, this.metadataHelper)) {
                        if (log.isTraceEnabled()) {
                            log.trace("Need to pull up non-executable query: " + JexlStringBuildingVisitor.buildQuery(script));
                            for (String debugStatement : debug) {
                                log.trace(debugStatement);
                            }
                            DefaultQueryPlanner.logQuery(script, "Failing query:");
                        }
                        // first attempt: pull up delayed predicates, then re-check the state
                        script = (ASTJexlScript) PullupUnexecutableNodesVisitor.pullupDelayedPredicates(script, true, config, indexedFields, indexOnlyFields, nonEventFields, metadataHelper);
                        madeChange = true;
                        STATE state = ExecutableDeterminationVisitor.getState(script, config, indexedFields, indexOnlyFields, nonEventFields, true, debug, metadataHelper);
                        /*
                         * We could achieve better performance if we live with the small number of queries that error due to the full table scan exception.
                         *
                         * Either look at improving PushdownUnexecutableNodesVisitor or avoid the process altogether.
                         */
                        // second attempt: push down predicates when the pull-up did not make the query executable
                        if (state != STATE.EXECUTABLE) {
                            if (log.isTraceEnabled()) {
                                log.trace("Need to push down non-executable query: " + JexlStringBuildingVisitor.buildQuery(script));
                                for (String debugStatement : debug) {
                                    log.trace(debugStatement);
                                }
                            }
                            script = (ASTJexlScript) PushdownUnexecutableNodesVisitor.pushdownPredicates(script, true, config, indexedFields, indexOnlyFields, nonEventFields, metadataHelper);
                        }
                        // the state is re-evaluated here whether or not the push-down ran
                        state = ExecutableDeterminationVisitor.getState(script, config, indexedFields, indexOnlyFields, nonEventFields, true, debug, metadataHelper);
                        if (state != STATE.EXECUTABLE) {
                            if (state == STATE.ERROR) {
                                log.warn("After expanding the query, it is determined that the query cannot be executed due to index-only fields mixed with expressions that cannot be run against the index.");
                                BadRequestQueryException qe = new BadRequestQueryException(DatawaveErrorCode.INDEX_ONLY_FIELDS_MIXED_INVALID_EXPRESSIONS);
                                throw new InvalidQueryException(qe);
                            }
                            log.warn("After expanding the query, it is determined that the query cannot be executed against the field index and a full table scan is required");
                            // a non-executable, non-error query is only allowed through when full table scans are enabled
                            if (!config.getFullTableScanEnabled()) {
                                if (log.isTraceEnabled()) {
                                    log.trace("Full Table fail of " + JexlStringBuildingVisitor.buildQuery(script));
                                    for (String debugStatement : debug) {
                                        log.trace(debugStatement);
                                    }
                                    DefaultQueryPlanner.logQuery(script, "Failing query:");
                                }
                                PreConditionFailedQueryException qe = new PreConditionFailedQueryException(DatawaveErrorCode.FULL_TABLE_SCAN_REQUIRED_BUT_DISABLED);
                                throw new DatawaveFatalQueryException(qe);
                            }
                        }
                        if (log.isTraceEnabled()) {
                            for (String debugStatement : debug) {
                                log.trace(debugStatement);
                            }
                            DefaultQueryPlanner.logQuery(script, "Query pushing down large fielded lists:");
                        }
                    }
                }
                if (config.getSerializeQueryIterator()) {
                    serializeQuery(newIteratorSetting);
                } else {
                    if (!evaluatedPreviously) {
                        // if we have an hdfs configuration, then we can pushdown large fielded lists to an ivarator
                        if (config.getHdfsSiteConfigURLs() != null && setting.getOptions().get(QueryOptions.BATCHED_QUERY) == null) {
                            if (null == script)
                                script = JexlASTHelper.parseAndFlattenJexlQuery(query);
                            try {
                                script = pushdownLargeFieldedLists(config, script);
                                madeChange = true;
                            } catch (IOException ioe) {
                                // best effort: on failure the script is left as it was and processing continues
                                log.error("Unable to pushdown large fielded lists....leaving in expanded form", ioe);
                            }
                        }
                    }
                }
                // only recompile the script if changes were made to the query
                if (madeChange)
                    newQuery = JexlStringBuildingVisitor.buildQuery(script);
                // remember the final query for this input query; a NullPointerException from the put is reported as a null new query
                try {
                    previouslyExpanded.put(query, newQuery);
                } catch (NullPointerException npe) {
                    throw new DatawaveFatalQueryException(String.format("New query is null! madeChange: %b, qid: %s", madeChange, setting.getOptions().get(QueryOptions.QUERY_ID)), npe);
                }
                // test the final script for thresholds
                // NOTE(review): script is still null here when no step above parsed the query (e.g. evaluatedPreviously is true and the executability check is bypassed) - confirm validateQuerySize and the PrintingVisitor calls below tolerate null
                DefaultQueryPlanner.validateQuerySize("VisitorFunction", script, config, false);
                // swap the original iterator setting for the replacement carrying the final query
                newIteratorSetting.addOption(QueryOptions.QUERY, newQuery);
                newOptions.removeScanIterator(setting.getName());
                newOptions.addScanIterator(newIteratorSetting);
                if (log.isDebugEnabled()) {
                    log.debug("VisitorFunction result: " + newSettings.getRanges());
                }
                // trace prints the full formatted query; debug prints it limited by DefaultQueryPlanner.maxChildNodesToPrint
                if (log.isTraceEnabled()) {
                    DefaultQueryPlanner.logTrace(PrintingVisitor.formattedQueryStringList(script), "VistorFunction::apply method");
                } else if (log.isDebugEnabled()) {
                    DefaultQueryPlanner.logDebug(PrintingVisitor.formattedQueryStringList(script, DefaultQueryPlanner.maxChildNodesToPrint), "VistorFunction::apply method");
                }
            } catch (ParseException e) {
                throw new DatawaveFatalQueryException(e);
            }
        }
    }
    newSettings.setOptions(newOptions);
    return newSettings;
}
Also used : BadRequestQueryException(datawave.webservice.query.exception.BadRequestQueryException) PreConditionFailedQueryException(datawave.webservice.query.exception.PreConditionFailedQueryException) SessionOptions(datawave.query.tables.SessionOptions) ASTJexlScript(org.apache.commons.jexl2.parser.ASTJexlScript) STATE(datawave.query.jexl.visitors.ExecutableDeterminationVisitor.STATE) ScannerChunk(datawave.query.tables.async.ScannerChunk) IOException(java.io.IOException) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) DatawaveFatalQueryException(datawave.query.exceptions.DatawaveFatalQueryException) ParseException(org.apache.commons.jexl2.parser.ParseException) InvalidQueryException(datawave.query.exceptions.InvalidQueryException) Nullable(javax.annotation.Nullable)

Example 5 with ASTJexlScript

use of org.apache.commons.jexl2.parser.ASTJexlScript in project datawave by NationalSecurityAgency.

the class ShardIndexQueryTable method initialize.

/**
 * Builds and populates the {@code ShardIndexQueryConfiguration} for the supplied query settings.
 * <p>
 * The steps, in order: create the scanner factory and metadata helper; reject an empty query; apply optional model name, model table name and datatype
 * filter parameters; copy connector, authorizations, table names and the date range (defaulting to {@code new Date(0)} and
 * {@code new Date(Long.MAX_VALUE)} when a bound is absent) onto the configuration; convert the trimmed query to JEXL; parse and flatten it; expand
 * unfielded terms; apply the query model; fetch datatypes and indexed-field metadata; expand multi-normalized terms; and finally record the literals,
 * patterns and their ranges on the configuration.
 *
 * @param connection
 *            the connector used for the scanner factory, the metadata helper and the configuration
 * @param settings
 *            the query settings supplying the query text, parameters and date range
 * @param auths
 *            the authorizations passed to the metadata helper and stored on the configuration
 * @return the populated configuration; when unfielded expansion yields nothing, the configuration is returned early with empty normalized terms and
 *         patterns
 * @throws IllegalArgumentException
 *             if the query in {@code settings} is empty
 * @throws Exception
 *             propagated from any of the parsing, expansion or metadata calls
 */
@Override
public GenericQueryConfiguration initialize(Connector connection, Query settings, Set<Authorizations> auths) throws Exception {
    final ShardIndexQueryConfiguration config = new ShardIndexQueryConfiguration(this, settings);
    this.scannerFactory = new ScannerFactory(connection);
    final MetadataHelper metadataHelper = initializeMetadataHelper(connection, config.getMetadataTableName(), auths);

    if (StringUtils.isEmpty(settings.getQuery())) {
        throw new IllegalArgumentException("Query cannot be null");
    }
    if (log.isDebugEnabled()) {
        log.debug("Query parameters set to " + settings.getParameters());
    }

    // optional per-query overrides of the model name and model table
    final String requestedModelName = getTrimmedOrNull(settings, QueryParameters.PARAMETER_MODEL_NAME);
    if (requestedModelName != null) {
        modelName = requestedModelName;
    }
    final String requestedModelTableName = getTrimmedOrNull(settings, QueryParameters.PARAMETER_MODEL_TABLE_NAME);
    if (requestedModelTableName != null) {
        modelTableName = requestedModelTableName;
    }
    queryModel = metadataHelper.getQueryModel(modelTableName, modelName, null);

    // optional datatype filter, split on the parameter value separator
    final String rawDatatypeFilter = getTrimmedOrNull(settings, QueryParameters.DATATYPE_FILTER_SET);
    if (rawDatatypeFilter != null) {
        config.setDatatypeFilter(new HashSet<>(Arrays.asList(rawDatatypeFilter.split(PARAM_VALUE_SEP_STR))));
        if (log.isDebugEnabled()) {
            log.debug("Data type filter set to " + config.getDatatypeFilterAsString());
        }
    }

    config.setConnector(connection);
    config.setAuthorizations(auths);
    if (indexTableName != null) {
        config.setIndexTableName(indexTableName);
    }
    if (reverseIndexTableName != null) {
        config.setReverseIndexTableName(reverseIndexTableName);
    }

    // date range: fall back to the widest bounds when a date is missing
    if (settings.getBeginDate() == null) {
        config.setBeginDate(new Date(0));
        if (log.isDebugEnabled()) {
            log.debug("No begin date supplied in settings.");
        }
    } else {
        config.setBeginDate(settings.getBeginDate());
    }
    if (settings.getEndDate() == null) {
        config.setEndDate(new Date(Long.MAX_VALUE));
        if (log.isDebugEnabled()) {
            log.debug("No end date supplied in settings.");
        }
    } else {
        config.setEndDate(settings.getEndDate());
    }

    // start with a trimmed version of the query, converted to JEXL
    final String trimmedQuery = settings.getQuery().trim();
    final LuceneToJexlQueryParser luceneParser = new LuceneToJexlQueryParser();
    luceneParser.setAllowLeadingWildCard(this.isAllowLeadingWildcard());
    final QueryNode jexlNode = luceneParser.parse(trimmedQuery);
    // TODO: Validate that this is a simple list of terms type of query
    config.setQueryString(jexlNode.getOriginalQuery());
    if (log.isDebugEnabled()) {
        log.debug("Original Query = " + trimmedQuery);
        log.debug("JEXL Query = " + jexlNode.getOriginalQuery());
    }

    // Parse & flatten the query.
    final ASTJexlScript flattened = JexlASTHelper.parseAndFlattenJexlQuery(config.getQueryString());
    ASTJexlScript tree;
    try {
        tree = UnfieldedIndexExpansionVisitor.expandUnfielded(config, this.scannerFactory, metadataHelper, flattened);
    } catch (EmptyUnfieldedTermExpansionException e) {
        // nothing to look up: hand back the configuration with empty terms and patterns
        Multimap<String, String> nothingToLookup = Multimaps.unmodifiableMultimap(HashMultimap.create());
        config.setNormalizedTerms(nothingToLookup);
        config.setNormalizedPatterns(nothingToLookup);
        return config;
    }

    final Set<String> datatypeFilter = config.getDatatypeFilter();
    final Set<String> allFields = metadataHelper.getAllFields(datatypeFilter);
    tree = QueryModelVisitor.applyModel(tree, queryModel, allFields);

    if (log.isTraceEnabled()) {
        log.trace("fetching dataTypes from FetchDataTypesVisitor");
    }
    final Multimap<String, Type<?>> typesByField = FetchDataTypesVisitor.fetchDataTypes(metadataHelper, config.getDatatypeFilter(), tree);
    config.setDataTypes(typesByField);
    config.setQueryFieldsDatatypes(typesByField);

    // field metadata for the filtered datatypes
    final Set<String> indexedFields = metadataHelper.getIndexedFields(datatypeFilter);
    config.setIndexedFields(indexedFields);
    final Set<String> reverseIndexedFields = metadataHelper.getReverseIndexedFields(datatypeFilter);
    config.setReverseIndexedFields(reverseIndexedFields);
    final Multimap<String, Type<?>> normalizedFields = metadataHelper.getFieldsToDatatypes(datatypeFilter);
    config.setNormalizedFieldsDatatypes(normalizedFields);

    if (log.isTraceEnabled()) {
        log.trace("Normalizers:");
        for (String fieldName : typesByField.keySet()) {
            log.trace(fieldName + ": " + typesByField.get(fieldName));
        }
    }

    tree = ExpandMultiNormalizedTerms.expandTerms(config, metadataHelper, tree);

    final Multimap<String, String> literals = LiteralNodeVisitor.getLiterals(tree);
    final Multimap<String, String> patterns = PatternNodeVisitor.getPatterns(tree);
    config.setNormalizedTerms(literals);
    config.setNormalizedPatterns(patterns);
    if (log.isDebugEnabled()) {
        log.debug("Normalized Literals = " + literals);
        log.debug("Normalized Patterns = " + patterns);
    }

    // one range per literal term
    final Map<Entry<String, String>, Range> rangesForTerms = Maps.newHashMap();
    for (Entry<String, String> literal : literals.entries()) {
        rangesForTerms.put(literal, ShardIndexQueryTableStaticMethods.getLiteralRange(literal));
    }

    // one range per pattern, paired with the flag saying whether it targets the reverse index
    final Map<Entry<String, String>, Entry<Range, Boolean>> rangesForPatterns = Maps.newHashMap();
    for (Entry<String, String> pattern : patterns.entries()) {
        ShardIndexQueryTableStaticMethods.RefactoredRangeDescription description = ShardIndexQueryTableStaticMethods.getRegexRange(pattern, isFullTableScanEnabled(), metadataHelper, config);
        rangesForPatterns.put(pattern, Maps.immutableEntry(description.range, description.isForReverseIndex));
    }

    config.setRangesForTerms(rangesForTerms);
    config.setRangesForPatterns(rangesForPatterns);
    return config;
}
Also used : ASTJexlScript(org.apache.commons.jexl2.parser.ASTJexlScript) LuceneToJexlQueryParser(datawave.query.language.parser.jexl.LuceneToJexlQueryParser) LongRange(org.apache.commons.lang.math.LongRange) Range(org.apache.accumulo.core.data.Range) Date(java.util.Date) HashMultimap(com.google.common.collect.HashMultimap) Multimap(com.google.common.collect.Multimap) ShardIndexQueryConfiguration(datawave.query.config.ShardIndexQueryConfiguration) MetadataHelper(datawave.query.util.MetadataHelper) Type(datawave.data.type.Type) Entry(java.util.Map.Entry) ShardIndexQueryTableStaticMethods(datawave.query.jexl.lookups.ShardIndexQueryTableStaticMethods) QueryNode(datawave.query.language.tree.QueryNode) EmptyUnfieldedTermExpansionException(datawave.query.exceptions.EmptyUnfieldedTermExpansionException)

Aggregations

ASTJexlScript (org.apache.commons.jexl2.parser.ASTJexlScript)457 Test (org.junit.Test)385 Key (org.apache.accumulo.core.data.Key)69 JexlNode (org.apache.commons.jexl2.parser.JexlNode)61 HashSet (java.util.HashSet)50 ShardQueryConfiguration (datawave.query.config.ShardQueryConfiguration)48 MockMetadataHelper (datawave.query.util.MockMetadataHelper)40 Type (datawave.data.type.Type)38 LcNoDiacriticsType (datawave.data.type.LcNoDiacriticsType)37 ArrayList (java.util.ArrayList)37 ExpressionFilter (datawave.query.jexl.visitors.EventDataQueryExpressionVisitor.ExpressionFilter)36 ScannerFactory (datawave.query.tables.ScannerFactory)35 Date (java.util.Date)35 NoOpType (datawave.data.type.NoOpType)34 NumberType (datawave.data.type.NumberType)34 AbstractMap (java.util.AbstractMap)32 Range (org.apache.accumulo.core.data.Range)30 Value (org.apache.accumulo.core.data.Value)29 RangeFactoryForTests.makeTestRange (datawave.common.test.utils.query.RangeFactoryForTests.makeTestRange)26 QueryPlan (datawave.query.planner.QueryPlan)26