Search in sources :

Example 1 with SessionOptions

use of datawave.query.tables.SessionOptions in project datawave by NationalSecurityAgency.

the class VisitorFunction method apply.

/**
 * Rewrites the query option of every iterator setting on the incoming chunk and returns a new chunk carrying the rewritten settings.
 * <p>
 * For each iterator setting that has a {@code QueryOptions.QUERY} option, the query may be cleaned of shard/day hints, passed through the
 * {@code WhindexVisitor}, checked for executability (with pull-up / push-down of unexecutable nodes), and have large fielded lists pushed down. The
 * resulting query string is recorded in {@code previouslyExpanded} keyed by the original query string, and replaces the query option on a copy of the
 * iterator setting. Iterator settings without a query option are left untouched in the copied options.
 * <p>
 * NOTE(review): {@code input} is annotated {@code @Nullable} but is dereferenced on the first statement, so a null argument fails with a
 * NullPointerException — confirm whether callers can ever pass null.
 *
 * @param input
 *            the chunk whose options, ranges and last known location are read
 * @return a new {@code ScannerChunk} with the same ranges and last known location as {@code input} and a copy of its options containing the rewritten
 *         iterator settings
 * @throws InvalidQueryException
 *             if the executability state is {@code STATE.ERROR} after pull-up / push-down
 * @throws DatawaveFatalQueryException
 *             if a full table scan would be required but is disabled, if the query cannot be parsed, or if storing the new query raises a
 *             NullPointerException
 */
@Override
@Nullable
public ScannerChunk apply(@Nullable ScannerChunk input) {
    SessionOptions options = input.getOptions();
    // the new chunk starts without options; they are attached just before returning
    ScannerChunk newSettings = new ScannerChunk(null, input.getRanges(), input.getLastKnownLocation());
    SessionOptions newOptions = new SessionOptions(options);
    for (IteratorSetting setting : options.getIterators()) {
        final String query = setting.getOptions().get(QueryOptions.QUERY);
        // only iterator settings that carry a query are rewritten
        if (null != query) {
            // work on a copy of the setting so the input's setting is not modified here
            IteratorSetting newIteratorSetting = new IteratorSetting(setting.getPriority(), setting.getName(), setting.getIteratorClass());
            newIteratorSetting.addOptions(setting.getOptions());
            try {
                // script is parsed lazily: it stays null until one of the branches below needs the tree
                ASTJexlScript script = null;
                boolean evaluatedPreviously = previouslyExecutable(query);
                boolean madeChange = false;
                if (!evaluatedPreviously && config.isCleanupShardsAndDaysQueryHints()) {
                    script = JexlASTHelper.parseAndFlattenJexlQuery(query);
                    script = DateIndexCleanupVisitor.cleanup(script);
                    // marked as changed whenever the cleanup runs, without comparing the tree before and after
                    madeChange = true;
                }
                // start from the stored expansion when this query string was handled before, otherwise from the query itself
                String newQuery = evaluatedPreviously ? previouslyExpanded.get(query) : query;
                // debug statements are only collected when trace logging is on; every loop over this list below is guarded the same way
                List<String> debug = null;
                if (log.isTraceEnabled())
                    debug = Lists.newArrayList();
                if (!config.isDisableWhindexFieldMappings() && !evaluatedPreviously) {
                    if (null == script)
                        script = JexlASTHelper.parseAndFlattenJexlQuery(query);
                    // apply the whindex using the shard date
                    ASTJexlScript rebuiltScript = WhindexVisitor.apply(script, config, getEarliestBeginDate(newSettings.getRanges()), metadataHelper);
                    // if the query changed, save it, and mark it as such
                    if (!TreeEqualityVisitor.isEqual(script, rebuiltScript)) {
                        // NOTE(review): the message is built from 'script' before it is replaced by 'rebuiltScript', so it prints the
                        // pre-update query — confirm that is the intended content
                        log.debug("[" + config.getQuery().getId() + "] The WhindexVisitor updated the query: " + JexlStringBuildingVisitor.buildQuery(script));
                        script = rebuiltScript;
                        madeChange = true;
                    }
                }
                // skipped only when the bypass flag is set AND the query was evaluated previously
                if (!config.isBypassExecutabilityCheck() || !evaluatedPreviously) {
                    if (null == script)
                        script = JexlASTHelper.parseAndFlattenJexlQuery(query);
                    if (!ExecutableDeterminationVisitor.isExecutable(script, config, indexedFields, indexOnlyFields, nonEventFields, true, debug, this.metadataHelper)) {
                        if (log.isTraceEnabled()) {
                            log.trace("Need to pull up non-executable query: " + JexlStringBuildingVisitor.buildQuery(script));
                            for (String debugStatement : debug) {
                                log.trace(debugStatement);
                            }
                            DefaultQueryPlanner.logQuery(script, "Failing query:");
                        }
                        // first attempt: pull up delayed predicates, then re-check the state
                        script = (ASTJexlScript) PullupUnexecutableNodesVisitor.pullupDelayedPredicates(script, true, config, indexedFields, indexOnlyFields, nonEventFields, metadataHelper);
                        madeChange = true;
                        STATE state = ExecutableDeterminationVisitor.getState(script, config, indexedFields, indexOnlyFields, nonEventFields, true, debug, metadataHelper);
                        /**
                         * We could achieve better performance if we live with the small number of queries that error due to the full table scan exception.
                         *
                         * Either look at improving PushdownUnexecutableNodesVisitor or avoid the process altogether.
                         */
                        // second attempt: push predicates down if the pull-up did not make the query executable
                        if (state != STATE.EXECUTABLE) {
                            if (log.isTraceEnabled()) {
                                log.trace("Need to push down non-executable query: " + JexlStringBuildingVisitor.buildQuery(script));
                                for (String debugStatement : debug) {
                                    log.trace(debugStatement);
                                }
                            }
                            script = (ASTJexlScript) PushdownUnexecutableNodesVisitor.pushdownPredicates(script, true, config, indexedFields, indexOnlyFields, nonEventFields, metadataHelper);
                        }
                        // the state is recomputed unconditionally, even when no push-down happened
                        state = ExecutableDeterminationVisitor.getState(script, config, indexedFields, indexOnlyFields, nonEventFields, true, debug, metadataHelper);
                        if (state != STATE.EXECUTABLE) {
                            // ERROR is always fatal for the query
                            if (state == STATE.ERROR) {
                                log.warn("After expanding the query, it is determined that the query cannot be executed due to index-only fields mixed with expressions that cannot be run against the index.");
                                BadRequestQueryException qe = new BadRequestQueryException(DatawaveErrorCode.INDEX_ONLY_FIELDS_MIXED_INVALID_EXPRESSIONS);
                                throw new InvalidQueryException(qe);
                            }
                            // any other non-executable state is only fatal when full table scans are disabled
                            log.warn("After expanding the query, it is determined that the query cannot be executed against the field index and a full table scan is required");
                            if (!config.getFullTableScanEnabled()) {
                                if (log.isTraceEnabled()) {
                                    log.trace("Full Table fail of " + JexlStringBuildingVisitor.buildQuery(script));
                                    for (String debugStatement : debug) {
                                        log.trace(debugStatement);
                                    }
                                    DefaultQueryPlanner.logQuery(script, "Failing query:");
                                }
                                PreConditionFailedQueryException qe = new PreConditionFailedQueryException(DatawaveErrorCode.FULL_TABLE_SCAN_REQUIRED_BUT_DISABLED);
                                throw new DatawaveFatalQueryException(qe);
                            }
                        }
                        if (log.isTraceEnabled()) {
                            for (String debugStatement : debug) {
                                log.trace(debugStatement);
                            }
                            DefaultQueryPlanner.logQuery(script, "Query pushing down large fielded lists:");
                        }
                    }
                }
                if (config.getSerializeQueryIterator()) {
                    // serialization path: the large-fielded-list push-down below is skipped entirely
                    serializeQuery(newIteratorSetting);
                } else {
                    if (!evaluatedPreviously) {
                        // if we have an hdfs configuration, then we can pushdown large fielded lists to an ivarator
                        if (config.getHdfsSiteConfigURLs() != null && setting.getOptions().get(QueryOptions.BATCHED_QUERY) == null) {
                            if (null == script)
                                script = JexlASTHelper.parseAndFlattenJexlQuery(query);
                            try {
                                script = pushdownLargeFieldedLists(config, script);
                                madeChange = true;
                            } catch (IOException ioe) {
                                // best effort: on failure the script keeps its current form and processing continues
                                log.error("Unable to pushdown large fielded lists....leaving in expanded form", ioe);
                            }
                        }
                    }
                }
                // only recompile the script if changes were made to the query
                if (madeChange)
                    newQuery = JexlStringBuildingVisitor.buildQuery(script);
                try {
                    // remember the result under the original query string for later calls
                    previouslyExpanded.put(query, newQuery);
                } catch (NullPointerException npe) {
                    // presumably raised by the map when newQuery is null, as the message suggests — verify against the map implementation
                    throw new DatawaveFatalQueryException(String.format("New query is null! madeChange: %b, qid: %s", madeChange, setting.getOptions().get(QueryOptions.QUERY_ID)), npe);
                }
                // test the final script for thresholds
                // NOTE(review): 'script' is still null here when none of the branches above parsed the query (e.g. previously evaluated with
                // the executability check bypassed) — confirm validateQuerySize and the logging calls below accept a null script
                DefaultQueryPlanner.validateQuerySize("VisitorFunction", script, config, false);
                // swap the original iterator setting for the rewritten copy in the new options
                newIteratorSetting.addOption(QueryOptions.QUERY, newQuery);
                newOptions.removeScanIterator(setting.getName());
                newOptions.addScanIterator(newIteratorSetting);
                if (log.isDebugEnabled()) {
                    log.debug("VisitorFunction result: " + newSettings.getRanges());
                }
                if (log.isTraceEnabled()) {
                    DefaultQueryPlanner.logTrace(PrintingVisitor.formattedQueryStringList(script), "VistorFunction::apply method");
                } else if (log.isDebugEnabled()) {
                    DefaultQueryPlanner.logDebug(PrintingVisitor.formattedQueryStringList(script, DefaultQueryPlanner.maxChildNodesToPrint), "VistorFunction::apply method");
                }
            } catch (ParseException e) {
                // a query that cannot be parsed aborts the whole call
                throw new DatawaveFatalQueryException(e);
            }
        }
    }
    newSettings.setOptions(newOptions);
    return newSettings;
}
Also used : BadRequestQueryException(datawave.webservice.query.exception.BadRequestQueryException) PreConditionFailedQueryException(datawave.webservice.query.exception.PreConditionFailedQueryException) SessionOptions(datawave.query.tables.SessionOptions) ASTJexlScript(org.apache.commons.jexl2.parser.ASTJexlScript) STATE(datawave.query.jexl.visitors.ExecutableDeterminationVisitor.STATE) ScannerChunk(datawave.query.tables.async.ScannerChunk) IOException(java.io.IOException) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) DatawaveFatalQueryException(datawave.query.exceptions.DatawaveFatalQueryException) ParseException(org.apache.commons.jexl2.parser.ParseException) InvalidQueryException(datawave.query.exceptions.InvalidQueryException) Nullable(javax.annotation.Nullable)

Example 2 with SessionOptions

use of datawave.query.tables.SessionOptions in project datawave by NationalSecurityAgency.

the class PushdownFunction method apply.

/**
 * Turns the given query data into scanner chunks, one per distinct query plan per server.
 * <p>
 * The query data is wrapped in a {@code QueryPlan} and handed to {@code redistributeQueries}, which fills a server-to-plans multimap. For every server the
 * plans are de-duplicated, plans whose hash code is already in {@code queryPlanSet} are skipped, and each remaining plan is converted into a
 * {@code ScannerChunk} whose options carry the plan's iterator settings, the custom settings, the plan's column families and the query configuration.
 *
 * @param qd
 *            the query data to plan
 * @return the scanner chunks that were built, possibly empty
 * @throws RuntimeException
 *             wrapping any Accumulo, security, table-not-found or parse exception raised while planning or building a chunk
 */
public List<ScannerChunk> apply(QueryData qd) {
    final Multimap<String, QueryPlan> plansByServer = ArrayListMultimap.create();
    final List<ScannerChunk> result = Lists.newArrayList();
    try {
        redistributeQueries(plansByServer, tl, new QueryPlan(qd));
        for (String location : plansByServer.keySet()) {
            // collapse equal plans that were assigned to the same server
            Set<QueryPlan> distinctPlans = Sets.newHashSet(plansByServer.get(location));
            for (QueryPlan candidate : distinctPlans) {
                Integer planHash = candidate.hashCode();
                if (queryPlanSet.contains(planHash)) {
                    // this plan hash is already recorded; do not emit it again
                    continue;
                }
                // an unseen hash replaces whatever was recorded before
                queryPlanSet.clear();
                queryPlanSet.add(planHash);
                try {
                    SessionOptions chunkOptions = new SessionOptions();
                    if (log.isTraceEnabled()) {
                        log.trace("setting ranges" + candidate.getRanges());
                        log.trace("range set size" + candidate.getSettings().size());
                    }
                    // plan-specific iterators first, then the custom ones
                    for (IteratorSetting planSetting : candidate.getSettings()) {
                        chunkOptions.addScanIterator(planSetting);
                    }
                    for (IteratorSetting customSetting : customSettings) {
                        chunkOptions.addScanIterator(customSetting);
                    }
                    for (String family : candidate.getColumnFamilies()) {
                        chunkOptions.fetchColumnFamily(new Text(family));
                    }
                    chunkOptions.setQueryConfig(this.config);
                    ScannerChunk chunk = new ScannerChunk(chunkOptions, Lists.newArrayList(candidate.getRanges()), location);
                    result.add(chunk);
                } catch (Exception e) {
                    // log, then funnel every failure through AccumuloException so the outer handler wraps it
                    log.error(e);
                    throw new AccumuloException(e);
                }
            }
        }
    } catch (AccumuloException | AccumuloSecurityException | TableNotFoundException | ParseException e) {
        throw new RuntimeException(e);
    }
    return result;
}
Also used : AccumuloException(org.apache.accumulo.core.client.AccumuloException) SessionOptions(datawave.query.tables.SessionOptions) ScannerChunk(datawave.query.tables.async.ScannerChunk) Text(org.apache.hadoop.io.Text) QueryPlan(datawave.query.planner.QueryPlan) TableOfflineException(org.apache.accumulo.core.client.TableOfflineException) ParseException(org.apache.commons.jexl2.parser.ParseException) TableDeletedException(org.apache.accumulo.core.client.TableDeletedException) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) AccumuloException(org.apache.accumulo.core.client.AccumuloException) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) ParseException(org.apache.commons.jexl2.parser.ParseException)

Example 3 with SessionOptions

use of datawave.query.tables.SessionOptions in project datawave by NationalSecurityAgency.

the class VisitorFunctionTest method overTermThresholdTest.

/**
 * Applies the visitor function to a two-term OR query with a max term threshold of 1 and checks that the rewritten query differs from the original and
 * contains a {@code _List_} marker for FIELD1 with values a and b.
 */
@Test
public void overTermThresholdTest() throws IOException, TableNotFoundException, URISyntaxException {
    setupExpects();

    // planner switches
    config.setCleanupShardsAndDaysQueryHints(false);
    config.setBypassExecutabilityCheck(true);
    config.setSerializeQueryIterator(false);

    // mocked query attached to the configuration
    final Query queryMock = createMock(Query.class);
    config.setQuery(queryMock);
    EasyMock.expect(queryMock.getId()).andReturn(new UUID(0, 0)).anyTimes();
    EasyMock.expect(queryMock.duplicate("testQuery1")).andReturn(queryMock).anyTimes();

    // set thresholds
    config.setMaxTermThreshold(1);
    config.setMaxDepthThreshold(10);
    config.setMaxOrExpansionFstThreshold(100);
    config.setMaxOrExpansionThreshold(20);
    config.setMaxOrRangeThreshold(2);
    config.setMaxRangesPerRangeIvarator(50);
    // NOTE(review): same value set a second time; looks redundant but is kept as-is
    config.setMaxOrRangeThreshold(2);

    // input chunk: one iterator carrying the query, one single-shard range
    final String originalQuery = "FIELD1 == 'a' || FIELD1 == 'b'";
    final SessionOptions sessionOptions = new SessionOptions();
    final IteratorSetting queryIterator = new IteratorSetting(10, "itr", QueryIterator.class);
    queryIterator.addOption(QueryOptions.QUERY, originalQuery);
    sessionOptions.addScanIterator(queryIterator);
    final ScannerChunk inputChunk = new ScannerChunk(sessionOptions, Collections.singleton(new Range("20210101_0", "20210101_0")));

    replayAll();
    function = new VisitorFunction(config, helper);
    final ScannerChunk updatedChunk = function.apply(inputChunk);
    verifyAll();

    Assert.assertNotEquals(inputChunk, updatedChunk);

    // read the rewritten query back from the first iterator of the result
    final IteratorSetting rewrittenIterator = updatedChunk.getOptions().getIterators().iterator().next();
    final String updatedQuery = rewrittenIterator.getOptions().get(QueryOptions.QUERY);
    Assert.assertNotEquals(originalQuery, updatedQuery);
    Assert.assertTrue(updatedQuery, updatedQuery.contains("_List_"));
    Assert.assertTrue(updatedQuery, updatedQuery.contains("field = 'FIELD1'"));
    Assert.assertTrue(updatedQuery, updatedQuery.contains("values\":[\"a\",\"b\"]"));
}
Also used : Query(datawave.webservice.query.Query) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) SessionOptions(datawave.query.tables.SessionOptions) ScannerChunk(datawave.query.tables.async.ScannerChunk) UUID(java.util.UUID) Range(org.apache.accumulo.core.data.Range) Test(org.junit.Test)

Example 4 with SessionOptions

use of datawave.query.tables.SessionOptions in project datawave by NationalSecurityAgency.

the class VisitorFunctionTest method overTermThresholdAfterFirstReductionOverrideSecondTest.

/**
 * Applies the visitor function to an AND of two OR groups (five FIELD1 terms; two FIELD1 plus four FIELD2 terms) with term and OR-expansion thresholds
 * of 5, and checks that the rewritten query contains list markers for FIELD1 (a..e) and FIELD2 (a..d) while the {@code FIELD1 == 'x' || FIELD1 == 'y'}
 * pair is still present verbatim.
 */
@Test
public void overTermThresholdAfterFirstReductionOverrideSecondTest() throws IOException, TableNotFoundException, URISyntaxException {
    setupExpects();

    // planner switches
    config.setCleanupShardsAndDaysQueryHints(false);
    config.setBypassExecutabilityCheck(true);
    config.setSerializeQueryIterator(false);

    // mocked query attached to the configuration
    final Query queryMock = createMock(Query.class);
    config.setQuery(queryMock);
    EasyMock.expect(queryMock.getId()).andReturn(new UUID(0, 0)).anyTimes();
    EasyMock.expect(queryMock.getQueryName()).andReturn("testQuery1").anyTimes();
    EasyMock.expect(queryMock.duplicate("testQuery1")).andReturn(queryMock).anyTimes();

    // set thresholds
    config.setMaxTermThreshold(5);
    config.setMaxDepthThreshold(20);
    config.setMaxOrExpansionFstThreshold(100);
    config.setMaxOrExpansionThreshold(5);
    config.setMaxOrRangeThreshold(2);
    config.setMaxRangesPerRangeIvarator(50);
    // NOTE(review): same value set a second time; looks redundant but is kept as-is
    config.setMaxOrRangeThreshold(2);

    // input chunk: one iterator carrying the query, one single-shard range
    final String originalQuery = "(FIELD1 == 'a' || FIELD1 == 'b' || FIELD1 == 'c' || FIELD1 == 'd' || FIELD1 == 'e') && (FIELD1 == 'x' || FIELD1 == 'y' || FIELD2 == 'a' || FIELD2 == 'b' || FIELD2 == 'c' || FIELD2 == 'd')";
    final SessionOptions sessionOptions = new SessionOptions();
    final IteratorSetting queryIterator = new IteratorSetting(10, "itr", QueryIterator.class);
    queryIterator.addOption(QueryOptions.QUERY, originalQuery);
    sessionOptions.addScanIterator(queryIterator);
    final ScannerChunk inputChunk = new ScannerChunk(sessionOptions, Collections.singleton(new Range("20210101_0", "20210101_0")));

    replayAll();
    function = new VisitorFunction(config, helper);
    final ScannerChunk updatedChunk = function.apply(inputChunk);
    verifyAll();

    Assert.assertNotEquals(inputChunk, updatedChunk);

    // read the rewritten query back from the first iterator of the result
    final IteratorSetting rewrittenIterator = updatedChunk.getOptions().getIterators().iterator().next();
    final String updatedQuery = rewrittenIterator.getOptions().get(QueryOptions.QUERY);
    Assert.assertNotEquals(originalQuery, updatedQuery);
    Assert.assertTrue(updatedQuery, updatedQuery.contains("_List_"));
    Assert.assertTrue(updatedQuery, updatedQuery.contains("field = 'FIELD1'"));
    Assert.assertTrue(updatedQuery, updatedQuery.contains("values\":[\"a\",\"b\",\"c\",\"d\",\"e\"]"));
    Assert.assertTrue(updatedQuery, updatedQuery.contains("FIELD1 == 'x' || FIELD1 == 'y'"));
    Assert.assertTrue(updatedQuery, updatedQuery.contains("&& (field = 'FIELD2') && (params = '{\"values\":[\"a\",\"b\",\"c\",\"d\"]}')"));
}
Also used : Query(datawave.webservice.query.Query) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) SessionOptions(datawave.query.tables.SessionOptions) ScannerChunk(datawave.query.tables.async.ScannerChunk) UUID(java.util.UUID) Range(org.apache.accumulo.core.data.Range) Test(org.junit.Test)

Example 5 with SessionOptions

use of datawave.query.tables.SessionOptions in project datawave by NationalSecurityAgency.

the class ShardIndexQueryTableStaticMethods method configureTermMatchOnly.

/**
 * Creates a limited {@code AnyFieldScanner} session over the given ranges, configured with a date range iterator and, when one is produced, a global
 * index term matching iterator.
 *
 * @param config
 *            the query configuration; supplies the authorizations and query for the scanner and is passed to both iterator builders
 * @param scannerFactory
 *            the factory used to create the limited scanner
 * @param tableName
 *            the table the scanner is created against
 * @param ranges
 *            the ranges to scan; must not be null
 * @param literals
 *            passed through to {@code configureGlobalIndexTermMatchingIterator}
 * @param patterns
 *            passed through to {@code configureGlobalIndexTermMatchingIterator}
 * @param reverseIndex
 *            passed through to {@code configureGlobalIndexTermMatchingIterator}
 * @param limitToUniqueTerms
 *            passed through to {@code configureGlobalIndexTermMatchingIterator}
 * @return the configured scanner session, or {@code null} when {@code ranges} is empty
 * @throws Exception
 *             if creating the scanner or building an iterator setting fails
 */
public static ScannerSession configureTermMatchOnly(ShardQueryConfiguration config, ScannerFactory scannerFactory, String tableName, Collection<Range> ranges, Collection<String> literals, Collection<String> patterns, boolean reverseIndex, boolean limitToUniqueTerms) throws Exception {
    if (ranges.isEmpty()) {
        // without ranges there is nothing to scan
        return null;
    }

    final ScannerSession session = scannerFactory.newLimitedScanner(AnyFieldScanner.class, tableName, config.getAuthorizations(), config.getQuery());
    session.setRanges(ranges);

    final SessionOptions sessionOptions = new SessionOptions();

    // the date range iterator is always added
    final IteratorSetting dateRangeSetting = configureDateRangeIterator(config);
    sessionOptions.addScanIterator(dateRangeSetting);

    // the term matching iterator is optional: the builder may return null
    final IteratorSetting termMatchSetting = configureGlobalIndexTermMatchingIterator(config, literals, patterns, reverseIndex, limitToUniqueTerms);
    if (termMatchSetting != null) {
        sessionOptions.addScanIterator(termMatchSetting);
    }

    session.setOptions(sessionOptions);
    return session;
}
Also used : IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) SessionOptions(datawave.query.tables.SessionOptions) ScannerSession(datawave.query.tables.ScannerSession)

Aggregations

SessionOptions (datawave.query.tables.SessionOptions)11 IteratorSetting (org.apache.accumulo.core.client.IteratorSetting)11 ScannerChunk (datawave.query.tables.async.ScannerChunk)7 Range (org.apache.accumulo.core.data.Range)7 Query (datawave.webservice.query.Query)5 UUID (java.util.UUID)5 Test (org.junit.Test)5 DatawaveFatalQueryException (datawave.query.exceptions.DatawaveFatalQueryException)3 ScannerSession (datawave.query.tables.ScannerSession)3 PreConditionFailedQueryException (datawave.webservice.query.exception.PreConditionFailedQueryException)3 TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException)3 Text (org.apache.hadoop.io.Text)3 ParseException (org.apache.commons.jexl2.parser.ParseException)2 TimeoutExceptionIterator (datawave.core.iterators.TimeoutExceptionIterator)1 TimeoutIterator (datawave.core.iterators.TimeoutIterator)1 DoNotPerformOptimizedQueryException (datawave.query.exceptions.DoNotPerformOptimizedQueryException)1 InvalidQueryException (datawave.query.exceptions.InvalidQueryException)1 IdentifierOpLiteral (datawave.query.jexl.JexlASTHelper.IdentifierOpLiteral)1 LiteralRange (datawave.query.jexl.LiteralRange)1 BoundedRange (datawave.query.jexl.nodes.BoundedRange)1