Search in sources :

Example 6 with SessionOptions

use of datawave.query.tables.SessionOptions in project datawave by NationalSecurityAgency.

the class RangeStream method visit.

/**
 * Builds the index stream for an equality node of the form {@code identifier == literal}.
 *
 * <p>Outcomes, in the order they are checked:
 * <ul>
 * <li>{@code ScannerStream.noData} when {@code isUnOrNotFielded(node)} is true;</li>
 * <li>{@code ScannerStream.unindexed} when no identifier/literal pair can be extracted, the literal is
 * {@code null}, the field is {@code QueryOptions.DEFAULT_DATATYPE_FIELDNAME}, or the field is known to the
 * helper but not indexed;</li>
 * <li>{@code ScannerStream.unknownField} when the field is neither indexed nor known to the helper;</li>
 * <li>otherwise an initialized {@code ScannerStream} backed by a configured {@code RangeStreamScanner}.</li>
 * </ul>
 *
 * @param node
 *            the equality node being visited
 * @param data
 *            visitor payload; not read by this method
 * @return the stream describing how (or whether) the term can be resolved against the index
 * @throws RuntimeException
 *             wrapping a {@code TableNotFoundException} from the field lookup, or any exception raised while
 *             the scanner is being configured
 */
@Override
public ScannerStream visit(ASTEQNode node, Object data) {
    if (isUnOrNotFielded(node)) {
        return ScannerStream.noData(node);
    }

    // Only "identifier = literal" shapes can be looked up.
    final IdentifierOpLiteral identifierAndLiteral = JexlASTHelper.getIdentifierOpLiteral(node);
    if (identifierAndLiteral == null) {
        return ScannerStream.unindexed(node);
    }

    final String fieldName = identifierAndLiteral.deconstructIdentifier();
    final Object literalValue = identifierAndLiteral.getLiteralValue();

    // A null literal cannot be resolved against the index, and the datatype pseudo-field is never looked up.
    if (literalValue == null || QueryOptions.DEFAULT_DATATYPE_FIELDNAME.equals(fieldName)) {
        return ScannerStream.unindexed(node);
    }

    // For a String literal, toString() is the identity.
    final String literal = literalValue.toString();

    if (!isIndexed(fieldName, config.getIndexedFields())) {
        final boolean observedField;
        try {
            observedField = this.getAllFieldsFromHelper().contains(fieldName);
        } catch (TableNotFoundException e) {
            log.error(e);
            throw new RuntimeException(e);
        }

        if (observedField) {
            log.debug("{\"" + fieldName + "\": \"" + literal + "\"} is not indexed.");
            return ScannerStream.unindexed(node);
        }
        log.debug("{\"" + fieldName + "\": \"" + literal + "\"} is not an observed field.");
        return ScannerStream.unknownField(node);
    }

    // Remaining case: the field is indexed, so build a scanner over the term's range.
    log.debug("\"" + fieldName + "\" is indexed. for " + literal);
    try {
        final int basePriority = config.getBaseIteratorPriority();

        final SessionOptions options = new SessionOptions();
        options.fetchColumnFamily(new Text(fieldName));
        // The datatype filter takes the base priority; the uid iterator sits directly after it.
        options.addScanIterator(makeDataTypeFilter(config, basePriority));

        final Range termRange = rangeForTerm(literal, fieldName, config);

        // The scanner and the uid iterator are built the same way on both paths; only the options differ.
        final RangeStreamScanner scannerSession = scanners.newRangeScanner(config.getIndexTableName(), config.getAuthorizations(), config.getQuery(),
                        config.getShardsPerDayThreshold());
        final IteratorSetting uidSetting = new IteratorSetting(basePriority + 1, createUidsIteratorClass);
        if (limitScanners) {
            // Honour the configured uid handling.
            uidSetting.addOption(CreateUidsIterator.COLLAPSE_UIDS, Boolean.valueOf(collapseUids).toString());
            uidSetting.addOption(CreateUidsIterator.PARSE_TLD_UIDS, Boolean.valueOf(config.getParseTldUids()).toString());
        } else {
            // Pass-through: both uid options are switched off.
            uidSetting.addOption(CreateUidsIterator.COLLAPSE_UIDS, Boolean.valueOf(false).toString());
            uidSetting.addOption(CreateUidsIterator.PARSE_TLD_UIDS, Boolean.valueOf(false).toString());
        }

        if (log.isTraceEnabled()) {
            log.trace("Building delayed scanner for " + fieldName + ", literal= " + literal);
        }

        // Settings shared by both paths.
        options.addScanIterator(uidSetting);
        final String queryString = fieldName + "=='" + literal + "'";
        options.addScanIterator(QueryScannerHelper.getQueryInfoIterator(config.getQuery(), false, queryString));

        scannerSession.setOptions(options);
        scannerSession.setMaxResults(config.getMaxIndexBatchSize());
        scannerSession.setExecutor(streamExecutor);
        scannerSession.setRanges(Collections.singleton(termRange));

        // The parser is created before the stream that consumes it.
        final EntryParser entryParser = new EntryParser(node, fieldName, literal, indexOnlyFields);
        return ScannerStream.initialized(scannerSession, entryParser, node);
    } catch (Exception e) {
        log.error(e);
        throw new RuntimeException(e);
    }
}
Also used : TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) IdentifierOpLiteral(datawave.query.jexl.JexlASTHelper.IdentifierOpLiteral) RangeStreamScanner(datawave.query.tables.RangeStreamScanner) SessionOptions(datawave.query.tables.SessionOptions) Text(org.apache.hadoop.io.Text) LiteralRange(datawave.query.jexl.LiteralRange) BoundedRange(datawave.query.jexl.nodes.BoundedRange) Range(org.apache.accumulo.core.data.Range) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) DatawaveFatalQueryException(datawave.query.exceptions.DatawaveFatalQueryException) PreConditionFailedQueryException(datawave.webservice.query.exception.PreConditionFailedQueryException) QueryException(datawave.webservice.query.exception.QueryException)

Example 7 with SessionOptions

use of datawave.query.tables.SessionOptions in project datawave by NationalSecurityAgency.

the class VisitorFunctionTest method rangeOverTermThresholdTest.

/**
 * Applies a {@code VisitorFunction} to a query made of two bounded ranges on {@code FIELD1} while the
 * max term threshold is 1, and checks that the query stored in the returned chunk was rewritten: it must
 * differ from the input and contain a {@code _List_} marker, {@code field = 'FIELD1'}, and the single
 * range {@code (a, z)}.
 */
@Test
public void rangeOverTermThresholdTest() throws IOException, TableNotFoundException, URISyntaxException {
    setupExpects();
    config.setCleanupShardsAndDaysQueryHints(false);
    config.setBypassExecutabilityCheck(true);
    config.setSerializeQueryIterator(false);
    Query mockQuery = createMock(Query.class);
    config.setQuery(mockQuery);
    EasyMock.expect(mockQuery.getId()).andReturn(new UUID(0, 0)).anyTimes();
    EasyMock.expect(mockQuery.duplicate("testQuery1")).andReturn(mockQuery).anyTimes();
    // set thresholds (each threshold is set exactly once)
    config.setMaxTermThreshold(1);
    config.setMaxDepthThreshold(10);
    config.setMaxOrExpansionFstThreshold(100);
    config.setMaxOrExpansionThreshold(20);
    config.setMaxOrRangeThreshold(2);
    config.setMaxRangesPerRangeIvarator(50);
    // Wrap the query in a QueryIterator setting, the form in which the function reads it back below.
    SessionOptions options = new SessionOptions();
    IteratorSetting iteratorSetting = new IteratorSetting(10, "itr", QueryIterator.class);
    String query = "((_Bounded_ = true) && (FIELD1 > 'a' && FIELD1 < 'y')) || ((_Bounded_ = true) && (FIELD1 > 'c' && FIELD1 < 'z'))";
    iteratorSetting.addOption(QueryOptions.QUERY, query);
    options.addScanIterator(iteratorSetting);
    ScannerChunk chunk = new ScannerChunk(options, Collections.singleton(new Range("20210101_0", "20210101_0")));
    replayAll();
    function = new VisitorFunction(config, helper);
    ScannerChunk updatedChunk = function.apply(chunk);
    verifyAll();
    // The function must hand back a different chunk carrying a rewritten query.
    Assert.assertNotEquals(chunk, updatedChunk);
    String updatedQuery = updatedChunk.getOptions().getIterators().iterator().next().getOptions().get(QueryOptions.QUERY);
    Assert.assertNotEquals(query, updatedQuery);
    Assert.assertTrue(updatedQuery, updatedQuery.contains("_List_"));
    Assert.assertTrue(updatedQuery, updatedQuery.contains("field = 'FIELD1'"));
    Assert.assertTrue(updatedQuery, updatedQuery.contains("ranges\":[[\"(a\",\"z)\"]"));
}
Also used : Query(datawave.webservice.query.Query) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) SessionOptions(datawave.query.tables.SessionOptions) ScannerChunk(datawave.query.tables.async.ScannerChunk) UUID(java.util.UUID) Range(org.apache.accumulo.core.data.Range) Test(org.junit.Test)

Example 8 with SessionOptions

use of datawave.query.tables.SessionOptions in project datawave by NationalSecurityAgency.

the class VisitorFunctionTest method underTermThresholdTest.

/**
 * Runs a single-term query ({@code FIELD1 == 'a'}) through a {@code VisitorFunction} with the max term
 * threshold set to 2. The test only requires that the call completes and that the recorded mock
 * expectations verify; the returned chunk is not inspected.
 */
@Test
public void underTermThresholdTest() throws IOException, TableNotFoundException, URISyntaxException {
    setupExpects();

    // Test-specific configuration.
    config.setCleanupShardsAndDaysQueryHints(false);
    config.setBypassExecutabilityCheck(true);
    config.setSerializeQueryIterator(false);

    // The query mock only needs to answer getId().
    final Query queryMock = createMock(Query.class);
    config.setQuery(queryMock);
    EasyMock.expect(queryMock.getId()).andReturn(new UUID(0, 0)).anyTimes();

    // Thresholds.
    config.setMaxTermThreshold(2);
    config.setMaxDepthThreshold(2);

    // One QueryIterator setting carrying the query text, scanned over a single shard range.
    final IteratorSetting queryIterator = new IteratorSetting(10, "itr", QueryIterator.class);
    queryIterator.addOption(QueryOptions.QUERY, "FIELD1 == 'a'");
    final SessionOptions sessionOptions = new SessionOptions();
    sessionOptions.addScanIterator(queryIterator);
    final Range shardRange = new Range("20210101_0", "20210101_0");
    final ScannerChunk inputChunk = new ScannerChunk(sessionOptions, Collections.singleton(shardRange));

    replayAll();

    function = new VisitorFunction(config, helper);
    function.apply(inputChunk);

    verifyAll();
}
Also used : Query(datawave.webservice.query.Query) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) SessionOptions(datawave.query.tables.SessionOptions) ScannerChunk(datawave.query.tables.async.ScannerChunk) UUID(java.util.UUID) Range(org.apache.accumulo.core.data.Range) Test(org.junit.Test)

Example 9 with SessionOptions

use of datawave.query.tables.SessionOptions in project datawave by NationalSecurityAgency.

the class VisitorFunctionTest method overTermThresholdCantReduceTest.

/**
 * Expects {@code VisitorFunction.apply} to fail with a {@link DatawaveFatalQueryException} for a query
 * with two terms on different fields ({@code FIELD2 == 'a' || FIELD1 == 'b'}) while the max term
 * threshold is 1. As the test name indicates, this is the case where the query cannot be reduced under
 * the threshold.
 *
 * <p>The test passes only when the expected exception is thrown, so nothing is placed after the call to
 * {@code apply}: any statement there would never run in a passing test.
 */
@Test(expected = DatawaveFatalQueryException.class)
public void overTermThresholdCantReduceTest() throws IOException, TableNotFoundException, URISyntaxException {
    setupExpects();
    config.setCleanupShardsAndDaysQueryHints(false);
    config.setBypassExecutabilityCheck(true);
    config.setSerializeQueryIterator(false);
    Query mockQuery = createMock(Query.class);
    config.setQuery(mockQuery);
    EasyMock.expect(mockQuery.getId()).andReturn(new UUID(0, 0)).anyTimes();
    EasyMock.expect(mockQuery.getQueryName()).andReturn("testQuery1").anyTimes();
    EasyMock.expect(mockQuery.duplicate("testQuery1")).andReturn(mockQuery).anyTimes();
    // set thresholds (each threshold is set exactly once)
    config.setMaxTermThreshold(1);
    config.setMaxDepthThreshold(10);
    config.setMaxOrExpansionFstThreshold(100);
    config.setMaxOrExpansionThreshold(20);
    config.setMaxOrRangeThreshold(2);
    config.setMaxRangesPerRangeIvarator(50);
    SessionOptions options = new SessionOptions();
    IteratorSetting iteratorSetting = new IteratorSetting(10, "itr", QueryIterator.class);
    String query = "FIELD2 == 'a' || FIELD1 == 'b'";
    iteratorSetting.addOption(QueryOptions.QUERY, query);
    options.addScanIterator(iteratorSetting);
    ScannerChunk chunk = new ScannerChunk(options, Collections.singleton(new Range("20210101_0", "20210101_0")));
    replayAll();
    function = new VisitorFunction(config, helper);
    // Expected to throw DatawaveFatalQueryException; if it returns normally JUnit fails the test
    // because the expected exception was not raised.
    function.apply(chunk);
}
Also used : Query(datawave.webservice.query.Query) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) SessionOptions(datawave.query.tables.SessionOptions) ScannerChunk(datawave.query.tables.async.ScannerChunk) UUID(java.util.UUID) Range(org.apache.accumulo.core.data.Range) Test(org.junit.Test)

Example 10 with SessionOptions

use of datawave.query.tables.SessionOptions in project datawave by NationalSecurityAgency.

the class RegexIndexLookup method submit.

/**
 * Starts the index scans for the configured regex patterns.
 *
 * <p>The body runs only while {@code indexLookupMap} is still {@code null}; the first call assigns it, so
 * later calls return without doing anything. Each pattern is turned into a range and sorted into a
 * forward-index or reverse-index bucket. For every bucket that has both ranges and fields, one
 * {@code ScannerSession} per pattern is configured and chained onto a shared iterator, and a timed
 * callable over that iterator is handed to {@code execService}.
 *
 * @throws DoNotPerformOptimizedQueryException
 *             if a pattern is listed in the configuration's disallowed regex patterns
 * @throws DatawaveFatalQueryException
 *             if range creation fails with a {@code TableNotFoundException} or {@code ExecutionException},
 *             or if configuring a scanner session fails
 */
@Override
public synchronized void submit() {
    if (indexLookupMap != null) {
        return;
    }

    indexLookupMap = new IndexLookupMap(config.getMaxUnfieldedExpansionThreshold(), config.getMaxValueExpansionThreshold());
    indexLookupMap.setPatterns(patterns);

    final Multimap<String, Range> forwardMap = ArrayListMultimap.create();
    final Multimap<String, Range> reverseMap = ArrayListMultimap.create();

    Iterator<Entry<Key, Value>> iter = Iterators.emptyIterator();

    // The fairness iterator guards against runaway iterators caused by an evaluation that never finds anything.
    final IteratorSetting fairnessIterator;
    if (config.getMaxIndexScanTimeMillis() > 0) {
        fairnessIterator = new IteratorSetting(1, TimeoutIterator.class);
        final long maxTime = (long) (config.getMaxIndexScanTimeMillis() * 1.25);
        fairnessIterator.addOption(TimeoutIterator.MAX_SESSION_TIME, Long.toString(maxTime));
    } else {
        fairnessIterator = null;
    }

    // Classify every pattern as satisfiable by the forward or by the reverse index.
    for (String pattern : patterns) {
        if (config.getDisallowedRegexPatterns().contains(pattern)) {
            PreConditionFailedQueryException qe = new PreConditionFailedQueryException(DatawaveErrorCode.IGNORE_PATTERN_FOR_INDEX_LOOKUP,
                            MessageFormat.format("Pattern: {0}", pattern));
            log.debug(qe);
            throw new DoNotPerformOptimizedQueryException(qe);
        }

        final ShardIndexQueryTableStaticMethods.RefactoredRangeDescription rangeDescription;
        try {
            rangeDescription = ShardIndexQueryTableStaticMethods.getRegexRange(null, pattern, config.getFullTableScanEnabled(), helper, config);
        } catch (IllegalArgumentException | JavaRegexParseException e) {
            // A pattern that cannot be looked up is skipped rather than failing the whole lookup.
            log.debug("Ignoring pattern that was not capable of being looked up in the index: " + pattern, e);
            continue;
        } catch (TableNotFoundException e) {
            log.error(e);
            throw new DatawaveFatalQueryException(e);
        } catch (ExecutionException e) {
            throw new DatawaveFatalQueryException(e);
        }

        if (log.isTraceEnabled()) {
            log.trace("Adding pattern " + pattern);
            log.trace("Adding pattern " + rangeDescription);
        }

        (rangeDescription.isForReverseIndex ? reverseMap : forwardMap).put(pattern, rangeDescription.range);
    }

    // Forward index: one scanner session per pattern, all chained into a single iterator.
    if (!(fields.isEmpty() || forwardMap.isEmpty())) {
        for (String patternKey : forwardMap.keySet()) {
            final Collection<Range> patternRanges = forwardMap.get(patternKey);

            final ScannerSession forwardSession;
            try {
                forwardSession = ShardIndexQueryTableStaticMethods.configureLimitedDiscovery(config, scannerFactory, config.getIndexTableName(),
                                patternRanges, Collections.emptySet(), Collections.singleton(patternKey), false, true);
                forwardSession.setResourceClass(BatchResource.class);
            } catch (Exception e) {
                throw new DatawaveFatalQueryException(e);
            }

            final SessionOptions sessionOptions = forwardSession.getOptions();
            if (fairnessIterator != null) {
                sessionOptions.addScanIterator(fairnessIterator);
                sessionOptions.addScanIterator(new IteratorSetting(config.getBaseIteratorPriority() + 100, TimeoutExceptionIterator.class));
            }
            for (String field : fields) {
                sessionOptions.fetchColumnFamily(new Text(field));
            }

            forwardLookupData.getSessions().add(forwardSession);
            iter = Iterators.concat(iter, forwardSession);
        }
        forwardLookupData.setTimedScanFuture(execService.submit(createTimedCallable(iter, fields, forwardLookupData, indexLookupMap)));
    }

    // Reverse index: same procedure against the reverse index table.
    // NOTE(review): 'iter' is not reset here, so the iterator handed to the reverse callable still has any
    // forward sessions chained in front of the reverse ones -- confirm this sharing is intended.
    if (!(reverseFields.isEmpty() || reverseMap.isEmpty())) {
        for (String patternKey : reverseMap.keySet()) {
            final Collection<Range> patternRanges = reverseMap.get(patternKey);
            if (log.isTraceEnabled()) {
                log.trace("adding " + patternRanges + " for reverse");
            }

            final ScannerSession reverseSession;
            try {
                reverseSession = ShardIndexQueryTableStaticMethods.configureLimitedDiscovery(config, scannerFactory, config.getReverseIndexTableName(),
                                patternRanges, Collections.emptySet(), Collections.singleton(patternKey), true, true);
                reverseSession.setResourceClass(BatchResource.class);
            } catch (Exception e) {
                throw new DatawaveFatalQueryException(e);
            }

            final SessionOptions sessionOptions = reverseSession.getOptions();
            if (fairnessIterator != null) {
                sessionOptions.addScanIterator(fairnessIterator);
                sessionOptions.addScanIterator(new IteratorSetting(config.getBaseIteratorPriority() + 100, TimeoutExceptionIterator.class));
            }
            for (String field : reverseFields) {
                sessionOptions.fetchColumnFamily(new Text(field));
            }

            reverseLookupData.getSessions().add(reverseSession);
            iter = Iterators.concat(iter, reverseSession);
        }
        reverseLookupData.setTimedScanFuture(execService.submit(createTimedCallable(iter, reverseFields, reverseLookupData, indexLookupMap)));
    }
}
Also used : PreConditionFailedQueryException(datawave.webservice.query.exception.PreConditionFailedQueryException) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) Entry(java.util.Map.Entry) DatawaveFatalQueryException(datawave.query.exceptions.DatawaveFatalQueryException) JavaRegexParseException(datawave.query.parser.JavaRegexAnalyzer.JavaRegexParseException) ExecutionException(java.util.concurrent.ExecutionException) ScannerSession(datawave.query.tables.ScannerSession) TimeoutExceptionIterator(datawave.core.iterators.TimeoutExceptionIterator) DoNotPerformOptimizedQueryException(datawave.query.exceptions.DoNotPerformOptimizedQueryException) TimeoutIterator(datawave.core.iterators.TimeoutIterator) SessionOptions(datawave.query.tables.SessionOptions) Text(org.apache.hadoop.io.Text) Range(org.apache.accumulo.core.data.Range) DoNotPerformOptimizedQueryException(datawave.query.exceptions.DoNotPerformOptimizedQueryException) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) JavaRegexParseException(datawave.query.parser.JavaRegexAnalyzer.JavaRegexParseException) DatawaveFatalQueryException(datawave.query.exceptions.DatawaveFatalQueryException) PreConditionFailedQueryException(datawave.webservice.query.exception.PreConditionFailedQueryException) ExecutionException(java.util.concurrent.ExecutionException) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting)

Aggregations

SessionOptions (datawave.query.tables.SessionOptions)11 IteratorSetting (org.apache.accumulo.core.client.IteratorSetting)11 ScannerChunk (datawave.query.tables.async.ScannerChunk)7 Range (org.apache.accumulo.core.data.Range)7 Query (datawave.webservice.query.Query)5 UUID (java.util.UUID)5 Test (org.junit.Test)5 DatawaveFatalQueryException (datawave.query.exceptions.DatawaveFatalQueryException)3 ScannerSession (datawave.query.tables.ScannerSession)3 PreConditionFailedQueryException (datawave.webservice.query.exception.PreConditionFailedQueryException)3 TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException)3 Text (org.apache.hadoop.io.Text)3 ParseException (org.apache.commons.jexl2.parser.ParseException)2 TimeoutExceptionIterator (datawave.core.iterators.TimeoutExceptionIterator)1 TimeoutIterator (datawave.core.iterators.TimeoutIterator)1 DoNotPerformOptimizedQueryException (datawave.query.exceptions.DoNotPerformOptimizedQueryException)1 InvalidQueryException (datawave.query.exceptions.InvalidQueryException)1 IdentifierOpLiteral (datawave.query.jexl.JexlASTHelper.IdentifierOpLiteral)1 LiteralRange (datawave.query.jexl.LiteralRange)1 BoundedRange (datawave.query.jexl.nodes.BoundedRange)1