use of datawave.query.tables.SessionOptions in project datawave by NationalSecurityAgency.
the class RangeStream method visit.
/**
 * Resolves an equality node ({@code identifier == literal}) into a {@link ScannerStream}.
 * <ul>
 * <li>{@code noData} when {@code isUnOrNotFielded(node)} holds.</li>
 * <li>{@code unindexed} when no identifier/literal pair can be extracted, the literal is null, the field is
 * {@code QueryOptions.DEFAULT_DATATYPE_FIELDNAME}, or the field is not indexed but is among the helper's known fields.</li>
 * <li>{@code unknownField} when the field is neither indexed nor among the helper's known fields.</li>
 * <li>{@code initialized} with a configured {@link RangeStreamScanner} when the field is indexed.</li>
 * </ul>
 *
 * @param node
 *            the equality node being visited
 * @param data
 *            visitor payload; not read by this method
 * @return the stream describing how the term resolves against the index
 * @throws RuntimeException
 *             wrapping a {@link TableNotFoundException} from the field lookup, or any exception raised while the scanner is being configured
 */
@Override
public ScannerStream visit(ASTEQNode node, Object data) {
    if (isUnOrNotFielded(node)) {
        return ScannerStream.noData(node);
    }

    // Only "identifier = literal" shapes can be looked up.
    final IdentifierOpLiteral idOpLiteral = JexlASTHelper.getIdentifierOpLiteral(node);
    if (null == idOpLiteral) {
        return ScannerStream.unindexed(node);
    }

    final String fieldName = idOpLiteral.deconstructIdentifier();

    // A null literal has nothing to look up in the index.
    final Object literalValue = idOpLiteral.getLiteralValue();
    if (null == literalValue) {
        return ScannerStream.unindexed(node);
    }
    final String literal = literalValue.toString();

    if (QueryOptions.DEFAULT_DATATYPE_FIELDNAME.equals(fieldName)) {
        return ScannerStream.unindexed(node);
    }

    // Not indexed: distinguish a known-but-unindexed field from one the helper does not know at all.
    if (!isIndexed(fieldName, config.getIndexedFields())) {
        final boolean knownField;
        try {
            knownField = this.getAllFieldsFromHelper().contains(fieldName);
        } catch (TableNotFoundException e) {
            log.error(e);
            throw new RuntimeException(e);
        }
        if (knownField) {
            log.debug("{\"" + fieldName + "\": \"" + literal + "\"} is not indexed.");
            return ScannerStream.unindexed(node);
        }
        log.debug("{\"" + fieldName + "\": \"" + literal + "\"} is not an observed field.");
        return ScannerStream.unknownField(node);
    }

    // Indexed field: build a scanner over the term's index range.
    log.debug("\"" + fieldName + "\" is indexed. for " + literal);
    try {
        int priority = config.getBaseIteratorPriority();

        final SessionOptions sessionOptions = new SessionOptions();
        sessionOptions.fetchColumnFamily(new Text(fieldName));
        sessionOptions.addScanIterator(makeDataTypeFilter(config, priority++));

        final Range termRange = rangeForTerm(literal, fieldName, config);

        final RangeStreamScanner scanner = scanners.newRangeScanner(config.getIndexTableName(), config.getAuthorizations(), config.getQuery(),
                        config.getShardsPerDayThreshold());

        // The uid iterator is always installed; when scanners are not limited both options are
        // forced to false so it acts as a pass-through.
        final IteratorSetting uidSetting = new IteratorSetting(priority++, createUidsIteratorClass);
        uidSetting.addOption(CreateUidsIterator.COLLAPSE_UIDS, Boolean.toString(limitScanners && collapseUids));
        uidSetting.addOption(CreateUidsIterator.PARSE_TLD_UIDS, Boolean.toString(limitScanners && config.getParseTldUids()));

        if (log.isTraceEnabled()) {
            log.trace("Building delayed scanner for " + fieldName + ", literal= " + literal);
        }

        sessionOptions.addScanIterator(uidSetting);
        final String queryString = fieldName + "=='" + literal + "'";
        sessionOptions.addScanIterator(QueryScannerHelper.getQueryInfoIterator(config.getQuery(), false, queryString));

        scanner.setOptions(sessionOptions);
        scanner.setMaxResults(config.getMaxIndexBatchSize());
        scanner.setExecutor(streamExecutor);
        scanner.setRanges(Collections.singleton(termRange));

        // The parser is created before the stream that consumes it.
        final EntryParser parser = new EntryParser(node, fieldName, literal, indexOnlyFields);
        return ScannerStream.initialized(scanner, parser, node);
    } catch (Exception e) {
        log.error(e);
        throw new RuntimeException(e);
    }
}
use of datawave.query.tables.SessionOptions in project datawave by NationalSecurityAgency.
the class VisitorFunctionTest method rangeOverTermThresholdTest.
/**
 * Applies {@code VisitorFunction} to a query made of two bounded ranges on FIELD1 with a max term threshold of 1,
 * and expects the returned chunk to differ from the input and to carry a rewritten query containing a
 * {@code _List_} marker for FIELD1 with the single range {@code (a, z)}.
 */
@Test
public void rangeOverTermThresholdTest() throws IOException, TableNotFoundException, URISyntaxException {
    setupExpects();

    config.setCleanupShardsAndDaysQueryHints(false);
    config.setBypassExecutabilityCheck(true);
    config.setSerializeQueryIterator(false);

    Query mockQuery = createMock(Query.class);
    config.setQuery(mockQuery);
    EasyMock.expect(mockQuery.getId()).andReturn(new UUID(0, 0)).anyTimes();
    // Stub the name that the duplicate("testQuery1") expectation below relies on, matching
    // overTermThresholdCantReduceTest. anyTimes() also permits zero invocations.
    EasyMock.expect(mockQuery.getQueryName()).andReturn("testQuery1").anyTimes();
    EasyMock.expect(mockQuery.duplicate("testQuery1")).andReturn(mockQuery).anyTimes();

    // set thresholds
    config.setMaxTermThreshold(1);
    config.setMaxDepthThreshold(10);
    config.setMaxOrExpansionFstThreshold(100);
    config.setMaxOrExpansionThreshold(20);
    config.setMaxOrRangeThreshold(2);
    config.setMaxRangesPerRangeIvarator(50);

    SessionOptions options = new SessionOptions();
    IteratorSetting iteratorSetting = new IteratorSetting(10, "itr", QueryIterator.class);
    String query = "((_Bounded_ = true) && (FIELD1 > 'a' && FIELD1 < 'y')) || ((_Bounded_ = true) && (FIELD1 > 'c' && FIELD1 < 'z'))";
    iteratorSetting.addOption(QueryOptions.QUERY, query);
    options.addScanIterator(iteratorSetting);

    ScannerChunk chunk = new ScannerChunk(options, Collections.singleton(new Range("20210101_0", "20210101_0")));

    replayAll();

    function = new VisitorFunction(config, helper);
    ScannerChunk updatedChunk = function.apply(chunk);

    verifyAll();

    Assert.assertNotEquals(chunk, updatedChunk);
    String updatedQuery = updatedChunk.getOptions().getIterators().iterator().next().getOptions().get(QueryOptions.QUERY);
    Assert.assertNotEquals(query, updatedQuery);
    Assert.assertTrue(updatedQuery, updatedQuery.contains("_List_"));
    Assert.assertTrue(updatedQuery, updatedQuery.contains("field = 'FIELD1'"));
    Assert.assertTrue(updatedQuery, updatedQuery.contains("ranges\":[[\"(a\",\"z)\"]"));
}
use of datawave.query.tables.SessionOptions in project datawave by NationalSecurityAgency.
the class VisitorFunctionTest method underTermThresholdTest.
/**
 * Applies {@code VisitorFunction} to the single-term query {@code FIELD1 == 'a'} with the max term and max depth
 * thresholds both set to 2. The test makes no assertion on the returned chunk; it passes when {@code apply}
 * completes and the mock expectations verify.
 */
@Test
public void underTermThresholdTest() throws IOException, TableNotFoundException, URISyntaxException {
    setupExpects();

    // Test-specific configuration.
    config.setCleanupShardsAndDaysQueryHints(false);
    config.setBypassExecutabilityCheck(true);
    config.setSerializeQueryIterator(false);

    final Query queryMock = createMock(Query.class);
    config.setQuery(queryMock);
    EasyMock.expect(queryMock.getId()).andReturn(new UUID(0, 0)).anyTimes();

    // Thresholds.
    config.setMaxTermThreshold(2);
    config.setMaxDepthThreshold(2);

    // One scanner chunk carrying the query iterator over a single range.
    final SessionOptions sessionOptions = new SessionOptions();
    final IteratorSetting queryIterator = new IteratorSetting(10, "itr", QueryIterator.class);
    queryIterator.addOption(QueryOptions.QUERY, "FIELD1 == 'a'");
    sessionOptions.addScanIterator(queryIterator);
    final Range shardRange = new Range("20210101_0", "20210101_0");
    final ScannerChunk inputChunk = new ScannerChunk(sessionOptions, Collections.singleton(shardRange));

    replayAll();

    function = new VisitorFunction(config, helper);
    function.apply(inputChunk);

    verifyAll();
}
use of datawave.query.tables.SessionOptions in project datawave by NationalSecurityAgency.
the class VisitorFunctionTest method overTermThresholdCantReduceTest.
/**
 * Applies {@code VisitorFunction} to {@code FIELD2 == 'a' || FIELD1 == 'b'} with a max term threshold of 1 and
 * expects {@link DatawaveFatalQueryException} to be thrown.
 */
@Test(expected = DatawaveFatalQueryException.class)
public void overTermThresholdCantReduceTest() throws IOException, TableNotFoundException, URISyntaxException {
    setupExpects();

    config.setCleanupShardsAndDaysQueryHints(false);
    config.setBypassExecutabilityCheck(true);
    config.setSerializeQueryIterator(false);

    Query mockQuery = createMock(Query.class);
    config.setQuery(mockQuery);
    EasyMock.expect(mockQuery.getId()).andReturn(new UUID(0, 0)).anyTimes();
    EasyMock.expect(mockQuery.getQueryName()).andReturn("testQuery1").anyTimes();
    EasyMock.expect(mockQuery.duplicate("testQuery1")).andReturn(mockQuery).anyTimes();

    // set thresholds
    config.setMaxTermThreshold(1);
    config.setMaxDepthThreshold(10);
    config.setMaxOrExpansionFstThreshold(100);
    config.setMaxOrExpansionThreshold(20);
    config.setMaxOrRangeThreshold(2);
    config.setMaxRangesPerRangeIvarator(50);

    SessionOptions options = new SessionOptions();
    IteratorSetting iteratorSetting = new IteratorSetting(10, "itr", QueryIterator.class);
    iteratorSetting.addOption(QueryOptions.QUERY, "FIELD2 == 'a' || FIELD1 == 'b'");
    options.addScanIterator(iteratorSetting);

    ScannerChunk chunk = new ScannerChunk(options, Collections.singleton(new Range("20210101_0", "20210101_0")));

    replayAll();

    function = new VisitorFunction(config, helper);

    // The expected exception must come from this call, so nothing placed after it would run
    // in a passing test; no verification or assertions follow.
    function.apply(chunk);
}
use of datawave.query.tables.SessionOptions in project datawave by NationalSecurityAgency.
the class RegexIndexLookup method submit.
/**
 * Classifies every configured pattern as forward- or reverse-index satisfiable, builds one scanner session per
 * pattern, and submits one timed callable per direction to the executor. Does nothing when
 * {@code indexLookupMap} has already been created.
 *
 * @throws DoNotPerformOptimizedQueryException
 *             if a pattern is listed in the configured disallowed regex patterns
 * @throws DatawaveFatalQueryException
 *             if the range for a pattern cannot be computed because of a missing table or an execution failure,
 *             or if a scanner session cannot be configured
 */
@Override
public synchronized void submit() {
    if (indexLookupMap != null) {
        return;
    }

    indexLookupMap = new IndexLookupMap(config.getMaxUnfieldedExpansionThreshold(), config.getMaxValueExpansionThreshold());
    indexLookupMap.setPatterns(patterns);

    Multimap<String, Range> forwardMap = ArrayListMultimap.create();
    Multimap<String, Range> reverseMap = ArrayListMultimap.create();

    IteratorSetting fairnessIterator = null;
    if (config.getMaxIndexScanTimeMillis() > 0) {
        // The fairness iterator solves the problem whereby we have runaway iterators as a result of an evaluation that never finds anything
        fairnessIterator = new IteratorSetting(1, TimeoutIterator.class);
        // Session time limit is the configured max index scan time scaled by 1.25.
        long maxTime = (long) (config.getMaxIndexScanTimeMillis() * 1.25);
        fairnessIterator.addOption(TimeoutIterator.MAX_SESSION_TIME, Long.toString(maxTime));
    }

    // Loop over all the patterns, classifying them as forward or reverse index satisfiable
    for (String pattern : patterns) {
        if (config.getDisallowedRegexPatterns().contains(pattern)) {
            PreConditionFailedQueryException qe = new PreConditionFailedQueryException(DatawaveErrorCode.IGNORE_PATTERN_FOR_INDEX_LOOKUP,
                            MessageFormat.format("Pattern: {0}", pattern));
            log.debug(qe);
            throw new DoNotPerformOptimizedQueryException(qe);
        }

        ShardIndexQueryTableStaticMethods.RefactoredRangeDescription rangeDescription;
        try {
            rangeDescription = ShardIndexQueryTableStaticMethods.getRegexRange(null, pattern, config.getFullTableScanEnabled(), helper, config);
        } catch (IllegalArgumentException | JavaRegexParseException e) {
            // Patterns that cannot be turned into an index range are skipped rather than failing the lookup.
            log.debug("Ignoring pattern that was not capable of being looked up in the index: " + pattern, e);
            continue;
        } catch (TableNotFoundException e) {
            log.error(e);
            throw new DatawaveFatalQueryException(e);
        } catch (ExecutionException e) {
            throw new DatawaveFatalQueryException(e);
        }

        if (log.isTraceEnabled()) {
            log.trace("Adding pattern " + pattern);
            log.trace("Adding pattern " + rangeDescription);
        }

        if (rangeDescription.isForReverseIndex) {
            reverseMap.put(pattern, rangeDescription.range);
        } else {
            forwardMap.put(pattern, rangeDescription.range);
        }
    }

    if (!fields.isEmpty() && !forwardMap.isEmpty()) {
        // Each direction concatenates into its own iterator so that the forward and reverse
        // callables never consume the same scanner sessions.
        Iterator<Entry<Key, Value>> forwardIter = Collections.emptyIterator();
        for (String key : forwardMap.keySet()) {
            Collection<Range> ranges = forwardMap.get(key);
            ScannerSession bs;
            try {
                bs = ShardIndexQueryTableStaticMethods.configureLimitedDiscovery(config, scannerFactory, config.getIndexTableName(), ranges,
                                Collections.emptySet(), Collections.singleton(key), false, true);
                bs.setResourceClass(BatchResource.class);
            } catch (Exception e) {
                throw new DatawaveFatalQueryException(e);
            }

            SessionOptions opts = bs.getOptions();
            if (null != fairnessIterator) {
                opts.addScanIterator(fairnessIterator);
                opts.addScanIterator(new IteratorSetting(config.getBaseIteratorPriority() + 100, TimeoutExceptionIterator.class));
            }
            for (String field : fields) {
                opts.fetchColumnFamily(new Text(field));
            }

            forwardLookupData.getSessions().add(bs);
            forwardIter = Iterators.concat(forwardIter, bs);
        }
        forwardLookupData.setTimedScanFuture(execService.submit(createTimedCallable(forwardIter, fields, forwardLookupData, indexLookupMap)));
    }

    if (!reverseFields.isEmpty() && !reverseMap.isEmpty()) {
        Iterator<Entry<Key, Value>> reverseIter = Collections.emptyIterator();
        for (String key : reverseMap.keySet()) {
            Collection<Range> ranges = reverseMap.get(key);
            if (log.isTraceEnabled()) {
                log.trace("adding " + ranges + " for reverse");
            }
            ScannerSession bs;
            try {
                bs = ShardIndexQueryTableStaticMethods.configureLimitedDiscovery(config, scannerFactory, config.getReverseIndexTableName(), ranges,
                                Collections.emptySet(), Collections.singleton(key), true, true);
                bs.setResourceClass(BatchResource.class);
            } catch (Exception e) {
                throw new DatawaveFatalQueryException(e);
            }

            SessionOptions opts = bs.getOptions();
            if (null != fairnessIterator) {
                opts.addScanIterator(fairnessIterator);
                opts.addScanIterator(new IteratorSetting(config.getBaseIteratorPriority() + 100, TimeoutExceptionIterator.class));
            }
            for (String field : reverseFields) {
                opts.fetchColumnFamily(new Text(field));
            }

            reverseLookupData.getSessions().add(bs);
            reverseIter = Iterators.concat(reverseIter, bs);
        }
        reverseLookupData.setTimedScanFuture(execService.submit(createTimedCallable(reverseIter, reverseFields, reverseLookupData, indexLookupMap)));
    }
}
Aggregations