Use of datawave.query.tables.async.ScannerChunk in project datawave by NationalSecurityAgency.
The class VisitorFunction, method apply.
@Override
@Nullable
public ScannerChunk apply(@Nullable ScannerChunk input) {
    SessionOptions options = input.getOptions();
    ScannerChunk newSettings = new ScannerChunk(null, input.getRanges(), input.getLastKnownLocation());
    SessionOptions newOptions = new SessionOptions(options);
    for (IteratorSetting setting : options.getIterators()) {
        final String query = setting.getOptions().get(QueryOptions.QUERY);
        if (null != query) {
            IteratorSetting newIteratorSetting = new IteratorSetting(setting.getPriority(), setting.getName(), setting.getIteratorClass());
            newIteratorSetting.addOptions(setting.getOptions());
            try {
                ASTJexlScript script = null;
                boolean evaluatedPreviously = previouslyExecutable(query);
                boolean madeChange = false;
                if (!evaluatedPreviously && config.isCleanupShardsAndDaysQueryHints()) {
                    script = JexlASTHelper.parseAndFlattenJexlQuery(query);
                    script = DateIndexCleanupVisitor.cleanup(script);
                    madeChange = true;
                }
                String newQuery = evaluatedPreviously ? previouslyExpanded.get(query) : query;
                List<String> debug = null;
                if (log.isTraceEnabled())
                    debug = Lists.newArrayList();
                if (!config.isDisableWhindexFieldMappings() && !evaluatedPreviously) {
                    if (null == script)
                        script = JexlASTHelper.parseAndFlattenJexlQuery(query);
                    // apply the whindex using the shard date
                    ASTJexlScript rebuiltScript = WhindexVisitor.apply(script, config, getEarliestBeginDate(newSettings.getRanges()), metadataHelper);
                    // if the query changed, save it, and mark it as such
                    if (!TreeEqualityVisitor.isEqual(script, rebuiltScript)) {
                        log.debug("[" + config.getQuery().getId() + "] The WhindexVisitor updated the query: " + JexlStringBuildingVisitor.buildQuery(script));
                        script = rebuiltScript;
                        madeChange = true;
                    }
                }
                if (!config.isBypassExecutabilityCheck() || !evaluatedPreviously) {
                    if (null == script)
                        script = JexlASTHelper.parseAndFlattenJexlQuery(query);
                    if (!ExecutableDeterminationVisitor.isExecutable(script, config, indexedFields, indexOnlyFields, nonEventFields, true, debug, this.metadataHelper)) {
                        if (log.isTraceEnabled()) {
                            log.trace("Need to pull up non-executable query: " + JexlStringBuildingVisitor.buildQuery(script));
                            for (String debugStatement : debug) {
                                log.trace(debugStatement);
                            }
                            DefaultQueryPlanner.logQuery(script, "Failing query:");
                        }
                        script = (ASTJexlScript) PullupUnexecutableNodesVisitor.pullupDelayedPredicates(script, true, config, indexedFields, indexOnlyFields, nonEventFields, metadataHelper);
                        madeChange = true;
                        STATE state = ExecutableDeterminationVisitor.getState(script, config, indexedFields, indexOnlyFields, nonEventFields, true, debug, metadataHelper);
                        /**
                         * We could achieve better performance if we live with the small number of queries that error due to the full table scan exception.
                         *
                         * Either look at improving PushdownUnexecutableNodesVisitor or avoid the process altogether.
                         */
                        if (state != STATE.EXECUTABLE) {
                            if (log.isTraceEnabled()) {
                                log.trace("Need to push down non-executable query: " + JexlStringBuildingVisitor.buildQuery(script));
                                for (String debugStatement : debug) {
                                    log.trace(debugStatement);
                                }
                            }
                            script = (ASTJexlScript) PushdownUnexecutableNodesVisitor.pushdownPredicates(script, true, config, indexedFields, indexOnlyFields, nonEventFields, metadataHelper);
                        }
                        state = ExecutableDeterminationVisitor.getState(script, config, indexedFields, indexOnlyFields, nonEventFields, true, debug, metadataHelper);
                        if (state != STATE.EXECUTABLE) {
                            if (state == STATE.ERROR) {
                                log.warn("After expanding the query, it is determined that the query cannot be executed due to index-only fields mixed with expressions that cannot be run against the index.");
                                BadRequestQueryException qe = new BadRequestQueryException(DatawaveErrorCode.INDEX_ONLY_FIELDS_MIXED_INVALID_EXPRESSIONS);
                                throw new InvalidQueryException(qe);
                            }
                            log.warn("After expanding the query, it is determined that the query cannot be executed against the field index and a full table scan is required");
                            if (!config.getFullTableScanEnabled()) {
                                if (log.isTraceEnabled()) {
                                    log.trace("Full Table fail of " + JexlStringBuildingVisitor.buildQuery(script));
                                    for (String debugStatement : debug) {
                                        log.trace(debugStatement);
                                    }
                                    DefaultQueryPlanner.logQuery(script, "Failing query:");
                                }
                                PreConditionFailedQueryException qe = new PreConditionFailedQueryException(DatawaveErrorCode.FULL_TABLE_SCAN_REQUIRED_BUT_DISABLED);
                                throw new DatawaveFatalQueryException(qe);
                            }
                        }
                        if (log.isTraceEnabled()) {
                            for (String debugStatement : debug) {
                                log.trace(debugStatement);
                            }
                            DefaultQueryPlanner.logQuery(script, "Query pushing down large fielded lists:");
                        }
                    }
                }
                if (config.getSerializeQueryIterator()) {
                    serializeQuery(newIteratorSetting);
                } else {
                    if (!evaluatedPreviously) {
                        // if we have an hdfs configuration, then we can pushdown large fielded lists to an ivarator
                        if (config.getHdfsSiteConfigURLs() != null && setting.getOptions().get(QueryOptions.BATCHED_QUERY) == null) {
                            if (null == script)
                                script = JexlASTHelper.parseAndFlattenJexlQuery(query);
                            try {
                                script = pushdownLargeFieldedLists(config, script);
                                madeChange = true;
                            } catch (IOException ioe) {
                                log.error("Unable to pushdown large fielded lists....leaving in expanded form", ioe);
                            }
                        }
                    }
                }
                // only recompile the script if changes were made to the query
                if (madeChange)
                    newQuery = JexlStringBuildingVisitor.buildQuery(script);
                try {
                    previouslyExpanded.put(query, newQuery);
                } catch (NullPointerException npe) {
                    throw new DatawaveFatalQueryException(String.format("New query is null! madeChange: %b, qid: %s", madeChange, setting.getOptions().get(QueryOptions.QUERY_ID)), npe);
                }
                // test the final script for thresholds
                DefaultQueryPlanner.validateQuerySize("VisitorFunction", script, config, false);
                newIteratorSetting.addOption(QueryOptions.QUERY, newQuery);
                newOptions.removeScanIterator(setting.getName());
                newOptions.addScanIterator(newIteratorSetting);
                if (log.isDebugEnabled()) {
                    log.debug("VisitorFunction result: " + newSettings.getRanges());
                }
                if (log.isTraceEnabled()) {
                    DefaultQueryPlanner.logTrace(PrintingVisitor.formattedQueryStringList(script), "VistorFunction::apply method");
                } else if (log.isDebugEnabled()) {
                    DefaultQueryPlanner.logDebug(PrintingVisitor.formattedQueryStringList(script, DefaultQueryPlanner.maxChildNodesToPrint), "VistorFunction::apply method");
                }
            } catch (ParseException e) {
                throw new DatawaveFatalQueryException(e);
            }
        }
    }
    newSettings.setOptions(newOptions);
    return newSettings;
}
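VisitorFunction is applied once per planned chunk, rewriting the QUERY option on each iterator setting before the chunk is handed off to a scan. The fragment below is a minimal sketch of one way it might be driven; buildChunks() is a hypothetical helper and is not a datawave API, while the (config, helper) constructor call mirrors the test code further down this page.

    // Hypothetical driver loop; buildChunks() is a placeholder, not part of datawave.
    import java.util.List;
    import com.google.common.collect.Lists;

    List<ScannerChunk> chunks = buildChunks(); // assumed source of planned chunks
    VisitorFunction visitorFunction = new VisitorFunction(config, helper); // as constructed in the tests below
    List<ScannerChunk> rewritten = Lists.newArrayList();
    for (ScannerChunk chunk : chunks) {
        // apply() re-plans the query held in the chunk's SessionOptions and
        // returns a new ScannerChunk carrying the rewritten iterator settings
        rewritten.add(visitorFunction.apply(chunk));
    }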
Use of datawave.query.tables.async.ScannerChunk in project datawave by NationalSecurityAgency.
The class PushdownFunction, method apply.
public List<ScannerChunk> apply(QueryData qd) {
    Multimap<String, QueryPlan> serverPlan = ArrayListMultimap.create();
    List<ScannerChunk> chunks = Lists.newArrayList();
    try {
        redistributeQueries(serverPlan, tl, new QueryPlan(qd));
        for (String server : serverPlan.keySet()) {
            Collection<QueryPlan> plans = serverPlan.get(server);
            Set<QueryPlan> reducedSet = Sets.newHashSet(plans);
            for (QueryPlan plan : reducedSet) {
                Integer hashCode = plan.hashCode();
                if (queryPlanSet.contains(hashCode)) {
                    continue;
                } else
                    queryPlanSet.clear();
                queryPlanSet.add(hashCode);
                try {
                    SessionOptions options = new SessionOptions();
                    if (log.isTraceEnabled()) {
                        log.trace("setting ranges" + plan.getRanges());
                        log.trace("range set size" + plan.getSettings().size());
                    }
                    for (IteratorSetting setting : plan.getSettings()) {
                        options.addScanIterator(setting);
                    }
                    for (IteratorSetting setting : customSettings) {
                        options.addScanIterator(setting);
                    }
                    for (String cf : plan.getColumnFamilies()) {
                        options.fetchColumnFamily(new Text(cf));
                    }
                    options.setQueryConfig(this.config);
                    chunks.add(new ScannerChunk(options, Lists.newArrayList(plan.getRanges()), server));
                } catch (Exception e) {
                    log.error(e);
                    throw new AccumuloException(e);
                }
            }
        }
    } catch (AccumuloException e) {
        throw new RuntimeException(e);
    } catch (AccumuloSecurityException e) {
        throw new RuntimeException(e);
    } catch (TableNotFoundException e) {
        throw new RuntimeException(e);
    } catch (ParseException e) {
        throw new RuntimeException(e);
    }
    return chunks;
}
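The heart of this apply() is the per-server grouping: plans are redistributed into a Multimap keyed by tablet server, deduplicated per server, and each surviving QueryPlan is turned into one ScannerChunk. The following is a minimal, self-contained sketch of that grouping idiom only, with plain strings standing in for QueryPlan; it is not datawave code.

    // Sketch of the Multimap-per-server grouping used above (Guava on the classpath).
    import java.util.Collection;
    import java.util.Set;
    import com.google.common.collect.ArrayListMultimap;
    import com.google.common.collect.Multimap;
    import com.google.common.collect.Sets;

    public class ServerGroupingSketch {
        public static void main(String[] args) {
            Multimap<String, String> serverPlan = ArrayListMultimap.create();
            serverPlan.put("tserver-1", "plan-a");
            serverPlan.put("tserver-1", "plan-a"); // duplicate collapses in the set below
            serverPlan.put("tserver-2", "plan-b");
            for (String server : serverPlan.keySet()) {
                Collection<String> plans = serverPlan.get(server);
                Set<String> reducedSet = Sets.newHashSet(plans);
                // each entry of reducedSet would become one ScannerChunk for this server
                System.out.println(server + " -> " + reducedSet);
            }
        }
    }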
Use of datawave.query.tables.async.ScannerChunk in project datawave by NationalSecurityAgency.
The class BatchScannerSession, method submitTasks.
protected void submitTasks(List<ScannerChunk> newChunks) {
    for (ScannerChunk chunk : newChunks) {
        AtomicInteger numChunks = serverMap.get(chunk.getLastKnownLocation());
        if (numChunks == null) {
            numChunks = new AtomicInteger(1);
            serverMap.put(chunk.getLastKnownLocation(), numChunks);
        } else
            numChunks.incrementAndGet();
        Scan scan = null;
        if (speculativeScanning && delegatedResourceInitializer == RfileResource.class) {
            if (log.isTraceEnabled()) {
                log.trace("Using speculative execution");
            }
            chunk.setQueryId(settings.getId().toString());
            scan = new SpeculativeScan(localTableName, localAuths, chunk, delegatorReference, delegatedResourceInitializer, resultQueue, listenerService);
            scan.setVisitors(visitorFunctions);
            Scan childScan = new Scan(localTableName, localAuths, new ScannerChunk(chunk), delegatorReference, BatchResource.class, ((SpeculativeScan) scan).getQueue(), listenerService);
            childScan.setVisitors(visitorFunctions);
            ((SpeculativeScan) scan).addScan(childScan);
            childScan = new Scan(localTableName, localAuths, new ScannerChunk(chunk), delegatorReference, delegatedResourceInitializer, ((SpeculativeScan) scan).getQueue(), listenerService);
            childScan.setVisitors(visitorFunctions);
            ((SpeculativeScan) scan).addScan(childScan);
        } else {
            scan = new Scan(localTableName, localAuths, chunk, delegatorReference, delegatedResourceInitializer, resultQueue, listenerService);
        }
        if (backoffEnabled) {
            scan.setSessionArbiter(this);
        }
        scan.setVisitors(visitorFunctions);
        scan.setTimeout(scanLimitTimeout);
        if (log.isTraceEnabled()) {
            log.trace("Adding scanner " + chunk);
        }
        submitScan(scan, true);
    }
}
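The first few lines of submitTasks keep a per-server chunk counter in serverMap with a get/put branch. Assuming serverMap is a ConcurrentHashMap<String, AtomicInteger> (its declaration is not shown on this page), the same bookkeeping could also be written atomically with computeIfAbsent, as in this sketch; "tserver-1" stands in for chunk.getLastKnownLocation().

    // Sketch only: equivalent per-server counting in one atomic step.
    import java.util.concurrent.ConcurrentHashMap;
    import java.util.concurrent.atomic.AtomicInteger;

    ConcurrentHashMap<String, AtomicInteger> serverMap = new ConcurrentHashMap<>();
    serverMap.computeIfAbsent("tserver-1", s -> new AtomicInteger(0)).incrementAndGet();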
Use of datawave.query.tables.async.ScannerChunk in project datawave by NationalSecurityAgency.
The class VisitorFunctionTest, method overTermThresholdTest.
@Test
public void overTermThresholdTest() throws IOException, TableNotFoundException, URISyntaxException {
    setupExpects();
    config.setCleanupShardsAndDaysQueryHints(false);
    config.setBypassExecutabilityCheck(true);
    config.setSerializeQueryIterator(false);
    Query mockQuery = createMock(Query.class);
    config.setQuery(mockQuery);
    EasyMock.expect(mockQuery.getId()).andReturn(new UUID(0, 0)).anyTimes();
    EasyMock.expect(mockQuery.duplicate("testQuery1")).andReturn(mockQuery).anyTimes();
    // set thresholds
    config.setMaxTermThreshold(1);
    config.setMaxDepthThreshold(10);
    config.setMaxOrExpansionFstThreshold(100);
    config.setMaxOrExpansionThreshold(20);
    config.setMaxOrRangeThreshold(2);
    config.setMaxRangesPerRangeIvarator(50);
    config.setMaxOrRangeThreshold(2);
    SessionOptions options = new SessionOptions();
    IteratorSetting iteratorSetting = new IteratorSetting(10, "itr", QueryIterator.class);
    String query = "FIELD1 == 'a' || FIELD1 == 'b'";
    iteratorSetting.addOption(QueryOptions.QUERY, query);
    options.addScanIterator(iteratorSetting);
    ScannerChunk chunk = new ScannerChunk(options, Collections.singleton(new Range("20210101_0", "20210101_0")));
    replayAll();
    function = new VisitorFunction(config, helper);
    ScannerChunk updatedChunk = function.apply(chunk);
    verifyAll();
    Assert.assertNotEquals(chunk, updatedChunk);
    String updatedQuery = updatedChunk.getOptions().getIterators().iterator().next().getOptions().get(QueryOptions.QUERY);
    Assert.assertNotEquals(query, updatedQuery);
    Assert.assertTrue(updatedQuery, updatedQuery.contains("_List_"));
    Assert.assertTrue(updatedQuery, updatedQuery.contains("field = 'FIELD1'"));
    Assert.assertTrue(updatedQuery, updatedQuery.contains("values\":[\"a\",\"b\"]"));
}
Use of datawave.query.tables.async.ScannerChunk in project datawave by NationalSecurityAgency.
The class VisitorFunctionTest, method overTermThresholdAfterFirstReductionOverrideSecondTest.
@Test
public void overTermThresholdAfterFirstReductionOverrideSecondTest() throws IOException, TableNotFoundException, URISyntaxException {
    setupExpects();
    config.setCleanupShardsAndDaysQueryHints(false);
    config.setBypassExecutabilityCheck(true);
    config.setSerializeQueryIterator(false);
    Query mockQuery = createMock(Query.class);
    config.setQuery(mockQuery);
    EasyMock.expect(mockQuery.getId()).andReturn(new UUID(0, 0)).anyTimes();
    EasyMock.expect(mockQuery.getQueryName()).andReturn("testQuery1").anyTimes();
    EasyMock.expect(mockQuery.duplicate("testQuery1")).andReturn(mockQuery).anyTimes();
    // set thresholds
    config.setMaxTermThreshold(5);
    config.setMaxDepthThreshold(20);
    config.setMaxOrExpansionFstThreshold(100);
    config.setMaxOrExpansionThreshold(5);
    config.setMaxOrRangeThreshold(2);
    config.setMaxRangesPerRangeIvarator(50);
    config.setMaxOrRangeThreshold(2);
    SessionOptions options = new SessionOptions();
    IteratorSetting iteratorSetting = new IteratorSetting(10, "itr", QueryIterator.class);
    String query = "(FIELD1 == 'a' || FIELD1 == 'b' || FIELD1 == 'c' || FIELD1 == 'd' || FIELD1 == 'e') && (FIELD1 == 'x' || FIELD1 == 'y' || FIELD2 == 'a' || FIELD2 == 'b' || FIELD2 == 'c' || FIELD2 == 'd')";
    iteratorSetting.addOption(QueryOptions.QUERY, query);
    options.addScanIterator(iteratorSetting);
    ScannerChunk chunk = new ScannerChunk(options, Collections.singleton(new Range("20210101_0", "20210101_0")));
    replayAll();
    function = new VisitorFunction(config, helper);
    ScannerChunk updatedChunk = function.apply(chunk);
    verifyAll();
    Assert.assertNotEquals(chunk, updatedChunk);
    String updatedQuery = updatedChunk.getOptions().getIterators().iterator().next().getOptions().get(QueryOptions.QUERY);
    Assert.assertNotEquals(query, updatedQuery);
    Assert.assertTrue(updatedQuery, updatedQuery.contains("_List_"));
    Assert.assertTrue(updatedQuery, updatedQuery.contains("field = 'FIELD1'"));
    Assert.assertTrue(updatedQuery, updatedQuery.contains("values\":[\"a\",\"b\",\"c\",\"d\",\"e\"]"));
    Assert.assertTrue(updatedQuery, updatedQuery.contains("FIELD1 == 'x' || FIELD1 == 'y'"));
    Assert.assertTrue(updatedQuery, updatedQuery.contains("&& (field = 'FIELD2') && (params = '{\"values\":[\"a\",\"b\",\"c\",\"d\"]}')"));
}