use of datawave.query.parser.JavaRegexAnalyzer.JavaRegexParseException in project datawave by NationalSecurityAgency.
the class DiscoveryLogic method makeRanges.
/**
 * Builds the index scan ranges for a discovery query, split by target index: the first element of the returned pair (value0) holds the forward index
 * ranges and the second (value1) holds the reverse index ranges.
 * <p>
 * Literals and bounded ranges always contribute to the forward set. Each pattern contributes to whichever set its computed range description selects.
 * Every field name that is non-null and not {@code Constants.ANY_FIELD} is also added to {@code familiesToSeek}; an any-field term is turned into a
 * {@code null} field before its range is built.
 * <p>
 * A bounded range rejected with {@code IllegalRangeArgumentException}, or a pattern rejected with {@code JavaRegexParseException}, is logged at error
 * level and skipped rather than failing the whole call.
 *
 * @param config
 *            the discovery configuration supplying the literals, bounded ranges and patterns
 * @param familiesToSeek
 *            mutable set that receives one {@code Text} per concrete field name encountered
 * @param metadataHelper
 *            handed through to the regex range construction
 * @return a pair of (forward index ranges, reverse index ranges)
 * @throws TableNotFoundException
 *             if raised while building a range
 * @throws ExecutionException
 *             if raised while building a range
 */
@SuppressWarnings("unchecked")
public static Pair<Set<Range>, Set<Range>> makeRanges(DiscoveryQueryConfiguration config, Set<Text> familiesToSeek, MetadataHelper metadataHelper) throws TableNotFoundException, ExecutionException {
    final Set<Range> forward = new HashSet<>();
    final Set<Range> reverse = new HashSet<>();

    // Literal terms: always served by the forward index.
    for (Entry<String, String> entry : config.getLiterals().entries()) {
        final String term = entry.getKey();
        String fieldName = entry.getValue();
        if (Constants.ANY_FIELD.equals(fieldName)) {
            // _ANYFIELD_ is represented by a null field when building the range
            fieldName = null;
        } else if (fieldName != null) {
            familiesToSeek.add(new Text(fieldName));
        }
        forward.add(ShardIndexQueryTableStaticMethods.getLiteralRange(fieldName, term));
    }

    // Bounded ranges: also forward index only; unusable ranges are logged and dropped.
    for (Entry<String, LiteralRange<String>> entry : config.getRanges().entries()) {
        final LiteralRange<String> bounded = entry.getValue();
        final String fieldName = entry.getKey();
        if (fieldName != null && !Constants.ANY_FIELD.equals(fieldName)) {
            familiesToSeek.add(new Text(fieldName));
        }
        try {
            forward.add(ShardIndexQueryTableStaticMethods.getBoundedRangeRange(bounded));
        } catch (IllegalRangeArgumentException e) {
            log.error("Error using range [" + bounded + "]", e);
        }
    }

    // Patterns: the range description decides between the forward and the reverse index.
    for (Entry<String, String> entry : config.getPatterns().entries()) {
        final String regex = entry.getKey();
        String fieldName = entry.getValue();
        if (Constants.ANY_FIELD.equals(fieldName)) {
            // _ANYFIELD_ is represented by a null field when building the range
            fieldName = null;
        } else if (fieldName != null) {
            familiesToSeek.add(new Text(fieldName));
        }

        final ShardIndexQueryTableStaticMethods.RefactoredRangeDescription described;
        try {
            described = ShardIndexQueryTableStaticMethods.getRegexRange(fieldName, regex, false, metadataHelper, config);
        } catch (JavaRegexParseException e) {
            log.error("Error parsing pattern [" + regex + "]", e);
            continue;
        }

        final Set<Range> target = described.isForReverseIndex ? reverse : forward;
        target.add(described.range);
    }

    return Pair.with(forward, reverse);
}
use of datawave.query.parser.JavaRegexAnalyzer.JavaRegexParseException in project datawave by NationalSecurityAgency.
the class IteratorBuildingVisitor method buildLiteralRange.
/**
 * Derives a scan range from the regex literal of an ER node.
 * <p>
 * When the regex starts with a literal, the range begins at that literal and ends either at the literal followed by
 * {@code Constants.MAX_UNICODE_STRING} (the regex has a wildcard) or at the literal itself (no wildcard). Otherwise the range runs from
 * {@code Constants.NULL_BYTE_STRING} to {@code Constants.MAX_UNICODE_STRING}. Both bounds are set with the inclusive flag {@code true}.
 *
 * @param node
 *            the ER node providing the identifier and the regex literal
 * @return the range built for the node's identifier
 * @throws DatawaveFatalQueryException
 *             wrapping a {@code JavaRegexParseException} or {@code NoSuchElementException} raised while building the range
 */
public static LiteralRange<?> buildLiteralRange(ASTERNode node) {
    try {
        final JavaRegexAnalyzer regex = new JavaRegexAnalyzer(String.valueOf(JexlASTHelper.getLiteralValue(node)));
        final LiteralRange<String> bounds = new LiteralRange<>(JexlASTHelper.getIdentifier(node), NodeOperand.AND);

        if (regex.isLeadingLiteral()) {
            final String prefix = regex.getLeadingLiteral();
            bounds.updateLower(prefix, true, node);
            final String ceiling = regex.hasWildCard() ? prefix + Constants.MAX_UNICODE_STRING : prefix;
            bounds.updateUpper(ceiling, true, node);
        } else {
            // if the range is a leading wildcard we have to seek over the whole range since it's forward indexed only
            bounds.updateLower(Constants.NULL_BYTE_STRING, true, node);
            bounds.updateUpper(Constants.MAX_UNICODE_STRING, true, node);
        }

        return bounds;
    } catch (JavaRegexParseException | NoSuchElementException e) {
        throw new DatawaveFatalQueryException(e);
    }
}
use of datawave.query.parser.JavaRegexAnalyzer.JavaRegexParseException in project datawave by NationalSecurityAgency.
the class IteratorBuildingVisitor method buildLiteralRange.
/**
 * Derives a scan range from the regex literal of an NR node.
 * <p>
 * The range starts at the analyzer's leading-or-trailing literal and ends at that same literal followed by {@code Constants.MAX_UNICODE_STRING}. Both
 * bounds are set with the inclusive flag {@code true}.
 *
 * @param node
 *            the NR node providing the identifier and the regex literal
 * @return the range built for the node's identifier
 * @throws DatawaveFatalQueryException
 *             wrapping a {@code JavaRegexParseException} or {@code NoSuchElementException} raised while building the range
 */
LiteralRange<?> buildLiteralRange(ASTNRNode node) {
    try {
        final JavaRegexAnalyzer regex = new JavaRegexAnalyzer(String.valueOf(JexlASTHelper.getLiteralValue(node)));
        final LiteralRange<String> bounds = new LiteralRange<>(JexlASTHelper.getIdentifier(node), NodeOperand.AND);

        final String anchor = regex.getLeadingOrTrailingLiteral();
        bounds.updateLower(anchor, true, node);
        bounds.updateUpper(anchor + Constants.MAX_UNICODE_STRING, true, node);

        return bounds;
    } catch (JavaRegexParseException | NoSuchElementException e) {
        throw new DatawaveFatalQueryException(e);
    }
}
use of datawave.query.parser.JavaRegexAnalyzer.JavaRegexParseException in project datawave by NationalSecurityAgency.
the class FullTableScan method getCost.
/*
 * (non-Javadoc)
 *
 * Rates the regex literal carried by the given node. A pattern that the analyzer reports as both a leading and a trailing regex is rated
 * Cost.INFINITE. Every other pattern, including one that cannot be parsed, is rated Cost.UNEVALUATED. A parse failure is logged at warn level
 * together with its exception and is otherwise not propagated.
 *
 * @see datawave.query.planner.pushdown.PushDown#getCost(org.apache.commons.jexl2.parser.JexlNode)
 */
@Override
public Cost getCost(JexlNode node) {
    String pattern = JexlASTHelper.getLiteralValue(node).toString();
    try {
        JavaRegexAnalyzer regex = new JavaRegexAnalyzer(pattern);
        if (regex.isLeadingRegex() && regex.isTrailingRegex()) {
            return Cost.INFINITE;
        }
    } catch (JavaRegexParseException e) {
        // Best effort: an unparsable pattern is simply left unevaluated, but keep the cause in the log so the
        // offending construct can be diagnosed.
        log.warn("Couldn't parse regex from ERNode: " + pattern, e);
    }
    return Cost.UNEVALUATED;
}
use of datawave.query.parser.JavaRegexAnalyzer.JavaRegexParseException in project datawave by NationalSecurityAgency.
the class RegexIndexLookup method submit.
/**
 * Builds and submits the index scans for this lookup's regex patterns. The work is done only on the first call: once {@code indexLookupMap} has been
 * created, later calls return immediately.
 * <p>
 * Steps:
 * <ol>
 * <li>Create the {@code IndexLookupMap} from the configured expansion thresholds and register the patterns on it.</li>
 * <li>If {@code config.getMaxIndexScanTimeMillis()} is positive, prepare a {@code TimeoutIterator} setting whose session time is 1.25 times that
 * value.</li>
 * <li>Turn each pattern into a range and file it under the forward or the reverse map, as indicated by the range description. Patterns whose range
 * cannot be built ({@code IllegalArgumentException} or {@code JavaRegexParseException}) are skipped.</li>
 * <li>For each direction that has both fields and ranges, configure one scanner session per pattern, chain the sessions into a single iterator and
 * submit a timed callable over that iterator to {@code execService}.</li>
 * </ol>
 * Each direction gets its own iterator chain, so the reverse callable only ever sees reverse index sessions.
 *
 * @throws DoNotPerformOptimizedQueryException
 *             if a pattern is listed in {@code config.getDisallowedRegexPatterns()}
 * @throws DatawaveFatalQueryException
 *             if building a range fails with {@code TableNotFoundException} or {@code ExecutionException}, or if a scanner session cannot be
 *             configured
 */
@Override
public synchronized void submit() {
    if (indexLookupMap != null) {
        // already submitted
        return;
    }

    indexLookupMap = new IndexLookupMap(config.getMaxUnfieldedExpansionThreshold(), config.getMaxValueExpansionThreshold());
    indexLookupMap.setPatterns(patterns);

    Multimap<String, Range> forwardMap = ArrayListMultimap.create();
    Multimap<String, Range> reverseMap = ArrayListMultimap.create();

    IteratorSetting fairnessIterator = null;
    if (config.getMaxIndexScanTimeMillis() > 0) {
        // The fairness iterator solves the problem whereby we have runaway iterators as a result of an evaluation that never finds anything
        fairnessIterator = new IteratorSetting(1, TimeoutIterator.class);
        long maxTime = (long) (config.getMaxIndexScanTimeMillis() * 1.25);
        fairnessIterator.addOption(TimeoutIterator.MAX_SESSION_TIME, Long.toString(maxTime));
    }

    // Loop over all the patterns, classifying them as forward or reverse index satisfiable
    for (String pattern : patterns) {
        if (config.getDisallowedRegexPatterns().contains(pattern)) {
            PreConditionFailedQueryException qe = new PreConditionFailedQueryException(DatawaveErrorCode.IGNORE_PATTERN_FOR_INDEX_LOOKUP, MessageFormat.format("Pattern: {0}", pattern));
            log.debug(qe);
            throw new DoNotPerformOptimizedQueryException(qe);
        }

        ShardIndexQueryTableStaticMethods.RefactoredRangeDescription rangeDescription;
        try {
            rangeDescription = ShardIndexQueryTableStaticMethods.getRegexRange(null, pattern, config.getFullTableScanEnabled(), helper, config);
        } catch (IllegalArgumentException | JavaRegexParseException e) {
            log.debug("Ignoring pattern that was not capable of being looked up in the index: " + pattern, e);
            continue;
        } catch (TableNotFoundException e) {
            log.error(e);
            throw new DatawaveFatalQueryException(e);
        } catch (ExecutionException e) {
            throw new DatawaveFatalQueryException(e);
        }

        if (log.isTraceEnabled()) {
            log.trace("Adding pattern " + pattern);
            log.trace("Adding pattern " + rangeDescription);
        }

        if (rangeDescription.isForReverseIndex) {
            reverseMap.put(pattern, rangeDescription.range);
        } else {
            forwardMap.put(pattern, rangeDescription.range);
        }
    }

    if (!fields.isEmpty() && !forwardMap.isEmpty()) {
        // Chain of forward index sessions only.
        Iterator<Entry<Key, Value>> forwardIter = Collections.emptyIterator();
        for (String key : forwardMap.keySet()) {
            Collection<Range> ranges = forwardMap.get(key);
            ScannerSession bs;
            try {
                bs = ShardIndexQueryTableStaticMethods.configureLimitedDiscovery(config, scannerFactory, config.getIndexTableName(), ranges, Collections.emptySet(), Collections.singleton(key), false, true);
                bs.setResourceClass(BatchResource.class);
            } catch (Exception e) {
                throw new DatawaveFatalQueryException(e);
            }

            SessionOptions opts = bs.getOptions();
            if (null != fairnessIterator) {
                opts.addScanIterator(fairnessIterator);
                IteratorSetting cfg = new IteratorSetting(config.getBaseIteratorPriority() + 100, TimeoutExceptionIterator.class);
                opts.addScanIterator(cfg);
            }
            for (String field : fields) {
                opts.fetchColumnFamily(new Text(field));
            }

            forwardLookupData.getSessions().add(bs);
            forwardIter = Iterators.concat(forwardIter, bs);
        }
        forwardLookupData.setTimedScanFuture(execService.submit(createTimedCallable(forwardIter, fields, forwardLookupData, indexLookupMap)));
    }

    if (!reverseFields.isEmpty() && !reverseMap.isEmpty()) {
        // Start from a fresh chain: reusing the forward chain here would hand the forward sessions to the
        // reverse callable as well, where they would be checked against reverseFields.
        Iterator<Entry<Key, Value>> reverseIter = Collections.emptyIterator();
        for (String key : reverseMap.keySet()) {
            Collection<Range> ranges = reverseMap.get(key);
            if (log.isTraceEnabled()) {
                log.trace("adding " + ranges + " for reverse");
            }
            ScannerSession bs;
            try {
                bs = ShardIndexQueryTableStaticMethods.configureLimitedDiscovery(config, scannerFactory, config.getReverseIndexTableName(), ranges, Collections.emptySet(), Collections.singleton(key), true, true);
                bs.setResourceClass(BatchResource.class);
            } catch (Exception e) {
                throw new DatawaveFatalQueryException(e);
            }

            SessionOptions opts = bs.getOptions();
            if (null != fairnessIterator) {
                opts.addScanIterator(fairnessIterator);
                opts.addScanIterator(new IteratorSetting(config.getBaseIteratorPriority() + 100, TimeoutExceptionIterator.class));
            }
            for (String field : reverseFields) {
                opts.fetchColumnFamily(new Text(field));
            }

            reverseLookupData.getSessions().add(bs);
            reverseIter = Iterators.concat(reverseIter, bs);
        }
        reverseLookupData.setTimedScanFuture(execService.submit(createTimedCallable(reverseIter, reverseFields, reverseLookupData, indexLookupMap)));
    }
}
Aggregations