use of datawave.query.parser.JavaRegexAnalyzer in project datawave by NationalSecurityAgency.
the class PushdownMissingIndexRangeNodesVisitor method missingIndexRange.
/**
 * Checks whether the index range implied by the leading literal of a regex node overlaps one of the configured index holes.
 * <p>
 * The range starts at the leading literal and ends at that literal with its final character incremented. When the final character is already
 * {@link Character#MAX_VALUE}, a {@code (char) 0} is appended instead.
 *
 * @param node
 *            the regex node to examine
 * @return {@code true} if a hole overlaps the range for {@code beginDate}/{@code endDate}; {@code false} if the node has no literal, the regex has no
 *         leading literal, a hole reports {@code after(regex)} before any overlap is found, or no hole overlaps
 * @throws DatawaveFatalQueryException
 *             if the regex cannot be parsed
 */
private boolean missingIndexRange(ASTERNode node) {
    final Object value = JexlASTHelper.getLiteralValue(node);
    if (value == null) {
        return false;
    }
    final String regex = String.valueOf(value);
    try {
        final JavaRegexAnalyzer regexAnalyzer = new JavaRegexAnalyzer(regex);
        if (!regexAnalyzer.isLeadingLiteral()) {
            return false;
        }
        final String prefix = regexAnalyzer.getLeadingLiteral();
        final int lastIndex = prefix.length() - 1;
        final char last = prefix.charAt(lastIndex);
        // upper bound of the range covered by the leading literal
        final String upperBound;
        if (last == Character.MAX_VALUE) {
            // cannot increment the final character any further, so extend the string instead
            upperBound = prefix + (char) 0;
        } else {
            upperBound = prefix.substring(0, lastIndex) + (char) (last + 1);
        }
        for (IndexHole hole : indexHoles) {
            if (hole.overlaps(this.beginDate, this.endDate, prefix, upperBound)) {
                return true;
            }
            // NOTE(review): presumably the holes are ordered so that nothing later can overlap -- confirm
            if (hole.after(regex)) {
                return false;
            }
        }
        return false;
    } catch (JavaRegexAnalyzer.JavaRegexParseException e) {
        log.error("Unable to parse regex " + regex, e);
        throw new DatawaveFatalQueryException("Unable to parse regex " + regex, e);
    }
}
use of datawave.query.parser.JavaRegexAnalyzer in project datawave by NationalSecurityAgency.
the class IteratorBuildingVisitor method buildLiteralRange.
/**
 * Builds the literal range to scan for a regex ({@code =~}) node.
 *
 * @param node
 *            the regex node supplying the field identifier and the regex literal
 * @return a range bounded by the regex's leading literal, or spanning {@code Constants.NULL_BYTE_STRING} to {@code Constants.MAX_UNICODE_STRING} when
 *         there is no leading literal
 * @throws DatawaveFatalQueryException
 *             wrapping a regex parse failure or a {@link NoSuchElementException} raised while building the range
 */
public static LiteralRange<?> buildLiteralRange(ASTERNode node) {
    try {
        final String regex = String.valueOf(JexlASTHelper.getLiteralValue(node));
        final JavaRegexAnalyzer regexAnalyzer = new JavaRegexAnalyzer(regex);
        final LiteralRange<String> bounds = new LiteralRange<>(JexlASTHelper.getIdentifier(node), NodeOperand.AND);
        if (regexAnalyzer.isLeadingLiteral()) {
            final String prefix = regexAnalyzer.getLeadingLiteral();
            bounds.updateLower(prefix, true, node);
            // with a wildcard, everything starting with the prefix is in range; without one, only the prefix itself
            final String upper = regexAnalyzer.hasWildCard() ? prefix + Constants.MAX_UNICODE_STRING : prefix;
            bounds.updateUpper(upper, true, node);
        } else {
            // a leading wildcard forces a seek over the whole range since it's forward indexed only
            bounds.updateLower(Constants.NULL_BYTE_STRING, true, node);
            bounds.updateUpper(Constants.MAX_UNICODE_STRING, true, node);
        }
        return bounds;
    } catch (JavaRegexParseException | NoSuchElementException e) {
        throw new DatawaveFatalQueryException(e);
    }
}
use of datawave.query.parser.JavaRegexAnalyzer in project datawave by NationalSecurityAgency.
the class IteratorBuildingVisitor method buildLiteralRange.
/**
 * Builds the literal range to scan for a negated-regex ({@code !~}) node.
 *
 * @param node
 *            the negated regex node supplying the field identifier and the regex literal
 * @return a range from the regex's leading-or-trailing literal up to that literal followed by {@code Constants.MAX_UNICODE_STRING}
 * @throws DatawaveFatalQueryException
 *             wrapping a regex parse failure or a {@link NoSuchElementException} raised while building the range
 */
LiteralRange<?> buildLiteralRange(ASTNRNode node) {
    try {
        final String regex = String.valueOf(JexlASTHelper.getLiteralValue(node));
        final JavaRegexAnalyzer regexAnalyzer = new JavaRegexAnalyzer(regex);
        final String fieldName = JexlASTHelper.getIdentifier(node);
        final LiteralRange<String> bounds = new LiteralRange<>(fieldName, NodeOperand.AND);
        final String literal = regexAnalyzer.getLeadingOrTrailingLiteral();
        bounds.updateLower(literal, true, node);
        bounds.updateUpper(literal + Constants.MAX_UNICODE_STRING, true, node);
        return bounds;
    } catch (JavaRegexParseException | NoSuchElementException e) {
        throw new DatawaveFatalQueryException(e);
    }
}
use of datawave.query.parser.JavaRegexAnalyzer in project datawave by NationalSecurityAgency.
the class RegexIndexExpansionVisitor method isExpandable.
/**
 * Decides whether a regex node can be expanded against the index.
 * <p>
 * Anything is expandable when full table scans are enabled. Otherwise an ngram (double ended) regex is never expandable, a regex with a leading
 * literal requires the field to be indexed, and a regex with a trailing literal requires the field to be reverse indexed.
 *
 * @param node
 *            the node to consider
 * @return whether the node is expandable
 * @throws TableNotFoundException
 *             if raised by the index lookups on {@code helper}
 * @throws JavaRegexAnalyzer.JavaRegexParseException
 *             if the node's regex cannot be parsed
 */
public boolean isExpandable(ASTERNode node) throws TableNotFoundException, JavaRegexAnalyzer.JavaRegexParseException {
    // with full table scans enabled there is nothing to rule out
    if (config.getFullTableScanEnabled()) {
        return true;
    }

    final String pattern = JexlASTHelper.getLiteralValue(node).toString();
    final JavaRegexAnalyzer regexAnalyzer = new JavaRegexAnalyzer(pattern);

    // a double ended regex cannot be expanded
    if (regexAnalyzer.isNgram()) {
        return false;
    }

    final String field = JexlASTHelper.getIdentifier(node);
    // a leading literal needs the forward index, a trailing literal needs the reverse index
    return (regexAnalyzer.isLeadingLiteral() && helper.isIndexed(field, config.getDatatypeFilter()))
                    || (regexAnalyzer.isTrailingLiteral() && helper.isReverseIndexed(field, config.getDatatypeFilter()));
}
use of datawave.query.parser.JavaRegexAnalyzer in project datawave by NationalSecurityAgency.
the class VisitationContext method getLeadingLiteral.
/**
 * Gets the leading literal from the search term stored in an IdentityContext. The leading literal is the run of characters at the start of the term
 * that are not part of a regex expression (.* \d \w \s ...).
 *
 * If the term is not a regex operation (e.g. an == expression) then the whole literal is returned.
 *
 * If there is no leading literal then the empty String is returned; this method never returns null.
 *
 * @param term
 *            the context holding the operation and the literal of the search term
 * @param leadingWildCardAllowed
 *            whether a leading wild card is allowed in the search term (leading wild cards are not allowed with SOURCE expressions but are allowed
 *            with everything else)
 * @return the leading literal, or the empty String if there is none
 * @throws IllegalArgumentException
 *             if the regex cannot be parsed (the parse exception is attached as the cause), or if the regex has a leading wild card and
 *             {@code leadingWildCardAllowed} is false
 */
private String getLeadingLiteral(IdentityContext term, boolean leadingWildCardAllowed) {
    String leadingLiteral;
    if (term.getOperation().equals(EQUALS_REGEX)) {
        try {
            JavaRegexAnalyzer regexAnalyzer = new JavaRegexAnalyzer(term.getLiteral());
            if (!leadingWildCardAllowed && regexAnalyzer.isLeadingRegex()) {
                log.error("Identifier had leading wildcard expression");
                throw new IllegalArgumentException("Can't have leading wildcards on SOURCE");
            }
            leadingLiteral = regexAnalyzer.getLeadingLiteral();
        } catch (JavaRegexAnalyzer.JavaRegexParseException e) {
            // keep the parse exception as the cause so the position/detail of the failure is not lost
            String message = "Error parsing regex expression: " + term.getLiteral();
            log.error(message, e);
            throw new IllegalArgumentException(message, e);
        }
    } else {
        leadingLiteral = term.getLiteral();
    }
    // string class will literally append the word 'null' instead of nothing
    return leadingLiteral == null ? "" : leadingLiteral;
}
Aggregations