Use of datawave.query.exceptions.DatawaveFatalQueryException in project datawave by NationalSecurityAgency.
The class BoundedRangeDetectionVisitor, method visit.
@Override
public Object visit(ASTERNode node, Object data) {
    try {
        if (helper.getNonEventFields(config.getDatatypeFilter()).contains(JexlASTHelper.getIdentifier(node))) {
            if (null != data) {
                AtomicBoolean hasBounded = (AtomicBoolean) data;
                hasBounded.set(true);
            }
        }
    } catch (TableNotFoundException e) {
        throw new DatawaveFatalQueryException("Cannot access metadata", e);
    }
    return false;
}
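A minimal sketch of how the AtomicBoolean flag used above might be driven by calling code; the visitor constructor arguments and the script variable are assumptions, only the data-flag pattern comes from the visit method itself.

    // Hypothetical caller: the constructor arguments and 'script' are assumptions.
    AtomicBoolean hasBounded = new AtomicBoolean(false);
    BoundedRangeDetectionVisitor visitor = new BoundedRangeDetectionVisitor(config, helper);
    script.jjtAccept(visitor, hasBounded);
    if (hasBounded.get()) {
        // at least one regex (ER) node referenced a non-event field; a missing metadata
        // table would instead have surfaced as a DatawaveFatalQueryException
    }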
Use of datawave.query.exceptions.DatawaveFatalQueryException in project datawave by NationalSecurityAgency.
The class ExpandMultiNormalizedTerms, method expandNodeForNormalizers.
/**
 * Expand a leaf node using every normalizer (Type) configured for its field.
 *
 * @param node
 *            the leaf node to expand (EQ, ER, NR, etc.)
 * @param data
 *            visitor data, passed through unchanged
 * @return the original node if nothing applies, a copy with the normalized literal, an OR over all distinct normalized terms, or a bounded range when the
 *         literal is CIDR notation for an IP field
 */
protected JexlNode expandNodeForNormalizers(JexlNode node, Object data) {
    JexlNode nodeToReturn = node;
    IdentifierOpLiteral op = JexlASTHelper.getIdentifierOpLiteral(node);
    if (op != null) {
        final String fieldName = op.deconstructIdentifier();
        final Object literal = op.getLiteralValue();
        // Get all of the indexed or normalized dataTypes for the field name
        Set<Type<?>> dataTypes = Sets.newHashSet(config.getQueryFieldsDatatypes().get(fieldName));
        dataTypes.addAll(config.getNormalizedFieldsDatatypes().get(fieldName));
        // Catch the case of the user entering FIELD == null
        if (!dataTypes.isEmpty() && null != literal) {
            try {
                String term = literal.toString();
                Set<String> normalizedTerms = Sets.newHashSet();
                // Build up a set of normalized terms using each normalizer
                for (Type<?> normalizer : dataTypes) {
                    try {
                        if (node instanceof ASTNRNode || node instanceof ASTERNode) {
                            normalizedTerms.add(normalizer.normalizeRegex(term));
                        } else {
                            normalizedTerms.add(normalizer.normalize(term));
                        }
                        log.debug("normalizedTerms=" + normalizedTerms);
                    } catch (IpAddressNormalizer.Exception ipex) {
                        // this could be CIDR notation, attempt to expand the node to the cidr range
                        try {
                            String[] lowHi = ((IpAddressType) normalizer).normalizeCidrToRange(term);
                            // node was FIELD == 'cidr'
                            // change to FIELD >= low and FIELD <= hi
                            JexlNode geNode = JexlNodeFactory.buildNode(new ASTGENode(ParserTreeConstants.JJTGENODE), fieldName, lowHi[0]);
                            JexlNode leNode = JexlNodeFactory.buildNode(new ASTLENode(ParserTreeConstants.JJTLENODE), fieldName, lowHi[1]);
                            // now link em up
                            return BoundedRange.create(JexlNodeFactory.createAndNode(Arrays.asList(geNode, leNode)));
                        } catch (Exception ex) {
                            if (log.isTraceEnabled()) {
                                log.trace("Could not normalize " + term + " as cidr notation with: " + normalizer.getClass());
                            }
                        }
                    } catch (Exception ne) {
                        if (log.isTraceEnabled()) {
                            log.trace("Could not normalize " + term + " using " + normalizer.getClass());
                        }
                    }
                }
                if (normalizedTerms.size() > 1) {
                    nodeToReturn = JexlNodeFactory.createNodeTreeFromFieldValues(ContainerType.OR_NODE, node, node, fieldName, normalizedTerms);
                } else if (1 == normalizedTerms.size()) {
                    // If there is only one term, we don't need to make an OR
                    nodeToReturn = JexlNodeFactory.buildUntypedNewLiteralNode(node, fieldName, normalizedTerms.iterator().next());
                } else {
                    // If we couldn't map anything, return a copy
                    nodeToReturn = JexlNodeFactory.buildUntypedNewLiteralNode(node, fieldName, literal);
                }
            } catch (Exception e) {
                QueryException qe = new QueryException(DatawaveErrorCode.NODE_EXPANSION_ERROR, e,
                                MessageFormat.format("Node: {0}, Datatypes: {1}", PrintingVisitor.formattedQueryString(node), dataTypes));
                log.error(qe);
                throw new DatawaveFatalQueryException(qe);
            }
        }
    }
    return nodeToReturn;
}
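An illustrative sketch of the CIDR branch above, to be placed inside any method. IpAddressType and normalizeCidrToRange come from the excerpt; the no-arg constructor and the sample CIDR literal are assumptions.

    // Sketch only: constructor usage and the sample literal are assumptions.
    IpAddressType ipType = new IpAddressType();
    String[] lowHi = ipType.normalizeCidrToRange("10.1.2.0/24");
    // lowHi[0] holds the normalized low address and lowHi[1] the high address of the block;
    // the visitor wraps them as a BoundedRange over (FIELD >= lowHi[0] && FIELD <= lowHi[1])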
Use of datawave.query.exceptions.DatawaveFatalQueryException in project datawave by NationalSecurityAgency.
The class PushdownLargeFieldedListsVisitor, method visit.
@Override
public Object visit(ASTOrNode node, Object data) {
    ASTOrNode newNode = newInstanceOfType(node);
    newNode.image = node.image;
    Multimap<String, JexlNode> eqNodesByField = LinkedListMultimap.create();
    Multimap<String, JexlNode> rangeNodesByField = LinkedListMultimap.create();
    List<JexlNode> otherNodes = new ArrayList<>();
    // first pull out sets of nodes by field
    for (JexlNode childNode : children(node)) {
        assignNodeByField(childNode, eqNodesByField, rangeNodesByField, otherNodes);
    }
    ArrayList<JexlNode> children = newArrayList();
    // the "other" (non EQ/range) nodes are simply added back into the children list
    copyChildren(otherNodes, children, data);
    SortedSet<String> fields = new TreeSet<>(eqNodesByField.keySet());
    fields.addAll(rangeNodesByField.keySet());
    for (String field : fields) {
        // if fields is not specified, or the current field is in fields, it can be reduced
        boolean canReduce = (this.fields == null || this.fields.contains(field));
        Collection<JexlNode> eqNodes = eqNodesByField.get(field);
        Collection<JexlNode> rangeNodes = rangeNodesByField.get(field);
        // @formatter:off
        if (canReduce && !Constants.ANY_FIELD.equals(field) && !Constants.NO_FIELD.equals(field)
                        && (eqNodes.size() >= config.getMaxOrExpansionFstThreshold()
                            || eqNodes.size() >= config.getMaxOrExpansionThreshold()
                            || rangeNodes.size() >= config.getMaxOrRangeThreshold())
                        && isIndexed(field)) {
            // @formatter:on
            log.info("Pushing down large (" + eqNodes.size() + "|" + rangeNodes.size() + ") fielded list for " + field);
            // turn the subset of children into a list of values
            SortedSet<String> values = new TreeSet<>();
            for (JexlNode child : eqNodes) {
                values.add(String.valueOf(JexlASTHelper.getLiteralValue(child)));
            }
            List<JexlNode> markers = new ArrayList<>();
            try {
                // if we have an hdfs cache directory and if past the fst/list threshold, then create the fst/list and replace the list with an assignment
                if (fstHdfsUri != null && (eqNodes.size() >= config.getMaxOrExpansionFstThreshold())) {
                    URI fstPath = createFst(values);
                    markers.add(ExceededOrThresholdMarkerJexlNode.createFromFstURI(field, fstPath));
                    eqNodes = null;
                } else if (eqNodes.size() >= config.getMaxOrExpansionThreshold()) {
                    markers.add(ExceededOrThresholdMarkerJexlNode.createFromValues(field, values));
                    eqNodes = null;
                }
                // handle range nodes separately
                if (rangeNodes.size() >= config.getMaxOrRangeThreshold()) {
                    TreeMap<Range, JexlNode> ranges = new TreeMap<>();
                    rangeNodes.forEach(rangeNode -> ranges.put(rangeNodeToRange(rangeNode), rangeNode));
                    int numBatches = getBatchCount(rangeNodes.size());
                    List<List<Map.Entry<Range, JexlNode>>> batchedRanges = batchRanges(ranges, numBatches);
                    rangeNodes = new ArrayList<>();
                    for (List<Map.Entry<Range, JexlNode>> rangeList : batchedRanges) {
                        if (rangeList.size() > 1) {
                            markers.add(ExceededOrThresholdMarkerJexlNode.createFromRanges(field,
                                            rangeList.stream().map(Map.Entry::getKey).collect(Collectors.toList())));
                        } else {
                            rangeNodes.add(rangeList.get(0).getValue());
                        }
                    }
                }
            } catch (ClassNotFoundException | InstantiationException | IllegalAccessException | IOException e) {
                QueryException qe = new QueryException(DatawaveErrorCode.LARGE_FIELDED_LIST_ERROR, e);
                throw new DatawaveFatalQueryException(qe);
            }
            // add in any unused eq nodes
            if (eqNodes != null) {
                copyChildren(eqNodes, children, data);
            }
            // add in any unused range nodes
            copyChildren(rangeNodes, children, data);
            children.addAll(markers);
        } else {
            // otherwise simply add the subset back into the children list
            // recurse on the eq children in this subset
            copyChildren(eqNodes, children, data);
            track(data, field, eqNodes.size() - 1);
            // recurse on the range children in this subset
            copyChildren(rangeNodes, children, data);
            int numBatches = getBatchCount(rangeNodes.size());
            track(data, field, rangeNodes.size() - numBatches);
        }
    }
    return children.size() == 1 ? Iterables.getOnlyElement(children) : children(newNode, children.toArray(new JexlNode[0]));
}
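For reference, the pushdown test in the condition above can be restated as a standalone predicate. This helper is purely illustrative (not part of DataWave); it mirrors the threshold getters used in the excerpt, with the ANY_FIELD/NO_FIELD exclusion folded into the reducible flag.

    // Illustrative helper, not part of DataWave: restates the pushdown decision made above.
    static boolean shouldPushDown(int eqCount, int rangeCount, int fstThreshold, int listThreshold, int rangeThreshold,
                    boolean indexed, boolean reducible) {
        // a field's EQ/range subtree is collapsed into marker nodes once any threshold is met
        return reducible && indexed && (eqCount >= fstThreshold || eqCount >= listThreshold || rangeCount >= rangeThreshold);
    }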
Use of datawave.query.exceptions.DatawaveFatalQueryException in project datawave by NationalSecurityAgency.
The class IteratorBuildingVisitor, method buildLiteralRange.
public static LiteralRange<?> buildLiteralRange(ASTERNode node) {
    JavaRegexAnalyzer analyzer;
    try {
        analyzer = new JavaRegexAnalyzer(String.valueOf(JexlASTHelper.getLiteralValue(node)));
        LiteralRange<String> range = new LiteralRange<>(JexlASTHelper.getIdentifier(node), NodeOperand.AND);
        if (!analyzer.isLeadingLiteral()) {
            // if the regex has a leading wildcard (no leading literal) we have to seek over the whole range since it's forward indexed only
            range.updateLower(Constants.NULL_BYTE_STRING, true, node);
            range.updateUpper(Constants.MAX_UNICODE_STRING, true, node);
        } else {
            range.updateLower(analyzer.getLeadingLiteral(), true, node);
            if (analyzer.hasWildCard()) {
                range.updateUpper(analyzer.getLeadingLiteral() + Constants.MAX_UNICODE_STRING, true, node);
            } else {
                range.updateUpper(analyzer.getLeadingLiteral(), true, node);
            }
        }
        return range;
    } catch (JavaRegexParseException | NoSuchElementException e) {
        throw new DatawaveFatalQueryException(e);
    }
}
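A short sketch of how the JavaRegexAnalyzer calls used above behave for typical patterns; the example regexes and the expected leading literals are assumptions chosen for illustration, not taken from the excerpt.

    try {
        JavaRegexAnalyzer bounded = new JavaRegexAnalyzer("abc.*");   // leading literal, trailing wildcard
        JavaRegexAnalyzer unbounded = new JavaRegexAnalyzer(".*abc"); // no leading literal
        if (bounded.isLeadingLiteral() && bounded.hasWildCard()) {
            // buildLiteralRange would yield ["abc", "abc" + Constants.MAX_UNICODE_STRING]
        }
        if (!unbounded.isLeadingLiteral()) {
            // buildLiteralRange would yield [Constants.NULL_BYTE_STRING, Constants.MAX_UNICODE_STRING]
        }
    } catch (JavaRegexParseException e) {
        throw new DatawaveFatalQueryException(e);
    }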
Use of datawave.query.exceptions.DatawaveFatalQueryException in project datawave by NationalSecurityAgency.
The class IteratorBuildingVisitor, method ivarateList.
/**
 * Build the iterator stack for an exceeded-OR threshold marker using the list ivarator (field index list caching iterator), or the range ivarator when the
 * marker carries ranges instead of values.
 *
 * @param rootNode
 *            the node that was processed to generate this builder
 * @param sourceNode
 *            the source node derived from the root
 * @param data
 *            visitor data, used to determine whether the resulting builder is negated
 */
public void ivarateList(JexlNode rootNode, JexlNode sourceNode, Object data) throws IOException {
    IvaratorBuilder builder = null;
    try {
        String id = ExceededOrThresholdMarkerJexlNode.getId(sourceNode);
        String field = JexlASTHelper.deconstructIdentifier(ExceededOrThresholdMarkerJexlNode.getField(sourceNode));
        ExceededOrThresholdMarkerJexlNode.ExceededOrParams params = ExceededOrThresholdMarkerJexlNode.getParameters(sourceNode);
        if (params.getRanges() != null && !params.getRanges().isEmpty()) {
            IndexRangeIteratorBuilder rangeIterBuilder = new IndexRangeIteratorBuilder();
            builder = rangeIterBuilder;
            SortedSet<Range> ranges = params.getSortedAccumuloRanges();
            rangeIterBuilder.setSubRanges(params.getSortedAccumuloRanges());
            // cache these ranges for use during Jexl Evaluation
            if (exceededOrEvaluationCache != null)
                exceededOrEvaluationCache.put(id, ranges);
            LiteralRange<?> fullRange = new LiteralRange<>(String.valueOf(ranges.first().getStartKey().getRow()), ranges.first().isStartKeyInclusive(),
                            String.valueOf(ranges.last().getEndKey().getRow()), ranges.last().isEndKeyInclusive(), field, NodeOperand.AND);
            rangeIterBuilder.setRange(fullRange);
        } else {
            IndexListIteratorBuilder listIterBuilder = new IndexListIteratorBuilder();
            builder = listIterBuilder;
            if (params.getValues() != null && !params.getValues().isEmpty()) {
                Set<String> values = new TreeSet<>(params.getValues());
                listIterBuilder.setValues(values);
                // cache these values for use during Jexl Evaluation
                if (exceededOrEvaluationCache != null)
                    exceededOrEvaluationCache.put(id, values);
            } else if (params.getFstURI() != null) {
                URI fstUri = new URI(params.getFstURI());
                FST fst;
                // only recompute this if not already set since this is potentially expensive
                if (exceededOrEvaluationCache.containsKey(id)) {
                    fst = (FST) exceededOrEvaluationCache.get(id);
                } else {
                    fst = DatawaveFieldIndexListIteratorJexl.FSTManager.get(new Path(fstUri), hdfsFileCompressionCodec, hdfsFileSystem.getFileSystem(fstUri));
                }
                listIterBuilder.setFst(fst);
                // cache this fst for use during Jexl Evaluation
                if (exceededOrEvaluationCache != null)
                    exceededOrEvaluationCache.put(id, fst);
            }
            // If this is actually negated, then this will be added to excludes. Do not negate in the ivarator
            listIterBuilder.setNegated(false);
        }
        builder.setField(field);
    } catch (IOException | URISyntaxException | NullPointerException e) {
        QueryException qe = new QueryException(DatawaveErrorCode.UNPARSEABLE_EXCEEDED_OR_PARAMS, e,
                        MessageFormat.format("Class: {0}", ExceededOrThresholdMarkerJexlNode.class.getSimpleName()));
        throw new DatawaveFatalQueryException(qe);
    }
    builder.negateAsNeeded(data);
    builder.forceDocumentBuild(!limitLookup && this.isQueryFullySatisfied);
    ivarate(builder, rootNode, sourceNode, data);
}
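A hedged sketch of how a DatawaveFatalQueryException thrown here might surface in the code that drives the visitor; the jjtAccept call follows the usual JEXL visitor pattern, while the script, iteratorBuildingVisitor, and log variables are assumptions.

    // Hypothetical driver code: 'script', 'iteratorBuildingVisitor', and 'log' are assumed to exist in the caller.
    try {
        script.jjtAccept(iteratorBuildingVisitor, null);
    } catch (DatawaveFatalQueryException e) {
        // the cause chain carries the QueryException (e.g. UNPARSEABLE_EXCEEDED_OR_PARAMS) for error reporting
        log.error("Failed to build the iterator stack", e);
        throw e;
    }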