Search in sources :

Example 26 with DatawaveFatalQueryException

use of datawave.query.exceptions.DatawaveFatalQueryException in project datawave by NationalSecurityAgency.

the class BoundedRangeDetectionVisitor method visit.

/**
 * Flags the supplied {@link AtomicBoolean} when the identifier of the visited ER node is contained in the
 * non-event fields reported by the metadata helper for the configured datatype filter.
 *
 * @param node
 *            the ER node being visited
 * @param data
 *            an {@link AtomicBoolean} that is set to {@code true} on a match; may be null, in which case nothing is recorded
 * @return always {@code false}
 * @throws DatawaveFatalQueryException
 *             if the non-event field lookup fails with a {@link TableNotFoundException}
 */
@Override
public Object visit(ASTERNode node, Object data) {
    final boolean nonEventField;
    try {
        // the metadata lookup is performed whether or not a flag object was supplied
        nonEventField = helper.getNonEventFields(config.getDatatypeFilter()).contains(JexlASTHelper.getIdentifier(node));
    } catch (TableNotFoundException e) {
        throw new DatawaveFatalQueryException("Cannot access metadata", e);
    }
    if (nonEventField && data != null) {
        ((AtomicBoolean) data).set(true);
    }
    return false;
}
Also used : AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) DatawaveFatalQueryException(datawave.query.exceptions.DatawaveFatalQueryException)

Example 27 with DatawaveFatalQueryException

use of datawave.query.exceptions.DatawaveFatalQueryException in project datawave by NationalSecurityAgency.

the class ExpandMultiNormalizedTerms method expandNodeForNormalizers.

/**
 * Rewrites a field/literal comparison so that the literal is replaced by the value(s) produced by each
 * datatype configured for the field.
 * <p>
 * Outcomes, in the order they are checked:
 * <ul>
 * <li>no identifier/literal pair can be extracted, no datatypes are configured for the field, or the literal is null: the node is returned untouched</li>
 * <li>a datatype throws {@code IpAddressNormalizer.Exception} and the term can be converted from CIDR notation: a bounded range
 * ({@code FIELD >= low and FIELD <= hi}) is returned immediately</li>
 * <li>more than one distinct normalized term: an OR node over all of the terms</li>
 * <li>exactly one normalized term: a single new literal node</li>
 * <li>no normalized terms: a new literal node built from the original literal</li>
 * </ul>
 *
 * @param node
 *            the node to expand
 * @param data
 *            not read by this method
 * @return the original node or its expanded replacement, as described above
 * @throws DatawaveFatalQueryException
 *             wrapping a NODE_EXPANSION_ERROR {@link QueryException} if the expansion fails unexpectedly
 */
protected JexlNode expandNodeForNormalizers(JexlNode node, Object data) {
    final IdentifierOpLiteral op = JexlASTHelper.getIdentifierOpLiteral(node);
    if (op == null) {
        return node;
    }
    final String fieldName = op.deconstructIdentifier();
    final Object literal = op.getLiteralValue();
    // Collect every indexed or normalized datatype registered for the field name
    final Set<Type<?>> dataTypes = Sets.newHashSet(config.getQueryFieldsDatatypes().get(fieldName));
    dataTypes.addAll(config.getNormalizedFieldsDatatypes().get(fieldName));
    // Nothing to expand without datatypes, or when the user entered FIELD == null
    if (dataTypes.isEmpty() || literal == null) {
        return node;
    }
    try {
        final String term = literal.toString();
        // regex nodes go through the regex-specific normalization
        final boolean regexNode = node instanceof ASTNRNode || node instanceof ASTERNode;
        final Set<String> normalizedTerms = Sets.newHashSet();
        // Apply each datatype in turn, accumulating the distinct normalized forms
        for (Type<?> normalizer : dataTypes) {
            try {
                normalizedTerms.add(regexNode ? normalizer.normalizeRegex(term) : normalizer.normalize(term));
                log.debug("normalizedTerms=" + normalizedTerms);
            } catch (IpAddressNormalizer.Exception ipex) {
                // this could be CIDR notation, attempt to expand the node to the cidr range
                try {
                    final String[] lowHi = ((IpAddressType) normalizer).normalizeCidrToRange(term);
                    // node was FIELD == 'cidr'; it becomes FIELD >= low and FIELD <= hi
                    final JexlNode lowerBound = JexlNodeFactory.buildNode(new ASTGENode(ParserTreeConstants.JJTGENODE), fieldName, lowHi[0]);
                    final JexlNode upperBound = JexlNodeFactory.buildNode(new ASTLENode(ParserTreeConstants.JJTLENODE), fieldName, lowHi[1]);
                    // join the two bounds into a single bounded range
                    return BoundedRange.create(JexlNodeFactory.createAndNode(Arrays.asList(lowerBound, upperBound)));
                } catch (Exception cidrFailure) {
                    // not a usable cidr either; skip this datatype
                    if (log.isTraceEnabled()) {
                        log.trace("Could not normalize " + term + " as cidr notation with: " + normalizer.getClass());
                    }
                }
            } catch (Exception normalizeFailure) {
                // this datatype cannot handle the term; skip it
                if (log.isTraceEnabled()) {
                    log.trace("Could not normalize " + term + " using " + normalizer.getClass());
                }
            }
        }
        final int termCount = normalizedTerms.size();
        if (termCount > 1) {
            return JexlNodeFactory.createNodeTreeFromFieldValues(ContainerType.OR_NODE, node, node, fieldName, normalizedTerms);
        }
        if (termCount == 1) {
            // A single term does not need to be wrapped in an OR
            return JexlNodeFactory.buildUntypedNewLiteralNode(node, fieldName, normalizedTerms.iterator().next());
        }
        // Nothing could be mapped, so hand back a copy built from the original literal
        return JexlNodeFactory.buildUntypedNewLiteralNode(node, fieldName, literal);
    } catch (Exception e) {
        QueryException qe = new QueryException(DatawaveErrorCode.NODE_EXPANSION_ERROR, e, MessageFormat.format("Node: {0}, Datatypes: {1}", PrintingVisitor.formattedQueryString(node), dataTypes));
        log.error(qe);
        throw new DatawaveFatalQueryException(qe);
    }
}
Also used : ASTGENode(org.apache.commons.jexl2.parser.ASTGENode) ASTERNode(org.apache.commons.jexl2.parser.ASTERNode) IdentifierOpLiteral(datawave.query.jexl.JexlASTHelper.IdentifierOpLiteral) ASTNRNode(org.apache.commons.jexl2.parser.ASTNRNode) IpAddressNormalizer(datawave.data.normalizer.IpAddressNormalizer) DatawaveFatalQueryException(datawave.query.exceptions.DatawaveFatalQueryException) QueryException(datawave.webservice.query.exception.QueryException) ContainerType(datawave.query.jexl.JexlNodeFactory.ContainerType) IpAddressType(datawave.data.type.IpAddressType) Type(datawave.data.type.Type) DatawaveFatalQueryException(datawave.query.exceptions.DatawaveFatalQueryException) QueryException(datawave.webservice.query.exception.QueryException) ASTLENode(org.apache.commons.jexl2.parser.ASTLENode) DatawaveFatalQueryException(datawave.query.exceptions.DatawaveFatalQueryException) JexlNode(org.apache.commons.jexl2.parser.JexlNode)

Example 28 with DatawaveFatalQueryException

use of datawave.query.exceptions.DatawaveFatalQueryException in project datawave by NationalSecurityAgency.

the class PushdownLargeFieldedListsVisitor method visit.

/**
 * Rebuilds an OR node, replacing large per-field groups of equality or range children with
 * {@code ExceededOrThresholdMarkerJexlNode} markers once the configured thresholds are reached.
 * <p>
 * Children are first bucketed by field into equality nodes, range nodes and "other" nodes. Other nodes are always
 * copied back. For each field (in sorted order) the equality/range groups are either pushed down into markers or
 * copied back unchanged.
 *
 * @param node
 *            the OR node being visited
 * @param data
 *            visitor data, passed through to {@code copyChildren} and {@code track}
 * @return the single resulting child when exactly one remains, otherwise the result of {@code children(newNode, ...)} for the
 *         rebuilt child list
 * @throws DatawaveFatalQueryException
 *             wrapping a LARGE_FIELDED_LIST_ERROR {@link QueryException} when building the FST or markers fails with a
 *             ClassNotFoundException, InstantiationException, IllegalAccessException or IOException
 */
@Override
public Object visit(ASTOrNode node, Object data) {
    // replacement node of the same type, carrying over the original image
    ASTOrNode newNode = newInstanceOfType(node);
    newNode.image = node.image;
    Multimap<String, JexlNode> eqNodesByField = LinkedListMultimap.create();
    Multimap<String, JexlNode> rangeNodesByField = LinkedListMultimap.create();
    List<JexlNode> otherNodes = new ArrayList<>();
    // first pull out sets of nodes by field
    for (JexlNode childNode : children(node)) assignNodeByField(childNode, eqNodesByField, rangeNodesByField, otherNodes);
    ArrayList<JexlNode> children = newArrayList();
    // if "OTHER_NODES", then simply add the subset back into the children list
    copyChildren(otherNodes, children, data);
    // sorted union of every field that has equality and/or range nodes
    SortedSet<String> fields = new TreeSet<>(eqNodesByField.keySet());
    fields.addAll(rangeNodesByField.keySet());
    for (String field : fields) {
        // if fields is not specified or the current field is in fields it can be reduced
        boolean canReduce = (this.fields == null || this.fields.contains(field));
        Collection<JexlNode> eqNodes = eqNodesByField.get(field);
        Collection<JexlNode> rangeNodes = rangeNodesByField.get(field);
        // push down only for a reducible, indexed, concrete field (not ANY_FIELD / NO_FIELD) where at least one threshold is met
        // @formatter:off
        if (canReduce && !Constants.ANY_FIELD.equals(field) && !Constants.NO_FIELD.equals(field) && (eqNodes.size() >= config.getMaxOrExpansionFstThreshold() || eqNodes.size() >= config.getMaxOrExpansionThreshold() || rangeNodes.size() >= config.getMaxOrRangeThreshold()) && isIndexed(field)) {
            // @formatter:on
            log.info("Pushing down large (" + eqNodes.size() + "|" + rangeNodes.size() + ") fielded list for " + field);
            // turn the subset of children into a list of values
            SortedSet<String> values = new TreeSet<>();
            for (JexlNode child : eqNodes) {
                values.add(String.valueOf(JexlASTHelper.getLiteralValue(child)));
            }
            List<JexlNode> markers = new ArrayList<>();
            try {
                // if we have an hdfs cache directory and if past the fst/list threshold, then create the fst/list and replace the list with an assignment
                if (fstHdfsUri != null && (eqNodes.size() >= config.getMaxOrExpansionFstThreshold())) {
                    URI fstPath = createFst(values);
                    markers.add(ExceededOrThresholdMarkerJexlNode.createFromFstURI(field, fstPath));
                    // null signals that the eq nodes were replaced by a marker and must not be copied back below
                    eqNodes = null;
                } else if (eqNodes.size() >= config.getMaxOrExpansionThreshold()) {
                    markers.add(ExceededOrThresholdMarkerJexlNode.createFromValues(field, values));
                    // null signals that the eq nodes were replaced by a marker and must not be copied back below
                    eqNodes = null;
                }
                // handle range nodes separately
                if (rangeNodes.size() >= config.getMaxOrRangeThreshold()) {
                    // key the range nodes by their Range so they are iterated in sorted order
                    TreeMap<Range, JexlNode> ranges = new TreeMap<>();
                    rangeNodes.forEach(rangeNode -> ranges.put(rangeNodeToRange(rangeNode), rangeNode));
                    int numBatches = getBatchCount(rangeNodes.size());
                    List<List<Map.Entry<Range, JexlNode>>> batchedRanges = batchRanges(ranges, numBatches);
                    // from here on rangeNodes holds only the range nodes that were NOT folded into a marker
                    rangeNodes = new ArrayList<>();
                    for (List<Map.Entry<Range, JexlNode>> rangeList : batchedRanges) {
                        if (rangeList.size() > 1) {
                            // a batch of several ranges becomes one marker
                            markers.add(ExceededOrThresholdMarkerJexlNode.createFromRanges(field, rangeList.stream().map(Map.Entry::getKey).collect(Collectors.toList())));
                        } else {
                            // a lone range keeps its original node
                            // NOTE(review): assumes batchRanges never yields an empty batch — confirm
                            rangeNodes.add(rangeList.get(0).getValue());
                        }
                    }
                }
            } catch (ClassNotFoundException | InstantiationException | IllegalAccessException | IOException e) {
                QueryException qe = new QueryException(DatawaveErrorCode.LARGE_FIELDED_LIST_ERROR, e);
                throw new DatawaveFatalQueryException(qe);
            }
            // add in any unused eq nodes
            if (eqNodes != null) {
                copyChildren(eqNodes, children, data);
            }
            // add in any unused range nodes
            copyChildren(rangeNodes, children, data);
            children.addAll(markers);
        } else // else simply add the subset back into the children list
        {
            // recurse on the eq children in this subset
            copyChildren(eqNodes, children, data);
            // NOTE(review): track(...) is defined elsewhere; presumably records per-field term counts — confirm
            track(data, field, eqNodes.size() - 1);
            // recurse on the range children in this subset
            copyChildren(rangeNodes, children, data);
            int numBatches = getBatchCount(rangeNodes.size());
            track(data, field, rangeNodes.size() - numBatches);
        }
    }
    // a single surviving child is returned directly instead of being wrapped in the new OR node
    return children.size() == 1 ? Iterables.getOnlyElement(children) : children(newNode, children.toArray(new JexlNode[0]));
}
Also used : ArrayList(java.util.ArrayList) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) URI(java.net.URI) TreeSet(java.util.TreeSet) DatawaveFatalQueryException(datawave.query.exceptions.DatawaveFatalQueryException) ArrayList(java.util.ArrayList) List(java.util.List) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) ASTOrNode(org.apache.commons.jexl2.parser.ASTOrNode) IOException(java.io.IOException) LiteralRange(datawave.query.jexl.LiteralRange) BoundedRange(datawave.query.jexl.nodes.BoundedRange) Range(org.apache.accumulo.core.data.Range) TreeMap(java.util.TreeMap) DatawaveFatalQueryException(datawave.query.exceptions.DatawaveFatalQueryException) QueryException(datawave.webservice.query.exception.QueryException) JexlNode(org.apache.commons.jexl2.parser.JexlNode) ExceededValueThresholdMarkerJexlNode(datawave.query.jexl.nodes.ExceededValueThresholdMarkerJexlNode) ExceededOrThresholdMarkerJexlNode(datawave.query.jexl.nodes.ExceededOrThresholdMarkerJexlNode) Map(java.util.Map) TreeMap(java.util.TreeMap)

Example 29 with DatawaveFatalQueryException

use of datawave.query.exceptions.DatawaveFatalQueryException in project datawave by NationalSecurityAgency.

the class IteratorBuildingVisitor method buildLiteralRange.

/**
 * Builds the {@link LiteralRange} to scan for a regex (ER) node.
 * <p>
 * When the regex starts with a literal, the range begins at that literal and ends either at the literal followed by
 * {@code Constants.MAX_UNICODE_STRING} (when the regex contains a wildcard) or at the literal itself. Otherwise the range
 * spans from {@code Constants.NULL_BYTE_STRING} to {@code Constants.MAX_UNICODE_STRING}. Both bounds are inclusive.
 *
 * @param node
 *            the regex node supplying the identifier and the regex literal
 * @return the range to scan for the node
 * @throws DatawaveFatalQueryException
 *             if the regex cannot be parsed or a {@link NoSuchElementException} is raised while building the range
 */
public static LiteralRange<?> buildLiteralRange(ASTERNode node) {
    try {
        final String regex = String.valueOf(JexlASTHelper.getLiteralValue(node));
        final JavaRegexAnalyzer analyzer = new JavaRegexAnalyzer(regex);
        final LiteralRange<String> range = new LiteralRange<>(JexlASTHelper.getIdentifier(node), NodeOperand.AND);
        if (analyzer.isLeadingLiteral()) {
            range.updateLower(analyzer.getLeadingLiteral(), true, node);
            // a wildcard after the leading literal extends the upper bound to everything sharing that prefix
            final String upper = analyzer.hasWildCard() ? analyzer.getLeadingLiteral() + Constants.MAX_UNICODE_STRING : analyzer.getLeadingLiteral();
            range.updateUpper(upper, true, node);
        } else {
            // if the range is a leading wildcard we have to seek over the whole range since it's forward indexed only
            range.updateLower(Constants.NULL_BYTE_STRING, true, node);
            range.updateUpper(Constants.MAX_UNICODE_STRING, true, node);
        }
        return range;
    } catch (JavaRegexParseException | NoSuchElementException e) {
        throw new DatawaveFatalQueryException(e);
    }
}
Also used : DatawaveFatalQueryException(datawave.query.exceptions.DatawaveFatalQueryException) JavaRegexParseException(datawave.query.parser.JavaRegexAnalyzer.JavaRegexParseException) JavaRegexAnalyzer(datawave.query.parser.JavaRegexAnalyzer) LiteralRange(datawave.query.jexl.LiteralRange) NoSuchElementException(java.util.NoSuchElementException)

Example 30 with DatawaveFatalQueryException

use of datawave.query.exceptions.DatawaveFatalQueryException in project datawave by NationalSecurityAgency.

the class IteratorBuildingVisitor method ivarateList.

/**
 * Build the iterator stack using a list or range ivarator for an ExceededOrThreshold marker node.
 * <p>
 * The marker's parameters select the builder: when ranges are present an {@link IndexRangeIteratorBuilder} is used,
 * otherwise an {@link IndexListIteratorBuilder} fed either by the explicit values or by an FST loaded from the marker's
 * FST URI. When {@code exceededOrEvaluationCache} is non-null, the ranges, values or FST are also stored in it under the
 * marker id for use during Jexl evaluation; a null cache is tolerated in every branch.
 *
 * @param rootNode
 *            the node that was processed to generated this builder
 * @param sourceNode
 *            the source node derived from the root
 * @param data
 *            visitor data, passed to {@code builder.negateAsNeeded} and {@code ivarate}
 * @throws IOException
 *             declared by this method; IOExceptions raised while preparing the builder are wrapped instead (see below), so this
 *             presumably originates from the final {@code ivarate} call
 * @throws DatawaveFatalQueryException
 *             wrapping an UNPARSEABLE_EXCEEDED_OR_PARAMS {@link QueryException} when preparing the builder fails with an
 *             IOException, URISyntaxException or NullPointerException
 */
public void ivarateList(JexlNode rootNode, JexlNode sourceNode, Object data) throws IOException {
    IvaratorBuilder builder = null;
    try {
        String id = ExceededOrThresholdMarkerJexlNode.getId(sourceNode);
        String field = JexlASTHelper.deconstructIdentifier(ExceededOrThresholdMarkerJexlNode.getField(sourceNode));
        ExceededOrThresholdMarkerJexlNode.ExceededOrParams params = ExceededOrThresholdMarkerJexlNode.getParameters(sourceNode);
        if (params.getRanges() != null && !params.getRanges().isEmpty()) {
            IndexRangeIteratorBuilder rangeIterBuilder = new IndexRangeIteratorBuilder();
            builder = rangeIterBuilder;
            SortedSet<Range> ranges = params.getSortedAccumuloRanges();
            rangeIterBuilder.setSubRanges(params.getSortedAccumuloRanges());
            // cache these ranges for use during Jexl Evaluation
            if (exceededOrEvaluationCache != null) {
                exceededOrEvaluationCache.put(id, ranges);
            }
            // the overall range runs from the start of the first sub-range to the end of the last one
            LiteralRange<?> fullRange = new LiteralRange<>(String.valueOf(ranges.first().getStartKey().getRow()), ranges.first().isStartKeyInclusive(), String.valueOf(ranges.last().getEndKey().getRow()), ranges.last().isEndKeyInclusive(), field, NodeOperand.AND);
            rangeIterBuilder.setRange(fullRange);
        } else {
            IndexListIteratorBuilder listIterBuilder = new IndexListIteratorBuilder();
            builder = listIterBuilder;
            if (params.getValues() != null && !params.getValues().isEmpty()) {
                Set<String> values = new TreeSet<>(params.getValues());
                listIterBuilder.setValues(values);
                // cache these values for use during Jexl Evaluation
                if (exceededOrEvaluationCache != null) {
                    exceededOrEvaluationCache.put(id, values);
                }
            } else if (params.getFstURI() != null) {
                URI fstUri = new URI(params.getFstURI());
                FST fst;
                // only recompute this if not already set since this is potentially expensive.
                // The cache is optional: without the null guard a missing cache would surface as an NPE that the
                // catch below misreports as unparseable marker parameters.
                if (exceededOrEvaluationCache != null && exceededOrEvaluationCache.containsKey(id)) {
                    fst = (FST) exceededOrEvaluationCache.get(id);
                } else {
                    fst = DatawaveFieldIndexListIteratorJexl.FSTManager.get(new Path(fstUri), hdfsFileCompressionCodec, hdfsFileSystem.getFileSystem(fstUri));
                }
                listIterBuilder.setFst(fst);
                // cache this fst for use during JexlEvaluation.
                if (exceededOrEvaluationCache != null) {
                    exceededOrEvaluationCache.put(id, fst);
                }
            }
            // If this is actually negated, then this will be added to excludes. Do not negate in the ivarator
            listIterBuilder.setNegated(false);
        }
        builder.setField(field);
    } catch (IOException | URISyntaxException | NullPointerException e) {
        QueryException qe = new QueryException(DatawaveErrorCode.UNPARSEABLE_EXCEEDED_OR_PARAMS, e, MessageFormat.format("Class: {0}", ExceededOrThresholdMarkerJexlNode.class.getSimpleName()));
        throw new DatawaveFatalQueryException(qe);
    }
    builder.negateAsNeeded(data);
    builder.forceDocumentBuild(!limitLookup && this.isQueryFullySatisfied);
    ivarate(builder, rootNode, sourceNode, data);
}
Also used : Path(org.apache.hadoop.fs.Path) FST(org.apache.lucene.util.fst.FST) IOException(java.io.IOException) URISyntaxException(java.net.URISyntaxException) LiteralRange(datawave.query.jexl.LiteralRange) Range(org.apache.accumulo.core.data.Range) IvaratorBuilder(datawave.query.iterator.builder.IvaratorBuilder) URI(java.net.URI) DatawaveFatalQueryException(datawave.query.exceptions.DatawaveFatalQueryException) QueryException(datawave.webservice.query.exception.QueryException) TreeSet(java.util.TreeSet) DatawaveFatalQueryException(datawave.query.exceptions.DatawaveFatalQueryException) ExceededOrThresholdMarkerJexlNode(datawave.query.jexl.nodes.ExceededOrThresholdMarkerJexlNode) IndexListIteratorBuilder(datawave.query.iterator.builder.IndexListIteratorBuilder) IndexRangeIteratorBuilder(datawave.query.iterator.builder.IndexRangeIteratorBuilder) LiteralRange(datawave.query.jexl.LiteralRange)

Aggregations

DatawaveFatalQueryException (datawave.query.exceptions.DatawaveFatalQueryException)51 QueryException (datawave.webservice.query.exception.QueryException)32 TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException)16 PreConditionFailedQueryException (datawave.webservice.query.exception.PreConditionFailedQueryException)14 IOException (java.io.IOException)12 LiteralRange (datawave.query.jexl.LiteralRange)11 BadRequestQueryException (datawave.webservice.query.exception.BadRequestQueryException)11 InvalidQueryException (datawave.query.exceptions.InvalidQueryException)10 DoNotPerformOptimizedQueryException (datawave.query.exceptions.DoNotPerformOptimizedQueryException)9 NotFoundQueryException (datawave.webservice.query.exception.NotFoundQueryException)9 Range (org.apache.accumulo.core.data.Range)9 DatawaveQueryException (datawave.query.exceptions.DatawaveQueryException)8 Key (org.apache.accumulo.core.data.Key)7 JexlNode (org.apache.commons.jexl2.parser.JexlNode)7 TraceStopwatch (datawave.util.time.TraceStopwatch)6 Entry (java.util.Map.Entry)5 Type (datawave.data.type.Type)4 HashSet (java.util.HashSet)4 Set (java.util.Set)4 PartialKey (org.apache.accumulo.core.data.PartialKey)4