Search in sources :

Example 1 with IndexRangeIteratorBuilder

use of datawave.query.iterator.builder.IndexRangeIteratorBuilder in project datawave by NationalSecurityAgency.

The method visit of the class IteratorBuildingVisitor.

/**
 * Visits an AND node and either delegates it to an ivarator, populates the builder passed in as {@code data}, or builds a new
 * {@link AndIteratorBuilder} from the node's children.
 * <p>
 * The branches are tested in this order:
 * <ol>
 * <li>the node is marked with {@link ExceededOrThresholdMarkerJexlNode}: the marker source is handed to {@code ivarateList};</li>
 * <li>{@code data} is an {@link IndexRangeIteratorBuilder}: the range found under this node is set on that builder;</li>
 * <li>the node is marked with {@link ExceededValueThresholdMarkerJexlNode}: the source is either ivarated (range, filter or regex) or turned into a
 * nested iterator that is added to the parent builder or returned;</li>
 * <li>{@code data} is an {@link AndIteratorBuilder}: the children are visited with that same builder;</li>
 * <li>otherwise: a new {@link AndIteratorBuilder} is created, the children are visited with it, and the result becomes the root (no parent) or is
 * added to the parent's includes.</li>
 * </ol>
 *
 * @param and
 *            the AND node being visited
 * @param data
 *            the parent builder, or {@code null} when there is no parent
 * @return the nested iterator (possibly {@code null}) from the exceeded-value branch when it was not added to a parent builder; {@code null} in every
 *         other case
 * @throws DatawaveFatalQueryException
 *             if a marker source has an unexpected node type, if no range is found for an {@link IndexRangeIteratorBuilder}, or if ivarating fails
 *             with an {@link IOException}
 */
@Override
public Object visit(ASTAndNode and, Object data) {
    // Look up any query property marker wrapping this AND node; the marker type selects the branch below.
    QueryPropertyMarker.Instance instance = QueryPropertyMarker.findInstance(and);
    if (instance.isType(ExceededOrThresholdMarkerJexlNode.class)) {
        JexlNode source = instance.getSource();
        // Ivarator to get the job done
        if (source instanceof ASTAndNode) {
            try {
                ivarateList(and, source, data);
            } catch (IOException ioe) {
                throw new DatawaveFatalQueryException(ioe);
            }
        } else {
            // The source of an ExceededOrThreshold marker must itself be an AND node.
            QueryException qe = new QueryException(DatawaveErrorCode.UNEXPECTED_SOURCE_NODE, MessageFormat.format("{0}", "Limited ExceededOrThresholdMarkerJexlNode"));
            throw new DatawaveFatalQueryException(qe);
        }
    } else if (data instanceof IndexRangeIteratorBuilder) {
        // index checking has already been done, otherwise we would not have
        // an "ExceededValueThresholdMarker"
        // hence the "IndexAgnostic" method can be used here
        LiteralRange range = JexlASTHelper.findRange().recursively().getRange(and);
        if (range == null) {
            // No range could be extracted from this node; reported as MULTIPLE_RANGES_IN_EXPRESSION.
            QueryException qe = new QueryException(DatawaveErrorCode.MULTIPLE_RANGES_IN_EXPRESSION);
            throw new DatawaveFatalQueryException(qe);
        }
        ((IndexRangeIteratorBuilder) data).setRange(range);
    } else if (instance.isType(ExceededValueThresholdMarkerJexlNode.class)) {
        // if the parent is our ExceededValueThreshold marker, then use an
        // Ivarator to get the job done unless we don't have to
        JexlNode source = instance.getSource();
        String identifier = null;
        LiteralRange<?> range = null;
        // true when the source node itself is a negated form (ASTNRNode or ASTNotNode)
        boolean negatedLocal = false;
        if (source instanceof ASTAndNode) {
            // AND source: the field name is taken from the range built from the source.
            // NOTE(review): range is dereferenced without a null check; confirm buildLiteralRange(source, null) never returns null.
            range = buildLiteralRange(source, null);
            identifier = range.getFieldName();
        } else {
            // Non-AND source: the identifier is read directly from the source node.
            if (source instanceof ASTNRNode || source instanceof ASTNotNode)
                negatedLocal = true;
            range = buildLiteralRange(source);
            identifier = JexlASTHelper.getIdentifier(source);
        }
        // Combine the local negation with the parent builder's "in a not" state: being inside a NOT flips it.
        boolean negatedOverall = negatedLocal;
        if (data instanceof AbstractIteratorBuilder) {
            AbstractIteratorBuilder oib = (AbstractIteratorBuilder) data;
            if (oib.isInANot()) {
                negatedOverall = !negatedOverall;
            }
        }
        // If lookups are not limited, or term frequency lookups are not allowed,
        // or the field is index only but not in the term frequencies, then we must ivarate
        if (!limitLookup || !allowTermFrequencyLookup || (indexOnlyFields.contains(identifier) && !termFrequencyFields.contains(identifier))) {
            if (source instanceof ASTAndNode) {
                try {
                    // Collect the function nodes under the source whose argument descriptor allows ivarator filtering.
                    List<ASTFunctionNode> functionNodes = JexlASTHelper.getFunctionNodes(source).stream().filter(node -> JexlFunctionArgumentDescriptorFactory.F.getArgumentDescriptor(node).allowIvaratorFiltering()).collect(Collectors.toList());
                    if (functionNodes.isEmpty()) {
                        // no filtering functions: plain range ivarator
                        ivarateRange(and, source, data);
                    } else {
                        // at least one filtering function: ivarate with those functions applied
                        ivarateFilter(and, source, data, functionNodes);
                    }
                } catch (IOException ioe) {
                    throw new DatawaveFatalQueryException("Unable to ivarate", ioe);
                }
            } else if (source instanceof ASTERNode || source instanceof ASTNRNode) {
                // regex (or negated regex) source: regex ivarator
                try {
                    ivarateRegex(and, source, data);
                } catch (IOException ioe) {
                    throw new DatawaveFatalQueryException("Unable to ivarate", ioe);
                }
            } else {
                // Any other source node type is not supported under an ExceededValueThreshold marker.
                QueryException qe = new QueryException(DatawaveErrorCode.UNEXPECTED_SOURCE_NODE, MessageFormat.format("{0}", "ExceededValueThresholdMarkerJexlNode"));
                throw new DatawaveFatalQueryException(qe);
            }
        } else {
            // No ivarator: try to build a nested iterator for this term instead.
            NestedIterator<Key> nested = null;
            if (termFrequencyFields.contains(identifier)) {
                nested = buildExceededFromTermFrequency(identifier, and, source, range, data);
            } else {
                /**
                 * This is okay since 1) We are doc specific 2) We are not index only or tf 3) Therefore, we must evaluate against the document for this
                 * expression 4) Return a stubbed range in case we have a disjunction that breaks the current doc.
                 */
                if (!limitOverride && !negatedOverall)
                    nested = createExceededCheck(identifier, range, and);
            }
            if (null != nested && null != data && data instanceof AbstractIteratorBuilder) {
                // Attach to the parent builder; only the local negation decides include vs. exclude here.
                AbstractIteratorBuilder iterators = (AbstractIteratorBuilder) data;
                if (negatedLocal) {
                    iterators.addExclude(nested);
                } else {
                    iterators.addInclude(nested);
                }
            } else {
                // Nothing was attached to a parent builder (nested is null, or data is null / not a builder).
                if (isQueryFullySatisfied == true) {
                    log.warn("Determined that isQueryFullySatisfied should be false, but it was not preset to false in the SatisfactionVisitor");
                }
                // nested may be null at this point
                return nested;
            }
        }
    } else if (null != data && data instanceof AndIteratorBuilder) {
        // The parent is already an AND builder, so the children are added to it directly.
        and.childrenAccept(this, data);
    } else {
        // Create an AndIterator and recursively add the children
        AbstractIteratorBuilder andItr = new AndIteratorBuilder();
        andItr.negateAsNeeded(data);
        and.childrenAccept(this, andItr);
        // If there is no parent
        if (data == null) {
            // Make this AndIterator the root node
            // (only when it has at least one include; otherwise root is left untouched)
            if (!andItr.includes().isEmpty()) {
                root = andItr.build();
            }
        } else {
            // Otherwise, add this AndIterator to its parent
            // (again only when it has at least one include)
            AbstractIteratorBuilder parent = (AbstractIteratorBuilder) data;
            if (!andItr.includes().isEmpty()) {
                parent.addInclude(andItr.build());
            }
        }
        if (log.isTraceEnabled()) {
            // NOTE(review): the message is labelled "parent.includes"/"parent.excludes" but the values printed belong to andItr.
            log.trace("ASTAndNode visit: pretty formatting of:\nparent.includes:" + formatIncludesOrExcludes(andItr.includes()) + "\nparent.excludes:" + formatIncludesOrExcludes(andItr.excludes()));
        }
    }
    return null;
}
Also used : Arrays(java.util.Arrays) Text(org.apache.hadoop.io.Text) ASTSizeMethod(org.apache.commons.jexl2.parser.ASTSizeMethod) AbstractIteratorBuilder(datawave.query.iterator.builder.AbstractIteratorBuilder) JavaRegexParseException(datawave.query.parser.JavaRegexAnalyzer.JavaRegexParseException) IvaratorCacheDir(datawave.query.iterator.ivarator.IvaratorCacheDir) ASTNRNode(org.apache.commons.jexl2.parser.ASTNRNode) Map(java.util.Map) ExceededValueThresholdMarkerJexlNode(datawave.query.jexl.nodes.ExceededValueThresholdMarkerJexlNode) AttributeFactory(datawave.query.attributes.AttributeFactory) JexlFunctionArgumentDescriptorFactory(datawave.query.jexl.functions.JexlFunctionArgumentDescriptorFactory) ExceededOrThresholdMarkerJexlNode(datawave.query.jexl.nodes.ExceededOrThresholdMarkerJexlNode) DatawaveFatalQueryException(datawave.query.exceptions.DatawaveFatalQueryException) ASTJexlScript(org.apache.commons.jexl2.parser.ASTJexlScript) TimeFilter(datawave.query.predicate.TimeFilter) Set(java.util.Set) IndexRangeIteratorBuilder(datawave.query.iterator.builder.IndexRangeIteratorBuilder) ASTNENode(org.apache.commons.jexl2.parser.ASTNENode) DefaultArithmetic(datawave.query.jexl.DefaultArithmetic) ASTOrNode(org.apache.commons.jexl2.parser.ASTOrNode) Predicate(com.google.common.base.Predicate) AndIteratorBuilder(datawave.query.iterator.builder.AndIteratorBuilder) JexlContext(org.apache.commons.jexl2.JexlContext) DatawaveFieldIndexListIteratorJexl(datawave.core.iterators.DatawaveFieldIndexListIteratorJexl) TermFrequencyAggregator(datawave.query.jexl.functions.TermFrequencyAggregator) FileSortedSet(datawave.query.util.sortedset.FileSortedSet) IteratorBuilder(datawave.query.iterator.builder.IteratorBuilder) ASTEQNode(org.apache.commons.jexl2.parser.ASTEQNode) NestedIterator(datawave.query.iterator.NestedIterator) JexlASTHelper(datawave.query.jexl.JexlASTHelper) IndexIteratorBuilder(datawave.query.iterator.builder.IndexIteratorBuilder) 
JexlArithmetic(org.apache.commons.jexl2.JexlArithmetic) ASTMethodNode(org.apache.commons.jexl2.parser.ASTMethodNode) IteratorToSortedKeyValueIterator(datawave.query.util.IteratorToSortedKeyValueIterator) JexlNode(org.apache.commons.jexl2.parser.JexlNode) SortedKeyValueIterator(org.apache.accumulo.core.iterators.SortedKeyValueIterator) TreeSet(java.util.TreeSet) SourceFactory(datawave.query.iterator.SourceFactory) ArrayList(java.util.ArrayList) NodeOperand(datawave.query.jexl.LiteralRange.NodeOperand) Lists(com.google.common.collect.Lists) LiteralRange(datawave.query.jexl.LiteralRange) Key(org.apache.accumulo.core.data.Key) IndexFilterIteratorBuilder(datawave.query.iterator.builder.IndexFilterIteratorBuilder) OrIteratorBuilder(datawave.query.iterator.builder.OrIteratorBuilder) Predicates(com.google.common.base.Predicates) EventFieldIterator(datawave.query.iterator.EventFieldIterator) IndexRegexIteratorBuilder(datawave.query.iterator.builder.IndexRegexIteratorBuilder) FileSystemCache(datawave.core.iterators.filesystem.FileSystemCache) FieldIndexAggregator(datawave.query.jexl.functions.FieldIndexAggregator) QueryPropertyMarker(datawave.query.jexl.nodes.QueryPropertyMarker) TermFrequencyIndexBuilder(datawave.query.iterator.builder.TermFrequencyIndexBuilder) IOException(java.io.IOException) IndexListIteratorBuilder(datawave.query.iterator.builder.IndexListIteratorBuilder) DatawaveErrorCode(datawave.webservice.query.exception.DatawaveErrorCode) QueryLock(datawave.core.iterators.querylock.QueryLock) OrIterator(datawave.query.iterator.logic.OrIterator) NegationBuilder(datawave.query.iterator.builder.NegationBuilder) Range(org.apache.accumulo.core.data.Range) JavaRegexAnalyzer(datawave.query.parser.JavaRegexAnalyzer) Filter(datawave.query.predicate.Filter) QueryException(datawave.webservice.query.exception.QueryException) ASTEvaluationOnly(org.apache.commons.jexl2.parser.ASTEvaluationOnly) FST(org.apache.lucene.util.fst.FST) 
PartialKey(org.apache.accumulo.core.data.PartialKey) QuerySpanCollector(datawave.query.iterator.profile.QuerySpanCollector) IvaratorBuilder(datawave.query.iterator.builder.IvaratorBuilder) ASTIdentifier(org.apache.commons.jexl2.parser.ASTIdentifier) IteratorEnvironment(org.apache.accumulo.core.iterators.IteratorEnvironment) Script(org.apache.commons.jexl2.Script) SortedSet(java.util.SortedSet) DatawaveJexlEngine(datawave.query.jexl.DatawaveJexlEngine) URISyntaxException(java.net.URISyntaxException) SourceManager(datawave.query.iterator.SourceManager) Logger(org.apache.log4j.Logger) ValueTuple(datawave.query.attributes.ValueTuple) EventDataQueryFilter(datawave.query.predicate.EventDataQueryFilter) ASTNotNode(org.apache.commons.jexl2.parser.ASTNotNode) ASTStringLiteral(org.apache.commons.jexl2.parser.ASTStringLiteral) Path(org.apache.hadoop.fs.Path) Value(org.apache.accumulo.core.data.Value) URI(java.net.URI) UniversalSet(datawave.util.UniversalSet) TypeMetadata(datawave.query.util.TypeMetadata) ASTDelayedPredicate(org.apache.commons.jexl2.parser.ASTDelayedPredicate) Collection(java.util.Collection) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) List(java.util.List) IdentifierOpLiteral(datawave.query.jexl.JexlASTHelper.IdentifierOpLiteral) Entry(java.util.Map.Entry) IvaratorCacheDirConfig(datawave.query.iterator.ivarator.IvaratorCacheDirConfig) EventDataQueryExpressionFilter(datawave.query.predicate.EventDataQueryExpressionFilter) JexlNodes.children(org.apache.commons.jexl2.parser.JexlNodes.children) GenericObjectPool(org.apache.commons.pool.impl.GenericObjectPool) ASTReferenceExpression(org.apache.commons.jexl2.parser.ASTReferenceExpression) ASTNumberLiteral(org.apache.commons.jexl2.parser.ASTNumberLiteral) NoOpType(datawave.data.type.NoOpType) IdentityAggregator(datawave.query.jexl.functions.IdentityAggregator) MessageFormat(java.text.MessageFormat) HashSet(java.util.HashSet) ASTERNode(org.apache.commons.jexl2.parser.ASTERNode) 
DatawaveJexlContext(datawave.query.jexl.DatawaveJexlContext) ASTAndNode(org.apache.commons.jexl2.parser.ASTAndNode) NoSuchElementException(java.util.NoSuchElementException) ParserTreeConstants(org.apache.commons.jexl2.parser.ParserTreeConstants) MalformedURLException(java.net.MalformedURLException) Maps(com.google.common.collect.Maps) Constants(datawave.query.Constants) ArithmeticJexlEngines(datawave.query.jexl.ArithmeticJexlEngines) ASTFunctionNode(org.apache.commons.jexl2.parser.ASTFunctionNode) CompositeMetadata(datawave.query.composite.CompositeMetadata) EventFieldAggregator(datawave.query.jexl.functions.EventFieldAggregator) Collections(java.util.Collections) ChainableEventDataQueryFilter(datawave.query.predicate.ChainableEventDataQueryFilter) ASTReference(org.apache.commons.jexl2.parser.ASTReference) ASTNotNode(org.apache.commons.jexl2.parser.ASTNotNode) ASTERNode(org.apache.commons.jexl2.parser.ASTERNode) ASTNRNode(org.apache.commons.jexl2.parser.ASTNRNode) QueryPropertyMarker(datawave.query.jexl.nodes.QueryPropertyMarker) IOException(java.io.IOException) AndIteratorBuilder(datawave.query.iterator.builder.AndIteratorBuilder) DatawaveFatalQueryException(datawave.query.exceptions.DatawaveFatalQueryException) QueryException(datawave.webservice.query.exception.QueryException) ASTFunctionNode(org.apache.commons.jexl2.parser.ASTFunctionNode) AbstractIteratorBuilder(datawave.query.iterator.builder.AbstractIteratorBuilder) DatawaveFatalQueryException(datawave.query.exceptions.DatawaveFatalQueryException) ExceededValueThresholdMarkerJexlNode(datawave.query.jexl.nodes.ExceededValueThresholdMarkerJexlNode) ExceededOrThresholdMarkerJexlNode(datawave.query.jexl.nodes.ExceededOrThresholdMarkerJexlNode) JexlNode(org.apache.commons.jexl2.parser.JexlNode) IndexRangeIteratorBuilder(datawave.query.iterator.builder.IndexRangeIteratorBuilder) LiteralRange(datawave.query.jexl.LiteralRange) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey) 
ASTAndNode(org.apache.commons.jexl2.parser.ASTAndNode)

Example 2 with IndexRangeIteratorBuilder

use of datawave.query.iterator.builder.IndexRangeIteratorBuilder in project datawave by NationalSecurityAgency.

The method ivarateRange of the class IteratorBuildingVisitor.

/**
 * Build the iterator stack for a bounded range using an {@link IndexRangeIteratorBuilder}.
 * <p>
 * The range is extracted from {@code sourceNode}, which must be an {@link ASTAndNode}; the configured builder is then handed to {@code ivarate}.
 *
 * @param rootNode
 *            the node that was processed to generate this builder
 * @param sourceNode
 *            the source node derived from the root; must be an {@link ASTAndNode} containing the range
 * @param data
 *            the parent builder (or {@code null}); used to decide negation and passed through to {@code ivarate}
 * @throws IOException
 *             declared by this method; presumably raised by the underlying {@code ivarate} call
 * @throws DatawaveFatalQueryException
 *             if {@code sourceNode} is not an {@link ASTAndNode}, or if no range can be found in it
 */
public void ivarateRange(JexlNode rootNode, JexlNode sourceNode, Object data) throws IOException {
    IndexRangeIteratorBuilder builder = new IndexRangeIteratorBuilder();
    builder.negateAsNeeded(data);

    // A range can only be extracted from an AND node; anything else is a malformed marker source.
    if (!(sourceNode instanceof ASTAndNode)) {
        QueryException qe = new QueryException(DatawaveErrorCode.UNEXPECTED_SOURCE_NODE, MessageFormat.format("{0}", "ExceededValueThresholdMarkerJexlNode"));
        throw new DatawaveFatalQueryException(qe);
    }

    // Wildcard type instead of the raw LiteralRange, matching its use elsewhere in this class.
    LiteralRange<?> range = JexlASTHelper.findRange().recursively().getRange(sourceNode);
    if (range == null) {
        // No range could be extracted from the source; reported as MULTIPLE_RANGES_IN_EXPRESSION.
        QueryException qe = new QueryException(DatawaveErrorCode.MULTIPLE_RANGES_IN_EXPRESSION);
        throw new DatawaveFatalQueryException(qe);
    }
    builder.setRange(range);

    // Force a document build only when lookups are not limited and the query is considered fully satisfied.
    builder.forceDocumentBuild(!limitLookup && this.isQueryFullySatisfied);
    ivarate(builder, rootNode, sourceNode, data);
}
Also used : DatawaveFatalQueryException(datawave.query.exceptions.DatawaveFatalQueryException) QueryException(datawave.webservice.query.exception.QueryException) DatawaveFatalQueryException(datawave.query.exceptions.DatawaveFatalQueryException) IndexRangeIteratorBuilder(datawave.query.iterator.builder.IndexRangeIteratorBuilder) LiteralRange(datawave.query.jexl.LiteralRange) ASTAndNode(org.apache.commons.jexl2.parser.ASTAndNode)

Example 3 with IndexRangeIteratorBuilder

use of datawave.query.iterator.builder.IndexRangeIteratorBuilder in project datawave by NationalSecurityAgency.

The method ivarateList of the class IteratorBuildingVisitor.

/**
 * Build the iterator stack for an {@link ExceededOrThresholdMarkerJexlNode} source using either an {@link IndexRangeIteratorBuilder} (when the marker
 * parameters carry ranges) or an {@link IndexListIteratorBuilder} (when they carry a value list or an FST URI).
 * <p>
 * Whatever is loaded (ranges, values or FST) is also stored in {@code exceededOrEvaluationCache} under the marker id, when that cache is present, for
 * use during Jexl evaluation.
 *
 * @param rootNode
 *            the node that was processed to generate this builder
 * @param sourceNode
 *            the source node derived from the root; the marker id, field and parameters are read from it
 * @param data
 *            the parent builder (or {@code null}); used to decide negation and passed through to {@code ivarate}
 * @throws IOException
 *             declared by this method; presumably raised by the underlying {@code ivarate} call
 * @throws DatawaveFatalQueryException
 *             if the marker parameters cannot be parsed or loaded (I/O error, bad FST URI, or missing data)
 */
public void ivarateList(JexlNode rootNode, JexlNode sourceNode, Object data) throws IOException {
    IvaratorBuilder builder = null;
    try {
        String id = ExceededOrThresholdMarkerJexlNode.getId(sourceNode);
        String field = JexlASTHelper.deconstructIdentifier(ExceededOrThresholdMarkerJexlNode.getField(sourceNode));
        ExceededOrThresholdMarkerJexlNode.ExceededOrParams params = ExceededOrThresholdMarkerJexlNode.getParameters(sourceNode);
        if (params.getRanges() != null && !params.getRanges().isEmpty()) {
            IndexRangeIteratorBuilder rangeIterBuilder = new IndexRangeIteratorBuilder();
            builder = rangeIterBuilder;
            // compute the sorted ranges once and reuse them for both the builder and the cache
            SortedSet<Range> ranges = params.getSortedAccumuloRanges();
            rangeIterBuilder.setSubRanges(ranges);
            // cache these ranges for use during Jexl Evaluation
            if (exceededOrEvaluationCache != null) {
                exceededOrEvaluationCache.put(id, ranges);
            }
            // the overall range spans from the start of the first sub range to the end of the last one
            LiteralRange<?> fullRange = new LiteralRange<>(String.valueOf(ranges.first().getStartKey().getRow()), ranges.first().isStartKeyInclusive(), String.valueOf(ranges.last().getEndKey().getRow()), ranges.last().isEndKeyInclusive(), field, NodeOperand.AND);
            rangeIterBuilder.setRange(fullRange);
        } else {
            IndexListIteratorBuilder listIterBuilder = new IndexListIteratorBuilder();
            builder = listIterBuilder;
            if (params.getValues() != null && !params.getValues().isEmpty()) {
                Set<String> values = new TreeSet<>(params.getValues());
                listIterBuilder.setValues(values);
                // cache these values for use during Jexl Evaluation
                if (exceededOrEvaluationCache != null) {
                    exceededOrEvaluationCache.put(id, values);
                }
            } else if (params.getFstURI() != null) {
                URI fstUri = new URI(params.getFstURI());
                FST fst;
                // only recompute this if not already set since this is potentially expensive;
                // the cache is optional, so guard against null as every other use in this method does
                if (exceededOrEvaluationCache != null && exceededOrEvaluationCache.containsKey(id)) {
                    fst = (FST) exceededOrEvaluationCache.get(id);
                } else {
                    fst = DatawaveFieldIndexListIteratorJexl.FSTManager.get(new Path(fstUri), hdfsFileCompressionCodec, hdfsFileSystem.getFileSystem(fstUri));
                    // cache this fst for use during JexlEvaluation.
                    if (exceededOrEvaluationCache != null) {
                        exceededOrEvaluationCache.put(id, fst);
                    }
                }
                listIterBuilder.setFst(fst);
            }
            // If this is actually negated, then this will be added to excludes. Do not negate in the ivarator
            listIterBuilder.setNegated(false);
        }
        builder.setField(field);
    } catch (IOException | URISyntaxException | NullPointerException e) {
        // NullPointerException is kept in the catch list so that missing marker data is still reported as unparseable params
        QueryException qe = new QueryException(DatawaveErrorCode.UNPARSEABLE_EXCEEDED_OR_PARAMS, e, MessageFormat.format("Class: {0}", ExceededOrThresholdMarkerJexlNode.class.getSimpleName()));
        throw new DatawaveFatalQueryException(qe);
    }
    builder.negateAsNeeded(data);
    // Force a document build only when lookups are not limited and the query is considered fully satisfied.
    builder.forceDocumentBuild(!limitLookup && this.isQueryFullySatisfied);
    ivarate(builder, rootNode, sourceNode, data);
}
Also used : Path(org.apache.hadoop.fs.Path) FST(org.apache.lucene.util.fst.FST) IOException(java.io.IOException) URISyntaxException(java.net.URISyntaxException) LiteralRange(datawave.query.jexl.LiteralRange) Range(org.apache.accumulo.core.data.Range) IvaratorBuilder(datawave.query.iterator.builder.IvaratorBuilder) URI(java.net.URI) DatawaveFatalQueryException(datawave.query.exceptions.DatawaveFatalQueryException) QueryException(datawave.webservice.query.exception.QueryException) TreeSet(java.util.TreeSet) DatawaveFatalQueryException(datawave.query.exceptions.DatawaveFatalQueryException) ExceededOrThresholdMarkerJexlNode(datawave.query.jexl.nodes.ExceededOrThresholdMarkerJexlNode) IndexListIteratorBuilder(datawave.query.iterator.builder.IndexListIteratorBuilder) IndexRangeIteratorBuilder(datawave.query.iterator.builder.IndexRangeIteratorBuilder) LiteralRange(datawave.query.jexl.LiteralRange)

Aggregations

DatawaveFatalQueryException (datawave.query.exceptions.DatawaveFatalQueryException)3 IndexRangeIteratorBuilder (datawave.query.iterator.builder.IndexRangeIteratorBuilder)3 LiteralRange (datawave.query.jexl.LiteralRange)3 QueryException (datawave.webservice.query.exception.QueryException)3 IndexListIteratorBuilder (datawave.query.iterator.builder.IndexListIteratorBuilder)2 IvaratorBuilder (datawave.query.iterator.builder.IvaratorBuilder)2 ASTAndNode (org.apache.commons.jexl2.parser.ASTAndNode)2 Predicate (com.google.common.base.Predicate)1 Predicates (com.google.common.base.Predicates)1 Lists (com.google.common.collect.Lists)1 Maps (com.google.common.collect.Maps)1 Sets (com.google.common.collect.Sets)1 DatawaveFieldIndexListIteratorJexl (datawave.core.iterators.DatawaveFieldIndexListIteratorJexl)1 FileSystemCache (datawave.core.iterators.filesystem.FileSystemCache)1 QueryLock (datawave.core.iterators.querylock.QueryLock)1 NoOpType (datawave.data.type.NoOpType)1 Constants (datawave.query.Constants)1 AttributeFactory (datawave.query.attributes.AttributeFactory)1 ValueTuple (datawave.query.attributes.ValueTuple)1 CompositeMetadata (datawave.query.composite.CompositeMetadata)1