Search in sources :

Example 1 with KeyAdjudicator

use of datawave.query.jexl.functions.KeyAdjudicator in project datawave by NationalSecurityAgency.

the class QueryIterator method seek.

@Override
public void seek(Range range, Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException {
    // preserve the original range for use with the Final Document tracking iterator because it is placed after the ResultCountingIterator
    // so the FinalDocumentTracking iterator needs the start key with the count already appended
    originalRange = range;
    getActiveQueryLog().get(getQueryId()).beginCall(this.originalRange, ActiveQuery.CallType.SEEK);
    Span span = Trace.start("QueryIterator.seek");
    if (!this.isIncludeGroupingContext() && (this.query.contains("grouping:") || this.query.contains("matchesInGroup") || this.query.contains("MatchesInGroup") || this.query.contains("atomValuesMatch"))) {
        this.setIncludeGroupingContext(true);
        this.groupingContextAddedByMe = true;
    } else {
        this.groupingContextAddedByMe = false;
    }
    try {
        if (log.isDebugEnabled()) {
            log.debug("Seek range: " + range + " " + query);
        }
        this.range = range;
        // determine whether this is a teardown/rebuild range
        long resultCount = 0;
        if (!range.isStartKeyInclusive()) {
            // see if we can fail fast. If we were rebuilt with the FinalDocument key, then we are already completely done
            if (collectTimingDetails && FinalDocumentTrackingIterator.isFinalDocumentKey(range.getStartKey())) {
                this.seekKeySource = new EmptyTreeIterable();
                this.serializedDocuments = EmptyIterator.emptyIterator();
                prepareKeyValue(span);
                return;
            }
            // see if we have a count in the cf
            Key startKey = range.getStartKey();
            String[] parts = StringUtils.split(startKey.getColumnFamily().toString(), '\0');
            if (parts.length == 3) {
                resultCount = NumericalEncoder.decode(parts[0]).longValue();
                // remove the count from the range
                startKey = new Key(startKey.getRow(), new Text(parts[1] + '\0' + parts[2]), startKey.getColumnQualifier(), startKey.getColumnVisibility(), startKey.getTimestamp());
                this.range = range = new Range(startKey, range.isStartKeyInclusive(), range.getEndKey(), range.isEndKeyInclusive());
            }
        }
        // determine whether this is a document specific range
        Range documentRange = isDocumentSpecificRange(range) ? range : null;
        // is done
        if (documentRange != null && !documentRange.isStartKeyInclusive()) {
            if (log.isTraceEnabled()) {
                log.trace("Received non-inclusive event specific range: " + documentRange);
            }
            if (gatherTimingDetails()) {
                this.seekKeySource = new EvaluationTrackingNestedIterator(QuerySpan.Stage.EmptyTree, trackingSpan, new EmptyTreeIterable(), myEnvironment);
            } else {
                this.seekKeySource = new EmptyTreeIterable();
            }
        } else // if the Range is for a single document and the query doesn't reference any index-only or tokenized fields
        if (documentRange != null && (!this.isContainsIndexOnlyTerms() && this.getTermFrequencyFields().isEmpty() && !super.mustUseFieldIndex)) {
            if (log.isTraceEnabled()) {
                log.trace("Received event specific range: " + documentRange);
            }
            // We can take a shortcut to the directly to the event
            Map.Entry<Key, Document> documentKey = Maps.immutableEntry(super.getDocumentKey.apply(documentRange), new Document());
            if (log.isTraceEnabled()) {
                log.trace("Transformed document key: " + documentKey);
            }
            if (gatherTimingDetails()) {
                this.seekKeySource = new EvaluationTrackingNestedIterator(QuerySpan.Stage.DocumentSpecificTree, trackingSpan, new DocumentSpecificNestedIterator(documentKey), myEnvironment);
            } else {
                this.seekKeySource = new DocumentSpecificNestedIterator(documentKey);
            }
        } else {
            this.seekKeySource = buildDocumentIterator(documentRange, range, columnFamilies, inclusive);
        }
        // Create the pipeline iterator for document aggregation and
        // evaluation within a thread pool
        PipelineIterator pipelineIter = PipelineFactory.createIterator(this.seekKeySource, getMaxEvaluationPipelines(), getMaxPipelineCachedResults(), getSerialPipelineRequest(), querySpanCollector, trackingSpan, this, sourceForDeepCopies.deepCopy(myEnvironment), myEnvironment, yield, yieldThresholdMs, columnFamilies, inclusive);
        pipelineIter.setCollectTimingDetails(collectTimingDetails);
        // TODO pipelineIter.setStatsdHostAndPort(statsdHostAndPort);
        pipelineIter.startPipeline();
        // gather Key,Document Entries from the pipelines
        Iterator<Entry<Key, Document>> pipelineDocuments = pipelineIter;
        if (log.isTraceEnabled()) {
            pipelineDocuments = Iterators.filter(pipelineDocuments, keyDocumentEntry -> {
                log.trace("after pipeline, keyDocumentEntry:" + keyDocumentEntry);
                return true;
            });
        }
        // now apply the unique transform if requested
        UniqueTransform uniquify = getUniqueTransform();
        if (uniquify != null) {
            pipelineDocuments = Iterators.filter(pipelineDocuments, uniquify.getUniquePredicate());
        }
        // apply the grouping transform if requested and if the batch size is greater than zero
        // if the batch size is 0, then grouping is computed only on the web server
        GroupingTransform groupify = getGroupingTransform();
        if (groupify != null && this.groupFieldsBatchSize > 0) {
            pipelineDocuments = groupingTransform.getGroupingIterator(pipelineDocuments, this.groupFieldsBatchSize, this.yield);
            if (log.isTraceEnabled()) {
                pipelineDocuments = Iterators.filter(pipelineDocuments, keyDocumentEntry -> {
                    log.trace("after grouping, keyDocumentEntry:" + keyDocumentEntry);
                    return true;
                });
            }
        }
        pipelineDocuments = Iterators.filter(pipelineDocuments, keyDocumentEntry -> {
            // last chance before the documents are serialized
            getActiveQueryLog().get(getQueryId()).recordStats(keyDocumentEntry.getValue(), querySpanCollector.getCombinedQuerySpan(null));
            // Always return true since we just want to record data in the ActiveQueryLog
            return true;
        });
        if (this.getReturnType() == ReturnType.kryo) {
            // Serialize the Document using Kryo
            this.serializedDocuments = Iterators.transform(pipelineDocuments, new KryoDocumentSerializer(isReducedResponse(), isCompressResults()));
        } else if (this.getReturnType() == ReturnType.writable) {
            // Use the Writable interface to serialize the Document
            this.serializedDocuments = Iterators.transform(pipelineDocuments, new WritableDocumentSerializer(isReducedResponse()));
        } else if (this.getReturnType() == ReturnType.tostring) {
            // Just return a toString() representation of the document
            this.serializedDocuments = Iterators.transform(pipelineDocuments, new ToStringDocumentSerializer(isReducedResponse()));
        } else {
            throw new IllegalArgumentException("Unknown return type of: " + this.getReturnType());
        }
        if (log.isTraceEnabled()) {
            KryoDocumentDeserializer dser = new KryoDocumentDeserializer();
            this.serializedDocuments = Iterators.filter(this.serializedDocuments, keyValueEntry -> {
                log.trace("after serializing, keyValueEntry:" + dser.apply(keyValueEntry));
                return true;
            });
        }
        // Cannot do this on document specific ranges as the count would place the keys outside the initial range
        if (!sortedUIDs && documentRange == null) {
            this.serializedDocuments = new ResultCountingIterator(serializedDocuments, resultCount, yield);
        } else if (this.sortedUIDs) {
            // we have sorted UIDs, so we can mask out the cq
            this.serializedDocuments = new KeyAdjudicator<>(serializedDocuments, yield);
        }
        // only add the final document tracking iterator which sends stats back to the client if collectTimingDetails is true
        if (collectTimingDetails) {
            // if there is no document to return, then add an empty document
            // to store the timing metadata
            this.serializedDocuments = new FinalDocumentTrackingIterator(querySpanCollector, trackingSpan, originalRange, this.serializedDocuments, this.getReturnType(), this.isReducedResponse(), this.isCompressResults(), this.yield);
        }
        if (log.isTraceEnabled()) {
            KryoDocumentDeserializer dser = new KryoDocumentDeserializer();
            this.serializedDocuments = Iterators.filter(this.serializedDocuments, keyValueEntry -> {
                log.debug("finally, considering:" + dser.apply(keyValueEntry));
                return true;
            });
        }
        // Determine if we have items to return
        prepareKeyValue(span);
    } catch (Exception e) {
        handleException(e);
    } finally {
        if (gatherTimingDetails() && trackingSpan != null && querySpanCollector != null) {
            querySpanCollector.addQuerySpan(trackingSpan);
        }
        if (null != span) {
            span.stop();
        }
        QueryStatsDClient client = getStatsdClient();
        if (client != null) {
            client.flush();
        }
        getActiveQueryLog().get(getQueryId()).endCall(this.originalRange, ActiveQuery.CallType.SEEK);
        if (this.key == null && this.value == null) {
            // no entries to return
            getActiveQueryLog().remove(getQueryId(), this.originalRange);
        }
    }
}
Also used : ByteSequence(org.apache.accumulo.core.data.ByteSequence) Document(datawave.query.attributes.Document) PipelineIterator(datawave.query.iterator.pipeline.PipelineIterator) FileSystem(org.apache.hadoop.fs.FileSystem) Text(org.apache.hadoop.io.Text) FileStatus(org.apache.hadoop.fs.FileStatus) DocumentData(datawave.query.iterator.aggregation.DocumentData) EvaluationTrackingFunction(datawave.query.iterator.profile.EvaluationTrackingFunction) Map(java.util.Map) QueryStatsDClient(datawave.query.statsd.QueryStatsDClient) Span(org.apache.accumulo.core.trace.Span) CompareToBuilder(org.apache.commons.lang.builder.CompareToBuilder) WritableDocumentSerializer(datawave.query.function.serializer.WritableDocumentSerializer) ASTJexlScript(org.apache.commons.jexl2.parser.ASTJexlScript) ConfigException(org.apache.zookeeper.server.quorum.QuorumPeerConfig.ConfigException) Trace(org.apache.accumulo.core.trace.Trace) Set(java.util.Set) EmptyIterator(org.apache.commons.collections4.iterators.EmptyIterator) RemoveGroupingContext(datawave.query.function.RemoveGroupingContext) DefaultArithmetic(datawave.query.jexl.DefaultArithmetic) Predicate(com.google.common.base.Predicate) DatawaveFieldIndexListIteratorJexl(datawave.core.iterators.DatawaveFieldIndexListIteratorJexl) DocumentProjection(datawave.query.function.DocumentProjection) YieldingKeyValueIterator(org.apache.accumulo.core.iterators.YieldingKeyValueIterator) JexlContextCreator(datawave.query.function.JexlContextCreator) IterationInterruptedException(org.apache.accumulo.core.iterators.IterationInterruptedException) IndexOnlyContextCreatorBuilder(datawave.query.function.IndexOnlyContextCreatorBuilder) StringUtils(datawave.util.StringUtils) JexlASTHelper(datawave.query.jexl.JexlASTHelper) JexlArithmetic(org.apache.commons.jexl2.JexlArithmetic) JexlNode(org.apache.commons.jexl2.parser.JexlNode) DataTypeAsField(datawave.query.function.DataTypeAsField) TraceIterators(datawave.query.util.TraceIterators) SortedKeyValueIterator(org.apache.accumulo.core.iterators.SortedKeyValueIterator) InterruptedIOException(java.io.InterruptedIOException) FsAction(org.apache.hadoop.fs.permission.FsAction) KeyToDocumentData(datawave.query.function.KeyToDocumentData) Lists(com.google.common.collect.Lists) Aggregation(datawave.query.function.Aggregation) Key(org.apache.accumulo.core.data.Key) BasePoolableObjectFactory(org.apache.commons.pool.BasePoolableObjectFactory) Nullable(javax.annotation.Nullable) ToStringDocumentSerializer(datawave.query.function.serializer.ToStringDocumentSerializer) TFFactory(datawave.query.postprocessing.tf.TFFactory) Throwables(com.google.common.base.Throwables) IOException(java.io.IOException) Range(org.apache.accumulo.core.data.Range) QuerySpan(datawave.query.iterator.profile.QuerySpan) DocumentPermutation(datawave.query.function.DocumentPermutation) WHEN_EXHAUSTED_BLOCK(org.apache.commons.pool.impl.GenericObjectPool.WHEN_EXHAUSTED_BLOCK) Preconditions(com.google.common.base.Preconditions) MaskedValueFilterInterface(datawave.query.function.MaskedValueFilterInterface) UnmodifiableIterator(com.google.common.collect.UnmodifiableIterator) QuerySpanCollector(datawave.query.iterator.profile.QuerySpanCollector) StatefulArithmetic(datawave.query.jexl.StatefulArithmetic) YieldCallback(org.apache.accumulo.core.iterators.YieldCallback) IteratorEnvironment(org.apache.accumulo.core.iterators.IteratorEnvironment) EvaluationTrackingPredicate(datawave.query.iterator.profile.EvaluationTrackingPredicate) KryoDocumentSerializer(datawave.query.function.serializer.KryoDocumentSerializer) IteratorBuildingVisitor(datawave.query.jexl.visitors.IteratorBuildingVisitor) DocumentMetadata(datawave.query.function.DocumentMetadata) EmptyContext(datawave.query.util.EmptyContext) IndexOnlyContextCreator(datawave.query.function.IndexOnlyContextCreator) EvaluationTrackingIterator(datawave.query.iterator.profile.EvaluationTrackingIterator) GroupingTransform(datawave.query.transformer.GroupingTransform) Logger(org.apache.log4j.Logger) ValueTuple(datawave.query.attributes.ValueTuple) SourceTrackingIterator(datawave.query.iterator.profile.SourceTrackingIterator) PipelineQuerySpanCollectionIterator(datawave.query.iterator.profile.PipelineQuerySpanCollectionIterator) Path(org.apache.hadoop.fs.Path) KeyAdjudicator(datawave.query.jexl.functions.KeyAdjudicator) Value(org.apache.accumulo.core.data.Value) PipelineFactory(datawave.query.iterator.pipeline.PipelineFactory) AttributeKeepFilter(datawave.query.attributes.AttributeKeepFilter) SatisfactionVisitor(datawave.query.jexl.visitors.SatisfactionVisitor) ActiveQueryLog(datawave.query.tracking.ActiveQueryLog) MaskedValueFilterFactory(datawave.query.function.MaskedValueFilterFactory) Tuple2(datawave.query.util.Tuple2) Tuple3(datawave.query.util.Tuple3) Function(com.google.common.base.Function) JexlEvaluation(datawave.query.function.JexlEvaluation) DelayedNonEventSubTreeVisitor(datawave.query.jexl.visitors.DelayedNonEventSubTreeVisitor) EmptyDocumentFilter(datawave.query.predicate.EmptyDocumentFilter) TypeMetadata(datawave.query.util.TypeMetadata) Collection(java.util.Collection) Sets(com.google.common.collect.Sets) FileNotFoundException(java.io.FileNotFoundException) ReturnType(datawave.query.DocumentSerialization.ReturnType) TabletClosedException(org.apache.accumulo.tserver.tablet.TabletClosedException) List(java.util.List) ActiveQuery(datawave.query.tracking.ActiveQuery) MarkingFunctionsFactory(datawave.marking.MarkingFunctionsFactory) Type(datawave.data.type.Type) Entry(java.util.Map.Entry) LimitFields(datawave.query.function.LimitFields) IvaratorCacheDirConfig(datawave.query.iterator.ivarator.IvaratorCacheDirConfig) FinalDocumentTrackingIterator(datawave.query.iterator.profile.FinalDocumentTrackingIterator) EntryToTuple(datawave.query.util.EntryToTuple) GenericObjectPool(org.apache.commons.pool.impl.GenericObjectPool) HashMap(java.util.HashMap) Multimap(com.google.common.collect.Multimap) MultiThreadedQuerySpan(datawave.query.iterator.profile.MultiThreadedQuerySpan) UniqueTransform(datawave.query.transformer.UniqueTransform) Iterators(com.google.common.collect.Iterators) IdentityAggregator(datawave.query.jexl.functions.IdentityAggregator) DatawaveJexlContext(datawave.query.jexl.DatawaveJexlContext) CompositeIngest(datawave.ingest.data.config.ingest.CompositeIngest) Iterator(java.util.Iterator) MalformedURLException(java.net.MalformedURLException) Preconditions.checkNotNull(com.google.common.base.Preconditions.checkNotNull) KryoDocumentDeserializer(datawave.query.function.deserializer.KryoDocumentDeserializer) Maps(com.google.common.collect.Maps) Constants(datawave.query.Constants) NumericalEncoder(datawave.data.type.util.NumericalEncoder) TupleToEntry(datawave.query.util.TupleToEntry) EvaluationTrackingNestedIterator(datawave.query.iterator.profile.EvaluationTrackingNestedIterator) CompositeMetadata(datawave.query.composite.CompositeMetadata) Comparator(java.util.Comparator) Collections(java.util.Collections) VariableNameVisitor(datawave.query.jexl.visitors.VariableNameVisitor) ToStringDocumentSerializer(datawave.query.function.serializer.ToStringDocumentSerializer) UniqueTransform(datawave.query.transformer.UniqueTransform) KeyAdjudicator(datawave.query.jexl.functions.KeyAdjudicator) GroupingTransform(datawave.query.transformer.GroupingTransform) Document(datawave.query.attributes.Document) Span(org.apache.accumulo.core.trace.Span) QuerySpan(datawave.query.iterator.profile.QuerySpan) MultiThreadedQuerySpan(datawave.query.iterator.profile.MultiThreadedQuerySpan) FinalDocumentTrackingIterator(datawave.query.iterator.profile.FinalDocumentTrackingIterator) KryoDocumentSerializer(datawave.query.function.serializer.KryoDocumentSerializer) KryoDocumentDeserializer(datawave.query.function.deserializer.KryoDocumentDeserializer) Entry(java.util.Map.Entry) TupleToEntry(datawave.query.util.TupleToEntry) EvaluationTrackingNestedIterator(datawave.query.iterator.profile.EvaluationTrackingNestedIterator) PipelineIterator(datawave.query.iterator.pipeline.PipelineIterator) WritableDocumentSerializer(datawave.query.function.serializer.WritableDocumentSerializer) Text(org.apache.hadoop.io.Text) Range(org.apache.accumulo.core.data.Range) ConfigException(org.apache.zookeeper.server.quorum.QuorumPeerConfig.ConfigException) IterationInterruptedException(org.apache.accumulo.core.iterators.IterationInterruptedException) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) TabletClosedException(org.apache.accumulo.tserver.tablet.TabletClosedException) MalformedURLException(java.net.MalformedURLException) QueryStatsDClient(datawave.query.statsd.QueryStatsDClient) Key(org.apache.accumulo.core.data.Key)

Aggregations

Function (com.google.common.base.Function)1 Preconditions (com.google.common.base.Preconditions)1 Preconditions.checkNotNull (com.google.common.base.Preconditions.checkNotNull)1 Predicate (com.google.common.base.Predicate)1 Throwables (com.google.common.base.Throwables)1 Iterators (com.google.common.collect.Iterators)1 Lists (com.google.common.collect.Lists)1 Maps (com.google.common.collect.Maps)1 Multimap (com.google.common.collect.Multimap)1 Sets (com.google.common.collect.Sets)1 UnmodifiableIterator (com.google.common.collect.UnmodifiableIterator)1 DatawaveFieldIndexListIteratorJexl (datawave.core.iterators.DatawaveFieldIndexListIteratorJexl)1 Type (datawave.data.type.Type)1 NumericalEncoder (datawave.data.type.util.NumericalEncoder)1 CompositeIngest (datawave.ingest.data.config.ingest.CompositeIngest)1 MarkingFunctionsFactory (datawave.marking.MarkingFunctionsFactory)1 Constants (datawave.query.Constants)1 ReturnType (datawave.query.DocumentSerialization.ReturnType)1 AttributeKeepFilter (datawave.query.attributes.AttributeKeepFilter)1 Document (datawave.query.attributes.Document)1