Example 1 with QuerySpan

Use of datawave.query.iterator.profile.QuerySpan in the datawave project by NationalSecurityAgency.

In class DatawaveFieldIndexCachingIteratorJexl, method moveToNextRow:

/**
 * Advances to the row that follows the current one ({@code this.fiRow}).
 * <p>
 * Need to build a range starting at the end of current row (this.fiRow) and seek the source to it. The range ends where
 * {@code lastRangeSeeked} ends. {@code fiRow} becomes the row of the source's top key, or {@code null} when the following row lies outside
 * {@code lastRangeSeeked} or the source has no top key. When a row is found, its bounding FI ranges are appended to
 * {@code boundingFiRanges}.
 * <p>
 * NOTE(review): this listing is missing closing braces and some statements; see the inline notes.
 *
 * @return the new value of {@code fiRow}, which is {@code null} when no further row was found
 * @throws IOException
 *             if we get an IOException, that means we hit the end of the tablet
 */
protected Text moveToNextRow() throws IOException {
    // set only when timing details are collected and the pooled source is a SourceTrackingIterator
    QuerySpan querySpan = null;
    try {
        // this will block until an ivarator source becomes available
        final SortedKeyValueIterator<Key, Value> source = takePoolSource();
        try {
            if (collectTimingDetails && source instanceof SourceTrackingIterator) {
                querySpan = ((SourceTrackingIterator) source).getQuerySpan();
            // fi keys are on a row boundary.
            // a bounded lastRangeSeeked that does not contain the first key of the following row means there is no next row to visit
            if (lastRangeSeeked.getEndKey() != null && !lastRangeSeeked.contains(new Key(this.fiRow).followingKey(PartialKey.ROW))) {
                fiRow = null;
            } else {
                // from the first key after the current row (inclusive) to the end of the last seeked range
                Range followingRowRange = new Range(new Key(this.fiRow).followingKey(PartialKey.ROW), true, lastRangeSeeked.getEndKey(), lastRangeSeeked.isEndKeyInclusive());
                if (log.isTraceEnabled()) {
                    log.trace("moveToNextRow(Key k), followingRowRange: " + followingRowRange);
                // do an initial seek to determine the next row (needed to calculate bounding FI ranges below)
                // NOTE(review): the head of the next statement is missing from this listing; presumably
                // a seek of source over followingRowRange - confirm against the project source
      , EMPTY_CFS, false);
                if (source.hasTop()) {
                    fiRow = source.getTopKey().getRow();
                } else {
                    fiRow = null;
        } finally {
        // NOTE(review): the body of the finally above is not shown; presumably the source goes back to the pool there - confirm
        if (log.isTraceEnabled()) {
            log.trace("moveToNextRow, nextRow: " + fiRow);
        // The boundingFiRange is used to test that we have the right fieldName->fieldValue pairing.
        if (fiRow != null) {
            boundingFiRanges.addAll(this.buildBoundingFiRanges(fiRow, fiName, fieldValue));
            if (log.isTraceEnabled()) {
                log.trace("findTop() boundingFiRange: " + boundingFiRanges);
    } finally {
        // NOTE(review): the guarded statement is not shown; presumably querySpan is handed to querySpanCollector - confirm
        if (collectTimingDetails && querySpanCollector != null && querySpan != null) {
    return fiRow;
Example 2 with QuerySpan

Use of datawave.query.iterator.profile.QuerySpan in the datawave project by NationalSecurityAgency.

In class DatawaveFieldIndexCachingIteratorJexl, method fillSet:

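/**
 * This method will asynchronously fill the set with matches from within the specified bounding FI range.
 *
 * @param boundingFiRange
 *            the bounding FI range to scan for matches
 * @param totalResults
 *            the running result count, incremented for each key added to the set
 * @return the Future of the submitted fill task
 */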
protected Future<?> fillSet(final Range boundingFiRange, final TotalResults totalResults) {
    // Purpose: take a source from the pool, wrap the scan of boundingFiRange in a Runnable and submit it through
    // IteratorThreadPoolManager.executeIvarator. Failures inside the Runnable surface through the returned Future.
    // NOTE(review): this listing is missing closing braces and some statements; see the inline notes.

    // this will block until an ivarator source becomes available
    final SortedKeyValueIterator<Key, Value> source = takePoolSource();
    // create runnable
    Runnable runnable = () -> {
        if (log.isDebugEnabled()) {
            log.debug("Starting fillSet(" + boundingFiRange + ')');
        // counters reported in the debug summary below; the statements that increment them are not shown in this listing
        int scanned = 0;
        int matched = 0;
        QuerySpan querySpan = null;
        // composite seeking state: the pending seek target and a counter compared against compositeSeekThreshold
        Key nextSeekKey = null;
        int nextCount = 0;
        try {
            if (collectTimingDetails && source instanceof SourceTrackingIterator) {
                querySpan = ((SourceTrackingIterator) source).getQuerySpan();
            // seek the source to a range covering the entire row....the bounding box will dictate the actual scan
            // NOTE(review): the head of the next statement (the seek call and its range) is missing from this listing
  , EMPTY_CFS, false);
            // if this is a range iterator, build the composite-safe Fi range
            Range compositeSafeFiRange = (this instanceof DatawaveFieldIndexRangeIteratorJexl) ? ((DatawaveFieldIndexRangeIteratorJexl) this).buildCompositeSafeFiRange(fiRow, fiName, fieldValue) : null;
            while (source.hasTop()) {
                Key top = source.getTopKey();
                // if we are setup for composite seeking, seek if we are out of range
                if (compositeSeeker != null && compositeSafeFiRange != null) {
                    String colQual = top.getColumnQualifier().toString();
                    // ingest type = text between the first and the last NUL of the column qualifier
                    String ingestType = colQual.substring(colQual.indexOf('\0') + 1, colQual.lastIndexOf('\0'));
                    String colFam = top.getColumnFamily().toString();
                    // field name = text after the first NUL of the column family
                    String fieldName = colFam.substring(colFam.indexOf('\0') + 1);
                    Collection<String> componentFields = null;
                    String separator = null;
                    // composite metadata is looked up per ingest type, then per field name
                    Multimap<String, String> compositeToFieldMap = compositeMetadata.getCompositeFieldMapByType().get(ingestType);
                    Map<String, String> compositeSeparatorMap = compositeMetadata.getCompositeFieldSeparatorsByType().get(ingestType);
                    if (compositeToFieldMap != null && compositeSeparatorMap != null) {
                        componentFields = compositeToFieldMap.get(fieldName);
                        separator = compositeSeparatorMap.get(fieldName);
                    // only act when the field is a known composite and the top key is outside the composite-safe range
                    if (componentFields != null && separator != null && !compositeSeeker.isKeyInRange(top, compositeSafeFiRange, separator)) {
                        boolean shouldSeek = false;
                        // top key precedes nextSeekKey
                        if (nextSeekKey != null && top.compareTo(nextSeekKey) < 0) {
                            // if we hit the seek threshold, seek
                            if (nextCount >= compositeSeekThreshold)
                                shouldSeek = true;
                        } else // top key exceeds nextSeekKey, or nextSeekKey unset
                            nextCount = 0;
                            nextSeekKey = null;
                            // get a new seek key
                            Key newStartKey = compositeSeeker.nextSeekKey(new ArrayList<>(componentFields), top, compositeSafeFiRange, separator);
                            // accept the new key only when it lies strictly after the start and not after the end of boundingFiRange
                            // NOTE(review): the leading '!=' is a reference comparison and looks redundant next to compareTo(...) > 0 - confirm
                            if (newStartKey != boundingFiRange.getStartKey() && newStartKey.compareTo(boundingFiRange.getStartKey()) > 0 && newStartKey.compareTo(boundingFiRange.getEndKey()) <= 0) {
                                nextSeekKey = newStartKey;
                                // if we hit the seek threshold (i.e. if it is set to 0), seek
                                if (nextCount >= compositeSeekThreshold)
                                    shouldSeek = true;
                        if (shouldSeek) {
                            // NOTE(review): the head of the next statement is missing from this listing; what remains builds a
                            // Range from nextSeekKey to the end of boundingFiRange
                   Range(nextSeekKey, boundingFiRange.isStartKeyInclusive(), boundingFiRange.getEndKey(), boundingFiRange.isEndKeyInclusive()), EMPTY_CFS, false);
                            // reset next count and seek key
                            nextSeekKey = null;
                            nextCount = 0;
                        } else {
                // terminate if timed out or cancelled
                // NOTE(review): the statement guarded by this check is not shown in this listing
                if (DatawaveFieldIndexCachingIteratorJexl.this.setControl.isCancelledQuery()) {
                // a key counts toward totalResults only when addKey reports true for it
                if (addKey(top, source.getTopValue())) {
                    // increment() returning false is treated as exceeding the maximum set size
                    if (!totalResults.increment()) {
                        throw new DatawaveIvaratorMaxResultsException("Exceeded the maximum set size");
        } catch (Exception e) {
            // throw the exception up which will be available via the Future
            log.error("Failed to complete fillSet(" + boundingFiRange + ")", e);
            throw new RuntimeException(e);
        } finally {
            // return the ivarator source back to the pool.
            // NOTE(review): the call that returns the source is not shown in this listing
            if (log.isDebugEnabled()) {
                StringBuilder builder = new StringBuilder();
                builder.append("Matched ").append(matched).append(" out of ").append(scanned).append(" for ").append(boundingFiRange).append(": ").append(DatawaveFieldIndexCachingIteratorJexl.this);
            // NOTE(review): the statement that logs builder, and the one guarded by the check below, are not shown in this listing
            if (collectTimingDetails && querySpanCollector != null && querySpan != null) {
    // hand the runnable to the ivarator executor; the second argument is a descriptive label built from this iterator and the range
    return IteratorThreadPoolManager.executeIvarator(runnable, DatawaveFieldIndexCachingIteratorJexl.this + " in " + boundingFiRange, this.initEnv);
Example 3 with QuerySpan

Use of datawave.query.iterator.profile.QuerySpan in the datawave project by NationalSecurityAgency.

In class QueryIterator, method createDocumentPipeline:

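/**
 * Create the pipeline. It is very important that this pipeline can handle resetting the bottom iterator with a new value. This means that hasNext() needs
 * to call the next iterator. The only state that can be maintained is the next value ready after hasNext() has been called. Once next() returns the value,
 * the next hasNext() call must call the next iterator again. So for example Iterators.filter() cannot be used as it uses a Google Commons AbstractIterator
 * that maintains an iterator state (failed, ready, done); use statelessFilter above instead.
 *
 * @param deepSourceCopy
 *            the source handed to the document-data mapper, the evaluation stage and the document mapping stage
 * @param documentSpecificSource
 *            the iterator of document keys that feeds the pipeline
 * @param columnFamilies
 *            passed through to the evaluation stage
 * @param inclusive
 *            passed through to the evaluation stage
 * @param querySpanCollector
 *            the collector given to the query span collection iterator when timing details are collected
 * @return iterator of keys and values
 */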
public Iterator<Entry<Key, Document>> createDocumentPipeline(SortedKeyValueIterator<Key, Value> deepSourceCopy, final NestedQueryIterator<Key> documentSpecificSource, Collection<ByteSequence> columnFamilies, boolean inclusive, QuerySpanCollector querySpanCollector) {
    // Purpose: chain the per-document stages (aggregation, optional datatype/permutation injection, evaluation, post-processing,
    // filters and projections) over documentSpecificSource. When gatherTimingDetails() is true each stage is wrapped in an
    // EvaluationTracking* decorator that reports to one shared trackingSpan.
    // NOTE(review): this listing is missing closing braces and some statements (e.g. the trace messages).
    QuerySpan trackingSpan = null;
    if (gatherTimingDetails()) {
        trackingSpan = new QuerySpan(getStatsdClient());
    if (log.isTraceEnabled()) {
    // maps a (key, document) entry to (document data, document); the implementation depends on whether the field index satisfies the query
    final Function<Entry<Key, Document>, Entry<DocumentData, Document>> docMapper;
    if (isFieldIndexSatisfyingQuery()) {
        if (log.isTraceEnabled()) {
        docMapper = new Function<Entry<Key, Document>, Entry<DocumentData, Document>>() {

            public Entry<DocumentData, Document> apply(@Nullable Entry<Key, Document> input) {
                Entry<DocumentData, Document> entry = null;
                if (input != null) {
                    // build the DocumentData from the key alone, with an empty third argument
                    // NOTE(review): Collections.EMPTY_LIST is the raw constant; Collections.emptyList() is the type-safe form
                    entry = Maps.immutableEntry(new DocumentData(input.getKey(), Collections.singleton(input.getKey()), Collections.EMPTY_LIST, true), input.getValue());
                return entry;
    } else {
        // NOTE(review): includeHierarchyFields is passed for the last two arguments - confirm that is intended
        docMapper = new KeyToDocumentData(deepSourceCopy, myEnvironment, documentOptions, super.equality, getEvaluationFilter(), this.includeHierarchyFields, this.includeHierarchyFields);
    // pair each key from the source with the source's current document, then run it through docMapper
    Iterator<Entry<DocumentData, Document>> sourceIterator = Iterators.transform(documentSpecificSource, from -> {
        Entry<Key, Document> entry = Maps.immutableEntry(from, documentSpecificSource.document());
        return docMapper.apply(entry);
    // Take the document Keys and transform it into Entry<Key,Document>,
    // removing Attributes for this Document
    // which do not fall within the expected time range
    Iterator<Entry<Key, Document>> documents = null;
    Aggregation a = new Aggregation(this.getTimeFilter(), this.typeMetadataWithNonIndexed, compositeMetadata, this.isIncludeGroupingContext(), this.includeRecordId, this.disableIndexOnlyDocuments(), getEvaluationFilter(), isTrackSizes());
    if (gatherTimingDetails()) {
        documents = Iterators.transform(sourceIterator, new EvaluationTrackingFunction<>(QuerySpan.Stage.Aggregation, trackingSpan, a));
    } else {
        documents = Iterators.transform(sourceIterator, a);
    // Inject the data type as a field if the user requested it
    if (this.includeDatatype) {
        if (gatherTimingDetails()) {
            documents = Iterators.transform(documents, new EvaluationTrackingFunction<>(QuerySpan.Stage.DataTypeAsField, trackingSpan, new DataTypeAsField(this.datatypeKey)));
        } else {
            documents = Iterators.transform(documents, new DataTypeAsField(this.datatypeKey));
    // Inject the document permutations if required
    if (!this.getDocumentPermutations().isEmpty()) {
        if (gatherTimingDetails()) {
            documents = Iterators.transform(documents, new EvaluationTrackingFunction<>(QuerySpan.Stage.DocumentPermutation, trackingSpan, new DocumentPermutation.DocumentPermutationAggregation(this.getDocumentPermutations())));
        } else {
            documents = Iterators.transform(documents, new DocumentPermutation.DocumentPermutationAggregation(this.getDocumentPermutations()));
    // evaluation stage; timed as DocumentEvaluation when timing details are gathered
    if (gatherTimingDetails()) {
        documents = new EvaluationTrackingIterator(QuerySpan.Stage.DocumentEvaluation, trackingSpan, getEvaluation(documentSpecificSource, deepSourceCopy, documents, compositeMetadata, typeMetadataWithNonIndexed, columnFamilies, inclusive));
    } else {
        documents = getEvaluation(documentSpecificSource, deepSourceCopy, documents, compositeMetadata, typeMetadataWithNonIndexed, columnFamilies, inclusive);
    // a hook to allow mapping the document such as with the TLD or Parent
    // query logics
    // or if the document was not aggregated in the first place because the
    // field index fields completely satisfied the query
    documents = mapDocument(deepSourceCopy, documents, compositeMetadata);
    // apply any configured post processing
    documents = getPostProcessingChain(documents);
    if (gatherTimingDetails()) {
        documents = new EvaluationTrackingIterator(QuerySpan.Stage.PostProcessing, trackingSpan, documents);
    // Filter out masked values if requested
    if (this.filterMaskedValues) {
        MaskedValueFilterInterface mvfi = MaskedValueFilterFactory.get(this.isIncludeGroupingContext(), this.isReducedResponse());
        if (gatherTimingDetails()) {
            documents = Iterators.transform(documents, new EvaluationTrackingFunction<>(QuerySpan.Stage.MaskedValueFilter, trackingSpan, mvfi));
        } else {
            documents = Iterators.transform(documents, mvfi);
    // now filter the attributes to those with the keep flag set true
    if (gatherTimingDetails()) {
        documents = Iterators.transform(documents, new EvaluationTrackingFunction<>(QuerySpan.Stage.AttributeKeepFilter, trackingSpan, new AttributeKeepFilter<>()));
    } else {
        documents = Iterators.transform(documents, new AttributeKeepFilter<>());
    // Project fields using a whitelist or a blacklist before serialization
    if (this.projectResults) {
        if (gatherTimingDetails()) {
            documents = Iterators.transform(documents, new EvaluationTrackingFunction<>(QuerySpan.Stage.DocumentProjection, trackingSpan, getProjection()));
        } else {
            documents = Iterators.transform(documents, getProjection());
    // remove the composite entries
    documents = Iterators.transform(documents, this.getCompositeProjection());
    // Filter out documents that are empty (e.g. after attribute
    // projection or visibility filtering)
    // statelessFilter is used for the filtering step and DocumentMetadata is applied to what remains
    if (gatherTimingDetails()) {
        documents = statelessFilter(documents, new EvaluationTrackingPredicate<>(QuerySpan.Stage.EmptyDocumentFilter, trackingSpan, new EmptyDocumentFilter()));
        documents = Iterators.transform(documents, new EvaluationTrackingFunction<>(QuerySpan.Stage.DocumentMetadata, trackingSpan, new DocumentMetadata()));
    } else {
        documents = statelessFilter(documents, new EmptyDocumentFilter());
        documents = Iterators.transform(documents, new DocumentMetadata());
    // apply LimitFields only when a limit-fields map has been configured
    if (!this.limitFieldsMap.isEmpty()) {
        if (gatherTimingDetails()) {
            documents = Iterators.transform(documents, new EvaluationTrackingFunction<>(QuerySpan.Stage.LimitFields, trackingSpan, new LimitFields(this.getLimitFieldsMap())));
        } else {
            documents = Iterators.transform(documents, new LimitFields(this.getLimitFieldsMap()));
    // do I need to remove the grouping context I added above?
    if (groupingContextAddedByMe) {
        if (gatherTimingDetails()) {
            documents = Iterators.transform(documents, new EvaluationTrackingFunction<>(QuerySpan.Stage.RemoveGroupingContext, trackingSpan, new RemoveGroupingContext()));
        } else {
            documents = Iterators.transform(documents, new RemoveGroupingContext());
    // only add the pipeline query span collection iterator which will cache metrics with each document if collectTimingDetails is true
    // NOTE(review): this reads the collectTimingDetails field while every other branch calls gatherTimingDetails(); trackingSpan is
    // null here unless gatherTimingDetails() returned true above - confirm the two always agree
    if (collectTimingDetails) {
        // if there is not a result, then add the trackingSpan to the
        // QuerySpanCollector
        // if there was a result, then the metrics from the trackingSpan
        // will be added here
        documents = new PipelineQuerySpanCollectionIterator(querySpanCollector, trackingSpan, documents);
    return documents;
Example 4 with QuerySpan

Use of datawave.query.iterator.profile.QuerySpan in the datawave project by NationalSecurityAgency.

In class FieldIndexOnlyQueryIterator, method init:

/**
 * Initializes this iterator: validates the options, parses the JEXL query into {@code script}, stores the options and environment, and
 * wraps the source in a {@code SourceTrackingIterator} when {@code collectTimingDetails} is set.
 * <p>
 * NOTE(review): this listing is missing closing braces.
 *
 * @param source
 *            the underlying iterator; used directly, or wrapped for timing when timing details are collected
 * @param options
 *            the iterator options; checked by {@code validateOptions} and kept as {@code documentOptions}
 * @param env
 *            the iterator environment; kept as {@code myEnvironment} and used to deep copy the source
 * @throws IOException
 *             if the JEXL query cannot be parsed (the {@code ParseException} is kept as the cause)
 * @throws IllegalArgumentException
 *             if {@code validateOptions} rejects the options
 */
public void init(SortedKeyValueIterator<Key, Value> source, Map<String, String> options, IteratorEnvironment env) throws IOException {
    if (log.isTraceEnabled()) {
        log.trace("QueryIterator init()");
    // fail fast on options this iterator cannot work with
    if (!validateOptions(options)) {
        throw new IllegalArgumentException("Could not initialize QueryIterator with " + options);
    // Parse & flatten the query
    try {
        script = JexlASTHelper.parseAndFlattenJexlQuery(this.getQuery());
    } catch (ParseException e) {
        // surface the parse failure as the IOException this method declares
        throw new IOException("Could not parse the JEXL query: '" + this.getQuery() + "'", e);
    this.documentOptions = options;
    this.myEnvironment = env;
    // when timing is collected, reads of the source go through a tracking wrapper that reports to trackingSpan
    if (collectTimingDetails) {
        trackingSpan = new QuerySpan(getStatsdClient());
        this.source = new SourceTrackingIterator(trackingSpan, source);
    } else {
        this.source = source;
    this.fiAggregator = new IdentityAggregator(null, null);
    // the deep copy is taken from this.source, so it is made from the tracking wrapper when timing is on
    this.sourceForDeepCopies = this.source.deepCopy(this.myEnvironment);
Example 5 with QuerySpan

Use of datawave.query.iterator.profile.QuerySpan in the datawave project by NationalSecurityAgency.

In class DatawaveFieldIndexCachingIteratorJexl, method seek:

/**
 * Seeks this iterator to the given range.
 * <p>
 * Records {@code r} as {@code lastRangeSeeked}, takes a source from the pool, and derives {@code fiRow} and {@code boundingFiRanges} from
 * the first row that source reports. When UIDs are not sorted and the start key of {@code r} has both a column family and a column
 * qualifier, a field index key is derived from that start key and the first bounding range containing it is re-based to start just after
 * it.
 * <p>
 * NOTE(review): this listing is missing closing braces and some statements; see the inline notes.
 *
 * @param r
 *            the range to seek to
 * @param columnFamilies
 *            not read in the lines visible in this listing
 * @param inclusive
 *            not read in the lines visible in this listing
 * @throws IOException
 *             declared by the signature; the throwing statements are not visible in this listing
 */
public void seek(Range r, Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException {
    if (log.isTraceEnabled()) {
        log.trace("begin seek, range: " + r);
    if (!lastRangeSeekedContains(r)) {
        // the start of this range is beyond the end of the last range seeked
        // we must reset keyValues to null and empty the underlying collection
        // NOTE(review): the statements performing that reset are not shown in this listing
    } else {
        // inside the original range, so potentially need to reposition keyValues
        if (keys != null) {
            Key startKey = r.getStartKey();
            // decide if keyValues needs to be rebuilt or can be reused
            // rebuild when the cached iterator is exhausted or its next key is already past the new start key
            if (!keys.hasNext() || (keys.peek().compareTo(startKey) > 0)) {
                keys = new CachingIterator<>(threadSafeSet.iterator());
    // if we are not sorting UIDs, then determine whether we have a cq and capture the lastFiKey
    Key lastFiKey = null;
    if (!sortedUIDs && r.getStartKey().getColumnFamily().getLength() > 0 && r.getStartKey().getColumnQualifier().getLength() > 0) {
        Key startKey = r.getStartKey();
        String cq = startKey.getColumnQualifier().toString();
        // position of the first NUL in the column qualifier; the text before it is used as the field name below
        int fieldnameIndex = cq.indexOf('\0');
        if (fieldnameIndex >= 0) {
            String cf = startKey.getColumnFamily().toString();
            // field index key: same row, cf = "fi" NUL <cq before the NUL>, cq = <cq after the NUL> NUL <original cf> NUL
            lastFiKey = new Key(startKey.getRow().toString(), "fi\0" + cq.substring(0, fieldnameIndex), cq.substring(fieldnameIndex + 1) + '\0' + cf + '\0');
    this.lastRangeSeeked = r;
    // set only when timing details are collected and the pooled source is a SourceTrackingIterator
    QuerySpan querySpan = null;
    try {
        this.fiRow = null;
        // this will block until an ivarator source becomes available
        final SortedKeyValueIterator<Key, Value> source = takePoolSource();
        try {
            if (collectTimingDetails && source instanceof SourceTrackingIterator) {
                querySpan = ((SourceTrackingIterator) source).getQuerySpan();
            // seek our underlying source to the start of the incoming range
            // expand the range as the underlying table may not actually contain the keys in this range as we are only returning keys
            // as specified by the returnKeyType
            // the end is widened to the first key after the end row (exclusive); an unbounded end stays unbounded
            Range seekRange = new Range(lastRangeSeeked.getStartKey(), lastRangeSeeked.isStartKeyInclusive(), (lastRangeSeeked.getEndKey() == null ? null : new Key(lastRangeSeeked.getEndKey().getRow()).followingKey(PartialKey.ROW)), false);
            // NOTE(review): the head of the next statement is missing from this listing; presumably a seek of source over seekRange - confirm
  , EMPTY_CFS, false);
            if (log.isTraceEnabled()) {
                try {
                    // The conditional is parenthesised because '+' binds tighter than '!='. Without the parentheses the whole
                    // concatenation was compared with null, which is always true, so only the top key was logged and the
                    // "lastRangeSeeked: ..." prefix was dropped.
                    log.trace("lastRangeSeeked: " + lastRangeSeeked + "  source.getTopKey(): " + (source != null ? source.getTopKey() : null));
                } catch (Exception ex) {
                    log.trace("Ignoring this while logging a trace message:", ex);
                // let's not ruin everything when trace is on...
            // Determine the bounding FI ranges for the field index for this row
            if (source.hasTop()) {
                this.fiRow = source.getTopKey().getRow();
                this.boundingFiRanges.addAll(buildBoundingFiRanges(fiRow, fiName, fieldValue));
                // NOTE(review): the start of the following comment is missing from this listing
                // containing the last value returned. Then modify that range appropriately.
                if (lastFiKey != null) {
                    if (log.isTraceEnabled()) {
                        log.trace("Reseeking fi to lastFiKey: " + lastFiKey);
                    // walk past leading bounding ranges that do not contain lastFiKey
                    // NOTE(review): the statement that drops the skipped range, and the body of the isEmpty() check, are not shown
                    while (!boundingFiRanges.isEmpty() && !boundingFiRanges.get(0).contains(lastFiKey)) {
                        if (log.isTraceEnabled()) {
                            log.trace("Skipping range: " + boundingFiRanges.get(0));
                        if (this.boundingFiRanges.isEmpty()) {
                    if (!boundingFiRanges.isEmpty()) {
                        if (log.isTraceEnabled()) {
                            log.trace("Starting in range: " + boundingFiRanges.get(0));
                        Range boundingFiRange = boundingFiRanges.get(0);
                        // restart the first range just after lastFiKey (start exclusive), keeping its original end
                        boundingFiRange = new Range(lastFiKey, false, boundingFiRange.getEndKey(), boundingFiRange.isEndKeyInclusive());
                        boundingFiRanges.set(0, boundingFiRange);
                        if (log.isTraceEnabled()) {
                            log.trace("Reset range to: " + boundingFiRanges.get(0));
            } else {
                // nothing under the seek range
                this.topKey = null;
        } finally {
        // NOTE(review): the body of the finally above is not shown; presumably the source goes back to the pool there - confirm
        // now lets find the top key
        // NOTE(review): the statement guarded by the next check is not shown in this listing
        if (this.fiRow != null) {
        if (log.isTraceEnabled()) {
            log.trace("seek, topKey : " + ((null == topKey) ? "null" : topKey));
    } finally {
        // NOTE(review): the guarded statement is not shown; presumably querySpan is handed to querySpanCollector - confirm
        if (collectTimingDetails && querySpanCollector != null && querySpan != null) {
Also used : SourceTrackingIterator(datawave.query.iterator.profile.SourceTrackingIterator) Value(org.apache.accumulo.core.data.Value) Range(org.apache.accumulo.core.data.Range) QuerySpan(datawave.query.iterator.profile.QuerySpan) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey) DatawaveIvaratorMaxResultsException(datawave.query.exceptions.DatawaveIvaratorMaxResultsException) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) IterationInterruptedException(org.apache.accumulo.core.iterators.IterationInterruptedException)


