Search in sources :

Example 1 with SourceTrackingIterator

use of datawave.query.iterator.profile.SourceTrackingIterator in project datawave by NationalSecurityAgency.

the class DatawaveFieldIndexCachingIteratorJexl method moveToNextRow.

/**
 * Advances to the row that follows the current row ({@code this.fiRow}) within the last seeked range.
 * <p>
 * A range is built from the end of the current row to the end of {@code lastRangeSeeked}, and a pooled source is seeked to it. The row of the resulting top
 * key (if any) becomes the new {@code fiRow}, and {@code boundingFiRanges} is rebuilt for it. When the following row falls outside of
 * {@code lastRangeSeeked}, or the source has no top key after the seek, {@code fiRow} is set to {@code null} and {@code boundingFiRanges} is left empty.
 *
 * @return the new value of {@code fiRow}, or {@code null} if there is no further row
 * @throws IOException
 *             propagated from the source seek; per the original note, this means the end of the tablet was hit
 */
protected Text moveToNextRow() throws IOException {
    log.trace("moveToNextRow()");
    QuerySpan span = null;
    try {
        // blocks until an ivarator source can be borrowed from the pool
        final SortedKeyValueIterator<Key, Value> pooledSource = takePoolSource();
        try {
            if (collectTimingDetails && pooledSource instanceof SourceTrackingIterator) {
                span = ((SourceTrackingIterator) pooledSource).getQuerySpan();
            }

            // fi keys are on a row boundary, so the first candidate is the key immediately following the current row
            final Key rangeEnd = lastRangeSeeked.getEndKey();
            final Key nextRowStart = new Key(this.fiRow).followingKey(PartialKey.ROW);
            final boolean beyondSeekedRange = rangeEnd != null && !lastRangeSeeked.contains(nextRowStart);

            if (beyondSeekedRange) {
                fiRow = null;
            } else {
                final Range remainder = new Range(nextRowStart, true, rangeEnd, lastRangeSeeked.isEndKeyInclusive());
                if (log.isTraceEnabled()) {
                    log.trace("moveToNextRow(Key k), followingRowRange: " + remainder);
                }
                // initial seek to discover the next row (needed to calculate the bounding FI ranges below)
                pooledSource.seek(remainder, EMPTY_CFS, false);
                scannedKeys.incrementAndGet();
                fiRow = pooledSource.hasTop() ? pooledSource.getTopKey().getRow() : null;
            }
        } finally {
            // always hand the borrowed source back to the pool
            returnPoolSource(pooledSource);
        }

        if (log.isTraceEnabled()) {
            log.trace("moveToNextRow, nextRow: " + fiRow);
        }

        // The boundingFiRanges are used to test that we have the right fieldName->fieldValue pairing.
        boundingFiRanges.clear();
        if (fiRow != null) {
            boundingFiRanges.addAll(this.buildBoundingFiRanges(fiRow, fiName, fieldValue));
            if (log.isTraceEnabled()) {
                log.trace("findTop() boundingFiRange: " + boundingFiRanges);
            }
        }
    } finally {
        // report the timing span only when timing is on and a span was captured from the source
        if (collectTimingDetails && querySpanCollector != null && span != null) {
            this.querySpanCollector.addQuerySpan(span);
        }
    }
    return fiRow;
}
Also used : SourceTrackingIterator(datawave.query.iterator.profile.SourceTrackingIterator) Value(org.apache.accumulo.core.data.Value) Range(org.apache.accumulo.core.data.Range) QuerySpan(datawave.query.iterator.profile.QuerySpan) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey)

Example 2 with SourceTrackingIterator

use of datawave.query.iterator.profile.SourceTrackingIterator in project datawave by NationalSecurityAgency.

the class DatawaveFieldIndexCachingIteratorJexl method fillSet.

/**
 * This method will asynchronously fill the set with matches from within the specified bounding FI range.
 * <p>
 * An ivarator source is taken from the pool on the calling thread; the scan itself runs inside a {@link Runnable} handed to
 * {@code IteratorThreadPoolManager.executeIvarator}. The runnable returns the source to the pool in its {@code finally} block. Any exception raised
 * during the scan is logged and rethrown wrapped in a {@link RuntimeException}, so it surfaces through the returned Future.
 *
 * @param boundingFiRange
 *            the field index range to seek the source to and scan
 * @param totalResults
 *            shared counter incremented for every key accepted by {@code addKey}; when {@code increment()} returns false the scan aborts with a
 *            {@code DatawaveIvaratorMaxResultsException}
 * @return the Future
 */
protected Future<?> fillSet(final Range boundingFiRange, final TotalResults totalResults) {
    // this will block until an ivarator source becomes available
    // NOTE(review): the source is only returned inside the runnable's finally block, so it is returned only if the runnable actually runs
    final SortedKeyValueIterator<Key, Value> source = takePoolSource();
    // create runnable
    Runnable runnable = () -> {
        if (log.isDebugEnabled()) {
            log.debug("Starting fillSet(" + boundingFiRange + ')');
        }
        // local counters, reported in the debug message emitted from the finally block
        int scanned = 0;
        int matched = 0;
        QuerySpan querySpan = null;
        // composite seeking state: the pending seek target and how many next() calls were made while waiting to reach it
        Key nextSeekKey = null;
        int nextCount = 0;
        try {
            if (collectTimingDetails && source instanceof SourceTrackingIterator) {
                querySpan = ((SourceTrackingIterator) source).getQuerySpan();
            }
            // seek the source to a range covering the entire row....the bounding box will dictate the actual scan
            source.seek(boundingFiRange, EMPTY_CFS, false);
            scanned++;
            DatawaveFieldIndexCachingIteratorJexl.this.scannedKeys.incrementAndGet();
            // if this is a range iterator, build the composite-safe Fi range
            // ('this' inside the lambda is the enclosing iterator instance)
            Range compositeSafeFiRange = (this instanceof DatawaveFieldIndexRangeIteratorJexl) ? ((DatawaveFieldIndexRangeIteratorJexl) this).buildCompositeSafeFiRange(fiRow, fiName, fieldValue) : null;
            while (source.hasTop()) {
                checkTiming();
                Key top = source.getTopKey();
                // if we are setup for composite seeking, seek if we are out of range
                if (compositeSeeker != null && compositeSafeFiRange != null) {
                    // the text between the first and last null byte of the column qualifier is used as the ingest type
                    String colQual = top.getColumnQualifier().toString();
                    String ingestType = colQual.substring(colQual.indexOf('\0') + 1, colQual.lastIndexOf('\0'));
                    // the text after the first null byte of the column family is used as the field name
                    String colFam = top.getColumnFamily().toString();
                    String fieldName = colFam.substring(colFam.indexOf('\0') + 1);
                    Collection<String> componentFields = null;
                    String separator = null;
                    // look up the composite component fields and separator for this ingest type / field name
                    Multimap<String, String> compositeToFieldMap = compositeMetadata.getCompositeFieldMapByType().get(ingestType);
                    Map<String, String> compositeSeparatorMap = compositeMetadata.getCompositeFieldSeparatorsByType().get(ingestType);
                    if (compositeToFieldMap != null && compositeSeparatorMap != null) {
                        componentFields = compositeToFieldMap.get(fieldName);
                        separator = compositeSeparatorMap.get(fieldName);
                    }
                    // only keys the composite seeker reports as out of range are skipped here; everything else falls through to addKey below
                    if (componentFields != null && separator != null && !compositeSeeker.isKeyInRange(top, compositeSafeFiRange, separator)) {
                        boolean shouldSeek = false;
                        // top key precedes nextSeekKey
                        if (nextSeekKey != null && top.compareTo(nextSeekKey) < 0) {
                            // if we hit the seek threshold, seek
                            if (nextCount >= compositeSeekThreshold)
                                shouldSeek = true;
                        } else // top key exceeds nextSeekKey, or nextSeekKey unset
                        {
                            nextCount = 0;
                            nextSeekKey = null;
                            // get a new seek key
                            Key newStartKey = compositeSeeker.nextSeekKey(new ArrayList<>(componentFields), top, compositeSafeFiRange, separator);
                            // accept the new seek key only if it lies strictly after the bounding range start and not after its end
                            if (newStartKey != boundingFiRange.getStartKey() && newStartKey.compareTo(boundingFiRange.getStartKey()) > 0 && newStartKey.compareTo(boundingFiRange.getEndKey()) <= 0) {
                                nextSeekKey = newStartKey;
                                // if we hit the seek threshold (i.e. if it is set to 0), seek
                                if (nextCount >= compositeSeekThreshold)
                                    shouldSeek = true;
                            }
                        }
                        if (shouldSeek) {
                            // re-seek from the computed key to the end of the bounding range
                            source.seek(new Range(nextSeekKey, boundingFiRange.isStartKeyInclusive(), boundingFiRange.getEndKey(), boundingFiRange.isEndKeyInclusive()), EMPTY_CFS, false);
                            // reset next count and seek key
                            nextSeekKey = null;
                            nextCount = 0;
                        } else {
                            nextCount++;
                            source.next();
                        }
                        // note: this path bumps only the local counter; the shared scannedKeys counter is not incremented here
                        scanned++;
                        continue;
                    }
                }
                // terminate if timed out or cancelled
                if (DatawaveFieldIndexCachingIteratorJexl.this.setControl.isCancelledQuery()) {
                    break;
                }
                // addKey decides whether the key is a match; each match counts against the shared result limit
                if (addKey(top, source.getTopValue())) {
                    matched++;
                    if (!totalResults.increment()) {
                        throw new DatawaveIvaratorMaxResultsException("Exceeded the maximum set size");
                    }
                }
                source.next();
                scanned++;
                DatawaveFieldIndexCachingIteratorJexl.this.scannedKeys.incrementAndGet();
            }
        } catch (Exception e) {
            // throw the exception up which will be available via the Future
            log.error("Failed to complete fillSet(" + boundingFiRange + ")", e);
            throw new RuntimeException(e);
        } finally {
            // return the ivarator source back to the pool.
            returnPoolSource(source);
            if (log.isDebugEnabled()) {
                StringBuilder builder = new StringBuilder();
                builder.append("Matched ").append(matched).append(" out of ").append(scanned).append(" for ").append(boundingFiRange).append(": ").append(DatawaveFieldIndexCachingIteratorJexl.this);
                log.debug(builder.toString());
            }
            // report the timing span only when timing is on and a span was captured from the source
            if (collectTimingDetails && querySpanCollector != null && querySpan != null) {
                querySpanCollector.addQuerySpan(querySpan);
            }
        }
    };
    // submit the scan; the second argument is a descriptive label built from this iterator and the range
    return IteratorThreadPoolManager.executeIvarator(runnable, DatawaveFieldIndexCachingIteratorJexl.this + " in " + boundingFiRange, this.initEnv);
}
Also used : ArrayList(java.util.ArrayList) Range(org.apache.accumulo.core.data.Range) QuerySpan(datawave.query.iterator.profile.QuerySpan) DatawaveIvaratorMaxResultsException(datawave.query.exceptions.DatawaveIvaratorMaxResultsException) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) IterationInterruptedException(org.apache.accumulo.core.iterators.IterationInterruptedException) DatawaveIvaratorMaxResultsException(datawave.query.exceptions.DatawaveIvaratorMaxResultsException) Multimap(com.google.common.collect.Multimap) SourceTrackingIterator(datawave.query.iterator.profile.SourceTrackingIterator) Value(org.apache.accumulo.core.data.Value) Collection(java.util.Collection) Map(java.util.Map) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey)

Example 3 with SourceTrackingIterator

use of datawave.query.iterator.profile.SourceTrackingIterator in project datawave by NationalSecurityAgency.

the class QueryIterator method init.

/**
 * Initializes this iterator: validates the options, builds the type metadata, parses the JEXL query, wires up the (optionally wrapped) source and
 * prepares the deep-copy source and supporting static configuration.
 *
 * @param source
 *            the underlying iterator; wrapped in a {@code SourceTrackingIterator} when timing details are gathered, and additionally in a
 *            {@code SourceThreadTrackingIterator} when multithreaded source debugging is on
 * @param options
 *            the iterator options; validated first and retained as {@code documentOptions}
 * @param env
 *            the iterator environment; may be {@code null}, in which case the ActiveQueryLog config is not updated
 * @throws IOException
 *             if the JEXL query cannot be parsed
 * @throws IllegalArgumentException
 *             if the options fail validation
 */
@Override
public void init(SortedKeyValueIterator<Key, Value> source, Map<String, String> options, IteratorEnvironment env) throws IOException {
    if (log.isTraceEnabled()) {
        log.trace("QueryIterator init()");
    }

    final boolean optionsAccepted = validateOptions(new SourcedOptions<>(source, env, options));
    if (!optionsAccepted) {
        throw new IllegalArgumentException("Could not initialize QueryIterator with " + options);
    }

    // Spoofed dataTypes are added for Aggregation/Evaluation so that
    // numeric evaluation behaves properly.
    this.typeMetadata = new TypeMetadata(this.getTypeMetadata());
    this.typeMetadataWithNonIndexed = new TypeMetadata(this.typeMetadata);
    this.typeMetadataWithNonIndexed.addForAllIngestTypes(this.getNonIndexedDataTypeMap());

    this.exceededOrEvaluationCache = new HashMap<>();

    // Parse the query; any failure is reported as an IOException carrying the query text
    try {
        this.script = JexlASTHelper.parseJexlQuery(this.getQuery());
        this.myEvaluationFunction = new JexlEvaluation(this.getQuery(), arithmetic);
    } catch (Exception parseFailure) {
        throw new IOException("Could not parse the JEXL query: '" + this.getQuery() + "'", parseFailure);
    }

    this.documentOptions = options;
    this.myEnvironment = env;

    // optionally wrap the incoming source so that its activity is recorded in a tracking span
    SortedKeyValueIterator<Key, Value> effectiveSource = source;
    if (gatherTimingDetails()) {
        this.trackingSpan = new MultiThreadedQuerySpan(getStatsdClient());
        effectiveSource = new SourceTrackingIterator(trackingSpan, source);
    }
    this.source = effectiveSource;

    this.fiAggregator = new IdentityAggregator(getAllIndexOnlyFields(), getEvaluationFilter(), getEvaluationFilter() != null ? getEvaluationFilter().getMaxNextCount() : -1);

    if (isDebugMultithreadedSources()) {
        this.source = new SourceThreadTrackingIterator(this.source);
    }

    // the deep copy is taken after all wrapping has been applied
    this.sourceForDeepCopies = this.source.deepCopy(this.myEnvironment);

    // update ActiveQueryLog with (potentially) updated config
    if (null != env) {
        ActiveQueryLog.setConfig(env.getConfig());
    }

    DatawaveFieldIndexListIteratorJexl.FSTManager.setHdfsFileSystem(this.getFileSystemCache());
    DatawaveFieldIndexListIteratorJexl.FSTManager.setHdfsFileCompressionCodec(this.getHdfsFileCompressionCodec());

    pruneIvaratorCacheDirs();
}
Also used : TypeMetadata(datawave.query.util.TypeMetadata) SourceTrackingIterator(datawave.query.iterator.profile.SourceTrackingIterator) JexlEvaluation(datawave.query.function.JexlEvaluation) MultiThreadedQuerySpan(datawave.query.iterator.profile.MultiThreadedQuerySpan) IdentityAggregator(datawave.query.jexl.functions.IdentityAggregator) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) ConfigException(org.apache.zookeeper.server.quorum.QuorumPeerConfig.ConfigException) IterationInterruptedException(org.apache.accumulo.core.iterators.IterationInterruptedException) InterruptedIOException(java.io.InterruptedIOException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) TabletClosedException(org.apache.accumulo.tserver.tablet.TabletClosedException) MalformedURLException(java.net.MalformedURLException)

Example 4 with SourceTrackingIterator

use of datawave.query.iterator.profile.SourceTrackingIterator in project datawave by NationalSecurityAgency.

the class FieldIndexOnlyQueryIterator method init.

/**
 * Initializes this iterator: validates the options, parses and flattens the JEXL query, wires up the (optionally tracked) source and prepares the
 * deep-copy source.
 *
 * @param source
 *            the underlying iterator; wrapped in a {@code SourceTrackingIterator} when {@code collectTimingDetails} is set
 * @param options
 *            the iterator options; validated first and retained as {@code documentOptions}
 * @param env
 *            the iterator environment, retained and used for the deep copy of the source
 * @throws IOException
 *             if the JEXL query cannot be parsed
 * @throws IllegalArgumentException
 *             if the options fail validation
 */
@Override
public void init(SortedKeyValueIterator<Key, Value> source, Map<String, String> options, IteratorEnvironment env) throws IOException {
    if (log.isTraceEnabled()) {
        log.trace("QueryIterator init()");
    }

    final boolean optionsAccepted = validateOptions(options);
    if (!optionsAccepted) {
        throw new IllegalArgumentException("Could not initialize QueryIterator with " + options);
    }

    // Parse & flatten the query; a parse failure is reported as an IOException carrying the query text
    try {
        script = JexlASTHelper.parseAndFlattenJexlQuery(this.getQuery());
    } catch (ParseException parseFailure) {
        throw new IOException("Could not parse the JEXL query: '" + this.getQuery() + "'", parseFailure);
    }

    this.documentOptions = options;
    this.myEnvironment = env;

    // optionally wrap the incoming source so that its activity is recorded in a tracking span
    SortedKeyValueIterator<Key, Value> effectiveSource = source;
    if (collectTimingDetails) {
        trackingSpan = new QuerySpan(getStatsdClient());
        effectiveSource = new SourceTrackingIterator(trackingSpan, source);
    }
    this.source = effectiveSource;

    this.fiAggregator = new IdentityAggregator(null, null);

    // the deep copy is taken from the (possibly wrapped) source
    this.sourceForDeepCopies = this.source.deepCopy(this.myEnvironment);
}
Also used : SourceTrackingIterator(datawave.query.iterator.profile.SourceTrackingIterator) IdentityAggregator(datawave.query.jexl.functions.IdentityAggregator) ParseException(org.apache.commons.jexl2.parser.ParseException) IOException(java.io.IOException) QuerySpan(datawave.query.iterator.profile.QuerySpan)

Example 5 with SourceTrackingIterator

use of datawave.query.iterator.profile.SourceTrackingIterator in project datawave by NationalSecurityAgency.

the class DatawaveFieldIndexCachingIteratorJexl method seek.

/**
 * Seeks this iterator to the given range.
 * <p>
 * If the range is not contained in the last range seeked, the row-based HDFS-backed set is cleared; otherwise the cached key iterator is rebuilt when it
 * is exhausted or already positioned past the start of the new range. A pooled source is then seeked to an expanded form of the range (through the end
 * of the end key's row) to discover the first row, the bounding FI ranges are computed for that row, and {@code findTop()} is invoked to position the
 * top key.
 * <p>
 * When UIDs are not sorted and the start key carries both a column family and a column qualifier containing a null byte, the start key is translated
 * into a field index key ({@code lastFiKey}). Bounding FI ranges not containing that key are skipped, and the first remaining range is narrowed to start
 * just after it.
 *
 * @param r
 *            the range to seek to; recorded as {@code lastRangeSeeked}
 * @param columnFamilies
 *            not referenced by this method; the underlying source is always seeked with {@code EMPTY_CFS}
 * @param inclusive
 *            not referenced by this method; the underlying source is always seeked with {@code false}
 * @throws IOException
 *             propagated from the underlying source or from the helpers invoked here
 */
@Override
public void seek(Range r, Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException {
    if (log.isTraceEnabled()) {
        log.trace("begin seek, range: " + r);
    }
    if (!lastRangeSeekedContains(r)) {
        // the start of this range is beyond the end of the last range seeked
        // we must reset keyValues to null and empty the underlying collection
        clearRowBasedHdfsBackedSet();
    } else {
        // inside the original range, so potentially need to reposition keyValues
        if (keys != null) {
            Key startKey = r.getStartKey();
            // decide if keyValues needs to be rebuilt or can be reused
            if (!keys.hasNext() || (keys.peek().compareTo(startKey) > 0)) {
                keys = new CachingIterator<>(threadSafeSet.iterator());
            }
        }
    }
    // if we are not sorting UIDs, then determine whether we have a cq and capture the lastFiKey
    Key lastFiKey = null;
    if (!sortedUIDs && r.getStartKey().getColumnFamily().getLength() > 0 && r.getStartKey().getColumnQualifier().getLength() > 0) {
        Key startKey = r.getStartKey();
        String cq = startKey.getColumnQualifier().toString();
        int fieldnameIndex = cq.indexOf('\0');
        if (fieldnameIndex >= 0) {
            String cf = startKey.getColumnFamily().toString();
            // rebuild the field index key: cf = "fi\0" + <cq text before the null>, cq = <cq text after the null> + '\0' + <original cf> + '\0'
            lastFiKey = new Key(startKey.getRow().toString(), "fi\0" + cq.substring(0, fieldnameIndex), cq.substring(fieldnameIndex + 1) + '\0' + cf + '\0');
        }
    }
    this.lastRangeSeeked = r;
    QuerySpan querySpan = null;
    try {
        this.fiRow = null;
        // this will block until an ivarator source becomes available
        final SortedKeyValueIterator<Key, Value> source = takePoolSource();
        try {
            if (collectTimingDetails && source instanceof SourceTrackingIterator) {
                querySpan = ((SourceTrackingIterator) source).getQuerySpan();
            }
            // seek our underlying source to the start of the incoming range
            // expand the range as the underlying table may not actually contain the keys in this range as we are only returning keys
            // as specified by the returnKeyType
            Range seekRange = new Range(lastRangeSeeked.getStartKey(), lastRangeSeeked.isStartKeyInclusive(), (lastRangeSeeked.getEndKey() == null ? null : new Key(lastRangeSeeked.getEndKey().getRow()).followingKey(PartialKey.ROW)), false);
            source.seek(seekRange, EMPTY_CFS, false);
            scannedKeys.incrementAndGet();
            if (log.isTraceEnabled()) {
                try {
                    // The conditional must be parenthesized: '+' binds tighter than '!=' and '?:', so without the parentheses the
                    // whole concatenation became the (always non-null) condition and only the bare top key was logged.
                    log.trace("lastRangeSeeked: " + lastRangeSeeked + "  source.getTopKey(): " + (source.hasTop() ? source.getTopKey() : null));
                } catch (Exception ex) {
                    // let's not ruin everything when trace is on...
                    log.trace("Ignoring this while logging a trace message:", ex);
                }
            }
            // Determine the bounding FI ranges for the field index for this row
            this.boundingFiRanges.clear();
            if (source.hasTop()) {
                this.fiRow = source.getTopKey().getRow();
                this.boundingFiRanges.addAll(buildBoundingFiRanges(fiRow, fiName, fieldValue));
                // If we have a lastFiKey, find the bounding FI range
                // containing the last value returned. Then modify that range appropriately.
                if (lastFiKey != null) {
                    if (log.isTraceEnabled()) {
                        log.trace("Reseeking fi to lastFiKey: " + lastFiKey);
                    }
                    // drop leading ranges that do not contain lastFiKey; once a row's ranges are used up, advance to the next row
                    while (!boundingFiRanges.isEmpty() && !boundingFiRanges.get(0).contains(lastFiKey)) {
                        if (log.isTraceEnabled()) {
                            log.trace("Skipping range: " + boundingFiRanges.get(0));
                        }
                        boundingFiRanges.remove(0);
                        if (this.boundingFiRanges.isEmpty()) {
                            moveToNextRow();
                        }
                    }
                    if (!boundingFiRanges.isEmpty()) {
                        if (log.isTraceEnabled()) {
                            log.trace("Starting in range: " + boundingFiRanges.get(0));
                        }
                        // restart just after lastFiKey (exclusive start), keeping the original end of the range
                        Range boundingFiRange = boundingFiRanges.get(0);
                        boundingFiRange = new Range(lastFiKey, false, boundingFiRange.getEndKey(), boundingFiRange.isEndKeyInclusive());
                        boundingFiRanges.set(0, boundingFiRange);
                        if (log.isTraceEnabled()) {
                            log.trace("Reset range to: " + boundingFiRanges.get(0));
                        }
                    }
                }
            } else {
                this.topKey = null;
            }
        } finally {
            // always hand the borrowed source back to the pool
            returnPoolSource(source);
        }
        // now lets find the top key
        if (this.fiRow != null) {
            findTop();
        }
        if (log.isTraceEnabled()) {
            log.trace("seek, topKey : " + ((null == topKey) ? "null" : topKey));
        }
    } finally {
        // report the timing span only when timing is on and a span was captured from the source
        if (collectTimingDetails && querySpanCollector != null && querySpan != null) {
            querySpanCollector.addQuerySpan(querySpan);
        }
    }
}
Also used : SourceTrackingIterator(datawave.query.iterator.profile.SourceTrackingIterator) Value(org.apache.accumulo.core.data.Value) Range(org.apache.accumulo.core.data.Range) QuerySpan(datawave.query.iterator.profile.QuerySpan) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey) DatawaveIvaratorMaxResultsException(datawave.query.exceptions.DatawaveIvaratorMaxResultsException) IOException(java.io.IOException) UnknownHostException(java.net.UnknownHostException) IterationInterruptedException(org.apache.accumulo.core.iterators.IterationInterruptedException)

Aggregations

SourceTrackingIterator (datawave.query.iterator.profile.SourceTrackingIterator)5 QuerySpan (datawave.query.iterator.profile.QuerySpan)4 IOException (java.io.IOException)4 Key (org.apache.accumulo.core.data.Key)3 PartialKey (org.apache.accumulo.core.data.PartialKey)3 Range (org.apache.accumulo.core.data.Range)3 Value (org.apache.accumulo.core.data.Value)3 IterationInterruptedException (org.apache.accumulo.core.iterators.IterationInterruptedException)3 DatawaveIvaratorMaxResultsException (datawave.query.exceptions.DatawaveIvaratorMaxResultsException)2 IdentityAggregator (datawave.query.jexl.functions.IdentityAggregator)2 UnknownHostException (java.net.UnknownHostException)2 Multimap (com.google.common.collect.Multimap)1 JexlEvaluation (datawave.query.function.JexlEvaluation)1 MultiThreadedQuerySpan (datawave.query.iterator.profile.MultiThreadedQuerySpan)1 TypeMetadata (datawave.query.util.TypeMetadata)1 FileNotFoundException (java.io.FileNotFoundException)1 InterruptedIOException (java.io.InterruptedIOException)1 MalformedURLException (java.net.MalformedURLException)1 ArrayList (java.util.ArrayList)1 Collection (java.util.Collection)1