use of datawave.query.iterator.profile.QuerySpan in project datawave by NationalSecurityAgency.
the class DatawaveFieldIndexCachingIteratorJexl method moveToNextRow.
// need to build a range starting at the end of current row (this.fiRow) and seek the
// source to it. If we get an IOException, that means we hit the end of the tablet.
protected Text moveToNextRow() throws IOException {
    log.trace("moveToNextRow()");
    QuerySpan querySpan = null;
    try {
        // this will block until an ivarator source becomes available
        final SortedKeyValueIterator<Key, Value> source = takePoolSource();
        try {
            if (collectTimingDetails && source instanceof SourceTrackingIterator) {
                querySpan = ((SourceTrackingIterator) source).getQuerySpan();
            }
            // fi keys are on a row boundary.
            if (lastRangeSeeked.getEndKey() != null && !lastRangeSeeked.contains(new Key(this.fiRow).followingKey(PartialKey.ROW))) {
                fiRow = null;
            } else {
                Range followingRowRange = new Range(new Key(this.fiRow).followingKey(PartialKey.ROW), true, lastRangeSeeked.getEndKey(),
                                lastRangeSeeked.isEndKeyInclusive());
                if (log.isTraceEnabled()) {
                    log.trace("moveToNextRow(Key k), followingRowRange: " + followingRowRange);
                }
                // do an initial seek to determine the next row (needed to calculate bounding FI ranges below)
                source.seek(followingRowRange, EMPTY_CFS, false);
                scannedKeys.incrementAndGet();
                if (source.hasTop()) {
                    fiRow = source.getTopKey().getRow();
                } else {
                    fiRow = null;
                }
            }
        } finally {
            returnPoolSource(source);
        }
        if (log.isTraceEnabled()) {
            log.trace("moveToNextRow, nextRow: " + fiRow);
        }
        // The boundingFiRange is used to test that we have the right fieldName->fieldValue pairing.
        boundingFiRanges.clear();
        if (fiRow != null) {
            boundingFiRanges.addAll(this.buildBoundingFiRanges(fiRow, fiName, fieldValue));
            if (log.isTraceEnabled()) {
                log.trace("findTop() boundingFiRange: " + boundingFiRanges);
            }
        }
    } finally {
        if (collectTimingDetails && querySpanCollector != null && querySpan != null) {
            this.querySpanCollector.addQuerySpan(querySpan);
        }
    }
    return fiRow;
}
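For context on the row-advance logic above: the next-row range is anchored at Key.followingKey(PartialKey.ROW), which yields the smallest key that sorts after every key in the current row. A minimal sketch using the standard Accumulo API (the helper class, method name, and values are illustrative, not part of DataWave):

import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.PartialKey;
import org.apache.accumulo.core.data.Range;
import org.apache.hadoop.io.Text;

class NextRowRangeSketch {
    // builds the same style of next-row range that moveToNextRow() seeks to:
    // start at the first possible key after currentRow, end at the caller's end key
    static Range followingRowRange(Text currentRow, Key endKey, boolean endInclusive) {
        Key afterRow = new Key(currentRow).followingKey(PartialKey.ROW);
        return new Range(afterRow, true, endKey, endInclusive);
    }
}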
use of datawave.query.iterator.profile.QuerySpan in project datawave by NationalSecurityAgency.
the class DatawaveFieldIndexCachingIteratorJexl method fillSet.
/**
 * This method will asynchronously fill the set with matches from within the specified bounding FI range.
 *
 * @param boundingFiRange
 *            the bounding FI range to scan for matches
 * @param totalResults
 *            the running result total; increment() returning false signals that the maximum set size was exceeded
 * @return the Future representing the asynchronous fill task
 */
protected Future<?> fillSet(final Range boundingFiRange, final TotalResults totalResults) {
    // this will block until an ivarator source becomes available
    final SortedKeyValueIterator<Key, Value> source = takePoolSource();
    // create runnable
    Runnable runnable = () -> {
        if (log.isDebugEnabled()) {
            log.debug("Starting fillSet(" + boundingFiRange + ')');
        }
        int scanned = 0;
        int matched = 0;
        QuerySpan querySpan = null;
        Key nextSeekKey = null;
        int nextCount = 0;
        try {
            if (collectTimingDetails && source instanceof SourceTrackingIterator) {
                querySpan = ((SourceTrackingIterator) source).getQuerySpan();
            }
            // seek the source to a range covering the entire row... the bounding box will dictate the actual scan
            source.seek(boundingFiRange, EMPTY_CFS, false);
            scanned++;
            DatawaveFieldIndexCachingIteratorJexl.this.scannedKeys.incrementAndGet();
            // if this is a range iterator, build the composite-safe FI range
            Range compositeSafeFiRange = (this instanceof DatawaveFieldIndexRangeIteratorJexl)
                            ? ((DatawaveFieldIndexRangeIteratorJexl) this).buildCompositeSafeFiRange(fiRow, fiName, fieldValue)
                            : null;
            while (source.hasTop()) {
                checkTiming();
                Key top = source.getTopKey();
                // if we are set up for composite seeking, seek if we are out of range
                if (compositeSeeker != null && compositeSafeFiRange != null) {
                    String colQual = top.getColumnQualifier().toString();
                    String ingestType = colQual.substring(colQual.indexOf('\0') + 1, colQual.lastIndexOf('\0'));
                    String colFam = top.getColumnFamily().toString();
                    String fieldName = colFam.substring(colFam.indexOf('\0') + 1);
                    Collection<String> componentFields = null;
                    String separator = null;
                    Multimap<String, String> compositeToFieldMap = compositeMetadata.getCompositeFieldMapByType().get(ingestType);
                    Map<String, String> compositeSeparatorMap = compositeMetadata.getCompositeFieldSeparatorsByType().get(ingestType);
                    if (compositeToFieldMap != null && compositeSeparatorMap != null) {
                        componentFields = compositeToFieldMap.get(fieldName);
                        separator = compositeSeparatorMap.get(fieldName);
                    }
                    if (componentFields != null && separator != null && !compositeSeeker.isKeyInRange(top, compositeSafeFiRange, separator)) {
                        boolean shouldSeek = false;
                        if (nextSeekKey != null && top.compareTo(nextSeekKey) < 0) {
                            // top key precedes nextSeekKey: seek only once we hit the seek threshold
                            if (nextCount >= compositeSeekThreshold) {
                                shouldSeek = true;
                            }
                        } else {
                            // top key meets or exceeds nextSeekKey, or nextSeekKey is unset: get a new seek key
                            nextCount = 0;
                            nextSeekKey = null;
                            Key newStartKey = compositeSeeker.nextSeekKey(new ArrayList<>(componentFields), top, compositeSafeFiRange, separator);
                            if (newStartKey != boundingFiRange.getStartKey() && newStartKey.compareTo(boundingFiRange.getStartKey()) > 0
                                            && newStartKey.compareTo(boundingFiRange.getEndKey()) <= 0) {
                                nextSeekKey = newStartKey;
                                // if we hit the seek threshold (i.e. if it is set to 0), seek
                                if (nextCount >= compositeSeekThreshold) {
                                    shouldSeek = true;
                                }
                            }
                        }
                        if (shouldSeek) {
                            source.seek(new Range(nextSeekKey, boundingFiRange.isStartKeyInclusive(), boundingFiRange.getEndKey(),
                                            boundingFiRange.isEndKeyInclusive()), EMPTY_CFS, false);
                            // reset next count and seek key
                            nextSeekKey = null;
                            nextCount = 0;
                        } else {
                            nextCount++;
                            source.next();
                        }
                        scanned++;
                        continue;
                    }
                }
                // terminate if timed out or cancelled
                if (DatawaveFieldIndexCachingIteratorJexl.this.setControl.isCancelledQuery()) {
                    break;
                }
                if (addKey(top, source.getTopValue())) {
                    matched++;
                    if (!totalResults.increment()) {
                        throw new DatawaveIvaratorMaxResultsException("Exceeded the maximum set size");
                    }
                }
                source.next();
                scanned++;
                DatawaveFieldIndexCachingIteratorJexl.this.scannedKeys.incrementAndGet();
            }
        } catch (Exception e) {
            // throw the exception up, which will be available via the Future
            log.error("Failed to complete fillSet(" + boundingFiRange + ")", e);
            throw new RuntimeException(e);
        } finally {
            // return the ivarator source back to the pool
            returnPoolSource(source);
            if (log.isDebugEnabled()) {
                StringBuilder builder = new StringBuilder();
                builder.append("Matched ").append(matched).append(" out of ").append(scanned).append(" for ").append(boundingFiRange).append(": ")
                                .append(DatawaveFieldIndexCachingIteratorJexl.this);
                log.debug(builder.toString());
            }
            if (collectTimingDetails && querySpanCollector != null && querySpan != null) {
                querySpanCollector.addQuerySpan(querySpan);
            }
        }
    };
    return IteratorThreadPoolManager.executeIvarator(runnable, DatawaveFieldIndexCachingIteratorJexl.this + " in " + boundingFiRange, this.initEnv);
}
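Because fillSet() hands back a Future per bounding FI range, a caller can submit one task per range and then block until all of them complete. A hedged sketch of such a driver (the fillSets name and its plumbing are illustrative; this excerpt does not show how DataWave actually drives these futures):

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import org.apache.accumulo.core.data.Range;

// hypothetical driver: one fillSet task per bounding FI range, then wait for all
void fillSets(List<Range> boundingFiRanges, TotalResults totalResults)
                throws InterruptedException, ExecutionException {
    List<Future<?>> futures = new ArrayList<>(boundingFiRanges.size());
    for (Range r : boundingFiRanges) {
        futures.add(fillSet(r, totalResults));
    }
    for (Future<?> f : futures) {
        f.get(); // a RuntimeException thrown inside the runnable surfaces here as an ExecutionException
    }
}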
use of datawave.query.iterator.profile.QuerySpan in project datawave by NationalSecurityAgency.
the class QueryIterator method createDocumentPipeline.
/**
 * Create the pipeline. It is very important that this pipeline can handle resetting the bottom iterator with a new value. This means that hasNext() needs
 * to call the next iterator. The only state that can be maintained is the next value, ready after hasNext() has been called. Once next() returns the value,
 * the next hasNext() call must call the next iterator again. So, for example, Iterators.filter() cannot be used, as it uses a Google Guava AbstractIterator
 * that maintains an iterator state (failed, ready, done); use the class's statelessFilter instead (a sketch follows this method).
 *
 * @param deepSourceCopy
 *            a deep copy of the underlying source iterator
 * @param documentSpecificSource
 *            the document-specific source of keys
 * @param columnFamilies
 *            the column families to scan
 * @param inclusive
 *            whether the column families are inclusive
 * @param querySpanCollector
 *            the collector for query span metrics
 * @return an iterator of key/document entries
 */
public Iterator<Entry<Key, Document>> createDocumentPipeline(SortedKeyValueIterator<Key, Value> deepSourceCopy,
                final NestedQueryIterator<Key> documentSpecificSource, Collection<ByteSequence> columnFamilies, boolean inclusive,
                QuerySpanCollector querySpanCollector) {
    QuerySpan trackingSpan = null;
    if (gatherTimingDetails()) {
        trackingSpan = new QuerySpan(getStatsdClient());
    }
    if (log.isTraceEnabled()) {
        log.trace("createDocumentPipeline");
    }
    final Function<Entry<Key, Document>, Entry<DocumentData, Document>> docMapper;
    if (isFieldIndexSatisfyingQuery()) {
        if (log.isTraceEnabled()) {
            log.trace("isFieldIndexSatisfyingQuery");
        }
        docMapper = new Function<Entry<Key, Document>, Entry<DocumentData, Document>>() {
            @Nullable
            @Override
            public Entry<DocumentData, Document> apply(@Nullable Entry<Key, Document> input) {
                Entry<DocumentData, Document> entry = null;
                if (input != null) {
                    entry = Maps.immutableEntry(new DocumentData(input.getKey(), Collections.singleton(input.getKey()), Collections.EMPTY_LIST, true),
                                    input.getValue());
                }
                return entry;
            }
        };
    } else {
        docMapper = new KeyToDocumentData(deepSourceCopy, myEnvironment, documentOptions, super.equality, getEvaluationFilter(),
                        this.includeHierarchyFields, this.includeHierarchyFields);
    }
    Iterator<Entry<DocumentData, Document>> sourceIterator = Iterators.transform(documentSpecificSource, from -> {
        Entry<Key, Document> entry = Maps.immutableEntry(from, documentSpecificSource.document());
        return docMapper.apply(entry);
    });
    // Take the document Keys and transform them into Entry<Key,Document>,
    // removing Attributes for this Document
    // which do not fall within the expected time range
    Iterator<Entry<Key, Document>> documents = null;
    Aggregation a = new Aggregation(this.getTimeFilter(), this.typeMetadataWithNonIndexed, compositeMetadata, this.isIncludeGroupingContext(),
                    this.includeRecordId, this.disableIndexOnlyDocuments(), getEvaluationFilter(), isTrackSizes());
    if (gatherTimingDetails()) {
        documents = Iterators.transform(sourceIterator, new EvaluationTrackingFunction<>(QuerySpan.Stage.Aggregation, trackingSpan, a));
    } else {
        documents = Iterators.transform(sourceIterator, a);
    }
    // Inject the data type as a field if the user requested it
    if (this.includeDatatype) {
        if (gatherTimingDetails()) {
            documents = Iterators.transform(documents,
                            new EvaluationTrackingFunction<>(QuerySpan.Stage.DataTypeAsField, trackingSpan, new DataTypeAsField(this.datatypeKey)));
        } else {
            documents = Iterators.transform(documents, new DataTypeAsField(this.datatypeKey));
        }
    }
    // Inject the document permutations if required
    if (!this.getDocumentPermutations().isEmpty()) {
        if (gatherTimingDetails()) {
            documents = Iterators.transform(documents, new EvaluationTrackingFunction<>(QuerySpan.Stage.DocumentPermutation, trackingSpan,
                            new DocumentPermutation.DocumentPermutationAggregation(this.getDocumentPermutations())));
        } else {
            documents = Iterators.transform(documents, new DocumentPermutation.DocumentPermutationAggregation(this.getDocumentPermutations()));
        }
    }
    if (gatherTimingDetails()) {
        documents = new EvaluationTrackingIterator(QuerySpan.Stage.DocumentEvaluation, trackingSpan,
                        getEvaluation(documentSpecificSource, deepSourceCopy, documents, compositeMetadata, typeMetadataWithNonIndexed,
                                        columnFamilies, inclusive));
    } else {
        documents = getEvaluation(documentSpecificSource, deepSourceCopy, documents, compositeMetadata, typeMetadataWithNonIndexed, columnFamilies,
                        inclusive);
    }
    // a hook to allow mapping the document, such as with the TLD or Parent
    // query logics,
    // or if the document was not aggregated in the first place because the
    // field index fields completely satisfied the query
    documents = mapDocument(deepSourceCopy, documents, compositeMetadata);
    // apply any configured post processing
    documents = getPostProcessingChain(documents);
    if (gatherTimingDetails()) {
        documents = new EvaluationTrackingIterator(QuerySpan.Stage.PostProcessing, trackingSpan, documents);
    }
    // Filter out masked values if requested
    if (this.filterMaskedValues) {
        MaskedValueFilterInterface mvfi = MaskedValueFilterFactory.get(this.isIncludeGroupingContext(), this.isReducedResponse());
        if (gatherTimingDetails()) {
            documents = Iterators.transform(documents, new EvaluationTrackingFunction<>(QuerySpan.Stage.MaskedValueFilter, trackingSpan, mvfi));
        } else {
            documents = Iterators.transform(documents, mvfi);
        }
    }
    // now filter the attributes to those with the keep flag set true
    if (gatherTimingDetails()) {
        documents = Iterators.transform(documents,
                        new EvaluationTrackingFunction<>(QuerySpan.Stage.AttributeKeepFilter, trackingSpan, new AttributeKeepFilter<>()));
    } else {
        documents = Iterators.transform(documents, new AttributeKeepFilter<>());
    }
    // Project fields using a whitelist or a blacklist before serialization
    if (this.projectResults) {
        if (gatherTimingDetails()) {
            documents = Iterators.transform(documents,
                            new EvaluationTrackingFunction<>(QuerySpan.Stage.DocumentProjection, trackingSpan, getProjection()));
        } else {
            documents = Iterators.transform(documents, getProjection());
        }
    }
    // remove the composite entries
    documents = Iterators.transform(documents, this.getCompositeProjection());
    // Filter out any documents that are now empty (e.g. due to attribute
    // projection or visibility filtering)
    if (gatherTimingDetails()) {
        documents = statelessFilter(documents,
                        new EvaluationTrackingPredicate<>(QuerySpan.Stage.EmptyDocumentFilter, trackingSpan, new EmptyDocumentFilter()));
        documents = Iterators.transform(documents,
                        new EvaluationTrackingFunction<>(QuerySpan.Stage.DocumentMetadata, trackingSpan, new DocumentMetadata()));
    } else {
        documents = statelessFilter(documents, new EmptyDocumentFilter());
        documents = Iterators.transform(documents, new DocumentMetadata());
    }
    if (!this.limitFieldsMap.isEmpty()) {
        if (gatherTimingDetails()) {
            documents = Iterators.transform(documents,
                            new EvaluationTrackingFunction<>(QuerySpan.Stage.LimitFields, trackingSpan, new LimitFields(this.getLimitFieldsMap())));
        } else {
            documents = Iterators.transform(documents, new LimitFields(this.getLimitFieldsMap()));
        }
    }
    // do I need to remove the grouping context I added above?
    if (groupingContextAddedByMe) {
        if (gatherTimingDetails()) {
            documents = Iterators.transform(documents,
                            new EvaluationTrackingFunction<>(QuerySpan.Stage.RemoveGroupingContext, trackingSpan, new RemoveGroupingContext()));
        } else {
            documents = Iterators.transform(documents, new RemoveGroupingContext());
        }
    }
    // only add the pipeline query span collection iterator, which will cache metrics with each document, if collectTimingDetails is true
    if (collectTimingDetails) {
        // if there is not a result, then add the trackingSpan to the
        // QuerySpanCollector;
        // if there was a result, then the metrics from the trackingSpan
        // will be added here
        documents = new PipelineQuerySpanCollectionIterator(querySpanCollector, trackingSpan, documents);
    }
    return documents;
}
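The javadoc above rules out Iterators.filter() because Guava's AbstractIterator latches a failed/ready/done state, while this pipeline needs hasNext() to consult the underlying iterator afresh once a value has been handed out. A minimal sketch of a filter satisfying that contract (an assumption of what statelessFilter might look like; DataWave's actual implementation is not shown in this excerpt):

import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.function.Predicate;

static <T> Iterator<T> statelessFilter(Iterator<T> source, Predicate<T> keep) {
    return new Iterator<T>() {
        private T next; // the only state: the next matching value, if one has been found

        @Override
        public boolean hasNext() {
            // re-consult the source whenever no value is cached
            while (next == null && source.hasNext()) {
                T candidate = source.next();
                if (keep.test(candidate)) {
                    next = candidate;
                }
            }
            return next != null;
        }

        @Override
        public T next() {
            if (!hasNext()) {
                throw new NoSuchElementException();
            }
            T result = next;
            next = null; // clear, so the following hasNext() reads the source again
            return result;
        }
    };
}

The single cached field is cleared as soon as next() hands the value out, so every subsequent hasNext() re-reads the source; that is what lets the pipeline reset the bottom iterator with a new value.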
use of datawave.query.iterator.profile.QuerySpan in project datawave by NationalSecurityAgency.
the class FieldIndexOnlyQueryIterator method init.
@Override
public void init(SortedKeyValueIterator<Key, Value> source, Map<String, String> options, IteratorEnvironment env) throws IOException {
    if (log.isTraceEnabled()) {
        log.trace("QueryIterator init()");
    }
    if (!validateOptions(options)) {
        throw new IllegalArgumentException("Could not initialize QueryIterator with " + options);
    }
    // Parse & flatten the query
    try {
        script = JexlASTHelper.parseAndFlattenJexlQuery(this.getQuery());
    } catch (ParseException e) {
        throw new IOException("Could not parse the JEXL query: '" + this.getQuery() + "'", e);
    }
    this.documentOptions = options;
    this.myEnvironment = env;
    if (collectTimingDetails) {
        trackingSpan = new QuerySpan(getStatsdClient());
        this.source = new SourceTrackingIterator(trackingSpan, source);
    } else {
        this.source = source;
    }
    this.fiAggregator = new IdentityAggregator(null, null);
    this.sourceForDeepCopies = this.source.deepCopy(this.myEnvironment);
}
use of datawave.query.iterator.profile.QuerySpan in project datawave by NationalSecurityAgency.
the class DatawaveFieldIndexCachingIteratorJexl method seek.
@Override
public void seek(Range r, Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException {
    if (log.isTraceEnabled()) {
        log.trace("begin seek, range: " + r);
    }
    if (!lastRangeSeekedContains(r)) {
        // the start of this range is beyond the end of the last range seeked;
        // we must reset keyValues to null and empty the underlying collection
        clearRowBasedHdfsBackedSet();
    } else {
        // inside the original range, so potentially need to reposition keyValues
        if (keys != null) {
            Key startKey = r.getStartKey();
            // decide if keyValues needs to be rebuilt or can be reused
            if (!keys.hasNext() || (keys.peek().compareTo(startKey) > 0)) {
                keys = new CachingIterator<>(threadSafeSet.iterator());
            }
        }
    }
    // if we are not sorting UIDs, then determine whether we have a cq and capture the lastFiKey
    Key lastFiKey = null;
    if (!sortedUIDs && r.getStartKey().getColumnFamily().getLength() > 0 && r.getStartKey().getColumnQualifier().getLength() > 0) {
        Key startKey = r.getStartKey();
        String cq = startKey.getColumnQualifier().toString();
        int fieldnameIndex = cq.indexOf('\0');
        if (fieldnameIndex >= 0) {
            String cf = startKey.getColumnFamily().toString();
            lastFiKey = new Key(startKey.getRow().toString(), "fi\0" + cq.substring(0, fieldnameIndex),
                            cq.substring(fieldnameIndex + 1) + '\0' + cf + '\0');
        }
    }
    this.lastRangeSeeked = r;
    QuerySpan querySpan = null;
    try {
        this.fiRow = null;
        // this will block until an ivarator source becomes available
        final SortedKeyValueIterator<Key, Value> source = takePoolSource();
        try {
            if (collectTimingDetails && source instanceof SourceTrackingIterator) {
                querySpan = ((SourceTrackingIterator) source).getQuerySpan();
            }
            // seek our underlying source to the start of the incoming range;
            // expand the range, as the underlying table may not actually contain the keys in this range since we are only returning keys
            // as specified by the returnKeyType
            Range seekRange = new Range(lastRangeSeeked.getStartKey(), lastRangeSeeked.isStartKeyInclusive(),
                            (lastRangeSeeked.getEndKey() == null ? null : new Key(lastRangeSeeked.getEndKey().getRow()).followingKey(PartialKey.ROW)),
                            false);
            source.seek(seekRange, EMPTY_CFS, false);
            scannedKeys.incrementAndGet();
            if (log.isTraceEnabled()) {
                try {
                    log.trace("lastRangeSeeked: " + lastRangeSeeked + " source.getTopKey(): " + (source != null ? source.getTopKey() : null));
                } catch (Exception ex) {
                    log.trace("Ignoring this while logging a trace message:", ex);
                    // let's not ruin everything when trace is on...
                }
            }
            // Determine the bounding FI ranges for the field index for this row
            this.boundingFiRanges.clear();
            if (source.hasTop()) {
                this.fiRow = source.getTopKey().getRow();
                this.boundingFiRanges.addAll(buildBoundingFiRanges(fiRow, fiName, fieldValue));
                // if we had a lastFiKey, then skip forward to the bounding range
                // containing the last value returned, and modify that range appropriately
                if (lastFiKey != null) {
                    if (log.isTraceEnabled()) {
                        log.trace("Reseeking fi to lastFiKey: " + lastFiKey);
                    }
                    while (!boundingFiRanges.isEmpty() && !boundingFiRanges.get(0).contains(lastFiKey)) {
                        if (log.isTraceEnabled()) {
                            log.trace("Skipping range: " + boundingFiRanges.get(0));
                        }
                        boundingFiRanges.remove(0);
                        if (this.boundingFiRanges.isEmpty()) {
                            moveToNextRow();
                        }
                    }
                    if (!boundingFiRanges.isEmpty()) {
                        if (log.isTraceEnabled()) {
                            log.trace("Starting in range: " + boundingFiRanges.get(0));
                        }
                        Range boundingFiRange = boundingFiRanges.get(0);
                        boundingFiRange = new Range(lastFiKey, false, boundingFiRange.getEndKey(), boundingFiRange.isEndKeyInclusive());
                        boundingFiRanges.set(0, boundingFiRange);
                        if (log.isTraceEnabled()) {
                            log.trace("Reset range to: " + boundingFiRanges.get(0));
                        }
                    }
                }
            } else {
                this.topKey = null;
            }
        } finally {
            returnPoolSource(source);
        }
        // now let's find the top key
        if (this.fiRow != null) {
            findTop();
        }
        if (log.isTraceEnabled()) {
            log.trace("seek, topKey : " + ((null == topKey) ? "null" : topKey));
        }
    } finally {
        if (collectTimingDetails && querySpanCollector != null && querySpan != null) {
            querySpanCollector.addQuerySpan(querySpan);
        }
    }
}
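For reference, the lastFiKey computation near the top of seek() inverts an event key into field-index key layout. The layout below is inferred solely from the substring logic in this method, and all values are hypothetical:

import org.apache.accumulo.core.data.Key;

// event key layout (as consumed above):  row : datatype\0uid : FIELDNAME\0fieldvalue
// field-index key layout (as produced):  row : fi\0FIELDNAME : fieldvalue\0datatype\0uid\0
Key startKey = new Key("20130101_0", "datatype\0uid", "FIELDNAME\0fieldvalue");
String cq = startKey.getColumnQualifier().toString();
int fieldnameIndex = cq.indexOf('\0');
String cf = startKey.getColumnFamily().toString();
Key lastFiKey = new Key(startKey.getRow().toString(),
                "fi\0" + cq.substring(0, fieldnameIndex),                // fi\0FIELDNAME
                cq.substring(fieldnameIndex + 1) + '\0' + cf + '\0');    // fieldvalue\0datatype\0uid\0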