use of datawave.query.iterator.aggregation.DocumentData in project datawave by NationalSecurityAgency.
the class KeyToDocumentData method apply.
/**
 * Maps a document key (and the document gathered for it so far) to the raw key/value data found for that document in the source.
 * <p>
 * The source is sought to the range computed by {@code getKeyRange(from)}. When the source has a top key after the seek, the document attributes are collected
 * and the hierarchy fields are appended; otherwise the attribute list is empty.
 *
 * @param from
 *            the document key paired with its document
 * @return the collected {@link DocumentData} paired with the incoming document, or {@code null} when {@code from}, its key or its value is {@code null}
 * @throws DatawaveFatalQueryException
 *             wrapping any {@link IOException} raised while seeking or reading the source
 */
@Override
public Entry<DocumentData, Document> apply(Entry<Key, Document> from) {
    // Nothing can be mapped without an entry that carries both a key and a document
    if (from == null || from.getKey() == null || from.getValue() == null) {
        return null;
    }

    final Range seekRange = getKeyRange(from);
    try {
        source.seek(seekRange, columnFamilies, inclusive);

        if (log.isDebugEnabled()) {
            log.debug(source.hasTop() + " Key range is " + seekRange);
        }

        final Set<Key> docKeys = new HashSet<>();

        // Assigned exactly once, in whichever branch applies
        final List<Entry<Key, Value>> attrs;
        if (!source.hasTop()) {
            attrs = Collections.emptyList();
        } else {
            attrs = this.collectDocumentAttributes(from.getKey(), docKeys, seekRange);
            this.appendHierarchyFields(attrs, seekRange, from.getKey());
        }

        final DocumentData documentData = new DocumentData(from.getKey(), docKeys, attrs, false);
        return Maps.immutableEntry(documentData, from.getValue());
    } catch (IOException e) {
        log.error("Unable to collection document attributes for evaluation: " + seekRange, e);
        throw new DatawaveFatalQueryException(new QueryException(DatawaveErrorCode.DOCUMENT_EVALUATION_ERROR, e));
    }
}
use of datawave.query.iterator.aggregation.DocumentData in project datawave by NationalSecurityAgency.
the class IndexOnlyKeyToDocumentData method next.
/**
 * Fetches the next index-only key/value via {@code seekNext(false)} and wraps it as document data for the iterator's document.
 *
 * @return the next {@link DocumentData} paired with the iterator's document, or {@code null} when {@code seekNext} returned {@code null}
 * @throws NoSuchElementException
 *             when {@code seekNext} returned the {@code ITERATOR_COMPLETE_KEY} sentinel (presumably meaning the iterator is exhausted -- confirm against
 *             {@code seekNext})
 * @throws DatawaveFatalQueryException
 *             wrapping any {@link IOException} raised while seeking
 */
@Override
public Entry<DocumentData, Document> next() {
    final Entry<Key, Value> next;
    try {
        next = this.seekNext(false);
    } catch (IOException e) {
        QueryException qe = new QueryException(DatawaveErrorCode.SEEK_NEXT_ELEMENT_ERROR, e);
        throw new DatawaveFatalQueryException(qe);
    }

    // The sentinel must be tested BEFORE the non-null check. Previously this test sat in an else-branch that was only reached when next was null, so a
    // non-null sentinel was never detected and was wrapped into a DocumentData as if it were real data.
    if (next == ITERATOR_COMPLETE_KEY) {
        QueryException qe = new QueryException(DatawaveErrorCode.FETCH_NEXT_ELEMENT_ERROR, MessageFormat.format("Fieldname: {0}, Range: {1}", this.fieldName, this.parent));
        throw (NoSuchElementException) (new NoSuchElementException().initCause(qe));
    }

    if (null == next) {
        return null;
    }

    final List<Entry<Key, Value>> keyValues = new LinkedList<>();
    keyValues.add(next);
    Key docKey = getDocKey(next.getKey());
    final DocumentData documentData = new DocumentData(this.iteratorDocumentKey, Collections.singleton(docKey), keyValues, true);
    return Maps.immutableEntry(documentData, this.iteratorDocument);
}
use of datawave.query.iterator.aggregation.DocumentData in project datawave by NationalSecurityAgency.
the class QueryIterator method createDocumentPipeline.
/**
 * Create the pipeline. It is very important that this pipeline can handle resetting the bottom iterator with a new value. This means that hasNext() needs
 * to call the next iterator. The only state that can be maintained is the next value ready after hasNext() has been called. Once next returns the value,
 * the next hasNext() call must call the next iterator again. So for example Iterators.filter() cannot be used as it uses a google commons AbstractIterator
 * that maintains an iterator state (failed, ready, done); use statelessFilter above instead.
 * <p>
 * Every stage is wrapped in an evaluation-tracking function/iterator/predicate when {@code gatherTimingDetails()} is true, and applied bare otherwise.
 *
 * @param deepSourceCopy
 *            source iterator handed to the key-to-document mapper, the evaluation stage and the document mapping hook
 * @param documentSpecificSource
 *            iterator of document keys that drives the pipeline; each key is paired with {@code documentSpecificSource.document()}
 * @param columnFamilies
 *            column families passed through to the evaluation stage
 * @param inclusive
 *            column family inclusivity flag passed through to the evaluation stage
 * @param querySpanCollector
 *            collector handed to the span collection iterator when {@code collectTimingDetails} is set
 * @return iterator of document keys and their documents
 */
public Iterator<Entry<Key, Document>> createDocumentPipeline(SortedKeyValueIterator<Key, Value> deepSourceCopy, final NestedQueryIterator<Key> documentSpecificSource, Collection<ByteSequence> columnFamilies, boolean inclusive, QuerySpanCollector querySpanCollector) {
    QuerySpan trackingSpan = null;
    if (gatherTimingDetails()) {
        trackingSpan = new QuerySpan(getStatsdClient());
    }
    if (log.isTraceEnabled()) {
        log.trace("createDocumentPipeline");
    }

    final Function<Entry<Key, Document>, Entry<DocumentData, Document>> docMapper;
    if (isFieldIndexSatisfyingQuery()) {
        if (log.isTraceEnabled()) {
            log.trace("isFieldIndexSatisfyingQuery");
        }
        // no source lookup here: the document data is built from the key alone, with no key/values
        docMapper = new Function<Entry<Key, Document>, Entry<DocumentData, Document>>() {
            @Nullable
            @Override
            public Entry<DocumentData, Document> apply(@Nullable Entry<Key, Document> input) {
                Entry<DocumentData, Document> entry = null;
                if (input != null) {
                    // typed empty list instead of the raw Collections.EMPTY_LIST constant (same shared instance, no unchecked conversion)
                    entry = Maps.immutableEntry(new DocumentData(input.getKey(), Collections.singleton(input.getKey()), Collections.<Entry<Key, Value>> emptyList(), true), input.getValue());
                }
                return entry;
            }
        };
    } else {
        docMapper = new KeyToDocumentData(deepSourceCopy, myEnvironment, documentOptions, super.equality, getEvaluationFilter(), this.includeHierarchyFields, this.includeHierarchyFields);
    }

    Iterator<Entry<DocumentData, Document>> sourceIterator = Iterators.transform(documentSpecificSource, from -> {
        Entry<Key, Document> entry = Maps.immutableEntry(from, documentSpecificSource.document());
        return docMapper.apply(entry);
    });

    // Take the document Keys and transform it into Entry<Key,Document>,
    // removing Attributes for this Document
    // which do not fall within the expected time range
    Iterator<Entry<Key, Document>> documents;
    Aggregation a = new Aggregation(this.getTimeFilter(), this.typeMetadataWithNonIndexed, compositeMetadata, this.isIncludeGroupingContext(), this.includeRecordId, this.disableIndexOnlyDocuments(), getEvaluationFilter(), isTrackSizes());
    if (gatherTimingDetails()) {
        documents = Iterators.transform(sourceIterator, new EvaluationTrackingFunction<>(QuerySpan.Stage.Aggregation, trackingSpan, a));
    } else {
        documents = Iterators.transform(sourceIterator, a);
    }

    // Inject the data type as a field if the user requested it
    if (this.includeDatatype) {
        if (gatherTimingDetails()) {
            documents = Iterators.transform(documents, new EvaluationTrackingFunction<>(QuerySpan.Stage.DataTypeAsField, trackingSpan, new DataTypeAsField(this.datatypeKey)));
        } else {
            documents = Iterators.transform(documents, new DataTypeAsField(this.datatypeKey));
        }
    }

    // Inject the document permutations if required
    if (!this.getDocumentPermutations().isEmpty()) {
        if (gatherTimingDetails()) {
            documents = Iterators.transform(documents, new EvaluationTrackingFunction<>(QuerySpan.Stage.DocumentPermutation, trackingSpan, new DocumentPermutation.DocumentPermutationAggregation(this.getDocumentPermutations())));
        } else {
            documents = Iterators.transform(documents, new DocumentPermutation.DocumentPermutationAggregation(this.getDocumentPermutations()));
        }
    }

    if (gatherTimingDetails()) {
        documents = new EvaluationTrackingIterator(QuerySpan.Stage.DocumentEvaluation, trackingSpan, getEvaluation(documentSpecificSource, deepSourceCopy, documents, compositeMetadata, typeMetadataWithNonIndexed, columnFamilies, inclusive));
    } else {
        documents = getEvaluation(documentSpecificSource, deepSourceCopy, documents, compositeMetadata, typeMetadataWithNonIndexed, columnFamilies, inclusive);
    }

    // a hook to allow mapping the document such as with the TLD or Parent
    // query logics
    // or if the document was not aggregated in the first place because the
    // field index fields completely satisfied the query
    documents = mapDocument(deepSourceCopy, documents, compositeMetadata);

    // apply any configured post processing
    documents = getPostProcessingChain(documents);
    if (gatherTimingDetails()) {
        documents = new EvaluationTrackingIterator(QuerySpan.Stage.PostProcessing, trackingSpan, documents);
    }

    // Filter out masked values if requested
    if (this.filterMaskedValues) {
        MaskedValueFilterInterface mvfi = MaskedValueFilterFactory.get(this.isIncludeGroupingContext(), this.isReducedResponse());
        if (gatherTimingDetails()) {
            documents = Iterators.transform(documents, new EvaluationTrackingFunction<>(QuerySpan.Stage.MaskedValueFilter, trackingSpan, mvfi));
        } else {
            documents = Iterators.transform(documents, mvfi);
        }
    }

    // now filter the attributes to those with the keep flag set true
    if (gatherTimingDetails()) {
        documents = Iterators.transform(documents, new EvaluationTrackingFunction<>(QuerySpan.Stage.AttributeKeepFilter, trackingSpan, new AttributeKeepFilter<>()));
    } else {
        documents = Iterators.transform(documents, new AttributeKeepFilter<>());
    }

    // Project fields using a whitelist or a blacklist before serialization
    if (this.projectResults) {
        if (gatherTimingDetails()) {
            documents = Iterators.transform(documents, new EvaluationTrackingFunction<>(QuerySpan.Stage.DocumentProjection, trackingSpan, getProjection()));
        } else {
            documents = Iterators.transform(documents, getProjection());
        }
    }

    // remove the composite entries
    documents = Iterators.transform(documents, this.getCompositeProjection());

    // Filter out any documents which are empty (e.g. due to attribute
    // projection or visibility filtering), then apply the document metadata stage.
    // statelessFilter is used rather than Iterators.filter; see the method comment.
    if (gatherTimingDetails()) {
        documents = statelessFilter(documents, new EvaluationTrackingPredicate<>(QuerySpan.Stage.EmptyDocumentFilter, trackingSpan, new EmptyDocumentFilter()));
        documents = Iterators.transform(documents, new EvaluationTrackingFunction<>(QuerySpan.Stage.DocumentMetadata, trackingSpan, new DocumentMetadata()));
    } else {
        documents = statelessFilter(documents, new EmptyDocumentFilter());
        documents = Iterators.transform(documents, new DocumentMetadata());
    }

    if (!this.limitFieldsMap.isEmpty()) {
        if (gatherTimingDetails()) {
            documents = Iterators.transform(documents, new EvaluationTrackingFunction<>(QuerySpan.Stage.LimitFields, trackingSpan, new LimitFields(this.getLimitFieldsMap())));
        } else {
            documents = Iterators.transform(documents, new LimitFields(this.getLimitFieldsMap()));
        }
    }

    // do I need to remove the grouping context I added above?
    if (groupingContextAddedByMe) {
        if (gatherTimingDetails()) {
            documents = Iterators.transform(documents, new EvaluationTrackingFunction<>(QuerySpan.Stage.RemoveGroupingContext, trackingSpan, new RemoveGroupingContext()));
        } else {
            documents = Iterators.transform(documents, new RemoveGroupingContext());
        }
    }

    // only add the pipeline query span collection iterator which will cache metrics with each document if collectTimingDetails is true
    if (collectTimingDetails) {
        // if there is not a result, then add the trackingSpan to the
        // QuerySpanCollector
        // if there was a result, then the metrics from the trackingSpan
        // will be added here
        documents = new PipelineQuerySpanCollectionIterator(querySpanCollector, trackingSpan, documents);
    }
    return documents;
}
use of datawave.query.iterator.aggregation.DocumentData in project datawave by NationalSecurityAgency.
the class Aggregation method apply.
/**
 * Builds a {@link Document} from the raw document data and merges in the document that accompanied it.
 * <p>
 * The raw key/values are passed through the time filter before the document is constructed. The accompanying document is merged in only when it is non-null
 * and non-empty, and either index-only documents are not disabled or the newly built document already has content.
 *
 * @param from
 *            the raw document data paired with the document accompanying it
 * @return the document key from the document data, paired with the aggregated document
 */
@Override
public Entry<Key, Document> apply(Entry<DocumentData, Document> from) {
    final DocumentData docData = from.getKey();

    // let the attribute filter, when one is configured, know which document is being started
    if (attrFilter != null) {
        attrFilter.startNewDocument(docData.getKey());
    }

    // Only key/values accepted by the time filter are handed to the new document
    final Document aggregated = new Document(
                    docData.getKey(),
                    docData.getDocKeys(),
                    docData.isFromIndex(),
                    Iterators.filter(docData.getData().iterator(), timeFilter.getKeyValueTimeFilter()),
                    this.typeMetadata,
                    this.compositeMetadata,
                    this.includeGroupingContext,
                    this.includeRecordId,
                    this.attrFilter,
                    true,
                    trackSizes);

    if (log.isTraceEnabled()) {
        log.trace("disable index only docs? " + disableIndexOnlyDocuments + " , size is " + aggregated.size());
    }

    final Document accompanying = from.getValue();
    final boolean hasAccompanyingFields = accompanying != null && accompanying.size() > 0;
    if (hasAccompanyingFields && (!disableIndexOnlyDocuments || aggregated.size() > 0)) {
        aggregated.putAll(accompanying, this.includeGroupingContext);
    }

    final Key documentKey = docData.getKey();
    if (log.isTraceEnabled()) {
        log.trace("Computed document for " + documentKey + ": " + aggregated);
    }
    return Maps.immutableEntry(documentKey, aggregated);
}
use of datawave.query.iterator.aggregation.DocumentData in project datawave by NationalSecurityAgency.
the class DynamicFacetIterator method getDocumentIterator.
/**
 * Builds the document iterator for a facet query over the given range.
 * <p>
 * The field index iterator is created and sought first. Each field index result is then turned into {@link DocumentData}: via a {@link KeyToDocumentData}
 * lookup when the configuration has no field limits or a faceted-field whitelist is configured, otherwise via a stub that carries no keys or attributes.
 * The results are aggregated into documents and, for the {@code SHARD_COUNT} and {@code DAY_COUNT} types, evaluated and reduced to count cardinalities.
 *
 * @param range
 *            the range to seek
 * @param columnFamilies
 *            column families passed to the seeks and to the evaluation stage
 * @param inclusive
 *            column family inclusivity flag passed to the seeks and to the evaluation stage
 * @return iterator of document keys and their documents
 * @throws IOException
 *             declared by the signature; presumably raised by the underlying seeks
 * @throws ConfigException
 *             declared by the signature
 * @throws InstantiationException
 *             declared by the signature
 * @throws IllegalAccessException
 *             declared by the signature
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
@Override
public Iterator<Entry<Key, Document>> getDocumentIterator(Range range, Collection<ByteSequence> columnFamilies, boolean inclusive) throws IOException, ConfigException, InstantiationException, IllegalAccessException {
    // Seek() the boolean logic stuff
    createAndSeekIndexIterator(range, columnFamilies, inclusive);

    // TODO consider using the new EventDataQueryExpressionFilter
    EventDataQueryFieldFilter projection = null;
    if (!configuration.getFacetedFields().isEmpty()) {
        projection = new EventDataQueryFieldFilter();
        projection.initializeWhitelist(configuration.getFacetedFields());
    }

    Function<Entry<Key, Document>, Entry<DocumentData, Document>> keyToDoc = null;
    if (!configuration.hasFieldLimits() || projection != null) {
        keyToDoc = new KeyToDocumentData(source.deepCopy(myEnvironment), super.equality, projection, this.includeHierarchyFields, this.includeHierarchyFields);
    }

    final AccumuloTreeIterable<Key, DocumentData> doc;
    if (null != keyToDoc) {
        doc = new AccumuloTreeIterable<>(fieldIndexResults.tree, keyToDoc);
    } else {
        if (log.isTraceEnabled()) {
            log.trace("Skipping document lookup, because we don't need it");
        }
        doc = new AccumuloTreeIterable<>(fieldIndexResults.tree, new Function<Entry<Key, Document>, Entry<DocumentData, Document>>() {
            @Override
            @Nullable
            public Entry<DocumentData, Document> apply(@Nullable Entry<Key, Document> input) {
                // input is declared nullable: return null rather than dereferencing it
                if (input == null) {
                    return null;
                }
                Set<Key> docKeys = Sets.newHashSet();
                List<Entry<Key, Value>> attrs = Lists.newArrayList();
                return Maps.immutableEntry(new DocumentData(input.getKey(), docKeys, attrs, true), input.getValue());
            }
        });
    }
    doc.seek(range, columnFamilies, inclusive);

    TypeMetadata typeMetadata = this.getTypeMetadata();
    Iterator<Entry<Key, Document>> documents = Iterators.transform(doc.iterator(), new Aggregation(this.getTimeFilter(), typeMetadata, compositeMetadata, this.isIncludeGroupingContext(), this.includeRecordId, false, null));

    switch (configuration.getType()) {
        case SHARD_COUNT:
        case DAY_COUNT:
            SortedKeyValueIterator<Key, Value> sourceDeepCopy = source.deepCopy(myEnvironment);
            documents = getEvaluation(sourceDeepCopy, documents, compositeMetadata, typeMetadata, columnFamilies, inclusive);
            // Reduce each evaluated document to its count cardinality for the configured facet type
            documents = Iterators.transform(documents, new DocumentCountCardinality(configuration.getType(), !merge));
            // explicit break: previously this case fell through into default
            break;
        default:
            break;
    }
    return documents;
}
Aggregations