use of datawave.query.predicate.ChainableEventDataQueryFilter in project datawave by NationalSecurityAgency.
the class TLDQueryIterator method getFIEvaluationFilter.
/**
 * Builds the filter applied to field index (FI) keys. It is distinct from the evaluation filter because it additionally prevents FI hits on root pointer
 * keys whose field is not an index only field.
 *
 * @return a chained filter consisting of the result of getEvaluationFilter() followed by the root pointer / index only restriction
 */
protected EventDataQueryFilter getFIEvaluationFilter() {
    // at the tld level only index only fields may be kept, anything that is not a root pointer is always allowed
    final EventDataQueryFilter indexOnlyRootFilter = new EventDataQueryFilter() {
        @Override
        public void startNewDocument(Key documentKey) {
            // nothing to reset, this filter carries no per-document state
        }

        @Override
        public boolean apply(@Nullable Map.Entry<Key, String> input) {
            // accept everything
            return true;
        }

        @Override
        public boolean peek(@Nullable Map.Entry<Key, String> input) {
            // accept everything
            return true;
        }

        /**
         * A key is kept when it is not a root pointer, or when it is a root pointer for an index only field.
         *
         * @param k
         *            the key under consideration
         * @return true if the key should be kept
         */
        @Override
        public boolean keep(Key k) {
            final boolean rootPointer = TLDEventDataFilter.isRootPointer(k);
            final DatawaveKey parsed = new DatawaveKey(k);
            if (!rootPointer) {
                return true;
            }
            return getIndexOnlyFields().contains(parsed.getFieldName());
        }

        @Override
        public Key getStartKey(Key from) {
            throw new UnsupportedOperationException();
        }

        @Override
        public Key getStopKey(Key from) {
            throw new UnsupportedOperationException();
        }

        @Override
        public Range getKeyRange(Map.Entry<Key, Document> from) {
            throw new UnsupportedOperationException();
        }

        @Override
        public EventDataQueryFilter clone() {
            // the same instance is handed back rather than a copy
            return this;
        }

        @Override
        public Range getSeekRange(Key current, Key endKey, boolean endKeyInclusive) {
            throw new UnsupportedOperationException();
        }

        @Override
        public int getMaxNextCount() {
            return -1;
        }

        @Override
        public Key transform(Key toTransform) {
            return null;
        }
    };

    final ChainableEventDataQueryFilter chain = new ChainableEventDataQueryFilter();
    // the current evaluation filter is always the first link in the chain
    chain.addFilter(getEvaluationFilter());
    chain.addFilter(indexOnlyRootFilter);
    return chain;
}
use of datawave.query.predicate.ChainableEventDataQueryFilter in project datawave by NationalSecurityAgency.
the class IteratorBuildingVisitor method createWrappedTermFrequencyFilter.
/**
 * Creates a chained filter made of the optional existing filter followed by an expression filter built for the given node. The expression filter is
 * configured with the union of the index only fields and the term frequency fields.
 *
 * @param identifier
 *            not referenced by this method
 * @param node
 *            the node handed to the expression filter
 * @param existing
 *            a filter to place first in the chain, skipped when null
 * @return the chained filter
 */
protected ChainableEventDataQueryFilter createWrappedTermFrequencyFilter(String identifier, JexlNode node, EventDataQueryFilter existing) {
    // the non-event fields are the index only fields plus the term frequency fields
    final Set<String> combinedFields = new HashSet<>(indexOnlyFields.size() + termFrequencyFields.size());
    combinedFields.addAll(indexOnlyFields);
    combinedFields.addAll(termFrequencyFields);

    final ChainableEventDataQueryFilter wrapper = new ChainableEventDataQueryFilter();
    if (null != existing) {
        wrapper.addFilter(existing);
    }
    wrapper.addFilter(new EventDataQueryExpressionFilter(node, typeMetadata, combinedFields) {
        @Override
        public boolean keep(Key key) {
            // anything that would otherwise be added must actually be a value match, which is required when dealing with TF ranges
            return peek(key);
        }
    });
    return wrapper;
}
use of datawave.query.predicate.ChainableEventDataQueryFilter in project datawave by NationalSecurityAgency.
the class IteratorBuildingVisitor method buildExceededFromTermFrequency.
/**
 * Builds an iterator that merges a term frequency index iterator with an event field iterator for the given field. Only supported when limitLookup is
 * set; otherwise a fatal query exception is raised.
 *
 * @param identifier
 *            the field used for both the event field iterator and the term frequency builder
 * @param rootNode
 *            the node set on the term frequency builder
 * @param sourceNode
 *            the node used to create the wrapped filter and the term frequency aggregator
 * @param range
 *            the range converted through getFiRangeForTF before being set on the builder
 * @param data
 *            not referenced by this method
 * @return an OrIterator over the term frequency iterator and the event field iterator
 * @throws DatawaveFatalQueryException
 *             if limitLookup is not set
 */
private NestedIterator<Key> buildExceededFromTermFrequency(String identifier, JexlNode rootNode, JexlNode sourceNode, LiteralRange<?> range, Object data) {
    // this path is only valid for limited lookups, bail out early otherwise
    if (!limitLookup) {
        throw new DatawaveFatalQueryException(
                        new QueryException(DatawaveErrorCode.UNEXPECTED_SOURCE_NODE, MessageFormat.format("{0}", "buildExceededFromTermFrequency")));
    }

    // event side: the attribute filter is wrapped with an expression filter for the source node
    final ChainableEventDataQueryFilter wrappedFilter = createWrappedTermFrequencyFilter(identifier, sourceNode, attrFilter);
    final NestedIterator<Key> eventSide = new EventFieldIterator(rangeLimiter, source.deepCopy(env), identifier, new AttributeFactory(this.typeMetadata),
                    getEventFieldAggregator(identifier, wrappedFilter));

    // term frequency side: configured on its own copy of the source
    final TermFrequencyIndexBuilder tfBuilder = new TermFrequencyIndexBuilder();
    tfBuilder.setSource(source.deepCopy(env));
    tfBuilder.setTypeMetadata(typeMetadata);
    tfBuilder.setFieldsToAggregate(fieldsToAggregate);
    tfBuilder.setTimeFilter(timeFilter);
    tfBuilder.setAttrFilter(attrFilter);
    tfBuilder.setDatatypeFilter(datatypeFilter);
    tfBuilder.setEnv(env);
    // without an attribute filter there is no next count to honor, so -1 is passed
    final int maxNextCount = (attrFilter == null) ? -1 : attrFilter.getMaxNextCount();
    tfBuilder.setTermFrequencyAggregator(getTermFrequencyAggregator(identifier, sourceNode, attrFilter, maxNextCount));
    tfBuilder.setNode(rootNode);
    tfBuilder.setRange(getFiRangeForTF(range));
    tfBuilder.setField(identifier);
    final NestedIterator<Key> tfSide = tfBuilder.build();

    return new OrIterator(Arrays.asList(tfSide, eventSide));
}
use of datawave.query.predicate.ChainableEventDataQueryFilter in project datawave by NationalSecurityAgency.
the class TLDIndexBuildingVisitor method buildTermFrequencyAggregator.
/**
 * Builds the term frequency aggregator for the given field. Uses fieldsToAggregate instead of indexOnlyFields because this enables TLDs to return non-event
 * tokens as part of the user document.
 * <p>
 * A root pointer filter is appended to the supplied chain before the aggregator is created, so the passed-in filter is modified by this call.
 *
 * @param identifier
 *            the field to aggregate; only aggregated when it is contained in fieldsToAggregate
 * @param filter
 *            the filter chain the root pointer filter is appended to, and which is handed to the aggregator
 * @param maxNextCount
 *            currently not referenced; the value of filter.getMaxNextCount() is passed to the aggregator instead. NOTE(review): confirm this is intended
 * @return a TLDTermFrequencyAggregator for the field, or for no fields when the identifier is not in fieldsToAggregate
 */
@Override
protected TermFrequencyAggregator buildTermFrequencyAggregator(String identifier, ChainableEventDataQueryFilter filter, int maxNextCount) {
    EventDataQueryFilter rootFilter = new EventDataQueryFilter() {
        @Override
        public void startNewDocument(Key documentKey) {
            // no-op
        }

        @Override
        public boolean apply(@Nullable Entry<Key, String> var1) {
            // accept all
            return true;
        }

        @Override
        public boolean peek(@Nullable Entry<Key, String> var1) {
            // accept all
            return true;
        }

        /**
         * Only keep the tf key if it isn't the root pointer or if it is index only and contributes to document evaluation
         *
         * @param k
         *            the term frequency key under consideration
         * @return true if the key passes the root pointer check and, when an attribute filter is present, that filter's peek
         */
        @Override
        public boolean keep(Key k) {
            DatawaveKey key = new DatawaveKey(k);
            if (TLDEventDataFilter.isRootPointer(k) && !indexOnlyFields.contains(key.getFieldName())) {
                return false;
            }
            // attrFilter is treated as optional elsewhere in this code base, so a missing attribute filter imposes no further restriction here rather
            // than failing with a NullPointerException
            if (attrFilter == null) {
                return true;
            }
            // explicitly typed entry rather than a raw SimpleEntry to avoid an unchecked conversion
            return attrFilter.peek(new AbstractMap.SimpleEntry<Key, String>(k, null));
        }

        @Override
        public Key getStartKey(Key from) {
            throw new UnsupportedOperationException();
        }

        @Override
        public Key getStopKey(Key from) {
            throw new UnsupportedOperationException();
        }

        @Override
        public Range getKeyRange(Entry<Key, Document> from) {
            throw new UnsupportedOperationException();
        }

        @Override
        public EventDataQueryFilter clone() {
            // the same instance is handed back rather than a copy
            return this;
        }

        @Override
        public Range getSeekRange(Key current, Key endKey, boolean endKeyInclusive) {
            throw new UnsupportedOperationException();
        }

        @Override
        public int getMaxNextCount() {
            return -1;
        }

        @Override
        public Key transform(Key toTransform) {
            throw new UnsupportedOperationException();
        }
    };
    filter.addFilter(rootFilter);

    // aggregate the field only when it was requested for aggregation
    Set<String> toAggregate = fieldsToAggregate.contains(identifier) ? Collections.singleton(identifier) : Collections.emptySet();
    return new TLDTermFrequencyAggregator(toAggregate, filter, filter.getMaxNextCount());
}
Aggregations