use of datawave.query.predicate.EventDataQueryFilter in project datawave by NationalSecurityAgency.
the class TLDQueryIterator method getFIEvaluationFilter.
/**
 * Builds the filter used for field index (FI) hits. It differs from the plain evaluation filter in that it additionally rejects FI hits on keys
 * that are root pointers unless their field is index only.
 *
 * @return a chained filter holding the evaluation filter followed by the root pointer/index only filter
 */
protected EventDataQueryFilter getFIEvaluationFilter() {
    // accepts everything in apply/peek; keep() is the only method that actually filters
    final EventDataQueryFilter rootIndexOnlyFilter = new EventDataQueryFilter() {
        @Override
        public void startNewDocument(Key documentKey) {
            // nothing to reset between documents
        }

        @Override
        public boolean apply(@Nullable Map.Entry<Key, String> entry) {
            return true;
        }

        @Override
        public boolean peek(@Nullable Map.Entry<Key, String> entry) {
            return true;
        }

        /**
         * A key that is not a root pointer is always kept; a root pointer is kept only when its field is index only.
         *
         * @param k
         *            the key to test
         * @return true if the key should be kept
         */
        @Override
        public boolean keep(Key k) {
            final boolean rootPointer = TLDEventDataFilter.isRootPointer(k);
            final DatawaveKey parsed = new DatawaveKey(k);
            if (!rootPointer) {
                return true;
            }
            return getIndexOnlyFields().contains(parsed.getFieldName());
        }

        @Override
        public Key getStartKey(Key from) {
            throw new UnsupportedOperationException();
        }

        @Override
        public Key getStopKey(Key from) {
            throw new UnsupportedOperationException();
        }

        @Override
        public Range getKeyRange(Map.Entry<Key, Document> from) {
            throw new UnsupportedOperationException();
        }

        @Override
        public EventDataQueryFilter clone() {
            // the filter carries no fields of its own, so the same instance is handed back
            return this;
        }

        @Override
        public Range getSeekRange(Key current, Key endKey, boolean endKeyInclusive) {
            throw new UnsupportedOperationException();
        }

        @Override
        public int getMaxNextCount() {
            return -1;
        }

        @Override
        public Key transform(Key toTransform) {
            return null;
        }
    };

    final ChainableEventDataQueryFilter chain = new ChainableEventDataQueryFilter();
    // the current evaluation filter goes first
    chain.addFilter(getEvaluationFilter());
    chain.addFilter(rootIndexOnlyFilter);
    return chain;
}
use of datawave.query.predicate.EventDataQueryFilter in project datawave by NationalSecurityAgency.
the class IteratorBuildingVisitor method createWrappedTermFrequencyFilter.
/**
 * Creates a chained filter consisting of the optional existing filter followed by an expression filter built from the node.
 *
 * @param identifier
 *            not used by this implementation
 * @param node
 *            the query node the expression filter is built from
 * @param existing
 *            a filter to place first in the chain, may be null
 * @return the chained filter
 */
protected ChainableEventDataQueryFilter createWrappedTermFrequencyFilter(String identifier, JexlNode node, EventDataQueryFilter existing) {
    // the non-event fields are the index only fields plus the term frequency fields
    final Set<String> fieldsOutsideEvent = new HashSet<>(indexOnlyFields.size() + termFrequencyFields.size());
    fieldsOutsideEvent.addAll(indexOnlyFields);
    fieldsOutsideEvent.addAll(termFrequencyFields);

    final EventDataQueryFilter valueMatchFilter = new EventDataQueryExpressionFilter(node, typeMetadata, fieldsOutsideEvent) {
        @Override
        public boolean keep(Key key) {
            // anything that would otherwise be added must really be a value match; required when dealing with TF ranges
            return peek(key);
        }
    };

    final ChainableEventDataQueryFilter wrapped = new ChainableEventDataQueryFilter();
    if (null != existing) {
        wrapped.addFilter(existing);
    }
    wrapped.addFilter(valueMatchFilter);
    return wrapped;
}
use of datawave.query.predicate.EventDataQueryFilter in project datawave by NationalSecurityAgency.
the class TermFrequencyAggregatorTest method apply_buildDocNotKeep.
/**
 * With FIELD1 blacklisted in the filter and only FIELD2 in the keep fields, applying the aggregator must return no key, leave the document empty
 * and leave the iterator positioned on the NEXT_DOC_FIELD key.
 */
@Test
public void apply_buildDocNotKeep() throws IOException {
    Document document = new Document();
    AttributeFactory factory = new AttributeFactory(new TypeMetadata());

    // source data: one TF key for FIELD1 and one for NEXT_DOC_FIELD under a different uid
    TreeMap<Key, Value> source = Maps.newTreeMap();
    source.put(getTF("123", "FIELD1", "VALUE1", "dataType1", "123.345.456", 10), new Value());
    source.put(getTF("123", "NEXT_DOC_FIELD", "VALUE1", "dataType1", "124.345.456", 10), new Value());
    SortedKeyValueIterator<Key, Value> iterator = new SortedMapIterator(source);
    iterator.seek(new Range(), null, true);

    Set<String> fieldsToKeep = new HashSet<>();
    fieldsToKeep.add("FIELD2");

    // typed concretely so the blacklist can be initialized without a downcast
    EventDataQueryFieldFilter fieldFilter = new EventDataQueryFieldFilter();
    Set<String> blacklisted = new HashSet<>();
    blacklisted.add("FIELD1");
    fieldFilter.initializeBlacklist(blacklisted);
    EventDataQueryFilter filter = fieldFilter;

    aggregator = new TermFrequencyAggregator(fieldsToKeep, filter, -1);
    Key resultKey = aggregator.apply(iterator, document, factory);

    // no result key
    assertTrue(resultKey == null);
    // nothing was added to the document
    assertTrue(document.size() == 0);
    // the iterator stopped on the NEXT_DOC_FIELD key
    assertTrue(iterator.hasTop());
    Key expectedTop = getTF("123", "NEXT_DOC_FIELD", "VALUE1", "dataType1", "124.345.456", 10);
    assertTrue(iterator.getTopKey().equals(expectedTop));
}
use of datawave.query.predicate.EventDataQueryFilter in project datawave by NationalSecurityAgency.
the class TermFrequencyAggregatorTest method apply_buildDocKeepFilteredOut.
/**
 * With a filter built from the query {@code FIELD2 == 'VALUE1'} and only FIELD2 in the keep fields, applying the aggregator to a FIELD1 TF key
 * must return no key, leave the document empty and leave the iterator positioned on the NEXT_DOC_FIELD key.
 */
@Test
public void apply_buildDocKeepFilteredOut() throws IOException, ParseException {
    Document doc = new Document();
    AttributeFactory attributeFactory = new AttributeFactory(new TypeMetadata());

    // source data: one TF key for FIELD1 and one for NEXT_DOC_FIELD under a different uid
    TreeMap<Key, Value> treeMap = Maps.newTreeMap();
    treeMap.put(getTF("123", "FIELD1", "VALUE1", "dataType1", "123.345.456", 10), new Value());
    treeMap.put(getTF("123", "NEXT_DOC_FIELD", "VALUE1", "dataType1", "124.345.456", 10), new Value());
    SortedKeyValueIterator<Key, Value> itr = new SortedMapIterator(treeMap);
    itr.seek(new Range(), null, true);

    Set<String> keepFields = new HashSet<>();
    keepFields.add("FIELD2");

    // Collections.emptySet() is the type-safe form of the raw EMPTY_SET constant and avoids the unchecked conversion
    EventDataQueryFilter filter = new EventDataQueryFieldFilter(JexlASTHelper.parseJexlQuery("FIELD2 == 'VALUE1'"), Collections.emptySet());
    aggregator = new TermFrequencyAggregator(keepFields, filter, -1);
    Key result = aggregator.apply(itr, doc, attributeFactory);

    // test result key
    assertTrue(result == null);
    // test that the doc is empty
    assertTrue(doc.size() == 0);
    // test that the iterator is in the correct position
    assertTrue(itr.hasTop());
    assertTrue(itr.getTopKey().equals(getTF("123", "NEXT_DOC_FIELD", "VALUE1", "dataType1", "124.345.456", 10)));
}
use of datawave.query.predicate.EventDataQueryFilter in project datawave by NationalSecurityAgency.
the class TLDIndexBuildingVisitor method buildTermFrequencyAggregator.
/**
 * Builds the term frequency aggregator. Uses fieldsToAggregate instead of indexOnlyFields because this enables TLDs to return non-event tokens as
 * part of the user document.
 *
 * @param identifier
 *            the field to aggregate; it is only aggregated when fieldsToAggregate contains it
 * @param filter
 *            the chained filter; a root pointer filter is appended to this instance before it is handed to the aggregator
 * @param maxNextCount
 *            not read by this implementation, the aggregator receives {@code filter.getMaxNextCount()} instead
 * @return a TLDTermFrequencyAggregator over either the single identifier field or no fields
 */
@Override
protected TermFrequencyAggregator buildTermFrequencyAggregator(String identifier, ChainableEventDataQueryFilter filter, int maxNextCount) {
    EventDataQueryFilter rootFilter = new EventDataQueryFilter() {
        @Override
        public void startNewDocument(Key documentKey) {
            // no-op
        }

        @Override
        public boolean apply(@Nullable Entry<Key, String> var1) {
            // accept all
            return true;
        }

        @Override
        public boolean peek(@Nullable Entry<Key, String> var1) {
            // accept all
            return true;
        }

        /**
         * Only keep the tf key if it isn't the root pointer or if it is index only and contributes to document evaluation
         *
         * @param k
         *            the key to test
         * @return true if the key passes the root pointer/index only check and attrFilter.peek accepts it
         */
        @Override
        public boolean keep(Key k) {
            DatawaveKey key = new DatawaveKey(k);
            // parameterized entry (diamond) instead of the raw SimpleEntry type; the value is deliberately null
            return (!TLDEventDataFilter.isRootPointer(k) || indexOnlyFields.contains(key.getFieldName()))
                            && attrFilter.peek(new AbstractMap.SimpleEntry<>(k, null));
        }

        @Override
        public Key getStartKey(Key from) {
            throw new UnsupportedOperationException();
        }

        @Override
        public Key getStopKey(Key from) {
            throw new UnsupportedOperationException();
        }

        @Override
        public Range getKeyRange(Entry<Key, Document> from) {
            throw new UnsupportedOperationException();
        }

        @Override
        public EventDataQueryFilter clone() {
            // returns the same instance rather than a copy
            return this;
        }

        @Override
        public Range getSeekRange(Key current, Key endKey, boolean endKeyInclusive) {
            throw new UnsupportedOperationException();
        }

        @Override
        public int getMaxNextCount() {
            return -1;
        }

        @Override
        public Key transform(Key toTransform) {
            throw new UnsupportedOperationException();
        }
    };
    filter.addFilter(rootFilter);

    // aggregate the identifier only when it is one of the fields to aggregate, otherwise aggregate nothing
    Set<String> toAggregate = fieldsToAggregate.contains(identifier) ? Collections.singleton(identifier) : Collections.emptySet();
    return new TLDTermFrequencyAggregator(toAggregate, filter, filter.getMaxNextCount());
}
Aggregations