Search in sources :

Example 1 with EventDataQueryFilter

use of datawave.query.predicate.EventDataQueryFilter in project datawave by NationalSecurityAgency.

In the class TLDQueryIterator, method getFIEvaluationFilter.

/**
 * Builds the filter used for field index (FI) hits. It differs from the evaluation filter in that it additionally prevents FI hits on non-event fields
 * that are not index-only fields.
 *
 * @return a chained filter consisting of {@code getEvaluationFilter()} followed by a TLD-specific FI filter
 */
protected EventDataQueryFilter getFIEvaluationFilter() {
    // TLD-level restriction: a root-pointer key is kept only when its field is index only; every other key is allowed
    final EventDataQueryFilter indexOnlyRootFilter = new EventDataQueryFilter() {

        @Override
        public void startNewDocument(Key documentKey) {
        // no-op
        }

        @Override
        public boolean apply(@Nullable Map.Entry<Key, String> entry) {
            // accept all
            return true;
        }

        @Override
        public boolean peek(@Nullable Map.Entry<Key, String> entry) {
            // accept all
            return true;
        }

        /**
         * Keeps every key that is not a root pointer; a root-pointer key is kept only if its field name is one of the index-only fields.
         *
         * @param k
         *            the key to test
         * @return true if the key should be kept
         */
        @Override
        public boolean keep(Key k) {
            final boolean rootPointer = TLDEventDataFilter.isRootPointer(k);
            final DatawaveKey parsedKey = new DatawaveKey(k);
            if (!rootPointer) {
                return true;
            }
            return getIndexOnlyFields().contains(parsedKey.getFieldName());
        }

        @Override
        public Key transform(Key toTransform) {
            return null;
        }

        @Override
        public int getMaxNextCount() {
            return -1;
        }

        @Override
        public EventDataQueryFilter clone() {
            // the same instance is handed back rather than a copy
            return this;
        }

        // the range/seek related operations are not supported by this filter

        @Override
        public Key getStartKey(Key from) {
            throw new UnsupportedOperationException();
        }

        @Override
        public Key getStopKey(Key from) {
            throw new UnsupportedOperationException();
        }

        @Override
        public Range getKeyRange(Map.Entry<Key, Document> from) {
            throw new UnsupportedOperationException();
        }

        @Override
        public Range getSeekRange(Key current, Key endKey, boolean endKeyInclusive) {
            throw new UnsupportedOperationException();
        }
    };

    final ChainableEventDataQueryFilter chain = new ChainableEventDataQueryFilter();
    // the current evaluation filter is the primary filter, the TLD restriction is appended after it
    chain.addFilter(getEvaluationFilter());
    chain.addFilter(indexOnlyRootFilter);
    return chain;
}
Also used : ChainableEventDataQueryFilter(datawave.query.predicate.ChainableEventDataQueryFilter) EventDataQueryFilter(datawave.query.predicate.EventDataQueryFilter) ChainableEventDataQueryFilter(datawave.query.predicate.ChainableEventDataQueryFilter) DatawaveKey(datawave.query.data.parsers.DatawaveKey) DatawaveKey(datawave.query.data.parsers.DatawaveKey) Key(org.apache.accumulo.core.data.Key) Nullable(javax.annotation.Nullable)

Example 2 with EventDataQueryFilter

use of datawave.query.predicate.EventDataQueryFilter in project datawave by NationalSecurityAgency.

In the class IteratorBuildingVisitor, method createWrappedTermFrequencyFilter.

/**
 * Creates a chained filter made of the optional existing filter followed by an expression filter built from the given node. The expression filter treats
 * the union of the index-only and term frequency fields as its non-event fields.
 *
 * @param identifier
 *            not used by this implementation
 * @param node
 *            the node the expression filter is built from
 * @param existing
 *            a filter to place first in the chain, may be null
 * @return the chained filter
 */
protected ChainableEventDataQueryFilter createWrappedTermFrequencyFilter(String identifier, JexlNode node, EventDataQueryFilter existing) {
    // non-event fields = index only fields + term frequency fields
    final Set<String> combinedFields = new HashSet<>(indexOnlyFields.size() + termFrequencyFields.size());
    combinedFields.addAll(indexOnlyFields);
    combinedFields.addAll(termFrequencyFields);

    final EventDataQueryFilter valueMatchingFilter = new EventDataQueryExpressionFilter(node, typeMetadata, combinedFields) {

        @Override
        public boolean keep(Key key) {
            // anything that would otherwise be added must actually be a value match; this is necessary when dealing with TF ranges
            return peek(key);
        }
    };

    final ChainableEventDataQueryFilter wrapped = new ChainableEventDataQueryFilter();
    if (null != existing) {
        wrapped.addFilter(existing);
    }
    wrapped.addFilter(valueMatchingFilter);
    return wrapped;
}
Also used : EventDataQueryFilter(datawave.query.predicate.EventDataQueryFilter) ChainableEventDataQueryFilter(datawave.query.predicate.ChainableEventDataQueryFilter) EventDataQueryExpressionFilter(datawave.query.predicate.EventDataQueryExpressionFilter) ChainableEventDataQueryFilter(datawave.query.predicate.ChainableEventDataQueryFilter) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey) HashSet(java.util.HashSet)

Example 3 with EventDataQueryFilter

use of datawave.query.predicate.EventDataQueryFilter in project datawave by NationalSecurityAgency.

In the class TermFrequencyAggregatorTest, method apply_buildDocNotKeep.

/**
 * Applies the aggregator to a TF key for FIELD1 while FIELD1 is blacklisted in the filter and only FIELD2 is in the keep fields. Expects a null result key,
 * an empty document, and the iterator left on the NEXT_DOC_FIELD key.
 */
@Test
public void apply_buildDocNotKeep() throws IOException {
    // source data: one TF key for the document under test and one for the following document
    TreeMap<Key, Value> entries = Maps.newTreeMap();
    entries.put(getTF("123", "FIELD1", "VALUE1", "dataType1", "123.345.456", 10), new Value());
    entries.put(getTF("123", "NEXT_DOC_FIELD", "VALUE1", "dataType1", "124.345.456", 10), new Value());

    Document document = new Document();
    AttributeFactory factory = new AttributeFactory(new TypeMetadata());

    SortedKeyValueIterator<Key, Value> source = new SortedMapIterator(entries);
    source.seek(new Range(), null, true);

    Set<String> fieldsToKeep = new HashSet<>();
    fieldsToKeep.add("FIELD2");

    // field filter with FIELD1 blacklisted
    Set<String> blacklistedFields = new HashSet<>();
    blacklistedFields.add("FIELD1");
    EventDataQueryFieldFilter fieldFilter = new EventDataQueryFieldFilter();
    fieldFilter.initializeBlacklist(blacklistedFields);

    aggregator = new TermFrequencyAggregator(fieldsToKeep, fieldFilter, -1);
    Key resultKey = aggregator.apply(source, document, factory);

    // test result key
    assertTrue(resultKey == null);
    // test that the doc is empty
    assertTrue(document.size() == 0);
    // test that the iterator is in the correct position
    assertTrue(source.hasTop());
    Key expectedTop = getTF("123", "NEXT_DOC_FIELD", "VALUE1", "dataType1", "124.345.456", 10);
    assertTrue(source.getTopKey().equals(expectedTop));
}
Also used : TypeMetadata(datawave.query.util.TypeMetadata) EventDataQueryFilter(datawave.query.predicate.EventDataQueryFilter) AttributeFactory(datawave.query.attributes.AttributeFactory) Document(datawave.query.attributes.Document) SortedMapIterator(org.apache.accumulo.core.iterators.SortedMapIterator) Range(org.apache.accumulo.core.data.Range) EventDataQueryFieldFilter(datawave.query.predicate.EventDataQueryFieldFilter) Value(org.apache.accumulo.core.data.Value) DatawaveKey(datawave.query.data.parsers.DatawaveKey) Key(org.apache.accumulo.core.data.Key) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 4 with EventDataQueryFilter

use of datawave.query.predicate.EventDataQueryFilter in project datawave by NationalSecurityAgency.

In the class TermFrequencyAggregatorTest, method apply_buildDocKeepFilteredOut.

/**
 * Applies the aggregator to a TF key for FIELD1 while the filter is built from the query {@code FIELD2 == 'VALUE1'} and only FIELD2 is in the keep fields.
 * Expects a null result key, an empty document, and the iterator left on the NEXT_DOC_FIELD key.
 */
@Test
public void apply_buildDocKeepFilteredOut() throws IOException, ParseException {
    Document doc = new Document();
    AttributeFactory attributeFactory = new AttributeFactory(new TypeMetadata());
    // source data: one TF key for the document under test and one for the following document
    TreeMap<Key, Value> treeMap = Maps.newTreeMap();
    treeMap.put(getTF("123", "FIELD1", "VALUE1", "dataType1", "123.345.456", 10), new Value());
    treeMap.put(getTF("123", "NEXT_DOC_FIELD", "VALUE1", "dataType1", "124.345.456", 10), new Value());
    SortedKeyValueIterator<Key, Value> itr = new SortedMapIterator(treeMap);
    itr.seek(new Range(), null, true);
    Set<String> keepFields = new HashSet<>();
    keepFields.add("FIELD2");
    // type-safe empty set instead of the raw Collections.EMPTY_SET constant, which triggers an unchecked conversion
    EventDataQueryFilter filter = new EventDataQueryFieldFilter(JexlASTHelper.parseJexlQuery("FIELD2 == 'VALUE1'"), Collections.emptySet());
    aggregator = new TermFrequencyAggregator(keepFields, filter, -1);
    Key result = aggregator.apply(itr, doc, attributeFactory);
    // test result key
    assertTrue(result == null);
    // test that the doc is empty
    assertTrue(doc.size() == 0);
    // test that the iterator is in the correct position
    assertTrue(itr.hasTop());
    assertTrue(itr.getTopKey().equals(getTF("123", "NEXT_DOC_FIELD", "VALUE1", "dataType1", "124.345.456", 10)));
}
Also used : TypeMetadata(datawave.query.util.TypeMetadata) EventDataQueryFilter(datawave.query.predicate.EventDataQueryFilter) AttributeFactory(datawave.query.attributes.AttributeFactory) Document(datawave.query.attributes.Document) SortedMapIterator(org.apache.accumulo.core.iterators.SortedMapIterator) Range(org.apache.accumulo.core.data.Range) EventDataQueryFieldFilter(datawave.query.predicate.EventDataQueryFieldFilter) Value(org.apache.accumulo.core.data.Value) DatawaveKey(datawave.query.data.parsers.DatawaveKey) Key(org.apache.accumulo.core.data.Key) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 5 with EventDataQueryFilter

use of datawave.query.predicate.EventDataQueryFilter in project datawave by NationalSecurityAgency.

In the class TLDIndexBuildingVisitor, method buildTermFrequencyAggregator.

/**
 * Builds a {@code TLDTermFrequencyAggregator} for the given identifier. Uses fieldsToAggregate instead of indexOnlyFields because this enables TLDs to
 * return non-event tokens as part of the user document.
 * <p>
 * Note that the supplied filter is modified: a root-pointer filter is appended to it before it is handed to the aggregator.
 *
 * @param identifier
 *            the field to aggregate; it is only aggregated if it is contained in fieldsToAggregate, otherwise nothing is aggregated
 * @param filter
 *            the filter chain to extend and pass to the aggregator
 * @param maxNextCount
 *            not used by this implementation; the aggregator is given {@code filter.getMaxNextCount()} instead
 * @return the aggregator
 */
@Override
protected TermFrequencyAggregator buildTermFrequencyAggregator(String identifier, ChainableEventDataQueryFilter filter, int maxNextCount) {
    EventDataQueryFilter rootFilter = new EventDataQueryFilter() {

        @Override
        public void startNewDocument(Key documentKey) {
        // no-op
        }

        @Override
        public boolean apply(@Nullable Entry<Key, String> var1) {
            // accept all
            return true;
        }

        @Override
        public boolean peek(@Nullable Entry<Key, String> var1) {
            // accept all
            return true;
        }

        /**
         * Only keep the tf key if it isn't the root pointer or if it is index only, and in either case only if attrFilter accepts it on peek
         *
         * @param k
         *            the key to test
         * @return true if the key should be kept
         */
        @Override
        public boolean keep(Key k) {
            DatawaveKey key = new DatawaveKey(k);
            // diamond instead of the raw SimpleEntry type so the entry is created without an unchecked conversion
            return (!TLDEventDataFilter.isRootPointer(k) || indexOnlyFields.contains(key.getFieldName())) && attrFilter.peek(new AbstractMap.SimpleEntry<>(k, null));
        }

        @Override
        public Key getStartKey(Key from) {
            throw new UnsupportedOperationException();
        }

        @Override
        public Key getStopKey(Key from) {
            throw new UnsupportedOperationException();
        }

        @Override
        public Range getKeyRange(Entry<Key, Document> from) {
            throw new UnsupportedOperationException();
        }

        @Override
        public EventDataQueryFilter clone() {
            // the same instance is handed back rather than a copy
            return this;
        }

        @Override
        public Range getSeekRange(Key current, Key endKey, boolean endKeyInclusive) {
            throw new UnsupportedOperationException();
        }

        @Override
        public int getMaxNextCount() {
            return -1;
        }

        @Override
        public Key transform(Key toTransform) {
            throw new UnsupportedOperationException();
        }
    };
    // appended to the caller's chain, so the root-pointer restriction applies in addition to the existing filters
    filter.addFilter(rootFilter);
    Set<String> toAggregate = fieldsToAggregate.contains(identifier) ? Collections.singleton(identifier) : Collections.emptySet();
    return new TLDTermFrequencyAggregator(toAggregate, filter, filter.getMaxNextCount());
}
Also used : EventDataQueryFilter(datawave.query.predicate.EventDataQueryFilter) ChainableEventDataQueryFilter(datawave.query.predicate.ChainableEventDataQueryFilter) Entry(java.util.Map.Entry) DatawaveKey(datawave.query.data.parsers.DatawaveKey) DatawaveKey(datawave.query.data.parsers.DatawaveKey) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey) Nullable(javax.annotation.Nullable)

Aggregations

EventDataQueryFilter (datawave.query.predicate.EventDataQueryFilter)10 Key (org.apache.accumulo.core.data.Key)10 DatawaveKey (datawave.query.data.parsers.DatawaveKey)8 HashSet (java.util.HashSet)8 AttributeFactory (datawave.query.attributes.AttributeFactory)7 Document (datawave.query.attributes.Document)7 TypeMetadata (datawave.query.util.TypeMetadata)7 Range (org.apache.accumulo.core.data.Range)7 Value (org.apache.accumulo.core.data.Value)7 SortedMapIterator (org.apache.accumulo.core.iterators.SortedMapIterator)7 Test (org.junit.Test)7 EventDataQueryFieldFilter (datawave.query.predicate.EventDataQueryFieldFilter)6 ChainableEventDataQueryFilter (datawave.query.predicate.ChainableEventDataQueryFilter)3 ArrayList (java.util.ArrayList)2 Nullable (javax.annotation.Nullable)2 PartialKey (org.apache.accumulo.core.data.PartialKey)2 Attribute (datawave.query.attributes.Attribute)1 TypeAttribute (datawave.query.attributes.TypeAttribute)1 EventDataQueryExpressionFilter (datawave.query.predicate.EventDataQueryExpressionFilter)1 Entry (java.util.Map.Entry)1