Search in sources :

Example 1 with ChainableEventDataQueryFilter

use of datawave.query.predicate.ChainableEventDataQueryFilter in project datawave by NationalSecurityAgency.

the class TLDQueryIterator method getFIEvaluationFilter.

/**
 * Builds the filter used for field index (FI) keys. Unlike the plain evaluation filter, this one also prevents FI hits on non-event fields that are not
 * index only fields.
 *
 * @return a chained filter made of the current evaluation filter followed by a TLD-level field index filter
 */
protected EventDataQueryFilter getFIEvaluationFilter() {
    // second link of the chain: at the tld level keep only index only fields, everything else passes through
    final EventDataQueryFilter rootIndexOnlyFilter = new EventDataQueryFilter() {

        @Override
        public void startNewDocument(Key documentKey) {
            // nothing to do when a new document starts
        }

        @Override
        public boolean apply(@Nullable Map.Entry<Key, String> entry) {
            // accept everything
            return true;
        }

        @Override
        public boolean peek(@Nullable Map.Entry<Key, String> entry) {
            // accept everything
            return true;
        }

        /**
         * A key that is not a root pointer is always kept; a root pointer key is kept only when its field is index only.
         *
         * @param k
         *            the key under consideration
         * @return true if the key should be kept
         */
        @Override
        public boolean keep(Key k) {
            final boolean atRoot = TLDEventDataFilter.isRootPointer(k);
            final DatawaveKey parsed = new DatawaveKey(k);
            if (!atRoot) {
                return true;
            }
            return getIndexOnlyFields().contains(parsed.getFieldName());
        }

        @Override
        public Key getStartKey(Key from) {
            throw new UnsupportedOperationException();
        }

        @Override
        public Key getStopKey(Key from) {
            throw new UnsupportedOperationException();
        }

        @Override
        public Range getKeyRange(Map.Entry<Key, Document> from) {
            throw new UnsupportedOperationException();
        }

        @Override
        public EventDataQueryFilter clone() {
            // the same instance is handed back rather than a copy
            return this;
        }

        @Override
        public Range getSeekRange(Key current, Key endKey, boolean endKeyInclusive) {
            throw new UnsupportedOperationException();
        }

        @Override
        public int getMaxNextCount() {
            return -1;
        }

        @Override
        public Key transform(Key toTransform) {
            return null;
        }
    };

    final ChainableEventDataQueryFilter chain = new ChainableEventDataQueryFilter();
    // the current evaluation filter always goes first
    chain.addFilter(getEvaluationFilter());
    chain.addFilter(rootIndexOnlyFilter);
    return chain;
}
Also used : ChainableEventDataQueryFilter(datawave.query.predicate.ChainableEventDataQueryFilter) EventDataQueryFilter(datawave.query.predicate.EventDataQueryFilter) ChainableEventDataQueryFilter(datawave.query.predicate.ChainableEventDataQueryFilter) DatawaveKey(datawave.query.data.parsers.DatawaveKey) DatawaveKey(datawave.query.data.parsers.DatawaveKey) Key(org.apache.accumulo.core.data.Key) Nullable(javax.annotation.Nullable)

Example 2 with ChainableEventDataQueryFilter

use of datawave.query.predicate.ChainableEventDataQueryFilter in project datawave by NationalSecurityAgency.

the class IteratorBuildingVisitor method createWrappedTermFrequencyFilter.

/**
 * Creates a chained filter consisting of the optional existing filter followed by an expression filter built for the given node.
 *
 * @param identifier
 *            not read by this method
 * @param node
 *            the node the expression filter is built from
 * @param existing
 *            a filter to place first in the chain, skipped when null
 * @return the chained filter
 */
protected ChainableEventDataQueryFilter createWrappedTermFrequencyFilter(String identifier, JexlNode node, EventDataQueryFilter existing) {
    // the non-event fields are the union of the index only fields and the term frequency fields
    final int expectedSize = indexOnlyFields.size() + termFrequencyFields.size();
    final Set<String> nonEventFields = new HashSet<>(expectedSize);
    nonEventFields.addAll(indexOnlyFields);
    nonEventFields.addAll(termFrequencyFields);

    final EventDataQueryFilter valueMatchFilter = new EventDataQueryExpressionFilter(node, typeMetadata, nonEventFields) {

        @Override
        public boolean keep(Key key) {
            // anything that would otherwise be added must really be a value match; required when dealing with TF ranges
            return peek(key);
        }
    };

    final ChainableEventDataQueryFilter chain = new ChainableEventDataQueryFilter();
    if (null != existing) {
        chain.addFilter(existing);
    }
    chain.addFilter(valueMatchFilter);
    return chain;
}
Also used : EventDataQueryFilter(datawave.query.predicate.EventDataQueryFilter) ChainableEventDataQueryFilter(datawave.query.predicate.ChainableEventDataQueryFilter) EventDataQueryExpressionFilter(datawave.query.predicate.EventDataQueryExpressionFilter) ChainableEventDataQueryFilter(datawave.query.predicate.ChainableEventDataQueryFilter) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey) HashSet(java.util.HashSet)

Example 3 with ChainableEventDataQueryFilter

use of datawave.query.predicate.ChainableEventDataQueryFilter in project datawave by NationalSecurityAgency.

the class IteratorBuildingVisitor method buildExceededFromTermFrequency.

/**
 * Builds an iterator that merges a term frequency iterator with an event field iterator for the given field. Only supported when
 * {@code limitLookup} is set; otherwise a fatal query exception is thrown.
 *
 * @param identifier
 *            the field the iterators are built for
 * @param rootNode
 *            the node handed to the term frequency index builder
 * @param sourceNode
 *            the node used to build the wrapped filter and the term frequency aggregator
 * @param range
 *            the literal range the field index range is derived from
 * @param data
 *            not read by this method
 * @return an OrIterator over the term frequency iterator and the event field iterator
 */
private NestedIterator<Key> buildExceededFromTermFrequency(String identifier, JexlNode rootNode, JexlNode sourceNode, LiteralRange<?> range, Object data) {
    // this path only makes sense for limited lookups; bail out right away otherwise
    if (!limitLookup) {
        QueryException cause = new QueryException(DatawaveErrorCode.UNEXPECTED_SOURCE_NODE, MessageFormat.format("{0}", "buildExceededFromTermFrequency"));
        throw new DatawaveFatalQueryException(cause);
    }

    // event side: the existing attribute filter chained with an expression filter for the source node
    ChainableEventDataQueryFilter eventFilter = createWrappedTermFrequencyFilter(identifier, sourceNode, attrFilter);
    NestedIterator<Key> eventSide = new EventFieldIterator(rangeLimiter, source.deepCopy(env), identifier, new AttributeFactory(this.typeMetadata),
                    getEventFieldAggregator(identifier, eventFilter));

    // term frequency side, working on its own deep copy of the source
    TermFrequencyIndexBuilder tfBuilder = new TermFrequencyIndexBuilder();
    tfBuilder.setSource(source.deepCopy(env));
    tfBuilder.setTypeMetadata(typeMetadata);
    tfBuilder.setFieldsToAggregate(fieldsToAggregate);
    tfBuilder.setTimeFilter(timeFilter);
    tfBuilder.setAttrFilter(attrFilter);
    tfBuilder.setDatatypeFilter(datatypeFilter);
    tfBuilder.setEnv(env);
    // without an attribute filter there is no max next count to pass along, so -1 is used
    int maxNextCount = -1;
    if (attrFilter != null) {
        maxNextCount = attrFilter.getMaxNextCount();
    }
    tfBuilder.setTermFrequencyAggregator(getTermFrequencyAggregator(identifier, sourceNode, attrFilter, maxNextCount));
    tfBuilder.setNode(rootNode);
    tfBuilder.setRange(getFiRangeForTF(range));
    tfBuilder.setField(identifier);
    NestedIterator<Key> tfSide = tfBuilder.build();

    return new OrIterator(Arrays.asList(tfSide, eventSide));
}
Also used : ChainableEventDataQueryFilter(datawave.query.predicate.ChainableEventDataQueryFilter) DatawaveFatalQueryException(datawave.query.exceptions.DatawaveFatalQueryException) QueryException(datawave.webservice.query.exception.QueryException) EventFieldIterator(datawave.query.iterator.EventFieldIterator) TermFrequencyIndexBuilder(datawave.query.iterator.builder.TermFrequencyIndexBuilder) OrIterator(datawave.query.iterator.logic.OrIterator) DatawaveFatalQueryException(datawave.query.exceptions.DatawaveFatalQueryException) AttributeFactory(datawave.query.attributes.AttributeFactory) LiteralRange(datawave.query.jexl.LiteralRange) Range(org.apache.accumulo.core.data.Range) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey)

Example 4 with ChainableEventDataQueryFilter

use of datawave.query.predicate.ChainableEventDataQueryFilter in project datawave by NationalSecurityAgency.

the class TLDIndexBuildingVisitor method buildTermFrequencyAggregator.

/**
 * Use fieldsToAggregate instead of indexOnlyFields because this enables TLDs to return non-event tokens as part of the user document.
 * <p>
 * A root-pointer filter is appended to the supplied {@code filter} (the argument is modified in place) before it is handed to the aggregator.
 *
 * @param identifier
 *            the field being aggregated; it is aggregated only when contained in fieldsToAggregate
 * @param filter
 *            the chained filter to extend with the root-pointer filter
 * @param maxNextCount
 *            NOTE(review): not used; the aggregator receives {@code filter.getMaxNextCount()} instead. Confirm this is intentional.
 * @return a TLDTermFrequencyAggregator over either the single identifier or no fields at all
 */
@Override
protected TermFrequencyAggregator buildTermFrequencyAggregator(String identifier, ChainableEventDataQueryFilter filter, int maxNextCount) {
    EventDataQueryFilter rootFilter = new EventDataQueryFilter() {

        @Override
        public void startNewDocument(Key documentKey) {
            // no-op
        }

        @Override
        public boolean apply(@Nullable Entry<Key, String> var1) {
            // accept all
            return true;
        }

        @Override
        public boolean peek(@Nullable Entry<Key, String> var1) {
            // accept all
            return true;
        }

        /**
         * Only keep the tf key if it isn't the root pointer or if it is index only and contributes to document evaluation
         *
         * @param k
         *            the term frequency key under consideration
         * @return true if the key passes the root-pointer check and the attribute filter accepts it
         */
        @Override
        public boolean keep(Key k) {
            DatawaveKey key = new DatawaveKey(k);
            boolean passesRootCheck = !TLDEventDataFilter.isRootPointer(k) || indexOnlyFields.contains(key.getFieldName());
            // explicit type arguments instead of a raw SimpleEntry so the call to peek is type checked
            return passesRootCheck && attrFilter.peek(new AbstractMap.SimpleEntry<Key, String>(k, null));
        }

        @Override
        public Key getStartKey(Key from) {
            throw new UnsupportedOperationException();
        }

        @Override
        public Key getStopKey(Key from) {
            throw new UnsupportedOperationException();
        }

        @Override
        public Range getKeyRange(Entry<Key, Document> from) {
            throw new UnsupportedOperationException();
        }

        @Override
        public EventDataQueryFilter clone() {
            // the same instance is handed back rather than a copy
            return this;
        }

        @Override
        public Range getSeekRange(Key current, Key endKey, boolean endKeyInclusive) {
            throw new UnsupportedOperationException();
        }

        @Override
        public int getMaxNextCount() {
            return -1;
        }

        @Override
        public Key transform(Key toTransform) {
            throw new UnsupportedOperationException();
        }
    };
    filter.addFilter(rootFilter);
    Set<String> toAggregate = fieldsToAggregate.contains(identifier) ? Collections.singleton(identifier) : Collections.emptySet();
    return new TLDTermFrequencyAggregator(toAggregate, filter, filter.getMaxNextCount());
}
Also used : EventDataQueryFilter(datawave.query.predicate.EventDataQueryFilter) ChainableEventDataQueryFilter(datawave.query.predicate.ChainableEventDataQueryFilter) Entry(java.util.Map.Entry) DatawaveKey(datawave.query.data.parsers.DatawaveKey) DatawaveKey(datawave.query.data.parsers.DatawaveKey) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey) Nullable(javax.annotation.Nullable)

Aggregations

ChainableEventDataQueryFilter (datawave.query.predicate.ChainableEventDataQueryFilter)4 Key (org.apache.accumulo.core.data.Key)4 EventDataQueryFilter (datawave.query.predicate.EventDataQueryFilter)3 PartialKey (org.apache.accumulo.core.data.PartialKey)3 DatawaveKey (datawave.query.data.parsers.DatawaveKey)2 Nullable (javax.annotation.Nullable)2 AttributeFactory (datawave.query.attributes.AttributeFactory)1 DatawaveFatalQueryException (datawave.query.exceptions.DatawaveFatalQueryException)1 EventFieldIterator (datawave.query.iterator.EventFieldIterator)1 TermFrequencyIndexBuilder (datawave.query.iterator.builder.TermFrequencyIndexBuilder)1 OrIterator (datawave.query.iterator.logic.OrIterator)1 LiteralRange (datawave.query.jexl.LiteralRange)1 EventDataQueryExpressionFilter (datawave.query.predicate.EventDataQueryExpressionFilter)1 QueryException (datawave.webservice.query.exception.QueryException)1 HashSet (java.util.HashSet)1 Entry (java.util.Map.Entry)1 Range (org.apache.accumulo.core.data.Range)1