Search in sources :

Example 1 with EventDataQueryExpressionFilter

use of datawave.query.predicate.EventDataQueryExpressionFilter in project datawave by NationalSecurityAgency.

the class TermFrequencyIndexIteratorTest method testEndingFieldMismatch.

/**
 * Seeks over a range whose start key is built for field FOO and whose end key is built
 * for Constants.MAX_UNICODE_STRING (both with the "123.345.456.3" component) and expects
 * the iterator to have no top entry afterwards.
 */
@Test
public void testEndingFieldMismatch() throws IOException, ParseException {
    final String query = "FOO=='bar' || FOO=='baz' || FOO=='buf' || FOO=='arm'";

    // start key is inclusive, end key is exclusive
    final Range scanRange = new Range(
                    getFiKey("row", "type1", "123.345.456.3", "FOO", "alf"), true,
                    getFiKey("row", "type1", "123.345.456.3", Constants.MAX_UNICODE_STRING, "buz"), false);

    // replace the filter/aggregator pair with one restricted to this test's query
    filter = new EventDataQueryExpressionFilter(JexlASTHelper.parseJexlQuery(query), typeMetadata, fieldsToKeep);
    aggregator = new TermFrequencyAggregator(fieldsToKeep, filter);

    final TermFrequencyIndexIterator tfIterator = new TermFrequencyIndexIterator(scanRange, source, null, typeMetadata, true, null, aggregator);

    // position on the first document, if any; none is expected for this range
    tfIterator.seek(null, null, true);
    Assert.assertFalse(tfIterator.hasTop());
}
Also used : EventDataQueryExpressionFilter(datawave.query.predicate.EventDataQueryExpressionFilter) Range(org.apache.accumulo.core.data.Range) TermFrequencyAggregator(datawave.query.jexl.functions.TermFrequencyAggregator) TLDTermFrequencyAggregator(datawave.query.tld.TLDTermFrequencyAggregator) Test(org.junit.Test)

Example 2 with EventDataQueryExpressionFilter

use of datawave.query.predicate.EventDataQueryExpressionFilter in project datawave by NationalSecurityAgency.

the class TermFrequencyIndexIteratorTest method testScanFullRangeExclusive.

/**
 * Scans a range that is exclusive at both ends (both inclusive flags are {@code false})
 * with a {@link TermFrequencyAggregator} and walks the three documents the iterator is
 * expected to produce, followed by exhaustion.
 * <p>
 * Each document must hold exactly two dictionary entries, {@code FOO} and
 * {@code RECORD_ID}. The first two documents carry a set of FOO values, the third a
 * single FOO value.
 */
@Test
public void testScanFullRangeExclusive() throws IOException, ParseException {
    Range r = new Range(getFiKey("row", "type1", "123.345.456", "FOO", "alf"), false, getFiKey("row", "type1", "123.345.456.2", "FOO", "buz"), false);
    filter = new EventDataQueryExpressionFilter(JexlASTHelper.parseJexlQuery("FOO=='bar' || FOO=='baz' || FOO=='buf' || FOO=='arm'"), typeMetadata, fieldsToKeep);
    aggregator = new TermFrequencyAggregator(fieldsToKeep, filter);
    TermFrequencyIndexIterator iterator = new TermFrequencyIndexIterator(r, source, null, typeMetadata, true, null, aggregator);

    // jump to the first doc
    iterator.seek(null, null, true);
    Assert.assertTrue(iterator.hasTop());
    Document d = iterator.document();
    Assert.assertNotNull(d);
    Assert.assertEquals(2, d.getDictionary().size());
    Assert.assertNotNull(d.getDictionary().get("FOO"));
    Assert.assertNotNull(d.getDictionary().get("RECORD_ID"));
    Assert.assertNotNull(d.getDictionary().get("FOO").getData());
    // getData() is untyped; the cast is checked implicitly by the typed reads below
    @SuppressWarnings("unchecked")
    Set<PreNormalizedAttribute> firstValues = (Set<PreNormalizedAttribute>) d.getDictionary().get("FOO").getData();
    Iterator<PreNormalizedAttribute> i = firstValues.iterator();
    Assert.assertEquals("bar", i.next().getValue());
    Assert.assertEquals("baz", i.next().getValue());

    // second doc
    iterator.next();
    Assert.assertTrue(iterator.hasTop());
    d = iterator.document();
    Assert.assertNotNull(d);
    Assert.assertEquals(2, d.getDictionary().size());
    Assert.assertNotNull(d.getDictionary().get("FOO"));
    Assert.assertNotNull(d.getDictionary().get("RECORD_ID"));
    Assert.assertNotNull(d.getDictionary().get("FOO").getData());
    @SuppressWarnings("unchecked")
    Set<PreNormalizedAttribute> secondValues = (Set<PreNormalizedAttribute>) d.getDictionary().get("FOO").getData();
    i = secondValues.iterator();
    Assert.assertEquals("buf", i.next().getValue());
    Assert.assertEquals("buz", i.next().getValue());

    // third doc: FOO is expected to be a single value rather than a set
    iterator.next();
    Assert.assertTrue(iterator.hasTop());
    d = iterator.document();
    Assert.assertNotNull(d);
    Assert.assertEquals(2, d.getDictionary().size());
    Assert.assertNotNull(d.getDictionary().get("FOO"));
    Assert.assertNotNull(d.getDictionary().get("RECORD_ID"));
    Assert.assertNotNull(d.getDictionary().get("FOO").getData());
    Assert.assertEquals("arm", d.getDictionary().get("FOO").getData());

    // nothing left after the third doc
    iterator.next();
    Assert.assertFalse(iterator.hasTop());
}
Also used : EventDataQueryExpressionFilter(datawave.query.predicate.EventDataQueryExpressionFilter) HashSet(java.util.HashSet) Set(java.util.Set) PreNormalizedAttribute(datawave.query.attributes.PreNormalizedAttribute) Range(org.apache.accumulo.core.data.Range) Document(datawave.query.attributes.Document) TermFrequencyAggregator(datawave.query.jexl.functions.TermFrequencyAggregator) TLDTermFrequencyAggregator(datawave.query.tld.TLDTermFrequencyAggregator) Test(org.junit.Test)

Example 3 with EventDataQueryExpressionFilter

use of datawave.query.predicate.EventDataQueryExpressionFilter in project datawave by NationalSecurityAgency.

the class IteratorBuildingVisitor method createWrappedTermFrequencyFilter.

/**
 * Builds a chained filter consisting of the optional {@code existing} filter followed by
 * an expression filter created from {@code node} over the union of the index-only and
 * term-frequency fields.
 *
 * @param identifier
 *            not read by this method
 * @param node
 *            the query node handed to the expression filter
 * @param existing
 *            a filter to place first in the chain, or {@code null} to omit it
 * @return a new chain holding {@code existing} (when non-null) and then the expression filter
 */
protected ChainableEventDataQueryFilter createWrappedTermFrequencyFilter(String identifier, JexlNode node, EventDataQueryFilter existing) {
    // non-event fields = index-only fields plus term-frequency fields
    final Set<String> nonEventFields = new HashSet<>(indexOnlyFields);
    nonEventFields.addAll(termFrequencyFields);

    final EventDataQueryFilter valueMatchingFilter = new EventDataQueryExpressionFilter(node, typeMetadata, nonEventFields) {

        @Override
        public boolean keep(Key key) {
            // TF ranges can surface keys that would otherwise be added without matching a
            // value, so keeping a key is decided by the same check as peek
            return peek(key);
        }
    };

    final ChainableEventDataQueryFilter chain = new ChainableEventDataQueryFilter();
    if (null != existing) {
        chain.addFilter(existing);
    }
    chain.addFilter(valueMatchingFilter);
    return chain;
}
Also used : EventDataQueryFilter(datawave.query.predicate.EventDataQueryFilter) ChainableEventDataQueryFilter(datawave.query.predicate.ChainableEventDataQueryFilter) EventDataQueryExpressionFilter(datawave.query.predicate.EventDataQueryExpressionFilter) ChainableEventDataQueryFilter(datawave.query.predicate.ChainableEventDataQueryFilter) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey) HashSet(java.util.HashSet)

Example 4 with EventDataQueryExpressionFilter

use of datawave.query.predicate.EventDataQueryExpressionFilter in project datawave by NationalSecurityAgency.

the class TermFrequencyIndexIteratorTest method setup.

/**
 * Rebuilds the shared fixtures before every test: a sorted in-memory source of nine
 * term-frequency entries, type metadata mapping FOO/type1 to {@link LcNoDiacriticsType},
 * a keep-set containing only FOO, and a default filter/aggregator pair that accepts
 * every FOO value present in the source.
 */
@Before
public void setup() throws ParseException {
    // {third key argument, field, value} of each term-frequency entry, in source order
    final String[][] tfEntries = {
        {"123.345.456", "FOO", "bar"},
        {"123.345.456", "FOO", "baz"},
        {"123.345.456.1", "FOO", "buf"},
        {"123.345.456.1", "FOO", "buz"},
        {"123.345.456.2", "FOO", "alf"},
        {"123.345.456.2", "FOO", "arm"},
        {"123.345.456.2", "FOOT", "armfoot"},
        {"123.345.456.3", "AFOO", "alfa"},
        {"123.345.456.3", "ZFOO", "alfz"},
    };

    List<Map.Entry<Key, Value>> baseSource = new ArrayList<>(tfEntries.length);
    for (String[] tf : tfEntries) {
        // diamond keeps the entry typed as <Key, Value> instead of a raw SimpleEntry
        baseSource.add(new AbstractMap.SimpleEntry<>(getTfKey("row", "type1", tf[0], tf[1], tf[2]), new Value()));
    }
    source = new SortedListKeyValueIterator(baseSource);

    String lcNoDiacritics = LcNoDiacriticsType.class.getName();
    typeMetadata = new TypeMetadata();
    typeMetadata.put("FOO", "type1", lcNoDiacritics);

    fieldsToKeep = new HashSet<>();
    fieldsToKeep.add("FOO");

    filter = new EventDataQueryExpressionFilter(JexlASTHelper.parseJexlQuery("FOO=='bar' || FOO=='baz' || FOO=='buf' || FOO=='buz' || FOO=='alf' || FOO=='arm'"), typeMetadata, fieldsToKeep);
    aggregator = new TermFrequencyAggregator(fieldsToKeep, filter);
}
Also used : AbstractMap(java.util.AbstractMap) SortedListKeyValueIterator(datawave.query.iterator.SortedListKeyValueIterator) TypeMetadata(datawave.query.util.TypeMetadata) EventDataQueryExpressionFilter(datawave.query.predicate.EventDataQueryExpressionFilter) ArrayList(java.util.ArrayList) Value(org.apache.accumulo.core.data.Value) TermFrequencyAggregator(datawave.query.jexl.functions.TermFrequencyAggregator) TLDTermFrequencyAggregator(datawave.query.tld.TLDTermFrequencyAggregator) Before(org.junit.Before)

Example 5 with EventDataQueryExpressionFilter

use of datawave.query.predicate.EventDataQueryExpressionFilter in project datawave by NationalSecurityAgency.

the class TermFrequencyIndexIteratorTest method testScanFullRangeExclusiveEventDataQueryExpressionFilter.

/**
 * Scans a range that is exclusive at both ends (both inclusive flags are {@code false})
 * with a {@link TLDTermFrequencyAggregator} and expects a single document whose
 * {@code FOO} entry yields six values in a fixed order, after which the iterator is
 * exhausted.
 */
@Test
public void testScanFullRangeExclusiveEventDataQueryExpressionFilter() throws IOException, ParseException {
    Range r = new Range(getFiKey("row", "type1", "123.345.456", "FOO", "alf"), false, getFiKey("row", "type1", "123.345.456.2", "FOO", "buz"), false);
    filter = new EventDataQueryExpressionFilter(JexlASTHelper.parseJexlQuery("FOO=='bar' || FOO=='baz' || FOO=='buf' || FOO=='arm'"), typeMetadata, fieldsToKeep);
    aggregator = new TLDTermFrequencyAggregator(fieldsToKeep, filter, -1);
    TermFrequencyIndexIterator iterator = new TermFrequencyIndexIterator(r, source, null, typeMetadata, true, null, aggregator);

    // jump to the first doc
    iterator.seek(null, null, true);
    Assert.assertTrue(iterator.hasTop());
    Document d = iterator.document();
    Assert.assertNotNull(d);
    Assert.assertEquals(2, d.getDictionary().size());
    Assert.assertNotNull(d.getDictionary().get("FOO"));
    Assert.assertNotNull(d.getDictionary().get("RECORD_ID"));
    Assert.assertNotNull(d.getDictionary().get("FOO").getData());
    // getData() is untyped; the cast is checked implicitly by the typed reads below
    @SuppressWarnings("unchecked")
    Set<PreNormalizedAttribute> fooValues = (Set<PreNormalizedAttribute>) d.getDictionary().get("FOO").getData();
    Iterator<PreNormalizedAttribute> i = fooValues.iterator();
    Assert.assertEquals("bar", i.next().getValue());
    Assert.assertEquals("baz", i.next().getValue());
    Assert.assertEquals("buf", i.next().getValue());
    Assert.assertEquals("buz", i.next().getValue());
    Assert.assertEquals("alf", i.next().getValue());
    Assert.assertEquals("arm", i.next().getValue());

    // only one document is expected for this range
    iterator.next();
    Assert.assertFalse(iterator.hasTop());
}
Also used : EventDataQueryExpressionFilter(datawave.query.predicate.EventDataQueryExpressionFilter) HashSet(java.util.HashSet) Set(java.util.Set) TLDTermFrequencyAggregator(datawave.query.tld.TLDTermFrequencyAggregator) PreNormalizedAttribute(datawave.query.attributes.PreNormalizedAttribute) Range(org.apache.accumulo.core.data.Range) Document(datawave.query.attributes.Document) Test(org.junit.Test)

Aggregations

EventDataQueryExpressionFilter (datawave.query.predicate.EventDataQueryExpressionFilter): 5; TLDTermFrequencyAggregator (datawave.query.tld.TLDTermFrequencyAggregator): 4; TermFrequencyAggregator (datawave.query.jexl.functions.TermFrequencyAggregator): 3; HashSet (java.util.HashSet): 3; Range (org.apache.accumulo.core.data.Range): 3; Test (org.junit.Test): 3; Document (datawave.query.attributes.Document): 2; PreNormalizedAttribute (datawave.query.attributes.PreNormalizedAttribute): 2; Set (java.util.Set): 2; SortedListKeyValueIterator (datawave.query.iterator.SortedListKeyValueIterator): 1; ChainableEventDataQueryFilter (datawave.query.predicate.ChainableEventDataQueryFilter): 1; EventDataQueryFilter (datawave.query.predicate.EventDataQueryFilter): 1; TypeMetadata (datawave.query.util.TypeMetadata): 1; AbstractMap (java.util.AbstractMap): 1; ArrayList (java.util.ArrayList): 1; Key (org.apache.accumulo.core.data.Key): 1; PartialKey (org.apache.accumulo.core.data.PartialKey): 1; Value (org.apache.accumulo.core.data.Value): 1; Before (org.junit.Before): 1