Search in sources :

Example 1 with TermFrequencyAggregator

use of datawave.query.jexl.functions.TermFrequencyAggregator in project datawave by NationalSecurityAgency.

the class TermFrequencyIndexIteratorTest method testEndingFieldMismatch.

/**
 * Seeks over a range whose start key names field FOO while its end key uses
 * {@code Constants.MAX_UNICODE_STRING} in the field position, and expects the
 * iterator to have no top entry afterwards.
 */
@Test
public void testEndingFieldMismatch() throws IOException, ParseException {
    // start is inclusive, end is exclusive
    Range seekRange = new Range(
                    getFiKey("row", "type1", "123.345.456.3", "FOO", "alf"), true,
                    getFiKey("row", "type1", "123.345.456.3", Constants.MAX_UNICODE_STRING, "buz"), false);

    // this test installs its own filter and aggregator before building the iterator
    String query = "FOO=='bar' || FOO=='baz' || FOO=='buf' || FOO=='arm'";
    filter = new EventDataQueryExpressionFilter(JexlASTHelper.parseJexlQuery(query), typeMetadata, fieldsToKeep);
    aggregator = new TermFrequencyAggregator(fieldsToKeep, filter);

    TermFrequencyIndexIterator tfIterator = new TermFrequencyIndexIterator(
                    seekRange, source, null, typeMetadata, true, null, aggregator);

    // jump to the first doc
    tfIterator.seek(null, null, true);

    boolean foundTop = tfIterator.hasTop();
    Assert.assertFalse(foundTop);
}
Also used : EventDataQueryExpressionFilter(datawave.query.predicate.EventDataQueryExpressionFilter) Range(org.apache.accumulo.core.data.Range) TermFrequencyAggregator(datawave.query.jexl.functions.TermFrequencyAggregator) TLDTermFrequencyAggregator(datawave.query.tld.TLDTermFrequencyAggregator) Test(org.junit.Test)

Example 2 with TermFrequencyAggregator

use of datawave.query.jexl.functions.TermFrequencyAggregator in project datawave by NationalSecurityAgency.

the class TermFrequencyIndexIteratorTest method testScanFullRangeExclusive.

/**
 * Scans a range that is exclusive at both ends and checks, in order, every
 * document the iterator yields: FOO values "bar" and "baz", then "buf" and
 * "buz", then the single value "arm". After the third document the iterator
 * must be exhausted.
 *
 * Null and size checks use assertNotNull/assertEquals so that a failure
 * reports the offending value rather than a bare AssertionError.
 */
@Test
public void testScanFullRangeExclusive() throws IOException, ParseException {
    Range r = new Range(getFiKey("row", "type1", "123.345.456", "FOO", "alf"), false, getFiKey("row", "type1", "123.345.456.2", "FOO", "buz"), false);
    // this test installs its own filter and aggregator before building the iterator
    filter = new EventDataQueryExpressionFilter(JexlASTHelper.parseJexlQuery("FOO=='bar' || FOO=='baz' || FOO=='buf' || FOO=='arm'"), typeMetadata, fieldsToKeep);
    aggregator = new TermFrequencyAggregator(fieldsToKeep, filter);
    TermFrequencyIndexIterator iterator = new TermFrequencyIndexIterator(r, source, null, typeMetadata, true, null, aggregator);
    // jump to the first doc
    iterator.seek(null, null, true);

    // first document: FOO carries "bar" followed by "baz"
    Assert.assertTrue(iterator.hasTop());
    Document d = iterator.document();
    Assert.assertNotNull(d);
    Assert.assertEquals(2, d.getDictionary().size());
    Assert.assertNotNull(d.getDictionary().get("FOO"));
    Assert.assertNotNull(d.getDictionary().get("RECORD_ID"));
    Assert.assertNotNull(d.getDictionary().get("FOO").getData());
    // raw Set cast retained: the declared type of getData() is not visible here
    Iterator<PreNormalizedAttribute> i = ((Set) d.getDictionary().get("FOO").getData()).iterator();
    // value comparisons keep the actual.equals(expected) direction on purpose
    Assert.assertTrue(i.next().getValue().equals("bar"));
    Assert.assertTrue(i.next().getValue().equals("baz"));

    // second document: FOO carries "buf" followed by "buz"
    iterator.next();
    Assert.assertTrue(iterator.hasTop());
    d = iterator.document();
    Assert.assertNotNull(d);
    Assert.assertEquals(2, d.getDictionary().size());
    Assert.assertNotNull(d.getDictionary().get("FOO"));
    Assert.assertNotNull(d.getDictionary().get("RECORD_ID"));
    Assert.assertNotNull(d.getDictionary().get("FOO").getData());
    i = ((Set) d.getDictionary().get("FOO").getData()).iterator();
    Assert.assertTrue(i.next().getValue().equals("buf"));
    Assert.assertTrue(i.next().getValue().equals("buz"));

    // third document: FOO data compares equal to the single value "arm"
    iterator.next();
    Assert.assertTrue(iterator.hasTop());
    d = iterator.document();
    Assert.assertNotNull(d);
    Assert.assertEquals(2, d.getDictionary().size());
    Assert.assertNotNull(d.getDictionary().get("FOO"));
    Assert.assertNotNull(d.getDictionary().get("RECORD_ID"));
    Assert.assertNotNull(d.getDictionary().get("FOO").getData());
    Assert.assertTrue(d.getDictionary().get("FOO").getData().equals("arm"));

    // nothing may remain after the third document
    iterator.next();
    Assert.assertFalse(iterator.hasTop());
}
Also used : EventDataQueryExpressionFilter(datawave.query.predicate.EventDataQueryExpressionFilter) HashSet(java.util.HashSet) Set(java.util.Set) PreNormalizedAttribute(datawave.query.attributes.PreNormalizedAttribute) Range(org.apache.accumulo.core.data.Range) Document(datawave.query.attributes.Document) TermFrequencyAggregator(datawave.query.jexl.functions.TermFrequencyAggregator) TLDTermFrequencyAggregator(datawave.query.tld.TLDTermFrequencyAggregator) Test(org.junit.Test)

Example 3 with TermFrequencyAggregator

use of datawave.query.jexl.functions.TermFrequencyAggregator in project datawave by NationalSecurityAgency.

the class TermFrequencyIndexIteratorTest method testScanMinorRangeTLD.

/**
 * Seeks over a range whose inclusive start and end are the same key
 * (FOO/"baz") using a TLDTermFrequencyAggregator, and checks that the
 * resulting document has two dictionary entries, FOO and RECORD_ID, with the
 * FOO data comparing equal to "baz".
 *
 * Null and size checks use assertNotNull/assertEquals so that a failure
 * reports the offending value rather than a bare AssertionError.
 */
@Test
public void testScanMinorRangeTLD() throws Exception {
    Range r = new Range(getFiKey("row", "type1", "123.345.456", "FOO", "baz"), true, getFiKey("row", "type1", "123.345.456", "FOO", "baz"), true);
    // local aggregator, declared separately from the one the other tests assign
    TermFrequencyAggregator aggregator = new TLDTermFrequencyAggregator(fieldsToKeep, filter, -1);
    TermFrequencyIndexIterator iterator = new TermFrequencyIndexIterator(r, source, null, typeMetadata, true, null, aggregator);
    // jump to the first doc
    iterator.seek(null, null, true);
    Assert.assertTrue(iterator.hasTop());
    Document d = iterator.document();
    Assert.assertNotNull(d);
    Assert.assertEquals(2, d.getDictionary().size());
    Assert.assertNotNull(d.getDictionary().get("FOO"));
    Assert.assertNotNull(d.getDictionary().get("RECORD_ID"));
    Assert.assertNotNull(d.getDictionary().get("FOO").getData());
    // comparison keeps the actual.equals(expected) direction on purpose
    Assert.assertTrue((d.getDictionary().get("FOO").getData()).equals("baz"));
}
Also used : TLDTermFrequencyAggregator(datawave.query.tld.TLDTermFrequencyAggregator) Range(org.apache.accumulo.core.data.Range) Document(datawave.query.attributes.Document) TermFrequencyAggregator(datawave.query.jexl.functions.TermFrequencyAggregator) TLDTermFrequencyAggregator(datawave.query.tld.TLDTermFrequencyAggregator) Test(org.junit.Test)

Example 4 with TermFrequencyAggregator

use of datawave.query.jexl.functions.TermFrequencyAggregator in project datawave by NationalSecurityAgency.

the class TermFrequencyIndexIteratorTest method testEmptyRange.

/**
 * Seeks over the inclusive range FOO/"biz" .. FOO/"bzz" with an aggregator
 * built from null arguments, and expects the iterator to have no top entry.
 */
@Test
public void testEmptyRange() throws Exception {
    Range emptyRange = new Range(
                    getFiKey("row", "type1", "123.345.456", "FOO", "biz"), true,
                    getFiKey("row", "type1", "123.345.456", "FOO", "bzz"), true);
    // neither a field set nor a filter is supplied to the aggregator
    TermFrequencyAggregator bareAggregator = new TermFrequencyAggregator(null, null);
    TermFrequencyIndexIterator tfIterator = new TermFrequencyIndexIterator(
                    emptyRange, source, null, typeMetadata, true, null, bareAggregator);

    // jump to the first doc
    tfIterator.seek(null, null, true);

    boolean foundTop = tfIterator.hasTop();
    Assert.assertFalse(foundTop);
}
Also used : Range(org.apache.accumulo.core.data.Range) TermFrequencyAggregator(datawave.query.jexl.functions.TermFrequencyAggregator) TLDTermFrequencyAggregator(datawave.query.tld.TLDTermFrequencyAggregator) Test(org.junit.Test)

Example 5 with TermFrequencyAggregator

use of datawave.query.jexl.functions.TermFrequencyAggregator in project datawave by NationalSecurityAgency.

the class TermFrequencyIndexIteratorTest method setup.

/**
 * Builds the fixture state before each test: a sorted key/value source of
 * nine term-frequency keys (each paired with an empty Value), type metadata
 * mapping FOO/type1 to LcNoDiacriticsType, a keep-set containing only FOO,
 * and the default filter and aggregator.
 *
 * Entries are created with the diamond form of AbstractMap.SimpleEntry so
 * they are typed to match the list's Map.Entry&lt;Key, Value&gt; element type
 * instead of being raw.
 *
 * @throws ParseException if the JEXL query used for the filter cannot be parsed
 */
@Before
public void setup() throws ParseException {
    List<Map.Entry<Key, Value>> baseSource = new ArrayList<>();
    // uid 123.345.456
    baseSource.add(new AbstractMap.SimpleEntry<>(getTfKey("row", "type1", "123.345.456", "FOO", "bar"), new Value()));
    baseSource.add(new AbstractMap.SimpleEntry<>(getTfKey("row", "type1", "123.345.456", "FOO", "baz"), new Value()));
    // uid 123.345.456.1
    baseSource.add(new AbstractMap.SimpleEntry<>(getTfKey("row", "type1", "123.345.456.1", "FOO", "buf"), new Value()));
    baseSource.add(new AbstractMap.SimpleEntry<>(getTfKey("row", "type1", "123.345.456.1", "FOO", "buz"), new Value()));
    // uid 123.345.456.2, including one key for the field FOOT
    baseSource.add(new AbstractMap.SimpleEntry<>(getTfKey("row", "type1", "123.345.456.2", "FOO", "alf"), new Value()));
    baseSource.add(new AbstractMap.SimpleEntry<>(getTfKey("row", "type1", "123.345.456.2", "FOO", "arm"), new Value()));
    baseSource.add(new AbstractMap.SimpleEntry<>(getTfKey("row", "type1", "123.345.456.2", "FOOT", "armfoot"), new Value()));
    // uid 123.345.456.3 only has the fields AFOO and ZFOO, never FOO itself
    baseSource.add(new AbstractMap.SimpleEntry<>(getTfKey("row", "type1", "123.345.456.3", "AFOO", "alfa"), new Value()));
    baseSource.add(new AbstractMap.SimpleEntry<>(getTfKey("row", "type1", "123.345.456.3", "ZFOO", "alfz"), new Value()));
    source = new SortedListKeyValueIterator(baseSource);
    String lcNoDiacritics = LcNoDiacriticsType.class.getName();
    typeMetadata = new TypeMetadata();
    typeMetadata.put("FOO", "type1", lcNoDiacritics);
    fieldsToKeep = new HashSet<>();
    fieldsToKeep.add("FOO");
    // default filter names all six FOO values present in the source
    filter = new EventDataQueryExpressionFilter(JexlASTHelper.parseJexlQuery("FOO=='bar' || FOO=='baz' || FOO=='buf' || FOO=='buz' || FOO=='alf' || FOO=='arm'"), typeMetadata, fieldsToKeep);
    aggregator = new TermFrequencyAggregator(fieldsToKeep, filter);
}
Also used : AbstractMap(java.util.AbstractMap) SortedListKeyValueIterator(datawave.query.iterator.SortedListKeyValueIterator) TypeMetadata(datawave.query.util.TypeMetadata) EventDataQueryExpressionFilter(datawave.query.predicate.EventDataQueryExpressionFilter) ArrayList(java.util.ArrayList) Value(org.apache.accumulo.core.data.Value) TermFrequencyAggregator(datawave.query.jexl.functions.TermFrequencyAggregator) TLDTermFrequencyAggregator(datawave.query.tld.TLDTermFrequencyAggregator) Before(org.junit.Before)

Aggregations

TermFrequencyAggregator (datawave.query.jexl.functions.TermFrequencyAggregator)6 TLDTermFrequencyAggregator (datawave.query.tld.TLDTermFrequencyAggregator)6 Range (org.apache.accumulo.core.data.Range)5 Test (org.junit.Test)5 Document (datawave.query.attributes.Document)3 EventDataQueryExpressionFilter (datawave.query.predicate.EventDataQueryExpressionFilter)3 PreNormalizedAttribute (datawave.query.attributes.PreNormalizedAttribute)1 SortedListKeyValueIterator (datawave.query.iterator.SortedListKeyValueIterator)1 TypeMetadata (datawave.query.util.TypeMetadata)1 AbstractMap (java.util.AbstractMap)1 ArrayList (java.util.ArrayList)1 HashSet (java.util.HashSet)1 Set (java.util.Set)1 Value (org.apache.accumulo.core.data.Value)1 Before (org.junit.Before)1