use of datawave.query.jexl.functions.TermFrequencyAggregator in project datawave by NationalSecurityAgency.
the class TermFrequencyIndexIteratorTest method testEndingFieldMismatch.
/**
 * Seeks over a range whose start and end keys share the uid argument "123.345.456.3" and whose end
 * key is built with {@code Constants.MAX_UNICODE_STRING} in the field position, then expects the
 * iterator to report no top entry.
 */
@Test
public void testEndingFieldMismatch() throws IOException, ParseException {
    final String uid = "123.345.456.3";
    final String query = "FOO=='bar' || FOO=='baz' || FOO=='buf' || FOO=='arm'";

    // start key is inclusive, end key is exclusive
    Range scanRange = new Range(getFiKey("row", "type1", uid, "FOO", "alf"), true,
                    getFiKey("row", "type1", uid, Constants.MAX_UNICODE_STRING, "buz"), false);

    // this test replaces the shared filter/aggregator fields with ones built from a narrower query
    filter = new EventDataQueryExpressionFilter(JexlASTHelper.parseJexlQuery(query), typeMetadata, fieldsToKeep);
    aggregator = new TermFrequencyAggregator(fieldsToKeep, filter);

    TermFrequencyIndexIterator tfIterator = new TermFrequencyIndexIterator(scanRange, source, null, typeMetadata, true, null, aggregator);

    // position the iterator; nothing is expected to be found
    tfIterator.seek(null, null, true);
    Assert.assertFalse(tfIterator.hasTop());
}
use of datawave.query.jexl.functions.TermFrequencyAggregator in project datawave by NationalSecurityAgency.
the class TermFrequencyIndexIteratorTest method testScanFullRangeExclusive.
/**
 * Scans a range that is exclusive on both its start and end keys and walks the three documents the
 * iterator is expected to produce, checking the FOO values of each one in turn.
 *
 * Null and size checks use {@code assertNotNull}/{@code assertEquals} so a failure reports the
 * offending value instead of a bare "expected true".
 */
@Test
public void testScanFullRangeExclusive() throws IOException, ParseException {
    Range r = new Range(getFiKey("row", "type1", "123.345.456", "FOO", "alf"), false, getFiKey("row", "type1", "123.345.456.2", "FOO", "buz"), false);
    filter = new EventDataQueryExpressionFilter(JexlASTHelper.parseJexlQuery("FOO=='bar' || FOO=='baz' || FOO=='buf' || FOO=='arm'"), typeMetadata, fieldsToKeep);
    aggregator = new TermFrequencyAggregator(fieldsToKeep, filter);
    TermFrequencyIndexIterator iterator = new TermFrequencyIndexIterator(r, source, null, typeMetadata, true, null, aggregator);

    // jump to the first doc
    iterator.seek(null, null, true);

    // first document: FOO is a set iterated as "bar" then "baz"
    Assert.assertTrue(iterator.hasTop());
    Document d = iterator.document();
    Assert.assertNotNull(d);
    Assert.assertEquals(2, d.getDictionary().size());
    Assert.assertNotNull(d.getDictionary().get("FOO"));
    Assert.assertNotNull(d.getDictionary().get("RECORD_ID"));
    Assert.assertNotNull(d.getDictionary().get("FOO").getData());
    // the equals checks below are kept receiver-side, exactly as originally written
    Iterator<PreNormalizedAttribute> i = ((Set) d.getDictionary().get("FOO").getData()).iterator();
    Assert.assertTrue(i.next().getValue().equals("bar"));
    Assert.assertTrue(i.next().getValue().equals("baz"));

    // second document: FOO is a set iterated as "buf" then "buz"
    iterator.next();
    Assert.assertTrue(iterator.hasTop());
    d = iterator.document();
    Assert.assertNotNull(d);
    Assert.assertEquals(2, d.getDictionary().size());
    Assert.assertNotNull(d.getDictionary().get("FOO"));
    Assert.assertNotNull(d.getDictionary().get("RECORD_ID"));
    Assert.assertNotNull(d.getDictionary().get("FOO").getData());
    i = ((Set) d.getDictionary().get("FOO").getData()).iterator();
    Assert.assertTrue(i.next().getValue().equals("buf"));
    Assert.assertTrue(i.next().getValue().equals("buz"));

    // third document: FOO holds the single value "arm"
    iterator.next();
    Assert.assertTrue(iterator.hasTop());
    d = iterator.document();
    Assert.assertNotNull(d);
    Assert.assertEquals(2, d.getDictionary().size());
    Assert.assertNotNull(d.getDictionary().get("FOO"));
    Assert.assertNotNull(d.getDictionary().get("RECORD_ID"));
    Assert.assertNotNull(d.getDictionary().get("FOO").getData());
    Assert.assertTrue(d.getDictionary().get("FOO").getData().equals("arm"));

    // nothing is expected after the third document
    iterator.next();
    Assert.assertFalse(iterator.hasTop());
}
use of datawave.query.jexl.functions.TermFrequencyAggregator in project datawave by NationalSecurityAgency.
the class TermFrequencyIndexIteratorTest method testScanMinorRangeTLD.
/**
 * Scans a range whose inclusive start and end keys are the same (FOO "baz" for uid "123.345.456")
 * using a {@code TLDTermFrequencyAggregator}, and checks that the resulting document carries
 * FOO == "baz" alongside a RECORD_ID entry.
 *
 * Null and size checks use {@code assertNotNull}/{@code assertEquals} so a failure reports the
 * offending value instead of a bare "expected true".
 */
@Test
public void testScanMinorRangeTLD() throws Exception {
    Range r = new Range(getFiKey("row", "type1", "123.345.456", "FOO", "baz"), true, getFiKey("row", "type1", "123.345.456", "FOO", "baz"), true);
    // local aggregator used only by this test; built with the filter and field set already in scope
    TermFrequencyAggregator aggregator = new TLDTermFrequencyAggregator(fieldsToKeep, filter, -1);
    TermFrequencyIndexIterator iterator = new TermFrequencyIndexIterator(r, source, null, typeMetadata, true, null, aggregator);

    // jump to the first doc
    iterator.seek(null, null, true);

    Assert.assertTrue(iterator.hasTop());
    Document d = iterator.document();
    Assert.assertNotNull(d);
    Assert.assertEquals(2, d.getDictionary().size());
    Assert.assertNotNull(d.getDictionary().get("FOO"));
    Assert.assertNotNull(d.getDictionary().get("RECORD_ID"));
    Assert.assertNotNull(d.getDictionary().get("FOO").getData());
    // kept as a receiver-side equals check, exactly as originally written
    Assert.assertTrue((d.getDictionary().get("FOO").getData()).equals("baz"));
}
use of datawave.query.jexl.functions.TermFrequencyAggregator in project datawave by NationalSecurityAgency.
the class TermFrequencyIndexIteratorTest method testEmptyRange.
/**
 * Seeks over the inclusive range FOO "biz" through FOO "bzz" for uid "123.345.456", using an
 * aggregator constructed with two null arguments, and expects the iterator to report no top entry.
 */
@Test
public void testEmptyRange() throws Exception {
    final String uid = "123.345.456";

    // both ends of the range are inclusive
    Range emptyRange = new Range(getFiKey("row", "type1", uid, "FOO", "biz"), true,
                    getFiKey("row", "type1", uid, "FOO", "bzz"), true);

    TermFrequencyAggregator bareAggregator = new TermFrequencyAggregator(null, null);
    TermFrequencyIndexIterator tfIterator = new TermFrequencyIndexIterator(emptyRange, source, null, typeMetadata, true, null, bareAggregator);

    // position the iterator; nothing is expected to be found
    tfIterator.seek(null, null, true);
    Assert.assertFalse(tfIterator.hasTop());
}
use of datawave.query.jexl.functions.TermFrequencyAggregator in project datawave by NationalSecurityAgency.
the class TermFrequencyIndexIteratorTest method setup.
/**
 * Builds the shared fixture before each test: an in-memory key/value source, the type metadata for
 * FOO, the set of fields to keep, and a default filter and aggregator.
 *
 * The source holds nine entries built by {@code getTfKey} across four uids: "123.345.456",
 * "123.345.456.1", "123.345.456.2" and "123.345.456.3". Besides FOO, it includes one FOOT entry and
 * the AFOO/ZFOO entries, none of which are in {@code fieldsToKeep}.
 */
@Before
public void setup() throws ParseException {
    // diamond form avoids the raw-type SimpleEntry and its unchecked conversion into the typed list
    List<Map.Entry<Key, Value>> baseSource = new ArrayList<>();
    baseSource.add(new AbstractMap.SimpleEntry<>(getTfKey("row", "type1", "123.345.456", "FOO", "bar"), new Value()));
    baseSource.add(new AbstractMap.SimpleEntry<>(getTfKey("row", "type1", "123.345.456", "FOO", "baz"), new Value()));
    baseSource.add(new AbstractMap.SimpleEntry<>(getTfKey("row", "type1", "123.345.456.1", "FOO", "buf"), new Value()));
    baseSource.add(new AbstractMap.SimpleEntry<>(getTfKey("row", "type1", "123.345.456.1", "FOO", "buz"), new Value()));
    baseSource.add(new AbstractMap.SimpleEntry<>(getTfKey("row", "type1", "123.345.456.2", "FOO", "alf"), new Value()));
    baseSource.add(new AbstractMap.SimpleEntry<>(getTfKey("row", "type1", "123.345.456.2", "FOO", "arm"), new Value()));
    baseSource.add(new AbstractMap.SimpleEntry<>(getTfKey("row", "type1", "123.345.456.2", "FOOT", "armfoot"), new Value()));
    baseSource.add(new AbstractMap.SimpleEntry<>(getTfKey("row", "type1", "123.345.456.3", "AFOO", "alfa"), new Value()));
    baseSource.add(new AbstractMap.SimpleEntry<>(getTfKey("row", "type1", "123.345.456.3", "ZFOO", "alfz"), new Value()));
    source = new SortedListKeyValueIterator(baseSource);

    // register FOO for "type1" under the LcNoDiacriticsType class name
    String lcNoDiacritics = LcNoDiacriticsType.class.getName();
    typeMetadata = new TypeMetadata();
    typeMetadata.put("FOO", "type1", lcNoDiacritics);

    fieldsToKeep = new HashSet<>();
    fieldsToKeep.add("FOO");

    // default filter names every FOO value present in the source; individual tests may replace it
    filter = new EventDataQueryExpressionFilter(JexlASTHelper.parseJexlQuery("FOO=='bar' || FOO=='baz' || FOO=='buf' || FOO=='buz' || FOO=='alf' || FOO=='arm'"), typeMetadata, fieldsToKeep);
    aggregator = new TermFrequencyAggregator(fieldsToKeep, filter);
}
Aggregations