Search in sources :

Example 1 with TLDTermFrequencyAggregator

Use of datawave.query.tld.TLDTermFrequencyAggregator in project datawave by NationalSecurityAgency.

From the class TermFrequencyIndexIteratorTest, method testScanMinorRangeTLD.

/**
 * Scans a range whose start and end are the same inclusive field-index key (FOO = "baz") using a
 * {@link TLDTermFrequencyAggregator}, then checks the document the iterator produces: it must hold
 * exactly two dictionary entries (FOO and RECORD_ID) and FOO's data must equal "baz".
 */
@Test
public void testScanMinorRangeTLD() throws Exception {
    // start and end key are identical and both inclusive
    Range r = new Range(getFiKey("row", "type1", "123.345.456", "FOO", "baz"), true, getFiKey("row", "type1", "123.345.456", "FOO", "baz"), true);
    TermFrequencyAggregator aggregator = new TLDTermFrequencyAggregator(fieldsToKeep, filter, -1);
    TermFrequencyIndexIterator iterator = new TermFrequencyIndexIterator(r, source, null, typeMetadata, true, null, aggregator);
    // jump to the first doc
    iterator.seek(null, null, true);
    Assert.assertTrue(iterator.hasTop());
    Document d = iterator.document();
    // assertNotNull/assertEquals report the offending value on failure, unlike assertTrue(cond)
    Assert.assertNotNull(d);
    Assert.assertEquals(2, d.getDictionary().size());
    Assert.assertNotNull(d.getDictionary().get("FOO"));
    Assert.assertNotNull(d.getDictionary().get("RECORD_ID"));
    Object fooData = d.getDictionary().get("FOO").getData();
    Assert.assertNotNull(fooData);
    // keep fooData as the equals() receiver, exactly as the original comparison did
    Assert.assertTrue("unexpected FOO data: " + fooData, fooData.equals("baz"));
}
Also used : TLDTermFrequencyAggregator(datawave.query.tld.TLDTermFrequencyAggregator) Range(org.apache.accumulo.core.data.Range) Document(datawave.query.attributes.Document) TermFrequencyAggregator(datawave.query.jexl.functions.TermFrequencyAggregator) Test(org.junit.Test)

Example 2 with TLDTermFrequencyAggregator

Use of datawave.query.tld.TLDTermFrequencyAggregator in project datawave by NationalSecurityAgency.

From the class TermFrequencyIndexIteratorTest, method testScanFullRangeTLD.

/**
 * Scans an inclusive range from FOO = "alf" (uid 123.345.456) to FOO = "buz" (uid 123.345.456.2)
 * using a {@link TLDTermFrequencyAggregator}. Expects one document with two dictionary entries
 * (FOO and RECORD_ID), where FOO holds six values in a fixed iteration order, and expects the
 * iterator to be exhausted after one call to {@code next()}.
 */
@Test
public void testScanFullRangeTLD() throws IOException {
    Range r = new Range(getFiKey("row", "type1", "123.345.456", "FOO", "alf"), true, getFiKey("row", "type1", "123.345.456.2", "FOO", "buz"), true);
    aggregator = new TLDTermFrequencyAggregator(fieldsToKeep, filter, -1);
    TermFrequencyIndexIterator iterator = new TermFrequencyIndexIterator(r, source, null, typeMetadata, true, null, aggregator);
    // jump to the first doc
    iterator.seek(null, null, true);
    Assert.assertTrue(iterator.hasTop());
    Document d = iterator.document();
    Assert.assertNotNull(d);
    Assert.assertEquals(2, d.getDictionary().size());
    Assert.assertNotNull(d.getDictionary().get("FOO"));
    Assert.assertNotNull(d.getDictionary().get("RECORD_ID"));
    Object fooData = d.getDictionary().get("FOO").getData();
    Assert.assertNotNull(fooData);
    // wildcard cast instead of a raw Set; each element is cast explicitly below
    Set<?> fooValues = (Set<?>) fooData;
    Assert.assertEquals(6, fooValues.size());
    // values are compared in the set's iteration order, as in the original assertions
    Iterator<?> values = fooValues.iterator();
    for (String expected : new String[] {"bar", "baz", "buf", "buz", "alf", "arm"}) {
        Assert.assertEquals(expected, ((PreNormalizedAttribute) values.next()).getValue());
    }
    // no second document is expected
    iterator.next();
    Assert.assertFalse(iterator.hasTop());
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) TLDTermFrequencyAggregator(datawave.query.tld.TLDTermFrequencyAggregator) PreNormalizedAttribute(datawave.query.attributes.PreNormalizedAttribute) Range(org.apache.accumulo.core.data.Range) Document(datawave.query.attributes.Document) Test(org.junit.Test)

Example 3 with TLDTermFrequencyAggregator

Use of datawave.query.tld.TLDTermFrequencyAggregator in project datawave by NationalSecurityAgency.

From the class TermFrequencyIndexIteratorTest, method testScanFullRangeExclusiveTLD.

/**
 * Scans a range from FOO = "alf" to FOO = "buz" with both end keys exclusive, using a
 * {@link TLDTermFrequencyAggregator} backed by a {@link TLDEventDataFilter} built from a query on
 * FOO values bar, baz, buf and arm. Expects one document with two dictionary entries (FOO and
 * RECORD_ID) whose FOO values start with bar, baz, buf, arm in iteration order, and expects the
 * iterator to be exhausted after one call to {@code next()}.
 */
@Test
public void testScanFullRangeExclusiveTLD() throws IOException, ParseException {
    // both boundary keys are excluded from the range
    Range r = new Range(getFiKey("row", "type1", "123.345.456", "FOO", "alf"), false, getFiKey("row", "type1", "123.345.456.2", "FOO", "buz"), false);
    filter = new TLDEventDataFilter(JexlASTHelper.parseJexlQuery("FOO=='bar' || FOO=='baz' || FOO=='buf' || FOO=='arm'"), typeMetadata, null, null, -1, -1, Collections.emptyMap(), null, fieldsToKeep);
    aggregator = new TLDTermFrequencyAggregator(fieldsToKeep, filter, -1);
    TermFrequencyIndexIterator iterator = new TermFrequencyIndexIterator(r, source, null, typeMetadata, true, null, aggregator);
    // jump to the first doc
    iterator.seek(null, null, true);
    Assert.assertTrue(iterator.hasTop());
    Document d = iterator.document();
    Assert.assertNotNull(d);
    Assert.assertEquals(2, d.getDictionary().size());
    Assert.assertNotNull(d.getDictionary().get("FOO"));
    Assert.assertNotNull(d.getDictionary().get("RECORD_ID"));
    Object fooData = d.getDictionary().get("FOO").getData();
    Assert.assertNotNull(fooData);
    // wildcard cast instead of a raw Set; each element is cast explicitly below
    Iterator<?> values = ((Set<?>) fooData).iterator();
    // only the leading values are checked; the set size itself is not asserted here
    for (String expected : new String[] {"bar", "baz", "buf", "arm"}) {
        Assert.assertEquals(expected, ((PreNormalizedAttribute) values.next()).getValue());
    }
    // no second document is expected
    iterator.next();
    Assert.assertFalse(iterator.hasTop());
}
Also used : TLDEventDataFilter(datawave.query.predicate.TLDEventDataFilter) HashSet(java.util.HashSet) Set(java.util.Set) TLDTermFrequencyAggregator(datawave.query.tld.TLDTermFrequencyAggregator) PreNormalizedAttribute(datawave.query.attributes.PreNormalizedAttribute) Range(org.apache.accumulo.core.data.Range) Document(datawave.query.attributes.Document) Test(org.junit.Test)

Example 4 with TLDTermFrequencyAggregator

Use of datawave.query.tld.TLDTermFrequencyAggregator in project datawave by NationalSecurityAgency.

From the class TermFrequencyIndexIteratorTest, method testScanFullRangeExclusiveEventDataQueryExpressionFilter.

/**
 * Scans a range from FOO = "alf" to FOO = "buz" with both end keys exclusive, using a
 * {@link TLDTermFrequencyAggregator} backed by an {@link EventDataQueryExpressionFilter} built from
 * a query on FOO values bar, baz, buf and arm. Expects one document with two dictionary entries
 * (FOO and RECORD_ID) whose FOO values start with bar, baz, buf, buz, alf, arm in iteration order,
 * and expects the iterator to be exhausted after one call to {@code next()}.
 */
@Test
public void testScanFullRangeExclusiveEventDataQueryExpressionFilter() throws IOException, ParseException {
    // both boundary keys are excluded from the range
    Range r = new Range(getFiKey("row", "type1", "123.345.456", "FOO", "alf"), false, getFiKey("row", "type1", "123.345.456.2", "FOO", "buz"), false);
    filter = new EventDataQueryExpressionFilter(JexlASTHelper.parseJexlQuery("FOO=='bar' || FOO=='baz' || FOO=='buf' || FOO=='arm'"), typeMetadata, fieldsToKeep);
    aggregator = new TLDTermFrequencyAggregator(fieldsToKeep, filter, -1);
    TermFrequencyIndexIterator iterator = new TermFrequencyIndexIterator(r, source, null, typeMetadata, true, null, aggregator);
    // jump to the first doc
    iterator.seek(null, null, true);
    Assert.assertTrue(iterator.hasTop());
    Document d = iterator.document();
    Assert.assertNotNull(d);
    Assert.assertEquals(2, d.getDictionary().size());
    Assert.assertNotNull(d.getDictionary().get("FOO"));
    Assert.assertNotNull(d.getDictionary().get("RECORD_ID"));
    Object fooData = d.getDictionary().get("FOO").getData();
    Assert.assertNotNull(fooData);
    // wildcard cast instead of a raw Set; each element is cast explicitly below
    Iterator<?> values = ((Set<?>) fooData).iterator();
    // NOTE(review): the filter expression names four values, yet six (including "buz" and "alf")
    // are asserted below - confirm this is the intended behavior for this filter type
    for (String expected : new String[] {"bar", "baz", "buf", "buz", "alf", "arm"}) {
        Assert.assertEquals(expected, ((PreNormalizedAttribute) values.next()).getValue());
    }
    // no second document is expected
    iterator.next();
    Assert.assertFalse(iterator.hasTop());
}
Also used : EventDataQueryExpressionFilter(datawave.query.predicate.EventDataQueryExpressionFilter) HashSet(java.util.HashSet) Set(java.util.Set) TLDTermFrequencyAggregator(datawave.query.tld.TLDTermFrequencyAggregator) PreNormalizedAttribute(datawave.query.attributes.PreNormalizedAttribute) Range(org.apache.accumulo.core.data.Range) Document(datawave.query.attributes.Document) Test(org.junit.Test)

Example 5 with TLDTermFrequencyAggregator

Use of datawave.query.tld.TLDTermFrequencyAggregator in project datawave by NationalSecurityAgency.

From the class TermFrequencyIndexIteratorTest, method testScanPartialRangesTLD.

/**
 * Scans a range from FOO = "alf" (uid 123.345.456) to FOO = "bar" (uid 123.345.456.2) with both
 * end keys exclusive, using a {@link TLDTermFrequencyAggregator}. Expects a document with exactly
 * two dictionary entries (FOO and RECORD_ID) whose FOO data equals "arm".
 */
@Test
public void testScanPartialRangesTLD() throws Exception {
    // both boundary keys are excluded from the range
    Range r = new Range(getFiKey("row", "type1", "123.345.456", "FOO", "alf"), false, getFiKey("row", "type1", "123.345.456.2", "FOO", "bar"), false);
    aggregator = new TLDTermFrequencyAggregator(fieldsToKeep, filter, -1);
    TermFrequencyIndexIterator iterator = new TermFrequencyIndexIterator(r, source, null, typeMetadata, true, null, aggregator);
    // jump to the first doc
    iterator.seek(null, null, true);
    Assert.assertTrue(iterator.hasTop());
    Document d = iterator.document();
    Assert.assertNotNull(d);
    // assertEquals reports the actual size on failure, replacing the hand-built message
    Assert.assertEquals(2, d.getDictionary().size());
    Assert.assertNotNull(d.getDictionary().get("FOO"));
    Assert.assertNotNull(d.getDictionary().get("RECORD_ID"));
    Object fooData = d.getDictionary().get("FOO").getData();
    Assert.assertNotNull(fooData);
    // keep fooData as the equals() receiver, exactly as the original comparison did
    Assert.assertTrue("unexpected FOO data: " + fooData, fooData.equals("arm"));
}
Also used : TLDTermFrequencyAggregator(datawave.query.tld.TLDTermFrequencyAggregator) Range(org.apache.accumulo.core.data.Range) Document(datawave.query.attributes.Document) Test(org.junit.Test)

Aggregations

Document (datawave.query.attributes.Document)5 TLDTermFrequencyAggregator (datawave.query.tld.TLDTermFrequencyAggregator)5 Range (org.apache.accumulo.core.data.Range)5 Test (org.junit.Test)5 PreNormalizedAttribute (datawave.query.attributes.PreNormalizedAttribute)3 HashSet (java.util.HashSet)3 Set (java.util.Set)3 TermFrequencyAggregator (datawave.query.jexl.functions.TermFrequencyAggregator)1 EventDataQueryExpressionFilter (datawave.query.predicate.EventDataQueryExpressionFilter)1 TLDEventDataFilter (datawave.query.predicate.TLDEventDataFilter)1