use of datawave.query.attributes.Document in project datawave by NationalSecurityAgency.
the class TermFrequencyIndexIteratorTest method testScanPartialRanges.
/**
 * Seeks over a range built from the FOO "alf" and FOO "bar" keys with both inclusive flags set to false, and verifies that the first document returned holds
 * exactly two dictionary entries (FOO and RECORD_ID) with FOO equal to "arm".
 *
 * @throws Exception
 *             propagated from building the keys or driving the iterator
 */
@Test
public void testScanPartialRanges() throws Exception {
    Range r = new Range(getFiKey("row", "type1", "123.345.456", "FOO", "alf"), false, getFiKey("row", "type1", "123.345.456.2", "FOO", "bar"), false);
    TermFrequencyIndexIterator iterator = new TermFrequencyIndexIterator(r, source, null, typeMetadata, true, null, aggregator);

    // jump to the first doc
    iterator.seek(null, null, true);
    Assert.assertTrue(iterator.hasTop());

    Document d = iterator.document();
    Assert.assertNotNull(d);
    // assertEquals reports both the expected and the actual size when it fails
    Assert.assertEquals(2, d.getDictionary().size());
    Assert.assertNotNull(d.getDictionary().get("FOO"));
    Assert.assertNotNull(d.getDictionary().get("RECORD_ID"));

    Object fooData = d.getDictionary().get("FOO").getData();
    Assert.assertNotNull(fooData);
    // keep the comparison as data.equals("arm") so the attribute data's own equals() decides the match
    Assert.assertTrue("unexpected FOO data: " + fooData, fooData.equals("arm"));
}
use of datawave.query.attributes.Document in project datawave by NationalSecurityAgency.
the class QueryIteratorIT method eval.
/**
 * Deserialize and evaluate the document at the top of the iterator, expects 0 to 1 documents.
 * <p>
 * When a hit is expected, the top key's row and column family must match {@code docKeyHit}, the number of document fields (not counting RECORD_ID and, when a
 * hit term is expected, the HIT_TERM field) must equal the number of entries in {@code docKeys}, and every field must carry the expected value(s). The lists
 * held by {@code docKeys} are not modified.
 *
 * @param docKeyHit
 *            the expected hit Key, null if no hit expected
 * @param docKeys
 *            the expected values, keyed by field name
 * @throws IOException
 *             propagated from reading the iterator or deserializing the top value
 */
protected void eval(Key docKeyHit, Map<String,List<String>> docKeys) throws IOException {
    // asserts for a miss
    if (docKeyHit == null) {
        assertFalse(iterator.hasTop());
        return;
    }

    // asserts for a hit
    assertTrue("Expected hit, but got none", iterator.hasTop());
    Key next = iterator.getTopKey();
    assertNotNull(next);
    // expected value first, actual second, so failure messages read correctly
    assertEquals(docKeyHit.getRow().toString(), next.getRow().toString());
    assertEquals(docKeyHit.getColumnFamily().toString(), next.getColumnFamily().toString());

    // asserts for document build
    Value topValue = iterator.getTopValue();
    assertNotNull(topValue);
    Map.Entry<Key,Document> deserializedValue = deserialize(topValue);
    Document d = deserializedValue.getValue();
    assertNotNull(d);

    // -1 is for RECORD_ID field and -1 for HIT_LIST if configured
    int baseSize = d.getDictionary().size() - 1;
    int docSize = isExpectHitTerm() ? baseSize - 1 : baseSize;
    assertEquals("Unexpected doc size: " + d.getDictionary().size() + "\nGot: " + docSize + "\n" + "expected: " + docKeys, docKeys.keySet().size(), docSize);

    // validate the hitlist
    boolean hitTermPresent = d.getDictionary().get(JexlEvaluation.HIT_TERM_FIELD) != null;
    assertEquals("HIT_TERM presence expected: " + isExpectHitTerm() + " actual: " + hitTermPresent, isExpectHitTerm(), hitTermPresent);

    // verify hits for each specified field
    for (Map.Entry<String,List<String>> entry : docKeys.entrySet()) {
        String field = entry.getKey();
        List<String> expected = entry.getValue();

        // fail with a readable message instead of a NullPointerException when the field is absent
        Attribute<?> docAttr = d.getDictionary().get(field);
        assertNotNull("Expected field '" + field + "' was not found in the document", docAttr);

        if (expected.size() == 1) {
            // verify the only doc
            String mismatch = field + ": value: " + docAttr.getData() + " did not match expected value: " + expected.get(0);
            if (docAttr instanceof Attributes) {
                // Special handling of Content attributes, typically when TermFrequencies are looked up.
                // TFs append Content attributes which results in Attributes coming back instead of a single Attribute
                Set<?> datas = (Set<?>) docAttr.getData();
                Set<String> dataStrings = datas.stream().map(Object::toString).collect(Collectors.toSet());
                assertTrue(mismatch, dataStrings.contains(expected.get(0)));
            } else {
                assertTrue(mismatch, docAttr.getData().toString().equals(expected.get(0)));
            }
        } else {
            // the data should be a set, verify it matches expected
            Object dictData = docAttr.getData();
            assertNotNull(dictData);
            assertTrue("Expected " + expected.size() + " values for '" + field + "' found 1, '" + dictData.toString() + "'\nexpected: " + expected,
                            dictData instanceof Set);
            Set<?> dictSet = (Set<?>) dictData;
            assertEquals("Expected " + expected.size() + " values for '" + field + "' found " + dictSet.size() + "\nfound: " + dictSet.toString()
                            + "\nexpected: " + expected, expected.size(), dictSet.size());

            // consume a private copy: the caller's list stays intact, and fixed-size or immutable lists are supported
            List<String> remaining = new java.util.ArrayList<>(expected);
            for (Object element : dictSet) {
                String foundString = ((Attribute<?>) element).getData().toString();
                assertTrue("could not find " + foundString + " in results! Still had " + remaining, remaining.remove(foundString));
            }
            // verify that every expected value was matched
            assertEquals(0, remaining.size());
        }
    }

    // advance past the hit; NOTE(review): the original comment says "there should be no other hits" but nothing asserts it here
    iterator.next();
}
use of datawave.query.attributes.Document in project datawave by NationalSecurityAgency.
the class TermFrequencyIndexIteratorTest method testScanFullRange.
/**
 * Seeks over a range built from the FOO "alf" and FOO "buz" keys with both inclusive flags set to true, and walks three documents. Each document must hold
 * exactly two dictionary entries (FOO and RECORD_ID) and a two-element FOO set. The expected values, in the set's iteration order, are bar/baz, then
 * buf/buz, then alf/arm.
 *
 * @throws IOException
 *             propagated from driving the iterator
 */
@Test
public void testScanFullRange() throws IOException {
    Range r = new Range(getFiKey("row", "type1", "123.345.456", "FOO", "alf"), true, getFiKey("row", "type1", "123.345.456.2", "FOO", "buz"), true);
    TermFrequencyIndexIterator iterator = new TermFrequencyIndexIterator(r, source, null, typeMetadata, true, null, aggregator);

    // jump to the first doc
    iterator.seek(null, null, true);
    Assert.assertTrue(iterator.hasTop());
    Document d = iterator.document();
    Assert.assertNotNull(d);
    Assert.assertEquals(2, d.getDictionary().size());
    Assert.assertNotNull(d.getDictionary().get("FOO"));
    Assert.assertNotNull(d.getDictionary().get("RECORD_ID"));
    Object fooData = d.getDictionary().get("FOO").getData();
    Assert.assertNotNull(fooData);
    Set<?> fooValues = (Set<?>) fooData;
    Assert.assertEquals(2, fooValues.size());
    // elements are cast one at a time, which avoids an unchecked conversion of the whole set
    Iterator<?> i = fooValues.iterator();
    Assert.assertEquals("bar", ((PreNormalizedAttribute) i.next()).getValue());
    Assert.assertEquals("baz", ((PreNormalizedAttribute) i.next()).getValue());

    // second doc
    iterator.next();
    Assert.assertTrue(iterator.hasTop());
    d = iterator.document();
    Assert.assertNotNull(d);
    Assert.assertEquals(2, d.getDictionary().size());
    Assert.assertNotNull(d.getDictionary().get("FOO"));
    Assert.assertNotNull(d.getDictionary().get("RECORD_ID"));
    fooData = d.getDictionary().get("FOO").getData();
    Assert.assertNotNull(fooData);
    fooValues = (Set<?>) fooData;
    Assert.assertEquals(2, fooValues.size());
    i = fooValues.iterator();
    Assert.assertEquals("buf", ((PreNormalizedAttribute) i.next()).getValue());
    Assert.assertEquals("buz", ((PreNormalizedAttribute) i.next()).getValue());

    // third doc
    iterator.next();
    Assert.assertTrue(iterator.hasTop());
    d = iterator.document();
    Assert.assertNotNull(d);
    Assert.assertEquals(2, d.getDictionary().size());
    Assert.assertNotNull(d.getDictionary().get("FOO"));
    Assert.assertNotNull(d.getDictionary().get("RECORD_ID"));
    fooData = d.getDictionary().get("FOO").getData();
    Assert.assertNotNull(fooData);
    fooValues = (Set<?>) fooData;
    Assert.assertEquals(2, fooValues.size());
    i = fooValues.iterator();
    Assert.assertEquals("alf", ((PreNormalizedAttribute) i.next()).getValue());
    Assert.assertEquals("arm", ((PreNormalizedAttribute) i.next()).getValue());
}
use of datawave.query.attributes.Document in project datawave by NationalSecurityAgency.
the class TermFrequencyIndexIteratorTest method testScanFullRangeExclusive.
/**
 * Seeks over a range built from the FOO "alf" and FOO "buz" keys with both inclusive flags set to false, using a TermFrequencyAggregator whose filter is built
 * from the query {@code FOO=='bar' || FOO=='baz' || FOO=='buf' || FOO=='arm'}. Three documents are expected: the first two expose a FOO set whose first two
 * values (in iteration order) are bar/baz and buf/buz, the third exposes the single FOO value "arm". No document may follow the third.
 *
 * @throws IOException
 *             propagated from driving the iterator
 * @throws ParseException
 *             propagated from parsing the filter query
 */
@Test
public void testScanFullRangeExclusive() throws IOException, ParseException {
    Range r = new Range(getFiKey("row", "type1", "123.345.456", "FOO", "alf"), false, getFiKey("row", "type1", "123.345.456.2", "FOO", "buz"), false);
    filter = new EventDataQueryExpressionFilter(JexlASTHelper.parseJexlQuery("FOO=='bar' || FOO=='baz' || FOO=='buf' || FOO=='arm'"), typeMetadata,
                    fieldsToKeep);
    aggregator = new TermFrequencyAggregator(fieldsToKeep, filter);
    TermFrequencyIndexIterator iterator = new TermFrequencyIndexIterator(r, source, null, typeMetadata, true, null, aggregator);

    // jump to the first doc
    iterator.seek(null, null, true);
    Assert.assertTrue(iterator.hasTop());
    Document d = iterator.document();
    Assert.assertNotNull(d);
    Assert.assertEquals(2, d.getDictionary().size());
    Assert.assertNotNull(d.getDictionary().get("FOO"));
    Assert.assertNotNull(d.getDictionary().get("RECORD_ID"));
    Object fooData = d.getDictionary().get("FOO").getData();
    Assert.assertNotNull(fooData);
    // elements are cast one at a time, which avoids an unchecked conversion of the whole set
    Iterator<?> i = ((Set<?>) fooData).iterator();
    Assert.assertEquals("bar", ((PreNormalizedAttribute) i.next()).getValue());
    Assert.assertEquals("baz", ((PreNormalizedAttribute) i.next()).getValue());

    // second doc
    iterator.next();
    Assert.assertTrue(iterator.hasTop());
    d = iterator.document();
    Assert.assertNotNull(d);
    Assert.assertEquals(2, d.getDictionary().size());
    Assert.assertNotNull(d.getDictionary().get("FOO"));
    Assert.assertNotNull(d.getDictionary().get("RECORD_ID"));
    fooData = d.getDictionary().get("FOO").getData();
    Assert.assertNotNull(fooData);
    i = ((Set<?>) fooData).iterator();
    Assert.assertEquals("buf", ((PreNormalizedAttribute) i.next()).getValue());
    Assert.assertEquals("buz", ((PreNormalizedAttribute) i.next()).getValue());

    // third doc: a single value rather than a set
    iterator.next();
    Assert.assertTrue(iterator.hasTop());
    d = iterator.document();
    Assert.assertNotNull(d);
    Assert.assertEquals(2, d.getDictionary().size());
    Assert.assertNotNull(d.getDictionary().get("FOO"));
    Assert.assertNotNull(d.getDictionary().get("RECORD_ID"));
    fooData = d.getDictionary().get("FOO").getData();
    Assert.assertNotNull(fooData);
    // keep the comparison as data.equals("arm") so the attribute data's own equals() decides the match
    Assert.assertTrue("unexpected FOO data: " + fooData, fooData.equals("arm"));

    // nothing should remain after the third doc
    iterator.next();
    Assert.assertFalse(iterator.hasTop());
}
use of datawave.query.attributes.Document in project datawave by NationalSecurityAgency.
the class QueryPruningVisitorTest method falseDoubleAndRewriteTest.
/**
 * Verifies that a three-term AND containing a {@code _NOFIELD_} term is reduced to {@code false} by {@code QueryPruningVisitor.reduce}, with the second
 * argument set to either true or false. Also checks that the reduced query evaluates to false against an empty Document, and that exactly two messages
 * describing the prune were captured by the log appender.
 *
 * @throws ParseException
 *             propagated from parsing the query
 */
@Test
public void falseDoubleAndRewriteTest() throws ParseException {
    String query = "FIELD1 == 'x' && _NOFIELD_ == 'y' && FIELD2 == 'y'";
    ASTJexlScript script = JexlASTHelper.parseJexlQuery(query);

    JexlNode reduced = QueryPruningVisitor.reduce(script, true);
    String reducedQuery = JexlStringBuildingVisitor.buildQuery(reduced);

    // the reduced query must evaluate to false against an empty document
    JexlEvaluation jexlEvaluation = new JexlEvaluation(reducedQuery, new DefaultArithmetic());
    boolean jexlState = jexlEvaluation.apply(new Tuple3<>(new Key(), new Document(), new DatawaveJexlContext()));
    Assert.assertFalse(jexlState);

    Assert.assertEquals("false", reducedQuery);
    Assert.assertEquals("false", JexlStringBuildingVisitor.buildQuery(QueryPruningVisitor.reduce(script, false)));

    // assertEquals reports the actual message count when it fails
    Assert.assertEquals(2, logAppender.getMessages().size());
    Assert.assertEquals("Pruning " + query + " to false", logAppender.getMessages().get(0));
    Assert.assertEquals("Query before prune: " + query + "\nQuery after prune: false", logAppender.getMessages().get(1));
}
Aggregations