Search in sources :

Example 21 with Document

use of datawave.query.attributes.Document in project datawave by NationalSecurityAgency.

the class TermFrequencyIndexIteratorTest method testScanPartialRanges.

/**
 * Scans a range whose start and end keys are both exclusive and checks that the first document returned carries a single FOO value of "arm" alongside
 * its RECORD_ID.
 */
@Test
public void testScanPartialRanges() throws Exception {
    Range partialRange = new Range(getFiKey("row", "type1", "123.345.456", "FOO", "alf"), false, getFiKey("row", "type1", "123.345.456.2", "FOO", "bar"), false);
    TermFrequencyIndexIterator tfIterator = new TermFrequencyIndexIterator(partialRange, source, null, typeMetadata, true, null, aggregator);

    // position the iterator on the first document
    tfIterator.seek(null, null, true);
    Assert.assertTrue(tfIterator.hasTop());

    Document doc = tfIterator.document();
    Assert.assertNotNull(doc);

    // exactly two dictionary entries are expected: FOO and RECORD_ID
    int dictionarySize = doc.getDictionary().size();
    Assert.assertTrue(dictionarySize + "", dictionarySize == 2);
    Assert.assertNotNull(doc.getDictionary().get("FOO"));
    Assert.assertNotNull(doc.getDictionary().get("RECORD_ID"));

    // the single FOO value is compared against the attribute data directly
    Object fooData = doc.getDictionary().get("FOO").getData();
    Assert.assertNotNull(fooData);
    Assert.assertTrue(fooData.equals("arm"));
}
Also used : Range(org.apache.accumulo.core.data.Range) Document(datawave.query.attributes.Document) Test(org.junit.Test)

Example 22 with Document

use of datawave.query.attributes.Document in project datawave by NationalSecurityAgency.

the class QueryIteratorIT method eval.

/**
 * Deserializes the iterator's top value into a {@link Document} and verifies it against the expected hit. Expects 0 to 1 documents: when no hit is
 * expected the iterator must have no top, otherwise only the current top document is examined.
 *
 * @param docKeyHit
 *            the expected hit Key, null if no hit expected
 * @param docKeys
 *            the expected values keyed by field name; neither the map nor its lists are modified
 * @throws IOException
 *             if deserializing the top value or advancing the iterator fails
 */
protected void eval(Key docKeyHit, Map<String, List<String>> docKeys) throws IOException {
    // no hit expected: the iterator must not have a top entry
    if (docKeyHit == null) {
        assertFalse(iterator.hasTop());
        return;
    }

    // asserts for a hit; expected value goes first so failure messages read correctly
    assertTrue("Expected hit, but got none", iterator.hasTop());
    Key next = iterator.getTopKey();
    assertNotNull(next);
    assertEquals(docKeyHit.getRow().toString(), next.getRow().toString());
    assertEquals(docKeyHit.getColumnFamily().toString(), next.getColumnFamily().toString());

    // asserts for document build
    Value topValue = iterator.getTopValue();
    assertNotNull(topValue);
    Map.Entry<Key, Document> deserializedValue = deserialize(topValue);
    Document d = deserializedValue.getValue();
    assertNotNull(d);

    // -1 is for RECORD_ID field and -1 for HIT_LIST if configured
    int baseSize = d.getDictionary().size() - 1;
    int docSize = isExpectHitTerm() ? baseSize - 1 : baseSize;
    assertEquals("Unexpected doc size: " + d.getDictionary().size() + "\nGot: " + docSize + "\n" + "expected: " + docKeys, docKeys.keySet().size(), docSize);

    // validate the hitlist
    boolean hitTermPresent = d.getDictionary().get(JexlEvaluation.HIT_TERM_FIELD) != null;
    assertEquals("HIT_TERM presence expected: " + isExpectHitTerm() + " actual: " + hitTermPresent, isExpectHitTerm(), hitTermPresent);

    // verify hits for each specified field
    for (Map.Entry<String, List<String>> fieldEntry : docKeys.entrySet()) {
        String field = fieldEntry.getKey();
        List<String> expected = fieldEntry.getValue();

        // fail with context instead of a bare NullPointerException when the field is absent
        Attribute<?> docAttr = d.getDictionary().get(field);
        assertNotNull("No attribute found for field '" + field + "'\nexpected: " + expected, docAttr);

        if (expected.size() == 1) {
            // verify the only doc
            String expectedValue = expected.get(0);
            boolean stringsMatch;
            if (docAttr instanceof Attributes) {
                // Special handling of Content attributes, typically when TermFrequencies are looked up.
                // TFs append Content attributes which results in Attributes coming back instead of a single Attribute
                Set<?> datas = (Set<?>) docAttr.getData();
                stringsMatch = datas.stream().anyMatch(data -> data.toString().equals(expectedValue));
            } else {
                stringsMatch = docAttr.getData().toString().equals(expectedValue);
            }
            assertTrue(field + ": value: " + docAttr.getData() + " did not match expected value: " + expectedValue, stringsMatch);
        } else {
            // the data should be a set, verify it matches expected
            Object dictData = docAttr.getData();
            assertNotNull(dictData);
            assertTrue("Expected " + expected.size() + " values for '" + field + "' found 1, '" + dictData.toString() + "'\nexpected: " + expected, dictData instanceof Set);
            Set<?> dictSet = (Set<?>) dictData;
            assertEquals("Expected " + expected.size() + " values for '" + field + "' found " + dictSet.size() + "\nfound: " + dictSet.toString() + "\nexpected: " + expected, expected.size(), dictSet.size());

            // tick values off a private copy so the caller's list is never mutated
            // (and fixed-size or immutable lists do not blow up on remove)
            List<String> remaining = new java.util.ArrayList<>(expected);
            for (Object element : dictSet) {
                String foundString = ((Attribute<?>) element).getData().toString();
                assertTrue("could not find " + foundString + " in results! Still had " + remaining, remaining.remove(foundString));
            }
            // verify that every expected value was matched
            assertEquals("Expected values not found for '" + field + "': " + remaining, 0, remaining.size());
        }
    }

    // there should be no other hits
    // NOTE(review): the iterator is advanced but nothing asserts that it has no top afterwards - confirm whether that check is intended
    iterator.next();
}
Also used : Set(java.util.Set) Attribute(datawave.query.attributes.Attribute) Attributes(datawave.query.attributes.Attributes) Document(datawave.query.attributes.Document) Value(org.apache.accumulo.core.data.Value) Map(java.util.Map) HashMap(java.util.HashMap) AbstractMap(java.util.AbstractMap) Key(org.apache.accumulo.core.data.Key) PartialKey(org.apache.accumulo.core.data.PartialKey)

Example 23 with Document

use of datawave.query.attributes.Document in project datawave by NationalSecurityAgency.

the class TermFrequencyIndexIteratorTest method testScanFullRange.

/**
 * Scans a range that is inclusive on both ends and verifies the three documents returned: each must hold exactly FOO and RECORD_ID, with FOO carrying
 * the expected pair of values.
 * <p>
 * The value checks depend on the iteration order of the Set returned as the FOO data.
 */
@Test
public void testScanFullRange() throws IOException {
    Range r = new Range(getFiKey("row", "type1", "123.345.456", "FOO", "alf"), true, getFiKey("row", "type1", "123.345.456.2", "FOO", "buz"), true);
    TermFrequencyIndexIterator iterator = new TermFrequencyIndexIterator(r, source, null, typeMetadata, true, null, aggregator);

    // expected FOO values, one row per document, in the order the documents are returned
    String[][] expectedFooValues = { { "bar", "baz" }, { "buf", "buz" }, { "alf", "arm" } };

    // jump to the first doc
    iterator.seek(null, null, true);
    for (int docIndex = 0; docIndex < expectedFooValues.length; docIndex++) {
        // every document after the first requires advancing the iterator
        if (docIndex > 0) {
            iterator.next();
        }
        String label = "document " + docIndex;
        Assert.assertTrue(label + " is missing", iterator.hasTop());
        Document d = iterator.document();
        Assert.assertNotNull(label, d);
        Assert.assertEquals(label + " dictionary size", 2, d.getDictionary().size());
        Assert.assertNotNull(label + " FOO", d.getDictionary().get("FOO"));
        Assert.assertNotNull(label + " RECORD_ID", d.getDictionary().get("RECORD_ID"));

        Object fooData = d.getDictionary().get("FOO").getData();
        Assert.assertNotNull(label + " FOO data", fooData);
        Set<?> fooValues = (Set<?>) fooData;
        Assert.assertEquals(label + " FOO value count", expectedFooValues[docIndex].length, fooValues.size());

        Iterator<?> i = fooValues.iterator();
        for (String expectedValue : expectedFooValues[docIndex]) {
            Assert.assertEquals(label + " FOO value", expectedValue, ((PreNormalizedAttribute) i.next()).getValue());
        }
    }
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) PreNormalizedAttribute(datawave.query.attributes.PreNormalizedAttribute) Range(org.apache.accumulo.core.data.Range) Document(datawave.query.attributes.Document) Test(org.junit.Test)

Example 24 with Document

use of datawave.query.attributes.Document in project datawave by NationalSecurityAgency.

the class TermFrequencyIndexIteratorTest method testScanFullRangeExclusive.

/**
 * Scans a range that is exclusive on both ends, with an aggregator filtered on a JEXL expression, and verifies the three documents returned followed by
 * exhaustion of the iterator. The first two documents carry a Set of FOO values; the third carries a single value.
 * <p>
 * The value checks on the first two documents depend on the iteration order of the Set returned as the FOO data.
 */
@Test
public void testScanFullRangeExclusive() throws IOException, ParseException {
    Range r = new Range(getFiKey("row", "type1", "123.345.456", "FOO", "alf"), false, getFiKey("row", "type1", "123.345.456.2", "FOO", "buz"), false);
    filter = new EventDataQueryExpressionFilter(JexlASTHelper.parseJexlQuery("FOO=='bar' || FOO=='baz' || FOO=='buf' || FOO=='arm'"), typeMetadata, fieldsToKeep);
    aggregator = new TermFrequencyAggregator(fieldsToKeep, filter);
    TermFrequencyIndexIterator iterator = new TermFrequencyIndexIterator(r, source, null, typeMetadata, true, null, aggregator);

    // expected FOO values, one row per document, in the order the documents are returned
    String[][] expectedFooValues = { { "bar", "baz" }, { "buf", "buz" }, { "arm" } };

    // jump to the first doc
    iterator.seek(null, null, true);
    for (int docIndex = 0; docIndex < expectedFooValues.length; docIndex++) {
        // every document after the first requires advancing the iterator
        if (docIndex > 0) {
            iterator.next();
        }
        String label = "document " + docIndex;
        Assert.assertTrue(label + " is missing", iterator.hasTop());
        Document d = iterator.document();
        Assert.assertNotNull(label, d);
        Assert.assertEquals(label + " dictionary size", 2, d.getDictionary().size());
        Assert.assertNotNull(label + " FOO", d.getDictionary().get("FOO"));
        Assert.assertNotNull(label + " RECORD_ID", d.getDictionary().get("RECORD_ID"));

        Object fooData = d.getDictionary().get("FOO").getData();
        Assert.assertNotNull(label + " FOO data", fooData);

        String[] expectedValues = expectedFooValues[docIndex];
        if (expectedValues.length == 1) {
            // a lone value is compared against the attribute data itself rather than a Set
            Assert.assertTrue(label + " FOO value was " + fooData, fooData.equals(expectedValues[0]));
        } else {
            Iterator<?> i = ((Set<?>) fooData).iterator();
            for (String expectedValue : expectedValues) {
                Assert.assertEquals(label + " FOO value", expectedValue, ((PreNormalizedAttribute) i.next()).getValue());
            }
        }
    }

    // nothing may remain after the last expected document
    iterator.next();
    Assert.assertFalse(iterator.hasTop());
}
Also used : EventDataQueryExpressionFilter(datawave.query.predicate.EventDataQueryExpressionFilter) HashSet(java.util.HashSet) Set(java.util.Set) PreNormalizedAttribute(datawave.query.attributes.PreNormalizedAttribute) Range(org.apache.accumulo.core.data.Range) Document(datawave.query.attributes.Document) TermFrequencyAggregator(datawave.query.jexl.functions.TermFrequencyAggregator) TLDTermFrequencyAggregator(datawave.query.tld.TLDTermFrequencyAggregator) Test(org.junit.Test)

Example 25 with Document

use of datawave.query.attributes.Document in project datawave by NationalSecurityAgency.

the class QueryPruningVisitorTest method falseDoubleAndRewriteTest.

/**
 * Prunes a three-term conjunction containing a _NOFIELD_ term and checks that it reduces to "false", that the reduced query evaluates to false against
 * an empty document, and that exactly two log messages describing the prune were captured.
 */
@Test
public void falseDoubleAndRewriteTest() throws ParseException {
    final String query = "FIELD1 == 'x' && _NOFIELD_ == 'y' && FIELD2 == 'y'";
    final ASTJexlScript script = JexlASTHelper.parseJexlQuery(query);

    // first reduction; its result is both evaluated and rendered back to a string
    final JexlNode reduced = QueryPruningVisitor.reduce(script, true);
    final String reducedQuery = JexlStringBuildingVisitor.buildQuery(reduced);
    final JexlEvaluation jexlEvaluation = new JexlEvaluation(reducedQuery, new DefaultArithmetic());

    // evaluate against an empty key, document and context
    final boolean jexlState = jexlEvaluation.apply(new Tuple3<>(new Key(), new Document(), new DatawaveJexlContext()));
    Assert.assertFalse(jexlState);

    // both reduce variants must render to the same pruned query
    Assert.assertEquals("false", JexlStringBuildingVisitor.buildQuery(reduced));
    final JexlNode reducedAgain = QueryPruningVisitor.reduce(script, false);
    Assert.assertEquals("false", JexlStringBuildingVisitor.buildQuery(reducedAgain));

    // exactly two messages are expected in the captured log
    final int messageCount = logAppender.getMessages().size();
    Assert.assertTrue(messageCount == 2);
    Assert.assertEquals("Pruning " + query + " to false", logAppender.getMessages().get(0));
    Assert.assertEquals("Query before prune: " + query + "\nQuery after prune: false", logAppender.getMessages().get(1));
}
Also used : DefaultArithmetic(datawave.query.jexl.DefaultArithmetic) ASTJexlScript(org.apache.commons.jexl2.parser.ASTJexlScript) JexlEvaluation(datawave.query.function.JexlEvaluation) JexlNode(org.apache.commons.jexl2.parser.JexlNode) Document(datawave.query.attributes.Document) DatawaveJexlContext(datawave.query.jexl.DatawaveJexlContext) Key(org.apache.accumulo.core.data.Key) Test(org.junit.Test)

Aggregations

Document (datawave.query.attributes.Document) 97, Key (org.apache.accumulo.core.data.Key) 76, Test (org.junit.Test) 35, Value (org.apache.accumulo.core.data.Value) 30, HashSet (java.util.HashSet) 28, Range (org.apache.accumulo.core.data.Range) 26, Attribute (datawave.query.attributes.Attribute) 18, Map (java.util.Map) 17, Attributes (datawave.query.attributes.Attributes) 16, HashMap (java.util.HashMap) 16, AbstractMap (java.util.AbstractMap) 14, TypeAttribute (datawave.query.attributes.TypeAttribute) 13, Entry (java.util.Map.Entry) 13, PreNormalizedAttribute (datawave.query.attributes.PreNormalizedAttribute) 12, Set (java.util.Set) 12, Content (datawave.query.attributes.Content) 11, TypeMetadata (datawave.query.util.TypeMetadata) 10, QueryImpl (datawave.webservice.query.QueryImpl) 10, DatawaveKey (datawave.query.data.parsers.DatawaveKey) 9, DatawaveJexlContext (datawave.query.jexl.DatawaveJexlContext) 9