Search in sources :

Example 1 with ScannerStream

use of datawave.query.index.lookup.ScannerStream in project datawave by NationalSecurityAgency.

the class RangeStreamScannerTest method testTheSimplestOfScans.

/**
 * Make sure a simple scan returns correctly. FOO == 'bar' hits day 20190314 with 1 shard, each shard has 2 document ids.
 *
 * @throws Exception
 *             if building the scanner or iterating the stream throws
 */
@Test
public void testTheSimplestOfScans() throws Exception {
    // Components that define the query: "FOO == 'bar'"
    String fieldName = "FOO";
    String fieldValue = "bar";
    ASTEQNode eqNode = (ASTEQNode) JexlNodeFactory.buildEQNode(fieldName, fieldValue);
    // Construct a ScannerStream from the RangeStreamScanner and the entry parser.
    RangeStreamScanner rangeStreamScanner = buildRangeStreamScanner(fieldName, fieldValue);
    EntryParser entryParser = new EntryParser(eqNode, fieldName, fieldValue, config.getIndexedFields());
    ScannerStream scannerStream = ScannerStream.initialized(rangeStreamScanner, entryParser, eqNode);
    // The stream must offer at least one entry before the tallying loop starts.
    assertTrue(scannerStream.hasNext());
    int shardCount = 0;
    int documentCount = 0;
    while (scannerStream.hasNext()) {
        Tuple2<String, IndexInfo> entry = scannerStream.next();
        // This is an exact-match check (not a prefix check), so the failure message states that.
        assertEquals("Expected shard to be exactly '20190314' but was: " + entry.first(), "20190314", entry.first());
        assertEquals(2, entry.second().count());
        shardCount++;
        documentCount += entry.second().count();
    }
    // One entry carrying two document ids is expected in total.
    assertEquals(1, shardCount);
    assertEquals(2, documentCount);
    // The stream must be fully drained once the loop exits.
    assertFalse(scannerStream.hasNext());
}
Also used : ASTEQNode(org.apache.commons.jexl2.parser.ASTEQNode) IndexInfo(datawave.query.index.lookup.IndexInfo) EntryParser(datawave.query.index.lookup.EntryParser) ScannerStream(datawave.query.index.lookup.ScannerStream) Test(org.junit.Test)

Example 2 with ScannerStream

use of datawave.query.index.lookup.ScannerStream in project datawave by NationalSecurityAgency.

the class RangeStreamScannerTest method testExceedShardsPerDayThresholdAndDocumentsPerShardThreshold.

/**
 * FOO == 'boohoo' hits day 20190323 with 15 shards, each shard has 25 document ids. The scan is expected to return a single entry whose count is -1, i.e.
 * the shard ranges were collapsed into a day range.
 *
 * @throws Exception
 *             if building the scanner or iterating the stream throws
 */
@Test
public void testExceedShardsPerDayThresholdAndDocumentsPerShardThreshold() throws Exception {
    // Components that define the query: "FOO == 'boohoo'"
    String fieldName = "FOO";
    String fieldValue = "boohoo";
    ASTEQNode eqNode = (ASTEQNode) JexlNodeFactory.buildEQNode(fieldName, fieldValue);
    // Construct a ScannerStream from the RangeStreamScanner and the entry parser.
    RangeStreamScanner rangeStreamScanner = buildRangeStreamScanner(fieldName, fieldValue);
    EntryParser entryParser = new EntryParser(eqNode, fieldName, fieldValue, config.getIndexedFields());
    ScannerStream scannerStream = ScannerStream.initialized(rangeStreamScanner, entryParser, eqNode);
    // The stream must offer at least one entry before the tallying loop starts.
    assertTrue(scannerStream.hasNext());
    int shardCount = 0;
    int documentCount = 0;
    while (scannerStream.hasNext()) {
        Tuple2<String, IndexInfo> entry = scannerStream.next();
        assertTrue("Expected shard to start with '20190323' but was: " + entry.first(), entry.first().startsWith("20190323"));
        shardCount++;
        documentCount += entry.second().count();
    }
    // A single range with a count of -1 means the shard ranges were collapsed into a day range.
    assertEquals(1, shardCount);
    assertEquals(-1, documentCount);
    // The stream must be fully drained once the loop exits.
    assertFalse(scannerStream.hasNext());
}
Also used : ASTEQNode(org.apache.commons.jexl2.parser.ASTEQNode) IndexInfo(datawave.query.index.lookup.IndexInfo) EntryParser(datawave.query.index.lookup.EntryParser) ScannerStream(datawave.query.index.lookup.ScannerStream) Test(org.junit.Test)

Example 3 with ScannerStream

use of datawave.query.index.lookup.ScannerStream in project datawave by NationalSecurityAgency.

the class RangeStreamScannerTest method testExceedMaxMedianDocumentsPerShard.

/**
 * Scans for FOO == 'boo', which hits day 20190319 across 8 shards, with 15 document ids in every shard.
 */
@Test
public void testExceedMaxMedianDocumentsPerShard() throws Exception {
    // The query under test is "FOO == 'boo'".
    String field = "FOO";
    String value = "boo";
    ASTEQNode node = (ASTEQNode) JexlNodeFactory.buildEQNode(field, value);
    // Chain the pieces together: scanner, then entry parser, then the transformed iterator feeding the stream.
    RangeStreamScanner scanner = buildRangeStreamScanner(field, value);
    EntryParser parser = new EntryParser(node, field, value, config.getIndexedFields());
    Iterator<Tuple2<String, IndexInfo>> parsedEntries = Iterators.transform(scanner, parser);
    ScannerStream stream = ScannerStream.initialized(parsedEntries, node);
    // There has to be something to iterate before tallying begins.
    assertTrue(stream.hasNext());
    int shardsSeen = 0;
    int docsSeen = 0;
    for (; stream.hasNext(); shardsSeen++) {
        Tuple2<String, IndexInfo> current = stream.next();
        String shard = current.first();
        assertTrue("Expected shard to start with '20190319_' but was: " + shard, shard.startsWith("20190319_"));
        IndexInfo info = current.second();
        assertEquals(15, info.count());
        docsSeen += info.count();
    }
    // 8 shards at 15 document ids each add up to 120.
    assertEquals(8, shardsSeen);
    assertEquals(120, docsSeen);
    // Nothing may remain once the loop has finished.
    assertFalse(stream.hasNext());
}
Also used : ASTEQNode(org.apache.commons.jexl2.parser.ASTEQNode) Tuple2(datawave.query.util.Tuple2) IndexInfo(datawave.query.index.lookup.IndexInfo) EntryParser(datawave.query.index.lookup.EntryParser) ScannerStream(datawave.query.index.lookup.ScannerStream) Test(org.junit.Test)

Example 4 with ScannerStream

use of datawave.query.index.lookup.ScannerStream in project datawave by NationalSecurityAgency.

the class RangeStreamScannerTest method testExceedShardDayThreshold.

/**
 * Scans for FOO == 'baz', which hits day 20190317 across 15 shards, with 2 document ids in every shard.
 */
@Test
public void testExceedShardDayThreshold() throws Exception {
    // The query under test is "FOO == 'baz'".
    String field = "FOO";
    String value = "baz";
    ASTEQNode node = (ASTEQNode) JexlNodeFactory.buildEQNode(field, value);
    // Chain the pieces together: scanner, then entry parser, then the transformed iterator feeding the stream.
    RangeStreamScanner scanner = buildRangeStreamScanner(field, value);
    EntryParser parser = new EntryParser(node, field, value, config.getIndexedFields());
    Iterator<Tuple2<String, IndexInfo>> parsedEntries = Iterators.transform(scanner, parser);
    ScannerStream stream = ScannerStream.initialized(parsedEntries, node);
    // There has to be something to iterate before tallying begins.
    assertTrue(stream.hasNext());
    int shardsSeen = 0;
    int docsSeen = 0;
    for (; stream.hasNext(); shardsSeen++) {
        Tuple2<String, IndexInfo> current = stream.next();
        String shard = current.first();
        assertTrue("Expected shard to start with '20190317_' but was: " + shard, shard.startsWith("20190317_"));
        IndexInfo info = current.second();
        assertEquals(2, info.count());
        docsSeen += info.count();
    }
    // 15 shards at 2 document ids each add up to 30.
    assertEquals(15, shardsSeen);
    assertEquals(30, docsSeen);
    // Nothing may remain once the loop has finished.
    assertFalse(stream.hasNext());
}
Also used : ASTEQNode(org.apache.commons.jexl2.parser.ASTEQNode) Tuple2(datawave.query.util.Tuple2) IndexInfo(datawave.query.index.lookup.IndexInfo) EntryParser(datawave.query.index.lookup.EntryParser) ScannerStream(datawave.query.index.lookup.ScannerStream) Test(org.junit.Test)

Aggregations

EntryParser (datawave.query.index.lookup.EntryParser): 4, IndexInfo (datawave.query.index.lookup.IndexInfo): 4, ScannerStream (datawave.query.index.lookup.ScannerStream): 4, ASTEQNode (org.apache.commons.jexl2.parser.ASTEQNode): 4, Test (org.junit.Test): 4, Tuple2 (datawave.query.util.Tuple2): 2