Example use of datawave.query.index.lookup.ScannerStream in the datawave project by NationalSecurityAgency.
From class RangeStreamScannerTest, method testTheSimplestOfScans.
/**
 * Smoke test for the simplest possible scan. The term FOO == 'bar' is expected to yield exactly one
 * entry, keyed by '20190314', and that entry must report two document ids.
 */
@Test
public void testTheSimplestOfScans() throws Exception {
    // The query under test is "FOO == 'bar'".
    final String fieldName = "FOO";
    final String fieldValue = "bar";
    final ASTEQNode eqNode = (ASTEQNode) JexlNodeFactory.buildEQNode(fieldName, fieldValue);

    // Wire the scanner and the entry parser into the stream being exercised.
    final RangeStreamScanner scanner = buildRangeStreamScanner(fieldName, fieldValue);
    final EntryParser parser = new EntryParser(eqNode, fieldName, fieldValue, config.getIndexedFields());
    final ScannerStream stream = ScannerStream.initialized(scanner, parser, eqNode);

    // The stream has to offer at least one entry before it is drained.
    assertTrue(stream.hasNext());

    int shardsSeen = 0;
    int documentsSeen = 0;
    while (stream.hasNext()) {
        final Tuple2<String, IndexInfo> next = stream.next();
        final String shard = next.first();
        final IndexInfo info = next.second();

        // The key is compared for exact equality, not just by prefix.
        assertEquals("Expected shard to start with '20190314' but was: " + shard, "20190314", shard);
        assertEquals(2, info.count());

        shardsSeen++;
        documentsSeen += info.count();
    }

    // One entry carrying two documents, and nothing left over afterwards.
    assertEquals(1, shardsSeen);
    assertEquals(2, documentsSeen);
    assertFalse(stream.hasNext());
}
Example use of datawave.query.index.lookup.ScannerStream in the datawave project by NationalSecurityAgency.
From class RangeStreamScannerTest, method testExceedShardsPerDayThresholdAndDocumentsPerShardThreshold.
/**
 * FOO == 'boohoo' hits day 20190323; the assertions below require every returned key to start with
 * that day. The original note describes the term as spanning 15 shards with 25 document ids each
 * (fixture data not visible here -- confirm against the test setup).
 * <p>
 * Expected outcome: a single entry whose count is -1, i.e. the per-shard ranges were collapsed
 * into one day range.
 */
@Test
public void testExceedShardsPerDayThresholdAndDocumentsPerShardThreshold() throws Exception {
    // Components that define the query: "FOO == 'boohoo'"
    String fieldName = "FOO";
    String fieldValue = "boohoo";
    ASTEQNode eqNode = (ASTEQNode) JexlNodeFactory.buildEQNode(fieldName, fieldValue);

    // Construct a ScannerStream from the RangeStreamScanner and the entry parser.
    RangeStreamScanner rangeStreamScanner = buildRangeStreamScanner(fieldName, fieldValue);
    EntryParser entryParser = new EntryParser(eqNode, fieldName, fieldValue, config.getIndexedFields());
    ScannerStream scannerStream = ScannerStream.initialized(rangeStreamScanner, entryParser, eqNode);

    // The stream must have something to return before we start draining it.
    assertTrue(scannerStream.hasNext());

    int shardCount = 0;
    int documentCount = 0;
    while (scannerStream.hasNext()) {
        Tuple2<String, IndexInfo> entry = scannerStream.next();
        assertTrue("Expected shard to start with '20190323' but was: " + entry.first(), entry.first().startsWith("20190323"));
        shardCount++;
        documentCount += entry.second().count();
    }

    // A single range with a count of -1 means the shard ranges were collapsed into a day range.
    assertEquals(1, shardCount);
    assertEquals(-1, documentCount);
    assertFalse(scannerStream.hasNext());
}
Example use of datawave.query.index.lookup.ScannerStream in the datawave project by NationalSecurityAgency.
From class RangeStreamScannerTest, method testExceedMaxMedianDocumentsPerShard.
/**
 * The term FOO == 'boo' is expected to come back as eight separate entries for day 20190319
 * (keys prefixed with '20190319_'), each reporting 15 document ids, for 120 documents in total.
 */
@Test
public void testExceedMaxMedianDocumentsPerShard() throws Exception {
    // The query under test is "FOO == 'boo'".
    final String fieldName = "FOO";
    final String fieldValue = "boo";
    final ASTEQNode eqNode = (ASTEQNode) JexlNodeFactory.buildEQNode(fieldName, fieldValue);

    // Adapt the raw scanner output through the entry parser, then wrap it in a ScannerStream.
    final RangeStreamScanner scanner = buildRangeStreamScanner(fieldName, fieldValue);
    final EntryParser parser = new EntryParser(eqNode, fieldName, fieldValue, config.getIndexedFields());
    final Iterator<Tuple2<String, IndexInfo>> parsedEntries = Iterators.transform(scanner, parser);
    final ScannerStream stream = ScannerStream.initialized(parsedEntries, eqNode);

    // The stream has to offer at least one entry before it is drained.
    assertTrue(stream.hasNext());

    int shardsSeen = 0;
    int documentsSeen = 0;
    while (stream.hasNext()) {
        final Tuple2<String, IndexInfo> next = stream.next();
        final String shard = next.first();
        final IndexInfo info = next.second();

        assertTrue("Expected shard to start with '20190319_' but was: " + shard, shard.startsWith("20190319_"));
        assertEquals(15, info.count());

        shardsSeen++;
        documentsSeen += info.count();
    }

    // Eight entries of fifteen documents each, and nothing left over afterwards.
    assertEquals(8, shardsSeen);
    assertEquals(120, documentsSeen);
    assertFalse(stream.hasNext());
}
Example use of datawave.query.index.lookup.ScannerStream in the datawave project by NationalSecurityAgency.
From class RangeStreamScannerTest, method testExceedShardDayThreshold.
/**
 * The term FOO == 'baz' is expected to come back as fifteen separate entries for day 20190317
 * (keys prefixed with '20190317_'), each reporting 2 document ids, for 30 documents in total.
 */
@Test
public void testExceedShardDayThreshold() throws Exception {
    // The query under test is "FOO == 'baz'".
    final String fieldName = "FOO";
    final String fieldValue = "baz";
    final ASTEQNode eqNode = (ASTEQNode) JexlNodeFactory.buildEQNode(fieldName, fieldValue);

    // Adapt the raw scanner output through the entry parser, then wrap it in a ScannerStream.
    final RangeStreamScanner scanner = buildRangeStreamScanner(fieldName, fieldValue);
    final EntryParser parser = new EntryParser(eqNode, fieldName, fieldValue, config.getIndexedFields());
    final Iterator<Tuple2<String, IndexInfo>> parsedEntries = Iterators.transform(scanner, parser);
    final ScannerStream stream = ScannerStream.initialized(parsedEntries, eqNode);

    // The stream has to offer at least one entry before it is drained.
    assertTrue(stream.hasNext());

    int shardsSeen = 0;
    int documentsSeen = 0;
    while (stream.hasNext()) {
        final Tuple2<String, IndexInfo> next = stream.next();
        final String shard = next.first();
        final IndexInfo info = next.second();

        assertTrue("Expected shard to start with '20190317_' but was: " + shard, shard.startsWith("20190317_"));
        assertEquals(2, info.count());

        shardsSeen++;
        documentsSeen += info.count();
    }

    // Fifteen entries of two documents each, and nothing left over afterwards.
    assertEquals(15, shardsSeen);
    assertEquals(30, documentsSeen);
    assertFalse(stream.hasNext());
}
Aggregations