use of org.apache.drill.exec.physical.resultSet.RowSetLoader in project drill by apache.
the class TestResultSetLoaderDicts method testKeyOverflow.
/**
 * Writes rows whose dict column holds several large VARCHAR keys until the
 * row writer reports that it is full, then checks the row counts reported
 * by the writer, the loader and the harvested batch. A second batch is then
 * started and is expected to already contain one row (the overflow row).
 */
@Test
public void testKeyOverflow() {
  TupleMetadata schema = new SchemaBuilder()
      .addDict("d", MinorType.VARCHAR)
        .value(MinorType.INT)
        .resumeSchema()
      .buildSchema();
  ResultSetLoaderImpl.ResultSetOptions options = new ResultSetOptionBuilder()
      .rowCountLimit(ValueVector.MAX_ROW_COUNT)
      .readerSchema(schema)
      .build();
  ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
  RowSetLoader rootWriter = rsLoader.writer();
  rsLoader.startBatch();

  // Every dict key is the same 523-byte run of 'X'.
  byte[] keyBytes = new byte[523];
  Arrays.fill(keyBytes, (byte) 'X');

  // Number of entries written into each row's dict.
  int entriesPerDict = 4;

  DictWriter dictWriter = rootWriter.dict(0);
  ScalarWriter keyWriter = dictWriter.keyWriter();
  ScalarWriter valueWriter = dictWriter.valueWriter().scalar();

  // Rows expected in the first batch: buffer size divided by the key bytes
  // written per row.
  int expectedCount = ValueVector.MAX_BUFFER_SIZE / (keyBytes.length * entriesPerDict);

  // First batch: keep writing rows until the writer says it is full.
  int rowsWritten = 0;
  for (; !rootWriter.isFull(); rowsWritten++) {
    rootWriter.start();
    for (int entry = 0; entry < entriesPerDict; entry++) {
      keyWriter.setBytes(keyBytes, keyBytes.length);
      // The value is only a placeholder; its content does not matter here.
      valueWriter.setInt(0);
      // Not necessary for scalars, done for completeness.
      dictWriter.save();
    }
    rootWriter.save();
  }

  // The number of rows written is driven by vector size and includes the
  // overflow row.
  assertEquals(expectedCount + 1, rowsWritten);

  // The writer's row count covers only the "visible" rows.
  assertEquals(expectedCount, rootWriter.rowCount());

  // The loader's total also counts the look-ahead (overflow) row.
  assertEquals(expectedCount + 1, rsLoader.totalRowCount());

  // The harvested batch must not contain the overflow row.
  VectorContainer firstContainer = rsLoader.harvest();
  BatchValidator.validate(firstContainer);
  RowSet firstBatch = fixture.wrap(firstContainer);
  assertEquals(expectedCount, firstBatch.rowCount());
  firstBatch.clear();

  // Second batch: it should open with the overflow row already in place.
  rsLoader.startBatch();
  assertEquals(1, rootWriter.rowCount());
  assertEquals(expectedCount + 1, rsLoader.totalRowCount());
  VectorContainer secondContainer = rsLoader.harvest();
  BatchValidator.validate(secondContainer);
  RowSet secondBatch = fixture.wrap(secondContainer);
  assertEquals(1, secondBatch.rowCount());
  secondBatch.clear();

  rsLoader.close();
}
use of org.apache.drill.exec.physical.resultSet.RowSetLoader in project drill by apache.
the class TestResultSetLoaderDicts method testDictValue.
/**
 * Loads rows into a schema with an INT column "a" and a dict column "d"
 * whose keys are INT and whose values are themselves dicts (INT key,
 * nullable VARCHAR value). Two batches are written and each harvested
 * batch is compared with a row set built from the same literal rows.
 */
@Test
public void testDictValue() {
  TupleMetadata schema = new SchemaBuilder()
      .add("a", MinorType.INT)
      .addDict("d", MinorType.INT)
        .dictValue()
          .key(MinorType.INT)
          .nullableValue(MinorType.VARCHAR)
          .resumeDict()
        .resumeSchema()
      .buildSchema();
  ResultSetLoaderImpl.ResultSetOptions options = new ResultSetOptionBuilder()
      .readerSchema(schema)
      .build();
  ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
  RowSetLoader rootWriter = rsLoader.writer();

  // First batch: three rows, including an empty outer dict, an empty inner
  // dict and a null inner value.
  rsLoader.startBatch();
  rootWriter
      .addRow(10, map(1, map(1, "a", 2, "b", 4, "c"), 2, map(2, "a2", 1, "c2")))
      .addRow(20, map())
      .addRow(30, map(1, map(), 2, map(1, "a3"), 3, map(2, "b4", 4, "n4", 1, null), 4, map(3, "m5", 1, "a5", 2, "c5", 8, "m5", 21, "h5")));

  // Validate the first batch against the same rows.
  RowSet firstActual = fixture.wrap(rsLoader.harvest());
  SingleRowSet firstExpected = fixture.rowSetBuilder(schema)
      .addRow(10, map(1, map(1, "a", 2, "b", 4, "c"), 2, map(2, "a2", 1, "c2")))
      .addRow(20, map())
      .addRow(30, map(1, map(), 2, map(1, "a3"), 3, map(2, "b4", 4, "n4", 1, null), 4, map(3, "m5", 1, "a5", 2, "c5", 8, "m5", 21, "h5")))
      .build();
  RowSetUtilities.verify(firstExpected, firstActual);

  // Second batch: two more rows.
  rsLoader.startBatch();
  rootWriter
      .addRow(40, map(1, map(1, "j6", 0, "k6")))
      .addRow(50, map(1, map(2, "l7"), 2, map(1, "o8", 5, "p8", 7, "u8")));

  // Validate the second batch; the expected rows are built on the schema
  // reported by the harvested batch itself.
  RowSet secondActual = fixture.wrap(rsLoader.harvest());
  SingleRowSet secondExpected = fixture.rowSetBuilder(secondActual.schema())
      .addRow(40, map(1, map(1, "j6", 0, "k6")))
      .addRow(50, map(1, map(2, "l7"), 2, map(1, "o8", 5, "p8", 7, "u8")))
      .build();
  RowSetUtilities.verify(secondExpected, secondActual);

  rsLoader.close();
}
use of org.apache.drill.exec.physical.resultSet.RowSetLoader in project drill by apache.
the class TestResultSetLoaderLimits method testRowLimit.
/**
 * Verify that row writing stops once a batch holds the loader's default
 * target row count. The batch is filled twice: first by polling
 * {@code isFull()} before each row, then by using the boolean returned
 * from {@code start()} as the loop condition. In this case there is no
 * look-ahead row.
 */
@Test
public void testRowLimit() {
  ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator());
  assertEquals(ResultSetLoaderImpl.DEFAULT_ROW_COUNT, rsLoader.targetRowCount());
  RowSetLoader rootWriter = rsLoader.writer();
  rootWriter.addColumn(SchemaBuilder.columnSchema("s", MinorType.VARCHAR, DataMode.REQUIRED));

  // Each row carries the same 200-byte value.
  byte[] payload = new byte[200];
  Arrays.fill(payload, (byte) 'X');

  // Pass 1: ask the writer whether it is full before starting each row.
  rsLoader.startBatch();
  int polledRows = 0;
  for (; !rootWriter.isFull(); polledRows++) {
    rootWriter.start();
    rootWriter.scalar(0).setBytes(payload, payload.length);
    rootWriter.save();
  }
  assertEquals(ResultSetLoaderImpl.DEFAULT_ROW_COUNT, polledRows);
  assertEquals(polledRows, rootWriter.rowCount());
  rsLoader.harvest().clear();

  // Pass 2: let start() itself signal when no further row may be written.
  rsLoader.startBatch();
  assertEquals(0, rootWriter.rowCount());
  int startedRows = 0;
  for (; rootWriter.start(); startedRows++) {
    rootWriter.scalar(0).setBytes(payload, payload.length);
    rootWriter.save();
  }
  assertEquals(ResultSetLoaderImpl.DEFAULT_ROW_COUNT, startedRows);
  assertEquals(startedRows, rootWriter.rowCount());
  rsLoader.harvest().clear();

  rsLoader.close();
}
use of org.apache.drill.exec.physical.resultSet.RowSetLoader in project drill by apache.
the class TestResultSetLoaderLimits method testLimit100.
/**
 * Test filling one batch normally, then hitting the scan limit on the
 * second. The loader is configured with a per-batch row count of 75 and
 * an overall limit of 100, so the second batch is expected to stop at
 * 25 rows and leave the loader at its limit.
 */
@Test
public void testLimit100() {
  ResultSetOptions options = new ResultSetOptionBuilder()
      .rowCountLimit(75)
      .limit(100)
      .build();
  ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
  RowSetLoader rootWriter = rsLoader.writer();
  rootWriter.addColumn(SchemaBuilder.columnSchema("s", MinorType.VARCHAR, DataMode.REQUIRED));

  // First batch fills to the per-batch row count; the limit is not yet hit.
  rsLoader.startBatch();
  int firstBatchRows = fillToLimit(rootWriter);
  assertEquals(75, firstBatchRows);
  assertEquals(firstBatchRows, rootWriter.rowCount());
  rsLoader.harvest().clear();
  assertFalse(rsLoader.atLimit());

  // Second batch is cut short by the overall limit.
  rsLoader.startBatch();
  int secondBatchRows = fillToLimit(rootWriter);
  assertEquals(25, secondBatchRows);
  assertEquals(secondBatchRows, rootWriter.rowCount());
  rsLoader.harvest().clear();
  assertTrue(rsLoader.atLimit());

  rsLoader.close();
}
use of org.apache.drill.exec.physical.resultSet.RowSetLoader in project drill by apache.
the class TestResultSetLoaderLimits method testDynamicLimit.
/**
 * Test that the row limit can change between batches: start at
 * {@code TEST_ROW_LIMIT}, raise the target to 8000, lower it to 1000, and
 * finally check how out-of-range targets are reported back.
 */
@Test
public void testDynamicLimit() {
  // Begin with the small test limit.
  ResultSetOptions options = new ResultSetOptionBuilder()
      .rowCountLimit(TEST_ROW_LIMIT)
      .build();
  ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
  assertEquals(TEST_ROW_LIMIT, rsLoader.targetRowCount());
  RowSetLoader rootWriter = rsLoader.writer();
  rootWriter.addColumn(SchemaBuilder.columnSchema("s", MinorType.VARCHAR, DataMode.REQUIRED));

  rsLoader.startBatch();
  int initialRows = fillToLimit(rootWriter);
  assertEquals(TEST_ROW_LIMIT, initialRows);
  assertEquals(initialRows, rootWriter.rowCount());
  rsLoader.harvest().clear();

  // Raise the target row count and fill a second batch.
  int largerLimit = 8000;
  rsLoader.setTargetRowCount(largerLimit);
  rsLoader.startBatch();
  int largerRows = fillToLimit(rootWriter);
  assertEquals(largerLimit, largerRows);
  assertEquals(largerRows, rootWriter.rowCount());
  rsLoader.harvest().clear();

  // Drop the target row count back to a lower number.
  int smallerLimit = 1000;
  rsLoader.setTargetRowCount(smallerLimit);
  rsLoader.startBatch();
  int smallerRows = fillToLimit(rootWriter);
  assertEquals(smallerLimit, smallerRows);
  assertEquals(smallerRows, rootWriter.rowCount());
  rsLoader.harvest().clear();

  // Out-of-range targets: a negative request is expected to read back as 1,
  // and an oversized request as ValueVector.MAX_ROW_COUNT.
  rsLoader.setTargetRowCount(-3);
  assertEquals(1, rsLoader.targetRowCount());
  rsLoader.setTargetRowCount(Integer.MAX_VALUE);
  assertEquals(ValueVector.MAX_ROW_COUNT, rsLoader.targetRowCount());

  rsLoader.close();
}
Aggregations