use of org.apache.drill.exec.physical.rowSet.RowSetLoader in project drill by axbaretto.
the class TestResultSetLoaderOmittedValues method testSkipRows.
/**
 * Verifies that a row which is started and written, but never saved,
 * is dropped from the harvested batch.
 * <p>
 * Fourteen rows are written; only every other one (row numbers 1, 3, 5,
 * ... 13) is followed by a call to {@code save()}. The harvested result
 * must contain exactly those saved rows, in order, with the values that
 * were written for them.
 * <p>
 * As described by the original author: the vectors still hold the values
 * of a discarded row, only the pointers are not advanced. If the batch
 * ends first, the leftover values sit past the end of the valid data
 * (just another kind of garbage); if another row is written, it simply
 * overwrites the discarded one, because only the tail of a vector is
 * ever modified.
 */
@Test
public void testSkipRows() {
  TupleMetadata schema = new SchemaBuilder()
      .add("a", MinorType.INT)
      .addNullable("b", MinorType.VARCHAR)
      .buildSchema();
  ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
      .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
      .setSchema(schema)
      .build();
  ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
  RowSetLoader rootWriter = rsLoader.writer();

  rsLoader.startBatch();
  final int totalRows = 14;
  for (int rowNumber = 1; rowNumber <= totalRows; rowNumber++) {
    // Zero-based position of this row; drives the null and save patterns.
    final int position = rowNumber - 1;

    rootWriter.start();
    rootWriter.scalar(0).setInt(rowNumber);

    // Column "b" is null on every third position, otherwise "b-<rowNumber>".
    if (position % 3 != 0) {
      rootWriter.scalar(1).setString("b-" + rowNumber);
    } else {
      rootWriter.scalar(1).setNull();
    }

    // Odd positions are deliberately left unsaved so that they are discarded.
    if (position % 2 != 0) {
      continue;
    }
    rootWriter.save();
  }

  // Only the saved rows (odd row numbers) should survive the harvest.
  RowSet result = fixture.wrap(rsLoader.harvest());
  SingleRowSet expected = fixture.rowSetBuilder(result.batchSchema())
      .addRow(1, null)
      .addRow(3, "b-3")
      .addRow(5, "b-5")
      .addRow(7, null)
      .addRow(9, "b-9")
      .addRow(11, "b-11")
      .addRow(13, null)
      .build();
  new RowSetComparison(expected).verifyAndClearAll(result);
  rsLoader.close();
}
use of org.apache.drill.exec.physical.rowSet.RowSetLoader in project drill by axbaretto.
the class TestResultSetLoaderOmittedValues method testSkipOverflowRow.
/**
 * Verifies that a row can be discarded even when it is the overflow row.
 * <p>
 * Rows carrying a 512-byte value are written until the writer reports
 * that it is full. A row is saved only if the writer is still not full
 * after the row's values were written, so the row that filled the batch
 * is never saved. After throwing away the first batch, the next batch
 * must therefore be empty.
 */
@Test
public void testSkipOverflowRow() {
  TupleMetadata schema = new SchemaBuilder()
      .add("a", MinorType.INT)
      .addNullable("b", MinorType.VARCHAR)
      .buildSchema();
  ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
      .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
      .setSchema(schema)
      .build();
  ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
  RowSetLoader rootWriter = rsLoader.writer();
  rsLoader.startBatch();

  // Payload for column "b": 512 bytes of 'X'.
  byte[] payload = new byte[512];
  Arrays.fill(payload, (byte) 'X');

  for (int rowId = 0; !rootWriter.isFull(); rowId++) {
    rootWriter.start();
    rootWriter.scalar(0).setInt(rowId);
    rootWriter.scalar(1).setBytes(payload, payload.length);

    // Writing the values may have filled the batch; if so, leave this
    // row unsaved so that it is discarded.
    if (rootWriter.isFull()) {
      continue;
    }
    rootWriter.save();
  }

  // The content of the first batch is irrelevant to this test.
  rsLoader.harvest().zeroVectors();

  // The second batch must be empty, since the overflow row was discarded.
  rsLoader.startBatch();
  RowSet result = fixture.wrap(rsLoader.harvest());
  assertEquals(0, result.rowCount());
  result.clear();
  rsLoader.close();
}
use of org.apache.drill.exec.physical.rowSet.RowSetLoader in project drill by axbaretto.
the class TestResultSetLoaderOverflow method testBatchSizeLimit.
/**
 * Verifies that the writer reports "full" once the configured batch size
 * limit is reached, and that the row which triggered the condition is
 * carried over into the next batch.
 * <p>
 * The limit is set to 8 MB of data plus room for the offsets. Rows of
 * 512 bytes each are written until the writer is full; the test then
 * checks the client-side count, the writer's row count, the loader's
 * total row count, and the sizes of the two harvested batches.
 */
@Test
public void testBatchSizeLimit() {
  // Bytes of VARCHAR data the batch is allowed to hold.
  final int dataSize = 8 * 1024 * 1024;

  TupleMetadata schema = new SchemaBuilder()
      .add("s", MinorType.VARCHAR)
      .buildSchema();
  ResultSetOptions options = new OptionBuilder()
      .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
      .setSchema(schema)
      // Data, plus offsets (doubled because of +1).
      .setBatchSizeLimit(dataSize + 2 * ValueVector.MAX_ROW_COUNT * 4)
      .build();
  ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
  RowSetLoader rootWriter = rsLoader.writer();
  rsLoader.startBatch();

  byte[] payload = new byte[512];
  Arrays.fill(payload, (byte) 'X');

  // Write and save rows until the writer reports that it is full.
  int written = 0;
  for (; !rootWriter.isFull(); written++) {
    rootWriter.start();
    rootWriter.scalar(0).setBytes(payload, payload.length);
    rootWriter.save();
  }

  // The number of rows we wrote includes the overflow row.
  int expectedCount = dataSize / payload.length;
  assertEquals(expectedCount + 1, written);

  // The writer's row count covers only the "visible" rows.
  assertEquals(expectedCount, rootWriter.rowCount());

  // The total count also covers invisible and look-ahead rows.
  assertEquals(expectedCount + 1, rsLoader.totalRowCount());

  // The harvested batch excludes the overflow row.
  RowSet result = fixture.wrap(rsLoader.harvest());
  assertEquals(expectedCount, result.rowCount());
  result.clear();

  // The next batch starts out holding just the overflow row.
  rsLoader.startBatch();
  assertEquals(1, rootWriter.rowCount());
  assertEquals(expectedCount + 1, rsLoader.totalRowCount());
  result = fixture.wrap(rsLoader.harvest());
  assertEquals(1, result.rowCount());
  result.clear();
  rsLoader.close();
}
use of org.apache.drill.exec.physical.rowSet.RowSetLoader in project drill by axbaretto.
the class TestResultSetLoaderOverflow method testLargeArray.
/**
 * Builds a single array holding far more than 64K values and expects the
 * write to fail with a {@code UserException}.
 * <p>
 * Per the original author's note, Drill has no practical numeric limit on
 * array length (the nominal limit is about 2 billion, which is too large
 * to fit into a vector even for bytes). Because the whole array belongs to
 * the first row of the batch, there is nowhere to overflow to, so the
 * loader is expected to raise an error instead.
 */
@Test
public void testLargeArray() {
  ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator());
  RowSetLoader rootWriter = rsLoader.writer();
  rootWriter.addColumn(SchemaBuilder.columnSchema("a", MinorType.INT, DataMode.REPEATED));

  // One array, in the very first row, is filled until the write fails.
  rsLoader.startBatch();
  rootWriter.start();
  ScalarWriter array = rootWriter.array(0).scalar();
  try {
    // Writes the values 1, 2, 3, ... up to Integer.MAX_VALUE.
    int written = 0;
    while (written < Integer.MAX_VALUE) {
      array.setInt(++written);
    }
    // Reaching this point means no exception was thrown.
    fail();
  } catch (UserException expected) {
    // Expected: the array cannot overflow into another batch.
  }
  rsLoader.close();
}
use of org.apache.drill.exec.physical.rowSet.RowSetLoader in project drill by axbaretto.
the class TestResultSetLoaderOverflow method testMissingArrayValues.
/**
 * Covers the case where an array column has "missing values" before the
 * batch overflows.
 * <p>
 * Every row gets an int and a 512-byte VARCHAR. Only rows with an id
 * below {@code blankAfter} also get three array elements; later rows
 * write nothing to the array. After harvesting, every row is read back
 * and checked: rows below {@code blankAfter} must have the three expected
 * elements, and all later rows must have an empty array.
 */
@Test
public void testMissingArrayValues() {
  TupleMetadata schema = new SchemaBuilder()
      .add("a", MinorType.INT)
      .add("b", MinorType.VARCHAR)
      .addArray("c", MinorType.INT)
      .buildSchema();
  ResultSetOptions options = new OptionBuilder()
      .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
      .setSchema(schema)
      .build();
  ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
  RowSetLoader rootWriter = rsLoader.writer();

  byte[] value = new byte[512];
  Arrays.fill(value, (byte) 'X');

  // Rows at or beyond this id leave the array column unwritten.
  final int blankAfter = ValueVector.MAX_BUFFER_SIZE / value.length * 2 / 3;
  ScalarWriter cWriter = rootWriter.array("c").scalar();

  rsLoader.startBatch();
  int rowId = 0;
  // start() returning false ends the loop.
  for (; rootWriter.start(); rowId++) {
    rootWriter.scalar("a").setInt(rowId);
    rootWriter.scalar("b").setBytes(value, value.length);
    if (rowId < blankAfter) {
      final int base = rowId * 3;
      for (int offset = 0; offset < 3; offset++) {
        cWriter.setInt(base + offset);
      }
    }
    rootWriter.save();
  }

  // The harvested batch holds one row fewer than the number of saved rows.
  RowSet result = fixture.wrap(rsLoader.harvest());
  assertEquals(rowId - 1, result.rowCount());

  RowSetReader reader = result.reader();
  ScalarElementReader cReader = reader.array("c").elements();
  while (reader.next()) {
    assertEquals(reader.rowIndex(), reader.scalar("a").getInt());
    assertTrue(Arrays.equals(value, reader.scalar("b").getBytes()));
    if (reader.rowIndex() >= blankAfter) {
      // No elements were written for these rows.
      assertEquals(0, cReader.size());
    } else {
      assertEquals(3, cReader.size());
      for (int offset = 0; offset < 3; offset++) {
        assertEquals(reader.rowIndex() * 3 + offset, cReader.getInt(offset));
      }
    }
  }
  result.clear();
  rsLoader.close();
}
Aggregations