use of org.apache.drill.exec.vector.accessor.ScalarWriter in project drill by apache.
the class TestResultSetLoaderDictArray method testValueOverflow.
@Test
public void testValueOverflow() {
TupleMetadata schema = new SchemaBuilder().addDictArray("d", MinorType.INT).value(MinorType.VARCHAR).resumeSchema().buildSchema();
ResultSetLoaderImpl.ResultSetOptions options = new ResultSetOptionBuilder().rowCountLimit(ValueVector.MAX_ROW_COUNT).readerSchema(schema).build();
ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
RowSetLoader rootWriter = rsLoader.writer();
rsLoader.startBatch();
byte[] value = new byte[523];
Arrays.fill(value, (byte) 'X');
// number of dicts in each row; array size is the same for every row to find expected row count easier
int arraySize = 2;
// number of entries in each dict
int dictSize = 4;
// Number of rows should be driven by vector size.
// Our row count should include the overflow row
ArrayWriter arrayDictWriter = rootWriter.array(0);
DictWriter dictWriter = arrayDictWriter.dict();
ScalarWriter keyWriter = dictWriter.keyWriter();
ScalarWriter valueWriter = dictWriter.valueWriter().scalar();
int expectedCount = ValueVector.MAX_BUFFER_SIZE / (value.length * dictSize * arraySize);
{
int count = 0;
while (!rootWriter.isFull()) {
rootWriter.start();
for (int i = 0; i < arraySize; i++) {
for (int j = 0; j < dictSize; j++) {
// acts as a placeholder, the actual value is not important
keyWriter.setInt(0);
valueWriter.setBytes(value, value.length);
// not necessary for scalars, just for completeness
dictWriter.save();
}
arrayDictWriter.save();
}
rootWriter.save();
count++;
}
assertEquals(expectedCount + 1, count);
// Loader's row count should include only "visible" rows
assertEquals(expectedCount, rootWriter.rowCount());
// Total count should include invisible and look-ahead rows.
assertEquals(expectedCount + 1, rsLoader.totalRowCount());
// Result should exclude the overflow row
VectorContainer container = rsLoader.harvest();
BatchValidator.validate(container);
RowSet result = fixture.wrap(container);
assertEquals(expectedCount, result.rowCount());
result.clear();
}
// Next batch should start with the overflow row
{
rsLoader.startBatch();
assertEquals(1, rootWriter.rowCount());
assertEquals(expectedCount + 1, rsLoader.totalRowCount());
VectorContainer container = rsLoader.harvest();
BatchValidator.validate(container);
RowSet result = fixture.wrap(container);
assertEquals(1, result.rowCount());
result.clear();
}
rsLoader.close();
}
use of org.apache.drill.exec.vector.accessor.ScalarWriter in project drill by apache.
the class TestResultSetLoaderMapArray method testOverwriteRow.
/**
* Version of the {#link TestResultSetLoaderProtocol#testOverwriteRow()} test
* that uses nested columns inside an array of maps. Here we must call
* {@code start()} to reset the array back to the initial start position after
* each "discard."
*/
@Test
public void testOverwriteRow() {
TupleMetadata schema = new SchemaBuilder().add("a", MinorType.INT).addMapArray("m").add("b", MinorType.INT).add("c", MinorType.VARCHAR).resumeSchema().buildSchema();
ResultSetLoaderImpl.ResultSetOptions options = new ResultSetOptionBuilder().readerSchema(schema).rowCountLimit(ValueVector.MAX_ROW_COUNT).build();
ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
RowSetLoader rootWriter = rsLoader.writer();
// Can't use the shortcut to populate rows when doing overwrites.
ScalarWriter aWriter = rootWriter.scalar("a");
ArrayWriter maWriter = rootWriter.array("m");
TupleWriter mWriter = maWriter.tuple();
ScalarWriter bWriter = mWriter.scalar("b");
ScalarWriter cWriter = mWriter.scalar("c");
// Write 100,000 rows, overwriting 99% of them. This will cause vector
// overflow and data corruption if overwrite does not work; but will happily
// produce the correct result if everything works as it should.
byte[] value = new byte[512];
Arrays.fill(value, (byte) 'X');
int count = 0;
rsLoader.startBatch();
while (count < 10_000) {
rootWriter.start();
count++;
aWriter.setInt(count);
for (int i = 0; i < 10; i++) {
bWriter.setInt(count * 10 + i);
cWriter.setBytes(value, value.length);
maWriter.save();
}
if (count % 100 == 0) {
rootWriter.save();
}
}
// Verify using a reader.
RowSet result = fixture.wrap(rsLoader.harvest());
assertEquals(count / 100, result.rowCount());
RowSetReader reader = result.reader();
ArrayReader maReader = reader.array("m");
TupleReader mReader = maReader.tuple();
int rowId = 1;
while (reader.next()) {
assertEquals(rowId * 100, reader.scalar("a").getInt());
assertEquals(10, maReader.size());
for (int i = 0; i < 10; i++) {
assert (maReader.next());
assertEquals(rowId * 1000 + i, mReader.scalar("b").getInt());
assertTrue(Arrays.equals(value, mReader.scalar("c").getBytes()));
}
rowId++;
}
result.clear();
rsLoader.close();
}
use of org.apache.drill.exec.vector.accessor.ScalarWriter in project drill by apache.
the class TestResultSetLoaderOmittedValues method testOmittedValuesAtEnd.
/**
* Test "holes" in the middle of a batch, and unset columns at
* the end. Ending the batch should fill in missing values.
*/
@Test
public void testOmittedValuesAtEnd() {
// Create columns up front
TupleMetadata schema = new SchemaBuilder().add("a", MinorType.INT).add("b", MinorType.VARCHAR).addNullable("c", MinorType.VARCHAR).add("d", MinorType.INT).addNullable("e", MinorType.INT).addArray("f", MinorType.VARCHAR).buildSchema();
ResultSetLoaderImpl.ResultSetOptions options = new ResultSetOptionBuilder().readerSchema(schema).build();
ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
RowSetLoader rootWriter = rsLoader.writer();
rsLoader.startBatch();
int rowCount = 0;
ScalarWriter arrayWriter;
for (int i = 0; i < 2; i++) {
// Row 0, 1
rootWriter.start();
rowCount++;
rootWriter.scalar(0).setInt(rowCount);
rootWriter.scalar(1).setString("b_" + rowCount);
rootWriter.scalar(2).setString("c_" + rowCount);
rootWriter.scalar(3).setInt(rowCount * 10);
rootWriter.scalar(4).setInt(rowCount * 100);
arrayWriter = rootWriter.column(5).array().scalar();
arrayWriter.setString("f_" + rowCount + "-1");
arrayWriter.setString("f_" + rowCount + "-2");
rootWriter.save();
}
for (int i = 0; i < 2; i++) {
// Rows 2, 3
rootWriter.start();
rowCount++;
rootWriter.scalar(0).setInt(rowCount);
rootWriter.scalar(1).setString("b_" + rowCount);
rootWriter.scalar(3).setInt(rowCount * 10);
arrayWriter = rootWriter.column(5).array().scalar();
arrayWriter.setString("f_" + rowCount + "-1");
arrayWriter.setString("f_" + rowCount + "-2");
rootWriter.save();
}
for (int i = 0; i < 2; i++) {
// Rows 4, 5
rootWriter.start();
rowCount++;
rootWriter.scalar(0).setInt(rowCount);
rootWriter.scalar(2).setString("c_" + rowCount);
rootWriter.scalar(4).setInt(rowCount * 100);
rootWriter.save();
}
for (int i = 0; i < 2; i++) {
// Rows 6, 7
rootWriter.start();
rowCount++;
rootWriter.scalar(0).setInt(rowCount);
rootWriter.scalar(1).setString("b_" + rowCount);
rootWriter.scalar(2).setString("c_" + rowCount);
rootWriter.scalar(3).setInt(rowCount * 10);
rootWriter.scalar(4).setInt(rowCount * 100);
arrayWriter = rootWriter.column(5).array().scalar();
arrayWriter.setString("f_" + rowCount + "-1");
arrayWriter.setString("f_" + rowCount + "-2");
rootWriter.save();
}
for (int i = 0; i < 2; i++) {
// Rows 8, 9
rootWriter.start();
rowCount++;
rootWriter.scalar(0).setInt(rowCount);
rootWriter.save();
}
// Harvest the row and verify.
RowSet actual = fixture.wrap(rsLoader.harvest());
// actual.print();
TupleMetadata expectedSchema = new SchemaBuilder().add("a", MinorType.INT).add("b", MinorType.VARCHAR).addNullable("c", MinorType.VARCHAR).add("d", MinorType.INT).addNullable("e", MinorType.INT).addArray("f", MinorType.VARCHAR).buildSchema();
SingleRowSet expected = fixture.rowSetBuilder(expectedSchema).addRow(1, "b_1", "c_1", 10, 100, strArray("f_1-1", "f_1-2")).addRow(2, "b_2", "c_2", 20, 200, strArray("f_2-1", "f_2-2")).addRow(3, "b_3", null, 30, null, strArray("f_3-1", "f_3-2")).addRow(4, "b_4", null, 40, null, strArray("f_4-1", "f_4-2")).addRow(5, "", "c_5", 0, 500, strArray()).addRow(6, "", "c_6", 0, 600, strArray()).addRow(7, "b_7", "c_7", 70, 700, strArray("f_7-1", "f_7-2")).addRow(8, "b_8", "c_8", 80, 800, strArray("f_8-1", "f_8-2")).addRow(9, "", null, 0, null, strArray()).addRow(10, "", null, 0, null, strArray()).build();
RowSetUtilities.verify(expected, actual);
rsLoader.close();
}
use of org.apache.drill.exec.vector.accessor.ScalarWriter in project drill by apache.
the class TestResultSetLoaderOverflow method testVectorSizeLimitWithAppend.
@Test
public void testVectorSizeLimitWithAppend() {
TupleMetadata schema = new SchemaBuilder().add("s", MinorType.VARCHAR).buildSchema();
ResultSetOptions options = new ResultSetOptionBuilder().rowCountLimit(ValueVector.MAX_ROW_COUNT).readerSchema(schema).build();
ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
RowSetLoader rootWriter = rsLoader.writer();
rsLoader.startBatch();
byte[] head = "abc".getBytes();
byte[] tail = new byte[523];
Arrays.fill(tail, (byte) 'X');
String expected = new String(head, Charsets.UTF_8);
expected += new String(tail, Charsets.UTF_8);
expected += new String(tail, Charsets.UTF_8);
int count = 0;
ScalarWriter colWriter = rootWriter.scalar(0);
while (!rootWriter.isFull()) {
rootWriter.start();
colWriter.setBytes(head, head.length);
colWriter.appendBytes(tail, tail.length);
colWriter.appendBytes(tail, tail.length);
rootWriter.save();
count++;
}
// Number of rows should be driven by vector size.
// Our row count should include the overflow row
int valueLength = head.length + 2 * tail.length;
int expectedCount = ValueVector.MAX_BUFFER_SIZE / valueLength;
assertEquals(expectedCount + 1, count);
// Loader's row count should include only "visible" rows
assertEquals(expectedCount, rootWriter.rowCount());
// Total count should include invisible and look-ahead rows.
assertEquals(expectedCount + 1, rsLoader.totalRowCount());
// Result should exclude the overflow row
{
VectorContainer container = rsLoader.harvest();
BatchValidator.validate(container);
RowSet result = fixture.wrap(container);
assertEquals(expectedCount, result.rowCount());
// Verify that the values were, in fact, appended.
RowSetReader reader = result.reader();
while (reader.next()) {
assertEquals(expected, reader.scalar(0).getString());
}
result.clear();
}
// Next batch should start with the overflow row
rsLoader.startBatch();
assertEquals(1, rootWriter.rowCount());
assertEquals(expectedCount + 1, rsLoader.totalRowCount());
{
VectorContainer container = rsLoader.harvest();
BatchValidator.validate(container);
RowSet result = fixture.wrap(container);
assertEquals(1, result.rowCount());
RowSetReader reader = result.reader();
while (reader.next()) {
assertEquals(expected, reader.scalar(0).getString());
}
result.clear();
}
rsLoader.close();
}
use of org.apache.drill.exec.vector.accessor.ScalarWriter in project drill by apache.
the class TestResultSetLoaderOverflow method testLargeArray.
/**
* Create an array that contains more than 64K values. Drill has no numeric
* limit on array lengths. (Well, it does, but the limit is about 2 billion
* which, even for bytes, is too large to fit into a vector...)
*/
@Test
public void testLargeArray() {
ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator());
RowSetLoader rootWriter = rsLoader.writer();
MaterializedField field = SchemaBuilder.columnSchema("a", MinorType.INT, DataMode.REPEATED);
rootWriter.addColumn(field);
// Create a single array as the column value in the first row. When
// this overflows, an exception is thrown since overflow is not possible.
rsLoader.startBatch();
rootWriter.start();
ScalarWriter array = rootWriter.array(0).scalar();
try {
for (int i = 0; i < Integer.MAX_VALUE; i++) {
array.setInt(i + 1);
}
fail();
} catch (UserException e) {
// Expected
}
rsLoader.close();
}
Aggregations