Use of org.apache.drill.exec.physical.rowSet.RowSetLoader in project drill by axbaretto.
From class TestResultSetLoaderMaps, method testMapOverflowWithNewColumn.
/**
 * Test the case in which a new column is added during the overflow row. Unlike
 * the top-level schema case, internally we must create a copy of the map, and
 * move vectors across only when the result is to include the schema version
 * of the target column. For overflow, the new column is added after the
 * first batch is harvested; it first appears in the second batch, which
 * contains the overflow row in which the column was added.
 */
@Test
public void testMapOverflowWithNewColumn() {
TupleMetadata schema = new SchemaBuilder()
    .add("a", MinorType.INT)
    .addMap("m")
      .add("b", MinorType.INT)
      .add("c", MinorType.VARCHAR)
      .resumeSchema()
    .buildSchema();
ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
    .setSchema(schema)
    .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
    .build();
ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
assertEquals(4, rsLoader.schemaVersion());
RowSetLoader rootWriter = rsLoader.writer();
// Can't use the shortcut to populate rows when doing a schema
// change.
ScalarWriter aWriter = rootWriter.scalar("a");
TupleWriter mWriter = rootWriter.tuple("m");
ScalarWriter bWriter = mWriter.scalar("b");
ScalarWriter cWriter = mWriter.scalar("c");
byte[] value = new byte[512];
Arrays.fill(value, (byte) 'X');
int count = 0;
rsLoader.startBatch();
while (!rootWriter.isFull()) {
rootWriter.start();
aWriter.setInt(count);
bWriter.setInt(count * 10);
cWriter.setBytes(value, value.length);
if (rootWriter.isFull()) {
// Overflow just occurred. Add another column.
mWriter.addColumn(SchemaBuilder.columnSchema("d", MinorType.INT, DataMode.OPTIONAL));
mWriter.scalar("d").setInt(count * 100);
}
rootWriter.save();
count++;
}
// Result set should include the original columns, but not d.
RowSet result = fixture.wrap(rsLoader.harvest());
assertEquals(4, rsLoader.schemaVersion());
assertTrue(schema.isEquivalent(result.schema()));
BatchSchema expectedSchema = new BatchSchema(SelectionVectorMode.NONE, schema.toFieldList());
assertTrue(expectedSchema.isEquivalent(result.batchSchema()));
// Use a reader to validate row-by-row. Too large to create an expected
// result set.
RowSetReader reader = result.reader();
TupleReader mapReader = reader.tuple("m");
int rowId = 0;
while (reader.next()) {
assertEquals(rowId, reader.scalar("a").getInt());
assertEquals(rowId * 10, mapReader.scalar("b").getInt());
assertTrue(Arrays.equals(value, mapReader.scalar("c").getBytes()));
rowId++;
}
result.clear();
// Next batch should start with the overflow row
rsLoader.startBatch();
assertEquals(1, rootWriter.rowCount());
result = fixture.wrap(rsLoader.harvest());
assertEquals(1, result.rowCount());
reader = result.reader();
mapReader = reader.tuple("m");
while (reader.next()) {
assertEquals(rowId, reader.scalar("a").getInt());
assertEquals(rowId * 10, mapReader.scalar("b").getInt());
assertTrue(Arrays.equals(value, mapReader.scalar("c").getBytes()));
assertEquals(rowId * 100, mapReader.scalar("d").getInt());
}
result.clear();
rsLoader.close();
}
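The overflow cycle this test exercises reduces to the following sketch. It assumes the same fixture, schema, and writer setup as the test above and is illustrative, not a standalone program; only the calls shown in the tests are used.
rsLoader.startBatch();
while (!rootWriter.isFull()) {            // isFull() turns true once a vector overflows
  rootWriter.start();
  aWriter.setInt(count++);                // write the row's columns as usual
  rootWriter.save();                      // the overflowing row is held back, not lost
}
RowSet full = fixture.wrap(rsLoader.harvest());  // full batch, without the overflow row
rsLoader.startBatch();                    // the held row carries into the new batch
assertEquals(1, rootWriter.rowCount());
full.clear();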
Use of org.apache.drill.exec.physical.rowSet.RowSetLoader in project drill by axbaretto.
From class TestResultSetLoaderMaps, method testMapAddition.
/**
* Test adding a map to a loader after writing the first row.
*/
@Test
public void testMapAddition() {
TupleMetadata schema = new SchemaBuilder().add("a", MinorType.INT).buildSchema();
ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder().setSchema(schema).build();
ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
assertEquals(1, rsLoader.schemaVersion());
RowSetLoader rootWriter = rsLoader.writer();
// Start without the map. Add a map after the first row.
rsLoader.startBatch();
rootWriter.addRow(10);
int mapIndex = rootWriter.addColumn(SchemaBuilder.columnSchema("m", MinorType.MAP, DataMode.REQUIRED));
TupleWriter mapWriter = rootWriter.tuple(mapIndex);
// Add a column to the map with the same name as the top-level column.
// Verifies that the name spaces are independent.
mapWriter.addColumn(SchemaBuilder.columnSchema("a", MinorType.VARCHAR, DataMode.REQUIRED));
rootWriter.addRow(20, objArray("fred")).addRow(30, objArray("barney"));
RowSet actual = fixture.wrap(rsLoader.harvest());
assertEquals(3, rsLoader.schemaVersion());
assertEquals(3, actual.rowCount());
// Validate first batch
TupleMetadata expectedSchema = new SchemaBuilder()
    .add("a", MinorType.INT)
    .addMap("m")
      .add("a", MinorType.VARCHAR)
      .resumeSchema()
    .buildSchema();
SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
    .addRow(10, objArray(""))
    .addRow(20, objArray("fred"))
    .addRow(30, objArray("barney"))
    .build();
new RowSetComparison(expected).verifyAndClearAll(actual);
rsLoader.close();
}
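The mid-batch column addition shown above reduces to this pattern (a sketch under the same fixture assumptions as the test; rows saved before the new columns exist are back-filled automatically):
int mapIndex = rootWriter.addColumn(
    SchemaBuilder.columnSchema("m", MinorType.MAP, DataMode.REQUIRED));
TupleWriter mapWriter = rootWriter.tuple(mapIndex);       // writer for the new map
mapWriter.addColumn(
    SchemaBuilder.columnSchema("a", MinorType.VARCHAR, DataMode.REQUIRED));
// The row saved before the addColumn() calls reads back with an empty map value,
// so the earlier row (10) appears as (10, {""}) in the harvested batch.
rootWriter.addRow(20, objArray("fred"));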
Use of org.apache.drill.exec.physical.rowSet.RowSetLoader in project drill by axbaretto.
From class TestResultSetLoaderMaps, method testMapWithArray.
/**
 * Test a map that contains a scalar array. No reason to suspect that this
 * will cause problems, as the array writer is fully tested in the accessor
 * subsystem. Still, we need to test the cardinality methods of the loader
 * layer.
 */
@Test
public void testMapWithArray() {
TupleMetadata schema = new SchemaBuilder()
    .add("a", MinorType.INT)
    .addMap("m")
      .addArray("c", MinorType.INT)
      .addArray("d", MinorType.VARCHAR)
      .resumeSchema()
    .buildSchema();
ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder().setSchema(schema).build();
ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
RowSetLoader rootWriter = rsLoader.writer();
// Write some rows
rsLoader.startBatch();
rootWriter
    .addRow(10, objArray(intArray(110, 120, 130), strArray("d1.1", "d1.2", "d1.3", "d1.4")))
    .addRow(20, objArray(intArray(210), strArray()))
    .addRow(30, objArray(intArray(), strArray("d3.1")));
// Validate first batch
RowSet actual = fixture.wrap(rsLoader.harvest());
SingleRowSet expected = fixture.rowSetBuilder(schema)
    .addRow(10, objArray(intArray(110, 120, 130), strArray("d1.1", "d1.2", "d1.3", "d1.4")))
    .addRow(20, objArray(intArray(210), strArray()))
    .addRow(30, objArray(intArray(), strArray("d3.1")))
    .build();
new RowSetComparison(expected).verifyAndClearAll(actual);
// Add another array after the first row in the second batch.
rsLoader.startBatch();
rootWriter
    .addRow(40, objArray(intArray(410, 420), strArray("d4.1", "d4.2")))
    .addRow(50, objArray(intArray(510), strArray("d5.1")));
TupleWriter mapWriter = rootWriter.tuple("m");
mapWriter.addColumn(SchemaBuilder.columnSchema("e", MinorType.VARCHAR, DataMode.REPEATED));
rootWriter
    .addRow(60, objArray(intArray(610, 620), strArray("d6.1", "d6.2"), strArray("e6.1", "e6.2")))
    .addRow(70, objArray(intArray(710), strArray(), strArray("e7.1", "e7.2")));
// Validate the second batch. The new array should have been back-filled with
// empty offsets for the missing rows.
actual = fixture.wrap(rsLoader.harvest());
// System.out.println(actual.schema().toString());
expected = fixture.rowSetBuilder(actual.schema())
    .addRow(40, objArray(intArray(410, 420), strArray("d4.1", "d4.2"), strArray()))
    .addRow(50, objArray(intArray(510), strArray("d5.1"), strArray()))
    .addRow(60, objArray(intArray(610, 620), strArray("d6.1", "d6.2"), strArray("e6.1", "e6.2")))
    .addRow(70, objArray(intArray(710), strArray(), strArray("e7.1", "e7.2")))
    .build();
// expected.print();
new RowSetComparison(expected).verifyAndClearAll(actual);
rsLoader.close();
}
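For completeness, the same repeated column can be written through the explicit array writer rather than the addRow()/strArray() shorthand. A sketch, assuming TupleWriter exposes the column(i).array().scalar() accessor used on the root writer in other tests on this page:
rootWriter.start();
rootWriter.scalar("a").setInt(80);
ScalarWriter cArray = rootWriter.tuple("m").column(0).array().scalar();  // map column "c"
cArray.setInt(810);     // each set call appends one element to the array for this row
cArray.setInt(820);
rootWriter.save();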
Use of org.apache.drill.exec.physical.rowSet.RowSetLoader in project drill by axbaretto.
From class TestResultSetLoaderOmittedValues, method testOmittedValuesAtEnd.
/**
* Test "holes" in the middle of a batch, and unset columns at
* the end. Ending the batch should fill in missing values.
*/
@Test
public void testOmittedValuesAtEnd() {
// Create columns up front
TupleMetadata schema = new SchemaBuilder()
    .add("a", MinorType.INT)
    .add("b", MinorType.VARCHAR)
    .addNullable("c", MinorType.VARCHAR)
    .add("d", MinorType.INT)
    .addNullable("e", MinorType.INT)
    .addArray("f", MinorType.VARCHAR)
    .buildSchema();
ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder().setSchema(schema).build();
ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
RowSetLoader rootWriter = rsLoader.writer();
rsLoader.startBatch();
int rowCount = 0;
ScalarWriter arrayWriter;
for (int i = 0; i < 2; i++) {
// Row 0, 1
rootWriter.start();
rowCount++;
rootWriter.scalar(0).setInt(rowCount);
rootWriter.scalar(1).setString("b_" + rowCount);
rootWriter.scalar(2).setString("c_" + rowCount);
rootWriter.scalar(3).setInt(rowCount * 10);
rootWriter.scalar(4).setInt(rowCount * 100);
arrayWriter = rootWriter.column(5).array().scalar();
arrayWriter.setString("f_" + rowCount + "-1");
arrayWriter.setString("f_" + rowCount + "-2");
rootWriter.save();
}
for (int i = 0; i < 2; i++) {
// Rows 2, 3
rootWriter.start();
rowCount++;
rootWriter.scalar(0).setInt(rowCount);
rootWriter.scalar(1).setString("b_" + rowCount);
rootWriter.scalar(3).setInt(rowCount * 10);
arrayWriter = rootWriter.column(5).array().scalar();
arrayWriter.setString("f_" + rowCount + "-1");
arrayWriter.setString("f_" + rowCount + "-2");
rootWriter.save();
}
for (int i = 0; i < 2; i++) {
// Rows 4, 5
rootWriter.start();
rowCount++;
rootWriter.scalar(0).setInt(rowCount);
rootWriter.scalar(2).setString("c_" + rowCount);
rootWriter.scalar(4).setInt(rowCount * 100);
rootWriter.save();
}
for (int i = 0; i < 2; i++) {
// Rows 6, 7
rootWriter.start();
rowCount++;
rootWriter.scalar(0).setInt(rowCount);
rootWriter.scalar(1).setString("b_" + rowCount);
rootWriter.scalar(2).setString("c_" + rowCount);
rootWriter.scalar(3).setInt(rowCount * 10);
rootWriter.scalar(4).setInt(rowCount * 100);
arrayWriter = rootWriter.column(5).array().scalar();
arrayWriter.setString("f_" + rowCount + "-1");
arrayWriter.setString("f_" + rowCount + "-2");
rootWriter.save();
}
for (int i = 0; i < 2; i++) {
// Rows 8, 9
rootWriter.start();
rowCount++;
rootWriter.scalar(0).setInt(rowCount);
rootWriter.save();
}
// Harvest the batch and verify.
RowSet actual = fixture.wrap(rsLoader.harvest());
// actual.print();
BatchSchema expectedSchema = new SchemaBuilder()
    .add("a", MinorType.INT)
    .add("b", MinorType.VARCHAR)
    .addNullable("c", MinorType.VARCHAR)
    .add("d", MinorType.INT)
    .addNullable("e", MinorType.INT)
    .addArray("f", MinorType.VARCHAR)
    .build();
SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
    .addRow(1, "b_1", "c_1", 10, 100, strArray("f_1-1", "f_1-2"))
    .addRow(2, "b_2", "c_2", 20, 200, strArray("f_2-1", "f_2-2"))
    .addRow(3, "b_3", null, 30, null, strArray("f_3-1", "f_3-2"))
    .addRow(4, "b_4", null, 40, null, strArray("f_4-1", "f_4-2"))
    .addRow(5, "", "c_5", 0, 500, strArray())
    .addRow(6, "", "c_6", 0, 600, strArray())
    .addRow(7, "b_7", "c_7", 70, 700, strArray("f_7-1", "f_7-2"))
    .addRow(8, "b_8", "c_8", 80, 800, strArray("f_8-1", "f_8-2"))
    .addRow(9, "", null, 0, null, strArray())
    .addRow(10, "", null, 0, null, strArray())
    .build();
new RowSetComparison(expected).verifyAndClearAll(actual);
rsLoader.close();
}
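The fill-empties behavior verified above reduces to a short sketch (same fixture assumptions as the test): any column not set before save() receives its default value.
rootWriter.start();
rootWriter.scalar(0).setInt(99);   // set only column "a"
rootWriter.save();
// On harvest this row reads back as: b -> "", c -> null, d -> 0, e -> null, f -> [].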
Use of org.apache.drill.exec.physical.rowSet.RowSetLoader in project drill by axbaretto.
From class TestResultSetLoaderOmittedValues, method testOmittedValuesAtEndWithOverflow.
/**
 * Test "holes" at the end of a batch when the batch overflows. The completed
 * batch must be finalized correctly, and the new batch initialized correctly,
 * for the missing values.
 */
@Test
public void testOmittedValuesAtEndWithOverflow() {
TupleMetadata schema = new SchemaBuilder()
    .add("a", MinorType.INT)
    .add("b", MinorType.VARCHAR)
    .addNullable("c", MinorType.VARCHAR)
    .addNullable("d", MinorType.VARCHAR)
    .buildSchema();
ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
    .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
    .setSchema(schema)
    .build();
ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
RowSetLoader rootWriter = rsLoader.writer();
// Fill the batch. Column d has some values. Column c is worst case: no values.
rsLoader.startBatch();
byte[] value = new byte[533];
Arrays.fill(value, (byte) 'X');
int rowNumber = 0;
while (!rootWriter.isFull()) {
rootWriter.start();
rowNumber++;
rootWriter.scalar(0).setInt(rowNumber);
rootWriter.scalar(1).setBytes(value, value.length);
if (rowNumber < 10_000) {
rootWriter.scalar(3).setString("d-" + rowNumber);
}
rootWriter.save();
assertEquals(rowNumber, rsLoader.totalRowCount());
}
// Harvest and verify
RowSet result = fixture.wrap(rsLoader.harvest());
assertEquals(rowNumber - 1, result.rowCount());
RowSetReader reader = result.reader();
int rowIndex = 0;
while (reader.next()) {
int expectedRowNumber = 1 + rowIndex;
assertEquals(expectedRowNumber, reader.scalar(0).getInt());
assertTrue(reader.scalar(2).isNull());
if (expectedRowNumber < 10_000) {
assertEquals("d-" + expectedRowNumber, reader.scalar(3).getString());
} else {
assertTrue(reader.scalar(3).isNull());
}
rowIndex++;
}
// Start count for this batch is one less than current
// count, because of the overflow row.
int startRowNumber = rowNumber;
// Write a few more rows to the next batch
rsLoader.startBatch();
for (int i = 0; i < 10; i++) {
rootWriter.start();
rowNumber++;
rootWriter.scalar(0).setInt(rowNumber);
rootWriter.scalar(1).setBytes(value, value.length);
if (i > 5) {
rootWriter.scalar(3).setString("d-" + rowNumber);
}
rootWriter.save();
assertEquals(rowNumber, rsLoader.totalRowCount());
}
// Verify that holes were preserved.
result = fixture.wrap(rsLoader.harvest());
assertEquals(rowNumber, rsLoader.totalRowCount());
assertEquals(rowNumber - startRowNumber + 1, result.rowCount());
// result.print();
reader = result.reader();
rowIndex = 0;
while (reader.next()) {
int expectedRowNumber = startRowNumber + rowIndex;
assertEquals(expectedRowNumber, reader.scalar(0).getInt());
assertTrue(reader.scalar(2).isNull());
if (rowIndex > 6) {
assertEquals("d-" + expectedRowNumber, reader.scalar(3).getString());
} else {
assertTrue("Row " + rowIndex + " col d should be null", reader.scalar(3).isNull());
}
rowIndex++;
}
assertEquals(rowIndex, 11);
rsLoader.close();
}
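The row accounting across overflow, restated from the assertions in the test above (names are those of the test; this is a recap sketch, not additional test code):
// After the fill loop, the running total includes the overflow row:
assertEquals(rowNumber, rsLoader.totalRowCount());
// ...while the first harvested batch holds one row fewer, since the overflow row
// is withheld and becomes the first row of the next batch:
assertEquals(rowNumber - 1, result.rowCount());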