Use of org.apache.drill.exec.physical.rowSet.RowSetLoader in project drill by axbaretto.
Class TestResultSetLoaderOverflow, method testArrayOverflowWithOtherArrays.
/**
 * Test the complete set of array overflow cases:
 * <ul>
 * <li>Array a is written before the column that overflows,
 * and must be copied, in its entirety, to the overflow row.</li>
 * <li>Column b causes the overflow.</li>
 * <li>Column c is written for the first half of the batch, then
 * left empty; it is not written in the overflow row.</li>
 * <li>Column d is also written for the first half of the batch,
 * then has empties, but is written again in the overflow row.</li>
 * </ul>
 */
@Test
public void testArrayOverflowWithOtherArrays() {
TupleMetadata schema = new SchemaBuilder()
    .addArray("a", MinorType.INT)
    .addArray("b", MinorType.VARCHAR)
    .addArray("c", MinorType.INT)
    .addArray("d", MinorType.INT)
    .buildSchema();
ResultSetOptions options = new OptionBuilder()
    .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
    .setSchema(schema)
    .build();
ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
RowSetLoader rootWriter = rsLoader.writer();
// Fill the batch with rows of array values. Tack on a suffix
// to each VARCHAR value so we can be sure the proper data is
// written and moved to the overflow batch.
byte[] value = new byte[512];
Arrays.fill(value, (byte) 'X');
String strValue = new String(value, Charsets.UTF_8);
int aCount = 3;
int bCount = 11;
int cCount = 5;
int dCount = 7;
int cCutoff = ValueVector.MAX_BUFFER_SIZE / value.length / bCount / 2;
ScalarWriter aWriter = rootWriter.array("a").scalar();
ScalarWriter bWriter = rootWriter.array("b").scalar();
ScalarWriter cWriter = rootWriter.array("c").scalar();
ScalarWriter dWriter = rootWriter.array("d").scalar();
int count = 0;
rsLoader.startBatch();
while (rootWriter.start()) {
// (With these value sizes, column b is expected to overflow near row 2952.)
for (int i = 0; i < aCount; i++) {
aWriter.setInt(count * aCount + i);
}
for (int i = 0; i < bCount; i++) {
String cellValue = strValue + (count * bCount + i);
bWriter.setString(cellValue);
}
if (count < cCutoff) {
for (int i = 0; i < cCount; i++) {
cWriter.setInt(count * cCount + i);
}
}
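// Column d is written for the same rows as c, and also in the
// overflow row itself: isFull() becomes true once a row overflows.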
if (count < cCutoff || rootWriter.isFull()) {
for (int i = 0; i < dCount; i++) {
dWriter.setInt(count * dCount + i);
}
}
rootWriter.save();
count++;
}
// Verify
RowSet result = fixture.wrap(rsLoader.harvest());
assertEquals(count - 1, result.rowCount());
RowSetReader reader = result.reader();
ScalarElementReader aReader = reader.array("a").elements();
ScalarElementReader bReader = reader.array("b").elements();
ScalarElementReader cReader = reader.array("c").elements();
ScalarElementReader dReader = reader.array("d").elements();
while (reader.next()) {
int rowId = reader.rowIndex();
assertEquals(aCount, aReader.size());
for (int i = 0; i < aCount; i++) {
assertEquals(rowId * aCount + i, aReader.getInt(i));
}
assertEquals(bCount, bReader.size());
for (int i = 0; i < bCount; i++) {
String cellValue = strValue + (rowId * bCount + i);
assertEquals(cellValue, bReader.getString(i));
}
if (rowId < cCutoff) {
assertEquals(cCount, cReader.size());
for (int i = 0; i < cCount; i++) {
assertEquals(rowId * cCount + i, cReader.getInt(i));
}
assertEquals(dCount, dReader.size());
for (int i = 0; i < dCount; i++) {
assertEquals(rowId * dCount + i, dReader.getInt(i));
}
} else {
assertEquals(0, cReader.size());
assertEquals(0, dReader.size());
}
}
result.clear();
int firstCount = count - 1;
// One row is in the batch. Write more, skipping over the
// initial few values for columns c and d. Column d has a
// roll-over value, c has an empty roll-over.
rsLoader.startBatch();
for (int j = 0; j < 5; j++) {
rootWriter.start();
for (int i = 0; i < aCount; i++) {
aWriter.setInt(count * aCount + i);
}
for (int i = 0; i < bCount; i++) {
String cellValue = strValue + (count * bCount + i);
bWriter.setString(cellValue);
}
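// Only the final row written here (j == 4) includes c and d.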
if (j > 3) {
for (int i = 0; i < cCount; i++) {
cWriter.setInt(count * cCount + i);
}
for (int i = 0; i < dCount; i++) {
dWriter.setInt(count * dCount + i);
}
}
rootWriter.save();
count++;
}
result = fixture.wrap(rsLoader.harvest());
assertEquals(6, result.rowCount());
reader = result.reader();
aReader = reader.array("a").elements();
bReader = reader.array("b").elements();
cReader = reader.array("c").elements();
dReader = reader.array("d").elements();
// Row 0 of this second batch is the overflow row: d was written
// there during the first batch, c was not.
int j = 0;
while (reader.next()) {
int rowId = firstCount + reader.rowIndex();
assertEquals(aCount, aReader.size());
for (int i = 0; i < aCount; i++) {
assertEquals("Index " + i, rowId * aCount + i, aReader.getInt(i));
}
assertEquals(bCount, bReader.size());
for (int i = 0; i < bCount; i++) {
String cellValue = strValue + (rowId * bCount + i);
assertEquals(cellValue, bReader.getString(i));
}
if (j > 4) {
assertEquals(cCount, cReader.size());
for (int i = 0; i < cCount; i++) {
assertEquals(rowId * cCount + i, cReader.getInt(i));
}
} else {
assertEquals(0, cReader.size());
}
if (j == 0 || j > 4) {
assertEquals(dCount, dReader.size());
for (int i = 0; i < dCount; i++) {
assertEquals(rowId * dCount + i, dReader.getInt(i));
}
} else {
assertEquals(0, dReader.size());
}
j++;
}
result.clear();
rsLoader.close();
}
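For orientation, the cutoff constants in this test can be derived by hand. A back-of-the-envelope sketch, assuming ValueVector.MAX_BUFFER_SIZE is 16 MB (an assumption; the test reads the real constant):
int maxBufferSize = 16 * 1024 * 1024;        // assumed value of ValueVector.MAX_BUFFER_SIZE
int overflowRow = maxBufferSize / 512 / 11;  // ~2978: rows until column b's vector fills
int cCutoff = overflowRow / 2;               // ~1489: matches the test's cCutoff
// The integer suffix appended to each value makes b entries slightly longer
// than 512 bytes, so overflow actually lands a little earlier (near row 2952).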
Use of org.apache.drill.exec.physical.rowSet.RowSetLoader in project drill by axbaretto.
Class TestResultSetLoaderOverflow, method testOverflowWithNullables.
@Test
public void testOverflowWithNullables() {
TupleMetadata schema = new SchemaBuilder()
    .add("n", MinorType.INT)
    .addNullable("a", MinorType.VARCHAR)
    .addNullable("b", MinorType.VARCHAR)
    .addNullable("c", MinorType.VARCHAR)
    .buildSchema();
ResultSetOptions options = new OptionBuilder()
    .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
    .setSchema(schema)
    .build();
ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
RowSetLoader rootWriter = rsLoader.writer();
rsLoader.startBatch();
byte[] value = new byte[512];
Arrays.fill(value, (byte) 'X');
int count = 0;
while (!rootWriter.isFull()) {
rootWriter.start();
rootWriter.scalar(0).setInt(count);
rootWriter.scalar(1).setNull();
rootWriter.scalar(2).setBytes(value, value.length);
rootWriter.scalar(3).setNull();
rootWriter.save();
count++;
}
// Result should exclude the overflow row
RowSet result = fixture.wrap(rsLoader.harvest());
assertEquals(count - 1, result.rowCount());
RowSetReader reader = result.reader();
while (reader.next()) {
assertEquals(reader.rowIndex(), reader.scalar(0).getInt());
assertTrue(reader.scalar(1).isNull());
assertTrue(Arrays.equals(value, reader.scalar(2).getBytes()));
assertTrue(reader.scalar(3).isNull());
}
result.clear();
// Next batch should start with the overflow row
rsLoader.startBatch();
result = fixture.wrap(rsLoader.harvest());
reader = result.reader();
assertEquals(1, result.rowCount());
assertTrue(reader.next());
assertEquals(count - 1, reader.scalar(0).getInt());
assertTrue(reader.scalar(1).isNull());
assertTrue(Arrays.equals(value, reader.scalar(2).getBytes()));
assertTrue(reader.scalar(3).isNull());
result.clear();
rsLoader.close();
}
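Both overflow tests follow the same round-trip skeleton: write until the batch is full, harvest (which withholds the overflow row), then start the next batch (which begins with that row). A minimal sketch of the pattern, assuming the same test fixture; the single-column schema and values are illustrative, not from the tests:
TupleMetadata schema = new SchemaBuilder()
    .add("s", MinorType.VARCHAR)
    .buildSchema();
ResultSetOptions options = new OptionBuilder()
    .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
    .setSchema(schema)
    .build();
ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
RowSetLoader rootWriter = rsLoader.writer();
byte[] value = new byte[512];
Arrays.fill(value, (byte) 'X');
rsLoader.startBatch();
int count = 0;
while (!rootWriter.isFull()) {
  rootWriter.start();
  rootWriter.scalar(0).setBytes(value, value.length);
  rootWriter.save();
  count++;
}
// Harvest withholds the overflow row...
RowSet result = fixture.wrap(rsLoader.harvest());
assertEquals(count - 1, result.rowCount());
result.clear();
// ...and the next batch starts with it.
rsLoader.startBatch();
assertEquals(1, rootWriter.rowCount());
rsLoader.close();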
Use of org.apache.drill.exec.physical.rowSet.RowSetLoader in project drill by axbaretto.
Class TestResultSetLoaderProjection, method testMapProjection.
@Test
public void testMapProjection() {
List<SchemaPath> selection = Lists.newArrayList(
    SchemaPath.getSimplePath("m1"),
    SchemaPath.getCompoundPath("m2", "d"));
TupleMetadata schema = new SchemaBuilder()
    .addMap("m1")
      .add("a", MinorType.INT)
      .add("b", MinorType.INT)
      .resumeSchema()
    .addMap("m2")
      .add("c", MinorType.INT)
      .add("d", MinorType.INT)
      .resumeSchema()
    .addMap("m3")
      .add("e", MinorType.INT)
      .add("f", MinorType.INT)
      .resumeSchema()
    .buildSchema();
ResultSetOptions options = new OptionBuilder()
    .setProjection(selection)
    .setSchema(schema)
    .build();
ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
RowSetLoader rootWriter = rsLoader.writer();
// Verify the projected columns
TupleMetadata actualSchema = rootWriter.schema();
ColumnMetadata m1Md = actualSchema.metadata("m1");
assertTrue(m1Md.isMap());
assertTrue(m1Md.isProjected());
assertEquals(2, m1Md.mapSchema().size());
assertTrue(m1Md.mapSchema().metadata("a").isProjected());
assertTrue(m1Md.mapSchema().metadata("b").isProjected());
ColumnMetadata m2Md = actualSchema.metadata("m2");
assertTrue(m2Md.isMap());
assertTrue(m2Md.isProjected());
assertEquals(2, m2Md.mapSchema().size());
assertFalse(m2Md.mapSchema().metadata("c").isProjected());
assertTrue(m2Md.mapSchema().metadata("d").isProjected());
ColumnMetadata m3Md = actualSchema.metadata("m3");
assertTrue(m3Md.isMap());
assertFalse(m3Md.isProjected());
assertEquals(2, m3Md.mapSchema().size());
assertFalse(m3Md.mapSchema().metadata("e").isProjected());
assertFalse(m3Md.mapSchema().metadata("f").isProjected());
// Write a couple of rows.
rsLoader.startBatch();
rootWriter.start();
rootWriter.tuple("m1").scalar("a").setInt(1);
rootWriter.tuple("m1").scalar("b").setInt(2);
rootWriter.tuple("m2").scalar("c").setInt(3);
rootWriter.tuple("m2").scalar("d").setInt(4);
rootWriter.tuple("m3").scalar("e").setInt(5);
rootWriter.tuple("m3").scalar("f").setInt(6);
rootWriter.save();
rootWriter.start();
rootWriter.tuple("m1").scalar("a").setInt(11);
rootWriter.tuple("m1").scalar("b").setInt(12);
rootWriter.tuple("m2").scalar("c").setInt(13);
rootWriter.tuple("m2").scalar("d").setInt(14);
rootWriter.tuple("m3").scalar("e").setInt(15);
rootWriter.tuple("m3").scalar("f").setInt(16);
rootWriter.save();
// Verify. Only the projected columns appear in the result set.
BatchSchema expectedSchema = new SchemaBuilder()
    .addMap("m1")
      .add("a", MinorType.INT)
      .add("b", MinorType.INT)
      .resumeSchema()
    .addMap("m2")
      .add("d", MinorType.INT)
      .resumeSchema()
    .build();
SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
    .addRow(objArray(1, 2), objArray(4))
    .addRow(objArray(11, 12), objArray(14))
    .build();
new RowSetComparison(expected).verifyAndClearAll(fixture.wrap(rsLoader.harvest()));
rsLoader.close();
}
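Note that the rows above write to m2.c and to all of m3 even though none of those columns is projected; the loader accepts the writes and quietly discards the values, as the harvested result shows. A minimal sketch isolating that behavior, assuming the same fixture; the column names here are illustrative:
List<SchemaPath> selection = Lists.newArrayList(SchemaPath.getSimplePath("kept"));
TupleMetadata schema = new SchemaBuilder()
    .add("kept", MinorType.INT)
    .add("dropped", MinorType.INT)
    .buildSchema();
ResultSetOptions options = new OptionBuilder()
    .setProjection(selection)
    .setSchema(schema)
    .build();
ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
RowSetLoader rootWriter = rsLoader.writer();
rsLoader.startBatch();
rootWriter.start();
rootWriter.scalar("kept").setInt(1);
rootWriter.scalar("dropped").setInt(99);  // accepted, but silently discarded
rootWriter.save();
RowSet result = fixture.wrap(rsLoader.harvest());
assertEquals(1, result.rowCount());       // only "kept" appears in the result schema
result.clear();
rsLoader.close();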
Use of org.apache.drill.exec.physical.rowSet.RowSetLoader in project drill by axbaretto.
Class TestResultSetLoaderProjection, method testMapArrayProjection.
/**
* Test a map array. Use the convenience methods to set values.
* Only the projected array members should appear in the harvested
* results.
*/
@Test
public void testMapArrayProjection() {
List<SchemaPath> selection = Lists.newArrayList(
    SchemaPath.getSimplePath("m1"),
    SchemaPath.getCompoundPath("m2", "d"));
TupleMetadata schema = new SchemaBuilder()
    .addMapArray("m1")
      .add("a", MinorType.INT)
      .add("b", MinorType.INT)
      .resumeSchema()
    .addMapArray("m2")
      .add("c", MinorType.INT)
      .add("d", MinorType.INT)
      .resumeSchema()
    .addMapArray("m3")
      .add("e", MinorType.INT)
      .add("f", MinorType.INT)
      .resumeSchema()
    .buildSchema();
ResultSetOptions options = new OptionBuilder()
    .setProjection(selection)
    .setSchema(schema)
    .build();
ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
RowSetLoader rootWriter = rsLoader.writer();
// Write a couple of rows.
rsLoader.startBatch();
rootWriter.addRow(
    objArray(objArray(10, 20), objArray(11, 21)),
    objArray(objArray(30, 40), objArray(31, 42)),
    objArray(objArray(50, 60), objArray(51, 62)));
rootWriter.addRow(
    objArray(objArray(110, 120), objArray(111, 121)),
    objArray(objArray(130, 140), objArray(131, 142)),
    objArray(objArray(150, 160), objArray(151, 162)));
// Verify. Only the projected columns appear in the result set.
BatchSchema expectedSchema = new SchemaBuilder()
    .addMapArray("m1")
      .add("a", MinorType.INT)
      .add("b", MinorType.INT)
      .resumeSchema()
    .addMapArray("m2")
      .add("d", MinorType.INT)
      .resumeSchema()
    .build();
SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
    .addRow(
        objArray(objArray(10, 20), objArray(11, 21)),
        objArray(objArray(40), objArray(42)))
    .addRow(
        objArray(objArray(110, 120), objArray(111, 121)),
        objArray(objArray(140), objArray(142)))
    .build();
new RowSetComparison(expected).verifyAndClearAll(fixture.wrap(rsLoader.harvest()));
rsLoader.close();
}
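The addRow() calls above are a convenience shorthand for explicit writer calls. A sketch of the equivalent long form for the first row; the ArrayWriter.save() call that closes each map element is assumed from the rowSet writer API used elsewhere in these tests:
rootWriter.start();
ArrayWriter m1Writer = rootWriter.array("m1");
TupleWriter m1Tuple = m1Writer.tuple();
m1Tuple.scalar("a").setInt(10);
m1Tuple.scalar("b").setInt(20);
m1Writer.save();   // close the first map element
m1Tuple.scalar("a").setInt(11);
m1Tuple.scalar("b").setInt(21);
m1Writer.save();   // close the second map element
// ... repeat for "m2" and "m3" ...
rootWriter.save();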
Use of org.apache.drill.exec.physical.rowSet.RowSetLoader in project drill by axbaretto.
Class TestResultSetLoaderProjection, method testProjectWithOverflow.
/**
* Verify that the projection code plays nice with vector overflow. Overflow
* is the most complex operation in this subsystem with many specialized
* methods that must work together flawlessly. This test ensures that
* non-projected columns stay in the background and don't interfere
* with overflow logic.
*/
@Test
public void testProjectWithOverflow() {
List<SchemaPath> selection = Lists.newArrayList(
    SchemaPath.getSimplePath("small"),
    SchemaPath.getSimplePath("dummy"));
TupleMetadata schema = new SchemaBuilder()
    .add("big", MinorType.VARCHAR)
    .add("small", MinorType.VARCHAR)
    .buildSchema();
ResultSetOptions options = new OptionBuilder()
    .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
    .setProjection(selection)
    .setSchema(schema)
    .build();
ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
RowSetLoader rootWriter = rsLoader.writer();
byte[] big = new byte[600];
Arrays.fill(big, (byte) 'X');
byte[] small = new byte[512];
Arrays.fill(small, (byte) 'X');
rsLoader.startBatch();
int count = 0;
while (!rootWriter.isFull()) {
rootWriter.start();
rootWriter.scalar(0).setBytes(big, big.length);
rootWriter.scalar(1).setBytes(small, small.length);
rootWriter.save();
count++;
}
// Number of rows should be driven by size of the
// projected vector ("small"), not by the larger, unprojected
// "big" vector.
// Our row count should include the overflow row
int expectedCount = ValueVector.MAX_BUFFER_SIZE / small.length;
assertEquals(expectedCount + 1, count);
// Loader's row count should include only "visible" rows
assertEquals(expectedCount, rootWriter.rowCount());
// Total count should include invisible and look-ahead rows.
assertEquals(expectedCount + 1, rsLoader.totalRowCount());
// Result should exclude the overflow row
RowSet result = fixture.wrap(rsLoader.harvest());
assertEquals(expectedCount, result.rowCount());
result.clear();
// Next batch should start with the overflow row
rsLoader.startBatch();
assertEquals(1, rootWriter.rowCount());
assertEquals(expectedCount + 1, rsLoader.totalRowCount());
result = fixture.wrap(rsLoader.harvest());
assertEquals(1, result.rowCount());
result.clear();
rsLoader.close();
}
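The row-count arithmetic here mirrors the earlier overflow tests. A sketch, again assuming ValueVector.MAX_BUFFER_SIZE is 16 MB:
int maxBufferSize = 16 * 1024 * 1024;      // assumed value of ValueVector.MAX_BUFFER_SIZE
int expectedCount = maxBufferSize / 512;   // 32768 rows of 512-byte "small" values
// Had "big" been projected, its 600-byte values would have filled a vector
// first: 16 MB / 600 ~ 27962 rows. The larger count shows "big" takes no space.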