Use of org.apache.drill.exec.physical.resultSet.RowSetLoader in project drill by apache.
The class TestScanOrchestratorLateSchema, method testLateSchemaSelectDisjoint.
/**
* Test SELECT a, c FROM table(a, b)
*/
@Test
public void testLateSchemaSelectDisjoint() {
ScanOrchestratorBuilder builder = new MockScanBuilder();
// SELECT a, c ...
builder.projection(RowSetTestUtils.projectList("a", "c"));
ScanSchemaOrchestrator orchestrator = new ScanSchemaOrchestrator(fixture.allocator(), builder);
// ... FROM file
ReaderSchemaOrchestrator reader = orchestrator.startReader();
// Create the table loader
ResultSetLoader loader = reader.makeTableLoader(null);
// file schema (a, b)
reader.startBatch();
RowSetLoader writer = loader.writer();
writer.addColumn(SchemaBuilder.columnSchema("a", MinorType.INT, DataMode.REQUIRED));
writer.addColumn(SchemaBuilder.columnSchema("b", MinorType.VARCHAR, DataMode.REQUIRED));
// Create a batch of data.
writer.addRow(1, "fred").addRow(2, "wilma");
reader.endBatch();
// Verify
TupleMetadata expectedSchema = new SchemaBuilder().add("a", MinorType.INT).addNullable("c", MinorType.INT).buildSchema();
SingleRowSet expected = fixture.rowSetBuilder(expectedSchema).addRow(1, null).addRow(2, null).build();
new RowSetComparison(expected).verifyAndClearAll(fixture.wrap(orchestrator.output()));
orchestrator.close();
}
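This snippet, like the others collected on this page, drives a ResultSetLoader through the same basic write cycle that the scan orchestrator wraps. As a minimal sketch of that cycle used directly (the allocator, the single column "a", and the row values below are illustrative assumptions, not part of the test above):

ResultSetLoader rsLoader = new ResultSetLoaderImpl(allocator, new ResultSetOptionBuilder().build());
RowSetLoader rowWriter = rsLoader.writer();
rsLoader.startBatch();
// Late schema: columns may be added while the batch is being written.
rowWriter.addColumn(SchemaBuilder.columnSchema("a", MinorType.INT, DataMode.REQUIRED));
rowWriter.addRow(10).addRow(20); // addRow() is shorthand for start(), column writes, save()
VectorContainer batch = rsLoader.harvest(); // the caller takes ownership of the harvested batch
rsLoader.close();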
Use of org.apache.drill.exec.physical.resultSet.RowSetLoader in project drill by apache.
The class TestScanOrchestratorLateSchema, method testLateSchemaWildcard.
/**
* Test SELECT * from a late-schema table of (a, b)
*/
@Test
public void testLateSchemaWildcard() {
ScanOrchestratorBuilder builder = new MockScanBuilder();
// SELECT * ...
builder.projection(RowSetTestUtils.projectAll());
ScanSchemaOrchestrator orchestrator = new ScanSchemaOrchestrator(fixture.allocator(), builder);
// ... FROM table
ReaderSchemaOrchestrator reader = orchestrator.startReader();
// Create the table loader
ResultSetLoader loader = reader.makeTableLoader(null);
// Late schema: no batch provided up front.
assertFalse(reader.hasSchema());
// Start a batch and discover a schema: (a, b)
reader.startBatch();
RowSetLoader writer = loader.writer();
writer.addColumn(SchemaBuilder.columnSchema("a", MinorType.INT, DataMode.REQUIRED));
writer.addColumn(SchemaBuilder.columnSchema("b", MinorType.VARCHAR, DataMode.REQUIRED));
// Create a batch of data using the discovered schema
writer.addRow(1, "fred").addRow(2, "wilma");
reader.endBatch();
// Verify
TupleMetadata tableSchema = new SchemaBuilder().add("a", MinorType.INT).add("b", MinorType.VARCHAR).buildSchema();
SingleRowSet expected = fixture.rowSetBuilder(tableSchema).addRow(1, "fred").addRow(2, "wilma").build();
new RowSetComparison(expected).verifyAndClearAll(fixture.wrap(orchestrator.output()));
orchestrator.close();
}
Use of org.apache.drill.exec.physical.resultSet.RowSetLoader in project drill by apache.
The class AvroMessageReader, method readMessage.
@Override
public void readMessage(ConsumerRecord<?, ?> record) {
RowSetLoader rowWriter = loader.writer();
byte[] recordArray = (byte[]) record.value();
GenericRecord genericRecord = (GenericRecord) deserializer.deserialize(null, recordArray);
Schema schema = genericRecord.getSchema();
if (Schema.Type.RECORD != schema.getType()) {
throw UserException.dataReadError().message(String.format("Root object must be record type. Found: %s", schema.getType())).addContext("Reader", this).build(logger);
}
rowWriter.start();
converter.convert(genericRecord);
writeValue(rowWriter, MetaDataField.KAFKA_TOPIC, record.topic());
writeValue(rowWriter, MetaDataField.KAFKA_PARTITION_ID, record.partition());
writeValue(rowWriter, MetaDataField.KAFKA_OFFSET, record.offset());
writeValue(rowWriter, MetaDataField.KAFKA_TIMESTAMP, record.timestamp());
writeValue(rowWriter, MetaDataField.KAFKA_MSG_KEY, record.key() != null ? getKeyValue((byte[]) record.key()) : null);
rowWriter.save();
}
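The reader above fills the Kafka metadata columns through a writeValue helper defined elsewhere in the plugin. A rough, hypothetical sketch of what such a helper might do, assuming the metadata columns were already added to the loader's schema and using a plain column name in place of the plugin's MetaDataField enum (illustrative only, not the plugin's actual implementation):

private void writeValue(RowSetLoader rowWriter, String columnName, Object value) {
  // Hypothetical helper: look up the column writer by name and dispatch on the value type.
  ScalarWriter colWriter = rowWriter.scalar(columnName);
  if (value == null) {
    colWriter.setNull();
  } else if (value instanceof String) {
    colWriter.setString((String) value);
  } else if (value instanceof Integer) {
    colWriter.setInt((Integer) value);
  } else if (value instanceof Long) {
    colWriter.setLong((Long) value);
  }
}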
Use of org.apache.drill.exec.physical.resultSet.RowSetLoader in project drill by apache.
The class TestResultSetLoaderRepeatedList, method test3DEarlySchema.
// Adapted from TestRepeatedListAccessors.testSchema3DWriterReader
// That test exercises the low-level schema and writer mechanisms.
// Here we simply ensure that the 3D case continues to work when
// wrapped in the Result Set Loader
@Test
public void test3DEarlySchema() {
final TupleMetadata schema = new SchemaBuilder().add("id", MinorType.INT).addArray("cube", MinorType.VARCHAR, 3).buildSchema();
final ResultSetLoaderImpl.ResultSetOptions options = new ResultSetOptionBuilder().readerSchema(schema).build();
final ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
rsLoader.startBatch();
final RowSetLoader writer = rsLoader.writer();
writer.addRow(1, objArray(
    objArray(strArray("a", "b"), strArray("c")),
    objArray(strArray("d", "e", "f"), null),
    null,
    objArray()))
  .addRow(2, null)
  .addRow(3, objArray())
  .addRow(4, objArray(objArray()))
  .addRow(5, singleObjArray(objArray(strArray("g", "h"), strArray("i"))));
final SingleRowSet expected = fixture.rowSetBuilder(schema)
  .addRow(1, objArray(
    objArray(strArray("a", "b"), strArray("c")),
    objArray(strArray("d", "e", "f"), strArray()),
    objArray(),
    objArray()))
  .addRow(2, objArray())
  .addRow(3, objArray())
  .addRow(4, objArray(objArray()))
  .addRow(5, singleObjArray(objArray(strArray("g", "h"), strArray("i"))))
  .build();
RowSetUtilities.verify(expected, fixture.wrap(rsLoader.harvest()));
}
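The nested values above use the static shorthand helpers from Drill's RowSetUtilities test support: strArray(...) builds one innermost VARCHAR array, objArray(...) builds one list level with one element per argument, and singleObjArray(x) wraps a single element. For example, one second-level entry of the "cube" column could be spelled out as (illustrative value only, assuming the usual static imports):

Object[] entry = objArray(  // one element of the outer list
    strArray("a", "b"),     // innermost VARCHAR array
    strArray("c"));         // a second inner array

Note also that the expected row set replaces the nulls written at the inner and outer levels with empty arrays, which is how the repeated-list writers materialize missing list elements.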
Use of org.apache.drill.exec.physical.resultSet.RowSetLoader in project drill by apache.
The class TestResultSetLoaderRepeatedList, method test2DOverflow.
@Test
public void test2DOverflow() {
final TupleMetadata schema = new SchemaBuilder().add("id", MinorType.INT).addRepeatedList("list2").addArray(MinorType.VARCHAR).resumeSchema().buildSchema();
final ResultSetLoaderImpl.ResultSetOptions options = new ResultSetOptionBuilder().rowCountLimit(ValueVector.MAX_ROW_COUNT).readerSchema(schema).build();
final ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
final RowSetLoader writer = rsLoader.writer();
// Fill the batch with enough data to cause overflow.
// Data must be large enough to cause overflow before 64K rows
// Make a bit bigger to overflow early.
final int outerSize = 7;
final int innerSize = 5;
final int strLength = ValueVector.MAX_BUFFER_SIZE / ValueVector.MAX_ROW_COUNT / outerSize / innerSize + 20;
final byte[] value = new byte[strLength - 6];
Arrays.fill(value, (byte) 'X');
final String strValue = new String(value, Charsets.UTF_8);
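// Sizing note: each element written below is strValue plus a six-digit counter, so every
// element is exactly strLength bytes; the "- 6" above reserves room for the "%06d" suffix.
// strLength is chosen so that outerSize * innerSize elements per row, over the 64K-row
// limit, would exceed the maximum vector buffer size, and the "+ 20" padding ensures the
// data vector overflows well before the row-count limit is reached.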
int rowCount = 0;
int elementCount = 0;
final ArrayWriter outerWriter = writer.array(1);
final ArrayWriter innerWriter = outerWriter.array();
final ScalarWriter elementWriter = innerWriter.scalar();
rsLoader.startBatch();
while (!writer.isFull()) {
  writer.start();
  writer.scalar(0).setInt(rowCount);
  for (int j = 0; j < outerSize; j++) {
    for (int k = 0; k < innerSize; k++) {
      elementWriter.setString(String.format("%s%06d", strValue, elementCount));
      elementCount++;
    }
    outerWriter.save();
  }
  writer.save();
  rowCount++;
}
// Number of rows should be driven by vector size.
// Our row count should include the overflow row
final int expectedCount = ValueVector.MAX_BUFFER_SIZE / (strLength * innerSize * outerSize);
assertEquals(expectedCount + 1, rowCount);
// Loader's row count should include only "visible" rows
assertEquals(expectedCount, writer.rowCount());
// Total count should include invisible and look-ahead rows.
assertEquals(expectedCount + 1, rsLoader.totalRowCount());
// Result should exclude the overflow row
RowSet result = fixture.wrap(rsLoader.harvest());
assertEquals(expectedCount, result.rowCount());
// Verify the data.
RowSetReader reader = result.reader();
ArrayReader outerReader = reader.array(1);
ArrayReader innerReader = outerReader.array();
ScalarReader strReader = innerReader.scalar();
int readRowCount = 0;
int readElementCount = 0;
while (reader.next()) {
  assertEquals(readRowCount, reader.scalar(0).getInt());
  for (int i = 0; i < outerSize; i++) {
    assertTrue(outerReader.next());
    for (int j = 0; j < innerSize; j++) {
      assertTrue(innerReader.next());
      assertEquals(String.format("%s%06d", strValue, readElementCount), strReader.getString());
      readElementCount++;
    }
    assertFalse(innerReader.next());
  }
  assertFalse(outerReader.next());
  readRowCount++;
}
assertEquals(readRowCount, result.rowCount());
result.clear();
// Write a few more rows to verify the overflow row.
rsLoader.startBatch();
for (int i = 0; i < 1000; i++) {
  writer.start();
  writer.scalar(0).setInt(rowCount);
  for (int j = 0; j < outerSize; j++) {
    for (int k = 0; k < innerSize; k++) {
      elementWriter.setString(String.format("%s%06d", strValue, elementCount));
      elementCount++;
    }
    outerWriter.save();
  }
  writer.save();
  rowCount++;
}
result = fixture.wrap(rsLoader.harvest());
assertEquals(1001, result.rowCount());
final int startCount = readRowCount;
reader = result.reader();
outerReader = reader.array(1);
innerReader = outerReader.array();
strReader = innerReader.scalar();
while (reader.next()) {
  assertEquals(readRowCount, reader.scalar(0).getInt());
  for (int i = 0; i < outerSize; i++) {
    assertTrue(outerReader.next());
    for (int j = 0; j < innerSize; j++) {
      assertTrue(innerReader.next());
      assertEquals(String.format("%s%06d", strValue, readElementCount), strReader.getString());
      readElementCount++;
    }
    assertFalse(innerReader.next());
  }
  assertFalse(outerReader.next());
  readRowCount++;
}
assertEquals(readRowCount - startCount, result.rowCount());
result.clear();
rsLoader.close();
}
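As a usage note, the overflow handling exercised above is what lets a reader write rows without tracking batch size itself: it simply writes until the loader reports the batch is full. A minimal sketch of such a batch loop, where moreData(), loadRow(), and sendDownstream() are purely hypothetical placeholders for the reader's own logic:

while (moreData()) {
  rsLoader.startBatch();
  RowSetLoader rowWriter = rsLoader.writer();
  while (!rowWriter.isFull() && moreData()) {
    rowWriter.start();
    loadRow(rowWriter); // hypothetical per-row population
    rowWriter.save();
  }
  sendDownstream(rsLoader.harvest()); // any in-flight overflow row carries over to the next batch
}
rsLoader.close();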