Use of org.apache.drill.exec.record.BatchSchemaBuilder in project drill by apache.
From class TestFileScanFramework, method testMapProject.
@Test
public void testMapProject() {
  MockMapReader reader = new MockMapReader();
  reader.batchLimit = 1;

  // Select one of the two map columns
  FileScanFixtureBuilder builder = new FileScanFixtureBuilder();
  builder.setProjection("m1.a");
  builder.addReader(reader);
  ScanFixture scanFixture = builder.build();
  ScanOperatorExec scan = scanFixture.scanOp;

  // Expect data and implicit columns
  SchemaBuilder schemaBuilder = new SchemaBuilder()
      .addMap("m1")
        .add("a", MinorType.INT)
        .resumeSchema();
  BatchSchema expectedSchema = new BatchSchemaBuilder()
      .withSchemaBuilder(schemaBuilder)
      .build();
  SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
      .addSingleCol(new Object[] { 10 })
      .addSingleCol(new Object[] { 20 })
      .build();

  assertTrue(scan.buildSchema());
  assertEquals(expectedSchema, scan.batchAccessor().schema());
  scan.batchAccessor().release();

  assertTrue(scan.next());
  RowSetUtilities.verify(expected,
      fixture.wrap(scan.batchAccessor().container()));

  // EOF
  assertFalse(scan.next());
  assertEquals(0, scan.batchAccessor().rowCount());
  scanFixture.close();
}
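A distilled view of the schema-building pattern the test above relies on may help: addMap() opens a nested map context, and resumeSchema() returns the chain to the row level before BatchSchemaBuilder materializes the BatchSchema. This is a minimal sketch, assuming Drill's exec and common modules on the classpath; the class name MapSchemaSketch is illustrative only.

import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.record.BatchSchema;
import org.apache.drill.exec.record.BatchSchemaBuilder;
import org.apache.drill.exec.record.metadata.SchemaBuilder;

public class MapSchemaSketch {
  public static void main(String[] args) {
    // Row schema with one map column "m1" holding a single INT member "a",
    // mirroring the expected schema in testMapProject above.
    SchemaBuilder schemaBuilder = new SchemaBuilder()
        .addMap("m1")               // open the nested map context
          .add("a", MinorType.INT)  // member column inside the map
          .resumeSchema();          // pop back up to the row level
    BatchSchema schema = new BatchSchemaBuilder()
        .withSchemaBuilder(schemaBuilder)
        .build();
    System.out.println(schema);
  }
}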
Use of org.apache.drill.exec.record.BatchSchemaBuilder in project drill by apache.
From class TestFileScanFramework, method testFullProject.
/**
 * Exercise the major projection operations: subset of table
 * columns, implicit columns, partition columns, missing columns,
 * and an output order (and positions) different from the table's.
 * These cases are tested more fully on lower-level components;
 * here we verify that the components are wired up correctly.
 */
@Test
public void testFullProject() {
  MockEarlySchemaReader reader = new MockEarlySchemaReader();
  reader.batchLimit = 1;

  // Select table and implicit columns.
  FileScanFixtureBuilder builder = new FileScanFixtureBuilder();
  builder.setProjection("dir0", "b", "filename", "c", "suffix");
  builder.addReader(reader);
  ScanFixture scanFixture = builder.build();
  ScanOperatorExec scan = scanFixture.scanOp;

  // Expect data and implicit columns
  SchemaBuilder schemaBuilder = new SchemaBuilder()
      .addNullable("dir0", MinorType.VARCHAR)
      .addNullable("b", MinorType.VARCHAR, 10)
      .add("filename", MinorType.VARCHAR)
      .addNullable("c", MinorType.INT)
      .add("suffix", MinorType.VARCHAR);
  BatchSchema expectedSchema = new BatchSchemaBuilder()
      .withSchemaBuilder(schemaBuilder)
      .build();
  SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
      .addRow(MOCK_DIR0, "fred", MOCK_FILE_NAME, null, MOCK_SUFFIX)
      .addRow(MOCK_DIR0, "wilma", MOCK_FILE_NAME, null, MOCK_SUFFIX)
      .build();

  // Schema should include implicit columns.
  assertTrue(scan.buildSchema());
  assertEquals(expectedSchema, scan.batchAccessor().schema());
  scan.batchAccessor().release();

  // Read one batch, should contain implicit columns
  assertTrue(scan.next());
  RowSetUtilities.verify(expected,
      fixture.wrap(scan.batchAccessor().container()));

  // EOF
  assertFalse(scan.next());
  assertEquals(0, scan.batchAccessor().rowCount());
  scanFixture.close();
}
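The verification idiom shared by both scan tests can be isolated as well: build an expected row set against the expected schema, then compare it to an actual batch with RowSetUtilities.verify, which checks the schema and the row-by-row values and then releases both row sets. Below is a minimal sketch, assuming Drill's test module on the classpath; the fixture field comes from SubOperatorTest, the class name RowSetVerifySketch is illustrative, and the row-set helper packages have moved between Drill releases, so treat those imports as indicative.

import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.record.BatchSchema;
import org.apache.drill.exec.record.BatchSchemaBuilder;
import org.apache.drill.exec.record.metadata.SchemaBuilder;
import org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet; // package varies by release
import org.apache.drill.test.SubOperatorTest;
import org.apache.drill.test.rowSet.RowSetUtilities;              // package varies by release
import org.junit.Test;

public class RowSetVerifySketch extends SubOperatorTest {

  @Test
  public void testVerifyIdiom() {
    // Expected schema: one required and one nullable column.
    SchemaBuilder schemaBuilder = new SchemaBuilder()
        .add("filename", MinorType.VARCHAR)
        .addNullable("c", MinorType.INT);
    BatchSchema schema = new BatchSchemaBuilder()
        .withSchemaBuilder(schemaBuilder)
        .build();

    // Build "expected" and "actual" row sets; in the real tests the
    // actual side comes from scan.batchAccessor().container().
    SingleRowSet expected = fixture.rowSetBuilder(schema)
        .addRow("file1.csv", null)
        .build();
    SingleRowSet actual = fixture.rowSetBuilder(schema)
        .addRow("file1.csv", null)
        .build();

    // Compares schema and values, then releases both row sets.
    RowSetUtilities.verify(expected, actual);
  }
}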
Use of org.apache.drill.exec.record.BatchSchemaBuilder in project drill by apache.
From class TestParquetWriterEmptyFiles, method testComplexEmptyFileSchema.
@Test
public void testComplexEmptyFileSchema() throws Exception {
  final String outputFileName = "testparquetwriteremptyfiles_testcomplexemptyfileschema";
  test("create table dfs.tmp.%s as select * from dfs.`parquet/empty/complex/empty_complex.parquet`", outputFileName);

  // The end_date column is always null, so it is missing from the result schema.
  SchemaBuilder schemaBuilder = new SchemaBuilder()
      .addNullable("id", TypeProtos.MinorType.BIGINT)
      .addNullable("name", TypeProtos.MinorType.VARCHAR)
      .addArray("orders", TypeProtos.MinorType.BIGINT);
  BatchSchema expectedSchema = new BatchSchemaBuilder()
      .withSchemaBuilder(schemaBuilder)
      .build();

  testBuilder()
      .unOrdered()
      .sqlQuery("select * from dfs.tmp.%s", outputFileName)
      .schemaBaseLine(expectedSchema)
      .go();
}
Use of org.apache.drill.exec.record.BatchSchemaBuilder in project drill by apache.
From class TestParquetWriterEmptyFiles, method testWriteEmptyFileWithSchema.
@Test
public void testWriteEmptyFileWithSchema() throws Exception {
  final String outputFileName = "testparquetwriteremptyfiles_testwriteemptyfilewithschema";
  test("CREATE TABLE dfs.tmp.%s AS select * from dfs.`parquet/alltypes_required.parquet` where `col_int` = 0", outputFileName);

  // Only the last scan schema is written
  SchemaBuilder schemaBuilder = new SchemaBuilder()
      .add("col_int", TypeProtos.MinorType.INT)
      .add("col_chr", TypeProtos.MinorType.VARCHAR)
      .add("col_vrchr", TypeProtos.MinorType.VARCHAR)
      .add("col_dt", TypeProtos.MinorType.DATE)
      .add("col_tim", TypeProtos.MinorType.TIME)
      .add("col_tmstmp", TypeProtos.MinorType.TIMESTAMP)
      .add("col_flt", TypeProtos.MinorType.FLOAT4)
      .add("col_intrvl_yr", TypeProtos.MinorType.INTERVAL)
      .add("col_intrvl_day", TypeProtos.MinorType.INTERVAL)
      .add("col_bln", TypeProtos.MinorType.BIT);
  BatchSchema expectedSchema = new BatchSchemaBuilder()
      .withSchemaBuilder(schemaBuilder)
      .build();

  testBuilder()
      .unOrdered()
      .sqlQuery("select * from dfs.tmp.%s", outputFileName)
      .schemaBaseLine(expectedSchema)
      .go();
}
Use of org.apache.drill.exec.record.BatchSchemaBuilder in project drill by apache.
From class TestResultSetSchemaChange, method testSchemaChangeWithOverflow.
/**
 * Test a schema change on the row that overflows. If the
 * new column is added after overflow, it will appear as
 * a schema change in the following batch. This is fine, as
 * we are essentially time-shifting: pretending that the
 * overflow row was written in the next batch (which, in
 * fact, it is: that's what overflow means).
 */
@Test
public void testSchemaChangeWithOverflow() {
  ResultSetOptions options = new ResultSetOptionBuilder()
      .rowCountLimit(ValueVector.MAX_ROW_COUNT)
      .build();
  ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
  RowSetLoader rootWriter = rsLoader.writer();
  rootWriter.addColumn(SchemaBuilder.columnSchema("a", MinorType.VARCHAR, DataMode.REQUIRED));

  rsLoader.startBatch();
  byte[] value = new byte[512];
  Arrays.fill(value, (byte) 'X');
  int count = 0;
  while (!rootWriter.isFull()) {
    rootWriter.start();
    rootWriter.scalar(0).setBytes(value, value.length);
    if (rootWriter.isFull()) {
      rootWriter.addColumn(SchemaBuilder.columnSchema("b", MinorType.INT, DataMode.OPTIONAL));
      rootWriter.scalar(1).setInt(count);

      // Add a Varchar to ensure its offset fiddling is done properly
      rootWriter.addColumn(SchemaBuilder.columnSchema("c", MinorType.VARCHAR, DataMode.OPTIONAL));
      rootWriter.scalar(2).setString("c-" + count);

      // Allow adding a required column at this point.
      // (Not intuitively obvious that this should work; we back-fill
      // with zeros.)
      rootWriter.addColumn(SchemaBuilder.columnSchema("d", MinorType.INT, DataMode.REQUIRED));
    }
    rootWriter.save();
    count++;
  }

  // Result should include only the first column.
  SchemaBuilder schemaBuilder = new SchemaBuilder()
      .add("a", MinorType.VARCHAR);
  BatchSchema expectedSchema = new BatchSchemaBuilder()
      .withSchemaBuilder(schemaBuilder)
      .build();
  RowSet result = fixture.wrap(rsLoader.harvest());
  assertTrue(result.batchSchema().isEquivalent(expectedSchema));
  assertEquals(count - 1, result.rowCount());
  result.clear();
  assertEquals(1, rsLoader.schemaVersion());

  // Double check: still can add a required column after
  // starting the next batch. (No longer in overflow state.)
  rsLoader.startBatch();
  rootWriter.addColumn(SchemaBuilder.columnSchema("e", MinorType.INT, DataMode.REQUIRED));

  // Next batch should start with the overflow row, including
  // the column added at the end of the previous batch, after
  // overflow.
  result = fixture.wrap(rsLoader.harvest());
  assertEquals(5, rsLoader.schemaVersion());
  assertEquals(1, result.rowCount());
  BatchSchemaBuilder batchSchemaBuilder = new BatchSchemaBuilder(expectedSchema);
  batchSchemaBuilder.schemaBuilder()
      .addNullable("b", MinorType.INT)
      .addNullable("c", MinorType.VARCHAR)
      .add("d", MinorType.INT)
      .add("e", MinorType.INT);
  expectedSchema = batchSchemaBuilder.build();
  assertTrue(result.batchSchema().isEquivalent(expectedSchema));

  RowSetReader reader = result.reader();
  reader.next();
  assertEquals(count - 1, reader.scalar(1).getInt());
  assertEquals("c-" + (count - 1), reader.scalar(2).getString());
  assertEquals(0, reader.scalar("d").getInt());
  assertEquals(0, reader.scalar("e").getInt());
  result.clear();
  rsLoader.close();
}
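One detail in the test above worth isolating is the copy-constructor pattern: BatchSchemaBuilder can be seeded from an existing BatchSchema, and schemaBuilder() exposes the underlying SchemaBuilder so new columns can be appended before build(). A minimal sketch under the same classpath assumptions as the earlier sketches; the class name ExtendSchemaSketch and the appended columns are illustrative.

import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.record.BatchSchema;
import org.apache.drill.exec.record.BatchSchemaBuilder;
import org.apache.drill.exec.record.metadata.SchemaBuilder;

public class ExtendSchemaSketch {
  public static void main(String[] args) {
    // Start with a one-column schema, as the overflow test does with "a".
    SchemaBuilder schemaBuilder = new SchemaBuilder()
        .add("a", MinorType.VARCHAR);
    BatchSchema original = new BatchSchemaBuilder()
        .withSchemaBuilder(schemaBuilder)
        .build();

    // Seed a new builder from the existing schema, then append the
    // columns that arrived later (after overflow, in the test above).
    BatchSchemaBuilder extended = new BatchSchemaBuilder(original);
    extended.schemaBuilder()
        .addNullable("b", MinorType.INT)
        .add("d", MinorType.INT);
    BatchSchema result = extended.build();
    System.out.println(result);
  }
}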