Use of org.apache.drill.exec.record.BatchSchemaBuilder in project drill by apache.
The class TestScanOrchestratorEarlySchema, method testEarlySchemaSelectNone.
/**
 * Test SELECT - FROM table(a, b)
 */
@Test
public void testEarlySchemaSelectNone() {
  ScanOrchestratorBuilder builder = new MockScanBuilder();

  // SELECT ...
  // (Like SELECT COUNT(*) ...)
  builder.projection(RowSetTestUtils.projectList());
  ScanSchemaOrchestrator scanner = new ScanSchemaOrchestrator(fixture.allocator(), builder);

  // ... FROM table
  ReaderSchemaOrchestrator reader = scanner.startReader();

  // File schema (a, b)
  TupleMetadata tableSchema = new SchemaBuilder()
      .add("a", MinorType.INT)
      .add("b", MinorType.VARCHAR)
      .buildSchema();

  // Create the table loader
  ResultSetLoader loader = reader.makeTableLoader(tableSchema);

  // Verify that the unprojected columns are marked as unprojected in
  // the table loader.
  assertTrue(loader.isProjectionEmpty());
  assertFalse(loader.writer().column("a").isProjected());
  assertFalse(loader.writer().column("b").isProjected());

  // Expect an empty batch schema.
  BatchSchema expectedSchema = new BatchSchemaBuilder()
      .withSchemaBuilder(new SchemaBuilder())
      .build();

  // Create a batch of data.
  reader.startBatch();
  loader.writer()
      .addRow(1, "fred")
      .addRow(2, "wilma");
  reader.endBatch();

  // Verify
  {
    // Two rows, no data.
    SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
        .addRow()
        .addRow()
        .build();
    RowSetUtilities.verify(expected, fixture.wrap(scanner.output()));
  }

  // Fast path to fill in empty rows
  reader.startBatch();
  loader.skipRows(10);
  reader.endBatch();

  // Verify
  {
    VectorContainer output = scanner.output();
    assertEquals(10, output.getRecordCount());
    output.zeroVectors();
  }
  scanner.close();
}
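The pattern worth lifting out of this test is the zero-column batch schema: wrapping an empty SchemaBuilder in a BatchSchemaBuilder yields a schema with no columns, so batches carry only a row count, as in SELECT COUNT(*). A minimal sketch, not part of the original test, reusing the fixture and builder classes from the listing above:

// A minimal sketch: build a zero-column schema and a row set whose rows
// carry a count but no data, reusing `fixture` from the test above.
BatchSchema emptySchema = new BatchSchemaBuilder()
    .withSchemaBuilder(new SchemaBuilder())
    .build();
SingleRowSet countOnly = fixture.rowSetBuilder(emptySchema)
    .addRow()   // an empty row: contributes to the count only
    .addRow()
    .build();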
Use of org.apache.drill.exec.record.BatchSchemaBuilder in project drill by apache.
The class TestOperatorRecordBatch, method testBatchAccessor.
/**
 * The record batch abstraction has a bunch of methods to work with a vector container.
 * Rather than simply exposing the container itself, the batch instead exposes various
 * container operations. Probably an artifact of its history. In any event, make
 * sure those methods are passed through to the container accessor.
 */
@Test
public void testBatchAccessor() {
  SchemaBuilder schemaBuilder = new SchemaBuilder()
      .add("a", MinorType.INT)
      .add("b", MinorType.VARCHAR);
  BatchSchema schema = new BatchSchemaBuilder()
      .withSchemaBuilder(schemaBuilder)
      .build();
  SingleRowSet rs = fixture.rowSetBuilder(schema)
      .addRow(10, "fred")
      .addRow(20, "wilma")
      .build();
  MockOperatorExec opExec = new MockOperatorExec(rs.container());
  opExec.nextCalls = 1;
  try (OperatorRecordBatch opBatch = makeOpBatch(opExec)) {
    assertEquals(IterOutcome.OK_NEW_SCHEMA, opBatch.next());
    assertEquals(schema, opBatch.getSchema());
    assertEquals(2, opBatch.getRecordCount());
    assertSame(rs.container(), opBatch.getOutgoingContainer());

    Iterator<VectorWrapper<?>> iter = opBatch.iterator();
    assertEquals("a", iter.next().getValueVector().getField().getName());
    assertEquals("b", iter.next().getValueVector().getField().getName());

    // Not a full test of the schema path; just make sure that the
    // pass-through to the vector container works.
    SchemaPath path = SchemaPath.create(NamePart.newBuilder().setName("a").build());
    TypedFieldId id = opBatch.getValueVectorId(path);
    assertEquals(MinorType.INT, id.getFinalType().getMinorType());
    assertEquals(1, id.getFieldIds().length);
    assertEquals(0, id.getFieldIds()[0]);

    path = SchemaPath.create(NamePart.newBuilder().setName("b").build());
    id = opBatch.getValueVectorId(path);
    assertEquals(MinorType.VARCHAR, id.getFinalType().getMinorType());
    assertEquals(1, id.getFieldIds().length);
    assertEquals(1, id.getFieldIds()[0]);

    // Sanity check of getValueAccessorById()
    VectorWrapper<?> w = opBatch.getValueAccessorById(IntVector.class, 0);
    assertNotNull(w);
    assertEquals("a", w.getValueVector().getField().getName());
    w = opBatch.getValueAccessorById(VarCharVector.class, 1);
    assertNotNull(w);
    assertEquals("b", w.getValueVector().getField().getName());

    try {
      opBatch.getSelectionVector2();
      fail();
    } catch (UnsupportedOperationException e) {
      // Expected
    }
    try {
      opBatch.getSelectionVector4();
      fail();
    } catch (UnsupportedOperationException e) {
      // Expected
    }
  } catch (Exception e) {
    fail(e.getMessage());
  }
  assertTrue(opExec.closeCalled);
}
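The column-lookup calls exercised above are generic: any column can be resolved by name to a TypedFieldId that reports both its type and its position in the container. A minimal sketch, not part of the original test, reusing opBatch from the listing above (the result variable names are illustrative):

// A minimal sketch: resolve column "a" by name and read back its type
// and container position, using only the calls shown in the test above.
SchemaPath path = SchemaPath.create(NamePart.newBuilder().setName("a").build());
TypedFieldId id = opBatch.getValueVectorId(path);
MinorType type = id.getFinalType().getMinorType();  // MinorType.INT here
int position = id.getFieldIds()[0];                 // 0: first column in the container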
Use of org.apache.drill.exec.record.BatchSchemaBuilder in project drill by apache.
The class TestFileScanFramework, method testMetadataColumns.
/**
 * Basic sanity test of a couple of implicit columns, along
 * with all table columns in table order. Full testing of implicit
 * columns is done on lower-level components.
 */
@Test
public void testMetadataColumns() {
  MockEarlySchemaReader reader = new MockEarlySchemaReader();
  reader.batchLimit = 1;

  // Select table and implicit columns.
  FileScanFixtureBuilder builder = new FileScanFixtureBuilder();
  builder.setProjection("a", "b", "filename", "suffix");
  builder.addReader(reader);
  ScanFixture scanFixture = builder.build();
  ScanOperatorExec scan = scanFixture.scanOp;

  // Expect data and implicit columns
  SchemaBuilder schemaBuilder = new SchemaBuilder()
      .add("a", MinorType.INT)
      .addNullable("b", MinorType.VARCHAR, 10)
      .add("filename", MinorType.VARCHAR)
      .add("suffix", MinorType.VARCHAR);
  BatchSchema expectedSchema = new BatchSchemaBuilder()
      .withSchemaBuilder(schemaBuilder)
      .build();
  SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
      .addRow(10, "fred", MOCK_FILE_NAME, MOCK_SUFFIX)
      .addRow(20, "wilma", MOCK_FILE_NAME, MOCK_SUFFIX)
      .build();

  // Schema should include implicit columns.
  assertTrue(scan.buildSchema());
  assertEquals(expectedSchema, scan.batchAccessor().schema());
  scan.batchAccessor().release();

  // Read one batch; it should contain the implicit columns.
  assertTrue(scan.next());
  RowSetUtilities.verify(expected, fixture.wrap(scan.batchAccessor().container()));

  // EOF
  assertFalse(scan.next());
  assertEquals(0, scan.batchAccessor().rowCount());
  scanFixture.close();
}
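Note the column order in the expected schema: table columns first, in table order, then the implicit file columns appended by the scan framework. A minimal sketch of that layout, not part of the original test, using the same builder calls as the listing above (the VARCHAR precision of 10 is taken from the test's expected schema):

// A minimal sketch: table columns first, implicit file columns after,
// mirroring the expected schema in the test above.
SchemaBuilder sb = new SchemaBuilder()
    .add("a", MinorType.INT)
    .addNullable("b", MinorType.VARCHAR, 10)
    .add("filename", MinorType.VARCHAR)   // implicit: source file name
    .add("suffix", MinorType.VARCHAR);    // implicit: file extension
BatchSchema expected = new BatchSchemaBuilder()
    .withSchemaBuilder(sb)
    .build();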
Use of org.apache.drill.exec.record.BatchSchemaBuilder in project drill by apache.
The class TestParquetMetadataCache, method testEmptyDirectoryWithMetadataDirFile.
@Test
public void testEmptyDirectoryWithMetadataDirFile() throws Exception {
  final String emptyDirNameWithMetadataFile = "empty_directory";
  dirTestWatcher.makeTestTmpSubDir(Paths.get(emptyDirNameWithMetadataFile));
  dirTestWatcher.makeTestTmpSubDir(Paths.get(emptyDirNameWithMetadataFile, "t2"));
  dirTestWatcher.makeTestTmpSubDir(Paths.get(emptyDirNameWithMetadataFile, "t1"));
  dirTestWatcher.copyResourceToTestTmp(
      Paths.get("parquet", "metadata_files_with_old_versions", "v3_1",
          "metadata_directories.requires_replace.txt"),
      Paths.get(emptyDirNameWithMetadataFile, Metadata.METADATA_DIRECTORIES_FILENAME));

  final BatchSchema expectedSchema = new BatchSchemaBuilder()
      .withSchemaBuilder(new SchemaBuilder())
      .build();

  testBuilder()
      .sqlQuery("select * from dfs.tmp.`%s`", emptyDirNameWithMetadataFile)
      .schemaBaseLine(expectedSchema)
      .build()
      .run();
}
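As in the first example, the baseline here is a zero-column schema: the directory holds only subdirectories and a metadata-directories file, no data, so the schemaBaseLine check expects no columns at all. A minimal sketch of just that baseline, not part of the original test:

// A minimal sketch: the zero-column baseline schema expected from
// scanning an empty directory.
BatchSchema emptyBaseline = new BatchSchemaBuilder()
    .withSchemaBuilder(new SchemaBuilder())
    .build();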
Use of org.apache.drill.exec.record.BatchSchemaBuilder in project drill by apache.
The class TestParquetWriterEmptyFiles, method testWriteEmptySchemaChange.
@Test
public void testWriteEmptySchemaChange() throws Exception {
  final String outputFileName = "testparquetwriteremptyfiles_testwriteemptyschemachange";
  final File outputFile = FileUtils.getFile(dirTestWatcher.getDfsTestTmpDir(), outputFileName);

  test("CREATE TABLE dfs.tmp.%s AS select id, a, b from dfs.`schemachange/multi/*.json` WHERE id = 0", outputFileName);

  // Only the last scan schema is written.
  SchemaBuilder schemaBuilder = new SchemaBuilder()
      .addNullable("id", TypeProtos.MinorType.BIGINT)
      .addNullable("a", TypeProtos.MinorType.BIGINT)
      .addNullable("b", TypeProtos.MinorType.BIT);
  BatchSchema expectedSchema = new BatchSchemaBuilder()
      .withSchemaBuilder(schemaBuilder)
      .build();

  testBuilder()
      .unOrdered()
      .sqlQuery("select * from dfs.tmp.%s", outputFileName)
      .schemaBaseLine(expectedSchema)
      .go();

  // Make sure that only 1 parquet file was created.
  assertEquals(1, outputFile.list((dir, name) -> name.endsWith("parquet")).length);
}
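The baseline declares every column nullable, which matches a scan over JSON input, where Drill reads scalar columns as nullable types. A minimal sketch of that baseline pattern, not part of the original test, using the same fully-qualified TypeProtos types as the listing:

// A minimal sketch: an all-nullable baseline schema, as produced by a
// scan over JSON input.
SchemaBuilder sb = new SchemaBuilder()
    .addNullable("id", TypeProtos.MinorType.BIGINT)
    .addNullable("a", TypeProtos.MinorType.BIGINT)
    .addNullable("b", TypeProtos.MinorType.BIT);
BatchSchema baseline = new BatchSchemaBuilder()
    .withSchemaBuilder(sb)
    .build();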