use of org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet in project drill by apache.
the class TestMissingColumnLoader method testDefaultValue.
/**
 * Verifies that a missing ("null") column can be filled with a
 * caller-supplied default value. Defaults apply only to required
 * columns; a nullable column already uses NULL as its default.
 */
@Test
public void testDefaultValue() {
  // Three required columns, each given a default value as a string.
  final TupleMetadata defaultedCols = new SchemaBuilder()
      .add("int", MinorType.INT)
      .add("str", MinorType.VARCHAR)
      .add("dub", MinorType.FLOAT8)
      .build();
  defaultedCols.metadata("int").setDefaultValue("10");
  defaultedCols.metadata("str").setDefaultValue("foo");
  defaultedCols.metadata("dub").setDefaultValue("20.0");

  // Build the missing-column handler over those columns.
  final ResultVectorCache vectorCache = new NullResultVectorCacheImpl(fixture.allocator());
  final StaticBatchBuilder batchBuilder = new MissingColumnHandlerBuilder()
      .inputSchema(defaultedCols)
      .vectorCache(vectorCache)
      .build();
  assertNotNull(batchBuilder);

  // Produce a two-row batch.
  final int rowCount = 2;
  batchBuilder.load(rowCount);

  // Every row should hold the defaults, converted to the column types.
  final TupleMetadata resultSchema = new SchemaBuilder()
      .add("int", MinorType.INT)
      .add("str", MinorType.VARCHAR)
      .add("dub", MinorType.FLOAT8)
      .buildSchema();
  final SingleRowSet expectedRows = fixture.rowSetBuilder(resultSchema)
      .addRow(10, "foo", 20.0D)
      .addRow(10, "foo", 20.0D)
      .build();
  RowSetUtilities.verify(expectedRows, fixture.wrap(batchBuilder.outputContainer()));

  batchBuilder.close();
}
use of org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet in project drill by apache.
the class TestMissingColumnLoader method testVectorCache.
/**
 * Drill requires "schema persistence": when a scan operator reads two
 * files, F1 and F2, it must hand downstream the same vectors for both
 * readers. That means the very same value vector instances, not merely
 * vectors of the same type (though populated with different data).
 * <p>
 * This test covers the case where the reader for F1 found columns
 * (a, b, c) while the reader for F2 found only (a, b). Column c must
 * then be filled in with nulls, using the same type it had in F1. A
 * vector cache makes that possible, and this test ensures the null
 * column mechanism consults the cache when creating a nullable column.
 */
@Test
public void testVectorCache() {
  final TupleMetadata absentCols = new SchemaBuilder()
      .addNullable("req", MinorType.FLOAT8)
      .addNullable("opt", MinorType.FLOAT8)
      .addArray("rep", MinorType.FLOAT8)
      .addDynamic("unk")
      .build();

  // Seed the cache with one column per data mode; keep the optional and
  // repeated vectors so their reuse can be checked below.
  final ResultVectorCacheImpl vectorCache = new ResultVectorCacheImpl(fixture.allocator());
  vectorCache.vectorFor(SchemaBuilder.columnSchema("req", MinorType.FLOAT8, DataMode.REQUIRED));
  final ValueVector cachedOptional = vectorCache.vectorFor(
      SchemaBuilder.columnSchema("opt", MinorType.FLOAT8, DataMode.OPTIONAL));
  final ValueVector cachedRepeated = vectorCache.vectorFor(
      SchemaBuilder.columnSchema("rep", MinorType.FLOAT8, DataMode.REPEATED));

  // Columns of unknown type are created as nullable Varchar.
  final MajorType unknownColType = Types.optional(MinorType.VARCHAR);
  final StaticBatchBuilder batchBuilder = new MissingColumnHandlerBuilder()
      .inputSchema(absentCols)
      .vectorCache(vectorCache)
      .nullType(unknownColType)
      .build();
  assertNotNull(batchBuilder);

  // Produce a two-row batch.
  final int rowCount = 2;
  batchBuilder.load(rowCount);
  final VectorContainer result = batchBuilder.outputContainer();

  // The cached vector instances must be the ones in the output.
  assertSame(cachedOptional, result.getValueVector(1).getValueVector());
  assertSame(cachedRepeated, result.getValueVector(2).getValueVector());

  // Check the resulting column types and the (null or empty) values.
  final TupleMetadata resultSchema = new SchemaBuilder()
      .addNullable("req", MinorType.FLOAT8)
      .addNullable("opt", MinorType.FLOAT8)
      .addArray("rep", MinorType.FLOAT8)
      .addNullable("unk", MinorType.VARCHAR)
      .buildSchema();
  final SingleRowSet expectedRows = fixture.rowSetBuilder(resultSchema)
      .addRow(null, null, new int[] {}, null)
      .addRow(null, null, new int[] {}, null)
      .build();
  RowSetUtilities.verify(expectedRows, fixture.wrap(result));

  batchBuilder.close();
}
use of org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet in project drill by apache.
the class TestScanLateSchema method testLateSchemaLifecycle.
/**
 * Simplest possible test of a reader that learns its schema while
 * reading. It checks only the basic life-cycle steps, as groundwork
 * for the more elaborate variations.
 */
@Test
public void testLateSchemaLifecycle() {
  // Mock reader limited to two batches, with no data in the first one.
  final ReaderCreator readerFactory = neg -> {
    final MockLateSchemaReader lateReader = new MockLateSchemaReader(neg);
    lateReader.batchLimit = 2;
    lateReader.returnDataOnFirst = false;
    return lateReader;
  };

  // Set up the scan operator around that reader.
  final ScanFixture scanSetup = simpleFixture(readerFactory);
  final ScanOperatorExec scanOp = scanSetup.scanOp;

  // Schema-building step: the reader's first batch is empty, so the
  // operator reports a schema with zero rows.
  assertTrue(scanOp.buildSchema());
  assertEquals(0, scanOp.batchAccessor().rowCount());

  // Build the expected rows and check the schema against them.
  final SingleRowSet expectedRows = makeExpected(20);
  final RowSetComparison comparison = new RowSetComparison(expectedRows);
  assertEquals(expectedRows.batchSchema(), scanOp.batchAccessor().schema());

  // The following call delivers the data batch.
  assertTrue(scanOp.next());
  comparison.verifyAndClearAll(fixture.wrap(scanOp.batchAccessor().container()));

  // End of data: no more batches, and no rows left in the accessor.
  assertFalse(scanOp.next());
  assertEquals(0, scanOp.batchAccessor().rowCount());

  scanSetup.close();
}
use of org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet in project drill by apache.
the class TestFileScan method testImplicitColumns.
/**
 * Quick sanity check that a pair of implicit columns can be projected
 * alongside all the table columns, in table order. Exhaustive testing
 * of implicit columns lives with the lower-level components.
 */
@Test
public void testImplicitColumns() {
  // Mock reader limited to a single batch.
  final ReaderCreator readerFactory = neg -> {
    final MockEarlySchemaReader earlyReader = new MockEarlySchemaReader(neg);
    earlyReader.batchLimit = 1;
    return earlyReader;
  };

  // Project the table columns plus two implicit columns.
  final FileScanFixtureBuilder fixtureBuilder = new FileScanFixtureBuilder();
  fixtureBuilder.setProjection("a", "b", "filename", "suffix");
  fixtureBuilder.addReader(readerFactory);
  final ScanFixture scanSetup = fixtureBuilder.build();
  final ScanOperatorExec scanOp = scanSetup.scanOp;

  // Expected result: table data followed by the implicit column values.
  final TupleMetadata resultSchema = new SchemaBuilder()
      .add("a", MinorType.INT)
      .addNullable("b", MinorType.VARCHAR)
      .add("filename", MinorType.VARCHAR)
      .add("suffix", MinorType.VARCHAR)
      .build();
  final SingleRowSet expectedRows = fixture.rowSetBuilder(resultSchema)
      .addRow(10, "fred", MOCK_FILE_NAME, MOCK_SUFFIX)
      .addRow(20, "wilma", MOCK_FILE_NAME, MOCK_SUFFIX)
      .build();

  // The schema reported up front must already list the implicit columns.
  assertTrue(scanOp.buildSchema());
  assertEquals(expectedRows.container().getSchema(), scanOp.batchAccessor().schema());
  scanOp.batchAccessor().release();

  // The single data batch carries both table and implicit columns.
  assertTrue(scanOp.next());
  RowSetUtilities.verify(expectedRows, fixture.wrap(scanOp.batchAccessor().container()));

  // End of data: no more batches, and no rows left in the accessor.
  assertFalse(scanOp.next());
  assertEquals(0, scanOp.batchAccessor().rowCount());

  scanSetup.close();
}
use of org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet in project drill by apache.
the class TestFileScan method testEmptyProject.
/**
 * Sanity check of an empty projection list: the scan is expected to
 * report an empty schema and to return rows that contain no columns.
 */
@Test
public void testEmptyProject() {
  // Mock reader limited to a single batch.
  final ReaderCreator readerFactory = neg -> {
    final MockEarlySchemaReader earlyReader = new MockEarlySchemaReader(neg);
    earlyReader.batchLimit = 1;
    return earlyReader;
  };

  // Project no columns at all.
  final FileScanFixtureBuilder fixtureBuilder = new FileScanFixtureBuilder();
  fixtureBuilder.setProjection();
  fixtureBuilder.addReader(readerFactory);
  final ScanFixture scanSetup = fixtureBuilder.build();
  final ScanOperatorExec scanOp = scanSetup.scanOp;

  // Expected result: two rows over a schema with no columns.
  final TupleMetadata resultSchema = new SchemaBuilder().build();
  final SingleRowSet expectedRows = fixture.rowSetBuilder(resultSchema)
      .addRow()
      .addRow()
      .build();

  // The schema reported up front must match the empty expected schema.
  assertTrue(scanOp.buildSchema());
  assertEquals(expectedRows.container().getSchema(), scanOp.batchAccessor().schema());
  scanOp.batchAccessor().release();

  // The single data batch must match the expected column-less rows.
  assertTrue(scanOp.next());
  RowSetUtilities.verify(expectedRows, fixture.wrap(scanOp.batchAccessor().container()));

  // End of data: no more batches, and no rows left in the accessor.
  assertFalse(scanOp.next());
  assertEquals(0, scanOp.batchAccessor().rowCount());

  scanSetup.close();
}
Aggregations