Example use of org.apache.drill.exec.physical.resultSet.ResultSetLoader in the Apache Drill project: class TestScanOrchestratorEarlySchema, method testTypeSmoothing.
/**
 * Test the ability of the scan orchestrator to "smooth" out schema changes
 * by reusing the type from a previous reader, if known. That is,
 * given three readers with table schemas:<br>
 * (a, b)<br>
 * (a)<br>
 * (a, b)<br>
 * the type of column b must be preserved for the second reader, which
 * does not include b. This works only if b is nullable; its remembered
 * type is then used for the empty column instead of the usual nullable INT.
 * <p>
 * Detailed testing of type matching for "missing" columns is done
 * in {@link #testNullColumnLoader()}.
 * <p>
 * As a side effect, also verifies that two identical table schemas
 * (separated here by a different schema) produce no schema change.
 */
@Test
public void testTypeSmoothing() {
  ScanOrchestratorBuilder scanBuilder = new MockScanBuilder();
  // SELECT a, b ...
  scanBuilder.projection(RowSetTestUtils.projectList("a", "b"));
  ScanSchemaOrchestrator orchestrator = new ScanSchemaOrchestrator(fixture.allocator(), scanBuilder);
  // Schema of tables 1 and 3: (a, b)
  TupleMetadata twoColSchema = new SchemaBuilder()
      .add("a", MinorType.INT)
      .addNullable("b", MinorType.VARCHAR, 10)
      .buildSchema();
  SchemaTracker schemaTracker = new SchemaTracker();
  int firstVersion;
  {
    // ... FROM table 1
    ReaderSchemaOrchestrator readerOrch = orchestrator.startReader();
    ResultSetLoader tableLoader = readerOrch.makeTableLoader(twoColSchema);
    // Projection of (a, b) to (a, b)
    readerOrch.startBatch();
    tableLoader.writer()
        .addRow(10, "fred")
        .addRow(20, "wilma");
    readerOrch.endBatch();
    schemaTracker.trackSchema(orchestrator.output());
    firstVersion = schemaTracker.schemaVersion();
    SingleRowSet expected = fixture.rowSetBuilder(twoColSchema)
        .addRow(10, "fred")
        .addRow(20, "wilma")
        .build();
    RowSetUtilities.verify(expected, fixture.wrap(orchestrator.output()));
  }
  {
    // ... FROM table 2
    ReaderSchemaOrchestrator readerOrch = orchestrator.startReader();
    // Table 2 schema: (a) only
    TupleMetadata oneColSchema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .buildSchema();
    // Projection of (a) to (a, b); b's type is reused from table 1.
    ResultSetLoader tableLoader = readerOrch.makeTableLoader(oneColSchema);
    readerOrch.startBatch();
    tableLoader.writer()
        .addRow(30)
        .addRow(40);
    readerOrch.endBatch();
    schemaTracker.trackSchema(orchestrator.output());
    // Same version: the missing column caused no schema change.
    assertEquals(firstVersion, schemaTracker.schemaVersion());
    SingleRowSet expected = fixture.rowSetBuilder(twoColSchema)
        .addRow(30, null)
        .addRow(40, null)
        .build();
    RowSetUtilities.verify(expected, fixture.wrap(orchestrator.output()));
  }
  {
    // ... FROM table 3
    ReaderSchemaOrchestrator readerOrch = orchestrator.startReader();
    // Projection of (a, b) to (a, b); b is reused yet again.
    ResultSetLoader tableLoader = readerOrch.makeTableLoader(twoColSchema);
    readerOrch.startBatch();
    tableLoader.writer()
        .addRow(50, "dino")
        .addRow(60, "barney");
    readerOrch.endBatch();
    schemaTracker.trackSchema(orchestrator.output());
    assertEquals(firstVersion, schemaTracker.schemaVersion());
    SingleRowSet expected = fixture.rowSetBuilder(twoColSchema)
        .addRow(50, "dino")
        .addRow(60, "barney")
        .build();
    RowSetUtilities.verify(expected, fixture.wrap(orchestrator.output()));
  }
  orchestrator.close();
}
Example use of org.apache.drill.exec.physical.resultSet.ResultSetLoader in the Apache Drill project: class TestScanOrchestratorEarlySchema, method testModeSmoothing.
/**
 * Verify that, with schema smoothing enabled, the scan keeps the most
 * general form of a column (nullable, widest precision) seen so far,
 * even when later readers offer a required mode or narrower precision.
 */
@Test
public void testModeSmoothing() {
  ScanOrchestratorBuilder scanBuilder = new MockScanBuilder();
  scanBuilder.enableSchemaSmoothing(true);
  scanBuilder.projection(RowSetTestUtils.projectList("a"));
  ScanSchemaOrchestrator orchestrator = new ScanSchemaOrchestrator(fixture.allocator(), scanBuilder);
  // Most general form of the column: nullable, with precision.
  TupleMetadata nullableSchema = new SchemaBuilder()
      .addNullable("a", MinorType.VARCHAR, 10)
      .buildSchema();
  SchemaTracker schemaTracker = new SchemaTracker();
  int firstVersion;
  {
    // Table 1: most permissive type
    ReaderSchemaOrchestrator readerOrch = orchestrator.startReader();
    ResultSetLoader tableLoader = readerOrch.makeTableLoader(nullableSchema);
    // Load a batch
    readerOrch.startBatch();
    tableLoader.writer()
        .addRow("fred")
        .addRow("wilma");
    readerOrch.endBatch();
    schemaTracker.trackSchema(orchestrator.output());
    firstVersion = schemaTracker.schemaVersion();
    // Verify
    SingleRowSet expected = fixture.rowSetBuilder(nullableSchema)
        .addRow("fred")
        .addRow("wilma")
        .build();
    RowSetUtilities.verify(expected, fixture.wrap(orchestrator.output()));
    orchestrator.closeReader();
  }
  {
    // Table 2: required mode; smoothing keeps the nullable form.
    TupleMetadata requiredSchema = new SchemaBuilder()
        .add("a", MinorType.VARCHAR, 10)
        .buildSchema();
    ReaderSchemaOrchestrator readerOrch = orchestrator.startReader();
    ResultSetLoader tableLoader = readerOrch.makeTableLoader(requiredSchema);
    // Load a batch
    readerOrch.startBatch();
    tableLoader.writer()
        .addRow("barney")
        .addRow("betty");
    readerOrch.endBatch();
    // Verify against the persistent (smoothed) schema.
    schemaTracker.trackSchema(orchestrator.output());
    assertEquals(firstVersion, schemaTracker.schemaVersion());
    SingleRowSet expected = fixture.rowSetBuilder(nullableSchema)
        .addRow("barney")
        .addRow("betty")
        .build();
    RowSetUtilities.verify(expected, fixture.wrap(orchestrator.output()));
    orchestrator.closeReader();
  }
  {
    // Table 3: narrower precision; smoothing keeps the wider one.
    TupleMetadata narrowSchema = new SchemaBuilder()
        .add("a", MinorType.VARCHAR, 5)
        .buildSchema();
    ReaderSchemaOrchestrator readerOrch = orchestrator.startReader();
    ResultSetLoader tableLoader = readerOrch.makeTableLoader(narrowSchema);
    // Load a batch
    readerOrch.startBatch();
    tableLoader.writer()
        .addRow("bam-bam")
        .addRow("pebbles");
    readerOrch.endBatch();
    // Verify against the persistent (smoothed) schema.
    schemaTracker.trackSchema(orchestrator.output());
    assertEquals(firstVersion, schemaTracker.schemaVersion());
    SingleRowSet expected = fixture.rowSetBuilder(nullableSchema)
        .addRow("bam-bam")
        .addRow("pebbles")
        .build();
    RowSetUtilities.verify(expected, fixture.wrap(orchestrator.output()));
    orchestrator.closeReader();
  }
  orchestrator.close();
}
Example use of org.apache.drill.exec.physical.resultSet.ResultSetLoader in the Apache Drill project: class TestScanOrchestratorEarlySchema, method testEarlySchemaWildcard.
/**
 * Test SELECT * against an early-schema table of (a, b).
 */
@Test
public void testEarlySchemaWildcard() {
  ScanOrchestratorBuilder scanBuilder = new MockScanBuilder();
  // SELECT * ...
  scanBuilder.projection(RowSetTestUtils.projectAll());
  ScanSchemaOrchestrator orchestrator = new ScanSchemaOrchestrator(fixture.allocator(), scanBuilder);
  // ... FROM table
  ReaderSchemaOrchestrator readerOrch = orchestrator.startReader();
  // Table schema: (a, b)
  TupleMetadata tableSchema = new SchemaBuilder()
      .add("a", MinorType.INT)
      .add("b", MinorType.VARCHAR)
      .buildSchema();
  // Create the table loader
  ResultSetLoader tableLoader = readerOrch.makeTableLoader(tableSchema);
  // Simulate the first reader of a scan, which can provide an
  // empty batch to define the schema.
  {
    readerOrch.defineSchema();
    SingleRowSet expected = fixture.rowSetBuilder(tableSchema).build();
    assertNotNull(orchestrator.output());
    RowSetUtilities.verify(expected, fixture.wrap(orchestrator.output()));
  }
  // First batch of data.
  readerOrch.startBatch();
  tableLoader.writer()
      .addRow(1, "fred")
      .addRow(2, "wilma");
  readerOrch.endBatch();
  // Verify
  {
    SingleRowSet expected = fixture.rowSetBuilder(tableSchema)
        .addRow(1, "fred")
        .addRow(2, "wilma")
        .build();
    RowSetUtilities.verify(expected, fixture.wrap(orchestrator.output()));
  }
  // Second batch.
  readerOrch.startBatch();
  tableLoader.writer()
      .addRow(3, "barney")
      .addRow(4, "betty");
  readerOrch.endBatch();
  // Verify
  {
    SingleRowSet expected = fixture.rowSetBuilder(tableSchema)
        .addRow(3, "barney")
        .addRow(4, "betty")
        .build();
    RowSetUtilities.verify(expected, fixture.wrap(orchestrator.output()));
  }
  // Explicit reader close. (All other tests are lazy; they rely on
  // the implicit close.)
  orchestrator.closeReader();
  orchestrator.close();
}
Example use of org.apache.drill.exec.physical.resultSet.ResultSetLoader in the Apache Drill project: class TestResultSetLoaderProjection, method testStrictMapMemberProjectionWithSchema.
/**
 * Projection is driven by both the projection list and the
 * provided schema when the provided schema is strict.
 */
@Test
public void testStrictMapMemberProjectionWithSchema() {
  // m1.a is projected and in the provided schema
  // m1.b is not projected
  // m1.z is projected, in the provided schema, but not in the reader schema
  // m2.c is projected, in the reader schema, but not in the provided schema
  // m3.f is projected, but m3 is not in the provided schema
  // m4.g is projected, is in the provided schema, but not in the reader schema
  List<SchemaPath> projList = RowSetTestUtils.projectList("m1.a", "m1.z", "m2.c", "m3.f", "m4.g");
  ResultSetLoader rsLoader = setupProvidedSchema(true, projList);
  RowSetLoader rootWriter = rsLoader.writer();
  // Check which columns ended up projected.
  TupleMetadata actualSchema = rootWriter.tupleSchema();
  TupleWriter m1Writer = rootWriter.tuple("m1");
  assertTrue(m1Writer.isProjected());
  assertEquals(2, m1Writer.tupleSchema().size());
  assertTrue(m1Writer.column("a").isProjected());
  assertFalse(m1Writer.column("b").isProjected());
  TupleWriter m2Writer = rootWriter.tuple("m2");
  assertTrue(m2Writer.isProjected());
  assertEquals(2, m2Writer.tupleSchema().size());
  assertFalse(m2Writer.column("c").isProjected());
  assertFalse(m2Writer.column("d").isProjected());
  TupleWriter m3Writer = rootWriter.tuple("m3");
  assertFalse(m3Writer.isProjected());
  assertEquals(2, m3Writer.tupleSchema().size());
  assertFalse(m3Writer.column("e").isProjected());
  assertFalse(m3Writer.column("f").isProjected());
  assertNull(actualSchema.metadata("m4"));
  // Only the projected columns appear in the result set.
  TupleMetadata expectedSchema = new SchemaBuilder()
      .addMap("m1")
        .add("a", MinorType.INT)
        .resumeSchema()
      .addMap("m2")
        .resumeSchema()
      .build();
  SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
      .addRow(mapValue(1), mapValue())
      .addRow(mapValue(11), mapValue())
      .build();
  RowSetUtilities.verify(expected, fixture.wrap(rsLoader.harvest()));
  rsLoader.close();
}
Example use of org.apache.drill.exec.physical.resultSet.ResultSetLoader in the Apache Drill project: class TestResultSetLoaderProjection, method testNonStrictMapMemberProjectionWithSchema.
/**
 * Projection is based on both the projection list and the
 * provided schema. Here the provided schema is non-strict
 * (setupProvidedSchema is called with strict = false), so reader
 * columns absent from the provided schema are still projected
 * when they appear in the projection list.
 */
@Test
public void testNonStrictMapMemberProjectionWithSchema() {
// m1 is not projected, though it is in the provided schema
// m2.c is projected and in the reader schema but not the provided schema;
// because the schema is non-strict, m2.c is projected anyway
// m2.d is projected and in both schemas
// m3.f is projected; m3 is not in the provided schema, but since the schema
// is non-strict, m3.f is projected
// m4.g is projected and in the provided schema, but not in the reader schema
List<SchemaPath> selection = RowSetTestUtils.projectList("m2.c", "m2.d", "m3.f", "m4.g");
ResultSetLoader rsLoader = setupProvidedSchema(false, selection);
RowSetLoader rootWriter = rsLoader.writer();
// Verify the projected columns: m1 unprojected, m2 and m3 fully or
// partially projected, m4 absent (not in the reader schema).
TupleMetadata actualSchema = rootWriter.tupleSchema();
TupleWriter m1Writer = rootWriter.tuple("m1");
assertFalse(m1Writer.isProjected());
assertEquals(2, m1Writer.tupleSchema().size());
assertFalse(m1Writer.column("a").isProjected());
assertFalse(m1Writer.column("b").isProjected());
TupleWriter m2Writer = rootWriter.tuple("m2");
assertTrue(m2Writer.isProjected());
assertEquals(2, m2Writer.tupleSchema().size());
assertTrue(m2Writer.column("c").isProjected());
assertTrue(m2Writer.column("d").isProjected());
TupleWriter m3Writer = rootWriter.tuple("m3");
assertTrue(m3Writer.isProjected());
assertEquals(2, m3Writer.tupleSchema().size());
assertFalse(m3Writer.column("e").isProjected());
assertTrue(m3Writer.column("f").isProjected());
assertNull(actualSchema.metadata("m4"));
// Verify. Only the projected columns appear in the result set.
TupleMetadata expectedSchema = new SchemaBuilder().addMap("m2").add("c", MinorType.INT).add("d", MinorType.INT).resumeSchema().addMap("m3").add("f", MinorType.INT).resumeSchema().build();
SingleRowSet expected = fixture.rowSetBuilder(expectedSchema).addRow(mapValue(3, 4), mapValue(6)).addRow(mapValue(13, 14), mapValue(16)).build();
RowSetUtilities.verify(expected, fixture.wrap(rsLoader.harvest()));
rsLoader.close();
}
Aggregations