Example use of org.apache.drill.exec.physical.impl.protocol.SchemaTracker in the Apache Drill project.
Class TestScanOrchestratorImplicitColumns, method testMetadataMulti.
/**
 * Checks that the implicit (file metadata) columns are re-evaluated for each
 * file the scan reads, while the output schema version stays the same.
 * <br>
 * SELECT dir0, dir1, filename, b FROM (x/y/a.csv, x/b.csv)
 * <br>
 * The first file sits two directories deep, so both partition columns are
 * expected to be filled. The second file sits one directory deep, so
 * {@code dir1} is expected to be null for its rows.
 */
@Test
public void testMetadataMulti() {
  ScanOrchestratorBuilder scanBuilder = new MockScanBuilder();

  // Copy the two inputs to destinations x/y/a.csv and x/b.csv.
  File localFileA = dirTestWatcher.copyResourceToRoot(
      Paths.get("multilevel", "csv", "1994", "Q1", "orders_94_q1.csv"),
      Paths.get("x", "y", "a.csv"));
  Path filePathA = new Path(localFileA.toURI().getPath());
  File localFileB = dirTestWatcher.copyResourceToRoot(
      Paths.get("multilevel", "csv", "1994", "Q2", "orders_94_q2.csv"),
      Paths.get("x", "b.csv"));
  Path filePathB = new Path(localFileB.toURI().getPath());

  ImplicitColumnManager implicitColumns = new ImplicitColumnManager(
      fixture.getOptionManager(),
      standardOptions(Lists.newArrayList(filePathA, filePathB)));
  scanBuilder.withImplicitColumns(implicitColumns);

  // SELECT dir0, dir1, filename, b ...
  final String dir0Col = ScanTestUtils.partitionColName(0);
  final String dir1Col = ScanTestUtils.partitionColName(1);
  scanBuilder.projection(
      RowSetTestUtils.projectList(dir0Col, dir1Col, ScanTestUtils.FILE_NAME_COL, "b"));
  ScanSchemaOrchestrator scanner = new ScanSchemaOrchestrator(fixture.allocator(), scanBuilder);

  // Schema offered by each reader: (a, b)
  TupleMetadata tableSchema = new SchemaBuilder()
      .add("a", MinorType.INT)
      .addNullable("b", MinorType.VARCHAR, 10)
      .buildSchema();

  // Schema expected from the scan: (dir0, dir1, filename, b)
  TupleMetadata expectedSchema = new SchemaBuilder()
      .addNullable(dir0Col, MinorType.VARCHAR)
      .addNullable(dir1Col, MinorType.VARCHAR)
      .add(ScanTestUtils.FILE_NAME_COL, MinorType.VARCHAR)
      .addNullable("b", MinorType.VARCHAR, 10)
      .buildSchema();

  SchemaTracker tracker = new SchemaTracker();
  int baselineVersion;

  {
    // ... FROM file a.csv
    implicitColumns.startFile(filePathA);
    ReaderSchemaOrchestrator readerA = scanner.startReader();
    ResultSetLoader loaderA = readerA.makeTableLoader(tableSchema);

    readerA.startBatch();
    loaderA.writer()
        .addRow(10, "fred")
        .addRow(20, "wilma");
    readerA.endBatch();

    // Remember the schema version produced by the first file.
    tracker.trackSchema(scanner.output());
    baselineVersion = tracker.schemaVersion();

    SingleRowSet expectedA = fixture.rowSetBuilder(expectedSchema)
        .addRow("x", "y", "a.csv", "fred")
        .addRow("x", "y", "a.csv", "wilma")
        .build();
    RowSetUtilities.verify(expectedA, fixture.wrap(scanner.output()));

    // Close the reader explicitly (as in real code); an implicit
    // close would blow away the current file info.
    scanner.closeReader();
  }

  {
    // ... FROM file b.csv
    implicitColumns.startFile(filePathB);
    ReaderSchemaOrchestrator readerB = scanner.startReader();
    ResultSetLoader loaderB = readerB.makeTableLoader(tableSchema);

    readerB.startBatch();
    loaderB.writer()
        .addRow(30, "bambam")
        .addRow(40, "betty");
    readerB.endBatch();

    // Switching files must not register as a schema change.
    tracker.trackSchema(scanner.output());
    assertEquals(baselineVersion, tracker.schemaVersion());

    // Only one directory level for b.csv, hence the null dir1.
    SingleRowSet expectedB = fixture.rowSetBuilder(expectedSchema)
        .addRow("x", null, "b.csv", "bambam")
        .addRow("x", null, "b.csv", "betty")
        .build();
    RowSetUtilities.verify(expectedB, fixture.wrap(scanner.output()));
    scanner.closeReader();
  }

  scanner.close();
}
Example use of org.apache.drill.exec.physical.impl.protocol.SchemaTracker in the Apache Drill project.
Class TestScanOrchestratorEarlySchema, method testTypeSmoothingExplicit.
/**
 * Exercises "type smoothing" with an explicit projection list: a projected
 * column that a reader does not supply should keep the type seen in an
 * earlier reader, so the output schema does not change.
 *
 * <pre>
 * SELECT a, b, c ...
 *
 * Reader 1: (A: BIGINT, B: nullable VARCHAR, C: INT array)
 * Reader 2: (A, C)  -- B missing
 * Reader 3: (A, B)  -- C missing
 * Reader 4: (B, C)  -- A missing
 * </pre>
 *
 * Readers 1 to 3 are asserted to yield a schema equivalent to reader 1's,
 * at the same schema version. Reader 4 is asserted to yield a nullable
 * (OPTIONAL) BIGINT as the first column and a higher schema version.
 * Note that the projection uses lower-case names while the reader schemas
 * use upper-case names.
 */
@Test
public void testTypeSmoothingExplicit() {
  ScanOrchestratorBuilder scanBuilder = new MockScanBuilder();

  // Full reader schema; it also defines the expected output schema.
  TupleMetadata fullSchema = new SchemaBuilder()
      .add("A", MinorType.BIGINT)
      .addNullable("B", MinorType.VARCHAR)
      .addArray("C", MinorType.INT)
      .buildSchema();
  BatchSchema expectedBatchSchema =
      new BatchSchema(SelectionVectorMode.NONE, fullSchema.toFieldList());
  SchemaTracker tracker = new SchemaTracker();

  // SELECT a, b, c ...
  scanBuilder.projection(RowSetTestUtils.projectList("a", "b", "c"));
  ScanSchemaOrchestrator scanner = new ScanSchemaOrchestrator(fixture.allocator(), scanBuilder);

  int baselineVersion;

  {
    // ... FROM reader 1 (A, B, C): establishes the baseline schema.
    ReaderSchemaOrchestrator firstReader = scanner.startReader();
    firstReader.makeTableLoader(fullSchema);
    firstReader.defineSchema();

    VectorContainer firstOutput = scanner.output();
    tracker.trackSchema(firstOutput);
    baselineVersion = tracker.schemaVersion();
    assertTrue(expectedBatchSchema.isEquivalent(firstOutput.getSchema()));
    scanner.closeReader();
  }

  {
    // ... FROM reader 2 (A, C)
    //
    // B is dropped. But, it is nullable, so the vector cache
    // can supply the proper type to ensure continuity.
    TupleMetadata withoutB = new SchemaBuilder()
        .add("A", MinorType.BIGINT)
        .addArray("C", MinorType.INT)
        .buildSchema();
    ReaderSchemaOrchestrator secondReader = scanner.startReader();
    secondReader.makeTableLoader(withoutB);
    secondReader.defineSchema();

    VectorContainer secondOutput = scanner.output();
    tracker.trackSchema(secondOutput);
    assertEquals(baselineVersion, tracker.schemaVersion());
    assertTrue(expectedBatchSchema.isEquivalent(secondOutput.getSchema()));
    scanner.closeReader();
  }

  {
    // ... FROM reader 3 (A, B)
    //
    // C is dropped. But, it is an array, which uses zero-elements
    // to indicate null, so the vector cache can fill in the type.
    TupleMetadata withoutC = new SchemaBuilder()
        .add("A", MinorType.BIGINT)
        .addNullable("B", MinorType.VARCHAR)
        .buildSchema();
    ReaderSchemaOrchestrator thirdReader = scanner.startReader();
    thirdReader.makeTableLoader(withoutC);
    thirdReader.defineSchema();

    VectorContainer thirdOutput = scanner.output();
    tracker.trackSchema(thirdOutput);
    assertEquals(baselineVersion, tracker.schemaVersion());
    assertTrue(expectedBatchSchema.isEquivalent(thirdOutput.getSchema()));
    scanner.closeReader();
  }

  {
    // ... FROM reader 4 (B, C)
    //
    // A was a non-nullable BIGINT, which can't become a null column,
    // so a nullable BIGINT is substituted, resulting in a schema change.
    TupleMetadata withoutA = new SchemaBuilder()
        .addNullable("B", MinorType.VARCHAR)
        .addArray("C", MinorType.INT)
        .buildSchema();
    ReaderSchemaOrchestrator fourthReader = scanner.startReader();
    fourthReader.makeTableLoader(withoutA);
    fourthReader.defineSchema();

    VectorContainer fourthOutput = scanner.output();
    tracker.trackSchema(fourthOutput);

    BatchSchema fourthSchema = fourthOutput.getSchema();
    assertEquals(MinorType.BIGINT, fourthSchema.getColumn(0).getType().getMinorType());
    assertEquals(DataMode.OPTIONAL, fourthSchema.getColumn(0).getType().getMode());
    assertTrue(tracker.schemaVersion() > baselineVersion);
    scanner.closeReader();
  }

  scanner.close();
}
Example use of org.apache.drill.exec.physical.impl.protocol.SchemaTracker in the Apache Drill project.
Class TestScanOrchestratorEarlySchema, method testColumnReordering.
/**
 * Checks that readers presenting the same three columns in different
 * orders are all projected into the SELECT order (a, b, c), and that the
 * reordering alone does not bump the schema version.
 * <p>
 * Three readers are run with column orders (a, b, c), (c, a, b) and
 * (a, c, b); every batch is verified against the (a, b, c) layout.
 */
@Test
public void testColumnReordering() {
  // The same columns in three different physical orders.
  TupleMetadata orderAbc = new SchemaBuilder()
      .add("a", MinorType.INT)
      .addNullable("b", MinorType.VARCHAR, 10)
      .add("c", MinorType.BIGINT)
      .buildSchema();
  TupleMetadata orderCab = new SchemaBuilder()
      .add("c", MinorType.BIGINT)
      .add("a", MinorType.INT)
      .addNullable("b", MinorType.VARCHAR, 10)
      .buildSchema();
  TupleMetadata orderAcb = new SchemaBuilder()
      .add("a", MinorType.INT)
      .add("c", MinorType.BIGINT)
      .addNullable("b", MinorType.VARCHAR, 10)
      .buildSchema();

  // SELECT a, b, c ... with schema smoothing enabled.
  ScanOrchestratorBuilder scanBuilder = new MockScanBuilder();
  scanBuilder.enableSchemaSmoothing(true);
  scanBuilder.projection(RowSetTestUtils.projectList("a", "b", "c"));
  ScanSchemaOrchestrator scanner = new ScanSchemaOrchestrator(fixture.allocator(), scanBuilder);

  SchemaTracker tracker = new SchemaTracker();
  int baselineVersion;

  {
    // ... FROM table 1: projection of (a, b, c) to (a, b, c)
    ReaderSchemaOrchestrator firstReader = scanner.startReader();
    ResultSetLoader firstLoader = firstReader.makeTableLoader(orderAbc);

    firstReader.startBatch();
    firstLoader.writer()
        .addRow(10, "fred", 110L)
        .addRow(20, "wilma", 110L);
    firstReader.endBatch();

    // The first batch defines the version later batches must match.
    tracker.trackSchema(scanner.output());
    baselineVersion = tracker.schemaVersion();

    SingleRowSet firstExpected = fixture.rowSetBuilder(orderAbc)
        .addRow(10, "fred", 110L)
        .addRow(20, "wilma", 110L)
        .build();
    RowSetUtilities.verify(firstExpected, fixture.wrap(scanner.output()));
    scanner.closeReader();
  }

  {
    // ... FROM table 2: projection of (c, a, b) to (a, b, c)
    ReaderSchemaOrchestrator secondReader = scanner.startReader();
    ResultSetLoader secondLoader = secondReader.makeTableLoader(orderCab);

    secondReader.startBatch();
    secondLoader.writer()
        .addRow(330L, 30, "bambam")
        .addRow(440L, 40, "betty");
    secondReader.endBatch();

    tracker.trackSchema(scanner.output());
    assertEquals(baselineVersion, tracker.schemaVersion());

    SingleRowSet secondExpected = fixture.rowSetBuilder(orderAbc)
        .addRow(30, "bambam", 330L)
        .addRow(40, "betty", 440L)
        .build();
    RowSetUtilities.verify(secondExpected, fixture.wrap(scanner.output()));
    // NOTE(review): unlike the first reader, this one is not closed
    // explicitly; presumably the next startReader() handles it - confirm.
  }

  {
    // ... FROM table 3: projection of (a, c, b) to (a, b, c)
    ReaderSchemaOrchestrator thirdReader = scanner.startReader();
    ResultSetLoader thirdLoader = thirdReader.makeTableLoader(orderAcb);

    thirdReader.startBatch();
    thirdLoader.writer()
        .addRow(50, 550L, "dino")
        .addRow(60, 660L, "barney");
    thirdReader.endBatch();

    tracker.trackSchema(scanner.output());
    assertEquals(baselineVersion, tracker.schemaVersion());

    SingleRowSet thirdExpected = fixture.rowSetBuilder(orderAbc)
        .addRow(50, "dino", 550L)
        .addRow(60, "barney", 660L)
        .build();
    RowSetUtilities.verify(thirdExpected, fixture.wrap(scanner.output()));
  }

  scanner.close();
}
Example use of org.apache.drill.exec.physical.impl.protocol.SchemaTracker in the Apache Drill project.
Class TestSchemaSmoothing, method testWildcardSmoothing.
/**
 * With SELECT *, the output schema is taken from the table itself. That is
 * trivial for one table; with several tables the first one sets the schema
 * and each later table falls into one of these cases:
 * <ul>
 * <li>Same schema: nothing changes.</li>
 * <li>Subset of the first schema: the "missing" column keeps its type from
 * the first table and is filled with nulls.</li>
 * <li>Superset or disjoint schema: a hard schema change is triggered.</li>
 * </ul>
 * <p>
 * Whether earlier columns should survive a hard reset is an open question.
 * This test pins the current behavior: after the reset the output matches
 * the new table's schema only, so the "memory" of prior schemas is cleared.
 */
@Test
public void testWildcardSmoothing() {
  // Schemas for the scenarios below.
  final TupleMetadata baselineSchema = new SchemaBuilder()
      .add("a", MinorType.INT)
      .addNullable("b", MinorType.VARCHAR, 10)
      .addNullable("c", MinorType.BIGINT)
      .buildSchema();
  final TupleMetadata subsetSchema = new SchemaBuilder()
      .addNullable("b", MinorType.VARCHAR, 10)
      .add("a", MinorType.INT)
      .buildSchema();
  final TupleMetadata disjointSchema = new SchemaBuilder()
      .add("a", MinorType.INT)
      .addNullable("b", MinorType.VARCHAR, 10)
      .add("d", MinorType.VARCHAR)
      .buildSchema();

  // SELECT * ... with schema smoothing enabled.
  ScanOrchestratorBuilder scanBuilder = new MockScanBuilder();
  scanBuilder.enableSchemaSmoothing(true);
  scanBuilder.projection(RowSetTestUtils.projectAll());
  final ScanSchemaOrchestrator scanOrchestrator =
      new ScanSchemaOrchestrator(fixture.allocator(), scanBuilder);

  final SchemaTracker tracker = new SchemaTracker();
  int baselineVersion;

  {
    // Table 1: establishes the baseline schema and version.
    final ReaderSchemaOrchestrator firstReader = scanOrchestrator.startReader();
    final ResultSetLoader firstLoader = firstReader.makeTableLoader(baselineSchema);

    firstReader.startBatch();
    firstLoader.writer()
        .addRow(10, "fred", 110L)
        .addRow(20, "wilma", 110L);
    firstReader.endBatch();

    tracker.trackSchema(scanOrchestrator.output());
    baselineVersion = tracker.schemaVersion();

    final SingleRowSet firstExpected = fixture.rowSetBuilder(baselineSchema)
        .addRow(10, "fred", 110L)
        .addRow(20, "wilma", 110L)
        .build();
    new RowSetComparison(firstExpected)
        .verifyAndClearAll(fixture.wrap(scanOrchestrator.output()));
  }

  {
    // Table 2: identical schema, the trivial case.
    final ReaderSchemaOrchestrator secondReader = scanOrchestrator.startReader();
    final ResultSetLoader secondLoader = secondReader.makeTableLoader(baselineSchema);

    secondReader.startBatch();
    secondLoader.writer()
        .addRow(70, "pebbles", 770L)
        .addRow(80, "hoppy", 880L);
    secondReader.endBatch();

    tracker.trackSchema(scanOrchestrator.output());
    assertEquals(baselineVersion, tracker.schemaVersion());

    final SingleRowSet secondExpected = fixture.rowSetBuilder(baselineSchema)
        .addRow(70, "pebbles", 770L)
        .addRow(80, "hoppy", 880L)
        .build();
    new RowSetComparison(secondExpected)
        .verifyAndClearAll(fixture.wrap(scanOrchestrator.output()));
  }

  {
    // Table 3: a subset (b, a) of the baseline columns. The output is
    // still expected in baseline form, with nulls for the absent c.
    final ReaderSchemaOrchestrator thirdReader = scanOrchestrator.startReader();
    final ResultSetLoader thirdLoader = thirdReader.makeTableLoader(subsetSchema);

    thirdReader.startBatch();
    thirdLoader.writer()
        .addRow("bambam", 30)
        .addRow("betty", 40);
    thirdReader.endBatch();

    tracker.trackSchema(scanOrchestrator.output());
    assertEquals(baselineVersion, tracker.schemaVersion());

    final SingleRowSet thirdExpected = fixture.rowSetBuilder(baselineSchema)
        .addRow(30, "bambam", null)
        .addRow(40, "betty", null)
        .build();
    new RowSetComparison(thirdExpected)
        .verifyAndClearAll(fixture.wrap(scanOrchestrator.output()));
  }

  {
    // Table 4: disjoint schema, which causes a schema reset. The output
    // is expected to match the new schema exactly, at a new version.
    final ReaderSchemaOrchestrator fourthReader = scanOrchestrator.startReader();
    final ResultSetLoader fourthLoader = fourthReader.makeTableLoader(disjointSchema);

    fourthReader.startBatch();
    fourthLoader.writer()
        .addRow(50, "dino", "supporting")
        .addRow(60, "barney", "main");
    fourthReader.endBatch();

    tracker.trackSchema(scanOrchestrator.output());
    assertNotEquals(baselineVersion, tracker.schemaVersion());

    final SingleRowSet fourthExpected = fixture.rowSetBuilder(disjointSchema)
        .addRow(50, "dino", "supporting")
        .addRow(60, "barney", "main")
        .build();
    new RowSetComparison(fourthExpected)
        .verifyAndClearAll(fixture.wrap(scanOrchestrator.output()));
  }

  scanOrchestrator.close();
}
Example use of org.apache.drill.exec.physical.impl.protocol.SchemaTracker in the Apache Drill project.
Class TestScanOrchestratorEarlySchema, method testTypeSmoothing.
/**
 * Tests that the scan "smooths" out schema changes by reusing a column's
 * type from a previous reader. Three readers are run against the
 * projection (a, b):<br>
 * (a, b)<br>
 * (a)<br>
 * (a, b)<br>
 * Column b is a nullable VARCHAR. The second reader does not provide it,
 * and the test expects b to appear with its earlier type, filled with
 * nulls, and with no change in schema version.
 * <p>
 * Detailed type matching for "missing" columns is reportedly covered by
 * {@code testNullColumnLoader()} - NOTE(review): confirm where that test
 * lives.
 * <p>
 * As a side effect, this also checks that two identical tables separated
 * by a different table produce no schema change.
 */
@Test
public void testTypeSmoothing() {
  // File schemas: (a, b) for readers 1 and 3, (a) for reader 2.
  TupleMetadata twoColSchema = new SchemaBuilder()
      .add("a", MinorType.INT)
      .addNullable("b", MinorType.VARCHAR, 10)
      .buildSchema();
  TupleMetadata oneColSchema = new SchemaBuilder()
      .add("a", MinorType.INT)
      .buildSchema();

  // SELECT a, b ...
  ScanOrchestratorBuilder scanBuilder = new MockScanBuilder();
  scanBuilder.projection(RowSetTestUtils.projectList("a", "b"));
  ScanSchemaOrchestrator scanner = new ScanSchemaOrchestrator(fixture.allocator(), scanBuilder);

  SchemaTracker tracker = new SchemaTracker();
  int baselineVersion;

  {
    // ... FROM table 1: projection of (a, b) to (a, b)
    ReaderSchemaOrchestrator firstReader = scanner.startReader();
    ResultSetLoader firstLoader = firstReader.makeTableLoader(twoColSchema);

    firstReader.startBatch();
    firstLoader.writer()
        .addRow(10, "fred")
        .addRow(20, "wilma");
    firstReader.endBatch();

    // Record the version that the following readers must preserve.
    tracker.trackSchema(scanner.output());
    baselineVersion = tracker.schemaVersion();

    SingleRowSet firstExpected = fixture.rowSetBuilder(twoColSchema)
        .addRow(10, "fred")
        .addRow(20, "wilma")
        .build();
    RowSetUtilities.verify(firstExpected, fixture.wrap(scanner.output()));
  }

  {
    // ... FROM table 2: projection of (a) to (a, b), reusing b from above.
    ReaderSchemaOrchestrator secondReader = scanner.startReader();
    ResultSetLoader secondLoader = secondReader.makeTableLoader(oneColSchema);

    secondReader.startBatch();
    secondLoader.writer()
        .addRow(30)
        .addRow(40);
    secondReader.endBatch();

    tracker.trackSchema(scanner.output());
    assertEquals(baselineVersion, tracker.schemaVersion());

    // b is expected as a null-filled column of the two-column schema.
    SingleRowSet secondExpected = fixture.rowSetBuilder(twoColSchema)
        .addRow(30, null)
        .addRow(40, null)
        .build();
    RowSetUtilities.verify(secondExpected, fixture.wrap(scanner.output()));
  }

  {
    // ... FROM table 3: projection of (a, b) to (a, b), reusing b yet again
    ReaderSchemaOrchestrator thirdReader = scanner.startReader();
    ResultSetLoader thirdLoader = thirdReader.makeTableLoader(twoColSchema);

    thirdReader.startBatch();
    thirdLoader.writer()
        .addRow(50, "dino")
        .addRow(60, "barney");
    thirdReader.endBatch();

    tracker.trackSchema(scanner.output());
    assertEquals(baselineVersion, tracker.schemaVersion());

    SingleRowSet thirdExpected = fixture.rowSetBuilder(twoColSchema)
        .addRow(50, "dino")
        .addRow(60, "barney")
        .build();
    RowSetUtilities.verify(thirdExpected, fixture.wrap(scanner.output()));
  }

  scanner.close();
}
Aggregations