Search in sources :

Example 16 with MockScanBuilder

use of org.apache.drill.exec.physical.impl.scan.ScanTestUtils.MockScanBuilder in project drill by apache.

the class TestScanOrchestratorLateSchema method testLateSchemaWildcard.

/**
 * Test SELECT * from a late-schema table. The table loader is created
 * without a schema, so the reader reports no schema at first; the columns
 * (a, b) are then added through the writer after the first batch has
 * been started.
 */
@Test
public void testLateSchemaWildcard() {
    // SELECT * ...
    ScanOrchestratorBuilder scanBuilder = new MockScanBuilder();
    scanBuilder.projection(RowSetTestUtils.projectAll());
    ScanSchemaOrchestrator scanner = new ScanSchemaOrchestrator(fixture.allocator(), scanBuilder);

    // ... FROM table
    ReaderSchemaOrchestrator tableReader = scanner.startReader();

    // Create the table loader with a null schema: nothing is known up front,
    // so the reader must not claim to have a schema yet.
    ResultSetLoader tableLoader = tableReader.makeTableLoader(null);
    assertFalse(tableReader.hasSchema());

    // Start a batch, then discover the schema (a, b) by adding columns
    // directly on the writer.
    tableReader.startBatch();
    RowSetLoader rowWriter = tableLoader.writer();
    rowWriter.addColumn(
        SchemaBuilder.columnSchema("a", MinorType.INT, DataMode.REQUIRED));
    rowWriter.addColumn(
        SchemaBuilder.columnSchema("b", MinorType.VARCHAR, DataMode.REQUIRED));

    // Write two rows using the columns just discovered
    rowWriter
        .addRow(1, "fred")
        .addRow(2, "wilma");
    tableReader.endBatch();

    // The scan output should carry exactly the discovered schema and rows
    TupleMetadata discoveredSchema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .add("b", MinorType.VARCHAR)
        .buildSchema();
    SingleRowSet expectedRows = fixture.rowSetBuilder(discoveredSchema)
        .addRow(1, "fred")
        .addRow(2, "wilma")
        .build();
    RowSetComparison comparison = new RowSetComparison(expectedRows);
    comparison.verifyAndClearAll(fixture.wrap(scanner.output()));

    scanner.close();
}
Also used : SingleRowSet(org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) RowSetComparison(org.apache.drill.test.rowSet.RowSetComparison) ResultSetLoader(org.apache.drill.exec.physical.resultSet.ResultSetLoader) ScanOrchestratorBuilder(org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator.ScanOrchestratorBuilder) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) MockScanBuilder(org.apache.drill.exec.physical.impl.scan.ScanTestUtils.MockScanBuilder) RowSetLoader(org.apache.drill.exec.physical.resultSet.RowSetLoader) ScanSchemaOrchestrator(org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator) ReaderSchemaOrchestrator(org.apache.drill.exec.physical.impl.scan.project.ReaderSchemaOrchestrator) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 17 with MockScanBuilder

use of org.apache.drill.exec.physical.impl.scan.ScanTestUtils.MockScanBuilder in project drill by apache.

the class TestScanOrchestratorImplicitColumns method testMixture.

/**
 * Test SELECT dir0, b, suffix, c FROM table(a, b).
 * <p>
 * The projection list mixes implicit (file metadata) columns, a column
 * that exists in the table, and a column the table does not provide,
 * which is expected to come back as null.
 */
@Test
public void testMixture() {
    // Stage the input file under x/y/z.csv and set up the implicit column
    // manager for that location.
    File csvFile = dirTestWatcher.copyResourceToRoot(
        Paths.get("multilevel", "csv", "1994", "Q1", "orders_94_q1.csv"),
        Paths.get("x", "y", "z.csv"));
    Path csvPath = new Path(csvFile.toURI().getPath());
    ImplicitColumnManager implicitColumns = new ImplicitColumnManager(
        fixture.getOptionManager(), standardOptions(csvPath));

    // SELECT dir0, b, suffix, c ...
    ScanOrchestratorBuilder scanBuilder = new MockScanBuilder();
    scanBuilder.withImplicitColumns(implicitColumns);
    scanBuilder.projection(RowSetTestUtils.projectList("dir0", "b", "suffix", "c"));
    ScanSchemaOrchestrator orchestrator = new ScanSchemaOrchestrator(fixture.allocator(), scanBuilder);

    // ... FROM file
    implicitColumns.startFile(csvPath);
    ReaderSchemaOrchestrator fileReader = orchestrator.startReader();

    // The file itself provides (a, b)
    TupleMetadata fileSchema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .add("b", MinorType.VARCHAR)
        .buildSchema();
    ResultSetLoader fileLoader = fileReader.makeTableLoader(fileSchema);

    // Schema the scan is expected to produce for the projection list
    TupleMetadata outputSchema = new SchemaBuilder()
        .addNullable("dir0", MinorType.VARCHAR)
        .add("b", MinorType.VARCHAR)
        .add("suffix", MinorType.VARCHAR)
        .addNullable("c", MinorType.INT)
        .buildSchema();

    // Load one batch of file data
    fileReader.startBatch();
    fileLoader.writer()
        .addRow(1, "fred")
        .addRow(2, "wilma");
    fileReader.endBatch();

    // Check the projected output: dir0 and suffix filled in, c is null
    SingleRowSet expectedRows = fixture.rowSetBuilder(outputSchema)
        .addRow("x", "fred", "csv", null)
        .addRow("x", "wilma", "csv", null)
        .build();
    RowSetUtilities.verify(expectedRows, fixture.wrap(orchestrator.output()));

    orchestrator.close();
}
Also used : Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) ImplicitColumnManager(org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager) SingleRowSet(org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) ResultSetLoader(org.apache.drill.exec.physical.resultSet.ResultSetLoader) ScanOrchestratorBuilder(org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator.ScanOrchestratorBuilder) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) MockScanBuilder(org.apache.drill.exec.physical.impl.scan.ScanTestUtils.MockScanBuilder) File(java.io.File) ScanSchemaOrchestrator(org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator) ReaderSchemaOrchestrator(org.apache.drill.exec.physical.impl.scan.project.ReaderSchemaOrchestrator) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 18 with MockScanBuilder

use of org.apache.drill.exec.physical.impl.scan.ScanTestUtils.MockScanBuilder in project drill by apache.

the class TestColumnsArray method buildScanner.

/**
 * Builds a {@code MockScanner} wired for the given projection list: a scan
 * orchestrator with implicit-column and columns-array support, a reader
 * started on {@code hdfs:///w/x/y/z.csv}, and a table loader over a single
 * VARCHAR array column. The reader's schema is defined before returning.
 *
 * @param projList the SELECT projection list to apply to the scan
 * @return the mock holding the scanner, reader and loader that were created
 */
private MockScanner buildScanner(List<SchemaPath> projList) {
    MockScanner result = new MockScanner();

    // File metadata (implicit column) manager for the file being "scanned"
    Path csvPath = new Path("hdfs:///w/x/y/z.csv");
    ImplicitColumnManager implicitColumns = new ImplicitColumnManager(
        fixture.getOptionManager(), standardOptions(csvPath));

    // Manager for the columns array
    ColumnsArrayManager columnsArray = new ColumnsArrayManager(false);

    // Assemble the orchestrator configuration:
    // SELECT <proj list> ...
    ScanOrchestratorBuilder scanBuilder = new MockScanBuilder();
    scanBuilder.withImplicitColumns(implicitColumns);
    scanBuilder.addParser(columnsArray.projectionParser());
    scanBuilder.addResolver(columnsArray.resolver());
    scanBuilder.projection(projList);
    result.scanner = new ScanSchemaOrchestrator(fixture.allocator(), scanBuilder);

    // ... FROM z.csv
    implicitColumns.startFile(csvPath);
    result.reader = result.scanner.startReader();

    // The table exposes one column: the VARCHAR[] columns array
    TupleMetadata columnsSchema = new SchemaBuilder()
        .addArray(ColumnsScanFramework.COLUMNS_COL, MinorType.VARCHAR)
        .buildSchema();
    result.loader = result.reader.makeTableLoader(columnsSchema);

    // Define the schema up front (first, empty batch)
    result.reader.defineSchema();
    return result;
}
Also used : Path(org.apache.hadoop.fs.Path) SchemaPath(org.apache.drill.common.expression.SchemaPath) ImplicitColumnManager(org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager) ColumnsArrayManager(org.apache.drill.exec.physical.impl.scan.columns.ColumnsArrayManager) ScanOrchestratorBuilder(org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator.ScanOrchestratorBuilder) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) MockScanBuilder(org.apache.drill.exec.physical.impl.scan.ScanTestUtils.MockScanBuilder) ScanSchemaOrchestrator(org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator)

Example 19 with MockScanBuilder

use of org.apache.drill.exec.physical.impl.scan.ScanTestUtils.MockScanBuilder in project drill by apache.

the class TestSchemaSmoothing method testWildcardSmoothing.

/**
 * With SELECT *, the output schema is whatever the table provides. That is
 * simple for a single table, but with several tables the first one fixes
 * the schema and each later table falls into one of these cases:
 * <ul>
 * <li>Identical schema: the trivial case.</li>
 * <li>A subset of the first schema: the type of each "missing" column is
 * taken from the first table and the column is filled with nulls.</li>
 * <li>A superset of, or a set disjoint from, the first schema: this forces
 * a hard schema change.</li>
 * </ul>
 * <p>
 * Whether earlier columns ought to survive a hard reset is an open
 * question. For now the code implements, and this test verifies, that a
 * hard reset clears the "memory" of prior schemas.
 */
@Test
public void testWildcardSmoothing() {
    // SELECT * with schema smoothing turned on
    ScanOrchestratorBuilder scanBuilder = new MockScanBuilder();
    scanBuilder.enableSchemaSmoothing(true);
    scanBuilder.projection(RowSetTestUtils.projectAll());
    ScanSchemaOrchestrator scanner = new ScanSchemaOrchestrator(fixture.allocator(), scanBuilder);

    // Schemas for the four tables scanned below
    TupleMetadata baselineSchema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .addNullable("b", MinorType.VARCHAR, 10)
        .addNullable("c", MinorType.BIGINT)
        .buildSchema();
    TupleMetadata subsetOfBaseline = new SchemaBuilder()
        .addNullable("b", MinorType.VARCHAR, 10)
        .add("a", MinorType.INT)
        .buildSchema();
    TupleMetadata disjointFromBaseline = new SchemaBuilder()
        .add("a", MinorType.INT)
        .addNullable("b", MinorType.VARCHAR, 10)
        .add("d", MinorType.VARCHAR)
        .buildSchema();

    SchemaTracker versionTracker = new SchemaTracker();
    int baselineVersion;

    // Table 1: establishes the baseline schema and schema version
    {
        ReaderSchemaOrchestrator tableReader = scanner.startReader();
        ResultSetLoader tableLoader = tableReader.makeTableLoader(baselineSchema);
        tableReader.startBatch();
        tableLoader.writer()
            .addRow(10, "fred", 110L)
            .addRow(20, "wilma", 110L);
        tableReader.endBatch();

        versionTracker.trackSchema(scanner.output());
        baselineVersion = versionTracker.schemaVersion();

        SingleRowSet expectedRows = fixture.rowSetBuilder(baselineSchema)
            .addRow(10, "fred", 110L)
            .addRow(20, "wilma", 110L)
            .build();
        RowSetComparison comparison = new RowSetComparison(expectedRows);
        comparison.verifyAndClearAll(fixture.wrap(scanner.output()));
    }

    // Table 2: same schema as table 1, so the version must not move
    {
        ReaderSchemaOrchestrator tableReader = scanner.startReader();
        ResultSetLoader tableLoader = tableReader.makeTableLoader(baselineSchema);
        tableReader.startBatch();
        tableLoader.writer()
            .addRow(70, "pebbles", 770L)
            .addRow(80, "hoppy", 880L);
        tableReader.endBatch();

        versionTracker.trackSchema(scanner.output());
        assertEquals(baselineVersion, versionTracker.schemaVersion());

        SingleRowSet expectedRows = fixture.rowSetBuilder(baselineSchema)
            .addRow(70, "pebbles", 770L)
            .addRow(80, "hoppy", 880L)
            .build();
        RowSetComparison comparison = new RowSetComparison(expectedRows);
        comparison.verifyAndClearAll(fixture.wrap(scanner.output()));
    }

    // Table 3: a subset of the baseline (b, a). The output keeps the
    // baseline layout, with column c filled with nulls, and the schema
    // version is unchanged.
    {
        ReaderSchemaOrchestrator tableReader = scanner.startReader();
        ResultSetLoader tableLoader = tableReader.makeTableLoader(subsetOfBaseline);
        tableReader.startBatch();
        tableLoader.writer()
            .addRow("bambam", 30)
            .addRow("betty", 40);
        tableReader.endBatch();

        versionTracker.trackSchema(scanner.output());
        assertEquals(baselineVersion, versionTracker.schemaVersion());

        SingleRowSet expectedRows = fixture.rowSetBuilder(baselineSchema)
            .addRow(30, "bambam", null)
            .addRow(40, "betty", null)
            .build();
        RowSetComparison comparison = new RowSetComparison(expectedRows);
        comparison.verifyAndClearAll(fixture.wrap(scanner.output()));
    }

    // Table 4: disjoint schema, which causes a schema reset; the schema
    // version must differ from the baseline.
    {
        ReaderSchemaOrchestrator tableReader = scanner.startReader();
        ResultSetLoader tableLoader = tableReader.makeTableLoader(disjointFromBaseline);
        tableReader.startBatch();
        tableLoader.writer()
            .addRow(50, "dino", "supporting")
            .addRow(60, "barney", "main");
        tableReader.endBatch();

        versionTracker.trackSchema(scanner.output());
        assertNotEquals(baselineVersion, versionTracker.schemaVersion());

        SingleRowSet expectedRows = fixture.rowSetBuilder(disjointFromBaseline)
            .addRow(50, "dino", "supporting")
            .addRow(60, "barney", "main")
            .build();
        RowSetComparison comparison = new RowSetComparison(expectedRows);
        comparison.verifyAndClearAll(fixture.wrap(scanner.output()));
    }

    scanner.close();
}
Also used : SingleRowSet(org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) RowSetComparison(org.apache.drill.test.rowSet.RowSetComparison) ResultSetLoader(org.apache.drill.exec.physical.resultSet.ResultSetLoader) ScanOrchestratorBuilder(org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator.ScanOrchestratorBuilder) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) SchemaTracker(org.apache.drill.exec.physical.impl.protocol.SchemaTracker) MockScanBuilder(org.apache.drill.exec.physical.impl.scan.ScanTestUtils.MockScanBuilder) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 20 with MockScanBuilder

use of org.apache.drill.exec.physical.impl.scan.ScanTestUtils.MockScanBuilder in project drill by apache.

the class TestScanOrchestratorEarlySchema method testTypeSmoothing.

/**
 * Test that the scanner can "smooth" over schema changes by reusing a
 * column type seen in an earlier reader. Three readers are run against a
 * projection of (a, b):<br>
 * (a, b)<br>
 * (a)<br>
 * (a, b)<br>
 * The second reader does not provide b. Because b is nullable in the first
 * reader, the output for the second reader is verified against the same
 * two-column schema, with b filled with nulls.
 * <p>
 * Detailed testing of type matching for "missing" columns is done
 * in {@link #testNullColumnLoader()}.
 * <p>
 * As a side effect, this also checks that two identical tables (here
 * separated by a different table) produce no schema change.
 */
@Test
public void testTypeSmoothing() {
    // SELECT a, b ...
    ScanOrchestratorBuilder scanBuilder = new MockScanBuilder();
    scanBuilder.projection(RowSetTestUtils.projectList("a", "b"));
    ScanSchemaOrchestrator orchestrator = new ScanSchemaOrchestrator(fixture.allocator(), scanBuilder);

    // Schema of the two-column files: (a, b)
    TupleMetadata schemaAB = new SchemaBuilder()
        .add("a", MinorType.INT)
        .addNullable("b", MinorType.VARCHAR, 10)
        .buildSchema();

    SchemaTracker versionTracker = new SchemaTracker();
    int initialVersion;

    // ... FROM table 1: (a, b) projected to (a, b); records the
    // schema version the later readers are compared against.
    {
        ReaderSchemaOrchestrator tableReader = orchestrator.startReader();
        ResultSetLoader tableLoader = tableReader.makeTableLoader(schemaAB);
        tableReader.startBatch();
        tableLoader.writer()
            .addRow(10, "fred")
            .addRow(20, "wilma");
        tableReader.endBatch();

        versionTracker.trackSchema(orchestrator.output());
        initialVersion = versionTracker.schemaVersion();

        SingleRowSet expectedRows = fixture.rowSetBuilder(schemaAB)
            .addRow(10, "fred")
            .addRow(20, "wilma")
            .build();
        RowSetUtilities.verify(expectedRows, fixture.wrap(orchestrator.output()));
    }

    // ... FROM table 2: the file has only (a); projected to (a, b),
    // reusing b from the first table. No schema change expected.
    {
        ReaderSchemaOrchestrator tableReader = orchestrator.startReader();
        TupleMetadata schemaA = new SchemaBuilder()
            .add("a", MinorType.INT)
            .buildSchema();
        ResultSetLoader tableLoader = tableReader.makeTableLoader(schemaA);
        tableReader.startBatch();
        tableLoader.writer()
            .addRow(30)
            .addRow(40);
        tableReader.endBatch();

        versionTracker.trackSchema(orchestrator.output());
        assertEquals(initialVersion, versionTracker.schemaVersion());

        SingleRowSet expectedRows = fixture.rowSetBuilder(schemaAB)
            .addRow(30, null)
            .addRow(40, null)
            .build();
        RowSetUtilities.verify(expectedRows, fixture.wrap(orchestrator.output()));
    }

    // ... FROM table 3: (a, b) again, projected to (a, b), reusing b
    // once more. Still no schema change expected.
    {
        ReaderSchemaOrchestrator tableReader = orchestrator.startReader();
        ResultSetLoader tableLoader = tableReader.makeTableLoader(schemaAB);
        tableReader.startBatch();
        tableLoader.writer()
            .addRow(50, "dino")
            .addRow(60, "barney");
        tableReader.endBatch();

        versionTracker.trackSchema(orchestrator.output());
        assertEquals(initialVersion, versionTracker.schemaVersion());

        SingleRowSet expectedRows = fixture.rowSetBuilder(schemaAB)
            .addRow(50, "dino")
            .addRow(60, "barney")
            .build();
        RowSetUtilities.verify(expectedRows, fixture.wrap(orchestrator.output()));
    }

    orchestrator.close();
}
Also used : SingleRowSet(org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) ResultSetLoader(org.apache.drill.exec.physical.resultSet.ResultSetLoader) ScanOrchestratorBuilder(org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator.ScanOrchestratorBuilder) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) BatchSchemaBuilder(org.apache.drill.exec.record.BatchSchemaBuilder) SchemaTracker(org.apache.drill.exec.physical.impl.protocol.SchemaTracker) MockScanBuilder(org.apache.drill.exec.physical.impl.scan.ScanTestUtils.MockScanBuilder) ScanSchemaOrchestrator(org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator) ReaderSchemaOrchestrator(org.apache.drill.exec.physical.impl.scan.project.ReaderSchemaOrchestrator) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Aggregations

MockScanBuilder (org.apache.drill.exec.physical.impl.scan.ScanTestUtils.MockScanBuilder)22 ScanOrchestratorBuilder (org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator.ScanOrchestratorBuilder)22 SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder)22 TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata)22 ScanSchemaOrchestrator (org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator)21 SubOperatorTest (org.apache.drill.test.SubOperatorTest)21 Test (org.junit.Test)21 ReaderSchemaOrchestrator (org.apache.drill.exec.physical.impl.scan.project.ReaderSchemaOrchestrator)20 SingleRowSet (org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet)20 ResultSetLoader (org.apache.drill.exec.physical.resultSet.ResultSetLoader)18 BatchSchemaBuilder (org.apache.drill.exec.record.BatchSchemaBuilder)13 SchemaPath (org.apache.drill.common.expression.SchemaPath)6 SchemaTracker (org.apache.drill.exec.physical.impl.protocol.SchemaTracker)6 ImplicitColumnManager (org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager)6 Path (org.apache.hadoop.fs.Path)6 File (java.io.File)5 RowSetComparison (org.apache.drill.test.rowSet.RowSetComparison)3 MajorType (org.apache.drill.common.types.TypeProtos.MajorType)2 RowSetLoader (org.apache.drill.exec.physical.resultSet.RowSetLoader)2 BatchSchema (org.apache.drill.exec.record.BatchSchema)2