Example 1 with ImplicitColumnManager

Use of org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager in project drill by apache.

From the class TestScanOrchestratorImplicitColumns, method testSelectNone.

/**
 * Test SELECT c FROM table(a, b).
 * The result set will contain a single null column c for each record,
 * but no file data.
 */
@Test
public void testSelectNone() {
    ScanOrchestratorBuilder builder = new MockScanBuilder();
    File file = dirTestWatcher.copyResourceToRoot(Paths.get("multilevel", "csv", "1994", "Q1", "orders_94_q1.csv"), Paths.get("x", "y", "z.csv"));
    Path filePath = new Path(file.toURI().getPath());
    ImplicitColumnManager metadataManager = new ImplicitColumnManager(fixture.getOptionManager(), standardOptions(filePath));
    builder.withImplicitColumns(metadataManager);
    // SELECT c ...
    builder.projection(RowSetTestUtils.projectList("c"));
    ScanSchemaOrchestrator scanner = new ScanSchemaOrchestrator(fixture.allocator(), builder);
    // ... FROM file
    metadataManager.startFile(filePath);
    ReaderSchemaOrchestrator reader = scanner.startReader();
    // file schema (a, b)
    TupleMetadata tableSchema = new SchemaBuilder().add("a", MinorType.INT).add("b", MinorType.VARCHAR).buildSchema();
    // Create the table loader
    ResultSetLoader loader = reader.makeTableLoader(tableSchema);
    TupleMetadata expectedSchema = new SchemaBuilder().addNullable("c", MinorType.INT).buildSchema();
    // Create a batch of data.
    reader.startBatch();
    loader.writer().addRow(1, "fred").addRow(2, "wilma");
    reader.endBatch();
    // Verify
    SingleRowSet expected = fixture.rowSetBuilder(expectedSchema).addSingleCol(null).addSingleCol(null).build();
    RowSetUtilities.verify(expected, fixture.wrap(scanner.output()));
    scanner.close();
}
Also used: Path (org.apache.hadoop.fs.Path), SchemaPath (org.apache.drill.common.expression.SchemaPath), ImplicitColumnManager (org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager), SingleRowSet (org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet), ResultSetLoader (org.apache.drill.exec.physical.resultSet.ResultSetLoader), ScanOrchestratorBuilder (org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator.ScanOrchestratorBuilder), TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata), SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder), MockScanBuilder (org.apache.drill.exec.physical.impl.scan.ScanTestUtils.MockScanBuilder), File (java.io.File), ScanSchemaOrchestrator (org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator), ReaderSchemaOrchestrator (org.apache.drill.exec.physical.impl.scan.project.ReaderSchemaOrchestrator), SubOperatorTest (org.apache.drill.test.SubOperatorTest), Test (org.junit.Test)
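
The standardOptions(...) helper used above belongs to the test class and is not shown on this page. As a hedged sketch only, it presumably assembles the ImplicitColumnOptions that the ImplicitColumnManager constructor expects, along these lines (the option-setter names here are assumptions inferred from the scan framework; consult TestScanOrchestratorImplicitColumns in the Drill sources for the authoritative version):

// Hypothetical reconstruction of the test's standardOptions() helper.
private ImplicitColumnManager.ImplicitColumnOptions standardOptions(Path filePath) {
    ImplicitColumnManager.ImplicitColumnOptions options =
        new ImplicitColumnManager.ImplicitColumnOptions();
    // Selection root: the test watcher's root directory, so that the
    // x/y/... layout below it becomes dir0, dir1, ...
    options.setSelectionRoot(new Path(dirTestWatcher.getRootDir().toURI().getPath()));
    options.setFiles(Lists.newArrayList(filePath));  // the file(s) this scan covers
    return options;
}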

Example 2 with ImplicitColumnManager

Use of org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager in project drill by apache.

From the class TestScanOrchestratorImplicitColumns, method testEarlySchemaSelectAllAndMetadata.

/**
 * Test SELECT a, b, dir0, suffix FROM table(a, b), where
 * dir0 and suffix are file metadata (implicit) columns.
 */
@Test
public void testEarlySchemaSelectAllAndMetadata() {
    // Null columns of type VARCHAR
    MajorType nullType = MajorType.newBuilder().setMinorType(MinorType.VARCHAR).setMode(DataMode.OPTIONAL).build();
    ScanOrchestratorBuilder builder = new MockScanBuilder();
    builder.nullType(nullType);
    File file = dirTestWatcher.copyResourceToRoot(Paths.get("multilevel", "csv", "1994", "Q1", "orders_94_q1.csv"), Paths.get("x", "y", "z.csv"));
    Path filePath = new Path(file.toURI().getPath());
    ImplicitColumnManager metadataManager = new ImplicitColumnManager(fixture.getOptionManager(), standardOptions(filePath));
    builder.withImplicitColumns(metadataManager);
    // SELECT a, b, dir0, suffix ...
    builder.projection(RowSetTestUtils.projectList("a", "b", "dir0", "suffix"));
    ScanSchemaOrchestrator scanner = new ScanSchemaOrchestrator(fixture.allocator(), builder);
    // ... FROM file
    metadataManager.startFile(filePath);
    ReaderSchemaOrchestrator reader = scanner.startReader();
    // file schema (a, b)
    TupleMetadata tableSchema = new SchemaBuilder().add("a", MinorType.INT).add("b", MinorType.VARCHAR).buildSchema();
    // Create the table loader
    ResultSetLoader loader = reader.makeTableLoader(tableSchema);
    // Verify empty batch.
    reader.defineSchema();
    TupleMetadata expectedSchema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .add("b", MinorType.VARCHAR)
        .addNullable("dir0", MinorType.VARCHAR)
        .add("suffix", MinorType.VARCHAR)
        .buildSchema();
    {
        SingleRowSet expected = fixture.rowSetBuilder(expectedSchema).build();
        assertNotNull(scanner.output());
        RowSetUtilities.verify(expected, fixture.wrap(scanner.output()));
    }
    // Create a batch of data.
    reader.startBatch();
    loader.writer().addRow(1, "fred").addRow(2, "wilma");
    reader.endBatch();
    // Verify
    {
        SingleRowSet expected = fixture.rowSetBuilder(expectedSchema).addRow(1, "fred", "x", "csv").addRow(2, "wilma", "x", "csv").build();
        RowSetUtilities.verify(expected, fixture.wrap(scanner.output()));
    }
    scanner.close();
}
Also used: Path (org.apache.hadoop.fs.Path), SchemaPath (org.apache.drill.common.expression.SchemaPath), ImplicitColumnManager (org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager), SingleRowSet (org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet), ResultSetLoader (org.apache.drill.exec.physical.resultSet.ResultSetLoader), MajorType (org.apache.drill.common.types.TypeProtos.MajorType), ScanOrchestratorBuilder (org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator.ScanOrchestratorBuilder), TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata), SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder), MockScanBuilder (org.apache.drill.exec.physical.impl.scan.ScanTestUtils.MockScanBuilder), File (java.io.File), ScanSchemaOrchestrator (org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator), ReaderSchemaOrchestrator (org.apache.drill.exec.physical.impl.scan.project.ReaderSchemaOrchestrator), SubOperatorTest (org.apache.drill.test.SubOperatorTest), Test (org.junit.Test)
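
One detail worth noting in this example: builder.nullType(...) controls the type given to projected columns that the reader cannot supply. A minimal sketch contrasting the two behaviors seen so far (the OPTIONAL INT default is inferred from Example 1's expected schema; all calls appear in the examples above):

ScanOrchestratorBuilder builder = new MockScanBuilder();
// Without an explicit null type, an unmatched projected column such as
// "c" in Example 1 materializes as OPTIONAL INT. With the override
// below, columns that must be materialized as nulls come back as
// nullable VARCHAR instead.
builder.nullType(MajorType.newBuilder()
    .setMinorType(MinorType.VARCHAR)
    .setMode(DataMode.OPTIONAL)
    .build());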

Example 3 with ImplicitColumnManager

Use of org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager in project drill by apache.

From the class TestScanOrchestratorImplicitColumns, method testWildcardWithMetadata.

/**
 * Resolve a selection list that combines SELECT * with the full set of
 * implicit file and partition columns.
 */
@Test
public void testWildcardWithMetadata() throws IOException {
    File file = dirTestWatcher.copyResourceToRoot(Paths.get("multilevel", "csv", "1994", "Q1", "orders_94_q1.csv"), Paths.get("x", "y", "z.csv"));
    Path filePath = new Path(file.toURI().getPath());
    DrillFileSystem fileSystem = new DrillFileSystem(new Configuration());
    ImplicitColumnManager metadataManager = new ImplicitColumnManager(fixture.getOptionManager(), standardOptions(filePath), fileSystem);
    ScanOrchestratorBuilder builder = new MockScanBuilder();
    builder.withImplicitColumns(metadataManager);
    // SELECT *, plus the full set of implicit file and partition columns ...
    builder.projection(RowSetTestUtils.projectList(
        SchemaPath.DYNAMIC_STAR,
        ScanTestUtils.FULLY_QUALIFIED_NAME_COL,
        ScanTestUtils.FILE_PATH_COL,
        ScanTestUtils.FILE_NAME_COL,
        ScanTestUtils.SUFFIX_COL,
        ScanTestUtils.LAST_MODIFIED_TIME_COL,
        ScanTestUtils.PROJECT_METADATA_COL,
        ScanTestUtils.partitionColName(0),
        ScanTestUtils.partitionColName(1)));
    ScanSchemaOrchestrator scanner = new ScanSchemaOrchestrator(fixture.allocator(), builder);
    // ... FROM file
    metadataManager.startFile(filePath);
    ReaderSchemaOrchestrator reader = scanner.startReader();
    // file schema (a, b)
    TupleMetadata tableSchema = new SchemaBuilder().add("a", MinorType.INT).add("b", MinorType.VARCHAR).buildSchema();
    ResultSetLoader loader = reader.makeTableLoader(tableSchema);
    // Create a batch of data.
    reader.startBatch();
    loader.writer().addRow(1, "fred").addRow(2, "wilma");
    reader.endBatch();
    // Verify
    TupleMetadata expectedSchema = ScanTestUtils.expandImplicit(tableSchema, metadataManager, 2);
    String fqn = ImplicitFileColumns.FQN.getValue(filePath);
    String filePathValue = ImplicitFileColumns.FILEPATH.getValue(filePath);
    String fileName = ImplicitFileColumns.FILENAME.getValue(filePath);
    String suffix = ImplicitFileColumns.SUFFIX.getValue(filePath);
    String lastModifiedTime = ColumnExplorer.getImplicitColumnValue(ImplicitInternalFileColumns.LAST_MODIFIED_TIME, filePath, fileSystem);
    String projectMetadata = ColumnExplorer.getImplicitColumnValue(ImplicitInternalFileColumns.USE_METADATA, filePath, fileSystem);
    SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
        .addRow(1, "fred", fqn, filePathValue, fileName, suffix, lastModifiedTime, projectMetadata, "x", "y")
        .addRow(2, "wilma", fqn, filePathValue, fileName, suffix, lastModifiedTime, projectMetadata, "x", "y")
        .build();
    RowSetUtilities.verify(expected, fixture.wrap(scanner.output()));
    scanner.close();
}
Also used: Path (org.apache.hadoop.fs.Path), SchemaPath (org.apache.drill.common.expression.SchemaPath), ImplicitColumnManager (org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager), SingleRowSet (org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet), Configuration (org.apache.hadoop.conf.Configuration), ScanOrchestratorBuilder (org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator.ScanOrchestratorBuilder), ReaderSchemaOrchestrator (org.apache.drill.exec.physical.impl.scan.project.ReaderSchemaOrchestrator), ResultSetLoader (org.apache.drill.exec.physical.resultSet.ResultSetLoader), DrillFileSystem (org.apache.drill.exec.store.dfs.DrillFileSystem), TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata), SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder), MockScanBuilder (org.apache.drill.exec.physical.impl.scan.ScanTestUtils.MockScanBuilder), File (java.io.File), ScanSchemaOrchestrator (org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator), SubOperatorTest (org.apache.drill.test.SubOperatorTest), Test (org.junit.Test)
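
For orientation, the ImplicitFileColumns accessors used above decompose the file path into the standard implicit column values. The literal results below are illustrative for a hypothetical absolute path; they follow the usual fqn/filepath/filename/suffix semantics rather than values asserted by this test:

Path p = new Path("/tmp/drill-test/x/y/z.csv");             // hypothetical path
String fqn      = ImplicitFileColumns.FQN.getValue(p);      // "/tmp/drill-test/x/y/z.csv"
String filepath = ImplicitFileColumns.FILEPATH.getValue(p); // "/tmp/drill-test/x/y"
String filename = ImplicitFileColumns.FILENAME.getValue(p); // "z.csv"
String suffix   = ImplicitFileColumns.SUFFIX.getValue(p);   // "csv"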

Example 4 with ImplicitColumnManager

Use of org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager in project drill by apache.

From the class TestScanOrchestratorImplicitColumns, method testMetadataMulti.

/**
 * Verify that metadata columns follow distinct files.
 * <br>
 * SELECT dir0, dir1, filename, b FROM (a.csv, b.csv)
 */
@Test
public void testMetadataMulti() {
    ScanOrchestratorBuilder builder = new MockScanBuilder();
    File file = dirTestWatcher.copyResourceToRoot(Paths.get("multilevel", "csv", "1994", "Q1", "orders_94_q1.csv"), Paths.get("x", "y", "a.csv"));
    Path filePathA = new Path(file.toURI().getPath());
    File file2 = dirTestWatcher.copyResourceToRoot(Paths.get("multilevel", "csv", "1994", "Q2", "orders_94_q2.csv"), Paths.get("x", "b.csv"));
    Path filePathB = new Path(file2.toURI().getPath());
    ImplicitColumnManager metadataManager = new ImplicitColumnManager(fixture.getOptionManager(), standardOptions(Lists.newArrayList(filePathA, filePathB)));
    builder.withImplicitColumns(metadataManager);
    // SELECT dir0, dir1, filename, b ...
    builder.projection(RowSetTestUtils.projectList(
        ScanTestUtils.partitionColName(0),
        ScanTestUtils.partitionColName(1),
        ScanTestUtils.FILE_NAME_COL, "b"));
    ScanSchemaOrchestrator scanner = new ScanSchemaOrchestrator(fixture.allocator(), builder);
    // file schema (a, b)
    TupleMetadata tableSchema = new SchemaBuilder().add("a", MinorType.INT).addNullable("b", MinorType.VARCHAR, 10).buildSchema();
    TupleMetadata expectedSchema = new SchemaBuilder()
        .addNullable(ScanTestUtils.partitionColName(0), MinorType.VARCHAR)
        .addNullable(ScanTestUtils.partitionColName(1), MinorType.VARCHAR)
        .add(ScanTestUtils.FILE_NAME_COL, MinorType.VARCHAR)
        .addNullable("b", MinorType.VARCHAR, 10)
        .buildSchema();
    SchemaTracker tracker = new SchemaTracker();
    int schemaVersion;
    {
        // ... FROM file a.csv
        metadataManager.startFile(filePathA);
        ReaderSchemaOrchestrator reader = scanner.startReader();
        ResultSetLoader loader = reader.makeTableLoader(tableSchema);
        reader.startBatch();
        loader.writer().addRow(10, "fred").addRow(20, "wilma");
        reader.endBatch();
        tracker.trackSchema(scanner.output());
        schemaVersion = tracker.schemaVersion();
        SingleRowSet expected = fixture.rowSetBuilder(expectedSchema).addRow("x", "y", "a.csv", "fred").addRow("x", "y", "a.csv", "wilma").build();
        RowSetUtilities.verify(expected, fixture.wrap(scanner.output()));
        // Do explicit close (as in real code) to avoid an implicit
        // close which will blow away the current file info...
        scanner.closeReader();
    }
    {
        // ... FROM file b.csv
        metadataManager.startFile(filePathB);
        ReaderSchemaOrchestrator reader = scanner.startReader();
        ResultSetLoader loader = reader.makeTableLoader(tableSchema);
        reader.startBatch();
        loader.writer().addRow(30, "bambam").addRow(40, "betty");
        reader.endBatch();
        tracker.trackSchema(scanner.output());
        assertEquals(schemaVersion, tracker.schemaVersion());
        SingleRowSet expected = fixture.rowSetBuilder(expectedSchema).addRow("x", null, "b.csv", "bambam").addRow("x", null, "b.csv", "betty").build();
        RowSetUtilities.verify(expected, fixture.wrap(scanner.output()));
        scanner.closeReader();
    }
    scanner.close();
}
Also used: Path (org.apache.hadoop.fs.Path), SchemaPath (org.apache.drill.common.expression.SchemaPath), ImplicitColumnManager (org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager), SingleRowSet (org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet), ScanOrchestratorBuilder (org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator.ScanOrchestratorBuilder), SchemaTracker (org.apache.drill.exec.physical.impl.protocol.SchemaTracker), ReaderSchemaOrchestrator (org.apache.drill.exec.physical.impl.scan.project.ReaderSchemaOrchestrator), ResultSetLoader (org.apache.drill.exec.physical.resultSet.ResultSetLoader), TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata), SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder), MockScanBuilder (org.apache.drill.exec.physical.impl.scan.ScanTestUtils.MockScanBuilder), File (java.io.File), ScanSchemaOrchestrator (org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator), SubOperatorTest (org.apache.drill.test.SubOperatorTest), Test (org.junit.Test)
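
Stripped of its assertions, the multi-file pattern this test follows reduces to a per-file loop. The sketch below uses only calls that appear in the examples on this page; loadRows(...) is a hypothetical stand-in for reader-specific row writing:

void scanFiles(List<Path> files, ScanSchemaOrchestrator scanner,
               ImplicitColumnManager metadataManager, TupleMetadata tableSchema) {
    for (Path path : files) {
        // Bind implicit and partition column values to this file.
        metadataManager.startFile(path);
        ReaderSchemaOrchestrator reader = scanner.startReader();
        ResultSetLoader loader = reader.makeTableLoader(tableSchema);
        reader.startBatch();
        loadRows(loader);  // hypothetical: write this file's rows
        reader.endBatch();
        // Close the reader explicitly, as the test notes, so the next
        // startFile() does not lose the current file info.
        scanner.closeReader();
    }
    scanner.close();
}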

Example 5 with ImplicitColumnManager

Use of org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager in project drill by apache.

From the class TestImplicitColumnProjection, method testPartitionColumnTwoDigits.

/**
 * Test the obscure case in which the partition column name contains two
 * digits (dir11), and the equally obscure case in which the output
 * contains only partition columns.
 */
@Test
public void testPartitionColumnTwoDigits() {
    Path filePath = new Path("hdfs:///x/0/1/2/3/4/5/6/7/8/9/10/d11/z.csv");
    ImplicitColumnManager metadataManager = new ImplicitColumnManager(fixture.getOptionManager(), standardOptions(filePath));
    ScanLevelProjection scanProj = ScanLevelProjection.build(RowSetTestUtils.projectList("dir11"), ScanTestUtils.parsers(metadataManager.projectionParser()));
    TupleMetadata tableSchema = new SchemaBuilder().add("a", MinorType.VARCHAR).buildSchema();
    metadataManager.startFile(filePath);
    NullColumnBuilder builder = new NullBuilderBuilder().build();
    ResolvedRow rootTuple = new ResolvedRow(builder);
    new ExplicitSchemaProjection(scanProj, tableSchema, rootTuple, ScanTestUtils.resolvers(metadataManager));
    List<ResolvedColumn> columns = rootTuple.columns();
    assertEquals(1, columns.size());
    assertEquals("d11", ((MetadataColumn) columns.get(0)).value());
}
Also used: Path (org.apache.hadoop.fs.Path), ImplicitColumnManager (org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager), NullColumnBuilder (org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder), ResolvedRow (org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow), ScanLevelProjection (org.apache.drill.exec.physical.impl.scan.project.ScanLevelProjection), TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata), SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder), NullBuilderBuilder (org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder.NullBuilderBuilder), ExplicitSchemaProjection (org.apache.drill.exec.physical.impl.scan.project.ExplicitSchemaProjection), ResolvedColumn (org.apache.drill.exec.physical.impl.scan.project.ResolvedColumn), SubOperatorTest (org.apache.drill.test.SubOperatorTest), Test (org.junit.Test)
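
To make the dirN semantics concrete: partition column dirN names the Nth directory level below the scan's selection root, so a two-digit suffix such as dir11 must parse as a single level index. A self-contained sketch of that mapping (plain Java for illustration, not Drill code):

// Returns the depth-th directory below the selection root, or null if
// the file is not nested that deeply.
static String partitionValue(String root, String filePath, int depth) {
    String[] parts = filePath.substring(root.length() + 1).split("/");
    int dirCount = parts.length - 1;  // the last element is the file name
    return depth < dirCount ? parts[depth] : null;
}

// partitionValue("hdfs:///x", "hdfs:///x/0/1/2/3/4/5/6/7/8/9/10/d11/z.csv", 11)
// returns "d11", matching the assertion above.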

Aggregations

ImplicitColumnManager (org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager): 24 usages
Path (org.apache.hadoop.fs.Path): 24 usages
SubOperatorTest (org.apache.drill.test.SubOperatorTest): 23 usages
Test (org.junit.Test): 23 usages
SchemaPath (org.apache.drill.common.expression.SchemaPath): 17 usages
ScanLevelProjection (org.apache.drill.exec.physical.impl.scan.project.ScanLevelProjection): 14 usages
SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder): 13 usages
TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata): 13 usages
ColumnProjection (org.apache.drill.exec.physical.impl.scan.project.ColumnProjection): 8 usages
FileMetadataColumn (org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn): 7 usages
PartitionColumn (org.apache.drill.exec.physical.impl.scan.file.PartitionColumn): 7 usages
ResolvedRow (org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow): 7 usages
MockScanBuilder (org.apache.drill.exec.physical.impl.scan.ScanTestUtils.MockScanBuilder): 6 usages
UnresolvedWildcardColumn (org.apache.drill.exec.physical.impl.scan.project.AbstractUnresolvedColumn.UnresolvedWildcardColumn): 6 usages
ScanSchemaOrchestrator (org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator): 6 usages
ScanOrchestratorBuilder (org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator.ScanOrchestratorBuilder): 6 usages
File (java.io.File): 5 usages
ReaderSchemaOrchestrator (org.apache.drill.exec.physical.impl.scan.project.ReaderSchemaOrchestrator): 5 usages
ResultSetLoader (org.apache.drill.exec.physical.resultSet.ResultSetLoader): 5 usages
SingleRowSet (org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet): 5 usages