
Example 16 with ImplicitColumnManager

Use of org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager in the apache/drill project.

From class TestImplicitColumnProjection, method testFileMetadata:

/**
 * Test a query with explicit mention of file metadata columns.
 */
@Test
public void testFileMetadata() {
    Path filePath = new Path("hdfs:///w/x/y/z.csv");
    ImplicitColumnManager metadataManager = new ImplicitColumnManager(fixture.getOptionManager(), standardOptions(filePath));
    ScanLevelProjection scanProj = ScanLevelProjection.build(
        RowSetTestUtils.projectList("a",
            ScanTestUtils.FULLY_QUALIFIED_NAME_COL,
            // Sic, to test case sensitivity
            "filEPath",
            ScanTestUtils.FILE_NAME_COL,
            ScanTestUtils.SUFFIX_COL),
        ScanTestUtils.parsers(metadataManager.projectionParser()));
    assertEquals(5, scanProj.columns().size());
    assertEquals(ScanTestUtils.FULLY_QUALIFIED_NAME_COL, scanProj.columns().get(1).name());
    assertEquals("filEPath", scanProj.columns().get(2).name());
    assertEquals(ScanTestUtils.FILE_NAME_COL, scanProj.columns().get(3).name());
    assertEquals(ScanTestUtils.SUFFIX_COL, scanProj.columns().get(4).name());
    // Schema-level projection, fills in values.
    TupleMetadata tableSchema = new SchemaBuilder().add("a", MinorType.VARCHAR).buildSchema();
    metadataManager.startFile(filePath);
    NullColumnBuilder builder = new NullBuilderBuilder().build();
    ResolvedRow rootTuple = new ResolvedRow(builder);
    new ExplicitSchemaProjection(scanProj, tableSchema, rootTuple, ScanTestUtils.resolvers(metadataManager));
    List<ResolvedColumn> columns = rootTuple.columns();
    assertEquals(5, columns.size());
    assertEquals("/w/x/y/z.csv", ((MetadataColumn) columns.get(1)).value());
    assertEquals("/w/x/y", ((MetadataColumn) columns.get(2)).value());
    assertEquals("z.csv", ((MetadataColumn) columns.get(3)).value());
    assertEquals("csv", ((MetadataColumn) columns.get(4)).value());
}
Also used:
- Path (org.apache.hadoop.fs.Path)
- ImplicitColumnManager (org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager)
- NullColumnBuilder (org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder)
- ResolvedRow (org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow)
- ScanLevelProjection (org.apache.drill.exec.physical.impl.scan.project.ScanLevelProjection)
- TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata)
- SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder)
- NullBuilderBuilder (org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder.NullBuilderBuilder)
- ExplicitSchemaProjection (org.apache.drill.exec.physical.impl.scan.project.ExplicitSchemaProjection)
- ResolvedColumn (org.apache.drill.exec.physical.impl.scan.project.ResolvedColumn)
- SubOperatorTest (org.apache.drill.test.SubOperatorTest)
- Test (org.junit.Test)
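The four implicit values asserted above derive entirely from the file path; nothing is read from the file's contents. As a minimal illustration of that derivation (plain Hadoop Path API, not Drill's actual implementation), the same strings can be computed like this:

import org.apache.hadoop.fs.Path;

public class FileMetadataSketch {
    public static void main(String[] args) {
        Path filePath = new Path("hdfs:///w/x/y/z.csv");
        // Fully qualified name: the path without the scheme
        String fqn = filePath.toUri().getPath();                  // "/w/x/y/z.csv"
        // File path column: the containing directory
        String dirPath = filePath.getParent().toUri().getPath();  // "/w/x/y"
        // File name and suffix
        String fileName = filePath.getName();                     // "z.csv"
        String suffix = fileName.substring(fileName.lastIndexOf('.') + 1); // "csv"
        System.out.println(fqn + " | " + dirPath + " | " + fileName + " | " + suffix);
    }
}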

Example 17 with ImplicitColumnManager

Use of org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager in the apache/drill project.

From class TestScanOrchestratorImplicitColumns, method testMixture:

/**
 * Test SELECT dir0, b, suffix, c FROM table(a, b)
 * Full combination of metadata, table and null columns
 */
@Test
public void testMixture() {
    ScanOrchestratorBuilder builder = new MockScanBuilder();
    File file = dirTestWatcher.copyResourceToRoot(Paths.get("multilevel", "csv", "1994", "Q1", "orders_94_q1.csv"), Paths.get("x", "y", "z.csv"));
    Path filePath = new Path(file.toURI().getPath());
    ImplicitColumnManager metadataManager = new ImplicitColumnManager(fixture.getOptionManager(), standardOptions(filePath));
    builder.withImplicitColumns(metadataManager);
    // SELECT dir0, b, suffix, c ...
    builder.projection(RowSetTestUtils.projectList("dir0", "b", "suffix", "c"));
    ScanSchemaOrchestrator scanner = new ScanSchemaOrchestrator(fixture.allocator(), builder);
    // ... FROM file
    metadataManager.startFile(filePath);
    ReaderSchemaOrchestrator reader = scanner.startReader();
    // file schema (a, b)
    TupleMetadata tableSchema = new SchemaBuilder().add("a", MinorType.INT).add("b", MinorType.VARCHAR).buildSchema();
    // Create the table loader
    ResultSetLoader loader = reader.makeTableLoader(tableSchema);
    TupleMetadata expectedSchema = new SchemaBuilder()
        .addNullable("dir0", MinorType.VARCHAR)
        .add("b", MinorType.VARCHAR)
        .add("suffix", MinorType.VARCHAR)
        .addNullable("c", MinorType.INT)
        .buildSchema();
    // Create a batch of data.
    reader.startBatch();
    loader.writer().addRow(1, "fred").addRow(2, "wilma");
    reader.endBatch();
    // Verify
    SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
        .addRow("x", "fred", "csv", null)
        .addRow("x", "wilma", "csv", null)
        .build();
    RowSetUtilities.verify(expected, fixture.wrap(scanner.output()));
    scanner.close();
}
Also used:
- Path (org.apache.hadoop.fs.Path)
- SchemaPath (org.apache.drill.common.expression.SchemaPath)
- ImplicitColumnManager (org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager)
- SingleRowSet (org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet)
- ResultSetLoader (org.apache.drill.exec.physical.resultSet.ResultSetLoader)
- ScanOrchestratorBuilder (org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator.ScanOrchestratorBuilder)
- TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata)
- SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder)
- MockScanBuilder (org.apache.drill.exec.physical.impl.scan.ScanTestUtils.MockScanBuilder)
- File (java.io.File)
- ScanSchemaOrchestrator (org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator)
- ReaderSchemaOrchestrator (org.apache.drill.exec.physical.impl.scan.project.ReaderSchemaOrchestrator)
- SubOperatorTest (org.apache.drill.test.SubOperatorTest)
- Test (org.junit.Test)
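Note how dir0 resolves to "x": partition columns are the directory levels between the scan root and the file, so a file placed at x/y/z.csv under the root yields dir0 = "x" and dir1 = "y" (only dir0 is projected here). A standalone sketch of that derivation, illustrative only and using a hypothetical "/base" root rather than the test fixture's:

import org.apache.hadoop.fs.Path;

public class PartitionColumnSketch {
    public static void main(String[] args) {
        Path root = new Path("/base");
        Path file = new Path("/base/x/y/z.csv");
        // Path of the file relative to the scan root: "x/y/z.csv"
        String relative = file.toUri().getPath()
                .substring(root.toUri().getPath().length() + 1);
        String[] segments = relative.split("/");
        // Every segment except the last (the file name) is a directory level
        for (int i = 0; i < segments.length - 1; i++) {
            System.out.println("dir" + i + " = " + segments[i]); // dir0 = x, dir1 = y
        }
    }
}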

Example 18 with ImplicitColumnManager

Use of org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager in the apache/drill project.

From class TestColumnsArray, method buildScanner:

private MockScanner buildScanner(List<SchemaPath> projList) {
    MockScanner mock = new MockScanner();
    // Set up the file metadata manager
    Path filePath = new Path("hdfs:///w/x/y/z.csv");
    ImplicitColumnManager metadataManager = new ImplicitColumnManager(fixture.getOptionManager(), standardOptions(filePath));
    // ...and the columns array manager
    ColumnsArrayManager colsManager = new ColumnsArrayManager(false);
    // Configure the schema orchestrator
    ScanOrchestratorBuilder builder = new MockScanBuilder();
    builder.withImplicitColumns(metadataManager);
    builder.addParser(colsManager.projectionParser());
    builder.addResolver(colsManager.resolver());
    // SELECT <proj list> ...
    builder.projection(projList);
    mock.scanner = new ScanSchemaOrchestrator(fixture.allocator(), builder);
    // FROM z.csv
    metadataManager.startFile(filePath);
    mock.reader = mock.scanner.startReader();
    // Table schema (columns: VARCHAR[])
    TupleMetadata tableSchema = new SchemaBuilder().addArray(ColumnsScanFramework.COLUMNS_COL, MinorType.VARCHAR).buildSchema();
    mock.loader = mock.reader.makeTableLoader(tableSchema);
    // First empty batch
    mock.reader.defineSchema();
    return mock;
}
Also used:
- Path (org.apache.hadoop.fs.Path)
- SchemaPath (org.apache.drill.common.expression.SchemaPath)
- ImplicitColumnManager (org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager)
- ColumnsArrayManager (org.apache.drill.exec.physical.impl.scan.columns.ColumnsArrayManager)
- ScanOrchestratorBuilder (org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator.ScanOrchestratorBuilder)
- TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata)
- SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder)
- MockScanBuilder (org.apache.drill.exec.physical.impl.scan.ScanTestUtils.MockScanBuilder)
- ScanSchemaOrchestrator (org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator)
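For context, ColumnsScanFramework.COLUMNS_COL is the special `columns` column Drill's text readers expose: a repeated VARCHAR holding every field of a delimited line, addressable in SQL as columns[0], columns[1], and so on. A trivial illustration of the shape of that data, with a hypothetical sample line (plain Java, not the reader itself):

public class ColumnsArraySketch {
    public static void main(String[] args) {
        // Hypothetical CSV line; each line becomes one row whose single
        // "columns" value is a VARCHAR array of the line's fields.
        String line = "10,fred,1994-01-20";
        String[] columns = line.split(",");
        System.out.println(columns[1]); // "fred", i.e. columns[1] in SQL
    }
}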

Example 19 with ImplicitColumnManager

Use of org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager in the apache/drill project.

From class TestImplicitColumnParser, method testBasics:

@Test
public void testBasics() {
    Path filePath = new Path("hdfs:///w/x/y/z.csv");
    ImplicitColumnManager implictColManager = new ImplicitColumnManager(fixture.getOptionManager(), standardOptions(filePath));
    // Simulate SELECT a, b, c ...
    ScanLevelProjection scanProj = ScanLevelProjection.build(
        RowSetTestUtils.projectList("a", "b", "c"),
        Lists.newArrayList(implictColManager.projectionParser()));
    // Verify
    assertFalse(scanProj.projectAll());
    assertFalse(implictColManager.hasImplicitCols());
}
Also used:
- SchemaPath (org.apache.drill.common.expression.SchemaPath)
- Path (org.apache.hadoop.fs.Path)
- ImplicitColumnManager (org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager)
- ScanLevelProjection (org.apache.drill.exec.physical.impl.scan.project.ScanLevelProjection)
- SubOperatorTest (org.apache.drill.test.SubOperatorTest)
- Test (org.junit.Test)
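For contrast with testBasics, projecting one of the implicit names should cause the parser to claim it. A sketch only, under the same fixture assumptions as the test above (same manager, same helpers):

    // Sketch: same setup as testBasics, but the projection now names an
    // implicit column, so hasImplicitCols() should report true.
    ScanLevelProjection scanProj = ScanLevelProjection.build(
        RowSetTestUtils.projectList("a", ScanTestUtils.FILE_NAME_COL),
        Lists.newArrayList(implictColManager.projectionParser()));
    assertTrue(implictColManager.hasImplicitCols());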

Example 20 with ImplicitColumnManager

Use of org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager in the apache/drill project.

From class TestImplicitColumnParser, method testShadowed:

/**
 * Verify that names that look like metadata columns, but appear
 * to be maps or arrays, are not interpreted as metadata. That is,
 * the projected table map or array "shadows" the metadata column.
 */
@Test
public void testShadowed() {
    Path filePath = new Path("hdfs:///w/x/y/z.csv");
    ImplicitColumnManager implictColManager = new ImplicitColumnManager(fixture.getOptionManager(), standardOptions(filePath));
    ScanLevelProjection scanProj = ScanLevelProjection.build(
        RowSetTestUtils.projectList(
            ScanTestUtils.FILE_NAME_COL + ".a",
            ScanTestUtils.FILE_PATH_COL + "[0]",
            ScanTestUtils.partitionColName(0) + ".b",
            ScanTestUtils.partitionColName(1) + "[0]",
            ScanTestUtils.SUFFIX_COL),
        Lists.newArrayList(implictColManager.projectionParser()));
    List<ColumnProjection> cols = scanProj.columns();
    assertEquals(5, cols.size());
    for (int i = 0; i < 4; i++) {
        assertTrue(scanProj.columns().get(i) instanceof UnresolvedColumn);
    }
    assertTrue(scanProj.columns().get(4) instanceof FileMetadataColumn);
}
Also used:
- SchemaPath (org.apache.drill.common.expression.SchemaPath)
- Path (org.apache.hadoop.fs.Path)
- ImplicitColumnManager (org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager)
- ScanLevelProjection (org.apache.drill.exec.physical.impl.scan.project.ScanLevelProjection)
- UnresolvedColumn (org.apache.drill.exec.physical.impl.scan.project.AbstractUnresolvedColumn.UnresolvedColumn)
- ColumnProjection (org.apache.drill.exec.physical.impl.scan.project.ColumnProjection)
- FileMetadataColumn (org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn)
- SubOperatorTest (org.apache.drill.test.SubOperatorTest)
- Test (org.junit.Test)

Aggregations

- ImplicitColumnManager (org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager): 24 uses
- Path (org.apache.hadoop.fs.Path): 24 uses
- SubOperatorTest (org.apache.drill.test.SubOperatorTest): 23 uses
- Test (org.junit.Test): 23 uses
- SchemaPath (org.apache.drill.common.expression.SchemaPath): 17 uses
- ScanLevelProjection (org.apache.drill.exec.physical.impl.scan.project.ScanLevelProjection): 14 uses
- SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder): 13 uses
- TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata): 13 uses
- ColumnProjection (org.apache.drill.exec.physical.impl.scan.project.ColumnProjection): 8 uses
- FileMetadataColumn (org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn): 7 uses
- PartitionColumn (org.apache.drill.exec.physical.impl.scan.file.PartitionColumn): 7 uses
- ResolvedRow (org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow): 7 uses
- MockScanBuilder (org.apache.drill.exec.physical.impl.scan.ScanTestUtils.MockScanBuilder): 6 uses
- UnresolvedWildcardColumn (org.apache.drill.exec.physical.impl.scan.project.AbstractUnresolvedColumn.UnresolvedWildcardColumn): 6 uses
- ScanSchemaOrchestrator (org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator): 6 uses
- ScanOrchestratorBuilder (org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator.ScanOrchestratorBuilder): 6 uses
- File (java.io.File): 5 uses
- ReaderSchemaOrchestrator (org.apache.drill.exec.physical.impl.scan.project.ReaderSchemaOrchestrator): 5 uses
- ResultSetLoader (org.apache.drill.exec.physical.resultSet.ResultSetLoader): 5 uses
- SingleRowSet (org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet): 5 uses