Search in sources:

Example 21 with ImplicitColumnManager

Use of org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager in the Apache Drill project.

From the class TestImplicitColumnParser, method testLegacyWildcardAndFileMetadataMixed.

/**
 * As above, but place implicit columns both before and after
 * the wildcard in the project list.
 */
@Test
public void testLegacyWildcardAndFileMetadataMixed() {
    Path filePath = new Path("hdfs:///w/x/y/z.csv");
    ImplicitColumnOptions options = standardOptions(filePath);
    options.useLegacyWildcardExpansion(true);
    ImplicitColumnManager colManager = new ImplicitColumnManager(fixture.getOptionManager(), options);
    // Project: fqn-style file name, then the wildcard, then the suffix
    ScanLevelProjection scanProj = ScanLevelProjection.build(
        RowSetTestUtils.projectList(ScanTestUtils.FILE_NAME_COL, SchemaPath.DYNAMIC_STAR, ScanTestUtils.SUFFIX_COL),
        Lists.newArrayList(colManager.projectionParser()));
    List<ColumnProjection> cols = scanProj.columns();
    assertEquals(5, cols.size());
    // Expected order: filename, wildcard, suffix, then the two partition dirs
    assertTrue(cols.get(0) instanceof FileMetadataColumn);
    assertTrue(cols.get(1) instanceof UnresolvedWildcardColumn);
    assertTrue(cols.get(2) instanceof FileMetadataColumn);
    assertTrue(cols.get(3) instanceof PartitionColumn);
    assertTrue(cols.get(4) instanceof PartitionColumn);
}
Also used : SchemaPath(org.apache.drill.common.expression.SchemaPath) Path(org.apache.hadoop.fs.Path) ImplicitColumnManager(org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager) ImplicitColumnOptions(org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager.ImplicitColumnOptions) ScanLevelProjection(org.apache.drill.exec.physical.impl.scan.project.ScanLevelProjection) ColumnProjection(org.apache.drill.exec.physical.impl.scan.project.ColumnProjection) UnresolvedWildcardColumn(org.apache.drill.exec.physical.impl.scan.project.AbstractUnresolvedColumn.UnresolvedWildcardColumn) PartitionColumn(org.apache.drill.exec.physical.impl.scan.file.PartitionColumn) FileMetadataColumn(org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 22 with ImplicitColumnManager

Use of org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager in the Apache Drill project.

From the class TestImplicitColumnParser, method testLegacyWildcardAndFileMetadata.

/**
 * Combine the wildcard with file metadata columns. The wildcard
 * expands table columns but not the metadata columns.
 */
@Test
public void testLegacyWildcardAndFileMetadata() {
    Path filePath = new Path("hdfs:///w/x/y/z.csv");
    ImplicitColumnOptions options = standardOptions(filePath);
    options.useLegacyWildcardExpansion(true);
    ImplicitColumnManager colManager = new ImplicitColumnManager(fixture.getOptionManager(), options);
    // Project: the wildcard first, then the file name and suffix
    ScanLevelProjection scanProj = ScanLevelProjection.build(
        RowSetTestUtils.projectList(SchemaPath.DYNAMIC_STAR, ScanTestUtils.FILE_NAME_COL, ScanTestUtils.SUFFIX_COL),
        Lists.newArrayList(colManager.projectionParser()));
    List<ColumnProjection> cols = scanProj.columns();
    assertEquals(5, cols.size());
    // Expected order: wildcard, filename, suffix, then the two partition dirs
    assertTrue(cols.get(0) instanceof UnresolvedWildcardColumn);
    assertTrue(cols.get(1) instanceof FileMetadataColumn);
    assertTrue(cols.get(2) instanceof FileMetadataColumn);
    assertTrue(cols.get(3) instanceof PartitionColumn);
    assertTrue(cols.get(4) instanceof PartitionColumn);
}
Also used : SchemaPath(org.apache.drill.common.expression.SchemaPath) Path(org.apache.hadoop.fs.Path) ImplicitColumnManager(org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager) ImplicitColumnOptions(org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager.ImplicitColumnOptions) ScanLevelProjection(org.apache.drill.exec.physical.impl.scan.project.ScanLevelProjection) ColumnProjection(org.apache.drill.exec.physical.impl.scan.project.ColumnProjection) UnresolvedWildcardColumn(org.apache.drill.exec.physical.impl.scan.project.AbstractUnresolvedColumn.UnresolvedWildcardColumn) PartitionColumn(org.apache.drill.exec.physical.impl.scan.file.PartitionColumn) FileMetadataColumn(org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 23 with ImplicitColumnManager

Use of org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager in the Apache Drill project.

From the class TestSchemaSmoothing, method testDiscrete.

/**
 * Sanity test for the simple, discrete case. The purpose of
 * discrete is just to run the basic lifecycle in a way that
 * is compatible with the schema-persistence version.
 */
@Test
public void testDiscrete() {
    // Implicit-column (file metadata) manager over two input files
    Path fileA = new Path("hdfs:///w/x/y/a.csv");
    Path fileB = new Path("hdfs:///w/x/y/b.csv");
    ImplicitColumnManager metadataManager = new ImplicitColumnManager(
        fixture.getOptionManager(),
        standardOptions(Lists.newArrayList(fileA, fileB)));
    // Scan-level projection: file name plus the two table columns
    ScanLevelProjection scanProj = ScanLevelProjection.build(
        RowSetTestUtils.projectList(ScanTestUtils.FILE_NAME_COL, "a", "b"),
        ScanTestUtils.parsers(metadataManager.projectionParser()));
    {
        // First reader: a.csv provides both columns (a, b)
        metadataManager.startFile(fileA);
        TupleMetadata tableSchemaAB = new SchemaBuilder()
            .add("a", MinorType.INT)
            .addNullable("b", MinorType.VARCHAR, 10)
            .buildSchema();
        NullColumnBuilder nullBuilder = new NullBuilderBuilder().build();
        ResolvedRow rootTuple = new ResolvedRow(nullBuilder);
        // Resolution occurs as a side effect of construction
        new ExplicitSchemaProjection(scanProj, tableSchemaAB, rootTuple, ScanTestUtils.resolvers(metadataManager));
        // Full output schema: filename + both table columns
        TupleMetadata expectedSchema = new SchemaBuilder()
            .add("filename", MinorType.VARCHAR)
            .add("a", MinorType.INT)
            .addNullable("b", MinorType.VARCHAR, 10)
            .buildSchema();
        List<ResolvedColumn> columns = rootTuple.columns();
        assertEquals(3, columns.size());
        assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema));
        assertEquals(ScanTestUtils.FILE_NAME_COL, columns.get(0).name());
        assertEquals("a.csv", ((FileMetadataColumn) columns.get(0)).value());
        assertTrue(columns.get(1) instanceof ResolvedTableColumn);
    }
    {
        // Second reader: b.csv provides only column (a)
        metadataManager.startFile(fileB);
        TupleMetadata tableSchemaA = new SchemaBuilder()
            .add("a", MinorType.INT)
            .buildSchema();
        NullColumnBuilder nullBuilder = new NullBuilderBuilder().build();
        ResolvedRow rootTuple = new ResolvedRow(nullBuilder);
        new ExplicitSchemaProjection(scanProj, tableSchemaA, rootTuple, ScanTestUtils.resolvers(metadataManager));
        // "Discrete" mode does not remember the missing column's prior type;
        // (it is filled in at the vector level as part of vector persistence.)
        // The column is marked NULL so the null-column builder supplies the
        // proper type later.
        TupleMetadata expectedSchema = new SchemaBuilder()
            .add("filename", MinorType.VARCHAR)
            .add("a", MinorType.INT)
            .addNullable("b", MinorType.NULL)
            .buildSchema();
        List<ResolvedColumn> columns = rootTuple.columns();
        assertEquals(3, columns.size());
        assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema));
        assertEquals(ScanTestUtils.FILE_NAME_COL, columns.get(0).name());
        assertEquals("b.csv", ((FileMetadataColumn) columns.get(0)).value());
        assertTrue(columns.get(1) instanceof ResolvedTableColumn);
        assertTrue(columns.get(2) instanceof ResolvedNullColumn);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) ImplicitColumnManager(org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager) NullBuilderBuilder(org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder.NullBuilderBuilder) ResolvedRow(org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) List(java.util.List) FileMetadataColumn(org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 24 with ImplicitColumnManager

Use of org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager in the Apache Drill project.

From the class TestImplicitColumnParser, method testRevisedWildcard.

/**
 * Verify wildcard expansion: a project-all query yields just the
 * unresolved wildcard column at scan level.
 */
@Test
public void testRevisedWildcard() {
    Path filePath = new Path("hdfs:///w/x/y/z.csv");
    ImplicitColumnManager colManager = new ImplicitColumnManager(fixture.getOptionManager(), standardOptions(filePath));
    ScanLevelProjection scanProj = ScanLevelProjection.build(
        RowSetTestUtils.projectAll(),
        Lists.newArrayList(colManager.projectionParser()));
    List<ColumnProjection> cols = scanProj.columns();
    // Only the wildcard itself appears; expansion happens later
    assertEquals(1, cols.size());
    assertTrue(cols.get(0) instanceof UnresolvedWildcardColumn);
}
Also used : SchemaPath(org.apache.drill.common.expression.SchemaPath) Path(org.apache.hadoop.fs.Path) ImplicitColumnManager(org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager) ScanLevelProjection(org.apache.drill.exec.physical.impl.scan.project.ScanLevelProjection) ColumnProjection(org.apache.drill.exec.physical.impl.scan.project.ColumnProjection) UnresolvedWildcardColumn(org.apache.drill.exec.physical.impl.scan.project.AbstractUnresolvedColumn.UnresolvedWildcardColumn) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Aggregations

ImplicitColumnManager (org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager)24 Path (org.apache.hadoop.fs.Path)24 SubOperatorTest (org.apache.drill.test.SubOperatorTest)23 Test (org.junit.Test)23 SchemaPath (org.apache.drill.common.expression.SchemaPath)17 ScanLevelProjection (org.apache.drill.exec.physical.impl.scan.project.ScanLevelProjection)14 SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder)13 TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata)13 ColumnProjection (org.apache.drill.exec.physical.impl.scan.project.ColumnProjection)8 FileMetadataColumn (org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn)7 PartitionColumn (org.apache.drill.exec.physical.impl.scan.file.PartitionColumn)7 ResolvedRow (org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow)7 MockScanBuilder (org.apache.drill.exec.physical.impl.scan.ScanTestUtils.MockScanBuilder)6 UnresolvedWildcardColumn (org.apache.drill.exec.physical.impl.scan.project.AbstractUnresolvedColumn.UnresolvedWildcardColumn)6 ScanSchemaOrchestrator (org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator)6 ScanOrchestratorBuilder (org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator.ScanOrchestratorBuilder)6 File (java.io.File)5 ReaderSchemaOrchestrator (org.apache.drill.exec.physical.impl.scan.project.ReaderSchemaOrchestrator)5 ResultSetLoader (org.apache.drill.exec.physical.resultSet.ResultSetLoader)5 SingleRowSet (org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet)5