Search in sources :

Example 6 with FileMetadataColumn

use of org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn in project drill by apache.

the class TestImplicitColumnParser method testLegacyWildcardAndFileMetadataMixed.

/**
 * Legacy wildcard expansion where the projection list places one
 * implicit (file metadata) column before the wildcard and another
 * after it. The scan-level projection must keep those three entries
 * in the requested order and follow them with two partition columns.
 */
@Test
public void testLegacyWildcardAndFileMetadataMixed() {
    ImplicitColumnOptions options = standardOptions(new Path("hdfs:///w/x/y/z.csv"));
    options.useLegacyWildcardExpansion(true);
    ImplicitColumnManager implicitColManager = new ImplicitColumnManager(fixture.getOptionManager(), options);

    // Requested projection: file name column, wildcard, suffix column.
    ScanLevelProjection scanProj = ScanLevelProjection.build(
        RowSetTestUtils.projectList(
            ScanTestUtils.FILE_NAME_COL,
            SchemaPath.DYNAMIC_STAR,
            ScanTestUtils.SUFFIX_COL),
        Lists.newArrayList(implicitColManager.projectionParser()));

    // Expected column kind at each output position.
    Class<?>[] expectedKinds = {
        FileMetadataColumn.class,
        UnresolvedWildcardColumn.class,
        FileMetadataColumn.class,
        PartitionColumn.class,
        PartitionColumn.class
    };

    List<ColumnProjection> cols = scanProj.columns();
    assertEquals(expectedKinds.length, cols.size());
    for (int i = 0; i < expectedKinds.length; i++) {
        assertTrue(expectedKinds[i].isInstance(cols.get(i)));
    }
}
Also used : SchemaPath(org.apache.drill.common.expression.SchemaPath) Path(org.apache.hadoop.fs.Path) ImplicitColumnManager(org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager) ImplicitColumnOptions(org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager.ImplicitColumnOptions) ScanLevelProjection(org.apache.drill.exec.physical.impl.scan.project.ScanLevelProjection) ColumnProjection(org.apache.drill.exec.physical.impl.scan.project.ColumnProjection) UnresolvedWildcardColumn(org.apache.drill.exec.physical.impl.scan.project.AbstractUnresolvedColumn.UnresolvedWildcardColumn) PartitionColumn(org.apache.drill.exec.physical.impl.scan.file.PartitionColumn) FileMetadataColumn(org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 7 with FileMetadataColumn

use of org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn in project drill by apache.

the class TestImplicitColumnParser method testLegacyWildcardAndFileMetadata.

/**
 * Legacy wildcard expansion with the wildcard listed first, followed by
 * two file metadata columns. The wildcard stands for the table columns
 * only; the metadata columns remain separate entries, and two partition
 * columns are expected at the end of the projection.
 */
@Test
public void testLegacyWildcardAndFileMetadata() {
    ImplicitColumnOptions options = standardOptions(new Path("hdfs:///w/x/y/z.csv"));
    options.useLegacyWildcardExpansion(true);
    ImplicitColumnManager implicitColManager = new ImplicitColumnManager(fixture.getOptionManager(), options);

    // Requested projection: wildcard, file name column, suffix column.
    ScanLevelProjection scanProj = ScanLevelProjection.build(
        RowSetTestUtils.projectList(
            SchemaPath.DYNAMIC_STAR,
            ScanTestUtils.FILE_NAME_COL,
            ScanTestUtils.SUFFIX_COL),
        Lists.newArrayList(implicitColManager.projectionParser()));

    // Expected column kind at each output position.
    Class<?>[] expectedKinds = {
        UnresolvedWildcardColumn.class,
        FileMetadataColumn.class,
        FileMetadataColumn.class,
        PartitionColumn.class,
        PartitionColumn.class
    };

    List<ColumnProjection> cols = scanProj.columns();
    assertEquals(expectedKinds.length, cols.size());
    for (int i = 0; i < expectedKinds.length; i++) {
        assertTrue(expectedKinds[i].isInstance(cols.get(i)));
    }
}
Also used : SchemaPath(org.apache.drill.common.expression.SchemaPath) Path(org.apache.hadoop.fs.Path) ImplicitColumnManager(org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager) ImplicitColumnOptions(org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager.ImplicitColumnOptions) ScanLevelProjection(org.apache.drill.exec.physical.impl.scan.project.ScanLevelProjection) ColumnProjection(org.apache.drill.exec.physical.impl.scan.project.ColumnProjection) UnresolvedWildcardColumn(org.apache.drill.exec.physical.impl.scan.project.AbstractUnresolvedColumn.UnresolvedWildcardColumn) PartitionColumn(org.apache.drill.exec.physical.impl.scan.file.PartitionColumn) FileMetadataColumn(org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 8 with FileMetadataColumn

use of org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn in project drill by apache.

the class TestSchemaSmoothing method testDiscrete.

/**
 * Sanity check of the plain "discrete" projection path: each file is
 * resolved against the scan-level projection on its own, with no schema
 * carried over from the previous file. It runs the basic lifecycle in a
 * form that stays compatible with the schema-persistence variant.
 */
@Test
public void testDiscrete() {
    // Two files handled by one implicit-column (file metadata) manager
    Path fileA = new Path("hdfs:///w/x/y/a.csv");
    Path fileB = new Path("hdfs:///w/x/y/b.csv");
    ImplicitColumnManager implicitCols = new ImplicitColumnManager(
        fixture.getOptionManager(),
        standardOptions(Lists.newArrayList(fileA, fileB)));

    // Scan-level projection: filename, a, b
    ScanLevelProjection scanProj = ScanLevelProjection.build(
        RowSetTestUtils.projectList(ScanTestUtils.FILE_NAME_COL, "a", "b"),
        ScanTestUtils.parsers(implicitCols.projectionParser()));
    {
        // First file, a.csv, whose table schema is (a, b)
        implicitCols.startFile(fileA);
        TupleMetadata tableSchema = new SchemaBuilder()
            .add("a", MinorType.INT)
            .addNullable("b", MinorType.VARCHAR, 10)
            .buildSchema();

        // Resolve the projection against the table schema; the
        // constructor populates the output row as a side effect.
        ResolvedRow outputRow = new ResolvedRow(new NullBuilderBuilder().build());
        new ExplicitSchemaProjection(scanProj, tableSchema, outputRow, ScanTestUtils.resolvers(implicitCols));

        // Output should be the file name followed by both table columns
        TupleMetadata expectedSchema = new SchemaBuilder()
            .add("filename", MinorType.VARCHAR)
            .add("a", MinorType.INT)
            .addNullable("b", MinorType.VARCHAR, 10)
            .buildSchema();

        List<ResolvedColumn> resolved = outputRow.columns();
        assertEquals(3, resolved.size());
        assertTrue(ScanTestUtils.schema(outputRow).isEquivalent(expectedSchema));
        ResolvedColumn fileNameCol = resolved.get(0);
        assertEquals(ScanTestUtils.FILE_NAME_COL, fileNameCol.name());
        assertEquals("a.csv", ((FileMetadataColumn) fileNameCol).value());
        assertTrue(resolved.get(1) instanceof ResolvedTableColumn);
    }
    {
        // Second file, b.csv, whose table schema is only (a)
        implicitCols.startFile(fileB);
        TupleMetadata tableSchema = new SchemaBuilder()
            .add("a", MinorType.INT)
            .buildSchema();

        ResolvedRow outputRow = new ResolvedRow(new NullBuilderBuilder().build());
        new ExplicitSchemaProjection(scanProj, tableSchema, outputRow, ScanTestUtils.resolvers(implicitCols));

        // In discrete mode the type of the missing column "b" is not
        // remembered from the earlier file. (That is handled at the
        // vector level as part of vector persistence.) Projection marks
        // it with type NULL so the null column builder can supply the
        // proper type later.
        TupleMetadata expectedSchema = new SchemaBuilder()
            .add("filename", MinorType.VARCHAR)
            .add("a", MinorType.INT)
            .addNullable("b", MinorType.NULL)
            .buildSchema();

        List<ResolvedColumn> resolved = outputRow.columns();
        assertEquals(3, resolved.size());
        assertTrue(ScanTestUtils.schema(outputRow).isEquivalent(expectedSchema));
        ResolvedColumn fileNameCol = resolved.get(0);
        assertEquals(ScanTestUtils.FILE_NAME_COL, fileNameCol.name());
        assertEquals("b.csv", ((FileMetadataColumn) fileNameCol).value());
        assertTrue(resolved.get(1) instanceof ResolvedTableColumn);
        assertTrue(resolved.get(2) instanceof ResolvedNullColumn);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) ImplicitColumnManager(org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager) NullBuilderBuilder(org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder.NullBuilderBuilder) ResolvedRow(org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) List(java.util.List) FileMetadataColumn(org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 9 with FileMetadataColumn

use of org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn in project drill by apache.

the class TestConstantColumnLoader method testFileMetadata.

/**
 * Loads constant columns for one file metadata column (the file suffix)
 * and one partition column, then checks that a two-row batch repeats
 * the expected constant values in every row.
 */
@Test
public void testFileMetadata() {
    // File z.csv, described relative to the root hdfs:///w
    FileMetadata fileInfo = new FileMetadata(new Path("hdfs:///w/x/y/z.csv"), new Path("hdfs:///w"));

    // Column specs: the suffix column first, then partition column 1
    List<ConstantColumnSpec> colSpecs = new ArrayList<>();
    FileMetadataColumnDefn suffixDefn = new FileMetadataColumnDefn(ScanTestUtils.SUFFIX_COL, ImplicitFileColumns.SUFFIX);
    colSpecs.add(new FileMetadataColumn(ScanTestUtils.SUFFIX_COL, suffixDefn, fileInfo, null, 0));
    String partColName = ScanTestUtils.partitionColName(1);
    colSpecs.add(new PartitionColumn(partColName, 1, fileInfo, null, 0));

    ConstantColumnLoader loader = new ConstantColumnLoader(new ResultVectorCacheImpl(fixture.allocator()), colSpecs);

    // Load a first two-row batch; its result is not inspected here
    loader.load(2);

    // Expect (suffix, partition) = ("csv", "y") in both rows
    TupleMetadata expectedSchema = new SchemaBuilder()
        .add(ScanTestUtils.SUFFIX_COL, MinorType.VARCHAR)
        .addNullable(partColName, MinorType.VARCHAR)
        .buildSchema();
    SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
        .addRow("csv", "y")
        .addRow("csv", "y")
        .build();

    // Compare against a second two-row load, then release the loader
    new RowSetComparison(expected).verifyAndClearAll(fixture.wrap(loader.load(2)));
    loader.close();
}
Also used : Path(org.apache.hadoop.fs.Path) FileMetadataColumnDefn(org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumnDefn) SingleRowSet(org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) FileMetadata(org.apache.drill.exec.physical.impl.scan.file.FileMetadata) ArrayList(java.util.ArrayList) PartitionColumn(org.apache.drill.exec.physical.impl.scan.file.PartitionColumn) RowSetComparison(org.apache.drill.test.rowSet.RowSetComparison) ConstantColumnSpec(org.apache.drill.exec.physical.impl.scan.project.ConstantColumnLoader.ConstantColumnSpec) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) FileMetadataColumn(org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn) ResultVectorCacheImpl(org.apache.drill.exec.physical.resultSet.impl.ResultVectorCacheImpl) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Aggregations

FileMetadataColumn (org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn)9 SubOperatorTest (org.apache.drill.test.SubOperatorTest)8 Path (org.apache.hadoop.fs.Path)8 Test (org.junit.Test)8 ImplicitColumnManager (org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager)7 ScanLevelProjection (org.apache.drill.exec.physical.impl.scan.project.ScanLevelProjection)6 SchemaPath (org.apache.drill.common.expression.SchemaPath)5 PartitionColumn (org.apache.drill.exec.physical.impl.scan.file.PartitionColumn)4 ColumnProjection (org.apache.drill.exec.physical.impl.scan.project.ColumnProjection)4 UnresolvedColumn (org.apache.drill.exec.physical.impl.scan.project.AbstractUnresolvedColumn.UnresolvedColumn)3 SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder)3 TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata)3 FileMetadataColumnDefn (org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumnDefn)2 ImplicitColumnOptions (org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager.ImplicitColumnOptions)2 UnresolvedWildcardColumn (org.apache.drill.exec.physical.impl.scan.project.AbstractUnresolvedColumn.UnresolvedWildcardColumn)2 NullBuilderBuilder (org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder.NullBuilderBuilder)2 ResolvedRow (org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow)2 ArrayList (java.util.ArrayList)1 List (java.util.List)1 ColumnsArrayParser (org.apache.drill.exec.physical.impl.scan.columns.ColumnsArrayParser)1