Use of org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn in project drill by apache.
The class TestImplicitColumnParser, method testLegacyWildcardAndFileMetadataMixed.
/**
* As above, but include implicit columns before and after the
* wildcard.
*/
@Test
public void testLegacyWildcardAndFileMetadataMixed() {
  Path filePath = new Path("hdfs:///w/x/y/z.csv");
  ImplicitColumnOptions options = standardOptions(filePath);
  options.useLegacyWildcardExpansion(true);
  ImplicitColumnManager implicitColManager = new ImplicitColumnManager(fixture.getOptionManager(), options);
  ScanLevelProjection scanProj = ScanLevelProjection.build(
      RowSetTestUtils.projectList(ScanTestUtils.FILE_NAME_COL, SchemaPath.DYNAMIC_STAR, ScanTestUtils.SUFFIX_COL),
      Lists.newArrayList(implicitColManager.projectionParser()));
  List<ColumnProjection> cols = scanProj.columns();
  assertEquals(5, cols.size());
  assertTrue(cols.get(0) instanceof FileMetadataColumn);
  assertTrue(cols.get(1) instanceof UnresolvedWildcardColumn);
  assertTrue(cols.get(2) instanceof FileMetadataColumn);
  assertTrue(cols.get(3) instanceof PartitionColumn);
  assertTrue(cols.get(4) instanceof PartitionColumn);
}
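Both wildcard tests on this page rely on a standardOptions(...) helper defined in the enclosing test class, which the listing does not show. Below is a minimal sketch of what such a helper could look like, assuming ImplicitColumnOptions has a no-argument constructor and setSelectionRoot/setFiles setters; those details are inferred from how the options object is used above, not copied from the Drill source.

// Hypothetical reconstruction of the standardOptions helper used by these tests.
// The no-arg constructor and the setSelectionRoot/setFiles setter names are assumptions.
private ImplicitColumnOptions standardOptions(Path filePath) {
  return standardOptions(Lists.newArrayList(filePath));
}

private ImplicitColumnOptions standardOptions(List<Path> files) {
  ImplicitColumnOptions options = new ImplicitColumnOptions();
  options.useLegacyWildcardExpansion(false); // tests that need legacy expansion enable it explicitly
  options.setSelectionRoot(new Path("hdfs:///w"));
  options.setFiles(files);
  return options;
}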
Use of org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn in project drill by apache.
The class TestImplicitColumnParser, method testLegacyWildcardAndFileMetadata.
/**
* Combine wildcard and file metadata columns. The wildcard expands
* table columns but not metadata columns.
*/
@Test
public void testLegacyWildcardAndFileMetadata() {
  Path filePath = new Path("hdfs:///w/x/y/z.csv");
  ImplicitColumnOptions options = standardOptions(filePath);
  options.useLegacyWildcardExpansion(true);
  ImplicitColumnManager implicitColManager = new ImplicitColumnManager(fixture.getOptionManager(), options);
  ScanLevelProjection scanProj = ScanLevelProjection.build(
      RowSetTestUtils.projectList(SchemaPath.DYNAMIC_STAR, ScanTestUtils.FILE_NAME_COL, ScanTestUtils.SUFFIX_COL),
      Lists.newArrayList(implicitColManager.projectionParser()));
  List<ColumnProjection> cols = scanProj.columns();
  assertEquals(5, cols.size());
  assertTrue(cols.get(0) instanceof UnresolvedWildcardColumn);
  assertTrue(cols.get(1) instanceof FileMetadataColumn);
  assertTrue(cols.get(2) instanceof FileMetadataColumn);
  assertTrue(cols.get(3) instanceof PartitionColumn);
  assertTrue(cols.get(4) instanceof PartitionColumn);
}
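As a small follow-up, the test could also check which implicit column landed in which slot, not just the column kinds. This is a sketch only, to be appended inside the test above; it assumes ColumnProjection exposes a name() accessor, which is not shown on this page.

// Sketch: verify projected names as well as column kinds.
// Assumes ColumnProjection.name(); adjust if the accessor differs in your Drill version.
assertEquals(ScanTestUtils.FILE_NAME_COL, cols.get(1).name());
assertEquals(ScanTestUtils.SUFFIX_COL, cols.get(2).name());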
Use of org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn in project drill by apache.
The class TestSchemaSmoothing, method testDiscrete.
/**
* Sanity test for the simple, discrete case. The purpose of
* discrete is just to run the basic lifecycle in a way that
* is compatible with the schema-persistence version.
*/
@Test
public void testDiscrete() {
  // Set up the file metadata manager
  Path filePathA = new Path("hdfs:///w/x/y/a.csv");
  Path filePathB = new Path("hdfs:///w/x/y/b.csv");
  ImplicitColumnManager metadataManager = new ImplicitColumnManager(fixture.getOptionManager(),
      standardOptions(Lists.newArrayList(filePathA, filePathB)));

  // Set up the scan level projection
  ScanLevelProjection scanProj = ScanLevelProjection.build(
      RowSetTestUtils.projectList(ScanTestUtils.FILE_NAME_COL, "a", "b"),
      ScanTestUtils.parsers(metadataManager.projectionParser()));
  {
    // Define a file a.csv
    metadataManager.startFile(filePathA);

    // Build the output schema from the (a, b) table schema
    TupleMetadata twoColSchema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .addNullable("b", MinorType.VARCHAR, 10)
        .buildSchema();
    final NullColumnBuilder builder = new NullBuilderBuilder().build();
    ResolvedRow rootTuple = new ResolvedRow(builder);
    new ExplicitSchemaProjection(scanProj, twoColSchema, rootTuple, ScanTestUtils.resolvers(metadataManager));

    // Verify the full output schema
    TupleMetadata expectedSchema = new SchemaBuilder()
        .add("filename", MinorType.VARCHAR)
        .add("a", MinorType.INT)
        .addNullable("b", MinorType.VARCHAR, 10)
        .buildSchema();

    // Verify
    List<ResolvedColumn> columns = rootTuple.columns();
    assertEquals(3, columns.size());
    assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema));
    assertEquals(ScanTestUtils.FILE_NAME_COL, columns.get(0).name());
    assertEquals("a.csv", ((FileMetadataColumn) columns.get(0)).value());
    assertTrue(columns.get(1) instanceof ResolvedTableColumn);
  }
  {
    // Define a file b.csv
    metadataManager.startFile(filePathB);

    // Build the output schema from the (a) table schema
    TupleMetadata oneColSchema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .buildSchema();
    final NullColumnBuilder builder = new NullBuilderBuilder().build();
    ResolvedRow rootTuple = new ResolvedRow(builder);
    new ExplicitSchemaProjection(scanProj, oneColSchema, rootTuple, ScanTestUtils.resolvers(metadataManager));

    // Verify the full output schema
    // Since this mode is "discrete", we don't remember the type
    // of the missing column. (Instead, it is filled in at the
    // vector level as part of vector persistence.) During projection, it is
    // marked with type NULL so that the null column builder will fill in
    // the proper type.
    TupleMetadata expectedSchema = new SchemaBuilder()
        .add("filename", MinorType.VARCHAR)
        .add("a", MinorType.INT)
        .addNullable("b", MinorType.NULL)
        .buildSchema();

    // Verify
    List<ResolvedColumn> columns = rootTuple.columns();
    assertEquals(3, columns.size());
    assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema));
    assertEquals(ScanTestUtils.FILE_NAME_COL, columns.get(0).name());
    assertEquals("b.csv", ((FileMetadataColumn) columns.get(0)).value());
    assertTrue(columns.get(1) instanceof ResolvedTableColumn);
    assertTrue(columns.get(2) instanceof ResolvedNullColumn);
  }
}
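As the comment in the second block explains, the missing column b is resolved as a placeholder of type NULL so the null-column builder can choose the final type later. One more assertion could make that explicit; this is a sketch that would sit at the end of the second block (where columns is still in scope) and uses the same name() accessor already called on the filename column above.

// Sketch: confirm the placeholder created for the missing column "b".
ResolvedNullColumn nullCol = (ResolvedNullColumn) columns.get(2);
assertEquals("b", nullCol.name());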
Use of org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn in project drill by apache.
The class TestConstantColumnLoader, method testFileMetadata.
@Test
public void testFileMetadata() {
  FileMetadata fileInfo = new FileMetadata(new Path("hdfs:///w/x/y/z.csv"), new Path("hdfs:///w"));
  List<ConstantColumnSpec> defns = new ArrayList<>();
  FileMetadataColumnDefn iDefn = new FileMetadataColumnDefn(ScanTestUtils.SUFFIX_COL, ImplicitFileColumns.SUFFIX);
  FileMetadataColumn iCol = new FileMetadataColumn(ScanTestUtils.SUFFIX_COL, iDefn, fileInfo, null, 0);
  defns.add(iCol);
  String partColName = ScanTestUtils.partitionColName(1);
  PartitionColumn pCol = new PartitionColumn(partColName, 1, fileInfo, null, 0);
  defns.add(pCol);
  ResultVectorCacheImpl cache = new ResultVectorCacheImpl(fixture.allocator());
  ConstantColumnLoader staticLoader = new ConstantColumnLoader(cache, defns);

  // Create a batch
  staticLoader.load(2);

  // Verify
  TupleMetadata expectedSchema = new SchemaBuilder()
      .add(ScanTestUtils.SUFFIX_COL, MinorType.VARCHAR)
      .addNullable(partColName, MinorType.VARCHAR)
      .buildSchema();
  SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
      .addRow("csv", "y")
      .addRow("csv", "y")
      .build();
  new RowSetComparison(expected).verifyAndClearAll(fixture.wrap(staticLoader.load(2)));
  staticLoader.close();
}
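The expected values come straight from the file path: relative to the root hdfs:///w, the file hdfs:///w/x/y/z.csv sits under directories x (partition index 0) and y (partition index 1), and its suffix is csv, so every loaded row carries the constants "csv" and "y". A possible spot check of the partition value is sketched below; it assumes FileMetadata exposes a partition(int) accessor, which is an assumption not verified against the Drill source.

// Hypothetical check: partition index 1 of /w/x/y/z.csv relative to /w is "y".
// FileMetadata.partition(int) is an assumed accessor name.
assertEquals("y", fileInfo.partition(1));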