use of org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager in project drill by apache.
the class TestScanOrchestratorImplicitColumns method testSelectNone.
/**
 * Projects only a column the table does not supply:
 * {@code SELECT c FROM table(a, b)}.
 * <p>
 * The expected output has a single nullable INT column {@code c} holding
 * a null for each of the two rows written, and none of the file's own
 * columns.
 */
@Test
public void testSelectNone() {
  // Place a copy of the CSV resource at x/y/z.csv and address it by path.
  File csvFile = dirTestWatcher.copyResourceToRoot(
      Paths.get("multilevel", "csv", "1994", "Q1", "orders_94_q1.csv"),
      Paths.get("x", "y", "z.csv"));
  Path csvPath = new Path(csvFile.toURI().getPath());

  // Scan configuration: implicit-column handling plus "SELECT c".
  ImplicitColumnManager implicitCols = new ImplicitColumnManager(
      fixture.getOptionManager(),
      standardOptions(csvPath));
  ScanOrchestratorBuilder scanBuilder = new MockScanBuilder();
  scanBuilder.withImplicitColumns(implicitCols);
  scanBuilder.projection(RowSetTestUtils.projectList("c"));
  ScanSchemaOrchestrator scanOrchestrator =
      new ScanSchemaOrchestrator(fixture.allocator(), scanBuilder);

  // ... FROM file
  implicitCols.startFile(csvPath);
  ReaderSchemaOrchestrator readerOrchestrator = scanOrchestrator.startReader();

  // The reader reports a table schema of (a, b).
  TupleMetadata readerSchema = new SchemaBuilder()
      .add("a", MinorType.INT)
      .add("b", MinorType.VARCHAR)
      .buildSchema();
  ResultSetLoader tableLoader = readerOrchestrator.makeTableLoader(readerSchema);

  // Write one batch of two rows.
  readerOrchestrator.startBatch();
  tableLoader.writer()
      .addRow(1, "fred")
      .addRow(2, "wilma");
  readerOrchestrator.endBatch();

  // Only the unmatched column c should appear, null in both rows.
  TupleMetadata outputSchema = new SchemaBuilder()
      .addNullable("c", MinorType.INT)
      .buildSchema();
  SingleRowSet expectedRows = fixture.rowSetBuilder(outputSchema)
      .addSingleCol(null)
      .addSingleCol(null)
      .build();
  RowSetUtilities.verify(expectedRows, fixture.wrap(scanOrchestrator.output()));
  scanOrchestrator.close();
}
use of org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager in project drill by apache.
the class TestScanOrchestratorImplicitColumns method testEarlySchemaSelectAllAndMetadata.
/**
 * Projects both table columns and two implicit columns:
 * {@code SELECT a, b, dir0, suffix FROM table(a, b)}.
 * <p>
 * The output is checked twice: once as an empty batch right after the
 * schema is defined, and once after a batch of two rows is written.
 */
@Test
public void testEarlySchemaSelectAllAndMetadata() {
  // Type the builder is told to use for null columns: nullable VARCHAR.
  MajorType nullableVarchar = MajorType.newBuilder()
      .setMinorType(MinorType.VARCHAR)
      .setMode(DataMode.OPTIONAL)
      .build();
  ScanOrchestratorBuilder scanBuilder = new MockScanBuilder();
  scanBuilder.nullType(nullableVarchar);

  // Place a copy of the CSV resource at x/y/z.csv and address it by path.
  File csvFile = dirTestWatcher.copyResourceToRoot(
      Paths.get("multilevel", "csv", "1994", "Q1", "orders_94_q1.csv"),
      Paths.get("x", "y", "z.csv"));
  Path csvPath = new Path(csvFile.toURI().getPath());

  ImplicitColumnManager implicitCols = new ImplicitColumnManager(
      fixture.getOptionManager(),
      standardOptions(csvPath));
  scanBuilder.withImplicitColumns(implicitCols);

  // SELECT a, b, dir0, suffix ...
  scanBuilder.projection(RowSetTestUtils.projectList("a", "b", "dir0", "suffix"));
  ScanSchemaOrchestrator scanOrchestrator =
      new ScanSchemaOrchestrator(fixture.allocator(), scanBuilder);

  // ... FROM file
  implicitCols.startFile(csvPath);
  ReaderSchemaOrchestrator readerOrchestrator = scanOrchestrator.startReader();

  // The reader reports a table schema of (a, b).
  TupleMetadata readerSchema = new SchemaBuilder()
      .add("a", MinorType.INT)
      .add("b", MinorType.VARCHAR)
      .buildSchema();
  ResultSetLoader tableLoader = readerOrchestrator.makeTableLoader(readerSchema);

  // Defining the schema up front should yield an empty batch.
  readerOrchestrator.defineSchema();
  TupleMetadata outputSchema = new SchemaBuilder()
      .add("a", MinorType.INT)
      .add("b", MinorType.VARCHAR)
      .addNullable("dir0", MinorType.VARCHAR)
      .add("suffix", MinorType.VARCHAR)
      .buildSchema();
  SingleRowSet expectedEmpty = fixture.rowSetBuilder(outputSchema).build();
  assertNotNull(scanOrchestrator.output());
  RowSetUtilities.verify(expectedEmpty, fixture.wrap(scanOrchestrator.output()));

  // Write one batch of two rows.
  readerOrchestrator.startBatch();
  tableLoader.writer()
      .addRow(1, "fred")
      .addRow(2, "wilma");
  readerOrchestrator.endBatch();

  // Each row should carry the table values followed by dir0 and suffix.
  SingleRowSet expectedData = fixture.rowSetBuilder(outputSchema)
      .addRow(1, "fred", "x", "csv")
      .addRow(2, "wilma", "x", "csv")
      .build();
  RowSetUtilities.verify(expectedData, fixture.wrap(scanOrchestrator.output()));
  scanOrchestrator.close();
}
use of org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager in project drill by apache.
the class TestScanOrchestratorImplicitColumns method testWildcardWithMetadata.
/**
 * Resolves a projection list made of the wildcard plus explicitly named
 * implicit columns and the first two partition columns.
 * <p>
 * The expected row values for the implicit columns are computed from the
 * same file path (and, for the internal columns, the same file system)
 * that is handed to the scan.
 */
@Test
public void testWildcardWithMetadata() throws IOException {
  // Place a copy of the CSV resource at x/y/z.csv and address it by path.
  File csvFile = dirTestWatcher.copyResourceToRoot(
      Paths.get("multilevel", "csv", "1994", "Q1", "orders_94_q1.csv"),
      Paths.get("x", "y", "z.csv"));
  Path csvPath = new Path(csvFile.toURI().getPath());

  DrillFileSystem dfs = new DrillFileSystem(new Configuration());
  ImplicitColumnManager implicitCols = new ImplicitColumnManager(
      fixture.getOptionManager(),
      standardOptions(csvPath),
      dfs);
  ScanOrchestratorBuilder scanBuilder = new MockScanBuilder();
  scanBuilder.withImplicitColumns(implicitCols);

  // SELECT *, <fqn, file path, file name, suffix, last modified time,
  // project metadata>, <partition 0>, <partition 1> ...
  scanBuilder.projection(RowSetTestUtils.projectList(
      SchemaPath.DYNAMIC_STAR,
      ScanTestUtils.FULLY_QUALIFIED_NAME_COL,
      ScanTestUtils.FILE_PATH_COL,
      ScanTestUtils.FILE_NAME_COL,
      ScanTestUtils.SUFFIX_COL,
      ScanTestUtils.LAST_MODIFIED_TIME_COL,
      ScanTestUtils.PROJECT_METADATA_COL,
      ScanTestUtils.partitionColName(0),
      ScanTestUtils.partitionColName(1)));
  ScanSchemaOrchestrator scanOrchestrator =
      new ScanSchemaOrchestrator(fixture.allocator(), scanBuilder);

  // ... FROM file
  implicitCols.startFile(csvPath);
  ReaderSchemaOrchestrator readerOrchestrator = scanOrchestrator.startReader();

  // The reader reports a table schema of (a, b).
  TupleMetadata readerSchema = new SchemaBuilder()
      .add("a", MinorType.INT)
      .add("b", MinorType.VARCHAR)
      .buildSchema();
  ResultSetLoader tableLoader = readerOrchestrator.makeTableLoader(readerSchema);

  // Write one batch of two rows.
  readerOrchestrator.startBatch();
  tableLoader.writer()
      .addRow(1, "fred")
      .addRow(2, "wilma");
  readerOrchestrator.endBatch();

  // Build the expected output: table columns, then the implicit columns,
  // then the two partition values.
  TupleMetadata outputSchema = ScanTestUtils.expandImplicit(readerSchema, implicitCols, 2);
  String expectedFqn = ImplicitFileColumns.FQN.getValue(csvPath);
  String expectedFilePath = ImplicitFileColumns.FILEPATH.getValue(csvPath);
  String expectedFileName = ImplicitFileColumns.FILENAME.getValue(csvPath);
  String expectedSuffix = ImplicitFileColumns.SUFFIX.getValue(csvPath);
  String expectedModTime = ColumnExplorer.getImplicitColumnValue(
      ImplicitInternalFileColumns.LAST_MODIFIED_TIME, csvPath, dfs);
  String expectedUseMetadata = ColumnExplorer.getImplicitColumnValue(
      ImplicitInternalFileColumns.USE_METADATA, csvPath, dfs);
  SingleRowSet expectedRows = fixture.rowSetBuilder(outputSchema)
      .addRow(1, "fred", expectedFqn, expectedFilePath, expectedFileName,
          expectedSuffix, expectedModTime, expectedUseMetadata, "x", "y")
      .addRow(2, "wilma", expectedFqn, expectedFilePath, expectedFileName,
          expectedSuffix, expectedModTime, expectedUseMetadata, "x", "y")
      .build();
  RowSetUtilities.verify(expectedRows, fixture.wrap(scanOrchestrator.output()));
  scanOrchestrator.close();
}
use of org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager in project drill by apache.
the class TestScanOrchestratorImplicitColumns method testMetadataMulti.
/**
 * Verifies that implicit column values track the file currently being
 * read when one scan covers two files.
 * <br>
 * SELECT dir0, dir1, filename, b FROM (x/y/a.csv, x/b.csv)
 * <br>
 * Also checks that the tracked schema version is the same after the
 * second file as it was after the first.
 */
@Test
public void testMetadataMulti() {
  // Two inputs at different directory depths: x/y/a.csv and x/b.csv.
  File csvFileA = dirTestWatcher.copyResourceToRoot(
      Paths.get("multilevel", "csv", "1994", "Q1", "orders_94_q1.csv"),
      Paths.get("x", "y", "a.csv"));
  Path pathA = new Path(csvFileA.toURI().getPath());
  File csvFileB = dirTestWatcher.copyResourceToRoot(
      Paths.get("multilevel", "csv", "1994", "Q2", "orders_94_q2.csv"),
      Paths.get("x", "b.csv"));
  Path pathB = new Path(csvFileB.toURI().getPath());

  ImplicitColumnManager implicitCols = new ImplicitColumnManager(
      fixture.getOptionManager(),
      standardOptions(Lists.newArrayList(pathA, pathB)));
  ScanOrchestratorBuilder scanBuilder = new MockScanBuilder();
  scanBuilder.withImplicitColumns(implicitCols);

  // SELECT dir0, dir1, filename, b ...
  scanBuilder.projection(RowSetTestUtils.projectList(
      ScanTestUtils.partitionColName(0),
      ScanTestUtils.partitionColName(1),
      ScanTestUtils.FILE_NAME_COL,
      "b"));
  ScanSchemaOrchestrator scanOrchestrator =
      new ScanSchemaOrchestrator(fixture.allocator(), scanBuilder);

  // Both readers report the same table schema of (a, b).
  TupleMetadata readerSchema = new SchemaBuilder()
      .add("a", MinorType.INT)
      .addNullable("b", MinorType.VARCHAR, 10)
      .buildSchema();
  TupleMetadata outputSchema = new SchemaBuilder()
      .addNullable(ScanTestUtils.partitionColName(0), MinorType.VARCHAR)
      .addNullable(ScanTestUtils.partitionColName(1), MinorType.VARCHAR)
      .add(ScanTestUtils.FILE_NAME_COL, MinorType.VARCHAR)
      .addNullable("b", MinorType.VARCHAR, 10)
      .buildSchema();
  SchemaTracker schemaTracker = new SchemaTracker();

  // ... FROM file a.csv
  implicitCols.startFile(pathA);
  ReaderSchemaOrchestrator readerA = scanOrchestrator.startReader();
  ResultSetLoader loaderA = readerA.makeTableLoader(readerSchema);
  readerA.startBatch();
  loaderA.writer()
      .addRow(10, "fred")
      .addRow(20, "wilma");
  readerA.endBatch();
  schemaTracker.trackSchema(scanOrchestrator.output());
  final int versionAfterFirstFile = schemaTracker.schemaVersion();
  SingleRowSet expectedFromA = fixture.rowSetBuilder(outputSchema)
      .addRow("x", "y", "a.csv", "fred")
      .addRow("x", "y", "a.csv", "wilma")
      .build();
  RowSetUtilities.verify(expectedFromA, fixture.wrap(scanOrchestrator.output()));
  // Do explicit close (as in real code) to avoid an implicit
  // close which will blow away the current file info...
  scanOrchestrator.closeReader();

  // ... FROM file b.csv
  implicitCols.startFile(pathB);
  ReaderSchemaOrchestrator readerB = scanOrchestrator.startReader();
  ResultSetLoader loaderB = readerB.makeTableLoader(readerSchema);
  readerB.startBatch();
  loaderB.writer()
      .addRow(30, "bambam")
      .addRow(40, "betty");
  readerB.endBatch();
  schemaTracker.trackSchema(scanOrchestrator.output());
  assertEquals(versionAfterFirstFile, schemaTracker.schemaVersion());
  // b.csv sits one level up, so the second partition column is null.
  SingleRowSet expectedFromB = fixture.rowSetBuilder(outputSchema)
      .addRow("x", null, "b.csv", "bambam")
      .addRow("x", null, "b.csv", "betty")
      .build();
  RowSetUtilities.verify(expectedFromB, fixture.wrap(scanOrchestrator.output()));
  scanOrchestrator.closeReader();

  scanOrchestrator.close();
}
use of org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager in project drill by apache.
the class TestImplicitColumnProjection method testPartitionColumnTwoDigits.
/**
 * Covers a partition column whose index has two digits ({@code dir11}),
 * in a projection that contains nothing but that partition column.
 * <p>
 * The single resolved column is expected to hold {@code "d11"}, the
 * matching directory name in the file path.
 */
@Test
public void testPartitionColumnTwoDigits() {
  Path deepPath = new Path("hdfs:///x/0/1/2/3/4/5/6/7/8/9/10/d11/z.csv");
  ImplicitColumnManager implicitCols = new ImplicitColumnManager(
      fixture.getOptionManager(),
      standardOptions(deepPath));

  // Scan-level projection: SELECT dir11
  ScanLevelProjection scanProjection = ScanLevelProjection.build(
      RowSetTestUtils.projectList("dir11"),
      ScanTestUtils.parsers(implicitCols.projectionParser()));

  // Table schema of a single VARCHAR column that is not projected.
  TupleMetadata readerSchema = new SchemaBuilder()
      .add("a", MinorType.VARCHAR)
      .buildSchema();

  implicitCols.startFile(deepPath);

  // The projection is constructed for its effect on the resolved row.
  NullColumnBuilder nullBuilder = new NullBuilderBuilder().build();
  ResolvedRow resolvedRow = new ResolvedRow(nullBuilder);
  new ExplicitSchemaProjection(
      scanProjection,
      readerSchema,
      resolvedRow,
      ScanTestUtils.resolvers(implicitCols));

  List<ResolvedColumn> resolvedCols = resolvedRow.columns();
  assertEquals(1, resolvedCols.size());
  MetadataColumn partitionCol = (MetadataColumn) resolvedCols.get(0);
  assertEquals("d11", partitionCol.value());
}
Aggregations