Example 11 with ImplicitColumnManager

Use of org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager in project drill by apache.

Class TestImplicitColumnParser, method testPreferedWildcardExpansionWithOverlap.

@Test
public void testPreferedWildcardExpansionWithOverlap() {
    Path filePath = new Path("hdfs:///w/x/y/z.csv");
    ImplicitColumnOptions options = standardOptions(filePath);
    options.useLegacyWildcardExpansion(true);
    ImplicitColumnManager implicitColManager = new ImplicitColumnManager(
        fixture.getOptionManager(), options);
    // Project the wildcard plus an explicit dir1 partition column
    ScanLevelProjection scanProj = ScanLevelProjection.build(
        RowSetTestUtils.projectList(SchemaPath.DYNAMIC_STAR, ScanTestUtils.partitionColName(1)),
        Lists.newArrayList(implicitColManager.projectionParser()));
    // The wildcard expands to the remaining partition column (dir0)
    // without duplicating the explicitly requested dir1
    List<ColumnProjection> cols = scanProj.columns();
    assertEquals(3, cols.size());
    assertTrue(cols.get(0) instanceof UnresolvedWildcardColumn);
    assertTrue(cols.get(1) instanceof PartitionColumn);
    assertEquals(1, ((PartitionColumn) cols.get(1)).partition());
    assertTrue(cols.get(2) instanceof PartitionColumn);
    assertEquals(0, ((PartitionColumn) cols.get(2)).partition());
}
Also used: SchemaPath(org.apache.drill.common.expression.SchemaPath) Path(org.apache.hadoop.fs.Path) ImplicitColumnManager(org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager) ImplicitColumnOptions(org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager.ImplicitColumnOptions) ScanLevelProjection(org.apache.drill.exec.physical.impl.scan.project.ScanLevelProjection) ColumnProjection(org.apache.drill.exec.physical.impl.scan.project.ColumnProjection) UnresolvedWildcardColumn(org.apache.drill.exec.physical.impl.scan.project.AbstractUnresolvedColumn.UnresolvedWildcardColumn) PartitionColumn(org.apache.drill.exec.physical.impl.scan.file.PartitionColumn) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)
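
For intuition, here is a minimal sketch of the ordering rule this test asserts, assuming a two-level directory depth for hdfs:///w/x/y/z.csv. The class and helper below are illustrative, not part of the Drill API:

import java.util.ArrayList;
import java.util.List;

public class LegacyWildcardExpansionSketch {

    // With legacy wildcard expansion, an explicitly projected partition
    // column keeps its requested slot after the wildcard; the wildcard then
    // appends the remaining partition columns implied by the file depth.
    static List<String> expand(int requestedPartition, int fileDepth) {
        List<String> cols = new ArrayList<>();
        cols.add("**");                       // the wildcard stays first
        cols.add("dir" + requestedPartition); // explicit request keeps its slot
        for (int i = 0; i < fileDepth; i++) {
            if (i != requestedPartition) {    // never duplicate the explicit column
                cols.add("dir" + i);
            }
        }
        return cols;
    }

    public static void main(String[] args) {
        // Overlap case from this test: dir1 is both requested and within depth
        System.out.println(expand(1, 2)); // [**, dir1, dir0] -> 3 columns
    }
}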

Example 12 with ImplicitColumnManager

Use of org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager in project drill by apache.

Class TestImplicitColumnParser, method testPreferredPartitionExpansion.

@Test
public void testPreferredPartitionExpansion() {
    Path filePath = new Path("hdfs:///w/x/y/z.csv");
    ImplicitColumnOptions options = standardOptions(filePath);
    options.useLegacyWildcardExpansion(true);
    ImplicitColumnManager implicitColManager = new ImplicitColumnManager(
        fixture.getOptionManager(), options);
    // Project the wildcard plus an explicit dir8 partition column, which
    // lies beyond the two directory levels of the file path
    ScanLevelProjection scanProj = ScanLevelProjection.build(
        RowSetTestUtils.projectList(SchemaPath.DYNAMIC_STAR, ScanTestUtils.partitionColName(8)),
        Lists.newArrayList(implicitColManager.projectionParser()));
    // dir8 keeps its requested slot; the wildcard then adds dir0 and dir1
    List<ColumnProjection> cols = scanProj.columns();
    assertEquals(4, cols.size());
    assertTrue(cols.get(0) instanceof UnresolvedWildcardColumn);
    assertTrue(cols.get(1) instanceof PartitionColumn);
    assertEquals(8, ((PartitionColumn) cols.get(1)).partition());
    assertTrue(cols.get(2) instanceof PartitionColumn);
    assertEquals(0, ((PartitionColumn) cols.get(2)).partition());
    assertTrue(cols.get(3) instanceof PartitionColumn);
    assertEquals(1, ((PartitionColumn) cols.get(3)).partition());
}
Also used: SchemaPath(org.apache.drill.common.expression.SchemaPath) Path(org.apache.hadoop.fs.Path) ImplicitColumnManager(org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager) ImplicitColumnOptions(org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager.ImplicitColumnOptions) ScanLevelProjection(org.apache.drill.exec.physical.impl.scan.project.ScanLevelProjection) ColumnProjection(org.apache.drill.exec.physical.impl.scan.project.ColumnProjection) UnresolvedWildcardColumn(org.apache.drill.exec.physical.impl.scan.project.AbstractUnresolvedColumn.UnresolvedWildcardColumn) PartitionColumn(org.apache.drill.exec.physical.impl.scan.file.PartitionColumn) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)
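
The sketch after Example 11 also reproduces this case: expand(8, 2) yields [**, dir8, dir0, dir1], matching the four columns and partition indexes asserted above, because dir8 lies beyond the two-level file depth and so does not overlap the wildcard expansion.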

Example 13 with ImplicitColumnManager

Use of org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager in project drill by apache.

Class TestSchemaSmoothing, method testLongerPartitionLength.

/**
 * If using the legacy wildcard expansion, we can reuse the same schema
 * even if the new partition path is longer than the previous one,
 * because all file names are provided up front.
 */
@Test
public void testLongerPartitionLength() {
    // Set up the file metadata manager
    Path filePathA = new Path("hdfs:///w/x/a.csv");
    Path filePathB = new Path("hdfs:///w/x/y/b.csv");
    ImplicitColumnManager metadataManager = new ImplicitColumnManager(
        fixture.getOptionManager(),
        standardOptions(Lists.newArrayList(filePathA, filePathB)));
    // Set up the scan-level projection
    ScanLevelProjection scanProj = ScanLevelProjection.build(
        ScanTestUtils.projectAllWithAllImplicit(2),
        ScanTestUtils.parsers(metadataManager.projectionParser()));
    // Define the schema smoother
    SchemaSmoother smoother = new SchemaSmoother(scanProj,
        ScanTestUtils.resolvers(metadataManager));
    TupleMetadata tableSchema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .add("b", MinorType.VARCHAR)
        .buildSchema();
    TupleMetadata expectedSchema = ScanTestUtils.expandImplicit(tableSchema, metadataManager, 2);
    {
        metadataManager.startFile(filePathA);
        ResolvedRow rootTuple = doResolve(smoother, tableSchema);
        assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema));
    }
    {
        // The longer path reuses the schema: the version stays at 1
        metadataManager.startFile(filePathB);
        ResolvedRow rootTuple = doResolve(smoother, tableSchema);
        assertEquals(1, smoother.schemaVersion());
        assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema));
    }
}
Also used: Path(org.apache.hadoop.fs.Path) ImplicitColumnManager(org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager) ResolvedRow(org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 14 with ImplicitColumnManager

Use of org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager in project drill by apache.

Class TestSchemaSmoothing, method testShorterPartitionLength.

/**
 * If using the legacy wildcard expansion, the schema is reused if the new
 * partition path is shorter than the previous one. (Unneeded partition
 * columns are set to null by the scan projector.)
 */
@Test
public void testShorterPartitionLength() {
    // Set up the file metadata manager
    Path filePathA = new Path("hdfs:///w/x/y/a.csv");
    Path filePathB = new Path("hdfs:///w/x/b.csv");
    ImplicitColumnManager metadataManager = new ImplicitColumnManager(
        fixture.getOptionManager(),
        standardOptions(Lists.newArrayList(filePathA, filePathB)));
    // Set up the scan-level projection
    ScanLevelProjection scanProj = ScanLevelProjection.build(
        ScanTestUtils.projectAllWithAllImplicit(2),
        ScanTestUtils.parsers(metadataManager.projectionParser()));
    // Define the schema smoother
    SchemaSmoother smoother = new SchemaSmoother(scanProj,
        ScanTestUtils.resolvers(metadataManager));
    TupleMetadata tableSchema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .add("b", MinorType.VARCHAR)
        .buildSchema();
    TupleMetadata expectedSchema = ScanTestUtils.expandImplicit(tableSchema, metadataManager, 2);
    {
        metadataManager.startFile(filePathA);
        ResolvedRow rootTuple = doResolve(smoother, tableSchema);
        assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema));
    }
    {
        // The shorter path reuses the schema: the version stays at 1
        metadataManager.startFile(filePathB);
        ResolvedRow rootTuple = doResolve(smoother, tableSchema);
        assertEquals(1, smoother.schemaVersion());
        assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema));
    }
}
Also used: Path(org.apache.hadoop.fs.Path) ImplicitColumnManager(org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager) ResolvedRow(org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 15 with ImplicitColumnManager

Use of org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager in project drill by apache.

Class TestSchemaSmoothing, method testSamePartitionLength.

/**
 * If using the legacy wildcard expansion, the schema is reused if the
 * partition paths are the same length.
 */
@Test
public void testSamePartitionLength() {
    // Set up the file metadata manager
    Path filePathA = new Path("hdfs:///w/x/y/a.csv");
    Path filePathB = new Path("hdfs:///w/x/y/b.csv");
    ImplicitColumnManager metadataManager = new ImplicitColumnManager(
        fixture.getOptionManager(),
        standardOptions(Lists.newArrayList(filePathA, filePathB)));
    // Set up the scan-level projection
    ScanLevelProjection scanProj = ScanLevelProjection.build(
        ScanTestUtils.projectAllWithAllImplicit(2),
        ScanTestUtils.parsers(metadataManager.projectionParser()));
    // Define the schema smoother
    SchemaSmoother smoother = new SchemaSmoother(scanProj,
        ScanTestUtils.resolvers(metadataManager));
    TupleMetadata tableSchema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .add("b", MinorType.VARCHAR)
        .buildSchema();
    TupleMetadata expectedSchema = ScanTestUtils.expandImplicit(tableSchema, metadataManager, 2);
    {
        metadataManager.startFile(filePathA);
        ResolvedRow rootTuple = doResolve(smoother, tableSchema);
        assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema));
    }
    {
        // The same-length path reuses the schema: the version stays at 1
        metadataManager.startFile(filePathB);
        ResolvedRow rootTuple = doResolve(smoother, tableSchema);
        assertEquals(1, smoother.schemaVersion());
        assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema));
    }
}
Also used: Path(org.apache.hadoop.fs.Path) ImplicitColumnManager(org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager) ResolvedRow(org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)
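
The three schema-smoothing tests above share one invariant: SchemaSmoother bumps its schema version only when a file's resolved schema differs from the prior one. Below is a minimal sketch of that invariant, assuming schemas compare by structural equality; the class and fields are illustrative stand-ins, not the Drill SchemaSmoother API:

import java.util.Objects;

public class SchemaSmootherSketch {

    private String priorSchema;  // stand-in for Drill's resolved TupleMetadata
    private int schemaVersion;   // starts at 0; the first file bumps it to 1

    // Adopt the reader's schema only when it differs from the prior one;
    // otherwise reuse the prior schema and leave the version unchanged.
    String resolve(String readerSchema) {
        if (!Objects.equals(readerSchema, priorSchema)) {
            priorSchema = readerSchema;
            schemaVersion++;
        }
        return priorSchema;
    }

    public static void main(String[] args) {
        SchemaSmootherSketch smoother = new SchemaSmootherSketch();
        // Both files resolve to the same expanded schema (table columns plus
        // dir0 and dir1), so the version stays at 1, as the tests assert.
        smoother.resolve("a INT, b VARCHAR, dir0, dir1");
        smoother.resolve("a INT, b VARCHAR, dir0, dir1");
        System.out.println(smoother.schemaVersion); // prints 1
    }
}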

Aggregations

ImplicitColumnManager (org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager) 24
Path (org.apache.hadoop.fs.Path) 24
SubOperatorTest (org.apache.drill.test.SubOperatorTest) 23
Test (org.junit.Test) 23
SchemaPath (org.apache.drill.common.expression.SchemaPath) 17
ScanLevelProjection (org.apache.drill.exec.physical.impl.scan.project.ScanLevelProjection) 14
SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder) 13
TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata) 13
ColumnProjection (org.apache.drill.exec.physical.impl.scan.project.ColumnProjection) 8
FileMetadataColumn (org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn) 7
PartitionColumn (org.apache.drill.exec.physical.impl.scan.file.PartitionColumn) 7
ResolvedRow (org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow) 7
MockScanBuilder (org.apache.drill.exec.physical.impl.scan.ScanTestUtils.MockScanBuilder) 6
UnresolvedWildcardColumn (org.apache.drill.exec.physical.impl.scan.project.AbstractUnresolvedColumn.UnresolvedWildcardColumn) 6
ScanSchemaOrchestrator (org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator) 6
ScanOrchestratorBuilder (org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator.ScanOrchestratorBuilder) 6
File (java.io.File) 5
ReaderSchemaOrchestrator (org.apache.drill.exec.physical.impl.scan.project.ReaderSchemaOrchestrator) 5
ResultSetLoader (org.apache.drill.exec.physical.resultSet.ResultSetLoader) 5
SingleRowSet (org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) 5