Search in sources :

Example 16 with ResolvedRow

use of org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow in project drill by apache.

From the class TestSchemaSmoothing, method testReordering:

/**
 * Preserve the prior schema if table is a subset. Map the table
 * columns to the output using the prior schema ordering.
 */
@Test
public void testReordering() {
    // Wildcard projection with the standard test parsers and resolvers.
    final ScanLevelProjection projection =
        ScanLevelProjection.build(RowSetTestUtils.projectAll(), ScanTestUtils.parsers());
    final SchemaSmoother schemaSmoother =
        new SchemaSmoother(projection, ScanTestUtils.resolvers());

    // Prior schema: (a nullable INT, b VARCHAR, c BIGINT array).
    final TupleMetadata priorSchema = new SchemaBuilder()
        .addNullable("a", MinorType.INT)
        .add("b", MinorType.VARCHAR)
        .addArray("c", MinorType.BIGINT)
        .buildSchema();

    // Table schema: a reordered subset of the prior schema (b, a).
    final TupleMetadata tableSchema = new SchemaBuilder()
        .add("b", MinorType.VARCHAR)
        .addNullable("a", MinorType.INT)
        .buildSchema();

    // First batch establishes the prior schema.
    doResolve(schemaSmoother, priorSchema);

    // Second batch: subset in a different order. The schema version stays
    // at 1 and the resolved output matches the prior schema's ordering.
    final ResolvedRow resolvedRow = doResolve(schemaSmoother, tableSchema);
    assertEquals(1, schemaSmoother.schemaVersion());
    assertTrue(ScanTestUtils.schema(resolvedRow).isEquivalent(priorSchema));
}
Also used : ResolvedRow(org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 17 with ResolvedRow

use of org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow in project drill by apache.

From the class TestSchemaSmoothing, method testMissingNullableColumns:

/**
 * Preserve the prior schema if table is a subset and missing columns
 * are nullable or repeated.
 */
@Test
public void testMissingNullableColumns() {
    // Wildcard projection with the standard test parsers and resolvers.
    final ScanLevelProjection projection =
        ScanLevelProjection.build(RowSetTestUtils.projectAll(), ScanTestUtils.parsers());
    final SchemaSmoother schemaSmoother =
        new SchemaSmoother(projection, ScanTestUtils.resolvers());

    // Prior schema: (a nullable INT, b VARCHAR, c BIGINT array).
    final TupleMetadata priorSchema = new SchemaBuilder()
        .addNullable("a", MinorType.INT)
        .add("b", MinorType.VARCHAR)
        .addArray("c", MinorType.BIGINT)
        .buildSchema();

    // Table schema omits "a" (nullable) and "c" (repeated) entirely.
    final TupleMetadata tableSchema = new SchemaBuilder()
        .add("b", MinorType.VARCHAR)
        .buildSchema();

    // First batch establishes the prior schema.
    doResolve(schemaSmoother, priorSchema);

    // Second batch: subset with only "b". The schema version stays at 1
    // and the resolved output still matches the full prior schema.
    final ResolvedRow resolvedRow = doResolve(schemaSmoother, tableSchema);
    assertEquals(1, schemaSmoother.schemaVersion());
    assertTrue(ScanTestUtils.schema(resolvedRow).isEquivalent(priorSchema));
}
Also used : ResolvedRow(org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 18 with ResolvedRow

use of org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow in project drill by apache.

From the class TestSchemaSmoothing, method testLongerPartitionLength:

/**
 * If using the legacy wildcard expansion, we are able to use the same
 * schema even if the new partition path is longer than the previous.
 * Because all file names are provided up front.
 */
@Test
public void testLongerPartitionLength() {
    // Two files: the second sits one directory level deeper than the first.
    final Path shallowFile = new Path("hdfs:///w/x/a.csv");
    final Path deepFile = new Path("hdfs:///w/x/y/b.csv");

    // File metadata manager knows about both files up front.
    final ImplicitColumnManager implicitColMgr = new ImplicitColumnManager(
        fixture.getOptionManager(),
        standardOptions(Lists.newArrayList(shallowFile, deepFile)));

    // Scan-level projection with wildcard plus implicit columns (2 partitions).
    final ScanLevelProjection projection = ScanLevelProjection.build(
        ScanTestUtils.projectAllWithAllImplicit(2),
        ScanTestUtils.parsers(implicitColMgr.projectionParser()));

    // Smoother resolves each file's schema against the prior one.
    final SchemaSmoother schemaSmoother =
        new SchemaSmoother(projection, ScanTestUtils.resolvers(implicitColMgr));

    // Both files share the same table schema: (a INT, b VARCHAR).
    final TupleMetadata tableSchema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .add("b", MinorType.VARCHAR)
        .buildSchema();

    // Expected output: table schema expanded with implicit columns.
    final TupleMetadata expectedSchema =
        ScanTestUtils.expandImplicit(tableSchema, implicitColMgr, 2);

    {
        // First file: establishes the schema.
        implicitColMgr.startFile(shallowFile);
        final ResolvedRow firstRow = doResolve(schemaSmoother, tableSchema);
        assertTrue(ScanTestUtils.schema(firstRow).isEquivalent(expectedSchema));
    }
    {
        // Second file has a longer partition path; schema version stays at 1.
        implicitColMgr.startFile(deepFile);
        final ResolvedRow secondRow = doResolve(schemaSmoother, tableSchema);
        assertEquals(1, schemaSmoother.schemaVersion());
        assertTrue(ScanTestUtils.schema(secondRow).isEquivalent(expectedSchema));
    }
}
Also used : Path(org.apache.hadoop.fs.Path) ImplicitColumnManager(org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager) ResolvedRow(org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 19 with ResolvedRow

use of org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow in project drill by apache.

From the class TestSchemaSmoothing, method testSmaller:

/**
 * Case in which the table schema is a superset of the prior
 * schema. Discard prior schema. Turn off auto expansion of
 * metadata for a simpler test.
 */
@Test
public void testSmaller() {
    // Wildcard projection with the standard test parsers and resolvers.
    final ScanLevelProjection projection =
        ScanLevelProjection.build(RowSetTestUtils.projectAll(), ScanTestUtils.parsers());
    final SchemaSmoother schemaSmoother =
        new SchemaSmoother(projection, ScanTestUtils.resolvers());

    // Prior schema has one column; table schema is a strict superset.
    final TupleMetadata priorSchema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .buildSchema();
    final TupleMetadata tableSchema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .add("b", MinorType.VARCHAR)
        .buildSchema();

    // First batch: the prior schema is adopted as version 1.
    final NullColumnBuilder firstBuilder = new NullBuilderBuilder().build();
    final ResolvedRow firstRow = new ResolvedRow(firstBuilder);
    schemaSmoother.resolve(priorSchema, firstRow);
    assertEquals(1, schemaSmoother.schemaVersion());
    assertTrue(ScanTestUtils.schema(firstRow).isEquivalent(priorSchema));

    // Second batch: superset forces a new schema, bumping the version to 2.
    final NullColumnBuilder secondBuilder = new NullBuilderBuilder().build();
    final ResolvedRow secondRow = new ResolvedRow(secondBuilder);
    schemaSmoother.resolve(tableSchema, secondRow);
    assertEquals(2, schemaSmoother.schemaVersion());
    assertTrue(ScanTestUtils.schema(secondRow).isEquivalent(tableSchema));
}
Also used : ResolvedRow(org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) NullBuilderBuilder(org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder.NullBuilderBuilder) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 20 with ResolvedRow

use of org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow in project drill by apache.

From the class TestSchemaSmoothing, method testShorterPartitionLength:

/**
 * If using the legacy wildcard expansion, reuse schema if the new partition path
 * is shorter than the previous. (Unneeded partitions will be set to null by the
 * scan projector.)
 */
@Test
public void testShorterPartitionLength() {
    // Two files: the second sits one directory level shallower than the first.
    final Path deepFile = new Path("hdfs:///w/x/y/a.csv");
    final Path shallowFile = new Path("hdfs:///w/x/b.csv");

    // File metadata manager knows about both files up front.
    final ImplicitColumnManager implicitColMgr = new ImplicitColumnManager(
        fixture.getOptionManager(),
        standardOptions(Lists.newArrayList(deepFile, shallowFile)));

    // Scan-level projection with wildcard plus implicit columns (2 partitions).
    final ScanLevelProjection projection = ScanLevelProjection.build(
        ScanTestUtils.projectAllWithAllImplicit(2),
        ScanTestUtils.parsers(implicitColMgr.projectionParser()));

    // Smoother resolves each file's schema against the prior one.
    final SchemaSmoother schemaSmoother =
        new SchemaSmoother(projection, ScanTestUtils.resolvers(implicitColMgr));

    // Both files share the same table schema: (a INT, b VARCHAR).
    final TupleMetadata tableSchema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .add("b", MinorType.VARCHAR)
        .buildSchema();

    // Expected output: table schema expanded with implicit columns.
    final TupleMetadata expectedSchema =
        ScanTestUtils.expandImplicit(tableSchema, implicitColMgr, 2);

    {
        // First file: establishes the schema.
        implicitColMgr.startFile(deepFile);
        final ResolvedRow firstRow = doResolve(schemaSmoother, tableSchema);
        assertTrue(ScanTestUtils.schema(firstRow).isEquivalent(expectedSchema));
    }
    {
        // Second file has a shorter partition path; schema version stays at 1.
        implicitColMgr.startFile(shallowFile);
        final ResolvedRow secondRow = doResolve(schemaSmoother, tableSchema);
        assertEquals(1, schemaSmoother.schemaVersion());
        assertTrue(ScanTestUtils.schema(secondRow).isEquivalent(expectedSchema));
    }
}
Also used : Path(org.apache.hadoop.fs.Path) ImplicitColumnManager(org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager) ResolvedRow(org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Aggregations

ResolvedRow (org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow)37 SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder)36 TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata)36 SubOperatorTest (org.apache.drill.test.SubOperatorTest)36 Test (org.junit.Test)36 NullBuilderBuilder (org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder.NullBuilderBuilder)24 ImplicitColumnManager (org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager)7 Path (org.apache.hadoop.fs.Path)7 RowSet (org.apache.drill.exec.physical.rowSet.RowSet)6 SingleRowSet (org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet)6 VectorContainer (org.apache.drill.exec.record.VectorContainer)6 RowSetComparison (org.apache.drill.test.rowSet.RowSetComparison)6 ResultVectorCache (org.apache.drill.exec.physical.resultSet.ResultVectorCache)4 NullResultVectorCacheImpl (org.apache.drill.exec.physical.resultSet.impl.NullResultVectorCacheImpl)4 ExplicitSchemaProjection (org.apache.drill.exec.physical.impl.scan.project.ExplicitSchemaProjection)3 NullColumnBuilder (org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder)3 ResolvedColumn (org.apache.drill.exec.physical.impl.scan.project.ResolvedColumn)3 ScanLevelProjection (org.apache.drill.exec.physical.impl.scan.project.ScanLevelProjection)3 List (java.util.List)2 UserException (org.apache.drill.common.exceptions.UserException)2