Search in sources :

Example 26 with ResolvedRow

use of org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow in project drill by apache.

The class TestSchemaSmoothing, method testDiscrete.

/**
 * Sanity test for the simple, discrete case. The purpose of
 * discrete is just to run the basic lifecycle in a way that
 * is compatible with the schema-persistence version.
 */
@Test
public void testDiscrete() {
    // Set up the file metadata manager for two CSV files
    Path filePathA = new Path("hdfs:///w/x/y/a.csv");
    Path filePathB = new Path("hdfs:///w/x/y/b.csv");
    ImplicitColumnManager metadataManager = new ImplicitColumnManager(fixture.getOptionManager(), standardOptions(Lists.newArrayList(filePathA, filePathB)));
    // Set up the scan level projection: filename + "a" + "b"
    ScanLevelProjection scanProj = ScanLevelProjection.build(RowSetTestUtils.projectList(ScanTestUtils.FILE_NAME_COL, "a", "b"), ScanTestUtils.parsers(metadataManager.projectionParser()));
    {
        // Define a file a.csv
        metadataManager.startFile(filePathA);
        // Build the output schema from the (a, b) table schema
        TupleMetadata twoColSchema = new SchemaBuilder().add("a", MinorType.INT).addNullable("b", MinorType.VARCHAR, 10).buildSchema();
        final NullColumnBuilder builder = new NullBuilderBuilder().build();
        ResolvedRow rootTuple = new ResolvedRow(builder);
        new ExplicitSchemaProjection(scanProj, twoColSchema, rootTuple, ScanTestUtils.resolvers(metadataManager));
        // Verify the full output schema
        TupleMetadata expectedSchema = new SchemaBuilder().add("filename", MinorType.VARCHAR).add("a", MinorType.INT).addNullable("b", MinorType.VARCHAR, 10).buildSchema();
        // Verify
        List<ResolvedColumn> columns = rootTuple.columns();
        assertEquals(3, columns.size());
        assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema));
        assertEquals(ScanTestUtils.FILE_NAME_COL, columns.get(0).name());
        assertEquals("a.csv", ((FileMetadataColumn) columns.get(0)).value());
        assertTrue(columns.get(1) instanceof ResolvedTableColumn);
        // Both "a" and "b" exist in the (a, b) table schema, so column 2
        // must also resolve to a table column, mirroring the b.csv case below.
        assertTrue(columns.get(2) instanceof ResolvedTableColumn);
    }
    {
        // Define a file b.csv
        metadataManager.startFile(filePathB);
        // Build the output schema from the (a) table schema
        TupleMetadata oneColSchema = new SchemaBuilder().add("a", MinorType.INT).buildSchema();
        final NullColumnBuilder builder = new NullBuilderBuilder().build();
        ResolvedRow rootTuple = new ResolvedRow(builder);
        new ExplicitSchemaProjection(scanProj, oneColSchema, rootTuple, ScanTestUtils.resolvers(metadataManager));
        // Verify the full output schema
        // Since this mode is "discrete", we don't remember the type
        // of the missing column. (Instead, it is filled in at the
        // vector level as part of vector persistence.) During projection, it is
        // marked with type NULL so that the null column builder will fill in
        // the proper type.
        TupleMetadata expectedSchema = new SchemaBuilder().add("filename", MinorType.VARCHAR).add("a", MinorType.INT).addNullable("b", MinorType.NULL).buildSchema();
        // Verify
        List<ResolvedColumn> columns = rootTuple.columns();
        assertEquals(3, columns.size());
        assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema));
        assertEquals(ScanTestUtils.FILE_NAME_COL, columns.get(0).name());
        assertEquals("b.csv", ((FileMetadataColumn) columns.get(0)).value());
        assertTrue(columns.get(1) instanceof ResolvedTableColumn);
        // "b" is absent from the (a) table schema, so it resolves to a null column.
        assertTrue(columns.get(2) instanceof ResolvedNullColumn);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) ImplicitColumnManager(org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager) NullBuilderBuilder(org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder.NullBuilderBuilder) ResolvedRow(org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) List(java.util.List) FileMetadataColumn(org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 27 with ResolvedRow

use of org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow in project drill by apache.

The class TestSchemaSmoothing, method testDifferentTypes.

/**
 * Column names match, but types differ. Discard the prior schema.
 */
@Test
public void testDifferentTypes() {
    final ScanLevelProjection projection = ScanLevelProjection.build(RowSetTestUtils.projectAll(), ScanTestUtils.parsers());
    final SchemaSmoother schemaSmoother = new SchemaSmoother(projection, ScanTestUtils.resolvers());
    final TupleMetadata firstSchema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .add("b", MinorType.VARCHAR)
        .buildSchema();
    final TupleMetadata secondSchema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .addNullable("b", MinorType.VARCHAR)
        .buildSchema();
    // First file establishes the baseline schema.
    doResolve(schemaSmoother, firstSchema);
    // Second file: same column names, but "b" is now nullable. The
    // smoother cannot reuse the prior schema, so the version bumps to 2
    // and the new table schema wins.
    final ResolvedRow resolvedRow = doResolve(schemaSmoother, secondSchema);
    assertEquals(2, schemaSmoother.schemaVersion());
    assertTrue(ScanTestUtils.schema(resolvedRow).isEquivalent(secondSchema));
}
Also used : ResolvedRow(org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 28 with ResolvedRow

use of org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow in project drill by apache.

The class TestSchemaSmoothing, method testRequired.

/**
 * Can't preserve the prior schema if it had required columns
 * where the new schema has no columns.
 */
@Test
public void testRequired() {
    final ScanLevelProjection projection = ScanLevelProjection.build(RowSetTestUtils.projectAll(), ScanTestUtils.parsers());
    final SchemaSmoother schemaSmoother = new SchemaSmoother(projection, ScanTestUtils.resolvers());
    final TupleMetadata firstSchema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .addNullable("b", MinorType.VARCHAR)
        .buildSchema();
    final TupleMetadata secondSchema = new SchemaBuilder()
        .addNullable("b", MinorType.VARCHAR)
        .buildSchema();
    // First file sets the baseline: required "a" plus nullable "b".
    doResolve(schemaSmoother, firstSchema);
    // Second file has no "a". A required column cannot be back-filled
    // with nulls, so the prior schema is abandoned: version bumps to 2
    // and the new (smaller) table schema is used as-is.
    final ResolvedRow resolvedRow = doResolve(schemaSmoother, secondSchema);
    assertEquals(2, schemaSmoother.schemaVersion());
    assertTrue(ScanTestUtils.schema(resolvedRow).isEquivalent(secondSchema));
}
Also used : ResolvedRow(org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 29 with ResolvedRow

use of org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow in project drill by apache.

The class TestSchemaSmoothing, method testDifferentCase.

/**
 * The prior and table schemas are identical, but the cases of names differ.
 * Preserve the case of the first schema.
 */
@Test
public void testDifferentCase() {
    final ScanLevelProjection projection = ScanLevelProjection.build(RowSetTestUtils.projectAll(), ScanTestUtils.parsers());
    final SchemaSmoother schemaSmoother = new SchemaSmoother(projection, ScanTestUtils.resolvers());
    final TupleMetadata lowerCaseSchema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .add("b", MinorType.VARCHAR)
        .buildSchema();
    final TupleMetadata upperCaseSchema = new SchemaBuilder()
        .add("A", MinorType.INT)
        .add("B", MinorType.VARCHAR)
        .buildSchema();
    // Baseline: lower-case column names.
    doResolve(schemaSmoother, lowerCaseSchema);
    // Same columns with upper-case names: name matching ignores case,
    // so the prior schema survives (version stays 1) and the original
    // lower-case spelling is preserved in the output.
    final ResolvedRow resolvedRow = doResolve(schemaSmoother, upperCaseSchema);
    assertEquals(1, schemaSmoother.schemaVersion());
    assertTrue(ScanTestUtils.schema(resolvedRow).isEquivalent(lowerCaseSchema));
    final List<ResolvedColumn> resolvedColumns = resolvedRow.columns();
    assertEquals("a", resolvedColumns.get(0).name());
}
Also used : ResolvedRow(org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) List(java.util.List) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 30 with ResolvedRow

use of org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow in project drill by apache.

The class TestSchemaSmoothing, method testSmoothableSchemaBatches.

/**
 * Integrated test across multiple schemas at the batch level.
 * Walks the smoother through five successive table schemas and checks,
 * at each step, which output schema is chosen and whether the schema
 * version is bumped (replan) or preserved (smoothed).
 */
@Test
public void testSmoothableSchemaBatches() {
    final ScanLevelProjection scanProj = ScanLevelProjection.build(RowSetTestUtils.projectAll(), ScanTestUtils.parsers());
    final SchemaSmoother smoother = new SchemaSmoother(scanProj, ScanTestUtils.resolvers());
    // Table 1: (a: nullable bigint, b: nullable varchar, c: float8)
    final TupleMetadata schema1 = new SchemaBuilder().addNullable("a", MinorType.BIGINT).addNullable("b", MinorType.VARCHAR).add("c", MinorType.FLOAT8).buildSchema();
    {
        final ResolvedRow rootTuple = doResolve(smoother, schema1);
        // Just use the original schema.
        assertTrue(schema1.isEquivalent(ScanTestUtils.schema(rootTuple)));
        assertEquals(1, smoother.schemaVersion());
    }
    // Table 2: (a: nullable bigint, c), nullable column "b" omitted;
    // original schema preserved, so the version does not change
    final TupleMetadata schema2 = new SchemaBuilder().addNullable("a", MinorType.BIGINT).add("c", MinorType.FLOAT8).buildSchema();
    {
        final ResolvedRow rootTuple = doResolve(smoother, schema2);
        assertTrue(schema1.isEquivalent(ScanTestUtils.schema(rootTuple)));
        assertEquals(1, smoother.schemaVersion());
    }
    // Table 3: (a, b, c, d), column "d" added, must replan schema
    final TupleMetadata schema3 = new SchemaBuilder().addNullable("a", MinorType.BIGINT).addNullable("b", MinorType.VARCHAR).add("c", MinorType.FLOAT8).add("d", MinorType.INT).buildSchema();
    {
        final ResolvedRow rootTuple = doResolve(smoother, schema3);
        assertTrue(schema3.isEquivalent(ScanTestUtils.schema(rootTuple)));
        assertEquals(2, smoother.schemaVersion());
    }
    // Table 4: (a, b), drops the non-nullable columns "c" and "d";
    // required columns cannot be null-filled, must replan
    final TupleMetadata schema4 = new SchemaBuilder().addNullable("a", MinorType.BIGINT).addNullable("b", MinorType.VARCHAR).buildSchema();
    {
        final ResolvedRow rootTuple = doResolve(smoother, schema4);
        assertTrue(schema4.isEquivalent(ScanTestUtils.schema(rootTuple)));
        assertEquals(3, smoother.schemaVersion());
    }
    // Table 5: (a: nullable double, b), type of "a" changed from bigint,
    // must replan schema
    final TupleMetadata schema5 = new SchemaBuilder().addNullable("a", MinorType.FLOAT8).addNullable("b", MinorType.VARCHAR).buildSchema();
    {
        final ResolvedRow rootTuple = doResolve(smoother, schema5);
        assertTrue(schema5.isEquivalent(ScanTestUtils.schema(rootTuple)));
        assertEquals(4, smoother.schemaVersion());
    }
}
Also used : ResolvedRow(org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Aggregations

ResolvedRow (org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow)37 SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder)36 TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata)36 SubOperatorTest (org.apache.drill.test.SubOperatorTest)36 Test (org.junit.Test)36 NullBuilderBuilder (org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder.NullBuilderBuilder)24 ImplicitColumnManager (org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager)7 Path (org.apache.hadoop.fs.Path)7 RowSet (org.apache.drill.exec.physical.rowSet.RowSet)6 SingleRowSet (org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet)6 VectorContainer (org.apache.drill.exec.record.VectorContainer)6 RowSetComparison (org.apache.drill.test.rowSet.RowSetComparison)6 ResultVectorCache (org.apache.drill.exec.physical.resultSet.ResultVectorCache)4 NullResultVectorCacheImpl (org.apache.drill.exec.physical.resultSet.impl.NullResultVectorCacheImpl)4 ExplicitSchemaProjection (org.apache.drill.exec.physical.impl.scan.project.ExplicitSchemaProjection)3 NullColumnBuilder (org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder)3 ResolvedColumn (org.apache.drill.exec.physical.impl.scan.project.ResolvedColumn)3 ScanLevelProjection (org.apache.drill.exec.physical.impl.scan.project.ScanLevelProjection)3 List (java.util.List)2 UserException (org.apache.drill.common.exceptions.UserException)2