Search in sources :

Example 1 with NullBuilderBuilder

Use of org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder.NullBuilderBuilder in the Apache Drill project.

From the class TestImplicitColumnProjection, method testPartitionColumnTwoDigits.

/**
 * Verifies partition-column resolution when the partition index has two
 * digits ({@code dir11}). The projection list holds nothing but that
 * partition column, so this also covers the case where the output consists
 * solely of partition columns.
 */
@Test
public void testPartitionColumnTwoDigits() {
    final Path csvPath = new Path("hdfs:///x/0/1/2/3/4/5/6/7/8/9/10/d11/z.csv");
    final ImplicitColumnManager implicitCols = new ImplicitColumnManager(
            fixture.getOptionManager(),
            standardOptions(csvPath));

    // Scan-level projection: the query asks only for dir11.
    final ScanLevelProjection scanLevel = ScanLevelProjection.build(
            RowSetTestUtils.projectList("dir11"),
            ScanTestUtils.parsers(implicitCols.projectionParser()));

    // The table itself offers one column, "a", which the query does not project.
    final TupleMetadata readerSchema = new SchemaBuilder()
            .add("a", MinorType.VARCHAR)
            .buildSchema();

    // Schema-level projection against the file.
    implicitCols.startFile(csvPath);
    final NullColumnBuilder nullBuilder = new NullBuilderBuilder().build();
    final ResolvedRow outputRow = new ResolvedRow(nullBuilder);
    new ExplicitSchemaProjection(scanLevel, readerSchema, outputRow, ScanTestUtils.resolvers(implicitCols));

    // Exactly one column is resolved, and it carries the directory name.
    final List<ResolvedColumn> resolved = outputRow.columns();
    assertEquals(1, resolved.size());
    final MetadataColumn partitionCol = (MetadataColumn) resolved.get(0);
    assertEquals("d11", partitionCol.value());
}
Also used : Path(org.apache.hadoop.fs.Path) ImplicitColumnManager(org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager) NullColumnBuilder(org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder) ResolvedRow(org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow) ScanLevelProjection(org.apache.drill.exec.physical.impl.scan.project.ScanLevelProjection) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) NullBuilderBuilder(org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder.NullBuilderBuilder) ExplicitSchemaProjection(org.apache.drill.exec.physical.impl.scan.project.ExplicitSchemaProjection) ResolvedColumn(org.apache.drill.exec.physical.impl.scan.project.ResolvedColumn) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 2 with NullBuilderBuilder

Use of org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder.NullBuilderBuilder in the Apache Drill project.

From the class TestImplicitColumnProjection, method testProjectList.

/**
 * Test the file projection planner with metadata.
 * <p>
 * The query projects the file name column, a table column {@code a} and the
 * first partition column. The projection is checked twice: at scan level
 * (column kinds, names and types) and again after schema-level resolution
 * against a concrete file, where the implicit columns must also carry their
 * values. Finally, the implicit column manager must report exactly the two
 * implicit columns, as the same instances found in the resolved row.
 */
@Test
public void testProjectList() {
    Path filePath = new Path("hdfs:///w/x/y/z.csv");
    ImplicitColumnManager metadataManager = new ImplicitColumnManager(fixture.getOptionManager(), standardOptions(filePath));
    ScanLevelProjection scanProj = ScanLevelProjection.build(RowSetTestUtils.projectList(ScanTestUtils.FILE_NAME_COL, "a", ScanTestUtils.partitionColName(0)), ScanTestUtils.parsers(metadataManager.projectionParser()));
    assertEquals(3, scanProj.columns().size());
    // Scan-level projection: defines the columns
    {
        // Each column is fetched once and type-checked before the cast, so a
        // second instanceof assertion on the cast result would be a tautology.
        ColumnProjection scanCol0 = scanProj.columns().get(0);
        assertTrue(scanCol0 instanceof FileMetadataColumn);
        FileMetadataColumn col0 = (FileMetadataColumn) scanCol0;
        assertEquals(ScanTestUtils.FILE_NAME_COL, col0.name());
        assertEquals(MinorType.VARCHAR, col0.schema().getType().getMinorType());
        assertEquals(DataMode.REQUIRED, col0.schema().getType().getMode());

        // The table column is not resolved yet at this level.
        ColumnProjection col1 = scanProj.columns().get(1);
        assertTrue(col1 instanceof UnresolvedColumn);
        assertEquals("a", col1.name());

        ColumnProjection scanCol2 = scanProj.columns().get(2);
        assertTrue(scanCol2 instanceof PartitionColumn);
        PartitionColumn col2 = (PartitionColumn) scanCol2;
        assertEquals(ScanTestUtils.partitionColName(0), col2.name());
        assertEquals(MinorType.VARCHAR, col2.schema().getType().getMinorType());
        assertEquals(DataMode.OPTIONAL, col2.schema().getType().getMode());
    }
    // Schema-level projection, fills in values.
    TupleMetadata tableSchema = new SchemaBuilder().add("a", MinorType.VARCHAR).buildSchema();
    metadataManager.startFile(filePath);
    NullColumnBuilder builder = new NullBuilderBuilder().build();
    ResolvedRow rootTuple = new ResolvedRow(builder);
    new ExplicitSchemaProjection(scanProj, tableSchema, rootTuple, ScanTestUtils.resolvers(metadataManager));
    List<ResolvedColumn> columns = rootTuple.columns();
    assertEquals(3, columns.size());
    {
        ResolvedColumn resolved0 = columns.get(0);
        assertTrue(resolved0 instanceof FileMetadataColumn);
        FileMetadataColumn col0 = (FileMetadataColumn) resolved0;
        assertEquals(ScanTestUtils.FILE_NAME_COL, col0.name());
        assertEquals("z.csv", col0.value());
        assertEquals(MinorType.VARCHAR, col0.schema().getType().getMinorType());
        assertEquals(DataMode.REQUIRED, col0.schema().getType().getMode());

        ResolvedColumn col1 = columns.get(1);
        assertEquals("a", col1.name());

        ResolvedColumn resolved2 = columns.get(2);
        assertTrue(resolved2 instanceof PartitionColumn);
        PartitionColumn col2 = (PartitionColumn) resolved2;
        assertEquals(ScanTestUtils.partitionColName(0), col2.name());
        assertEquals("x", col2.value());
        assertEquals(MinorType.VARCHAR, col2.schema().getType().getMinorType());
        assertEquals(DataMode.OPTIONAL, col2.schema().getType().getMode());
    }
    // Verify that the file metadata columns were picked out
    assertEquals(2, metadataManager.metadataColumns().size());
    assertSame(columns.get(0), metadataManager.metadataColumns().get(0));
    assertSame(columns.get(2), metadataManager.metadataColumns().get(1));
}
Also used : Path(org.apache.hadoop.fs.Path) ImplicitColumnManager(org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager) ScanLevelProjection(org.apache.drill.exec.physical.impl.scan.project.ScanLevelProjection) PartitionColumn(org.apache.drill.exec.physical.impl.scan.file.PartitionColumn) NullBuilderBuilder(org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder.NullBuilderBuilder) ExplicitSchemaProjection(org.apache.drill.exec.physical.impl.scan.project.ExplicitSchemaProjection) ResolvedColumn(org.apache.drill.exec.physical.impl.scan.project.ResolvedColumn) NullColumnBuilder(org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder) ResolvedRow(org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow) UnresolvedColumn(org.apache.drill.exec.physical.impl.scan.project.AbstractUnresolvedColumn.UnresolvedColumn) ColumnProjection(org.apache.drill.exec.physical.impl.scan.project.ColumnProjection) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) FileMetadataColumn(org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 3 with NullBuilderBuilder

Use of org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder.NullBuilderBuilder in the Apache Drill project.

From the class TestNullColumnLoader, method testSchemaWithConflicts.

/**
 * Exercises the conflicts that can arise between a provided output schema
 * and the null columns the builder is asked to create:
 * <ul>
 * <li>Schema is required, but no default value for null column.</li>
 * <li>Query wants a different type than that in the schema.</li>
 * <li>Query wants a different mode than that in the schema.</li>
 * </ul>
 *
 * The type and mode handed to the builder are what schema smoothing would
 * produce. They normally agree with the schema; this test pins down the
 * rules for the cases where they do not.
 * <p>
 * Defaults for null columns are ignored: null columns use NULL as the
 * null value.
 */
@Test
public void testSchemaWithConflicts() {
    // Note: upper case names in schema, lower case in "projection" list
    final TupleMetadata providedSchema = new SchemaBuilder()
            .add("IntReq", MinorType.INT)
            .add("StrReq", MinorType.VARCHAR) // No default
            .addNullable("IntOpt", MinorType.INT)
            .addNullable("StrOpt", MinorType.VARCHAR)
            .buildSchema();
    providedSchema.metadata("intReq").setDefaultValue("10");
    providedSchema.metadata("intOpt").setDefaultValue("20");
    providedSchema.metadata("strOpt").setDefaultValue("bar");

    final ResultVectorCache vectorCache = new NullResultVectorCacheImpl(fixture.allocator());
    final NullColumnBuilder nullBuilder = new NullBuilderBuilder()
            .setNullType(Types.optional(MinorType.VARCHAR))
            .setOutputSchema(providedSchema)
            .build();

    // Defined, required, no default so --> optional
    nullBuilder.add("strReq");
    nullBuilder.add("strOpt");
    // Defined, has default, but conflicting type, so default --> null, so --> optional
    nullBuilder.add("intReq", Types.required(MinorType.BIGINT));
    // Defined, has default, conflicting mode, so keep default
    nullBuilder.add("intOpt", Types.required(MinorType.INT));
    nullBuilder.build(vectorCache);

    // Create a batch
    final int rowCount = 2;
    nullBuilder.load(rowCount);

    // Verify values and types
    final TupleMetadata expectedSchema = new SchemaBuilder()
            .addNullable("strReq", MinorType.VARCHAR)
            .addNullable("strOpt", MinorType.VARCHAR)
            .addNullable("intReq", MinorType.BIGINT)
            .add("intOpt", MinorType.INT)
            .buildSchema();
    final SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
            .addRow(null, null, null, 20)
            .addRow(null, null, null, 20)
            .build();
    RowSetUtilities.verify(expected, fixture.wrap(nullBuilder.output()));
    nullBuilder.close();
}
Also used : ResultVectorCache(org.apache.drill.exec.physical.resultSet.ResultVectorCache) SingleRowSet(org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) NullResultVectorCacheImpl(org.apache.drill.exec.physical.resultSet.impl.NullResultVectorCacheImpl) NullBuilderBuilder(org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder.NullBuilderBuilder) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 4 with NullBuilderBuilder

Use of org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder.NullBuilderBuilder in the Apache Drill project.

From the class TestNullColumnLoader, method testNullColumnBuilderWithSchema.

/**
 * Checks how an output schema, together with its default value property,
 * supplies values for missing columns.
 * <p>
 * As the expected rows show, the required columns come back filled with
 * their schema defaults, the nullable columns come back as NULL, and a
 * column absent from the schema ({@code extra}) comes back as a nullable
 * column of the builder's null type.
 */
@Test
public void testNullColumnBuilderWithSchema() {
    // Note: upper case names in schema, lower case in "projection" list
    final TupleMetadata providedSchema = new SchemaBuilder()
            .add("IntReq", MinorType.INT)
            .add("StrReq", MinorType.VARCHAR)
            .addNullable("IntOpt", MinorType.INT)
            .addNullable("StrOpt", MinorType.VARCHAR)
            .addNullable("DubOpt", MinorType.FLOAT8) // No default
            .buildSchema();
    providedSchema.metadata("intReq").setDefaultValue("10");
    providedSchema.metadata("strReq").setDefaultValue("foo");
    providedSchema.metadata("intOpt").setDefaultValue("20");
    providedSchema.metadata("strOpt").setDefaultValue("bar");

    final ResultVectorCache vectorCache = new NullResultVectorCacheImpl(fixture.allocator());
    final NullColumnBuilder nullBuilder = new NullBuilderBuilder()
            .setNullType(Types.optional(MinorType.VARCHAR))
            .setOutputSchema(providedSchema)
            .build();

    // Request every schema column by name only, plus one the schema lacks.
    nullBuilder.add("strReq");
    nullBuilder.add("strOpt");
    nullBuilder.add("dubOpt");
    nullBuilder.add("intReq");
    nullBuilder.add("intOpt");
    nullBuilder.add("extra");
    nullBuilder.build(vectorCache);

    // Create a batch
    final int rowCount = 2;
    nullBuilder.load(rowCount);

    // Verify values and types
    final TupleMetadata expectedSchema = new SchemaBuilder()
            .add("strReq", MinorType.VARCHAR)
            .addNullable("strOpt", MinorType.VARCHAR)
            .addNullable("dubOpt", MinorType.FLOAT8)
            .add("intReq", MinorType.INT)
            .addNullable("intOpt", MinorType.INT)
            .addNullable("extra", MinorType.VARCHAR)
            .buildSchema();
    final SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
            .addRow("foo", null, null, 10, null, null)
            .addRow("foo", null, null, 10, null, null)
            .build();
    RowSetUtilities.verify(expected, fixture.wrap(nullBuilder.output()));
    nullBuilder.close();
}
Also used : ResultVectorCache(org.apache.drill.exec.physical.resultSet.ResultVectorCache) SingleRowSet(org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) NullResultVectorCacheImpl(org.apache.drill.exec.physical.resultSet.impl.NullResultVectorCacheImpl) NullBuilderBuilder(org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder.NullBuilderBuilder) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 5 with NullBuilderBuilder

Use of org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder.NullBuilderBuilder in the Apache Drill project.

From the class TestReaderLevelProjection, method testSubset.

/**
 * Explicit projection (the query lists its columns) over a data source
 * where the project list differs from the source in three ways: the names
 * use a different case, the columns appear in a different order, and only
 * a subset of the source's columns is requested.
 */
@Test
public void testSubset() {
    // Simulate SELECT c, a ...
    final ScanLevelProjection scanLevel = ScanLevelProjection.build(
            RowSetTestUtils.projectList("c", "a"),
            ScanTestUtils.parsers());
    assertEquals(2, scanLevel.columns().size());

    // Simulate a data source, with early schema, of (a, b, c)
    final TupleMetadata sourceSchema = new SchemaBuilder()
            .add("A", MinorType.VARCHAR)
            .add("B", MinorType.VARCHAR)
            .add("C", MinorType.VARCHAR)
            .buildSchema();

    final NullColumnBuilder nullBuilder = new NullBuilderBuilder().build();
    final ResolvedRow outputRow = new ResolvedRow(nullBuilder);
    new ExplicitSchemaProjection(scanLevel, sourceSchema, outputRow, ScanTestUtils.resolvers());

    final List<ResolvedColumn> resolved = outputRow.columns();
    assertEquals(2, resolved.size());

    // First output column: "c", taken from source position 2.
    final ResolvedColumn first = resolved.get(0);
    assertEquals("c", first.name());
    assertEquals(2, first.sourceIndex());
    assertSame(outputRow, first.source());

    // Second output column: "a", taken from source position 0.
    final ResolvedColumn second = resolved.get(1);
    assertEquals("a", second.name());
    assertEquals(0, second.sourceIndex());
    assertSame(outputRow, second.source());
}
Also used : ResolvedRow(org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) NullBuilderBuilder(org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder.NullBuilderBuilder) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Aggregations

NullBuilderBuilder (org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder.NullBuilderBuilder): 27
SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder): 26
TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata): 26
SubOperatorTest (org.apache.drill.test.SubOperatorTest): 26
Test (org.junit.Test): 26
ResolvedRow (org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow): 24
ResultVectorCache (org.apache.drill.exec.physical.resultSet.ResultVectorCache): 7
NullResultVectorCacheImpl (org.apache.drill.exec.physical.resultSet.impl.NullResultVectorCacheImpl): 7
SingleRowSet (org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet): 7
ImplicitColumnManager (org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager): 4
RowSet (org.apache.drill.exec.physical.rowSet.RowSet): 4
VectorContainer (org.apache.drill.exec.record.VectorContainer): 4
RowSetComparison (org.apache.drill.test.rowSet.RowSetComparison): 4
Path (org.apache.hadoop.fs.Path): 4
ExplicitSchemaProjection (org.apache.drill.exec.physical.impl.scan.project.ExplicitSchemaProjection): 3
NullColumnBuilder (org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder): 3
ResolvedColumn (org.apache.drill.exec.physical.impl.scan.project.ResolvedColumn): 3
ScanLevelProjection (org.apache.drill.exec.physical.impl.scan.project.ScanLevelProjection): 3
UserException (org.apache.drill.common.exceptions.UserException): 2
FileMetadataColumn (org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn): 2