
Example 26 with SchemaBuilder

Use of org.apache.drill.exec.record.metadata.SchemaBuilder in project drill by apache.

The class TestFileScanFramework, method testLateSchemaFileWildcards.

@Test
public void testLateSchemaFileWildcards() {
    // Create a mock reader that returns two batches: one schema-only, the second with data.
    MockLateSchemaReader reader = new MockLateSchemaReader();
    reader.batchLimit = 2;
    reader.returnDataOnFirst = false;
    // Create the scan operator
    FileScanFixtureBuilder builder = new FileScanFixtureBuilder();
    builder.projectAllWithAllImplicit(2);
    builder.addReader(reader);
    ScanFixture scanFixture = builder.build();
    ScanOperatorExec scan = scanFixture.scanOp;
    // Standard startup
    assertFalse(reader.openCalled);
    // First batch: build the schema. The reader cooperates by returning
    // an empty first batch.
    assertTrue(scan.buildSchema());
    assertTrue(reader.openCalled);
    assertEquals(1, reader.batchCount);
    assertEquals(0, scan.batchAccessor().rowCount());
    // Create the expected result.
    TupleMetadata expectedSchema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .addNullable("b", MinorType.VARCHAR, 10)
        .add(ScanTestUtils.FULLY_QUALIFIED_NAME_COL, MinorType.VARCHAR)
        .add(ScanTestUtils.FILE_PATH_COL, MinorType.VARCHAR)
        .add(ScanTestUtils.FILE_NAME_COL, MinorType.VARCHAR)
        .add(ScanTestUtils.SUFFIX_COL, MinorType.VARCHAR)
        .add(ScanTestUtils.LAST_MODIFIED_TIME_COL, MinorType.VARCHAR)
        .addNullable(ScanTestUtils.PROJECT_METADATA_COL, MinorType.VARCHAR)
        .addNullable(ScanTestUtils.partitionColName(0), MinorType.VARCHAR)
        .addNullable(ScanTestUtils.partitionColName(1), MinorType.VARCHAR)
        .addNullable(ScanTestUtils.partitionColName(2), MinorType.VARCHAR)
        .buildSchema();
    SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
        .addRow(30, "fred", fqn, pathToFile, MOCK_FILE_NAME, MOCK_SUFFIX,
            lastModifiedTime, null, MOCK_DIR0, MOCK_DIR1, null)
        .addRow(40, "wilma", fqn, pathToFile, MOCK_FILE_NAME, MOCK_SUFFIX,
            lastModifiedTime, null, MOCK_DIR0, MOCK_DIR1, null)
        .build();
    assertEquals(expected.batchSchema(), scan.batchAccessor().schema());
    // The next call returns the batch with data.
    assertTrue(scan.next());
    RowSetUtilities.verify(expected, fixture.wrap(scan.batchAccessor().container()));
    // EOF
    assertFalse(scan.next());
    assertTrue(reader.closeCalled);
    assertEquals(0, scan.batchAccessor().rowCount());
    scanFixture.close();
}
Also used: ScanFixture(org.apache.drill.exec.physical.impl.scan.ScanTestUtils.ScanFixture) SingleRowSet(org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) BatchSchemaBuilder(org.apache.drill.exec.record.BatchSchemaBuilder) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)
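
The mock reader above follows Drill's "late schema" pattern: it declares nothing to the schema negotiator in open() and only creates its columns while reading the first batch. The real MockLateSchemaReader lives in Drill's test sources; the following is a minimal hypothetical sketch of a reader with the same shape, assuming the EVF ManagedReader and SchemaNegotiator contracts (imports as in the "Also used" lists):

public class LateSchemaReaderSketch implements ManagedReader<SchemaNegotiator> {
    private ResultSetLoader loader;
    private int batchCount;

    @Override
    public boolean open(SchemaNegotiator negotiator) {
        // Late schema: do not declare a table schema up front; just get the loader.
        loader = negotiator.build();
        return true;
    }

    @Override
    public boolean next() {
        if (++batchCount > 2) {
            return false; // EOF after the two batches the test expects
        }
        RowSetLoader writer = loader.writer();
        if (batchCount == 1) {
            // First batch: discover and declare the schema, but write no rows.
            TupleMetadata discovered = new SchemaBuilder()
                .add("a", MinorType.INT)
                .addNullable("b", MinorType.VARCHAR, 10)
                .buildSchema();
            for (ColumnMetadata col : discovered) {
                writer.addColumn(col);
            }
            return true;
        }
        // Second batch: the data rows the test verifies.
        writer.addRow(30, "fred");
        writer.addRow(40, "wilma");
        return true;
    }

    @Override
    public void close() { }
}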

Example 27 with SchemaBuilder

Use of org.apache.drill.exec.record.metadata.SchemaBuilder in project drill by apache.

The class TestImplicitColumnProjection, method testPartitionColumnTwoDigits.

/**
 * Tests the edge case in which the partition column name contains two
 * digits (dir11), together with the equally unusual case in which the
 * output contains only partition columns.
 */
@Test
public void testPartitionColumnTwoDigits() {
    Path filePath = new Path("hdfs:///x/0/1/2/3/4/5/6/7/8/9/10/d11/z.csv");
    ImplicitColumnManager metadataManager = new ImplicitColumnManager(fixture.getOptionManager(), standardOptions(filePath));
    ScanLevelProjection scanProj = ScanLevelProjection.build(RowSetTestUtils.projectList("dir11"), ScanTestUtils.parsers(metadataManager.projectionParser()));
    TupleMetadata tableSchema = new SchemaBuilder().add("a", MinorType.VARCHAR).buildSchema();
    metadataManager.startFile(filePath);
    NullColumnBuilder builder = new NullBuilderBuilder().build();
    ResolvedRow rootTuple = new ResolvedRow(builder);
    new ExplicitSchemaProjection(scanProj, tableSchema, rootTuple, ScanTestUtils.resolvers(metadataManager));
    List<ResolvedColumn> columns = rootTuple.columns();
    assertEquals(1, columns.size());
    assertEquals("d11", ((MetadataColumn) columns.get(0)).value());
}
Also used: Path(org.apache.hadoop.fs.Path) ImplicitColumnManager(org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager) NullColumnBuilder(org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder) ResolvedRow(org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow) ScanLevelProjection(org.apache.drill.exec.physical.impl.scan.project.ScanLevelProjection) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) NullBuilderBuilder(org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder.NullBuilderBuilder) ExplicitSchemaProjection(org.apache.drill.exec.physical.impl.scan.project.ExplicitSchemaProjection) ResolvedColumn(org.apache.drill.exec.physical.impl.scan.project.ResolvedColumn) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)
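
The test leans on the dirN naming convention: partition directories below the scan root are numbered from zero, so for the path above dir0 is "0", dir10 is "10", and dir11 resolves to "d11". A self-contained sketch of that convention follows; dirValue is a hypothetical helper written for illustration, not Drill's implementation:

import org.apache.hadoop.fs.Path;

public class PartitionColDemo {

    // Value of the dirN implicit column for a file under a scan root:
    // the Nth path segment below the root (zero-based), or null when the
    // path is not that deep. The final segment is the file name and is
    // never a partition value.
    static String dirValue(Path root, Path file, int n) {
        String rel = file.toUri().getPath()
            .substring(root.toUri().getPath().length() + 1);
        String[] segments = rel.split("/");
        return n < segments.length - 1 ? segments[n] : null;
    }

    public static void main(String[] args) {
        Path root = new Path("hdfs:///x");
        Path file = new Path("hdfs:///x/0/1/2/3/4/5/6/7/8/9/10/d11/z.csv");
        System.out.println(dirValue(root, file, 11)); // prints "d11"
    }
}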

Example 28 with SchemaBuilder

Use of org.apache.drill.exec.record.metadata.SchemaBuilder in project drill by apache.

The class TestImplicitColumnProjection, method testProjectList.

/**
 * Test the file projection planner with metadata.
 */
@Test
public void testProjectList() {
    Path filePath = new Path("hdfs:///w/x/y/z.csv");
    ImplicitColumnManager metadataManager = new ImplicitColumnManager(fixture.getOptionManager(), standardOptions(filePath));
    ScanLevelProjection scanProj = ScanLevelProjection.build(RowSetTestUtils.projectList(ScanTestUtils.FILE_NAME_COL, "a", ScanTestUtils.partitionColName(0)), ScanTestUtils.parsers(metadataManager.projectionParser()));
    assertEquals(3, scanProj.columns().size());
    // Scan-level projection: defines the columns
    {
        assertTrue(scanProj.columns().get(0) instanceof FileMetadataColumn);
        FileMetadataColumn col0 = (FileMetadataColumn) scanProj.columns().get(0);
        assertEquals(ScanTestUtils.FILE_NAME_COL, col0.name());
        assertEquals(MinorType.VARCHAR, col0.schema().getType().getMinorType());
        assertEquals(DataMode.REQUIRED, col0.schema().getType().getMode());
        ColumnProjection col1 = scanProj.columns().get(1);
        assertTrue(col1 instanceof UnresolvedColumn);
        assertEquals("a", col1.name());
        assertTrue(scanProj.columns().get(2) instanceof PartitionColumn);
        PartitionColumn col2 = (PartitionColumn) scanProj.columns().get(2);
        assertEquals(ScanTestUtils.partitionColName(0), col2.name());
        assertEquals(MinorType.VARCHAR, col2.schema().getType().getMinorType());
        assertEquals(DataMode.OPTIONAL, col2.schema().getType().getMode());
    }
    // Schema-level projection: fills in the values.
    TupleMetadata tableSchema = new SchemaBuilder().add("a", MinorType.VARCHAR).buildSchema();
    metadataManager.startFile(filePath);
    NullColumnBuilder builder = new NullBuilderBuilder().build();
    ResolvedRow rootTuple = new ResolvedRow(builder);
    new ExplicitSchemaProjection(scanProj, tableSchema, rootTuple, ScanTestUtils.resolvers(metadataManager));
    List<ResolvedColumn> columns = rootTuple.columns();
    assertEquals(3, columns.size());
    {
        assertTrue(columns.get(0) instanceof FileMetadataColumn);
        FileMetadataColumn col0 = (FileMetadataColumn) columns.get(0);
        assertEquals(ScanTestUtils.FILE_NAME_COL, col0.name());
        assertEquals("z.csv", col0.value());
        assertEquals(MinorType.VARCHAR, col0.schema().getType().getMinorType());
        assertEquals(DataMode.REQUIRED, col0.schema().getType().getMode());
        ResolvedColumn col1 = columns.get(1);
        assertEquals("a", col1.name());
        assertTrue(columns.get(2) instanceof PartitionColumn);
        PartitionColumn col2 = (PartitionColumn) columns.get(2);
        assertEquals(ScanTestUtils.partitionColName(0), col2.name());
        assertEquals("x", col2.value());
        assertEquals(MinorType.VARCHAR, col2.schema().getType().getMinorType());
        assertEquals(DataMode.OPTIONAL, col2.schema().getType().getMode());
    }
    // Verify that the file metadata columns were picked out
    assertEquals(2, metadataManager.metadataColumns().size());
    assertSame(columns.get(0), metadataManager.metadataColumns().get(0));
    assertSame(columns.get(2), metadataManager.metadataColumns().get(1));
}
Also used: Path(org.apache.hadoop.fs.Path) ImplicitColumnManager(org.apache.drill.exec.physical.impl.scan.file.ImplicitColumnManager) ScanLevelProjection(org.apache.drill.exec.physical.impl.scan.project.ScanLevelProjection) PartitionColumn(org.apache.drill.exec.physical.impl.scan.file.PartitionColumn) NullBuilderBuilder(org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder.NullBuilderBuilder) ExplicitSchemaProjection(org.apache.drill.exec.physical.impl.scan.project.ExplicitSchemaProjection) ResolvedColumn(org.apache.drill.exec.physical.impl.scan.project.ResolvedColumn) NullColumnBuilder(org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder) ResolvedRow(org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow) UnresolvedColumn(org.apache.drill.exec.physical.impl.scan.project.AbstractUnresolvedColumn.UnresolvedColumn) ColumnProjection(org.apache.drill.exec.physical.impl.scan.project.ColumnProjection) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) FileMetadataColumn(org.apache.drill.exec.physical.impl.scan.file.FileMetadataColumn) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)
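
For orientation, projection planning here runs in two passes, and the test exercises them in order. The condensed sequence below repeats only calls that already appear in the test; the comments summarize what each pass produces.

// Pass 1: scan-level projection. Parses the project list and classifies
// each column: filename -> FileMetadataColumn, a -> UnresolvedColumn
// (needs the table schema), dir0 -> PartitionColumn. No values are bound yet.
ScanLevelProjection scanProj = ScanLevelProjection.build(
    RowSetTestUtils.projectList(ScanTestUtils.FILE_NAME_COL, "a",
        ScanTestUtils.partitionColName(0)),
    ScanTestUtils.parsers(metadataManager.projectionParser()));

// Pass 2: schema-level projection. With the table schema and the current
// file known, every column resolves to a value source: "z.csv" for
// filename, column "a" from the table, "x" for dir0.
metadataManager.startFile(filePath);
ResolvedRow rootTuple = new ResolvedRow(new NullBuilderBuilder().build());
new ExplicitSchemaProjection(scanProj, tableSchema, rootTuple,
    ScanTestUtils.resolvers(metadataManager));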

Example 29 with SchemaBuilder

Use of org.apache.drill.exec.record.metadata.SchemaBuilder in project drill by apache.

The class TestDirectConverter, method testBasicConversionType.

/**
 * Test the conversion type for a subset of type pairs.
 */
@Test
public void testBasicConversionType() {
    StandardConversions conversions = StandardConversions.builder().build();
    TupleMetadata schema = new SchemaBuilder()
        .add("ti", MinorType.TINYINT)
        .add("si", MinorType.SMALLINT)
        .add("int", MinorType.INT)
        .add("bi", MinorType.BIGINT)
        .add("fl", MinorType.FLOAT4)
        .add("db", MinorType.FLOAT8)
        .add("dec", MinorType.VARDECIMAL, 10, 0)
        .add("str", MinorType.VARCHAR)
        .buildSchema();
    ColumnMetadata tinyIntCol = schema.metadata("ti");
    ColumnMetadata smallIntCol = schema.metadata("si");
    ColumnMetadata intCol = schema.metadata("int");
    ColumnMetadata bigIntCol = schema.metadata("bi");
    ColumnMetadata float4Col = schema.metadata("fl");
    ColumnMetadata float8Col = schema.metadata("db");
    ColumnMetadata decimalCol = schema.metadata("dec");
    ColumnMetadata stringCol = schema.metadata("str");
    // TinyInt --> x
    expect(ConversionType.NONE, conversions.analyze(tinyIntCol, tinyIntCol));
    expect(ConversionType.IMPLICIT, conversions.analyze(tinyIntCol, smallIntCol));
    expect(ConversionType.IMPLICIT, conversions.analyze(tinyIntCol, intCol));
    expect(ConversionType.IMPLICIT, conversions.analyze(tinyIntCol, bigIntCol));
    expect(ConversionType.IMPLICIT, conversions.analyze(tinyIntCol, float4Col));
    expect(ConversionType.IMPLICIT, conversions.analyze(tinyIntCol, float8Col));
    expect(ConversionType.EXPLICIT, conversions.analyze(tinyIntCol, decimalCol));
    expect(ConversionType.EXPLICIT, conversions.analyze(tinyIntCol, stringCol));
    // SmallInt --> x
    expect(ConversionType.IMPLICIT_UNSAFE, conversions.analyze(smallIntCol, tinyIntCol));
    expect(ConversionType.NONE, conversions.analyze(smallIntCol, smallIntCol));
    expect(ConversionType.IMPLICIT, conversions.analyze(smallIntCol, intCol));
    expect(ConversionType.IMPLICIT, conversions.analyze(smallIntCol, bigIntCol));
    expect(ConversionType.IMPLICIT, conversions.analyze(smallIntCol, float4Col));
    expect(ConversionType.IMPLICIT, conversions.analyze(smallIntCol, float8Col));
    expect(ConversionType.EXPLICIT, conversions.analyze(smallIntCol, decimalCol));
    expect(ConversionType.EXPLICIT, conversions.analyze(smallIntCol, stringCol));
    // Int --> x
    expect(ConversionType.IMPLICIT_UNSAFE, conversions.analyze(intCol, tinyIntCol));
    expect(ConversionType.IMPLICIT_UNSAFE, conversions.analyze(intCol, smallIntCol));
    expect(ConversionType.NONE, conversions.analyze(intCol, intCol));
    expect(ConversionType.IMPLICIT, conversions.analyze(intCol, bigIntCol));
    expect(ConversionType.IMPLICIT, conversions.analyze(intCol, float4Col));
    expect(ConversionType.IMPLICIT, conversions.analyze(intCol, float8Col));
    expect(ConversionType.EXPLICIT, conversions.analyze(intCol, decimalCol));
    expect(ConversionType.EXPLICIT, conversions.analyze(intCol, stringCol));
    // BigInt --> x
    expect(ConversionType.IMPLICIT_UNSAFE, conversions.analyze(bigIntCol, tinyIntCol));
    expect(ConversionType.IMPLICIT_UNSAFE, conversions.analyze(bigIntCol, smallIntCol));
    expect(ConversionType.IMPLICIT_UNSAFE, conversions.analyze(bigIntCol, intCol));
    expect(ConversionType.NONE, conversions.analyze(bigIntCol, bigIntCol));
    expect(ConversionType.IMPLICIT, conversions.analyze(bigIntCol, float4Col));
    expect(ConversionType.IMPLICIT, conversions.analyze(bigIntCol, float8Col));
    expect(ConversionType.EXPLICIT, conversions.analyze(bigIntCol, decimalCol));
    expect(ConversionType.EXPLICIT, conversions.analyze(bigIntCol, stringCol));
    // Float4 --> x
    expect(ConversionType.IMPLICIT_UNSAFE, conversions.analyze(float4Col, tinyIntCol));
    expect(ConversionType.IMPLICIT_UNSAFE, conversions.analyze(float4Col, smallIntCol));
    expect(ConversionType.IMPLICIT_UNSAFE, conversions.analyze(float4Col, intCol));
    expect(ConversionType.IMPLICIT_UNSAFE, conversions.analyze(float4Col, bigIntCol));
    expect(ConversionType.NONE, conversions.analyze(float4Col, float4Col));
    expect(ConversionType.IMPLICIT, conversions.analyze(float4Col, float8Col));
    expect(ConversionType.EXPLICIT, conversions.analyze(float4Col, decimalCol));
    expect(ConversionType.EXPLICIT, conversions.analyze(float4Col, stringCol));
    // Float8 --> x
    expect(ConversionType.IMPLICIT_UNSAFE, conversions.analyze(float8Col, tinyIntCol));
    expect(ConversionType.IMPLICIT_UNSAFE, conversions.analyze(float8Col, smallIntCol));
    expect(ConversionType.IMPLICIT_UNSAFE, conversions.analyze(float8Col, intCol));
    expect(ConversionType.IMPLICIT_UNSAFE, conversions.analyze(float8Col, bigIntCol));
    expect(ConversionType.IMPLICIT_UNSAFE, conversions.analyze(float8Col, float4Col));
    expect(ConversionType.NONE, conversions.analyze(float8Col, float8Col));
    expect(ConversionType.EXPLICIT, conversions.analyze(float8Col, decimalCol));
    expect(ConversionType.EXPLICIT, conversions.analyze(float8Col, stringCol));
    // Decimal --> x
    expect(ConversionType.EXPLICIT, conversions.analyze(decimalCol, tinyIntCol));
    expect(ConversionType.EXPLICIT, conversions.analyze(decimalCol, smallIntCol));
    expect(ConversionType.EXPLICIT, conversions.analyze(decimalCol, intCol));
    expect(ConversionType.EXPLICIT, conversions.analyze(decimalCol, bigIntCol));
    expect(ConversionType.EXPLICIT, conversions.analyze(decimalCol, float4Col));
    expect(ConversionType.EXPLICIT, conversions.analyze(decimalCol, float8Col));
    expect(ConversionType.NONE, conversions.analyze(decimalCol, decimalCol));
    expect(ConversionType.EXPLICIT, conversions.analyze(decimalCol, stringCol));
    // VarChar --> x
    expect(ConversionType.EXPLICIT, conversions.analyze(stringCol, tinyIntCol));
    expect(ConversionType.EXPLICIT, conversions.analyze(stringCol, smallIntCol));
    expect(ConversionType.EXPLICIT, conversions.analyze(stringCol, intCol));
    expect(ConversionType.EXPLICIT, conversions.analyze(stringCol, bigIntCol));
    expect(ConversionType.EXPLICIT, conversions.analyze(stringCol, float4Col));
    expect(ConversionType.EXPLICIT, conversions.analyze(stringCol, float8Col));
    expect(ConversionType.EXPLICIT, conversions.analyze(stringCol, decimalCol));
    expect(ConversionType.NONE, conversions.analyze(stringCol, stringCol));
}
Also used: ColumnMetadata(org.apache.drill.exec.record.metadata.ColumnMetadata) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)
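
The expect() calls above pin down a complete source-to-target matrix. Summarized below (rows are the source column type, columns the target; UNSAFE abbreviates IMPLICIT_UNSAFE, the narrowing conversions that can lose data):

Source   | TINYINT  SMALLINT INT      BIGINT   FLOAT4   FLOAT8   DECIMAL  VARCHAR
---------+----------------------------------------------------------------------
TINYINT  | NONE     IMPLICIT IMPLICIT IMPLICIT IMPLICIT IMPLICIT EXPLICIT EXPLICIT
SMALLINT | UNSAFE   NONE     IMPLICIT IMPLICIT IMPLICIT IMPLICIT EXPLICIT EXPLICIT
INT      | UNSAFE   UNSAFE   NONE     IMPLICIT IMPLICIT IMPLICIT EXPLICIT EXPLICIT
BIGINT   | UNSAFE   UNSAFE   UNSAFE   NONE     IMPLICIT IMPLICIT EXPLICIT EXPLICIT
FLOAT4   | UNSAFE   UNSAFE   UNSAFE   UNSAFE   NONE     IMPLICIT EXPLICIT EXPLICIT
FLOAT8   | UNSAFE   UNSAFE   UNSAFE   UNSAFE   UNSAFE   NONE     EXPLICIT EXPLICIT
DECIMAL  | EXPLICIT EXPLICIT EXPLICIT EXPLICIT EXPLICIT EXPLICIT NONE     EXPLICIT
VARCHAR  | EXPLICIT EXPLICIT EXPLICIT EXPLICIT EXPLICIT EXPLICIT EXPLICIT NONE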

Example 30 with SchemaBuilder

Use of org.apache.drill.exec.record.metadata.SchemaBuilder in project drill by apache.

The class TestDirectConverter, method testImplicitConversion.

/**
 * Tests the implicit conversions provided by the column writer itself.
 * No conversion mechanism is needed in this case.
 */
@Test
public void testImplicitConversion() {
    TupleMetadata schema = new SchemaBuilder()
        .add("ti", MinorType.TINYINT)
        .add("si", MinorType.SMALLINT)
        .add("int", MinorType.INT)
        .add("bi", MinorType.BIGINT)
        .add("fl", MinorType.FLOAT4)
        .add("db", MinorType.FLOAT8)
        .add("dec", MinorType.VARDECIMAL, 10, 0)
        .buildSchema();
    // Test allowed implicit conversions.
    RowSet actual = new RowSetBuilder(fixture.allocator(), schema)
        // int
        .addRow(11, 12, 13, 14, 15, 16, 17)
        // long
        .addRow(21L, 22L, 23L, 24L, 25L, 26L, 27L)
        // float
        .addRow(31F, 32F, 33F, 34F, 35F, 36F, 37F)
        // double
        .addRow(41D, 42D, 43D, 44D, 45D, 46D, 47D)
        // decimal
        .addRow(dec(51), dec(52), dec(53), dec(54), dec(55), dec(56), dec(57))
        .build();
    final SingleRowSet expected = fixture.rowSetBuilder(schema)
        .addRow(11, 12, 13, 14L, 15F, 16D, dec(17))
        .addRow(21, 22, 23, 24L, 25F, 26D, dec(27))
        .addRow(31, 32, 33, 34L, 35F, 36D, dec(37))
        .addRow(41, 42, 43, 44L, 45F, 46D, dec(47))
        .addRow(51, 52, 53, 54L, 55F, 56D, dec(57))
        .build();
    RowSetUtilities.verify(expected, actual);
}
Also used: RowSetBuilder(org.apache.drill.exec.physical.rowSet.RowSetBuilder) SingleRowSet(org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) RowSet(org.apache.drill.exec.physical.rowSet.RowSet) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)
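
The dec() calls are test-utility shorthand for building BigDecimal values; the real helper lives in Drill's test support classes. A plausible minimal form, shown only as a hypothetical reconstruction:

import java.math.BigDecimal;

// Presumed shape of the dec() helper: wrap a numeric literal as a
// BigDecimal so it can be written to, and compared against, the
// VARDECIMAL column.
static BigDecimal dec(long value) {
    return BigDecimal.valueOf(value);
}

The point of the test is that the column writers accept any of int, long, float, double, or BigDecimal for each numeric column and convert on write, so all five differently-typed input rows compare equal to the uniformly-typed expected rows.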

Aggregations

Classes most often used together with SchemaBuilder, with usage counts:

SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder): 1095
Test (org.junit.Test): 1020
TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata): 1008
RowSet (org.apache.drill.exec.physical.rowSet.RowSet): 588
SubOperatorTest (org.apache.drill.test.SubOperatorTest): 407
RowSetBuilder (org.apache.drill.exec.physical.rowSet.RowSetBuilder): 288
SingleRowSet (org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet): 263
ClusterTest (org.apache.drill.test.ClusterTest): 245
EvfTest (org.apache.drill.categories.EvfTest): 203
RowSetComparison (org.apache.drill.test.rowSet.RowSetComparison): 188
JsonTest (org.apache.drill.categories.JsonTest): 110
ResultSetLoader (org.apache.drill.exec.physical.resultSet.ResultSetLoader): 108
DirectRowSet (org.apache.drill.exec.physical.rowSet.DirectRowSet): 108
RowSetLoader (org.apache.drill.exec.physical.resultSet.RowSetLoader): 85
BatchSchemaBuilder (org.apache.drill.exec.record.BatchSchemaBuilder): 83
ScalarReader (org.apache.drill.exec.vector.accessor.ScalarReader): 68
UserException (org.apache.drill.common.exceptions.UserException): 62
BatchSchema (org.apache.drill.exec.record.BatchSchema): 62
VectorContainer (org.apache.drill.exec.record.VectorContainer): 58
BaseTest (org.apache.drill.test.BaseTest): 57