use of org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow in project drill by apache.
the class TestSchemaSmoothing method testDiscrete.
/**
* Sanity test for the simple, discrete case. The purpose of
* discrete is just to run the basic lifecycle in a way that
* is compatible with the schema-persistence version.
*/
@Test
public void testDiscrete() {
  // Set up the file metadata manager
  Path filePathA = new Path("hdfs:///w/x/y/a.csv");
  Path filePathB = new Path("hdfs:///w/x/y/b.csv");
  ImplicitColumnManager metadataManager = new ImplicitColumnManager(fixture.getOptionManager(),
      standardOptions(Lists.newArrayList(filePathA, filePathB)));

  // Set up the scan level projection
  ScanLevelProjection scanProj = ScanLevelProjection.build(
      RowSetTestUtils.projectList(ScanTestUtils.FILE_NAME_COL, "a", "b"),
      ScanTestUtils.parsers(metadataManager.projectionParser()));

  {
    // Define a file a.csv
    metadataManager.startFile(filePathA);

    // Build the output schema from the (a, b) table schema
    TupleMetadata twoColSchema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .addNullable("b", MinorType.VARCHAR, 10)
        .buildSchema();
    final NullColumnBuilder builder = new NullBuilderBuilder().build();
    ResolvedRow rootTuple = new ResolvedRow(builder);
    new ExplicitSchemaProjection(scanProj, twoColSchema, rootTuple,
        ScanTestUtils.resolvers(metadataManager));

    // Verify the full output schema
    TupleMetadata expectedSchema = new SchemaBuilder()
        .add("filename", MinorType.VARCHAR)
        .add("a", MinorType.INT)
        .addNullable("b", MinorType.VARCHAR, 10)
        .buildSchema();

    // Verify
    List<ResolvedColumn> columns = rootTuple.columns();
    assertEquals(3, columns.size());
    assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema));
    assertEquals(ScanTestUtils.FILE_NAME_COL, columns.get(0).name());
    assertEquals("a.csv", ((FileMetadataColumn) columns.get(0)).value());
    assertTrue(columns.get(1) instanceof ResolvedTableColumn);
  }
  {
    // Define a file b.csv
    metadataManager.startFile(filePathB);

    // Build the output schema from the (a) table schema
    TupleMetadata oneColSchema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .buildSchema();
    final NullColumnBuilder builder = new NullBuilderBuilder().build();
    ResolvedRow rootTuple = new ResolvedRow(builder);
    new ExplicitSchemaProjection(scanProj, oneColSchema, rootTuple,
        ScanTestUtils.resolvers(metadataManager));

    // Verify the full output schema
    //
    // Since this mode is "discrete", we don't remember the type
    // of the missing column. (Instead, it is filled in at the
    // vector level as part of vector persistence.) During projection,
    // it is marked with type NULL so that the null column builder will
    // fill in the proper type.
    TupleMetadata expectedSchema = new SchemaBuilder()
        .add("filename", MinorType.VARCHAR)
        .add("a", MinorType.INT)
        .addNullable("b", MinorType.NULL)
        .buildSchema();

    // Verify
    List<ResolvedColumn> columns = rootTuple.columns();
    assertEquals(3, columns.size());
    assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(expectedSchema));
    assertEquals(ScanTestUtils.FILE_NAME_COL, columns.get(0).name());
    assertEquals("b.csv", ((FileMetadataColumn) columns.get(0)).value());
    assertTrue(columns.get(1) instanceof ResolvedTableColumn);
    assertTrue(columns.get(2) instanceof ResolvedNullColumn);
  }
}
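The comment in the second block above leaves the missing column's type as NULL so that the null-column builder can pick a concrete type later. As a hedged aside (an assumption about the framework's defaults, not shown in this excerpt): the default null type is nullable INT, and a caller can override it when constructing the builder, roughly as follows.

  // Assumption: NullBuilderBuilder.setNullType() overrides the default null type
  // (historically nullable INT). With this builder, the missing "b" column above
  // would materialize as a nullable VARCHAR instead of the default.
  final NullColumnBuilder builder = new NullBuilderBuilder()
      .setNullType(Types.optional(MinorType.VARCHAR)) // org.apache.drill.common.types.Types
      .build();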
use of org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow in project drill by apache.
the class TestSchemaSmoothing method testDifferentTypes.
/**
* Column names match, but types differ. Discard the prior schema.
*/
@Test
public void testDifferentTypes() {
  final ScanLevelProjection scanProj = ScanLevelProjection.build(RowSetTestUtils.projectAll(), ScanTestUtils.parsers());
  final SchemaSmoother smoother = new SchemaSmoother(scanProj, ScanTestUtils.resolvers());
  final TupleMetadata priorSchema = new SchemaBuilder()
      .add("a", MinorType.INT)
      .add("b", MinorType.VARCHAR)
      .buildSchema();
  final TupleMetadata tableSchema = new SchemaBuilder()
      .add("a", MinorType.INT)
      .addNullable("b", MinorType.VARCHAR)
      .buildSchema();
  {
    doResolve(smoother, priorSchema);
  }
  {
    final ResolvedRow rootTuple = doResolve(smoother, tableSchema);
    assertEquals(2, smoother.schemaVersion());
    assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(tableSchema));
  }
}
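The doResolve() helper called in this and the following tests is not included in this excerpt. A minimal sketch, assuming it simply packages the per-table steps spelled out in testDiscrete (null-column builder, root tuple, resolution via the smoother), might look like:

  // Hypothetical reconstruction of the test helper; the real method may differ.
  private ResolvedRow doResolve(SchemaSmoother smoother, TupleMetadata schema) {
    final NullColumnBuilder builder = new NullBuilderBuilder().build();
    final ResolvedRow rootTuple = new ResolvedRow(builder);
    smoother.resolve(schema, rootTuple);
    return rootTuple;
  }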
use of org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow in project drill by apache.
the class TestSchemaSmoothing method testRequired.
/**
* Can't preserve the prior schema if it had required columns
* where the new schema has no columns.
*/
@Test
public void testRequired() {
  final ScanLevelProjection scanProj = ScanLevelProjection.build(RowSetTestUtils.projectAll(), ScanTestUtils.parsers());
  final SchemaSmoother smoother = new SchemaSmoother(scanProj, ScanTestUtils.resolvers());
  final TupleMetadata priorSchema = new SchemaBuilder()
      .add("a", MinorType.INT)
      .addNullable("b", MinorType.VARCHAR)
      .buildSchema();
  final TupleMetadata tableSchema = new SchemaBuilder()
      .addNullable("b", MinorType.VARCHAR)
      .buildSchema();
  {
    doResolve(smoother, priorSchema);
  }
  {
    final ResolvedRow rootTuple = doResolve(smoother, tableSchema);
    assertEquals(2, smoother.schemaVersion());
    assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(tableSchema));
  }
}
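The distinction that drives this test is the column mode: SchemaBuilder.add() declares a required (non-nullable) column, while addNullable() declares an optional one, so the prior required "a" cannot be back-filled with nulls for the new table. A quick illustration of that mode difference, shown here for context and assuming the usual Drill metadata accessors:

  // add() => DataMode.REQUIRED, addNullable() => DataMode.OPTIONAL
  TupleMetadata schema = new SchemaBuilder()
      .add("a", MinorType.INT)
      .addNullable("b", MinorType.VARCHAR)
      .buildSchema();
  assertEquals(DataMode.REQUIRED, schema.metadata("a").mode());
  assertEquals(DataMode.OPTIONAL, schema.metadata("b").mode());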
use of org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow in project drill by apache.
the class TestSchemaSmoothing method testDifferentCase.
/**
* The prior and table schemas are identical, but the cases of names differ.
* Preserve the case of the first schema.
*/
@Test
public void testDifferentCase() {
  final ScanLevelProjection scanProj = ScanLevelProjection.build(RowSetTestUtils.projectAll(), ScanTestUtils.parsers());
  final SchemaSmoother smoother = new SchemaSmoother(scanProj, ScanTestUtils.resolvers());
  final TupleMetadata priorSchema = new SchemaBuilder()
      .add("a", MinorType.INT)
      .add("b", MinorType.VARCHAR)
      .buildSchema();
  final TupleMetadata tableSchema = new SchemaBuilder()
      .add("A", MinorType.INT)
      .add("B", MinorType.VARCHAR)
      .buildSchema();
  {
    doResolve(smoother, priorSchema);
  }
  {
    final ResolvedRow rootTuple = doResolve(smoother, tableSchema);
    assertEquals(1, smoother.schemaVersion());
    assertTrue(ScanTestUtils.schema(rootTuple).isEquivalent(priorSchema));
    final List<ResolvedColumn> columns = rootTuple.columns();
    assertEquals("a", columns.get(0).name());
  }
}
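Drill treats column names as case-insensitive, which is why the incoming "A" and "B" resolve to the prior "a" and "b" and the schema version stays at 1. A small hedged illustration of that lookup behavior:

  // Assumption: name lookup in TupleMetadata is case-insensitive, in line with
  // Drill's general case-insensitive column naming, so "A" finds the prior "a".
  TupleMetadata prior = new SchemaBuilder().add("a", MinorType.INT).buildSchema();
  assertNotNull(prior.metadata("A"));
  assertEquals("a", prior.metadata("A").name());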
use of org.apache.drill.exec.physical.impl.scan.project.ResolvedTuple.ResolvedRow in project drill by apache.
the class TestSchemaSmoothing method testSmoothableSchemaBatches.
/**
* Integrated test across multiple schemas at the batch level.
*/
@Test
public void testSmoothableSchemaBatches() {
  final ScanLevelProjection scanProj = ScanLevelProjection.build(RowSetTestUtils.projectAll(), ScanTestUtils.parsers());
  final SchemaSmoother smoother = new SchemaSmoother(scanProj, ScanTestUtils.resolvers());

  // Table 1: (a: nullable bigint, b: nullable varchar, c: double)
  final TupleMetadata schema1 = new SchemaBuilder()
      .addNullable("a", MinorType.BIGINT)
      .addNullable("b", MinorType.VARCHAR)
      .add("c", MinorType.FLOAT8)
      .buildSchema();
  {
    final ResolvedRow rootTuple = doResolve(smoother, schema1);

    // Just use the original schema.
    assertTrue(schema1.isEquivalent(ScanTestUtils.schema(rootTuple)));
    assertEquals(1, smoother.schemaVersion());
  }

  // Table 2: (a: nullable bigint, c: double), nullable column b omitted,
  // original schema preserved
  final TupleMetadata schema2 = new SchemaBuilder()
      .addNullable("a", MinorType.BIGINT)
      .add("c", MinorType.FLOAT8)
      .buildSchema();
  {
    final ResolvedRow rootTuple = doResolve(smoother, schema2);
    assertTrue(schema1.isEquivalent(ScanTestUtils.schema(rootTuple)));
    assertEquals(1, smoother.schemaVersion());
  }

  // Table 3: (a, b, c, d), column d added, must replan schema
  final TupleMetadata schema3 = new SchemaBuilder()
      .addNullable("a", MinorType.BIGINT)
      .addNullable("b", MinorType.VARCHAR)
      .add("c", MinorType.FLOAT8)
      .add("d", MinorType.INT)
      .buildSchema();
  {
    final ResolvedRow rootTuple = doResolve(smoother, schema3);
    assertTrue(schema3.isEquivalent(ScanTestUtils.schema(rootTuple)));
    assertEquals(2, smoother.schemaVersion());
  }

  // Table 4: (a, b), drops the non-nullable columns c and d, must replan schema
  final TupleMetadata schema4 = new SchemaBuilder()
      .addNullable("a", MinorType.BIGINT)
      .addNullable("b", MinorType.VARCHAR)
      .buildSchema();
  {
    final ResolvedRow rootTuple = doResolve(smoother, schema4);
    assertTrue(schema4.isEquivalent(ScanTestUtils.schema(rootTuple)));
    assertEquals(3, smoother.schemaVersion());
  }

  // Table 5: (a: double, b), type of a changed, must replan schema
  final TupleMetadata schema5 = new SchemaBuilder()
      .addNullable("a", MinorType.FLOAT8)
      .addNullable("b", MinorType.VARCHAR)
      .buildSchema();
  {
    final ResolvedRow rootTuple = doResolve(smoother, schema5);
    assertTrue(schema5.isEquivalent(ScanTestUtils.schema(rootTuple)));
    assertEquals(4, smoother.schemaVersion());
  }
}
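Pulling the five tables together, the behavior exercised here can be paraphrased as a reuse rule: keep the prior schema only when every incoming column matches a prior column by (case-insensitive) name, type, and mode, and every prior column missing from the incoming schema is nullable; otherwise adopt the new schema and bump the version. A hypothetical standalone sketch of that rule (not Drill's actual SchemaSmoother or SmoothingProjection code, which also handles projection and column order):

  // Hypothetical paraphrase of the smoothing decision exercised above.
  boolean canReusePriorSchema(TupleMetadata prior, TupleMetadata incoming) {
    for (ColumnMetadata col : incoming) {
      ColumnMetadata priorCol = prior.metadata(col.name());
      if (priorCol == null || priorCol.type() != col.type() || priorCol.mode() != col.mode()) {
        return false; // new column, or same name with a different type/nullability
      }
    }
    for (ColumnMetadata priorCol : prior) {
      if (incoming.metadata(priorCol.name()) == null && priorCol.mode() == DataMode.REQUIRED) {
        return false; // a required prior column has no source and cannot be null-filled
      }
    }
    return true; // incoming is a compatible subset: keep the prior schema and version
  }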