Search in sources :

Example 31 with SingleRowSet

use of org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet in project drill by apache.

the class TestNullColumnLoader method testCachedTypesMapToNullable.

/**
 * Checks that the null-column loader consults the vector cache when it
 * materializes missing columns.
 * <p>
 * Background: Drill needs "schema persistence". When one scan operator
 * reads file F1 and then file F2, both readers must hand back the very
 * same value vector instances (holding different data), not merely
 * vectors of matching type.
 * <p>
 * Scenario: F1 supplied columns (a, b, c) while F2 supplies only (a, b).
 * Column c must then be filled with nulls, yet keep the type it had in F1.
 * The vector cache remembers that type. Here the cache is seeded with
 * FLOAT8 columns in required, optional and repeated mode; a fourth column,
 * "unk", is absent from the cache and falls back to the nullable VARCHAR
 * null type. The expected schema shows the cached required column coming
 * back as nullable, and the optional and repeated vectors are expected to
 * be the cached instances themselves.
 */
@Test
public void testCachedTypesMapToNullable() {
    // One null column per cached mode, plus one the cache has never seen.
    final List<ResolvedNullColumn> nullCols = new ArrayList<>();
    for (final String colName : new String[] {"req", "opt", "rep", "unk"}) {
        nullCols.add(makeNullCol(colName));
    }

    // Seed the cache with a FLOAT8 column in each data mode.
    final ResultVectorCacheImpl vectorCache = new ResultVectorCacheImpl(fixture.allocator());
    vectorCache.vectorFor(
            SchemaBuilder.columnSchema("req", MinorType.FLOAT8, DataMode.REQUIRED));
    final ValueVector cachedOpt = vectorCache.vectorFor(
            SchemaBuilder.columnSchema("opt", MinorType.FLOAT8, DataMode.OPTIONAL));
    final ValueVector cachedRep = vectorCache.vectorFor(
            SchemaBuilder.columnSchema("rep", MinorType.FLOAT8, DataMode.REPEATED));

    // Columns missing from the cache become nullable Varchar.
    final MajorType unknownType = Types.optional(MinorType.VARCHAR);
    final NullColumnLoader loader =
            new NullColumnLoader(vectorCache, nullCols, unknownType, false);

    // Produce a two-row batch of null columns.
    final VectorContainer batch = loader.load(2);

    // The optional and repeated vectors must be the cached instances.
    assertSame(cachedOpt, batch.getValueVector(1).getValueVector());
    assertSame(cachedRep, batch.getValueVector(2).getValueVector());

    // Check both the resulting types and the (all-null / empty) values.
    final TupleMetadata expectedSchema = new SchemaBuilder()
            .addNullable("req", MinorType.FLOAT8)
            .addNullable("opt", MinorType.FLOAT8)
            .addArray("rep", MinorType.FLOAT8)
            .addNullable("unk", MinorType.VARCHAR)
            .buildSchema();
    final SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
            .addRow(null, null, new int[] {}, null)
            .addRow(null, null, new int[] {}, null)
            .build();
    RowSetUtilities.verify(expected, fixture.wrap(batch));

    loader.close();
}
Also used : ValueVector(org.apache.drill.exec.vector.ValueVector) SingleRowSet(org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) MajorType(org.apache.drill.common.types.TypeProtos.MajorType) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) ArrayList(java.util.ArrayList) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) ResultVectorCacheImpl(org.apache.drill.exec.physical.resultSet.impl.ResultVectorCacheImpl) NullResultVectorCacheImpl(org.apache.drill.exec.physical.resultSet.impl.NullResultVectorCacheImpl) VectorContainer(org.apache.drill.exec.record.VectorContainer) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 32 with SingleRowSet

use of org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet in project drill by apache.

the class TestNullColumnLoader method testSchemaWithConflicts.

/**
 * Exercises the conflicts that can arise between the provided output
 * schema and what the query asks for:
 * <ul>
 * <li>The schema says "required", but no default value exists for the
 * null column.</li>
 * <li>The query asks for a type other than the one in the schema.</li>
 * <li>The query asks for a mode other than the one in the schema.</li>
 * </ul>
 * <p>
 * The type and mode handed to the builder are those that schema smoothing
 * would produce. Normally they agree with the schema; this test pins down
 * the rules for when they do not.
 * <p>
 * As the expected rows show, a column that ends up nullable is filled with
 * NULL even if the schema declares a default for it ("strOpt"), whereas the
 * column that ends up required is filled with its default ("intOpt").
 */
@Test
public void testSchemaWithConflicts() {
    // Schema names are mixed case on purpose; the "projection" list
    // below refers to them with a lower-case first letter.
    final TupleMetadata schemaWithDefaults = new SchemaBuilder()
            .add("IntReq", MinorType.INT)
            // No default
            .add("StrReq", MinorType.VARCHAR)
            .addNullable("IntOpt", MinorType.INT)
            .addNullable("StrOpt", MinorType.VARCHAR)
            .buildSchema();
    schemaWithDefaults.metadata("intReq").setDefaultValue("10");
    schemaWithDefaults.metadata("intOpt").setDefaultValue("20");
    schemaWithDefaults.metadata("strOpt").setDefaultValue("bar");

    final ResultVectorCache vectorCache = new NullResultVectorCacheImpl(fixture.allocator());
    final NullColumnBuilder nullBuilder = new NullBuilderBuilder()
            .setNullType(Types.optional(MinorType.VARCHAR))
            .setOutputSchema(schemaWithDefaults)
            .build();

    // In the schema and required, but lacking a default: becomes optional.
    nullBuilder.add("strReq");
    nullBuilder.add("strOpt");

    // In the schema with a default, but the requested type differs:
    // the default is dropped (null), so the column becomes optional.
    nullBuilder.add("intReq", Types.required(MinorType.BIGINT));

    // In the schema with a default, only the mode differs: default is kept.
    nullBuilder.add("intOpt", Types.required(MinorType.INT));
    nullBuilder.build(vectorCache);

    // Produce a two-row batch.
    nullBuilder.load(2);

    // Check the resulting types and values.
    final TupleMetadata expectedSchema = new SchemaBuilder()
            .addNullable("strReq", MinorType.VARCHAR)
            .addNullable("strOpt", MinorType.VARCHAR)
            .addNullable("intReq", MinorType.BIGINT)
            .add("intOpt", MinorType.INT)
            .buildSchema();
    final SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
            .addRow(null, null, null, 20)
            .addRow(null, null, null, 20)
            .build();
    RowSetUtilities.verify(expected, fixture.wrap(nullBuilder.output()));

    nullBuilder.close();
}
Also used : ResultVectorCache(org.apache.drill.exec.physical.resultSet.ResultVectorCache) SingleRowSet(org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) NullResultVectorCacheImpl(org.apache.drill.exec.physical.resultSet.impl.NullResultVectorCacheImpl) NullBuilderBuilder(org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder.NullBuilderBuilder) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 33 with SingleRowSet

use of org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet in project drill by apache.

the class TestNullColumnLoader method testNullColumnBuilderWithSchema.

/**
 * Verifies that an output schema carrying a default-value property
 * supplies the fill value for missing columns.
 * <p>
 * Per the expected rows below: the two required columns receive their
 * defaults ("foo" and 10), every nullable column is NULL whether or not a
 * default was declared, and a column unknown to the schema ("extra") takes
 * the builder's null type, nullable VARCHAR.
 */
@Test
public void testNullColumnBuilderWithSchema() {
    // Schema names are mixed case on purpose; the "projection" list
    // below refers to them with a lower-case first letter.
    final TupleMetadata schemaWithDefaults = new SchemaBuilder()
            .add("IntReq", MinorType.INT)
            .add("StrReq", MinorType.VARCHAR)
            .addNullable("IntOpt", MinorType.INT)
            .addNullable("StrOpt", MinorType.VARCHAR)
            // No default
            .addNullable("DubOpt", MinorType.FLOAT8)
            .buildSchema();
    schemaWithDefaults.metadata("intReq").setDefaultValue("10");
    schemaWithDefaults.metadata("strReq").setDefaultValue("foo");
    schemaWithDefaults.metadata("intOpt").setDefaultValue("20");
    schemaWithDefaults.metadata("strOpt").setDefaultValue("bar");

    final ResultVectorCache vectorCache = new NullResultVectorCacheImpl(fixture.allocator());
    final NullColumnBuilder nullBuilder = new NullBuilderBuilder()
            .setNullType(Types.optional(MinorType.VARCHAR))
            .setOutputSchema(schemaWithDefaults)
            .build();

    // Request the null columns; the order here fixes the output column order.
    final String[] projected = {"strReq", "strOpt", "dubOpt", "intReq", "intOpt", "extra"};
    for (final String colName : projected) {
        nullBuilder.add(colName);
    }
    nullBuilder.build(vectorCache);

    // Produce a two-row batch.
    nullBuilder.load(2);

    // Check the resulting types and values.
    final TupleMetadata expectedSchema = new SchemaBuilder()
            .add("strReq", MinorType.VARCHAR)
            .addNullable("strOpt", MinorType.VARCHAR)
            .addNullable("dubOpt", MinorType.FLOAT8)
            .add("intReq", MinorType.INT)
            .addNullable("intOpt", MinorType.INT)
            .addNullable("extra", MinorType.VARCHAR)
            .buildSchema();
    final SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
            .addRow("foo", null, null, 10, null, null)
            .addRow("foo", null, null, 10, null, null)
            .build();
    RowSetUtilities.verify(expected, fixture.wrap(nullBuilder.output()));

    nullBuilder.close();
}
Also used : ResultVectorCache(org.apache.drill.exec.physical.resultSet.ResultVectorCache) SingleRowSet(org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) NullResultVectorCacheImpl(org.apache.drill.exec.physical.resultSet.impl.NullResultVectorCacheImpl) NullBuilderBuilder(org.apache.drill.exec.physical.impl.scan.project.NullColumnBuilder.NullBuilderBuilder) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 34 with SingleRowSet

use of org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet in project drill by apache.

the class TestColumnsArray method testReqularCol.

/**
 * Confirms that when the special columns column is not required,
 * a column named `columns` behaves like any ordinary column: a table
 * schema declaring it as a plain VARCHAR round-trips one row unchanged.
 * <p>
 * NOTE(review): the method name looks like a misspelling of "Regular";
 * it is left as-is to keep the test's identity stable.
 */
@Test
public void testReqularCol() {
    // First argument false: presumably "columns not required" - confirm
    // against buildScan().
    ScanSchemaOrchestrator scanOrchestrator = buildScan(
            false,
            RowSetTestUtils.projectList(ColumnsScanFramework.COLUMNS_COL));

    // The table exposes `columns` as a simple required VARCHAR.
    TupleMetadata readerSchema = new SchemaBuilder()
            .add(ColumnsScanFramework.COLUMNS_COL, MinorType.VARCHAR)
            .buildSchema();

    ReaderSchemaOrchestrator readerOrchestrator = scanOrchestrator.startReader();
    ResultSetLoader loader = readerOrchestrator.makeTableLoader(readerSchema);
    readerOrchestrator.defineSchema();

    // Write a single row in one batch.
    readerOrchestrator.startBatch();
    loader.writer().addRow("fred");
    readerOrchestrator.endBatch();

    // Scan output must match the table schema and the row written above.
    SingleRowSet expected = fixture.rowSetBuilder(readerSchema)
            .addRow("fred")
            .build();
    RowSetUtilities.verify(expected, fixture.wrap(scanOrchestrator.output()));

    scanOrchestrator.close();
}
Also used : SingleRowSet(org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) ResultSetLoader(org.apache.drill.exec.physical.resultSet.ResultSetLoader) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) ScanSchemaOrchestrator(org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator) ReaderSchemaOrchestrator(org.apache.drill.exec.physical.impl.scan.project.ReaderSchemaOrchestrator) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 35 with SingleRowSet

use of org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet in project drill by apache.

the class TestScanOperExecLateSchema method testLateSchemaLifecycle.

/**
 * Simplest possible check of a reader that works out its schema while
 * reading. It walks through the basic life-cycle steps (schema build,
 * one data batch, end of data, close) so they are known to work before
 * more elaborate variations are attempted.
 */
@Test
public void testLateSchemaLifecycle() {
    // Mock reader yielding two batches: a schema-only one, then one with data.
    MockLateSchemaReader lateReader = new MockLateSchemaReader();
    lateReader.batchLimit = 2;
    lateReader.returnDataOnFirst = false;

    // Wrap the reader in a scan operator.
    ScanFixture scanFix = simpleFixture(lateReader);
    ScanOperatorExec scanOp = scanFix.scanOp;

    // Nothing has been opened before the first call.
    assertFalse(lateReader.openCalled);

    // Schema-building step. The reader is no help here: its first
    // batch is empty.
    assertTrue(scanOp.buildSchema());
    assertTrue(lateReader.openCalled);
    assertEquals(1, lateReader.batchCount);
    assertEquals(0, scanOp.batchAccessor().rowCount());

    // Build the expected rows and compare the schema right away.
    SingleRowSet expectedRows = makeExpected(20);
    RowSetComparison comparison = new RowSetComparison(expectedRows);
    assertEquals(expectedRows.batchSchema(), scanOp.batchAccessor().schema());

    // Second call delivers the data batch.
    assertTrue(scanOp.next());
    comparison.verifyAndClearAll(fixture.wrap(scanOp.batchAccessor().container()));

    // End of data: reader is closed and the output is empty.
    assertFalse(scanOp.next());
    assertTrue(lateReader.closeCalled);
    assertEquals(0, scanOp.batchAccessor().rowCount());

    scanFix.close();
}
Also used : ScanFixture(org.apache.drill.exec.physical.impl.scan.ScanTestUtils.ScanFixture) SingleRowSet(org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) RowSetComparison(org.apache.drill.test.rowSet.RowSetComparison) Test(org.junit.Test)

Aggregations

SingleRowSet (org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) 257, Test (org.junit.Test) 241, SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder) 237, TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata) 234, SubOperatorTest (org.apache.drill.test.SubOperatorTest) 207, RowSet (org.apache.drill.exec.physical.rowSet.RowSet) 69, ScalarReader (org.apache.drill.exec.vector.accessor.ScalarReader) 62, ResultSetLoader (org.apache.drill.exec.physical.resultSet.ResultSetLoader) 61, RowSetLoader (org.apache.drill.exec.physical.resultSet.RowSetLoader) 54, ValueVector (org.apache.drill.exec.vector.ValueVector) 32, EvfTest (org.apache.drill.categories.EvfTest) 30, ScalarWriter (org.apache.drill.exec.vector.accessor.ScalarWriter) 29, RowSetBuilder (org.apache.drill.exec.physical.rowSet.RowSetBuilder) 27, TupleWriter (org.apache.drill.exec.vector.accessor.TupleWriter) 27, RowSetComparison (org.apache.drill.test.rowSet.RowSetComparison) 27, ExtendableRowSet (org.apache.drill.exec.physical.rowSet.RowSet.ExtendableRowSet) 25, RepeatedValueVector (org.apache.drill.exec.vector.complex.RepeatedValueVector) 24, BatchSchemaBuilder (org.apache.drill.exec.record.BatchSchemaBuilder) 23, VectorContainer (org.apache.drill.exec.record.VectorContainer) 22, ArrayReader (org.apache.drill.exec.vector.accessor.ArrayReader) 22