Examples with BatchSchemaBuilder - org.apache.drill.exec.record.BatchSchemaBuilder

Example 1 with BatchSchemaBuilder

Use of org.apache.drill.exec.record.BatchSchemaBuilder in project drill by apache.

In class TestScanOrchestratorEarlySchema, method testEarlySchemaSelectNone.

/**
 * Exercises an early-schema scan whose project list is empty while the
 * table itself declares the columns (a, b).
 */
@Test
public void testEarlySchemaSelectNone() {
    // Empty project list, the kind a SELECT COUNT(*) style query produces.
    ScanOrchestratorBuilder scanBuilder = new MockScanBuilder();
    scanBuilder.projection(RowSetTestUtils.projectList());
    ScanSchemaOrchestrator orchestrator =
        new ScanSchemaOrchestrator(fixture.allocator(), scanBuilder);

    // ... FROM table: open a reader and hand it the file schema (a, b).
    ReaderSchemaOrchestrator readerOrchestrator = orchestrator.startReader();
    TupleMetadata fileSchema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .add("b", MinorType.VARCHAR)
        .buildSchema();
    ResultSetLoader tableLoader = readerOrchestrator.makeTableLoader(fileSchema);

    // With nothing selected, the table loader must report both table
    // columns as unprojected.
    assertTrue(tableLoader.isProjectionEmpty());
    assertFalse(tableLoader.writer().column("a").isProjected());
    assertFalse(tableLoader.writer().column("b").isProjected());

    // The expected output schema carries no columns at all.
    BatchSchema emptySchema = new BatchSchemaBuilder()
        .withSchemaBuilder(new SchemaBuilder())
        .build();

    // First batch: write two rows through the loader.
    readerOrchestrator.startBatch();
    tableLoader.writer().addRow(1, "fred").addRow(2, "wilma");
    readerOrchestrator.endBatch();

    // The output should hold two rows with no data in them.
    SingleRowSet twoEmptyRows = fixture.rowSetBuilder(emptySchema)
        .addRow()
        .addRow()
        .build();
    RowSetUtilities.verify(twoEmptyRows, fixture.wrap(orchestrator.output()));

    // Second batch: use the fast path that just skips rows.
    readerOrchestrator.startBatch();
    tableLoader.skipRows(10);
    readerOrchestrator.endBatch();

    // Only the row count can be checked; then release the vectors.
    VectorContainer skippedBatch = orchestrator.output();
    assertEquals(10, skippedBatch.getRecordCount());
    skippedBatch.zeroVectors();

    orchestrator.close();
}

Also used : SingleRowSet(org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) ResultSetLoader(org.apache.drill.exec.physical.resultSet.ResultSetLoader) BatchSchema(org.apache.drill.exec.record.BatchSchema) ScanOrchestratorBuilder(org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator.ScanOrchestratorBuilder) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) BatchSchemaBuilder(org.apache.drill.exec.record.BatchSchemaBuilder) BatchSchemaBuilder(org.apache.drill.exec.record.BatchSchemaBuilder) MockScanBuilder(org.apache.drill.exec.physical.impl.scan.ScanTestUtils.MockScanBuilder) ScanSchemaOrchestrator(org.apache.drill.exec.physical.impl.scan.project.ScanSchemaOrchestrator) ReaderSchemaOrchestrator(org.apache.drill.exec.physical.impl.scan.project.ReaderSchemaOrchestrator) VectorContainer(org.apache.drill.exec.record.VectorContainer) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 2 with BatchSchemaBuilder

Use of org.apache.drill.exec.record.BatchSchemaBuilder in project drill by apache.

In class TestOperatorRecordBatch, method testBatchAccessor.

/**
 * The record batch abstraction has a bunch of methods to work with a vector container.
 * Rather than simply exposing the container itself, the batch instead exposes various
 * container operations. Probably an artifact of its history. In any event, make
 * sure those methods are passed through to the container accessor.
 * <p>
 * Any unexpected exception raised while the batch is open is rethrown as an
 * {@link AssertionError} that keeps the original exception as its cause, so the
 * failure report still shows where the problem originated.
 */
@Test
public void testBatchAccessor() {
    SchemaBuilder schemaBuilder = new SchemaBuilder().add("a", MinorType.INT).add("b", MinorType.VARCHAR);
    BatchSchema schema = new BatchSchemaBuilder().withSchemaBuilder(schemaBuilder).build();
    SingleRowSet rs = fixture.rowSetBuilder(schema).addRow(10, "fred").addRow(20, "wilma").build();
    MockOperatorExec opExec = new MockOperatorExec(rs.container());
    opExec.nextCalls = 1;
    try (OperatorRecordBatch opBatch = makeOpBatch(opExec)) {
        assertEquals(IterOutcome.OK_NEW_SCHEMA, opBatch.next());
        assertEquals(schema, opBatch.getSchema());
        assertEquals(2, opBatch.getRecordCount());
        assertSame(rs.container(), opBatch.getOutgoingContainer());
        // Iteration order should follow the schema: "a" first, then "b".
        Iterator<VectorWrapper<?>> iter = opBatch.iterator();
        assertEquals("a", iter.next().getValueVector().getField().getName());
        assertEquals("b", iter.next().getValueVector().getField().getName());
        // Not a full test of the schema path; just make sure that the
        // pass-through to the Vector Container works.
        SchemaPath path = SchemaPath.create(NamePart.newBuilder().setName("a").build());
        TypedFieldId id = opBatch.getValueVectorId(path);
        assertEquals(MinorType.INT, id.getFinalType().getMinorType());
        assertEquals(1, id.getFieldIds().length);
        assertEquals(0, id.getFieldIds()[0]);
        path = SchemaPath.create(NamePart.newBuilder().setName("b").build());
        id = opBatch.getValueVectorId(path);
        assertEquals(MinorType.VARCHAR, id.getFinalType().getMinorType());
        assertEquals(1, id.getFieldIds().length);
        assertEquals(1, id.getFieldIds()[0]);
        // Sanity check of getValueAccessorById()
        VectorWrapper<?> w = opBatch.getValueAccessorById(IntVector.class, 0);
        assertNotNull(w);
        assertEquals("a", w.getValueVector().getField().getName());
        w = opBatch.getValueAccessorById(VarCharVector.class, 1);
        assertNotNull(w);
        assertEquals("b", w.getValueVector().getField().getName());
        // Both selection-vector accessors are expected to be unsupported here.
        try {
            opBatch.getSelectionVector2();
            fail();
        } catch (UnsupportedOperationException expected) {
            // Expected
        }
        try {
            opBatch.getSelectionVector4();
            fail();
        } catch (UnsupportedOperationException expected) {
            // Expected
        }
    } catch (Exception e) {
        // Fail the test, but keep the original exception as the cause so its
        // type and stack trace are not lost (fail(e.getMessage()) dropped both).
        throw new AssertionError(e.getMessage(), e);
    }
    // Leaving the try-with-resources block must have closed the operator.
    assertTrue(opExec.closeCalled);
}

Also used : SingleRowSet(org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) VectorWrapper(org.apache.drill.exec.record.VectorWrapper) BatchSchemaBuilder(org.apache.drill.exec.record.BatchSchemaBuilder) VarCharVector(org.apache.drill.exec.vector.VarCharVector) UserException(org.apache.drill.common.exceptions.UserException) BatchSchema(org.apache.drill.exec.record.BatchSchema) SchemaPath(org.apache.drill.common.expression.SchemaPath) TypedFieldId(org.apache.drill.exec.record.TypedFieldId) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) BatchSchemaBuilder(org.apache.drill.exec.record.BatchSchemaBuilder) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 3 with BatchSchemaBuilder

Use of org.apache.drill.exec.record.BatchSchemaBuilder in project drill by apache.

In class TestFileScanFramework, method testMetadataColumns.

/**
 * Sanity check that a scan can project two implicit columns
 * ({@code filename}, {@code suffix}) alongside the table columns in table
 * order. Exhaustive implicit-column testing lives with the lower-level
 * components.
 */
@Test
public void testMetadataColumns() {
    // A mock reader limited to one batch.
    MockEarlySchemaReader mockReader = new MockEarlySchemaReader();
    mockReader.batchLimit = 1;

    // Project both table columns plus two implicit columns.
    FileScanFixtureBuilder fixtureBuilder = new FileScanFixtureBuilder();
    fixtureBuilder.setProjection("a", "b", "filename", "suffix");
    fixtureBuilder.addReader(mockReader);
    ScanFixture scanFixture = fixtureBuilder.build();
    ScanOperatorExec scanOp = scanFixture.scanOp;

    // Expected result: table data followed by the implicit columns.
    SchemaBuilder expectedColumns = new SchemaBuilder()
        .add("a", MinorType.INT)
        .addNullable("b", MinorType.VARCHAR, 10)
        .add("filename", MinorType.VARCHAR)
        .add("suffix", MinorType.VARCHAR);
    BatchSchema expectedSchema = new BatchSchemaBuilder()
        .withSchemaBuilder(expectedColumns)
        .build();
    SingleRowSet expectedRows = fixture.rowSetBuilder(expectedSchema)
        .addRow(10, "fred", MOCK_FILE_NAME, MOCK_SUFFIX)
        .addRow(20, "wilma", MOCK_FILE_NAME, MOCK_SUFFIX)
        .build();

    // The schema batch must already include the implicit columns.
    assertTrue(scanOp.buildSchema());
    assertEquals(expectedSchema, scanOp.batchAccessor().schema());
    scanOp.batchAccessor().release();

    // The single data batch carries the implicit column values.
    assertTrue(scanOp.next());
    RowSetUtilities.verify(expectedRows, fixture.wrap(scanOp.batchAccessor().container()));

    // After that batch the scan reports EOF with no rows.
    assertFalse(scanOp.next());
    assertEquals(0, scanOp.batchAccessor().rowCount());

    scanFixture.close();
}

Also used : ScanFixture(org.apache.drill.exec.physical.impl.scan.ScanTestUtils.ScanFixture) SingleRowSet(org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) BatchSchema(org.apache.drill.exec.record.BatchSchema) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) BatchSchemaBuilder(org.apache.drill.exec.record.BatchSchemaBuilder) BatchSchemaBuilder(org.apache.drill.exec.record.BatchSchemaBuilder) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 4 with BatchSchemaBuilder

Use of org.apache.drill.exec.record.BatchSchemaBuilder in project drill by apache.

In class TestParquetMetadataCache, method testEmptyDirectoryWithMetadataDirFile.

/**
 * Queries a directory tree that contains only empty sub-directories plus a
 * copied metadata-directories file, and checks the result against an
 * expected schema with no columns.
 */
@Test
public void testEmptyDirectoryWithMetadataDirFile() throws Exception {
    final String tableDir = "empty_directory";

    // Lay out the root directory and its two sub-directories.
    dirTestWatcher.makeTestTmpSubDir(Paths.get(tableDir));
    dirTestWatcher.makeTestTmpSubDir(Paths.get(tableDir, "t2"));
    dirTestWatcher.makeTestTmpSubDir(Paths.get(tableDir, "t1"));

    // Copy the v3_1 metadata-directories resource into the root directory.
    dirTestWatcher.copyResourceToTestTmp(
        Paths.get("parquet", "metadata_files_with_old_versions", "v3_1", "metadata_directories.requires_replace.txt"),
        Paths.get(tableDir, Metadata.METADATA_DIRECTORIES_FILENAME));

    // Expected schema: built from an empty SchemaBuilder, so no columns.
    final BatchSchema noColumnsSchema = new BatchSchemaBuilder()
        .withSchemaBuilder(new SchemaBuilder())
        .build();

    testBuilder()
        .sqlQuery("select * from dfs.tmp.`%s`", tableDir)
        .schemaBaseLine(noColumnsSchema)
        .build()
        .run();
}

Also used : BatchSchema(org.apache.drill.exec.record.BatchSchema) BatchSchemaBuilder(org.apache.drill.exec.record.BatchSchemaBuilder) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) BatchSchemaBuilder(org.apache.drill.exec.record.BatchSchemaBuilder) Test(org.junit.Test) UnlikelyTest(org.apache.drill.categories.UnlikelyTest)

Example 5 with BatchSchemaBuilder

Use of org.apache.drill.exec.record.BatchSchemaBuilder in project drill by apache.

In class TestParquetWriterEmptyFiles, method testWriteEmptySchemaChange.

/**
 * Runs a CTAS over the schema-change JSON inputs with the filter
 * {@code id = 0}, then checks the schema read back from the new table and
 * that exactly one parquet file was written.
 */
@Test
public void testWriteEmptySchemaChange() throws Exception {
    final String tableName = "testparquetwriteremptyfiles_testwriteemptyschemachange";
    final File tableDir = FileUtils.getFile(dirTestWatcher.getDfsTestTmpDir(), tableName);

    test("CREATE TABLE dfs.tmp.%s AS select id, a, b from dfs.`schemachange/multi/*.json` WHERE id = 0", tableName);

    // Only the schema of the last scan is written.
    SchemaBuilder lastScanColumns = new SchemaBuilder()
        .addNullable("id", TypeProtos.MinorType.BIGINT)
        .addNullable("a", TypeProtos.MinorType.BIGINT)
        .addNullable("b", TypeProtos.MinorType.BIT);
    BatchSchema expectedSchema = new BatchSchemaBuilder()
        .withSchemaBuilder(lastScanColumns)
        .build();

    testBuilder()
        .unOrdered()
        .sqlQuery("select * from dfs.tmp.%s", tableName)
        .schemaBaseLine(expectedSchema)
        .go();

    // Exactly one file whose name ends in "parquet" should exist in the table directory.
    String[] parquetFiles = tableDir.list((dir, name) -> name.endsWith("parquet"));
    assertEquals(1, parquetFiles.length);
}

Also used : BatchSchema(org.apache.drill.exec.record.BatchSchema) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) BatchSchemaBuilder(org.apache.drill.exec.record.BatchSchemaBuilder) BatchSchemaBuilder(org.apache.drill.exec.record.BatchSchemaBuilder) File(java.io.File) ParquetTest(org.apache.drill.categories.ParquetTest) Test(org.junit.Test) UnlikelyTest(org.apache.drill.categories.UnlikelyTest)

Aggregations

BatchSchemaBuilder (org.apache.drill.exec.record.BatchSchemaBuilder): 58; SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder): 58; BatchSchema (org.apache.drill.exec.record.BatchSchema): 56; Test (org.junit.Test): 56; UnlikelyTest (org.apache.drill.categories.UnlikelyTest): 20; RecordBatch (org.apache.drill.exec.record.RecordBatch): 14; SubOperatorTest (org.apache.drill.test.SubOperatorTest): 10; SingleRowSet (org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet): 7; PlannerTest (org.apache.drill.categories.PlannerTest): 5; SqlFunctionTest (org.apache.drill.categories.SqlFunctionTest): 5; MaterializedField (org.apache.drill.exec.record.MaterializedField): 5; SqlTest (org.apache.drill.categories.SqlTest): 4; HashJoinPOP (org.apache.drill.exec.physical.config.HashJoinPOP): 4; ScanFixture (org.apache.drill.exec.physical.impl.scan.ScanTestUtils.ScanFixture): 4; RecordBatchLoader (org.apache.drill.exec.record.RecordBatchLoader): 4; ClusterTest (org.apache.drill.test.ClusterTest): 4; OperatorTest (org.apache.drill.categories.OperatorTest): 3; ParquetTest (org.apache.drill.categories.ParquetTest): 3; VectorTest (org.apache.drill.categories.VectorTest): 3; ExecTest (org.apache.drill.exec.ExecTest): 3