Examples with RowSetLoader - org.apache.drill.exec.physical.resultSet.RowSetLoader

Example 21 with RowSetLoader

Use of org.apache.drill.exec.physical.resultSet.RowSetLoader in the Apache Drill project.

Taken from the class TestResultSetLoaderDicts, method testKeyOverflow.

/**
 * Writes rows holding a dict of large VARCHAR keys until the root writer
 * reports that it is full, then checks the row accounting: the first
 * harvested batch must exclude the final (overflow) row, and that row must
 * appear as the only row of the following batch.
 */
@Test
public void testKeyOverflow() {
    TupleMetadata schema = new SchemaBuilder()
        .addDict("d", MinorType.VARCHAR)
        .value(MinorType.INT)
        .resumeSchema()
        .buildSchema();
    ResultSetLoaderImpl.ResultSetOptions options = new ResultSetOptionBuilder()
        .rowCountLimit(ValueVector.MAX_ROW_COUNT)
        .readerSchema(schema)
        .build();
    ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
    RowSetLoader rootWriter = rsLoader.writer();
    rsLoader.startBatch();

    // Every dict entry uses the same 523-byte key.
    byte[] keyBytes = new byte[523];
    Arrays.fill(keyBytes, (byte) 'X');
    // Each row's dict gets this many entries.
    int entriesPerDict = 4;

    DictWriter dictWriter = rootWriter.dict(0);
    ScalarWriter keyWriter = dictWriter.keyWriter();
    ScalarWriter valueWriter = dictWriter.valueWriter().scalar();

    // The row limit is set to the maximum, so the number of rows that fit
    // is expected to be determined by the buffer size and the key bytes
    // written per row.
    int expectedCount = ValueVector.MAX_BUFFER_SIZE / (keyBytes.length * entriesPerDict);

    // Fill the first batch; the loop also writes the overflow row.
    int rowsWritten = 0;
    for (; !rootWriter.isFull(); rowsWritten++) {
        rootWriter.start();
        for (int entry = 0; entry < entriesPerDict; entry++) {
            keyWriter.setBytes(keyBytes, keyBytes.length);
            // The value is only a filler; its content does not matter here.
            valueWriter.setInt(0);
            // Saving is not required for scalar members; done for completeness.
            dictWriter.save();
        }
        rootWriter.save();
    }
    assertEquals(expectedCount + 1, rowsWritten);

    // The writer reports only the rows visible in the current batch ...
    assertEquals(expectedCount, rootWriter.rowCount());
    // ... while the loader's total also counts the look-ahead row.
    assertEquals(expectedCount + 1, rsLoader.totalRowCount());

    // The harvested batch must not contain the overflow row.
    VectorContainer firstContainer = rsLoader.harvest();
    BatchValidator.validate(firstContainer);
    RowSet firstBatch = fixture.wrap(firstContainer);
    assertEquals(expectedCount, firstBatch.rowCount());
    firstBatch.clear();

    // The next batch opens with the overflow row already in place.
    rsLoader.startBatch();
    assertEquals(1, rootWriter.rowCount());
    assertEquals(expectedCount + 1, rsLoader.totalRowCount());
    VectorContainer overflowContainer = rsLoader.harvest();
    BatchValidator.validate(overflowContainer);
    RowSet overflowBatch = fixture.wrap(overflowContainer);
    assertEquals(1, overflowBatch.rowCount());
    overflowBatch.clear();

    rsLoader.close();
}

Also used : DictWriter(org.apache.drill.exec.vector.accessor.DictWriter) SingleRowSet(org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) RowSet(org.apache.drill.exec.physical.rowSet.RowSet) VectorContainer(org.apache.drill.exec.record.VectorContainer) ResultSetLoader(org.apache.drill.exec.physical.resultSet.ResultSetLoader) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) RowSetLoader(org.apache.drill.exec.physical.resultSet.RowSetLoader) ScalarWriter(org.apache.drill.exec.vector.accessor.ScalarWriter) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 22 with RowSetLoader

Use of org.apache.drill.exec.physical.resultSet.RowSetLoader in the Apache Drill project.

Taken from the class TestResultSetLoaderDicts, method testDictValue.

/**
 * Writes two batches of rows whose dict column "d" (INT keys) holds nested
 * dicts (INT key, nullable VARCHAR value) as values, and verifies each
 * harvested batch against an expected row set built from the same literals.
 */
@Test
public void testDictValue() {
    TupleMetadata schema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .addDict("d", MinorType.INT)
        .dictValue()
        .key(MinorType.INT)
        .nullableValue(MinorType.VARCHAR)
        .resumeDict()
        .resumeSchema()
        .buildSchema();
    ResultSetLoaderImpl.ResultSetOptions options = new ResultSetOptionBuilder()
        .readerSchema(schema)
        .build();
    ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
    RowSetLoader rootWriter = rsLoader.writer();

    // First batch: populated dict, empty dict, and a dict mixing empty,
    // single-entry, null-valued and multi-entry nested dicts.
    rsLoader.startBatch();
    rootWriter
        .addRow(10, map(1, map(1, "a", 2, "b", 4, "c"), 2, map(2, "a2", 1, "c2")))
        .addRow(20, map())
        .addRow(30, map(1, map(), 2, map(1, "a3"), 3, map(2, "b4", 4, "n4", 1, null), 4, map(3, "m5", 1, "a5", 2, "c5", 8, "m5", 21, "h5")));

    // Verify the first batch.
    RowSet firstActual = fixture.wrap(rsLoader.harvest());
    SingleRowSet firstExpected = fixture.rowSetBuilder(schema)
        .addRow(10, map(1, map(1, "a", 2, "b", 4, "c"), 2, map(2, "a2", 1, "c2")))
        .addRow(20, map())
        .addRow(30, map(1, map(), 2, map(1, "a3"), 3, map(2, "b4", 4, "n4", 1, null), 4, map(3, "m5", 1, "a5", 2, "c5", 8, "m5", 21, "h5")))
        .build();
    RowSetUtilities.verify(firstExpected, firstActual);

    // Second batch: two more rows.
    rsLoader.startBatch();
    rootWriter
        .addRow(40, map(1, map(1, "j6", 0, "k6")))
        .addRow(50, map(1, map(2, "l7"), 2, map(1, "o8", 5, "p8", 7, "u8")));

    // Verify the second batch. The expected rows are built against the
    // schema reported by the harvested batch itself.
    RowSet secondActual = fixture.wrap(rsLoader.harvest());
    SingleRowSet secondExpected = fixture.rowSetBuilder(secondActual.schema())
        .addRow(40, map(1, map(1, "j6", 0, "k6")))
        .addRow(50, map(1, map(2, "l7"), 2, map(1, "o8", 5, "p8", 7, "u8")))
        .build();
    RowSetUtilities.verify(secondExpected, secondActual);

    rsLoader.close();
}

Also used : SingleRowSet(org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) ResultSetLoader(org.apache.drill.exec.physical.resultSet.ResultSetLoader) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) SingleRowSet(org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) RowSet(org.apache.drill.exec.physical.rowSet.RowSet) RowSetLoader(org.apache.drill.exec.physical.resultSet.RowSetLoader) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 23 with RowSetLoader

Use of org.apache.drill.exec.physical.resultSet.RowSetLoader in the Apache Drill project.

Taken from the class TestResultSetLoaderLimits, method testRowLimit.

/**
 * Checks that the writer stops accepting rows once the default target row
 * count is reached. No look-ahead row is involved in this scenario.
 * The batch is filled twice, each time using a different way of detecting
 * that the writer is full.
 */
@Test
public void testRowLimit() {
    ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator());
    assertEquals(ResultSetLoaderImpl.DEFAULT_ROW_COUNT, rsLoader.targetRowCount());
    RowSetLoader rootWriter = rsLoader.writer();
    rootWriter.addColumn(SchemaBuilder.columnSchema("s", MinorType.VARCHAR, DataMode.REQUIRED));

    // Each row carries the same 200-byte value.
    byte[] payload = new byte[200];
    Arrays.fill(payload, (byte) 'X');

    // First pass: poll isFull() before starting each row.
    int firstPassRows = 0;
    rsLoader.startBatch();
    for (; !rootWriter.isFull(); firstPassRows++) {
        rootWriter.start();
        rootWriter.scalar(0).setBytes(payload, payload.length);
        rootWriter.save();
    }
    assertEquals(ResultSetLoaderImpl.DEFAULT_ROW_COUNT, firstPassRows);
    assertEquals(firstPassRows, rootWriter.rowCount());
    rsLoader.harvest().clear();

    // Second pass: rely on the boolean returned by start() instead.
    int secondPassRows = 0;
    rsLoader.startBatch();
    assertEquals(0, rootWriter.rowCount());
    for (; rootWriter.start(); secondPassRows++) {
        rootWriter.scalar(0).setBytes(payload, payload.length);
        rootWriter.save();
    }
    assertEquals(ResultSetLoaderImpl.DEFAULT_ROW_COUNT, secondPassRows);
    assertEquals(secondPassRows, rootWriter.rowCount());
    rsLoader.harvest().clear();

    rsLoader.close();
}

Also used : ResultSetLoader(org.apache.drill.exec.physical.resultSet.ResultSetLoader) RowSetLoader(org.apache.drill.exec.physical.resultSet.RowSetLoader) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 24 with RowSetLoader

Use of org.apache.drill.exec.physical.resultSet.RowSetLoader in the Apache Drill project.

Taken from the class TestResultSetLoaderLimits, method testLimit100.

/**
 * Fills a first batch up to its row-count limit (75), then fills a second
 * batch that is cut short when the overall scan limit (100) is reached.
 */
@Test
public void testLimit100() {
    final int batchRowLimit = 75;
    final int scanLimit = 100;
    ResultSetOptions options = new ResultSetOptionBuilder()
        .rowCountLimit(batchRowLimit)
        .limit(scanLimit)
        .build();
    ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
    RowSetLoader rootWriter = rsLoader.writer();
    rootWriter.addColumn(SchemaBuilder.columnSchema("s", MinorType.VARCHAR, DataMode.REQUIRED));

    // First batch fills to the per-batch row limit; the scan limit is not yet hit.
    rsLoader.startBatch();
    int firstBatchRows = fillToLimit(rootWriter);
    assertEquals(batchRowLimit, firstBatchRows);
    assertEquals(firstBatchRows, rootWriter.rowCount());
    rsLoader.harvest().clear();
    assertFalse(rsLoader.atLimit());

    // Second batch only gets the rows remaining under the scan limit.
    rsLoader.startBatch();
    int secondBatchRows = fillToLimit(rootWriter);
    assertEquals(scanLimit - batchRowLimit, secondBatchRows);
    assertEquals(secondBatchRows, rootWriter.rowCount());
    rsLoader.harvest().clear();
    assertTrue(rsLoader.atLimit());

    rsLoader.close();
}

Also used : ResultSetLoader(org.apache.drill.exec.physical.resultSet.ResultSetLoader) RowSetLoader(org.apache.drill.exec.physical.resultSet.RowSetLoader) ResultSetOptions(org.apache.drill.exec.physical.resultSet.impl.ResultSetLoaderImpl.ResultSetOptions) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 25 with RowSetLoader

Use of org.apache.drill.exec.physical.resultSet.RowSetLoader in the Apache Drill project.

Taken from the class TestResultSetLoaderLimits, method testDynamicLimit.

/**
 * Checks that the target row count can be changed between batches, both
 * upward and downward, and that out-of-range requests are clamped.
 */
@Test
public void testDynamicLimit() {
    // Begin with the small test limit.
    ResultSetOptions options = new ResultSetOptionBuilder()
        .rowCountLimit(TEST_ROW_LIMIT)
        .build();
    ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
    assertEquals(TEST_ROW_LIMIT, rsLoader.targetRowCount());
    RowSetLoader rootWriter = rsLoader.writer();
    rootWriter.addColumn(SchemaBuilder.columnSchema("s", MinorType.VARCHAR, DataMode.REQUIRED));

    rsLoader.startBatch();
    int initialRows = fillToLimit(rootWriter);
    assertEquals(TEST_ROW_LIMIT, initialRows);
    assertEquals(initialRows, rootWriter.rowCount());
    rsLoader.harvest().clear();

    // Raise the target row count and fill a second batch.
    int raisedLimit = 8000;
    rsLoader.setTargetRowCount(raisedLimit);
    rsLoader.startBatch();
    int raisedRows = fillToLimit(rootWriter);
    assertEquals(raisedLimit, raisedRows);
    assertEquals(raisedRows, rootWriter.rowCount());
    rsLoader.harvest().clear();

    // Lower the target row count again and fill a third batch.
    int loweredLimit = 1000;
    rsLoader.setTargetRowCount(loweredLimit);
    rsLoader.startBatch();
    int loweredRows = fillToLimit(rootWriter);
    assertEquals(loweredLimit, loweredRows);
    assertEquals(loweredRows, rootWriter.rowCount());
    rsLoader.harvest().clear();

    // Out-of-range requests: a negative value yields 1, an oversized value
    // yields the maximum row count.
    rsLoader.setTargetRowCount(-3);
    assertEquals(1, rsLoader.targetRowCount());
    rsLoader.setTargetRowCount(Integer.MAX_VALUE);
    assertEquals(ValueVector.MAX_ROW_COUNT, rsLoader.targetRowCount());

    rsLoader.close();
}

Aggregations

RowSetLoader (org.apache.drill.exec.physical.resultSet.RowSetLoader): 98; ResultSetLoader (org.apache.drill.exec.physical.resultSet.ResultSetLoader): 90; Test (org.junit.Test): 86; SubOperatorTest (org.apache.drill.test.SubOperatorTest): 85; SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder): 82; TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata): 82; SingleRowSet (org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet): 66; RowSet (org.apache.drill.exec.physical.rowSet.RowSet): 63; ScalarWriter (org.apache.drill.exec.vector.accessor.ScalarWriter): 25; TupleWriter (org.apache.drill.exec.vector.accessor.TupleWriter): 25; ResultSetOptions (org.apache.drill.exec.physical.resultSet.impl.ResultSetLoaderImpl.ResultSetOptions): 23; RowSetReader (org.apache.drill.exec.physical.rowSet.RowSetReader): 17; ArrayWriter (org.apache.drill.exec.vector.accessor.ArrayWriter): 16; VectorContainer (org.apache.drill.exec.record.VectorContainer): 15; SchemaPath (org.apache.drill.common.expression.SchemaPath): 12; DictWriter (org.apache.drill.exec.vector.accessor.DictWriter): 11; EvfTest (org.apache.drill.categories.EvfTest): 10; MaterializedField (org.apache.drill.exec.record.MaterializedField): 9; ColumnMetadata (org.apache.drill.exec.record.metadata.ColumnMetadata): 6; ArrayReader (org.apache.drill.exec.vector.accessor.ArrayReader): 5