Search in sources :

Example 26 with RowSetLoader

use of org.apache.drill.exec.physical.rowSet.RowSetLoader in project drill by axbaretto.

the class TestResultSetLoaderOmittedValues method testSkipRows.

/**
 * Verifies that a row is dropped when the client starts it but never
 * calls {@code save()} on the row writer.
 * <p>
 * Fourteen rows are started; only those at an even loop position are
 * saved, so the harvested batch must hold exactly the seven saved rows.
 * A discarded row's values remain in the vectors, but the next row that
 * is written overwrites them; if the batch ends first, they are simply
 * left past the end of the valid data. Only the tail of a vector is ever
 * changed, never its interior, which is why this is safe.
 */
@Test
public void testSkipRows() {
    TupleMetadata schema = new SchemaBuilder()
            .add("a", MinorType.INT)
            .addNullable("b", MinorType.VARCHAR)
            .buildSchema();
    ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
            .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
            .setSchema(schema)
            .build();
    ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
    RowSetLoader rootWriter = rsLoader.writer();
    rsLoader.startBatch();

    // Row numbers are 1-based; the null/save decisions use the 0-based position.
    for (int rowNumber = 1; rowNumber <= 14; rowNumber++) {
        int position = rowNumber - 1;
        rootWriter.start();
        rootWriter.scalar(0).setInt(rowNumber);
        if (position % 3 != 0) {
            rootWriter.scalar(1).setString("b-" + rowNumber);
        } else {
            // Every third row leaves the nullable column null.
            rootWriter.scalar(1).setNull();
        }
        // Rows at odd positions are never saved and are therefore discarded.
        boolean keepRow = position % 2 == 0;
        if (keepRow) {
            rootWriter.save();
        }
    }

    RowSet result = fixture.wrap(rsLoader.harvest());
    // Only the saved rows (odd row numbers) are expected in the output.
    SingleRowSet expected = fixture.rowSetBuilder(result.batchSchema())
            .addRow(1, null)
            .addRow(3, "b-3")
            .addRow(5, "b-5")
            .addRow(7, null)
            .addRow(9, "b-9")
            .addRow(11, "b-11")
            .addRow(13, null)
            .build();
    new RowSetComparison(expected).verifyAndClearAll(result);
    rsLoader.close();
}
Also used : SingleRowSet(org.apache.drill.test.rowSet.RowSet.SingleRowSet) RowSetComparison(org.apache.drill.test.rowSet.RowSetComparison) ResultSetLoader(org.apache.drill.exec.physical.rowSet.ResultSetLoader) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.test.rowSet.schema.SchemaBuilder) SingleRowSet(org.apache.drill.test.rowSet.RowSet.SingleRowSet) RowSet(org.apache.drill.test.rowSet.RowSet) RowSetLoader(org.apache.drill.exec.physical.rowSet.RowSetLoader) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 27 with RowSetLoader

use of org.apache.drill.exec.physical.rowSet.RowSetLoader in project drill by axbaretto.

the class TestResultSetLoaderOmittedValues method testSkipOverflowRow.

/**
 * Verifies that a row can be discarded even when it is the row that
 * triggered overflow.
 * <p>
 * Rows carrying a 512-byte VARCHAR value are written until the writer
 * reports that it is full. The row after which the writer becomes full is
 * deliberately not saved. After throwing away the first harvested batch,
 * the following batch must contain no rows.
 */
@Test
public void testSkipOverflowRow() {
    TupleMetadata schema = new SchemaBuilder()
            .add("a", MinorType.INT)
            .addNullable("b", MinorType.VARCHAR)
            .buildSchema();
    ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
            .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
            .setSchema(schema)
            .build();
    ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
    RowSetLoader rootWriter = rsLoader.writer();
    rsLoader.startBatch();

    // Fixed payload of 512 'X' bytes used for every row.
    byte[] value = new byte[512];
    Arrays.fill(value, (byte) 'X');

    for (int count = 0; !rootWriter.isFull(); count++) {
        rootWriter.start();
        rootWriter.scalar(0).setInt(count);
        rootWriter.scalar(1).setBytes(value, value.length);
        if (rootWriter.isFull()) {
            // Writing this row filled the batch: skip save() so it is dropped.
            continue;
        }
        rootWriter.save();
    }

    // The first batch is of no interest; release its contents.
    rsLoader.harvest().zeroVectors();

    // The next batch would normally start with the overflow row, but
    // that row was discarded, so the batch must be empty.
    rsLoader.startBatch();
    RowSet result = fixture.wrap(rsLoader.harvest());
    int harvestedRows = result.rowCount();
    assertEquals(0, harvestedRows);
    result.clear();
    rsLoader.close();
}
Also used : ResultSetLoader(org.apache.drill.exec.physical.rowSet.ResultSetLoader) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.test.rowSet.schema.SchemaBuilder) SingleRowSet(org.apache.drill.test.rowSet.RowSet.SingleRowSet) RowSet(org.apache.drill.test.rowSet.RowSet) RowSetLoader(org.apache.drill.exec.physical.rowSet.RowSetLoader) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 28 with RowSetLoader

use of org.apache.drill.exec.physical.rowSet.RowSetLoader in project drill by axbaretto.

the class TestResultSetLoaderOverflow method testBatchSizeLimit.

/**
 * Verifies overflow handling when a batch size limit is configured.
 * <p>
 * The limit is set to 8 MB of data plus room for the offsets. Rows with a
 * 512-byte VARCHAR value are written until the writer is full. The row
 * that did not fit must be left out of the harvested batch and show up
 * as the first (and only) row of the next batch.
 */
@Test
public void testBatchSizeLimit() {
    // Budget reserved for the VARCHAR data itself.
    int dataBytes = 8 * 1024 * 1024;

    TupleMetadata schema = new SchemaBuilder()
            .add("s", MinorType.VARCHAR)
            .buildSchema();
    ResultSetOptions options = new OptionBuilder()
            .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
            .setSchema(schema)
            // Data, plus offsets (doubled because of +1).
            .setBatchSizeLimit(dataBytes + 2 * ValueVector.MAX_ROW_COUNT * 4)
            .build();
    ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
    RowSetLoader rootWriter = rsLoader.writer();
    rsLoader.startBatch();

    // Fixed payload of 512 'X' bytes used for every row.
    byte[] value = new byte[512];
    Arrays.fill(value, (byte) 'X');

    int count;
    for (count = 0; !rootWriter.isFull(); count++) {
        rootWriter.start();
        rootWriter.scalar(0).setBytes(value, value.length);
        rootWriter.save();
    }

    int expectedCount = dataBytes / value.length;
    int countWithOverflow = expectedCount + 1;

    // The number of rows written here includes the overflow row.
    assertEquals(countWithOverflow, count);

    // The writer reports only the "visible" rows of the current batch.
    assertEquals(expectedCount, rootWriter.rowCount());

    // The loader's total also counts invisible and look-ahead rows.
    assertEquals(countWithOverflow, rsLoader.totalRowCount());

    // The harvested batch must not contain the overflow row.
    RowSet result = fixture.wrap(rsLoader.harvest());
    assertEquals(expectedCount, result.rowCount());
    result.clear();

    // The following batch begins with the overflow row already in place.
    rsLoader.startBatch();
    assertEquals(1, rootWriter.rowCount());
    assertEquals(countWithOverflow, rsLoader.totalRowCount());
    result = fixture.wrap(rsLoader.harvest());
    assertEquals(1, result.rowCount());
    result.clear();

    rsLoader.close();
}
Also used : ResultSetLoader(org.apache.drill.exec.physical.rowSet.ResultSetLoader) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.test.rowSet.schema.SchemaBuilder) RowSet(org.apache.drill.test.rowSet.RowSet) RowSetLoader(org.apache.drill.exec.physical.rowSet.RowSetLoader) ResultSetOptions(org.apache.drill.exec.physical.rowSet.impl.ResultSetLoaderImpl.ResultSetOptions) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 29 with RowSetLoader

use of org.apache.drill.exec.physical.rowSet.RowSetLoader in project drill by axbaretto.

the class TestResultSetLoaderOverflow method testLargeArray.

/**
 * Builds a single array with far more than 64K values. Drill has no
 * practical numeric limit on array length (the nominal limit is about
 * 2 billion, which is too large to fit into a vector even for bytes).
 * <p>
 * Because the whole array belongs to the first row of the batch, overflow
 * is not possible, so the writer is expected to fail with a
 * {@code UserException} before the loop completes.
 */
@Test
public void testLargeArray() {
    ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator());
    RowSetLoader rootWriter = rsLoader.writer();

    // The single repeated INT column is added to the initially empty schema.
    MaterializedField field = SchemaBuilder.columnSchema("a", MinorType.INT, DataMode.REPEATED);
    rootWriter.addColumn(field);

    // All values go into one array, the column value of the first row.
    rsLoader.startBatch();
    rootWriter.start();
    ScalarWriter array = rootWriter.array(0).scalar();
    try {
        // Writes 1, 2, 3, ... up to Integer.MAX_VALUE unless stopped first.
        int element = 0;
        while (element < Integer.MAX_VALUE) {
            element++;
            array.setInt(element);
        }
        // Reaching this point means the writer never rejected the array.
        fail();
    } catch (UserException expected) {
        // Expected: the array grew too large for the vector.
    }
    rsLoader.close();
}
Also used : ResultSetLoader(org.apache.drill.exec.physical.rowSet.ResultSetLoader) MaterializedField(org.apache.drill.exec.record.MaterializedField) UserException(org.apache.drill.common.exceptions.UserException) RowSetLoader(org.apache.drill.exec.physical.rowSet.RowSetLoader) ScalarWriter(org.apache.drill.exec.vector.accessor.ScalarWriter) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 30 with RowSetLoader

use of org.apache.drill.exec.physical.rowSet.RowSetLoader in project drill by axbaretto.

the class TestResultSetLoaderOverflow method testMissingArrayValues.

/**
 * Covers an array column that has "missing values" before overflow occurs.
 * <p>
 * Every row gets an INT and a 512-byte VARCHAR. Only rows below the
 * {@code blankAfter} threshold also get three array elements; later rows
 * write nothing to the array. Rows are written until {@code start()}
 * returns false. The harvested batch is then read back and checked row by
 * row.
 */
@Test
public void testMissingArrayValues() {
    TupleMetadata schema = new SchemaBuilder()
            .add("a", MinorType.INT)
            .add("b", MinorType.VARCHAR)
            .addArray("c", MinorType.INT)
            .buildSchema();
    ResultSetOptions options = new OptionBuilder()
            .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
            .setSchema(schema)
            .build();
    ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
    RowSetLoader rootWriter = rsLoader.writer();

    // Fixed payload of 512 'X' bytes used for column "b" in every row.
    byte[] value = new byte[512];
    Arrays.fill(value, (byte) 'X');

    // Rows with an index at or beyond this threshold leave the array empty.
    int blankAfter = ValueVector.MAX_BUFFER_SIZE / 512 * 2 / 3;
    ScalarWriter cWriter = rootWriter.array("c").scalar();

    rsLoader.startBatch();
    int rowId;
    for (rowId = 0; rootWriter.start(); rowId++) {
        rootWriter.scalar("a").setInt(rowId);
        rootWriter.scalar("b").setBytes(value, value.length);
        if (rowId < blankAfter) {
            int firstElement = rowId * 3;
            for (int offset = 0; offset < 3; offset++) {
                cWriter.setInt(firstElement + offset);
            }
        }
        rootWriter.save();
    }

    // The last row written is not part of the harvested batch.
    RowSet result = fixture.wrap(rsLoader.harvest());
    assertEquals(rowId - 1, result.rowCount());

    RowSetReader reader = result.reader();
    ScalarElementReader cReader = reader.array("c").elements();
    while (reader.next()) {
        int row = reader.rowIndex();
        assertEquals(row, reader.scalar("a").getInt());
        assertTrue(Arrays.equals(value, reader.scalar("b").getBytes()));
        if (row >= blankAfter) {
            // No array values were written for this row.
            assertEquals(0, cReader.size());
            continue;
        }
        assertEquals(3, cReader.size());
        int firstElement = row * 3;
        for (int offset = 0; offset < 3; offset++) {
            assertEquals(firstElement + offset, cReader.getInt(offset));
        }
    }
    result.clear();
    rsLoader.close();
}
Also used : ScalarElementReader(org.apache.drill.exec.vector.accessor.ScalarElementReader) RowSet(org.apache.drill.test.rowSet.RowSet) ResultSetOptions(org.apache.drill.exec.physical.rowSet.impl.ResultSetLoaderImpl.ResultSetOptions) ResultSetLoader(org.apache.drill.exec.physical.rowSet.ResultSetLoader) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.test.rowSet.schema.SchemaBuilder) RowSetLoader(org.apache.drill.exec.physical.rowSet.RowSetLoader) RowSetReader(org.apache.drill.test.rowSet.RowSetReader) ScalarWriter(org.apache.drill.exec.vector.accessor.ScalarWriter) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Aggregations

RowSetLoader (org.apache.drill.exec.physical.rowSet.RowSetLoader)45 ResultSetLoader (org.apache.drill.exec.physical.rowSet.ResultSetLoader)44 SubOperatorTest (org.apache.drill.test.SubOperatorTest)43 Test (org.junit.Test)43 TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata)38 SchemaBuilder (org.apache.drill.test.rowSet.schema.SchemaBuilder)38 RowSet (org.apache.drill.test.rowSet.RowSet)35 SingleRowSet (org.apache.drill.test.rowSet.RowSet.SingleRowSet)29 RowSetComparison (org.apache.drill.test.rowSet.RowSetComparison)18 ResultSetOptions (org.apache.drill.exec.physical.rowSet.impl.ResultSetLoaderImpl.ResultSetOptions)15 TupleWriter (org.apache.drill.exec.vector.accessor.TupleWriter)14 ScalarWriter (org.apache.drill.exec.vector.accessor.ScalarWriter)13 RowSetReader (org.apache.drill.test.rowSet.RowSetReader)12 BatchSchema (org.apache.drill.exec.record.BatchSchema)7 TupleReader (org.apache.drill.exec.vector.accessor.TupleReader)5 SchemaPath (org.apache.drill.common.expression.SchemaPath)4 ArrayWriter (org.apache.drill.exec.vector.accessor.ArrayWriter)4 ScalarElementReader (org.apache.drill.exec.vector.accessor.ScalarElementReader)4 MaterializedField (org.apache.drill.exec.record.MaterializedField)3 ArrayReader (org.apache.drill.exec.vector.accessor.ArrayReader)3