Search in sources :

Example 31 with ResultSetLoader

use of org.apache.drill.exec.physical.rowSet.ResultSetLoader in project drill by axbaretto.

the class TestResultSetLoaderOverflow method testArrayOverflowWithOtherArrays.

/**
 * Test array overflow when the overflowing array sits among other
 * arrays that are in differing states at the time of overflow:
 * <ul>
 * <li>Array a is written before the column that has overflow,
 * and must be copied, in its entirety, to the overflow row.</li>
 * <li>Column b causes the overflow.</li>
 * <li>Column c is written for the first {@code cCutoff} rows, then
 * left empty, including in the overflow row.</li>
 * <li>Column d is written for the first {@code cCutoff} rows, then
 * has empties before the overflow row, but is written in the
 * overflow row.</li>
 * </ul>
 * The first batch is filled until overflow and verified; a second batch
 * of five more rows is then written and verified together with the
 * rolled-over row.
 */
@Test
public void testArrayOverflowWithOtherArrays() {
    TupleMetadata schema = new SchemaBuilder().addArray("a", MinorType.INT).addArray("b", MinorType.VARCHAR).addArray("c", MinorType.INT).addArray("d", MinorType.INT).buildSchema();
    ResultSetOptions options = new OptionBuilder().setRowCountLimit(ValueVector.MAX_ROW_COUNT).setSchema(schema).build();
    ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
    RowSetLoader rootWriter = rsLoader.writer();
    // Fill the batch with rows holding several arrays each. Tack a numeric
    // suffix onto each string value so we can be sure the proper data is
    // written and moved to the overflow batch.
    byte[] value = new byte[512];
    Arrays.fill(value, (byte) 'X');
    String strValue = new String(value, Charsets.UTF_8);
    int aCount = 3;
    int bCount = 11;
    int cCount = 5;
    int dCount = 7;
    // Rows at or beyond this index get no values for c (and none for d
    // unless the batch has already overflowed).
    int cCutoff = ValueVector.MAX_BUFFER_SIZE / value.length / bCount / 2;
    ScalarWriter aWriter = rootWriter.array("a").scalar();
    ScalarWriter bWriter = rootWriter.array("b").scalar();
    ScalarWriter cWriter = rootWriter.array("c").scalar();
    ScalarWriter dWriter = rootWriter.array("d").scalar();
    int count = 0;
    rsLoader.startBatch();
    while (rootWriter.start()) {
        for (int i = 0; i < aCount; i++) {
            aWriter.setInt(count * aCount + i);
        }
        for (int i = 0; i < bCount; i++) {
            String cellValue = strValue + (count * bCount + i);
            bWriter.setString(cellValue);
        }
        if (count < cCutoff) {
            for (int i = 0; i < cCount; i++) {
                cWriter.setInt(count * cCount + i);
            }
        }
        // isFull() turns true once b has overflowed, so d is written
        // again in the overflow row.
        if (count < cCutoff || rootWriter.isFull()) {
            for (int i = 0; i < dCount; i++) {
                dWriter.setInt(count * dCount + i);
            }
        }
        rootWriter.save();
        count++;
    }
    // Verify. The harvested batch excludes the overflow row, which is
    // the last row written (index count - 1).
    int firstCount = count - 1;
    RowSet result = fixture.wrap(rsLoader.harvest());
    assertEquals(firstCount, result.rowCount());
    RowSetReader reader = result.reader();
    ScalarElementReader aReader = reader.array("a").elements();
    ScalarElementReader bReader = reader.array("b").elements();
    ScalarElementReader cReader = reader.array("c").elements();
    ScalarElementReader dReader = reader.array("d").elements();
    while (reader.next()) {
        int rowId = reader.rowIndex();
        assertEquals(aCount, aReader.size());
        for (int i = 0; i < aCount; i++) {
            assertEquals(rowId * aCount + i, aReader.getInt(i));
        }
        assertEquals(bCount, bReader.size());
        for (int i = 0; i < bCount; i++) {
            String cellValue = strValue + (rowId * bCount + i);
            assertEquals(cellValue, bReader.getString(i));
        }
        if (rowId < cCutoff) {
            assertEquals(cCount, cReader.size());
            for (int i = 0; i < cCount; i++) {
                assertEquals(rowId * cCount + i, cReader.getInt(i));
            }
            assertEquals(dCount, dReader.size());
            for (int i = 0; i < dCount; i++) {
                assertEquals(rowId * dCount + i, dReader.getInt(i));
            }
        } else {
            assertEquals(0, cReader.size());
            assertEquals(0, dReader.size());
        }
    }
    result.clear();
    // One row is in the batch. Write more, skipping over the
    // initial few values for columns c and d. Column d has a
    // roll-over value, c has an empty roll-over.
    rsLoader.startBatch();
    for (int j = 0; j < 5; j++) {
        rootWriter.start();
        for (int i = 0; i < aCount; i++) {
            aWriter.setInt(count * aCount + i);
        }
        for (int i = 0; i < bCount; i++) {
            String cellValue = strValue + (count * bCount + i);
            bWriter.setString(cellValue);
        }
        // Only the last of the five new rows gets values for c and d.
        if (j > 3) {
            for (int i = 0; i < cCount; i++) {
                cWriter.setInt(count * cCount + i);
            }
            for (int i = 0; i < dCount; i++) {
                dWriter.setInt(count * dCount + i);
            }
        }
        rootWriter.save();
        count++;
    }
    // Six rows expected: the rolled-over row plus the five just written.
    result = fixture.wrap(rsLoader.harvest());
    assertEquals(6, result.rowCount());
    reader = result.reader();
    aReader = reader.array("a").elements();
    bReader = reader.array("b").elements();
    cReader = reader.array("c").elements();
    dReader = reader.array("d").elements();
    int j = 0;
    while (reader.next()) {
        // Row ids continue from the overflow row of the first batch.
        int rowId = firstCount + reader.rowIndex();
        assertEquals(aCount, aReader.size());
        for (int i = 0; i < aCount; i++) {
            assertEquals("Index " + i, rowId * aCount + i, aReader.getInt(i));
        }
        assertEquals(bCount, bReader.size());
        for (int i = 0; i < bCount; i++) {
            String cellValue = strValue + (rowId * bCount + i);
            assertEquals(cellValue, bReader.getString(i));
        }
        // Only the final row (position 5) carries values for c.
        if (j > 4) {
            assertEquals(cCount, cReader.size());
            for (int i = 0; i < cCount; i++) {
                assertEquals(rowId * cCount + i, cReader.getInt(i));
            }
        } else {
            assertEquals(0, cReader.size());
        }
        // Column d has values in the rolled-over row (position 0) and
        // in the final row (position 5).
        if (j == 0 || j > 4) {
            assertEquals(dCount, dReader.size());
            for (int i = 0; i < dCount; i++) {
                assertEquals(rowId * dCount + i, dReader.getInt(i));
            }
        } else {
            assertEquals(0, dReader.size());
        }
        j++;
    }
    result.clear();
    rsLoader.close();
}
Also used : ScalarElementReader(org.apache.drill.exec.vector.accessor.ScalarElementReader) RowSet(org.apache.drill.test.rowSet.RowSet) ResultSetOptions(org.apache.drill.exec.physical.rowSet.impl.ResultSetLoaderImpl.ResultSetOptions) ResultSetLoader(org.apache.drill.exec.physical.rowSet.ResultSetLoader) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.test.rowSet.schema.SchemaBuilder) RowSetLoader(org.apache.drill.exec.physical.rowSet.RowSetLoader) RowSetReader(org.apache.drill.test.rowSet.RowSetReader) ScalarWriter(org.apache.drill.exec.vector.accessor.ScalarWriter) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 32 with ResultSetLoader

use of org.apache.drill.exec.physical.rowSet.ResultSetLoader in project drill by axbaretto.

the class TestResultSetLoaderOverflow method testOverflowWithNullables.

/**
 * Fills a batch until overflow with rows in which column 0 holds the
 * row number, columns 1 and 3 are set to null and column 2 holds a
 * 512-byte value. Checks that the harvested batch leaves out the
 * overflow row and that the following batch consists of exactly that
 * row, with its nulls and value intact.
 */
@Test
public void testOverflowWithNullables() {
    TupleMetadata schema = new SchemaBuilder()
        .add("n", MinorType.INT)
        .addNullable("a", MinorType.VARCHAR)
        .addNullable("b", MinorType.VARCHAR)
        .addNullable("c", MinorType.VARCHAR)
        .buildSchema();
    ResultSetOptions options = new OptionBuilder()
        .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
        .setSchema(schema)
        .build();
    ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
    RowSetLoader rootWriter = rsLoader.writer();
    rsLoader.startBatch();

    byte[] payload = new byte[512];
    Arrays.fill(payload, (byte) 'X');

    // Keep adding rows until the loader reports that it is full.
    int rowsWritten = 0;
    for (; !rootWriter.isFull(); rowsWritten++) {
        rootWriter.start();
        rootWriter.scalar(0).setInt(rowsWritten);
        rootWriter.scalar(1).setNull();
        rootWriter.scalar(2).setBytes(payload, payload.length);
        rootWriter.scalar(3).setNull();
        rootWriter.save();
    }

    // The last row written is the overflow row; it must not be part
    // of the first harvested batch.
    final int overflowRowId = rowsWritten - 1;
    RowSet firstBatch = fixture.wrap(rsLoader.harvest());
    assertEquals(overflowRowId, firstBatch.rowCount());
    RowSetReader firstReader = firstBatch.reader();
    while (firstReader.next()) {
        assertEquals(firstReader.rowIndex(), firstReader.scalar(0).getInt());
        assertTrue(firstReader.scalar(1).isNull());
        assertTrue(Arrays.equals(payload, firstReader.scalar(2).getBytes()));
        assertTrue(firstReader.scalar(3).isNull());
    }
    firstBatch.clear();

    // The second batch opens with the overflow row and nothing else.
    rsLoader.startBatch();
    RowSet secondBatch = fixture.wrap(rsLoader.harvest());
    RowSetReader secondReader = secondBatch.reader();
    assertEquals(1, secondBatch.rowCount());
    assertTrue(secondReader.next());
    assertEquals(overflowRowId, secondReader.scalar(0).getInt());
    assertTrue(secondReader.scalar(1).isNull());
    assertTrue(Arrays.equals(payload, secondReader.scalar(2).getBytes()));
    assertTrue(secondReader.scalar(3).isNull());
    secondBatch.clear();

    rsLoader.close();
}
Also used : ResultSetLoader(org.apache.drill.exec.physical.rowSet.ResultSetLoader) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.test.rowSet.schema.SchemaBuilder) RowSet(org.apache.drill.test.rowSet.RowSet) RowSetLoader(org.apache.drill.exec.physical.rowSet.RowSetLoader) RowSetReader(org.apache.drill.test.rowSet.RowSetReader) ResultSetOptions(org.apache.drill.exec.physical.rowSet.impl.ResultSetLoaderImpl.ResultSetOptions) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 33 with ResultSetLoader

use of org.apache.drill.exec.physical.rowSet.ResultSetLoader in project drill by axbaretto.

the class TestResultSetLoaderProjection method testMapProjection.

/**
 * Projects map m1 as a whole and only member d of map m2, leaving map
 * m3 out of the projection. Checks the projection flags reported by the
 * writer schema, then writes two rows to every column and verifies that
 * the harvested result contains only m1.a, m1.b and m2.d.
 */
@Test
public void testMapProjection() {
    List<SchemaPath> projection = Lists.newArrayList(
        SchemaPath.getSimplePath("m1"),
        SchemaPath.getCompoundPath("m2", "d"));
    TupleMetadata schema = new SchemaBuilder()
        .addMap("m1")
            .add("a", MinorType.INT)
            .add("b", MinorType.INT)
            .resumeSchema()
        .addMap("m2")
            .add("c", MinorType.INT)
            .add("d", MinorType.INT)
            .resumeSchema()
        .addMap("m3")
            .add("e", MinorType.INT)
            .add("f", MinorType.INT)
            .resumeSchema()
        .buildSchema();
    ResultSetOptions options = new OptionBuilder()
        .setProjection(projection)
        .setSchema(schema)
        .build();
    ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
    RowSetLoader rootWriter = rsLoader.writer();

    // Check the projection state that the writer schema reports.
    TupleMetadata writerSchema = rootWriter.schema();

    // m1: the map and both of its members are projected.
    ColumnMetadata m1Meta = writerSchema.metadata("m1");
    assertTrue(m1Meta.isMap());
    assertTrue(m1Meta.isProjected());
    assertEquals(2, m1Meta.mapSchema().size());
    assertTrue(m1Meta.mapSchema().metadata("a").isProjected());
    assertTrue(m1Meta.mapSchema().metadata("b").isProjected());

    // m2: the map is projected, but of its members only d is.
    ColumnMetadata m2Meta = writerSchema.metadata("m2");
    assertTrue(m2Meta.isMap());
    assertTrue(m2Meta.isProjected());
    assertEquals(2, m2Meta.mapSchema().size());
    assertFalse(m2Meta.mapSchema().metadata("c").isProjected());
    assertTrue(m2Meta.mapSchema().metadata("d").isProjected());

    // m3: neither the map nor its members are projected.
    ColumnMetadata m3Meta = writerSchema.metadata("m3");
    assertTrue(m3Meta.isMap());
    assertFalse(m3Meta.isProjected());
    assertEquals(2, m3Meta.mapSchema().size());
    assertFalse(m3Meta.mapSchema().metadata("e").isProjected());
    assertFalse(m3Meta.mapSchema().metadata("f").isProjected());

    // Write two rows, setting every column whether projected or not.
    rsLoader.startBatch();

    rootWriter.start();
    rootWriter.tuple("m1").scalar("a").setInt(1);
    rootWriter.tuple("m1").scalar("b").setInt(2);
    rootWriter.tuple("m2").scalar("c").setInt(3);
    rootWriter.tuple("m2").scalar("d").setInt(4);
    rootWriter.tuple("m3").scalar("e").setInt(5);
    rootWriter.tuple("m3").scalar("f").setInt(6);
    rootWriter.save();

    rootWriter.start();
    rootWriter.tuple("m1").scalar("a").setInt(11);
    rootWriter.tuple("m1").scalar("b").setInt(12);
    rootWriter.tuple("m2").scalar("c").setInt(13);
    rootWriter.tuple("m2").scalar("d").setInt(14);
    rootWriter.tuple("m3").scalar("e").setInt(15);
    rootWriter.tuple("m3").scalar("f").setInt(16);
    rootWriter.save();

    // The harvested rows should hold the projected columns only.
    BatchSchema expectedSchema = new SchemaBuilder()
        .addMap("m1")
            .add("a", MinorType.INT)
            .add("b", MinorType.INT)
            .resumeSchema()
        .addMap("m2")
            .add("d", MinorType.INT)
            .resumeSchema()
        .build();
    SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
        .addRow(objArray(1, 2), objArray(4))
        .addRow(objArray(11, 12), objArray(14))
        .build();
    RowSet actual = fixture.wrap(rsLoader.harvest());
    new RowSetComparison(expected).verifyAndClearAll(actual);
    rsLoader.close();
}
Also used : ColumnMetadata(org.apache.drill.exec.record.metadata.ColumnMetadata) SingleRowSet(org.apache.drill.test.rowSet.RowSet.SingleRowSet) ResultSetOptions(org.apache.drill.exec.physical.rowSet.impl.ResultSetLoaderImpl.ResultSetOptions) RowSetComparison(org.apache.drill.test.rowSet.RowSetComparison) ResultSetLoader(org.apache.drill.exec.physical.rowSet.ResultSetLoader) SchemaPath(org.apache.drill.common.expression.SchemaPath) BatchSchema(org.apache.drill.exec.record.BatchSchema) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.test.rowSet.schema.SchemaBuilder) RowSetLoader(org.apache.drill.exec.physical.rowSet.RowSetLoader) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 34 with ResultSetLoader

use of org.apache.drill.exec.physical.rowSet.ResultSetLoader in project drill by axbaretto.

the class TestResultSetLoaderProjection method testMapArrayProjection.

/**
 * Exercises projection over map arrays, populating rows through the
 * {@code addRow} convenience method. Map array m1 is projected whole,
 * only member d of m2 is projected, and m3 is not projected; the
 * harvested result is expected to carry just those projected members.
 */
@Test
public void testMapArrayProjection() {
    List<SchemaPath> projection = Lists.newArrayList(
        SchemaPath.getSimplePath("m1"),
        SchemaPath.getCompoundPath("m2", "d"));
    TupleMetadata schema = new SchemaBuilder()
        .addMapArray("m1")
            .add("a", MinorType.INT)
            .add("b", MinorType.INT)
            .resumeSchema()
        .addMapArray("m2")
            .add("c", MinorType.INT)
            .add("d", MinorType.INT)
            .resumeSchema()
        .addMapArray("m3")
            .add("e", MinorType.INT)
            .add("f", MinorType.INT)
            .resumeSchema()
        .buildSchema();
    ResultSetOptions options = new OptionBuilder()
        .setProjection(projection)
        .setSchema(schema)
        .build();
    ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
    RowSetLoader rootWriter = rsLoader.writer();

    // Load two rows; each map array receives two entries per row.
    rsLoader.startBatch();
    rootWriter.addRow(
        objArray(objArray(10, 20), objArray(11, 21)),
        objArray(objArray(30, 40), objArray(31, 42)),
        objArray(objArray(50, 60), objArray(51, 62)));
    rootWriter.addRow(
        objArray(objArray(110, 120), objArray(111, 121)),
        objArray(objArray(130, 140), objArray(131, 142)),
        objArray(objArray(150, 160), objArray(151, 162)));

    // Expect m1 in full and only the d member of m2 in the output.
    BatchSchema expectedSchema = new SchemaBuilder()
        .addMapArray("m1")
            .add("a", MinorType.INT)
            .add("b", MinorType.INT)
            .resumeSchema()
        .addMapArray("m2")
            .add("d", MinorType.INT)
            .resumeSchema()
        .build();
    SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
        .addRow(
            objArray(objArray(10, 20), objArray(11, 21)),
            objArray(objArray(40), objArray(42)))
        .addRow(
            objArray(objArray(110, 120), objArray(111, 121)),
            objArray(objArray(140), objArray(142)))
        .build();
    RowSet actual = fixture.wrap(rsLoader.harvest());
    new RowSetComparison(expected).verifyAndClearAll(actual);
    rsLoader.close();
}
Also used : SingleRowSet(org.apache.drill.test.rowSet.RowSet.SingleRowSet) RowSetComparison(org.apache.drill.test.rowSet.RowSetComparison) ResultSetLoader(org.apache.drill.exec.physical.rowSet.ResultSetLoader) SchemaPath(org.apache.drill.common.expression.SchemaPath) BatchSchema(org.apache.drill.exec.record.BatchSchema) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.test.rowSet.schema.SchemaBuilder) RowSetLoader(org.apache.drill.exec.physical.rowSet.RowSetLoader) ResultSetOptions(org.apache.drill.exec.physical.rowSet.impl.ResultSetLoaderImpl.ResultSetOptions) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 35 with ResultSetLoader

use of org.apache.drill.exec.physical.rowSet.ResultSetLoader in project drill by axbaretto.

the class TestResultSetLoaderProjection method testProjectWithOverflow.

/**
 * Checks that projection and vector overflow cooperate. The schema has
 * an unprojected "big" column and a projected "small" column; rows are
 * written until the loader is full. The test asserts that the number of
 * rows is governed by the size of the projected "small" values rather
 * than the larger unprojected "big" values, and that the overflow row
 * is carried into the next batch.
 */
@Test
public void testProjectWithOverflow() {
    List<SchemaPath> projection = Lists.newArrayList(
        SchemaPath.getSimplePath("small"),
        SchemaPath.getSimplePath("dummy"));
    TupleMetadata schema = new SchemaBuilder()
        .add("big", MinorType.VARCHAR)
        .add("small", MinorType.VARCHAR)
        .buildSchema();
    ResultSetOptions options = new OptionBuilder()
        .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
        .setProjection(projection)
        .setSchema(schema)
        .build();
    ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
    RowSetLoader rootWriter = rsLoader.writer();

    byte[] bigValue = new byte[600];
    Arrays.fill(bigValue, (byte) 'X');
    byte[] smallValue = new byte[512];
    Arrays.fill(smallValue, (byte) 'X');

    // Write rows until the loader signals that it is full.
    rsLoader.startBatch();
    int rowsWritten = 0;
    for (; !rootWriter.isFull(); rowsWritten++) {
        rootWriter.start();
        rootWriter.scalar(0).setBytes(bigValue, bigValue.length);
        rootWriter.scalar(1).setBytes(smallValue, smallValue.length);
        rootWriter.save();
    }

    // The projected "small" vector, not the larger unprojected "big"
    // one, should determine how many rows fit in the batch.
    int expectedCount = ValueVector.MAX_BUFFER_SIZE / smallValue.length;

    // Rows written by this test include the overflow row.
    assertEquals(expectedCount + 1, rowsWritten);

    // The writer reports only the "visible" rows.
    assertEquals(expectedCount, rootWriter.rowCount());

    // The loader's running total also counts invisible and look-ahead rows.
    assertEquals(expectedCount + 1, rsLoader.totalRowCount());

    // The harvested batch leaves out the overflow row.
    RowSet harvested = fixture.wrap(rsLoader.harvest());
    assertEquals(expectedCount, harvested.rowCount());
    harvested.clear();

    // The following batch begins with the overflow row already in place.
    rsLoader.startBatch();
    assertEquals(1, rootWriter.rowCount());
    assertEquals(expectedCount + 1, rsLoader.totalRowCount());
    harvested = fixture.wrap(rsLoader.harvest());
    assertEquals(1, harvested.rowCount());
    harvested.clear();

    rsLoader.close();
}
Also used : ResultSetLoader(org.apache.drill.exec.physical.rowSet.ResultSetLoader) SchemaPath(org.apache.drill.common.expression.SchemaPath) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.test.rowSet.schema.SchemaBuilder) SingleRowSet(org.apache.drill.test.rowSet.RowSet.SingleRowSet) RowSet(org.apache.drill.test.rowSet.RowSet) RowSetLoader(org.apache.drill.exec.physical.rowSet.RowSetLoader) ResultSetOptions(org.apache.drill.exec.physical.rowSet.impl.ResultSetLoaderImpl.ResultSetOptions) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Aggregations

ResultSetLoader (org.apache.drill.exec.physical.rowSet.ResultSetLoader)45 RowSetLoader (org.apache.drill.exec.physical.rowSet.RowSetLoader)44 SubOperatorTest (org.apache.drill.test.SubOperatorTest)44 Test (org.junit.Test)44 TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata)38 SchemaBuilder (org.apache.drill.test.rowSet.schema.SchemaBuilder)38 RowSet (org.apache.drill.test.rowSet.RowSet)34 SingleRowSet (org.apache.drill.test.rowSet.RowSet.SingleRowSet)28 RowSetComparison (org.apache.drill.test.rowSet.RowSetComparison)17 ResultSetOptions (org.apache.drill.exec.physical.rowSet.impl.ResultSetLoaderImpl.ResultSetOptions)16 TupleWriter (org.apache.drill.exec.vector.accessor.TupleWriter)14 ScalarWriter (org.apache.drill.exec.vector.accessor.ScalarWriter)13 RowSetReader (org.apache.drill.test.rowSet.RowSetReader)12 BatchSchema (org.apache.drill.exec.record.BatchSchema)6 SchemaPath (org.apache.drill.common.expression.SchemaPath)5 TupleReader (org.apache.drill.exec.vector.accessor.TupleReader)5 ArrayWriter (org.apache.drill.exec.vector.accessor.ArrayWriter)4 ScalarElementReader (org.apache.drill.exec.vector.accessor.ScalarElementReader)4 MaterializedField (org.apache.drill.exec.record.MaterializedField)3 ArrayReader (org.apache.drill.exec.vector.accessor.ArrayReader)3