Search in sources :

Example 96 with SchemaBuilder

use of org.apache.drill.test.rowSet.schema.SchemaBuilder in project drill by axbaretto.

the class TestResultSetLoaderOmittedValues method testSkipOverflowRow.

/**
 * Verifies that a row can be discarded even when it is the row that
 * caused the writer to report full. Every row except that last one is
 * saved, the first batch is thrown away, and the following batch is
 * expected to contain no rows.
 */
@Test
public void testSkipOverflowRow() {
    TupleMetadata rowSchema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .addNullable("b", MinorType.VARCHAR)
        .buildSchema();
    ResultSetLoaderImpl.ResultSetOptions loaderOptions = new OptionBuilder()
        .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
        .setSchema(rowSchema)
        .build();
    ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), loaderOptions);
    RowSetLoader rootWriter = rsLoader.writer();
    rsLoader.startBatch();

    // 512 bytes of 'X' written to the VARCHAR column of every row.
    byte[] payload = new byte[512];
    Arrays.fill(payload, (byte) 'X');

    for (int rowNo = 0; !rootWriter.isFull(); rowNo++) {
        rootWriter.start();
        rootWriter.scalar(0).setInt(rowNo);
        rootWriter.scalar(1).setBytes(payload, payload.length);
        // Fullness is re-checked right after the write: the row that
        // filled the batch is deliberately left unsaved.
        if (!rootWriter.isFull()) {
            rootWriter.save();
        }
    }

    // Throw away the first batch.
    rsLoader.harvest().zeroVectors();

    // The next batch must be empty because the unsaved row was dropped
    // rather than carried forward.
    rsLoader.startBatch();
    RowSet secondBatch = fixture.wrap(rsLoader.harvest());
    assertEquals(0, secondBatch.rowCount());
    secondBatch.clear();
    rsLoader.close();
}
Also used : ResultSetLoader(org.apache.drill.exec.physical.rowSet.ResultSetLoader) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.test.rowSet.schema.SchemaBuilder) SingleRowSet(org.apache.drill.test.rowSet.RowSet.SingleRowSet) RowSet(org.apache.drill.test.rowSet.RowSet) RowSetLoader(org.apache.drill.exec.physical.rowSet.RowSetLoader) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 97 with SchemaBuilder

use of org.apache.drill.test.rowSet.schema.SchemaBuilder in project drill by axbaretto.

the class TestResultSetLoaderOverflow method testBatchSizeLimit.

/**
 * Checks the row accounting when a batch size limit is configured: the
 * row written when the limit is hit is counted by the test and by the
 * loader's total, but is excluded from the harvested batch and shows up
 * as the first row of the next batch.
 */
@Test
public void testBatchSizeLimit() {
    // Bytes budgeted for the VARCHAR data itself.
    int dataBytes = 8 * 1024 * 1024;

    TupleMetadata rowSchema = new SchemaBuilder()
        .add("s", MinorType.VARCHAR)
        .buildSchema();
    ResultSetOptions loaderOptions = new OptionBuilder()
        .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
        .setSchema(rowSchema)
        // Data, plus offsets (doubled because of +1).
        .setBatchSizeLimit(dataBytes + 2 * ValueVector.MAX_ROW_COUNT * 4)
        .build();
    ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), loaderOptions);
    RowSetLoader rootWriter = rsLoader.writer();
    rsLoader.startBatch();

    byte[] payload = new byte[512];
    Arrays.fill(payload, (byte) 'X');

    int rowsWritten = 0;
    for (; !rootWriter.isFull(); rowsWritten++) {
        rootWriter.start();
        rootWriter.scalar(0).setBytes(payload, payload.length);
        rootWriter.save();
    }

    int expectedVisible = dataBytes / payload.length;
    int expectedTotal = expectedVisible + 1;

    // The test's own count includes the overflow row.
    assertEquals(expectedTotal, rowsWritten);
    // The writer reports only the "visible" rows.
    assertEquals(expectedVisible, rootWriter.rowCount());
    // The loader's total also counts the look-ahead row.
    assertEquals(expectedTotal, rsLoader.totalRowCount());

    // The harvested batch leaves the overflow row out.
    RowSet batch = fixture.wrap(rsLoader.harvest());
    assertEquals(expectedVisible, batch.rowCount());
    batch.clear();

    // The overflow row opens the next batch; the total is unchanged.
    rsLoader.startBatch();
    assertEquals(1, rootWriter.rowCount());
    assertEquals(expectedTotal, rsLoader.totalRowCount());
    batch = fixture.wrap(rsLoader.harvest());
    assertEquals(1, batch.rowCount());
    batch.clear();
    rsLoader.close();
}
Also used : ResultSetLoader(org.apache.drill.exec.physical.rowSet.ResultSetLoader) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.test.rowSet.schema.SchemaBuilder) RowSet(org.apache.drill.test.rowSet.RowSet) RowSetLoader(org.apache.drill.exec.physical.rowSet.RowSetLoader) ResultSetOptions(org.apache.drill.exec.physical.rowSet.impl.ResultSetLoaderImpl.ResultSetOptions) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 98 with SchemaBuilder

use of org.apache.drill.test.rowSet.schema.SchemaBuilder in project drill by axbaretto.

the class TestResultSetLoaderOverflow method testMissingArrayValues.

/**
 * Exercises an array column that stops receiving values part-way through
 * the batch. Rows below {@code blankAfter} get three elements in array
 * "c"; later rows leave it unwritten. The harvested batch is expected to
 * hold one row fewer than were written, with "c" empty on the rows that
 * skipped it.
 */
@Test
public void testMissingArrayValues() {
    TupleMetadata rowSchema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .add("b", MinorType.VARCHAR)
        .addArray("c", MinorType.INT)
        .buildSchema();
    ResultSetOptions loaderOptions = new OptionBuilder()
        .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
        .setSchema(rowSchema)
        .build();
    ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), loaderOptions);
    RowSetLoader rootWriter = rsLoader.writer();

    byte[] payload = new byte[512];
    Arrays.fill(payload, (byte) 'X');

    // Row index from which array "c" is no longer written.
    int blankAfter = ValueVector.MAX_BUFFER_SIZE / 512 * 2 / 3;
    ScalarWriter cWriter = rootWriter.array("c").scalar();

    rsLoader.startBatch();
    int rowsWritten = 0;
    for (; rootWriter.start(); rowsWritten++) {
        rootWriter.scalar("a").setInt(rowsWritten);
        rootWriter.scalar("b").setBytes(payload, payload.length);
        if (rowsWritten < blankAfter) {
            int base = rowsWritten * 3;
            for (int k = 0; k < 3; k++) {
                cWriter.setInt(base + k);
            }
        }
        rootWriter.save();
    }

    RowSet batch = fixture.wrap(rsLoader.harvest());
    assertEquals(rowsWritten - 1, batch.rowCount());

    RowSetReader reader = batch.reader();
    ScalarElementReader cReader = reader.array("c").elements();
    while (reader.next()) {
        assertEquals(reader.rowIndex(), reader.scalar("a").getInt());
        assertTrue(Arrays.equals(payload, reader.scalar("b").getBytes()));
        if (reader.rowIndex() >= blankAfter) {
            // Rows written after the cutoff must show an empty array.
            assertEquals(0, cReader.size());
            continue;
        }
        assertEquals(3, cReader.size());
        for (int k = 0; k < 3; k++) {
            assertEquals(reader.rowIndex() * 3 + k, cReader.getInt(k));
        }
    }
    batch.clear();
    rsLoader.close();
}
Also used : ScalarElementReader(org.apache.drill.exec.vector.accessor.ScalarElementReader) RowSet(org.apache.drill.test.rowSet.RowSet) ResultSetOptions(org.apache.drill.exec.physical.rowSet.impl.ResultSetLoaderImpl.ResultSetOptions) ResultSetLoader(org.apache.drill.exec.physical.rowSet.ResultSetLoader) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.test.rowSet.schema.SchemaBuilder) RowSetLoader(org.apache.drill.exec.physical.rowSet.RowSetLoader) RowSetReader(org.apache.drill.test.rowSet.RowSetReader) ScalarWriter(org.apache.drill.exec.vector.accessor.ScalarWriter) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 99 with SchemaBuilder

use of org.apache.drill.test.rowSet.schema.SchemaBuilder in project drill by axbaretto.

the class TestResultSetLoaderOverflow method testArrayOverflowWithOtherArrays.

/**
 * Test array overflow when other array columns are present:
 * <ul>
 * <li>Array a is written before the column that has overflow,
 * and must be copied, in its entirety, to the overflow row.</li>
 * <li>Column b causes the overflow.</li>
 * <li>Column c is written for a while, then has empties before
 * the overflow row, and is not written in the overflow row.</li>
 * <li>Column d is written for a while, then has empties before
 * the overflow row, but is written in the overflow row.</li>
 * </ul>
 * The first batch is verified row by row, then five more rows are
 * written (only the last of them populates c and d) and the second
 * batch, which starts with the overflow row, is verified as well.
 */
@Test
public void testArrayOverflowWithOtherArrays() {
    TupleMetadata schema = new SchemaBuilder().addArray("a", MinorType.INT).addArray("b", MinorType.VARCHAR).addArray("c", MinorType.INT).addArray("d", MinorType.INT).buildSchema();
    ResultSetOptions options = new OptionBuilder().setRowCountLimit(ValueVector.MAX_ROW_COUNT).setSchema(schema).build();
    ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
    RowSetLoader rootWriter = rsLoader.writer();
    // Fill the batch with rows of arrays. Tack a numeric suffix onto each
    // string so we can be sure the proper data is written and moved
    // to the overflow batch.
    byte[] value = new byte[512];
    Arrays.fill(value, (byte) 'X');
    String strValue = new String(value, Charsets.UTF_8);
    // Number of elements written per row for each array column.
    int aCount = 3;
    int bCount = 11;
    int cCount = 5;
    int dCount = 7;
    // Row index from which columns c and d stop being written.
    int cCutoff = ValueVector.MAX_BUFFER_SIZE / value.length / bCount / 2;
    ScalarWriter aWriter = rootWriter.array("a").scalar();
    ScalarWriter bWriter = rootWriter.array("b").scalar();
    ScalarWriter cWriter = rootWriter.array("c").scalar();
    ScalarWriter dWriter = rootWriter.array("d").scalar();
    int count = 0;
    rsLoader.startBatch();
    while (rootWriter.start()) {
        for (int i = 0; i < aCount; i++) {
            aWriter.setInt(count * aCount + i);
        }
        for (int i = 0; i < bCount; i++) {
            String cellValue = strValue + (count * bCount + i);
            bWriter.setString(cellValue);
        }
        if (count < cCutoff) {
            for (int i = 0; i < cCount; i++) {
                cWriter.setInt(count * cCount + i);
            }
        }
        // d is written up to the cutoff and again once the writer reports
        // full, i.e. in the row in which b overflowed.
        if (count < cCutoff || rootWriter.isFull()) {
            for (int i = 0; i < dCount; i++) {
                dWriter.setInt(count * dCount + i);
            }
        }
        rootWriter.save();
        count++;
    }
    // Verify the first batch; it excludes the overflow row.
    RowSet result = fixture.wrap(rsLoader.harvest());
    assertEquals(count - 1, result.rowCount());
    RowSetReader reader = result.reader();
    ScalarElementReader aReader = reader.array("a").elements();
    ScalarElementReader bReader = reader.array("b").elements();
    ScalarElementReader cReader = reader.array("c").elements();
    ScalarElementReader dReader = reader.array("d").elements();
    while (reader.next()) {
        int rowId = reader.rowIndex();
        assertEquals(aCount, aReader.size());
        for (int i = 0; i < aCount; i++) {
            assertEquals(rowId * aCount + i, aReader.getInt(i));
        }
        assertEquals(bCount, bReader.size());
        for (int i = 0; i < bCount; i++) {
            String cellValue = strValue + (rowId * bCount + i);
            assertEquals(cellValue, bReader.getString(i));
        }
        if (rowId < cCutoff) {
            assertEquals(cCount, cReader.size());
            for (int i = 0; i < cCount; i++) {
                assertEquals(rowId * cCount + i, cReader.getInt(i));
            }
            assertEquals(dCount, dReader.size());
            for (int i = 0; i < dCount; i++) {
                assertEquals(rowId * dCount + i, dReader.getInt(i));
            }
        } else {
            assertEquals(0, cReader.size());
            assertEquals(0, dReader.size());
        }
    }
    result.clear();
    // Logical id of the overflow row, which becomes row 0 of the next batch.
    int firstCount = count - 1;
    // One row is in the batch. Write more, skipping over the
    // initial few values for columns c and d. Column d has a
    // roll-over value, c has an empty roll-over.
    rsLoader.startBatch();
    for (int j = 0; j < 5; j++) {
        rootWriter.start();
        for (int i = 0; i < aCount; i++) {
            aWriter.setInt(count * aCount + i);
        }
        for (int i = 0; i < bCount; i++) {
            String cellValue = strValue + (count * bCount + i);
            bWriter.setString(cellValue);
        }
        // Only the last of the five new rows populates c and d.
        if (j > 3) {
            for (int i = 0; i < cCount; i++) {
                cWriter.setInt(count * cCount + i);
            }
            for (int i = 0; i < dCount; i++) {
                dWriter.setInt(count * dCount + i);
            }
        }
        rootWriter.save();
        count++;
    }
    // Overflow row plus the five rows just written.
    result = fixture.wrap(rsLoader.harvest());
    assertEquals(6, result.rowCount());
    reader = result.reader();
    aReader = reader.array("a").elements();
    bReader = reader.array("b").elements();
    cReader = reader.array("c").elements();
    dReader = reader.array("d").elements();
    // j is the row position within the second batch: 0 is the overflow
    // row, 5 is the last newly written row.
    int j = 0;
    while (reader.next()) {
        int rowId = firstCount + reader.rowIndex();
        assertEquals(aCount, aReader.size());
        for (int i = 0; i < aCount; i++) {
            assertEquals("Index " + i, rowId * aCount + i, aReader.getInt(i));
        }
        assertEquals(bCount, bReader.size());
        for (int i = 0; i < bCount; i++) {
            String cellValue = strValue + (rowId * bCount + i);
            assertEquals(cellValue, bReader.getString(i));
        }
        if (j > 4) {
            assertEquals(cCount, cReader.size());
            for (int i = 0; i < cCount; i++) {
                assertEquals(rowId * cCount + i, cReader.getInt(i));
            }
        } else {
            assertEquals(0, cReader.size());
        }
        if (j == 0 || j > 4) {
            assertEquals(dCount, dReader.size());
            for (int i = 0; i < dCount; i++) {
                assertEquals(rowId * dCount + i, dReader.getInt(i));
            }
        } else {
            assertEquals(0, dReader.size());
        }
        j++;
    }
    result.clear();
    rsLoader.close();
}
Also used : ScalarElementReader(org.apache.drill.exec.vector.accessor.ScalarElementReader) RowSet(org.apache.drill.test.rowSet.RowSet) ResultSetOptions(org.apache.drill.exec.physical.rowSet.impl.ResultSetLoaderImpl.ResultSetOptions) ResultSetLoader(org.apache.drill.exec.physical.rowSet.ResultSetLoader) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.test.rowSet.schema.SchemaBuilder) RowSetLoader(org.apache.drill.exec.physical.rowSet.RowSetLoader) RowSetReader(org.apache.drill.test.rowSet.RowSetReader) ScalarWriter(org.apache.drill.exec.vector.accessor.ScalarWriter) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 100 with SchemaBuilder

use of org.apache.drill.test.rowSet.schema.SchemaBuilder in project drill by axbaretto.

the class TestResultSetLoaderOverflow method testOverflowWithNullables.

/**
 * Fills a batch in which nullable VARCHAR columns "a" and "c" are set to
 * null on every row while "b" receives a 512-byte value, looping until
 * the writer reports full. Verifies the harvested batch (which excludes
 * the last row written) and then the single row that opens the next
 * batch.
 */
@Test
public void testOverflowWithNullables() {
    TupleMetadata rowSchema = new SchemaBuilder()
        .add("n", MinorType.INT)
        .addNullable("a", MinorType.VARCHAR)
        .addNullable("b", MinorType.VARCHAR)
        .addNullable("c", MinorType.VARCHAR)
        .buildSchema();
    ResultSetOptions loaderOptions = new OptionBuilder()
        .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
        .setSchema(rowSchema)
        .build();
    ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), loaderOptions);
    RowSetLoader rootWriter = rsLoader.writer();
    rsLoader.startBatch();

    byte[] payload = new byte[512];
    Arrays.fill(payload, (byte) 'X');

    int rowsWritten = 0;
    for (; !rootWriter.isFull(); rowsWritten++) {
        rootWriter.start();
        rootWriter.scalar(0).setInt(rowsWritten);
        rootWriter.scalar(1).setNull();
        rootWriter.scalar(2).setBytes(payload, payload.length);
        rootWriter.scalar(3).setNull();
        rootWriter.save();
    }

    // Id of the last row written, which is held back from the first batch.
    int overflowRowId = rowsWritten - 1;

    // The first batch excludes the overflow row.
    RowSet batch = fixture.wrap(rsLoader.harvest());
    assertEquals(overflowRowId, batch.rowCount());
    RowSetReader reader = batch.reader();
    while (reader.next()) {
        assertEquals(reader.rowIndex(), reader.scalar(0).getInt());
        assertTrue(reader.scalar(1).isNull());
        assertTrue(Arrays.equals(payload, reader.scalar(2).getBytes()));
        assertTrue(reader.scalar(3).isNull());
    }
    batch.clear();

    // The next batch starts with, and here contains only, the overflow row.
    rsLoader.startBatch();
    batch = fixture.wrap(rsLoader.harvest());
    reader = batch.reader();
    assertEquals(1, batch.rowCount());
    assertTrue(reader.next());
    assertEquals(overflowRowId, reader.scalar(0).getInt());
    assertTrue(reader.scalar(1).isNull());
    assertTrue(Arrays.equals(payload, reader.scalar(2).getBytes()));
    assertTrue(reader.scalar(3).isNull());
    batch.clear();
    rsLoader.close();
}
Also used : ResultSetLoader(org.apache.drill.exec.physical.rowSet.ResultSetLoader) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.test.rowSet.schema.SchemaBuilder) RowSet(org.apache.drill.test.rowSet.RowSet) RowSetLoader(org.apache.drill.exec.physical.rowSet.RowSetLoader) RowSetReader(org.apache.drill.test.rowSet.RowSetReader) ResultSetOptions(org.apache.drill.exec.physical.rowSet.impl.ResultSetLoaderImpl.ResultSetOptions) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Aggregations

SchemaBuilder (org.apache.drill.test.rowSet.schema.SchemaBuilder)175 Test (org.junit.Test)154 BatchSchema (org.apache.drill.exec.record.BatchSchema)102 SingleRowSet (org.apache.drill.test.rowSet.RowSet.SingleRowSet)91 SubOperatorTest (org.apache.drill.test.SubOperatorTest)86 TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata)65 RowSet (org.apache.drill.test.rowSet.RowSet)52 RowSetReader (org.apache.drill.test.rowSet.RowSetReader)50 ResultSetLoader (org.apache.drill.exec.physical.rowSet.ResultSetLoader)38 RowSetLoader (org.apache.drill.exec.physical.rowSet.RowSetLoader)38 ScalarReader (org.apache.drill.exec.vector.accessor.ScalarReader)26 RowSetComparison (org.apache.drill.test.rowSet.RowSetComparison)25 DrillTest (org.apache.drill.test.DrillTest)21 ColumnMetadata (org.apache.drill.exec.record.metadata.ColumnMetadata)20 ScalarWriter (org.apache.drill.exec.vector.accessor.ScalarWriter)18 RowSetBuilder (org.apache.drill.test.rowSet.RowSetBuilder)18 ScalarElementReader (org.apache.drill.exec.vector.accessor.ScalarElementReader)17 AbstractColumnMetadata (org.apache.drill.exec.record.metadata.AbstractColumnMetadata)16 TupleWriter (org.apache.drill.exec.vector.accessor.TupleWriter)16 RecordBatch (org.apache.drill.exec.record.RecordBatch)14