
Example 91 with SchemaBuilder

use of org.apache.drill.test.rowSet.schema.SchemaBuilder in project drill by axbaretto.

the class TestResultSetLoaderMaps method testMapAddition.

/**
 * Test adding a map to a loader after writing the first row.
 */
@Test
public void testMapAddition() {
    TupleMetadata schema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .buildSchema();
    ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
        .setSchema(schema)
        .build();
    ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
    assertEquals(1, rsLoader.schemaVersion());
    RowSetLoader rootWriter = rsLoader.writer();
    // Start without the map. Add a map after the first row.
    rsLoader.startBatch();
    rootWriter.addRow(10);
    int mapIndex = rootWriter.addColumn(
        SchemaBuilder.columnSchema("m", MinorType.MAP, DataMode.REQUIRED));
    TupleWriter mapWriter = rootWriter.tuple(mapIndex);
    // Add a column to the map with the same name as the top-level column.
    // Verifies that the name spaces are independent.
    mapWriter.addColumn(
        SchemaBuilder.columnSchema("a", MinorType.VARCHAR, DataMode.REQUIRED));
    rootWriter
        .addRow(20, objArray("fred"))
        .addRow(30, objArray("barney"));
    RowSet actual = fixture.wrap(rsLoader.harvest());
    assertEquals(3, rsLoader.schemaVersion());
    assertEquals(3, actual.rowCount());
    // Validate first batch
    TupleMetadata expectedSchema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .addMap("m")
            .add("a", MinorType.VARCHAR)
            .resumeSchema()
        .buildSchema();
    SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
        .addRow(10, objArray(""))
        .addRow(20, objArray("fred"))
        .addRow(30, objArray("barney"))
        .build();
    new RowSetComparison(expected).verifyAndClearAll(actual);
    rsLoader.close();
}
Also used: SingleRowSet(org.apache.drill.test.rowSet.RowSet.SingleRowSet) RowSetComparison(org.apache.drill.test.rowSet.RowSetComparison) ResultSetLoader(org.apache.drill.exec.physical.rowSet.ResultSetLoader) TupleWriter(org.apache.drill.exec.vector.accessor.TupleWriter) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.test.rowSet.schema.SchemaBuilder) RowSet(org.apache.drill.test.rowSet.RowSet) RowSetLoader(org.apache.drill.exec.physical.rowSet.RowSetLoader) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)
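A note on the helpers used in these rows: objArray, intArray, and strArray are varargs convenience methods from Drill's test framework (RowSetUtilities is their likely home, though that is an assumption here). Since addRow takes Object..., a map's column values or an array cell must be wrapped as a single argument. A minimal sketch of how such helpers look:

// Sketch only; the real helpers live in Drill's test framework, not here.
public class RowHelpers {

    // Wraps a map's column values (or any nested tuple) as one addRow() argument.
    public static Object[] objArray(Object... values) { return values; }

    // Cell value for a repeated VARCHAR column.
    public static String[] strArray(String... values) { return values; }

    // Cell value for a repeated INT column.
    public static int[] intArray(int... values) { return values; }
}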

Example 92 with SchemaBuilder

use of org.apache.drill.test.rowSet.schema.SchemaBuilder in project drill by axbaretto.

the class TestResultSetLoaderMaps method testMapWithArray.

/**
 * Test a map that contains a scalar array. There is no reason to suspect
 * that this will cause problems, as the array writer is fully tested in the
 * accessor subsystem. Still, we need to test the cardinality methods of the
 * loader layer.
 */
@Test
public void testMapWithArray() {
    TupleMetadata schema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .addMap("m")
            .addArray("c", MinorType.INT)
            .addArray("d", MinorType.VARCHAR)
            .resumeSchema()
        .buildSchema();
    ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
        .setSchema(schema)
        .build();
    ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
    RowSetLoader rootWriter = rsLoader.writer();
    // Write some rows
    rsLoader.startBatch();
    rootWriter
        .addRow(10, objArray(intArray(110, 120, 130), strArray("d1.1", "d1.2", "d1.3", "d1.4")))
        .addRow(20, objArray(intArray(210), strArray()))
        .addRow(30, objArray(intArray(), strArray("d3.1")));
    // Validate first batch
    RowSet actual = fixture.wrap(rsLoader.harvest());
    SingleRowSet expected = fixture.rowSetBuilder(schema)
        .addRow(10, objArray(intArray(110, 120, 130), strArray("d1.1", "d1.2", "d1.3", "d1.4")))
        .addRow(20, objArray(intArray(210), strArray()))
        .addRow(30, objArray(intArray(), strArray("d3.1")))
        .build();
    new RowSetComparison(expected).verifyAndClearAll(actual);
    // Add another array after the first row in the second batch.
    rsLoader.startBatch();
    rootWriter
        .addRow(40, objArray(intArray(410, 420), strArray("d4.1", "d4.2")))
        .addRow(50, objArray(intArray(510), strArray("d5.1")));
    TupleWriter mapWriter = rootWriter.tuple("m");
    mapWriter.addColumn(
        SchemaBuilder.columnSchema("e", MinorType.VARCHAR, DataMode.REPEATED));
    rootWriter
        .addRow(60, objArray(intArray(610, 620), strArray("d6.1", "d6.2"), strArray("e6.1", "e6.2")))
        .addRow(70, objArray(intArray(710), strArray(), strArray("e7.1", "e7.2")));
    // Validate the second batch. The new array should have been back-filled
    // with empty offsets for the missing rows.
    actual = fixture.wrap(rsLoader.harvest());
    // System.out.println(actual.schema().toString());
    expected = fixture.rowSetBuilder(actual.schema())
        .addRow(40, objArray(intArray(410, 420), strArray("d4.1", "d4.2"), strArray()))
        .addRow(50, objArray(intArray(510), strArray("d5.1"), strArray()))
        .addRow(60, objArray(intArray(610, 620), strArray("d6.1", "d6.2"), strArray("e6.1", "e6.2")))
        .addRow(70, objArray(intArray(710), strArray(), strArray("e7.1", "e7.2")))
        .build();
    // expected.print();
    new RowSetComparison(expected).verifyAndClearAll(actual);
    rsLoader.close();
}
Also used: SingleRowSet(org.apache.drill.test.rowSet.RowSet.SingleRowSet) RowSetComparison(org.apache.drill.test.rowSet.RowSetComparison) ResultSetLoader(org.apache.drill.exec.physical.rowSet.ResultSetLoader) TupleWriter(org.apache.drill.exec.vector.accessor.TupleWriter) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.test.rowSet.schema.SchemaBuilder) RowSet(org.apache.drill.test.rowSet.RowSet) RowSetLoader(org.apache.drill.exec.physical.rowSet.RowSetLoader) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)
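The back-fill this test verifies concerns the array column's offset vector: row i's array is the slice data[offsets[i] .. offsets[i+1]) of the underlying data vector, so rows written before column "e" existed must receive equal start and end offsets to read as empty arrays. A conceptual sketch with illustrative names (not Drill's internal API):

// Illustrative model of offset back-fill for an array column added mid-batch.
// offsets[0..rowsBeforeColumn] stay 0, so the earlier rows are empty slices;
// rows written after the column was added advance the running end offset.
static int[] offsetsAfterBackFill(int rowsBeforeColumn, int[][] laterRowValues) {
    int[] offsets = new int[rowsBeforeColumn + laterRowValues.length + 1];
    int end = 0;
    for (int r = 0; r < laterRowValues.length; r++) {
        end += laterRowValues[r].length;            // values appended for this row
        offsets[rowsBeforeColumn + r + 1] = end;    // record the row's end offset
    }
    return offsets;
}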

Example 93 with SchemaBuilder

use of org.apache.drill.test.rowSet.schema.SchemaBuilder in project drill by axbaretto.

the class TestResultSetLoaderOmittedValues method testOmittedValuesAtEnd.

/**
 * Test "holes" in the middle of a batch, and unset columns at
 * the end. Ending the batch should fill in missing values.
 */
@Test
public void testOmittedValuesAtEnd() {
    // Create columns up front
    TupleMetadata schema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .add("b", MinorType.VARCHAR)
        .addNullable("c", MinorType.VARCHAR)
        .add("d", MinorType.INT)
        .addNullable("e", MinorType.INT)
        .addArray("f", MinorType.VARCHAR)
        .buildSchema();
    ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
        .setSchema(schema)
        .build();
    ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
    RowSetLoader rootWriter = rsLoader.writer();
    rsLoader.startBatch();
    int rowCount = 0;
    ScalarWriter arrayWriter;
    for (int i = 0; i < 2; i++) {
        // Row 0, 1
        rootWriter.start();
        rowCount++;
        rootWriter.scalar(0).setInt(rowCount);
        rootWriter.scalar(1).setString("b_" + rowCount);
        rootWriter.scalar(2).setString("c_" + rowCount);
        rootWriter.scalar(3).setInt(rowCount * 10);
        rootWriter.scalar(4).setInt(rowCount * 100);
        arrayWriter = rootWriter.column(5).array().scalar();
        arrayWriter.setString("f_" + rowCount + "-1");
        arrayWriter.setString("f_" + rowCount + "-2");
        rootWriter.save();
    }
    for (int i = 0; i < 2; i++) {
        // Rows 2, 3
        rootWriter.start();
        rowCount++;
        rootWriter.scalar(0).setInt(rowCount);
        rootWriter.scalar(1).setString("b_" + rowCount);
        rootWriter.scalar(3).setInt(rowCount * 10);
        arrayWriter = rootWriter.column(5).array().scalar();
        arrayWriter.setString("f_" + rowCount + "-1");
        arrayWriter.setString("f_" + rowCount + "-2");
        rootWriter.save();
    }
    for (int i = 0; i < 2; i++) {
        // Rows 4, 5
        rootWriter.start();
        rowCount++;
        rootWriter.scalar(0).setInt(rowCount);
        rootWriter.scalar(2).setString("c_" + rowCount);
        rootWriter.scalar(4).setInt(rowCount * 100);
        rootWriter.save();
    }
    for (int i = 0; i < 2; i++) {
        // Rows 6, 7
        rootWriter.start();
        rowCount++;
        rootWriter.scalar(0).setInt(rowCount);
        rootWriter.scalar(1).setString("b_" + rowCount);
        rootWriter.scalar(2).setString("c_" + rowCount);
        rootWriter.scalar(3).setInt(rowCount * 10);
        rootWriter.scalar(4).setInt(rowCount * 100);
        arrayWriter = rootWriter.column(5).array().scalar();
        arrayWriter.setString("f_" + rowCount + "-1");
        arrayWriter.setString("f_" + rowCount + "-2");
        rootWriter.save();
    }
    for (int i = 0; i < 2; i++) {
        // Rows 8, 9
        rootWriter.start();
        rowCount++;
        rootWriter.scalar(0).setInt(rowCount);
        rootWriter.save();
    }
    // Harvest the rows and verify.
    RowSet actual = fixture.wrap(rsLoader.harvest());
    // actual.print();
    BatchSchema expectedSchema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .add("b", MinorType.VARCHAR)
        .addNullable("c", MinorType.VARCHAR)
        .add("d", MinorType.INT)
        .addNullable("e", MinorType.INT)
        .addArray("f", MinorType.VARCHAR)
        .build();
    SingleRowSet expected = fixture.rowSetBuilder(expectedSchema)
        .addRow(1, "b_1", "c_1", 10, 100, strArray("f_1-1", "f_1-2"))
        .addRow(2, "b_2", "c_2", 20, 200, strArray("f_2-1", "f_2-2"))
        .addRow(3, "b_3", null, 30, null, strArray("f_3-1", "f_3-2"))
        .addRow(4, "b_4", null, 40, null, strArray("f_4-1", "f_4-2"))
        .addRow(5, "", "c_5", 0, 500, strArray())
        .addRow(6, "", "c_6", 0, 600, strArray())
        .addRow(7, "b_7", "c_7", 70, 700, strArray("f_7-1", "f_7-2"))
        .addRow(8, "b_8", "c_8", 80, 800, strArray("f_8-1", "f_8-2"))
        .addRow(9, "", null, 0, null, strArray())
        .addRow(10, "", null, 0, null, strArray())
        .build();
    new RowSetComparison(expected).verifyAndClearAll(actual);
    rsLoader.close();
}
Also used: SingleRowSet(org.apache.drill.test.rowSet.RowSet.SingleRowSet) RowSet(org.apache.drill.test.rowSet.RowSet) RowSetComparison(org.apache.drill.test.rowSet.RowSetComparison) ResultSetLoader(org.apache.drill.exec.physical.rowSet.ResultSetLoader) BatchSchema(org.apache.drill.exec.record.BatchSchema) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.test.rowSet.schema.SchemaBuilder) RowSetLoader(org.apache.drill.exec.physical.rowSet.RowSetLoader) ScalarWriter(org.apache.drill.exec.vector.accessor.ScalarWriter) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)
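The expected rows encode the loader's fill-in rules for columns left unset when a row is saved: nullable columns become null, a required VARCHAR becomes the empty string, a required INT becomes zero, and an array becomes empty. A hedged summary of those rules as a hypothetical helper (not Drill's API, though ColumnMetadata and MinorType are the types used elsewhere in these tests):

// Hypothetical helper summarizing the fill-in values the expected rows show.
static Object emptyValueFor(ColumnMetadata col) {
    if (col.isArray()) {
        return new Object[] {};   // arrays are back-filled as empty
    }
    if (col.isNullable()) {
        return null;              // nullable columns become null
    }
    switch (col.type()) {
        case INT:     return 0;   // required numerics become zero
        case VARCHAR: return "";  // required VARCHAR becomes the empty string
        default:      throw new IllegalStateException("Unhandled type: " + col.type());
    }
}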

Example 94 with SchemaBuilder

use of org.apache.drill.test.rowSet.schema.SchemaBuilder in project drill by axbaretto.

the class TestResultSetLoaderOmittedValues method testOmittedValuesAtEndWithOverflow.

/**
 * Test "holes" at the end of a batch when the batch overflows. The completed
 * batch must be finalized correctly, and the new batch initialized correctly,
 * to account for the missing values.
 */
@Test
public void testOmittedValuesAtEndWithOverflow() {
    TupleMetadata schema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .add("b", MinorType.VARCHAR)
        .addNullable("c", MinorType.VARCHAR)
        .addNullable("d", MinorType.VARCHAR)
        .buildSchema();
    ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
        .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
        .setSchema(schema)
        .build();
    ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
    RowSetLoader rootWriter = rsLoader.writer();
    // Fill the batch. Column d has some values. Column c is worst case: no values.
    rsLoader.startBatch();
    byte[] value = new byte[533];
    Arrays.fill(value, (byte) 'X');
    int rowNumber = 0;
    while (!rootWriter.isFull()) {
        rootWriter.start();
        rowNumber++;
        rootWriter.scalar(0).setInt(rowNumber);
        rootWriter.scalar(1).setBytes(value, value.length);
        if (rowNumber < 10_000) {
            rootWriter.scalar(3).setString("d-" + rowNumber);
        }
        rootWriter.save();
        assertEquals(rowNumber, rsLoader.totalRowCount());
    }
    // Harvest and verify
    RowSet result = fixture.wrap(rsLoader.harvest());
    assertEquals(rowNumber - 1, result.rowCount());
    RowSetReader reader = result.reader();
    int rowIndex = 0;
    while (reader.next()) {
        int expectedRowNumber = 1 + rowIndex;
        assertEquals(expectedRowNumber, reader.scalar(0).getInt());
        assertTrue(reader.scalar(2).isNull());
        if (expectedRowNumber < 10_000) {
            assertEquals("d-" + expectedRowNumber, reader.scalar(3).getString());
        } else {
            assertTrue(reader.scalar(3).isNull());
        }
        rowIndex++;
    }
    // Start count for this batch is one less than current
    // count, because of the overflow row.
    int startRowNumber = rowNumber;
    // Write a few more rows to the next batch
    rsLoader.startBatch();
    for (int i = 0; i < 10; i++) {
        rootWriter.start();
        rowNumber++;
        rootWriter.scalar(0).setInt(rowNumber);
        rootWriter.scalar(1).setBytes(value, value.length);
        if (i > 5) {
            rootWriter.scalar(3).setString("d-" + rowNumber);
        }
        rootWriter.save();
        assertEquals(rowNumber, rsLoader.totalRowCount());
    }
    // Verify that holes were preserved.
    result = fixture.wrap(rsLoader.harvest());
    assertEquals(rowNumber, rsLoader.totalRowCount());
    assertEquals(rowNumber - startRowNumber + 1, result.rowCount());
    // result.print();
    reader = result.reader();
    rowIndex = 0;
    while (reader.next()) {
        int expectedRowNumber = startRowNumber + rowIndex;
        assertEquals(expectedRowNumber, reader.scalar(0).getInt());
        assertTrue(reader.scalar(2).isNull());
        if (rowIndex > 6) {
            assertEquals("d-" + expectedRowNumber, reader.scalar(3).getString());
        } else {
            assertTrue("Row " + rowIndex + " col d should be null", reader.scalar(3).isNull());
        }
        rowIndex++;
    }
    assertEquals(11, rowIndex);
    rsLoader.close();
}
Also used: ResultSetLoader(org.apache.drill.exec.physical.rowSet.ResultSetLoader) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.test.rowSet.schema.SchemaBuilder) SingleRowSet(org.apache.drill.test.rowSet.RowSet.SingleRowSet) RowSet(org.apache.drill.test.rowSet.RowSet) RowSetLoader(org.apache.drill.exec.physical.rowSet.RowSetLoader) RowSetReader(org.apache.drill.test.rowSet.RowSetReader) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)
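Stripped of the per-column writes, the overflow idiom this test exercises reduces to the skeleton below, using only calls that appear in the test itself. The row that trips the limit is withheld from the harvested batch (hence the rowNumber - 1 assertion) and silently becomes the first row of the next batch:

rsLoader.startBatch();
while (!rootWriter.isFull()) {
    rootWriter.start();
    // ... set column values ...
    rootWriter.save();
}
RowSet first = fixture.wrap(rsLoader.harvest());  // overflow row not included
rsLoader.startBatch();                            // overflow row carried over here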

Example 95 with SchemaBuilder

use of org.apache.drill.test.rowSet.schema.SchemaBuilder in project drill by axbaretto.

the class TestResultSetLoaderOmittedValues method testSkipRows.

/**
 * Test that omitting the call to save() effectively discards
 * the row. Note that the vectors still contain values in the
 * discarded position; just the various pointers are unset. If
 * the batch ends before the discarded values are overwritten, the
 * discarded values just exist at the end of the vector. Since vectors
 * start with garbage contents, the discarded values are simply a different
 * kind of garbage. But, if the client writes a new row, then the new
 * row overwrites the discarded row. This works because we only change
 * the tail part of a vector; never the internals.
 */
@Test
public void testSkipRows() {
    TupleMetadata schema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .addNullable("b", MinorType.VARCHAR)
        .buildSchema();
    ResultSetLoaderImpl.ResultSetOptions options = new OptionBuilder()
        .setRowCountLimit(ValueVector.MAX_ROW_COUNT)
        .setSchema(schema)
        .build();
    ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), options);
    RowSetLoader rootWriter = rsLoader.writer();
    rsLoader.startBatch();
    int rowNumber = 0;
    for (int i = 0; i < 14; i++) {
        rootWriter.start();
        rowNumber++;
        rootWriter.scalar(0).setInt(rowNumber);
        if (i % 3 == 0) {
            rootWriter.scalar(1).setNull();
        } else {
            rootWriter.scalar(1).setString("b-" + rowNumber);
        }
        if (i % 2 == 0) {
            rootWriter.save();
        }
    }
    RowSet result = fixture.wrap(rsLoader.harvest());
    // result.print();
    SingleRowSet expected = fixture.rowSetBuilder(result.batchSchema())
        .addRow(1, null)
        .addRow(3, "b-3")
        .addRow(5, "b-5")
        .addRow(7, null)
        .addRow(9, "b-9")
        .addRow(11, "b-11")
        .addRow(13, null)
        .build();
    // expected.print();
    new RowSetComparison(expected).verifyAndClearAll(result);
    rsLoader.close();
}
Also used: SingleRowSet(org.apache.drill.test.rowSet.RowSet.SingleRowSet) RowSetComparison(org.apache.drill.test.rowSet.RowSetComparison) ResultSetLoader(org.apache.drill.exec.physical.rowSet.ResultSetLoader) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.test.rowSet.schema.SchemaBuilder) RowSet(org.apache.drill.test.rowSet.RowSet) RowSetLoader(org.apache.drill.exec.physical.rowSet.RowSetLoader) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)
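The discard idiom itself reduces to a few lines: start() positions the writers on a fresh row, but only save() commits it, so a started-but-unsaved row is simply overwritten by the next one. A minimal sketch using only calls from the test above:

rootWriter.start();
rootWriter.scalar(0).setInt(99);    // written into the vector...
// ...but never saved: not committed, so the next row overwrites it
rootWriter.start();
rootWriter.scalar(0).setInt(100);
rootWriter.save();                  // only this row appears in the batch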

Aggregations

SchemaBuilder (org.apache.drill.test.rowSet.schema.SchemaBuilder): 175 uses
Test (org.junit.Test): 154 uses
BatchSchema (org.apache.drill.exec.record.BatchSchema): 102 uses
SingleRowSet (org.apache.drill.test.rowSet.RowSet.SingleRowSet): 91 uses
SubOperatorTest (org.apache.drill.test.SubOperatorTest): 86 uses
TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata): 65 uses
RowSet (org.apache.drill.test.rowSet.RowSet): 52 uses
RowSetReader (org.apache.drill.test.rowSet.RowSetReader): 50 uses
ResultSetLoader (org.apache.drill.exec.physical.rowSet.ResultSetLoader): 38 uses
RowSetLoader (org.apache.drill.exec.physical.rowSet.RowSetLoader): 38 uses
ScalarReader (org.apache.drill.exec.vector.accessor.ScalarReader): 26 uses
RowSetComparison (org.apache.drill.test.rowSet.RowSetComparison): 25 uses
DrillTest (org.apache.drill.test.DrillTest): 21 uses
ColumnMetadata (org.apache.drill.exec.record.metadata.ColumnMetadata): 20 uses
ScalarWriter (org.apache.drill.exec.vector.accessor.ScalarWriter): 18 uses
RowSetBuilder (org.apache.drill.test.rowSet.RowSetBuilder): 18 uses
ScalarElementReader (org.apache.drill.exec.vector.accessor.ScalarElementReader): 17 uses
AbstractColumnMetadata (org.apache.drill.exec.record.metadata.AbstractColumnMetadata): 16 uses
TupleWriter (org.apache.drill.exec.vector.accessor.TupleWriter): 16 uses
RecordBatch (org.apache.drill.exec.record.RecordBatch): 14 uses