Search in sources :

Example 66 with RowSet

use of org.apache.drill.test.rowSet.RowSet in project drill by axbaretto.

the class TestRecordBatchSizer method testSizerVariableWidth.

/**
 * Sizes a batch holding a single VARCHAR column "a" with ten rows
 * ("a", "aa", ... ten a's), checks the column statistics reported by
 * {@link RecordBatchSizer}, and then checks the value and offset vector
 * capacities produced by {@code ColumnSize.allocateVector} for several
 * requested row counts.
 *
 * <p>Both row sets are released in {@code finally} blocks so that a failing
 * assertion does not leave their buffers allocated.
 */
@Test
public void testSizerVariableWidth() {
    BatchSchema schema = new SchemaBuilder().add("a", MinorType.VARCHAR).build();
    RowSetBuilder builder = fixture.rowSetBuilder(schema);
    StringBuilder stringBuilder = new StringBuilder();
    // a, aa, aaa, ... aaaaaaaaaa. totalSize = (10*11)/2 = 55
    for (int i = 0; i < 10; i++) {
        stringBuilder.append("a");
        builder.addRow(stringBuilder.toString());
    }
    RowSet rows = builder.build();
    try {
        // Run the record batch sizer on the resulting batch.
        RecordBatchSizer sizer = new RecordBatchSizer(rows.container());
        assertEquals(1, sizer.columns().size());
        ColumnSize aColumn = sizer.columns().get("a");
        /*
         * stdDataSize:50, stdNetSize:50+4, dataSizePerEntry:6, netSizePerEntry:10,
         * totalDataSize:(10*11)/2, totalNetSize:(10*11)/2 + 4*10, valueCount:10,
         * elementCount:10, estElementCountPerArray:1, isVariableWidth:true
         *
         * NOTE(review): the per-entry sizes look like the totals divided by
         * valueCount and rounded up (55/10 -> 6, 95/10 -> 10) - confirm
         * against RecordBatchSizer.
         */
        verifyColumnValues(aColumn, 50, 54, 6, 10, 55, 95, 10, 10, 1, true);
        SingleRowSet empty = fixture.rowSet(schema);
        try {
            VectorAccessible accessible = empty.vectorAccessible();
            for (VectorWrapper<?> vw : accessible) {
                ValueVector v = vw.getValueVector();
                RecordBatchSizer.ColumnSize colSize = sizer.getColumn(v.getField().getName());
                // Allocates to nearest power of two
                colSize.allocateVector(v, testRowCount);
                UInt4Vector offsetVector = ((VariableWidthVector) v).getOffsetVector();
                assertEquals((Integer.highestOneBit(testRowCount) << 1), offsetVector.getValueCapacity());
                assertEquals(Integer.highestOneBit(testRowCount << 1) - 1, v.getValueCapacity());
                v.clear();
                // Allocates the same as value passed since it is already power of two.
                // -1 is done for adjustment needed for offset vector.
                colSize.allocateVector(v, testRowCountPowerTwo - 1);
                offsetVector = ((VariableWidthVector) v).getOffsetVector();
                assertEquals(testRowCountPowerTwo, offsetVector.getValueCapacity());
                assertEquals(testRowCountPowerTwo - 1, v.getValueCapacity());
                v.clear();
                // Allocate for max rows.
                colSize.allocateVector(v, ValueVector.MAX_ROW_COUNT - 1);
                offsetVector = ((VariableWidthVector) v).getOffsetVector();
                assertEquals(ValueVector.MAX_ROW_COUNT, offsetVector.getValueCapacity());
                assertEquals(ValueVector.MAX_ROW_COUNT - 1, v.getValueCapacity());
                v.clear();
                // Allocate for 0 rows. Should at least do allocation for 1 row.
                colSize.allocateVector(v, 0);
                offsetVector = ((VariableWidthVector) v).getOffsetVector();
                assertEquals(ValueVector.MIN_ROW_COUNT + 1, offsetVector.getValueCapacity());
                assertEquals(ValueVector.MIN_ROW_COUNT, v.getValueCapacity());
                v.clear();
            }
        } finally {
            // Release the empty row set even when an assertion above fails.
            empty.clear();
        }
    } finally {
        // Release the populated row set even when an assertion above fails.
        rows.clear();
    }
}
Also used : SingleRowSet(org.apache.drill.test.rowSet.RowSet.SingleRowSet) ColumnSize(org.apache.drill.exec.record.RecordBatchSizer.ColumnSize) ColumnSize(org.apache.drill.exec.record.RecordBatchSizer.ColumnSize) SingleRowSet(org.apache.drill.test.rowSet.RowSet.SingleRowSet) RowSet(org.apache.drill.test.rowSet.RowSet) UInt4Vector(org.apache.drill.exec.vector.UInt4Vector) ValueVector(org.apache.drill.exec.vector.ValueVector) RepeatedValueVector(org.apache.drill.exec.vector.complex.RepeatedValueVector) RowSetBuilder(org.apache.drill.test.rowSet.RowSetBuilder) SchemaBuilder(org.apache.drill.test.rowSet.schema.SchemaBuilder) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 67 with RowSet

use of org.apache.drill.test.rowSet.RowSet in project drill by axbaretto.

the class TestRecordBatchSizer method testSizerRepeatedMap.

/**
 * Sizes a batch holding a single repeated-map column "map" (INT "key",
 * VARCHAR "value") with ten rows of two map entries each, checks the column
 * statistics reported by {@link RecordBatchSizer}, and then checks the
 * capacities of the map's offset vector and child vectors produced by
 * {@code ColumnSize.allocateVector} for several requested row counts.
 *
 * <p>Both row sets are released in {@code finally} blocks so that a failing
 * assertion does not leave their buffers allocated.
 */
@Test
public void testSizerRepeatedMap() {
    BatchSchema schema = new SchemaBuilder().addMapArray("map").add("key", MinorType.INT).add("value", MinorType.VARCHAR).resumeSchema().build();
    RowSetBuilder builder = fixture.rowSetBuilder(schema);
    for (int i = 0; i < 10; i++) {
        builder.addRow((Object) new Object[] { new Object[] { 110, "a" }, new Object[] { 120, "b" } });
    }
    RowSet rows = builder.build();
    try {
        // Run the record batch sizer on the resulting batch.
        RecordBatchSizer sizer = new RecordBatchSizer(rows.container());
        assertEquals(1, sizer.columns().size());
        /*
         * stdDataSize:50+4, stdNetSize:50+4+4+4, dataSizePerEntry:(4+1)*2,
         * netSizePerEntry: 4*2+1*2+4*2+4,
         * totalDataSize:5*2*10, totalNetSize:netSizePerEntry*10,
         * valueCount:10,
         * elementCount:20, estElementCountPerArray:2, isVariableWidth:false
         */
        verifyColumnValues(sizer.columns().get("map"), 54, 62, 10, 22, 100, 220, 10, 20, 2, false);
        SingleRowSet empty = fixture.rowSet(schema);
        try {
            VectorAccessible accessible = empty.vectorAccessible();
            for (VectorWrapper<?> vw : accessible) {
                ValueVector v = vw.getValueVector();
                RecordBatchSizer.ColumnSize colSize = sizer.getColumn(v.getField().getName());
                // v is the same object throughout, so one cast serves every case below.
                RepeatedMapVector mapVector = (RepeatedMapVector) v;
                // Allocates to nearest power of two
                colSize.allocateVector(v, testRowCount);
                UInt4Vector offsetVector = ((RepeatedValueVector) mapVector).getOffsetVector();
                assertEquals((Integer.highestOneBit(testRowCount) << 1), offsetVector.getValueCapacity());
                ValueVector keyVector = mapVector.getChild("key");
                ValueVector valueVector1 = mapVector.getChild("value");
                assertEquals(((Integer.highestOneBit(testRowCount) << 1) * 2), keyVector.getValueCapacity());
                offsetVector = ((VariableWidthVector) valueVector1).getOffsetVector();
                assertEquals((Integer.highestOneBit(testRowCount) << 1) * 2, offsetVector.getValueCapacity());
                assertEquals(Integer.highestOneBit(testRowCount << 1) * 2 - 1, valueVector1.getValueCapacity());
                // Allocates the same as value passed since it is already power of two.
                colSize.allocateVector(v, testRowCountPowerTwo - 1);
                offsetVector = ((RepeatedValueVector) mapVector).getOffsetVector();
                assertEquals(testRowCountPowerTwo, offsetVector.getValueCapacity());
                keyVector = mapVector.getChild("key");
                valueVector1 = mapVector.getChild("value");
                assertEquals(testRowCountPowerTwo * 2, keyVector.getValueCapacity());
                offsetVector = ((VariableWidthVector) valueVector1).getOffsetVector();
                assertEquals(testRowCountPowerTwo * 2, offsetVector.getValueCapacity());
                assertEquals(testRowCountPowerTwo * 2 - 1, valueVector1.getValueCapacity());
                // Allocate for max rows.
                colSize.allocateVector(v, ValueVector.MAX_ROW_COUNT - 1);
                offsetVector = ((RepeatedValueVector) mapVector).getOffsetVector();
                assertEquals(ValueVector.MAX_ROW_COUNT, offsetVector.getValueCapacity());
                keyVector = mapVector.getChild("key");
                valueVector1 = mapVector.getChild("value");
                assertEquals(ValueVector.MAX_ROW_COUNT * 2, keyVector.getValueCapacity());
                offsetVector = ((VariableWidthVector) valueVector1).getOffsetVector();
                assertEquals(ValueVector.MAX_ROW_COUNT * 2, offsetVector.getValueCapacity());
                assertEquals(ValueVector.MAX_ROW_COUNT * 2 - 1, valueVector1.getValueCapacity());
                // Allocate for 0 rows. Should at least do allocation for 1 row.
                colSize.allocateVector(v, 0);
                offsetVector = ((RepeatedValueVector) mapVector).getOffsetVector();
                assertEquals(ValueVector.MIN_ROW_COUNT, offsetVector.getValueCapacity());
                keyVector = mapVector.getChild("key");
                valueVector1 = mapVector.getChild("value");
                assertEquals(ValueVector.MIN_ROW_COUNT, keyVector.getValueCapacity());
                offsetVector = ((VariableWidthVector) valueVector1).getOffsetVector();
                assertEquals(ValueVector.MIN_ROW_COUNT + 1, offsetVector.getValueCapacity());
                assertEquals(ValueVector.MIN_ROW_COUNT, valueVector1.getValueCapacity());
                v.clear();
            }
        } finally {
            // Release the empty row set even when an assertion above fails.
            empty.clear();
        }
    } finally {
        // Release the populated row set even when an assertion above fails.
        rows.clear();
    }
}
Also used : SingleRowSet(org.apache.drill.test.rowSet.RowSet.SingleRowSet) ColumnSize(org.apache.drill.exec.record.RecordBatchSizer.ColumnSize) RepeatedMapVector(org.apache.drill.exec.vector.complex.RepeatedMapVector) SingleRowSet(org.apache.drill.test.rowSet.RowSet.SingleRowSet) RowSet(org.apache.drill.test.rowSet.RowSet) UInt4Vector(org.apache.drill.exec.vector.UInt4Vector) ValueVector(org.apache.drill.exec.vector.ValueVector) RepeatedValueVector(org.apache.drill.exec.vector.complex.RepeatedValueVector) RowSetBuilder(org.apache.drill.test.rowSet.RowSetBuilder) SchemaBuilder(org.apache.drill.test.rowSet.schema.SchemaBuilder) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 68 with RowSet

use of org.apache.drill.test.rowSet.RowSet in project drill by axbaretto.

the class TestVectorContainer method testContainerMerge.

/**
 * Checks that two schemas, and two vector containers, can be merged.
 * The merge is "horizontal", in the manner of a row-by-row join: a
 * container is a list of vectors, so the merged result is simply the
 * two lists combined. Also checks that merging is rejected when the
 * left-hand batch carries a selection vector.
 */
@Test
public void testContainerMerge() {
    // Left input: stands in for data produced by a reader.
    BatchSchema readerSchema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .addNullable("b", MinorType.VARCHAR)
        .build();
    SingleRowSet readerRows = fixture.rowSetBuilder(readerSchema)
        .addRow(10, "fred")
        .addRow(20, "barney")
        .addRow(30, "wilma")
        .build();

    // Right input: stands in for "implicit" columns (row number, file name).
    BatchSchema implicitSchema = new SchemaBuilder()
        .add("x", MinorType.SMALLINT)
        .add("y", MinorType.VARCHAR)
        .build();
    SingleRowSet implicitRows = fixture.rowSetBuilder(implicitSchema)
        .addRow(1, "foo.txt")
        .addRow(2, "bar.txt")
        .addRow(3, "dino.txt")
        .build();

    // Expected outcome: the columns of both inputs, side by side.
    BatchSchema combinedSchema = new SchemaBuilder()
        .add("a", MinorType.INT)
        .addNullable("b", MinorType.VARCHAR)
        .add("x", MinorType.SMALLINT)
        .add("y", MinorType.VARCHAR)
        .build();
    SingleRowSet combinedRows = fixture.rowSetBuilder(combinedSchema)
        .addRow(10, "fred", 1, "foo.txt")
        .addRow(20, "barney", 2, "bar.txt")
        .addRow(30, "wilma", 3, "dino.txt")
        .build();

    // First path: merge the raw containers (no selection vector involved).
    RowSet viaContainer = fixture.wrap(readerRows.container().merge(implicitRows.container()));
    RowSetComparison verifier = new RowSetComparison(combinedRows);
    verifier.verify(viaContainer);

    // Second path: merge again and wrap the result with the row set facade.
    RowSet viaFacade = DirectRowSet.fromContainer(readerRows.container().merge(implicitRows.container()));
    verifier.verifyAndClearAll(viaFacade);

    // Third path: give the left input a selection vector. The present code
    // forbids merging batches that have one, so the merge must throw.
    SingleRowSet readerIndirect = readerRows.toIndirect();
    try {
        readerIndirect.container().merge(implicitRows.container());
        fail();
    } catch (IllegalArgumentException ignored) {
        // Expected
    }
    readerIndirect.clear();
    implicitRows.clear();
}
Also used : SingleRowSet(org.apache.drill.test.rowSet.RowSet.SingleRowSet) RowSetComparison(org.apache.drill.test.rowSet.RowSetComparison) SchemaBuilder(org.apache.drill.test.rowSet.schema.SchemaBuilder) SingleRowSet(org.apache.drill.test.rowSet.RowSet.SingleRowSet) RowSet(org.apache.drill.test.rowSet.RowSet) DirectRowSet(org.apache.drill.test.rowSet.DirectRowSet) Test(org.junit.Test) VectorTest(org.apache.drill.categories.VectorTest) DrillTest(org.apache.drill.test.DrillTest)

Example 69 with RowSet

use of org.apache.drill.test.rowSet.RowSet in project drill by axbaretto.

the class TestCsv method testInvalidCsvHeaders.

/**
 * Writes {@code invalidHeaders} to "case3.csv", queries that file, and
 * checks that the result has exactly the six VARCHAR columns and the single
 * row asserted below.
 *
 * @throws IOException declared by this test; presumably raised while writing
 *         the CSV file - confirm against {@code buildFile}
 */
@Test
public void testInvalidCsvHeaders() throws IOException {
    String csvName = "case3.csv";
    buildFile(csvName, invalidHeaders);

    // Run the query against the file just written.
    RowSet queryResult = client.queryBuilder()
        .sql(makeStatement(csvName))
        .rowSet();

    // Column names the query result is expected to expose.
    BatchSchema wantSchema = new SchemaBuilder()
        .add("column_1", MinorType.VARCHAR)
        .add("column_2", MinorType.VARCHAR)
        .add("col_9b", MinorType.VARCHAR)
        .add("c", MinorType.VARCHAR)
        .add("c_2", MinorType.VARCHAR)
        .add("c_2_2", MinorType.VARCHAR)
        .build();
    RowSet wantRows = new RowSetBuilder(client.allocator(), wantSchema)
        .addRow("10", "foo", "bar", "fourth", "fifth", "sixth")
        .build();

    // Compare, then release both row sets.
    RowSetComparison verifier = new RowSetComparison(wantRows);
    verifier.verifyAndClearAll(queryResult);
}
Also used : RowSetBuilder(org.apache.drill.test.rowSet.RowSetBuilder) RowSetComparison(org.apache.drill.test.rowSet.RowSetComparison) BatchSchema(org.apache.drill.exec.record.BatchSchema) RowSet(org.apache.drill.test.rowSet.RowSet) SchemaBuilder(org.apache.drill.test.rowSet.schema.SchemaBuilder) ClusterTest(org.apache.drill.test.ClusterTest) Test(org.junit.Test)

Aggregations

RowSet (org.apache.drill.test.rowSet.RowSet)69 Test (org.junit.Test)54 SchemaBuilder (org.apache.drill.test.rowSet.schema.SchemaBuilder)52 SubOperatorTest (org.apache.drill.test.SubOperatorTest)44 SingleRowSet (org.apache.drill.test.rowSet.RowSet.SingleRowSet)40 RowSetLoader (org.apache.drill.exec.physical.rowSet.RowSetLoader)35 ResultSetLoader (org.apache.drill.exec.physical.rowSet.ResultSetLoader)34 TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata)33 RowSetComparison (org.apache.drill.test.rowSet.RowSetComparison)26 RowSetBuilder (org.apache.drill.test.rowSet.RowSetBuilder)23 BatchSchema (org.apache.drill.exec.record.BatchSchema)18 RowSetReader (org.apache.drill.test.rowSet.RowSetReader)16 TupleWriter (org.apache.drill.exec.vector.accessor.TupleWriter)13 ColumnSize (org.apache.drill.exec.record.RecordBatchSizer.ColumnSize)11 ScalarWriter (org.apache.drill.exec.vector.accessor.ScalarWriter)11 ValueVector (org.apache.drill.exec.vector.ValueVector)10 ResultSetOptions (org.apache.drill.exec.physical.rowSet.impl.ResultSetLoaderImpl.ResultSetOptions)9 RepeatedValueVector (org.apache.drill.exec.vector.complex.RepeatedValueVector)9 DirectRowSet (org.apache.drill.test.rowSet.DirectRowSet)8 UInt4Vector (org.apache.drill.exec.vector.UInt4Vector)7