Search in sources :

Example 16 with RepeatedValueVector

use of org.apache.drill.exec.vector.complex.RepeatedValueVector in project drill by axbaretto.

the class TestRecordBatchSizer method testSizerRepeatedVariableWidth.

@Test
public void testSizerRepeatedVariableWidth() {
    BatchSchema schema = new SchemaBuilder().addArray("b", MinorType.VARCHAR).build();
    RowSetBuilder builder = fixture.rowSetBuilder(schema);
    // size = (5*6)/2 = 15
    String[] newString = new String[] { "a", "aa", "aaa", "aaaa", "aaaaa" };
    for (long i = 0; i < 10; i++) {
        builder.addRow((Object) (newString));
    }
    RowSet rows = builder.build();
    // Run the record batch sizer on the resulting batch.
    RecordBatchSizer sizer = new RecordBatchSizer(rows.container());
    assertEquals(1, sizer.columns().size());
    ColumnSize bColumn = sizer.columns().get("b");
    /**
     * stdDataSize:50*10, stdNetSize:50*10+4*10+4, dataSizePerEntry:(5*6)/2, netSizePerEntry:(5*6)/2+5*4+4,
     * totalDataSize:(5*6)/2 * 10, totalNetSize: ((5*6)/2+5*4+4)*10, valueCount:10,
     * elementCount:50, estElementCountPerArray:5, isVariableWidth:true
     */
    verifyColumnValues(bColumn, 500, 544, 15, 39, 150, 390, 10, 50, 5, true);
    SingleRowSet empty = fixture.rowSet(schema);
    VectorAccessible accessible = empty.vectorAccessible();
    for (VectorWrapper<?> vw : accessible) {
        ValueVector v = vw.getValueVector();
        RecordBatchSizer.ColumnSize colSize = sizer.getColumn(v.getField().getName());
        // Allocates to nearest power of two
        colSize.allocateVector(v, testRowCount - 1);
        UInt4Vector offsetVector = ((RepeatedValueVector) v).getOffsetVector();
        assertEquals(Integer.highestOneBit(testRowCount) << 1, offsetVector.getValueCapacity());
        VariableWidthVector vwVector = ((VariableWidthVector) ((RepeatedValueVector) v).getDataVector());
        offsetVector = vwVector.getOffsetVector();
        assertEquals((Integer.highestOneBit((testRowCount - 1) * 5) << 1), offsetVector.getValueCapacity());
        assertEquals(Integer.highestOneBit((testRowCount - 1) * 5 << 1) - 1, vwVector.getValueCapacity());
        v.clear();
        // Allocates the same as value passed since it is already power of two.
        colSize.allocateVector(v, testRowCountPowerTwo);
        offsetVector = ((RepeatedValueVector) v).getOffsetVector();
        assertEquals((Integer.highestOneBit(testRowCountPowerTwo) << 1), offsetVector.getValueCapacity());
        vwVector = ((VariableWidthVector) ((RepeatedValueVector) v).getDataVector());
        offsetVector = vwVector.getOffsetVector();
        assertEquals((Integer.highestOneBit(testRowCountPowerTwo * 5) << 1), offsetVector.getValueCapacity());
        assertEquals(Integer.highestOneBit(testRowCountPowerTwo * 5 << 1) - 1, vwVector.getValueCapacity());
        v.clear();
        // Allocate for max rows.
        colSize.allocateVector(v, ValueVector.MAX_ROW_COUNT);
        offsetVector = ((RepeatedValueVector) v).getOffsetVector();
        assertEquals(ValueVector.MAX_ROW_COUNT << 1, offsetVector.getValueCapacity());
        vwVector = ((VariableWidthVector) ((RepeatedValueVector) v).getDataVector());
        offsetVector = vwVector.getOffsetVector();
        assertEquals((Integer.highestOneBit(ValueVector.MAX_ROW_COUNT * 5) << 1), offsetVector.getValueCapacity());
        assertEquals(Integer.highestOneBit(ValueVector.MAX_ROW_COUNT * 5 << 1) - 1, vwVector.getValueCapacity());
        v.clear();
        // Allocate for 0 rows. should atleast do allocation for 1 row.
        colSize.allocateVector(v, 0);
        offsetVector = ((RepeatedValueVector) v).getOffsetVector();
        assertEquals(ValueVector.MIN_ROW_COUNT + 1, offsetVector.getValueCapacity());
        vwVector = ((VariableWidthVector) ((RepeatedValueVector) v).getDataVector());
        offsetVector = vwVector.getOffsetVector();
        assertEquals(ValueVector.MIN_ROW_COUNT + 1, offsetVector.getValueCapacity());
        assertEquals(ValueVector.MIN_ROW_COUNT, vwVector.getValueCapacity());
        v.clear();
    }
    empty.clear();
    rows.clear();
}
Also used : SingleRowSet(org.apache.drill.test.rowSet.RowSet.SingleRowSet) ColumnSize(org.apache.drill.exec.record.RecordBatchSizer.ColumnSize) ColumnSize(org.apache.drill.exec.record.RecordBatchSizer.ColumnSize) RepeatedValueVector(org.apache.drill.exec.vector.complex.RepeatedValueVector) SingleRowSet(org.apache.drill.test.rowSet.RowSet.SingleRowSet) RowSet(org.apache.drill.test.rowSet.RowSet) VariableWidthVector(org.apache.drill.exec.vector.VariableWidthVector) UInt4Vector(org.apache.drill.exec.vector.UInt4Vector) ValueVector(org.apache.drill.exec.vector.ValueVector) RepeatedValueVector(org.apache.drill.exec.vector.complex.RepeatedValueVector) RowSetBuilder(org.apache.drill.test.rowSet.RowSetBuilder) SchemaBuilder(org.apache.drill.test.rowSet.schema.SchemaBuilder) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 17 with RepeatedValueVector

use of org.apache.drill.exec.vector.complex.RepeatedValueVector in project drill by axbaretto.

the class TestRecordBatchSizer method testSizerRepeatedMap.

@Test
public void testSizerRepeatedMap() {
    BatchSchema schema = new SchemaBuilder().addMapArray("map").add("key", MinorType.INT).add("value", MinorType.VARCHAR).resumeSchema().build();
    RowSetBuilder builder = fixture.rowSetBuilder(schema);
    for (int i = 0; i < 10; i++) {
        builder.addRow((Object) new Object[] { new Object[] { 110, "a" }, new Object[] { 120, "b" } });
    }
    RowSet rows = builder.build();
    // Run the record batch sizer on the resulting batch.
    RecordBatchSizer sizer = new RecordBatchSizer(rows.container());
    assertEquals(1, sizer.columns().size());
    /**
     * stdDataSize:50+4, stdNetSize:50+4+4+4, dataSizePerEntry:(4+1)*2,
     * netSizePerEntry: 4*2+1*2+4*2+4,
     * totalDataSize:5*2*10, totalNetSize:netSizePerEntry*2,
     * valueCount:10,
     * elementCount:20, estElementCountPerArray:2, isVariableWidth:true
     */
    verifyColumnValues(sizer.columns().get("map"), 54, 62, 10, 22, 100, 220, 10, 20, 2, false);
    SingleRowSet empty = fixture.rowSet(schema);
    VectorAccessible accessible = empty.vectorAccessible();
    UInt4Vector offsetVector;
    for (VectorWrapper<?> vw : accessible) {
        ValueVector v = vw.getValueVector();
        RecordBatchSizer.ColumnSize colSize = sizer.getColumn(v.getField().getName());
        // Allocates to nearest power of two
        colSize.allocateVector(v, testRowCount);
        RepeatedMapVector mapVector = (RepeatedMapVector) v;
        offsetVector = ((RepeatedValueVector) mapVector).getOffsetVector();
        assertEquals((Integer.highestOneBit(testRowCount) << 1), offsetVector.getValueCapacity());
        ValueVector keyVector = mapVector.getChild("key");
        ValueVector valueVector1 = mapVector.getChild("value");
        assertEquals(((Integer.highestOneBit(testRowCount) << 1) * 2), keyVector.getValueCapacity());
        offsetVector = ((VariableWidthVector) valueVector1).getOffsetVector();
        assertEquals((Integer.highestOneBit(testRowCount) << 1) * 2, offsetVector.getValueCapacity());
        assertEquals(Integer.highestOneBit(testRowCount << 1) * 2 - 1, valueVector1.getValueCapacity());
        // Allocates the same as value passed since it is already power of two.
        colSize.allocateVector(v, testRowCountPowerTwo - 1);
        mapVector = (RepeatedMapVector) v;
        offsetVector = ((RepeatedValueVector) mapVector).getOffsetVector();
        assertEquals(testRowCountPowerTwo, offsetVector.getValueCapacity());
        keyVector = mapVector.getChild("key");
        valueVector1 = mapVector.getChild("value");
        assertEquals(testRowCountPowerTwo * 2, keyVector.getValueCapacity());
        offsetVector = ((VariableWidthVector) valueVector1).getOffsetVector();
        assertEquals(testRowCountPowerTwo * 2, offsetVector.getValueCapacity());
        assertEquals(testRowCountPowerTwo * 2 - 1, valueVector1.getValueCapacity());
        // Allocate for max rows.
        colSize.allocateVector(v, ValueVector.MAX_ROW_COUNT - 1);
        mapVector = (RepeatedMapVector) v;
        offsetVector = ((RepeatedValueVector) mapVector).getOffsetVector();
        assertEquals(ValueVector.MAX_ROW_COUNT, offsetVector.getValueCapacity());
        keyVector = mapVector.getChild("key");
        valueVector1 = mapVector.getChild("value");
        assertEquals(ValueVector.MAX_ROW_COUNT * 2, keyVector.getValueCapacity());
        offsetVector = ((VariableWidthVector) valueVector1).getOffsetVector();
        assertEquals(ValueVector.MAX_ROW_COUNT * 2, offsetVector.getValueCapacity());
        assertEquals(ValueVector.MAX_ROW_COUNT * 2 - 1, valueVector1.getValueCapacity());
        // Allocate for 0 rows. should atleast do allocation for 1 row.
        colSize.allocateVector(v, 0);
        mapVector = (RepeatedMapVector) v;
        offsetVector = ((RepeatedValueVector) mapVector).getOffsetVector();
        assertEquals(ValueVector.MIN_ROW_COUNT, offsetVector.getValueCapacity());
        keyVector = mapVector.getChild("key");
        valueVector1 = mapVector.getChild("value");
        assertEquals(ValueVector.MIN_ROW_COUNT, keyVector.getValueCapacity());
        offsetVector = ((VariableWidthVector) valueVector1).getOffsetVector();
        assertEquals(ValueVector.MIN_ROW_COUNT + 1, offsetVector.getValueCapacity());
        assertEquals(ValueVector.MIN_ROW_COUNT, valueVector1.getValueCapacity());
        v.clear();
    }
    empty.clear();
    rows.clear();
}
Also used : SingleRowSet(org.apache.drill.test.rowSet.RowSet.SingleRowSet) ColumnSize(org.apache.drill.exec.record.RecordBatchSizer.ColumnSize) RepeatedMapVector(org.apache.drill.exec.vector.complex.RepeatedMapVector) SingleRowSet(org.apache.drill.test.rowSet.RowSet.SingleRowSet) RowSet(org.apache.drill.test.rowSet.RowSet) UInt4Vector(org.apache.drill.exec.vector.UInt4Vector) ValueVector(org.apache.drill.exec.vector.ValueVector) RepeatedValueVector(org.apache.drill.exec.vector.complex.RepeatedValueVector) RowSetBuilder(org.apache.drill.test.rowSet.RowSetBuilder) SchemaBuilder(org.apache.drill.test.rowSet.schema.SchemaBuilder) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 18 with RepeatedValueVector

use of org.apache.drill.exec.vector.complex.RepeatedValueVector in project drill by apache.

the class ParquetColumnMetadata method makeRepeatedFixedWidthReader.

FixedWidthRepeatedReader makeRepeatedFixedWidthReader(ParquetRecordReader reader) throws Exception {
    final RepeatedValueVector repeatedVector = RepeatedValueVector.class.cast(vector);
    ColumnReader<?> dataReader = ColumnReaderFactory.createFixedColumnReader(reader, true, column, columnChunkMetaData, repeatedVector.getDataVector(), se);
    return new FixedWidthRepeatedReader(reader, dataReader, getTypeLengthInBits(column.getType()), column, columnChunkMetaData, false, repeatedVector, se);
}
Also used : RepeatedValueVector(org.apache.drill.exec.vector.complex.RepeatedValueVector)

Example 19 with RepeatedValueVector

use of org.apache.drill.exec.vector.complex.RepeatedValueVector in project drill by apache.

the class TestScalarAccessors method intArrayTester.

private void intArrayTester(MinorType type) {
    TupleMetadata schema = new SchemaBuilder().addArray("col", type).buildSchema();
    SingleRowSet rs = fixture.rowSetBuilder(schema).addSingleCol(new int[] {}).addSingleCol(new int[] { 0, 20, 30 }).build();
    assertEquals(2, rs.rowCount());
    // Verify vector state
    VectorContainer container = rs.container();
    assertEquals(1, container.getNumberOfColumns());
    ValueVector v = container.getValueVector(0).getValueVector();
    assertTrue(v instanceof RepeatedValueVector);
    RepeatedValueVector rv = (RepeatedValueVector) v;
    assertEquals(2, rv.getAccessor().getValueCount());
    // Data vector: 3 values written above.
    ValueVector vv = rv.getDataVector();
    assertEquals(3, vv.getAccessor().getValueCount());
    assertEquals(3 * BasicTypeHelper.getSize(Types.required(type)), ((BaseDataValueVector) vv).getBuffer().writerIndex());
    // Offsets vector: one more than row count
    UInt4Vector ov = rv.getOffsetVector();
    assertEquals(3, ov.getAccessor().getValueCount());
    assertEquals(3 * 4, ov.getBuffer().writerIndex());
    RowSetReader reader = rs.reader();
    ArrayReader arrayReader = reader.array(0);
    ScalarReader colReader = arrayReader.scalar();
    assertEquals(ValueType.INTEGER, colReader.valueType());
    assertTrue(reader.next());
    assertEquals(0, arrayReader.size());
    assertTrue(reader.next());
    assertEquals(3, arrayReader.size());
    assertTrue(arrayReader.next());
    assertFalse(colReader.isNull());
    assertEquals(0, colReader.getInt());
    assertEquals(0, colReader.getObject());
    assertEquals("0", colReader.getAsString());
    assertTrue(arrayReader.next());
    assertFalse(colReader.isNull());
    assertEquals(20, colReader.getInt());
    assertEquals(20, colReader.getObject());
    assertEquals("20", colReader.getAsString());
    assertTrue(arrayReader.next());
    assertFalse(colReader.isNull());
    assertEquals(30, colReader.getInt());
    assertEquals(30, colReader.getObject());
    assertEquals("30", colReader.getAsString());
    assertFalse(arrayReader.next());
    assertEquals("[0, 20, 30]", arrayReader.getAsString());
    assertEquals(Arrays.asList(0, 20, 30), arrayReader.getObject());
    assertFalse(reader.next());
    rs.clear();
}
Also used : RepeatedValueVector(org.apache.drill.exec.vector.complex.RepeatedValueVector) BaseDataValueVector(org.apache.drill.exec.vector.BaseDataValueVector) ValueVector(org.apache.drill.exec.vector.ValueVector) ScalarReader(org.apache.drill.exec.vector.accessor.ScalarReader) ArrayReader(org.apache.drill.exec.vector.accessor.ArrayReader) SingleRowSet(org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) RepeatedValueVector(org.apache.drill.exec.vector.complex.RepeatedValueVector) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) BaseDataValueVector(org.apache.drill.exec.vector.BaseDataValueVector) UInt4Vector(org.apache.drill.exec.vector.UInt4Vector) VectorContainer(org.apache.drill.exec.record.VectorContainer)

Example 20 with RepeatedValueVector

use of org.apache.drill.exec.vector.complex.RepeatedValueVector in project drill by apache.

the class TestRecordBatchSizer method testSizerRepeatedMap.

@Test
public void testSizerRepeatedMap() {
    TupleMetadata schema = new SchemaBuilder().addMapArray("map").add("key", MinorType.INT).add("value", MinorType.VARCHAR).resumeSchema().buildSchema();
    RowSetBuilder builder = fixture.rowSetBuilder(schema);
    for (int i = 0; i < 10; i++) {
        builder.addRow((Object) new Object[] { new Object[] { 110, "a" }, new Object[] { 120, "b" } });
    }
    RowSet rows = builder.build();
    // Run the record batch sizer on the resulting batch.
    RecordBatchSizer sizer = new RecordBatchSizer(rows.container());
    assertEquals(1, sizer.columns().size());
    /**
     * stdDataSize:50+4, stdNetSize:50+4+4+4, dataSizePerEntry:(4+1)*2,
     * netSizePerEntry: 4*2+1*2+4*2+4,
     * totalDataSize:5*2*10, totalNetSize:netSizePerEntry*2,
     * valueCount:10,
     * elementCount:20, cardinality:2, isVariableWidth:true
     */
    verifyColumnValues(sizer.columns().get("map"), 54, 62, 10, 22, 100, 220, 10, 20, 2, false);
    SingleRowSet empty = fixture.rowSet(schema);
    VectorAccessible accessible = empty.vectorAccessible();
    UInt4Vector offsetVector;
    for (VectorWrapper<?> vw : accessible) {
        ValueVector v = vw.getValueVector();
        RecordBatchSizer.ColumnSize colSize = sizer.getColumn(v.getField().getName());
        // Allocates to nearest power of two
        colSize.allocateVector(v, testRowCount);
        RepeatedMapVector mapVector = (RepeatedMapVector) v;
        offsetVector = ((RepeatedValueVector) mapVector).getOffsetVector();
        assertEquals((Integer.highestOneBit(testRowCount) << 1), offsetVector.getValueCapacity());
        ValueVector keyVector = mapVector.getChild("key");
        ValueVector valueVector1 = mapVector.getChild("value");
        assertEquals(((Integer.highestOneBit(testRowCount) << 1) * 2), keyVector.getValueCapacity());
        offsetVector = ((VariableWidthVector) valueVector1).getOffsetVector();
        assertEquals((Integer.highestOneBit(testRowCount) << 1) * 2, offsetVector.getValueCapacity());
        assertEquals(Integer.highestOneBit(testRowCount << 1) * 2 - 1, valueVector1.getValueCapacity());
        // Allocates the same as value passed since it is already power of two.
        colSize.allocateVector(v, testRowCountPowerTwo - 1);
        mapVector = (RepeatedMapVector) v;
        offsetVector = ((RepeatedValueVector) mapVector).getOffsetVector();
        assertEquals(testRowCountPowerTwo, offsetVector.getValueCapacity());
        keyVector = mapVector.getChild("key");
        valueVector1 = mapVector.getChild("value");
        assertEquals(testRowCountPowerTwo * 2, keyVector.getValueCapacity());
        offsetVector = ((VariableWidthVector) valueVector1).getOffsetVector();
        assertEquals(testRowCountPowerTwo * 2, offsetVector.getValueCapacity());
        assertEquals(testRowCountPowerTwo * 2 - 1, valueVector1.getValueCapacity());
        // Allocate for max rows.
        colSize.allocateVector(v, ValueVector.MAX_ROW_COUNT - 1);
        mapVector = (RepeatedMapVector) v;
        offsetVector = ((RepeatedValueVector) mapVector).getOffsetVector();
        assertEquals(ValueVector.MAX_ROW_COUNT, offsetVector.getValueCapacity());
        keyVector = mapVector.getChild("key");
        valueVector1 = mapVector.getChild("value");
        assertEquals(ValueVector.MAX_ROW_COUNT * 2, keyVector.getValueCapacity());
        offsetVector = ((VariableWidthVector) valueVector1).getOffsetVector();
        assertEquals(ValueVector.MAX_ROW_COUNT * 2, offsetVector.getValueCapacity());
        assertEquals(ValueVector.MAX_ROW_COUNT * 2 - 1, valueVector1.getValueCapacity());
        // Allocate for 0 rows. should atleast do allocation for 1 row.
        colSize.allocateVector(v, 0);
        mapVector = (RepeatedMapVector) v;
        offsetVector = ((RepeatedValueVector) mapVector).getOffsetVector();
        assertEquals(ValueVector.MIN_ROW_COUNT, offsetVector.getValueCapacity());
        keyVector = mapVector.getChild("key");
        valueVector1 = mapVector.getChild("value");
        assertEquals(ValueVector.MIN_ROW_COUNT, keyVector.getValueCapacity());
        offsetVector = ((VariableWidthVector) valueVector1).getOffsetVector();
        assertEquals(ValueVector.MIN_ROW_COUNT + 1, offsetVector.getValueCapacity());
        assertEquals(ValueVector.MIN_ROW_COUNT, valueVector1.getValueCapacity());
        v.clear();
    }
    empty.clear();
    rows.clear();
}
Also used : SingleRowSet(org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) ColumnSize(org.apache.drill.exec.record.RecordBatchSizer.ColumnSize) RepeatedMapVector(org.apache.drill.exec.vector.complex.RepeatedMapVector) SingleRowSet(org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) RowSet(org.apache.drill.exec.physical.rowSet.RowSet) UInt4Vector(org.apache.drill.exec.vector.UInt4Vector) ValueVector(org.apache.drill.exec.vector.ValueVector) RepeatedValueVector(org.apache.drill.exec.vector.complex.RepeatedValueVector) RowSetBuilder(org.apache.drill.exec.physical.rowSet.RowSetBuilder) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Aggregations

RepeatedValueVector (org.apache.drill.exec.vector.complex.RepeatedValueVector)26 ValueVector (org.apache.drill.exec.vector.ValueVector)22 UInt4Vector (org.apache.drill.exec.vector.UInt4Vector)14 Test (org.junit.Test)13 ColumnSize (org.apache.drill.exec.record.RecordBatchSizer.ColumnSize)9 RepeatedMapVector (org.apache.drill.exec.vector.complex.RepeatedMapVector)9 SubOperatorTest (org.apache.drill.test.SubOperatorTest)9 SingleRowSet (org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet)7 SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder)7 TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata)7 RowSet (org.apache.drill.exec.physical.rowSet.RowSet)6 RowSetBuilder (org.apache.drill.exec.physical.rowSet.RowSetBuilder)6 ScanBatch (org.apache.drill.exec.physical.impl.ScanBatch)4 RecordBatch (org.apache.drill.exec.record.RecordBatch)4 RecordBatchSizer (org.apache.drill.exec.record.RecordBatchSizer)4 TypedFieldId (org.apache.drill.exec.record.TypedFieldId)4 VectorAccessible (org.apache.drill.exec.record.VectorAccessible)4 RepeatedListVector (org.apache.drill.exec.vector.complex.RepeatedListVector)4 MaterializedField (org.apache.drill.exec.record.MaterializedField)3 TransferPair (org.apache.drill.exec.record.TransferPair)3