Search in sources :

Example 1 with NullableVector

use of org.apache.drill.exec.vector.NullableVector in project drill by apache.

the class TestScalarAccessors method nullableIntTester.

/**
 * Exercises a nullable column of the given type through a three-row set
 * (10, null, 30), checking both the underlying vector state and the values
 * seen through the scalar reader.
 *
 * @param type minor type of the single nullable column under test
 */
private void nullableIntTester(MinorType type) {
    TupleMetadata schema = new SchemaBuilder().addNullable("col", type).buildSchema();
    SingleRowSet rs = fixture.rowSetBuilder(schema).addRow(10).addSingleCol(null).addRow(30).build();
    // Release the row set even when an assertion fails, so a failing test
    // does not also leave its buffers allocated.
    try {
        assertEquals(3, rs.rowCount());
        // Verify vector state
        VectorContainer container = rs.container();
        assertEquals(1, container.getNumberOfColumns());
        ValueVector v = container.getValueVector(0).getValueVector();
        assertTrue(v instanceof NullableVector);
        NullableVector nv = (NullableVector) v;
        assertEquals(3, nv.getAccessor().getValueCount());
        // The data buffer must have been written for all three rows, including the null one.
        assertEquals(3 * BasicTypeHelper.getSize(Types.required(type)), ((BaseDataValueVector) v).getBuffer().writerIndex());
        // Verify bits vector. (Assumes UInt1 implementation.)
        UInt1Vector bv = (UInt1Vector) nv.getBitsVector();
        assertEquals(3, bv.getAccessor().getValueCount());
        assertEquals(3, bv.getBuffer().writerIndex());
        RowSetReader reader = rs.reader();
        ScalarReader colReader = reader.scalar(0);
        // Row 1: non-null value 10.
        assertTrue(reader.next());
        assertFalse(colReader.isNull());
        assertEquals(10, colReader.getInt());
        // Row 2: null. The data value is undefined and may be garbage, so only
        // the null-aware accessors are checked here.
        assertTrue(reader.next());
        assertTrue(colReader.isNull());
        assertNull(colReader.getObject());
        assertEquals("null", colReader.getAsString());
        // Row 3: non-null value 30.
        assertTrue(reader.next());
        assertEquals(30, colReader.getInt());
        assertFalse(reader.next());
    } finally {
        rs.clear();
    }
}
Also used : RepeatedValueVector(org.apache.drill.exec.vector.complex.RepeatedValueVector) BaseDataValueVector(org.apache.drill.exec.vector.BaseDataValueVector) ValueVector(org.apache.drill.exec.vector.ValueVector) ScalarReader(org.apache.drill.exec.vector.accessor.ScalarReader) SingleRowSet(org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) NullableVector(org.apache.drill.exec.vector.NullableVector) BaseDataValueVector(org.apache.drill.exec.vector.BaseDataValueVector) UInt1Vector(org.apache.drill.exec.vector.UInt1Vector) VectorContainer(org.apache.drill.exec.record.VectorContainer)

Example 2 with NullableVector

use of org.apache.drill.exec.vector.NullableVector in project drill by axbaretto.

the class TestRecordBatchSizer method testSizerNullableVariableWidth.

/**
 * Sizes a batch holding one nullable VARCHAR column with ten rows
 * ("a", "aa", ... ten characters), then checks that the sizer's column
 * entry allocates the bits, offset and data vectors with the expected
 * capacities for several requested row counts.
 */
@Test
public void testSizerNullableVariableWidth() {
    BatchSchema schema = new SchemaBuilder().addNullable("b", MinorType.VARCHAR).build();
    RowSetBuilder builder = fixture.rowSetBuilder(schema);
    StringBuilder stringBuilder = new StringBuilder();
    // Row i holds i + 1 characters, so total data is 1 + 2 + ... + 10 = 55 bytes.
    for (int i = 0; i < 10; i++) {
        stringBuilder.append("a");
        builder.addRow((Object) stringBuilder.toString());
    }
    RowSet rows = builder.build();
    // Release the row sets even when an assertion fails.
    try {
        // Run the record batch sizer on the resulting batch.
        RecordBatchSizer sizer = new RecordBatchSizer(rows.container());
        assertEquals(1, sizer.columns().size());
        /*
         * stdDataSize:50, stdNetSize:50+4+1,
         * dataSizePerEntry:ceil(((10*11)/2)/10) = 6,
         * netSizePerEntry: dataSizePerEntry+4+1 = 11,
         * totalDataSize:(10*11)/2 = 55, totalNetSize: (10*11)/2 + (4*10) + (1*10) = 105,
         * valueCount:10,
         * elementCount:10, estElementCountPerArray:1, isVariableWidth:true
         */
        verifyColumnValues(sizer.columns().get("b"), 50, 55, 6, 11, 55, 105, 10, 10, 1, true);
        SingleRowSet empty = fixture.rowSet(schema);
        try {
            VectorAccessible accessible = empty.vectorAccessible();
            ValueVector bitVector;
            VariableWidthVector vwVector;
            UInt4Vector offsetVector;
            for (VectorWrapper<?> vw : accessible) {
                ValueVector v = vw.getValueVector();
                RecordBatchSizer.ColumnSize colSize = sizer.getColumn(v.getField().getName());
                // Allocates to nearest power of two
                colSize.allocateVector(v, testRowCount);
                bitVector = ((NullableVector) v).getBitsVector();
                assertEquals((Integer.highestOneBit(testRowCount) << 1), bitVector.getValueCapacity());
                vwVector = (VariableWidthVector) ((NullableVector) v).getValuesVector();
                offsetVector = vwVector.getOffsetVector();
                assertEquals((Integer.highestOneBit(testRowCount) << 1), offsetVector.getValueCapacity());
                assertEquals(Integer.highestOneBit(testRowCount << 1) - 1, vwVector.getValueCapacity());
                // Allocates the same as value passed since it is already power of two.
                colSize.allocateVector(v, testRowCountPowerTwo - 1);
                bitVector = ((NullableVector) v).getBitsVector();
                assertEquals(Integer.highestOneBit(testRowCountPowerTwo), bitVector.getValueCapacity());
                vwVector = (VariableWidthVector) ((NullableVector) v).getValuesVector();
                offsetVector = vwVector.getOffsetVector();
                assertEquals(testRowCountPowerTwo, offsetVector.getValueCapacity());
                assertEquals(Integer.highestOneBit(testRowCountPowerTwo) - 1, vwVector.getValueCapacity());
                // Allocate for max rows.
                colSize.allocateVector(v, ValueVector.MAX_ROW_COUNT - 1);
                bitVector = ((NullableVector) v).getBitsVector();
                assertEquals((Integer.highestOneBit(ValueVector.MAX_ROW_COUNT)), bitVector.getValueCapacity());
                vwVector = (VariableWidthVector) ((NullableVector) v).getValuesVector();
                offsetVector = vwVector.getOffsetVector();
                assertEquals(ValueVector.MAX_ROW_COUNT, offsetVector.getValueCapacity());
                assertEquals(ValueVector.MAX_ROW_COUNT - 1, vwVector.getValueCapacity());
                // Allocate for 0 rows. Should at least do allocation for 1 row.
                colSize.allocateVector(v, 0);
                bitVector = ((NullableVector) v).getBitsVector();
                assertEquals((Integer.highestOneBit(ValueVector.MIN_ROW_COUNT)), bitVector.getValueCapacity());
                vwVector = (VariableWidthVector) ((NullableVector) v).getValuesVector();
                offsetVector = vwVector.getOffsetVector();
                assertEquals(ValueVector.MIN_ROW_COUNT + 1, offsetVector.getValueCapacity());
                assertEquals(ValueVector.MIN_ROW_COUNT, vwVector.getValueCapacity());
            }
        } finally {
            empty.clear();
        }
    } finally {
        rows.clear();
    }
}
Also used : SingleRowSet(org.apache.drill.test.rowSet.RowSet.SingleRowSet) ColumnSize(org.apache.drill.exec.record.RecordBatchSizer.ColumnSize) SingleRowSet(org.apache.drill.test.rowSet.RowSet.SingleRowSet) RowSet(org.apache.drill.test.rowSet.RowSet) VariableWidthVector(org.apache.drill.exec.vector.VariableWidthVector) UInt4Vector(org.apache.drill.exec.vector.UInt4Vector) ValueVector(org.apache.drill.exec.vector.ValueVector) RepeatedValueVector(org.apache.drill.exec.vector.complex.RepeatedValueVector) RowSetBuilder(org.apache.drill.test.rowSet.RowSetBuilder) SchemaBuilder(org.apache.drill.test.rowSet.schema.SchemaBuilder) NullableVector(org.apache.drill.exec.vector.NullableVector) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 3 with NullableVector

use of org.apache.drill.exec.vector.NullableVector in project drill by apache.

the class TestRecordBatchSizer method testSizerNullableVariableWidth.

/**
 * Sizes a batch holding one nullable VARCHAR column with ten rows
 * ("a", "aa", ... ten characters), then checks that the sizer's column
 * entry allocates the bits, offset and data vectors with the expected
 * capacities for several requested row counts.
 */
@Test
public void testSizerNullableVariableWidth() {
    TupleMetadata schema = new SchemaBuilder().addNullable("b", MinorType.VARCHAR).buildSchema();
    RowSetBuilder builder = fixture.rowSetBuilder(schema);
    StringBuilder stringBuilder = new StringBuilder();
    // Row i holds i + 1 characters, so total data is 1 + 2 + ... + 10 = 55 bytes.
    for (int i = 0; i < 10; i++) {
        stringBuilder.append("a");
        builder.addRow((Object) stringBuilder.toString());
    }
    RowSet rows = builder.build();
    // Release the row sets even when an assertion fails.
    try {
        // Run the record batch sizer on the resulting batch.
        RecordBatchSizer sizer = new RecordBatchSizer(rows.container());
        assertEquals(1, sizer.columns().size());
        /*
         * stdDataSize:50, stdNetSize:50+4+1,
         * dataSizePerEntry:ceil(((10*11)/2)/10) = 6,
         * netSizePerEntry: dataSizePerEntry+4+1 = 11,
         * totalDataSize:(10*11)/2 = 55, totalNetSize: (10*11)/2 + (4*10) + (1*10) = 105,
         * valueCount:10,
         * elementCount:10, cardinality:1, isVariableWidth:true
         */
        verifyColumnValues(sizer.columns().get("b"), 50, 55, 6, 11, 55, 105, 10, 10, 1, true);
        SingleRowSet empty = fixture.rowSet(schema);
        try {
            VectorAccessible accessible = empty.vectorAccessible();
            ValueVector bitVector;
            VariableWidthVector vwVector;
            UInt4Vector offsetVector;
            for (VectorWrapper<?> vw : accessible) {
                ValueVector v = vw.getValueVector();
                RecordBatchSizer.ColumnSize colSize = sizer.getColumn(v.getField().getName());
                // Allocates to nearest power of two
                colSize.allocateVector(v, testRowCount);
                bitVector = ((NullableVector) v).getBitsVector();
                assertEquals((Integer.highestOneBit(testRowCount) << 1), bitVector.getValueCapacity());
                vwVector = (VariableWidthVector) ((NullableVector) v).getValuesVector();
                offsetVector = vwVector.getOffsetVector();
                assertEquals((Integer.highestOneBit(testRowCount) << 1), offsetVector.getValueCapacity());
                assertEquals(Integer.highestOneBit(testRowCount << 1) - 1, vwVector.getValueCapacity());
                // Allocates the same as value passed since it is already power of two.
                colSize.allocateVector(v, testRowCountPowerTwo - 1);
                bitVector = ((NullableVector) v).getBitsVector();
                assertEquals(Integer.highestOneBit(testRowCountPowerTwo), bitVector.getValueCapacity());
                vwVector = (VariableWidthVector) ((NullableVector) v).getValuesVector();
                offsetVector = vwVector.getOffsetVector();
                assertEquals(testRowCountPowerTwo, offsetVector.getValueCapacity());
                assertEquals(Integer.highestOneBit(testRowCountPowerTwo) - 1, vwVector.getValueCapacity());
                // Allocate for max rows.
                colSize.allocateVector(v, ValueVector.MAX_ROW_COUNT - 1);
                bitVector = ((NullableVector) v).getBitsVector();
                assertEquals((Integer.highestOneBit(ValueVector.MAX_ROW_COUNT)), bitVector.getValueCapacity());
                vwVector = (VariableWidthVector) ((NullableVector) v).getValuesVector();
                offsetVector = vwVector.getOffsetVector();
                assertEquals(ValueVector.MAX_ROW_COUNT, offsetVector.getValueCapacity());
                assertEquals(ValueVector.MAX_ROW_COUNT - 1, vwVector.getValueCapacity());
                // Allocate for 0 rows. Should at least do allocation for 1 row.
                colSize.allocateVector(v, 0);
                bitVector = ((NullableVector) v).getBitsVector();
                assertEquals((Integer.highestOneBit(ValueVector.MIN_ROW_COUNT)), bitVector.getValueCapacity());
                vwVector = (VariableWidthVector) ((NullableVector) v).getValuesVector();
                offsetVector = vwVector.getOffsetVector();
                assertEquals(ValueVector.MIN_ROW_COUNT + 1, offsetVector.getValueCapacity());
                assertEquals(ValueVector.MIN_ROW_COUNT, vwVector.getValueCapacity());
            }
        } finally {
            empty.clear();
        }
    } finally {
        rows.clear();
    }
}
Also used : SingleRowSet(org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) ColumnSize(org.apache.drill.exec.record.RecordBatchSizer.ColumnSize) SingleRowSet(org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) RowSet(org.apache.drill.exec.physical.rowSet.RowSet) VariableWidthVector(org.apache.drill.exec.vector.VariableWidthVector) UInt4Vector(org.apache.drill.exec.vector.UInt4Vector) ValueVector(org.apache.drill.exec.vector.ValueVector) RepeatedValueVector(org.apache.drill.exec.vector.complex.RepeatedValueVector) RowSetBuilder(org.apache.drill.exec.physical.rowSet.RowSetBuilder) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) NullableVector(org.apache.drill.exec.vector.NullableVector) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 4 with NullableVector

use of org.apache.drill.exec.vector.NullableVector in project drill by apache.

the class TestRecordBatchSizer method testEmptyBatchNullableVariableWidth.

/**
 * Sizes an empty batch holding one nullable VARCHAR column, then checks
 * that the sizer's column entry still allocates the bits, offset and data
 * vectors with the expected capacities for several requested row counts.
 */
@Test
public void testEmptyBatchNullableVariableWidth() {
    TupleMetadata schema = new SchemaBuilder().addNullable("b", MinorType.VARCHAR).buildSchema();
    RowSetBuilder builder = fixture.rowSetBuilder(schema);
    RowSet rows = builder.build();
    // Release the row sets even when an assertion fails.
    try {
        // Run the record batch sizer on the resulting batch.
        RecordBatchSizer sizer = new RecordBatchSizer(rows.container());
        assertEquals(1, sizer.columns().size());
        /*
         * stdDataSize:50, stdNetSize:50+4+1, dataSizePerEntry:0,
         * netSizePerEntry:0,
         * totalDataSize:0, totalNetSize:0,
         * valueCount:0,
         * elementCount:0, cardinality:0, isVariableWidth:true
         */
        verifyColumnValues(sizer.columns().get("b"), 50, 55, 0, 0, 0, 0, 0, 0, 0, true);
        // Verify memory allocation is done correctly based on std size for empty batch.
        SingleRowSet empty = fixture.rowSet(schema);
        try {
            VectorAccessible accessible = empty.vectorAccessible();
            ValueVector bitVector;
            VariableWidthVector vwVector;
            UInt4Vector offsetVector;
            for (VectorWrapper<?> vw : accessible) {
                ValueVector v = vw.getValueVector();
                RecordBatchSizer.ColumnSize colSize = sizer.getColumn(v.getField().getName());
                // Allocates to nearest power of two
                colSize.allocateVector(v, testRowCount);
                bitVector = ((NullableVector) v).getBitsVector();
                assertEquals((Integer.highestOneBit(testRowCount) << 1), bitVector.getValueCapacity());
                vwVector = (VariableWidthVector) ((NullableVector) v).getValuesVector();
                offsetVector = vwVector.getOffsetVector();
                assertEquals((Integer.highestOneBit(testRowCount) << 1), offsetVector.getValueCapacity());
                assertEquals(Integer.highestOneBit(testRowCount << 1) - 1, vwVector.getValueCapacity());
                // Allocates the same as value passed since it is already power of two.
                colSize.allocateVector(v, testRowCountPowerTwo - 1);
                bitVector = ((NullableVector) v).getBitsVector();
                assertEquals(Integer.highestOneBit(testRowCountPowerTwo), bitVector.getValueCapacity());
                vwVector = (VariableWidthVector) ((NullableVector) v).getValuesVector();
                offsetVector = vwVector.getOffsetVector();
                assertEquals(testRowCountPowerTwo, offsetVector.getValueCapacity());
                assertEquals(Integer.highestOneBit(testRowCountPowerTwo) - 1, vwVector.getValueCapacity());
                // Allocate for max rows.
                colSize.allocateVector(v, ValueVector.MAX_ROW_COUNT - 1);
                bitVector = ((NullableVector) v).getBitsVector();
                assertEquals((Integer.highestOneBit(ValueVector.MAX_ROW_COUNT)), bitVector.getValueCapacity());
                vwVector = (VariableWidthVector) ((NullableVector) v).getValuesVector();
                offsetVector = vwVector.getOffsetVector();
                assertEquals(ValueVector.MAX_ROW_COUNT, offsetVector.getValueCapacity());
                assertEquals(ValueVector.MAX_ROW_COUNT - 1, vwVector.getValueCapacity());
                // Allocate for 0 rows. Should at least do allocation for 1 row.
                colSize.allocateVector(v, 0);
                bitVector = ((NullableVector) v).getBitsVector();
                assertEquals((Integer.highestOneBit(ValueVector.MIN_ROW_COUNT)), bitVector.getValueCapacity());
                vwVector = (VariableWidthVector) ((NullableVector) v).getValuesVector();
                offsetVector = vwVector.getOffsetVector();
                assertEquals(ValueVector.MIN_ROW_COUNT + 1, offsetVector.getValueCapacity());
                assertEquals(ValueVector.MIN_ROW_COUNT, vwVector.getValueCapacity());
            }
        } finally {
            empty.clear();
        }
    } finally {
        rows.clear();
    }
}
Also used : SingleRowSet(org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) ColumnSize(org.apache.drill.exec.record.RecordBatchSizer.ColumnSize) SingleRowSet(org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet) RowSet(org.apache.drill.exec.physical.rowSet.RowSet) VariableWidthVector(org.apache.drill.exec.vector.VariableWidthVector) UInt4Vector(org.apache.drill.exec.vector.UInt4Vector) ValueVector(org.apache.drill.exec.vector.ValueVector) RepeatedValueVector(org.apache.drill.exec.vector.complex.RepeatedValueVector) RowSetBuilder(org.apache.drill.exec.physical.rowSet.RowSetBuilder) TupleMetadata(org.apache.drill.exec.record.metadata.TupleMetadata) SchemaBuilder(org.apache.drill.exec.record.metadata.SchemaBuilder) NullableVector(org.apache.drill.exec.vector.NullableVector) SubOperatorTest(org.apache.drill.test.SubOperatorTest) Test(org.junit.Test)

Example 5 with NullableVector

use of org.apache.drill.exec.vector.NullableVector in project drill by apache.

the class ColumnBuilder method buildPrimitive.

/**
 * Creates the column state for a primitive column.
 * <p>
 * A backing vector is obtained from the parent's vector cache when the
 * column is projected or when {@code allowCreation(parent)} permits it;
 * otherwise the column has no vector. A writer is then built for the
 * column, a vector state is chosen to match (no vector, array, nullable,
 * or simple), and the two are bound together in the returned column state.
 *
 * @param parent container state that supplies the projection, the vector
 * cache and the loader for the new column
 * @param columnSchema metadata describing the column to build
 * @return column state for the new column
 */
private ColumnState buildPrimitive(ContainerState parent, ColumnMetadata columnSchema) {
    // Stays null when the column is not projected: no materialized backing.
    ValueVector backingVector = null;
    if (parent.projection().projection(columnSchema).isProjected || allowCreation(parent)) {
        backingVector = parent.vectorCache().vectorFor(columnSchema.schema());

        // A permissive cache may hand back a vector whose field differs
        // from that requested. Update the schema to match.
        final boolean schemaDiffers = parent.vectorCache().isPermissive()
                && !backingVector.getField().isEquivalent(columnSchema.schema());
        if (schemaDiffers) {
            columnSchema = ((PrimitiveColumnMetadata) columnSchema).mergeWith(backingVector.getField());
        }
    }

    // The writer is built against the (possibly merged) schema and the vector, if any.
    final AbstractObjectWriter writer = ColumnWriterFactory.buildColumnWriter(columnSchema, backingVector);

    // Pick the vector state that manages this kind of vector.
    final VectorState state;
    if (backingVector == null) {
        state = new NullVectorState();
    } else if (columnSchema.isArray()) {
        state = new RepeatedVectorState(writer.array(), (RepeatedValueVector) backingVector);
    } else if (columnSchema.isNullable()) {
        state = new NullableVectorState(writer, (NullableVector) backingVector);
    } else {
        state = SimpleVectorState.vectorState(columnSchema, writer.events(), backingVector);
    }

    // Bind writer and vector state together.
    return new PrimitiveColumnState(parent.loader(), writer, state);
}
Also used : RepeatedValueVector(org.apache.drill.exec.vector.complex.RepeatedValueVector) ValueVector(org.apache.drill.exec.vector.ValueVector) NullableVector(org.apache.drill.exec.vector.NullableVector) PrimitiveColumnState(org.apache.drill.exec.physical.resultSet.impl.ColumnState.PrimitiveColumnState) AbstractObjectWriter(org.apache.drill.exec.vector.accessor.writer.AbstractObjectWriter) MapVectorState(org.apache.drill.exec.physical.resultSet.impl.TupleState.MapVectorState) RepeatedListVectorState(org.apache.drill.exec.physical.resultSet.impl.RepeatedListState.RepeatedListVectorState) UnionVectorState(org.apache.drill.exec.physical.resultSet.impl.UnionState.UnionVectorState) OffsetVectorState(org.apache.drill.exec.physical.resultSet.impl.SingleVectorState.OffsetVectorState) ListVectorState(org.apache.drill.exec.physical.resultSet.impl.ListState.ListVectorState) SimpleVectorState(org.apache.drill.exec.physical.resultSet.impl.SingleVectorState.SimpleVectorState)

Aggregations

NullableVector (org.apache.drill.exec.vector.NullableVector)5 ValueVector (org.apache.drill.exec.vector.ValueVector)5 RepeatedValueVector (org.apache.drill.exec.vector.complex.RepeatedValueVector)5 SingleRowSet (org.apache.drill.exec.physical.rowSet.RowSet.SingleRowSet)3 ColumnSize (org.apache.drill.exec.record.RecordBatchSizer.ColumnSize)3 SchemaBuilder (org.apache.drill.exec.record.metadata.SchemaBuilder)3 TupleMetadata (org.apache.drill.exec.record.metadata.TupleMetadata)3 UInt4Vector (org.apache.drill.exec.vector.UInt4Vector)3 VariableWidthVector (org.apache.drill.exec.vector.VariableWidthVector)3 SubOperatorTest (org.apache.drill.test.SubOperatorTest)3 Test (org.junit.Test)3 RowSet (org.apache.drill.exec.physical.rowSet.RowSet)2 RowSetBuilder (org.apache.drill.exec.physical.rowSet.RowSetBuilder)2 PrimitiveColumnState (org.apache.drill.exec.physical.resultSet.impl.ColumnState.PrimitiveColumnState)1 ListVectorState (org.apache.drill.exec.physical.resultSet.impl.ListState.ListVectorState)1 RepeatedListVectorState (org.apache.drill.exec.physical.resultSet.impl.RepeatedListState.RepeatedListVectorState)1 OffsetVectorState (org.apache.drill.exec.physical.resultSet.impl.SingleVectorState.OffsetVectorState)1 SimpleVectorState (org.apache.drill.exec.physical.resultSet.impl.SingleVectorState.SimpleVectorState)1 MapVectorState (org.apache.drill.exec.physical.resultSet.impl.TupleState.MapVectorState)1 UnionVectorState (org.apache.drill.exec.physical.resultSet.impl.UnionState.UnionVectorState)1