Use of org.apache.drill.exec.vector.complex.RepeatedValueVector in project drill by axbaretto.
Class TestRecordBatchSizer, method testSizerRepeatedVariableWidth.
/**
 * Sizes a batch with one repeated VARCHAR column ("b") holding ten identical
 * five-element arrays, then checks the capacities produced when the sizer's
 * column information is used to allocate an empty vector for several row counts.
 */
@Test
public void testSizerRepeatedVariableWidth() {
  final BatchSchema schema = new SchemaBuilder().addArray("b", MinorType.VARCHAR).build();
  final RowSetBuilder builder = fixture.rowSetBuilder(schema);

  // Each array carries 1 + 2 + 3 + 4 + 5 = (5*6)/2 = 15 characters.
  final String[] values = { "a", "aa", "aaa", "aaaa", "aaaaa" };
  for (int row = 0; row < 10; row++) {
    builder.addRow((Object) values);
  }
  final RowSet rows = builder.build();

  // Measure the populated batch; it must report exactly one column.
  final RecordBatchSizer sizer = new RecordBatchSizer(rows.container());
  assertEquals(1, sizer.columns().size());
  final ColumnSize sized = sizer.columns().get("b");

  /*
   * Expected figures (labels assumed to follow verifyColumnValues' argument
   * order - confirm against that helper):
   *   stdDataSize             500 = 50*10
   *   stdNetSize              544 = 50*10 + 4*10 + 4
   *   dataSizePerEntry         15 = (5*6)/2
   *   netSizePerEntry          39 = (5*6)/2 + 5*4 + 4
   *   totalDataSize           150 = 15 * 10
   *   totalNetSize            390 = 39 * 10
   *   valueCount               10
   *   elementCount             50
   *   estElementCountPerArray   5
   *   isVariableWidth        true
   */
  verifyColumnValues(sized, 500, 544, 15, 39, 150, 390, 10, 50, 5, true);

  final SingleRowSet empty = fixture.rowSet(schema);
  final VectorAccessible vectors = empty.vectorAccessible();
  for (VectorWrapper<?> wrapper : vectors) {
    final ValueVector vector = wrapper.getValueVector();
    final RecordBatchSizer.ColumnSize colSize = sizer.getColumn(vector.getField().getName());

    // Case 1: one row short of testRowCount; capacities are expected to round up
    // to a power of two.
    colSize.allocateVector(vector, testRowCount - 1);
    final RepeatedValueVector repeated = (RepeatedValueVector) vector;
    UInt4Vector offsets = repeated.getOffsetVector();
    assertEquals(Integer.highestOneBit(testRowCount) << 1, offsets.getValueCapacity());
    VariableWidthVector data = (VariableWidthVector) repeated.getDataVector();
    offsets = data.getOffsetVector();
    // Five array elements are expected per requested row.
    final int shortElements = (testRowCount - 1) * 5;
    assertEquals(Integer.highestOneBit(shortElements) << 1, offsets.getValueCapacity());
    assertEquals(Integer.highestOneBit(shortElements << 1) - 1, data.getValueCapacity());
    vector.clear();

    // Case 2: row count that is already a power of two.
    colSize.allocateVector(vector, testRowCountPowerTwo);
    offsets = repeated.getOffsetVector();
    assertEquals(Integer.highestOneBit(testRowCountPowerTwo) << 1, offsets.getValueCapacity());
    data = (VariableWidthVector) repeated.getDataVector();
    offsets = data.getOffsetVector();
    final int powerTwoElements = testRowCountPowerTwo * 5;
    assertEquals(Integer.highestOneBit(powerTwoElements) << 1, offsets.getValueCapacity());
    assertEquals(Integer.highestOneBit(powerTwoElements << 1) - 1, data.getValueCapacity());
    vector.clear();

    // Case 3: the maximum row count.
    colSize.allocateVector(vector, ValueVector.MAX_ROW_COUNT);
    offsets = repeated.getOffsetVector();
    assertEquals(ValueVector.MAX_ROW_COUNT << 1, offsets.getValueCapacity());
    data = (VariableWidthVector) repeated.getDataVector();
    offsets = data.getOffsetVector();
    final int maxElements = ValueVector.MAX_ROW_COUNT * 5;
    assertEquals(Integer.highestOneBit(maxElements) << 1, offsets.getValueCapacity());
    assertEquals(Integer.highestOneBit(maxElements << 1) - 1, data.getValueCapacity());
    vector.clear();

    // Case 4: zero rows requested; room for at least one row is still expected.
    colSize.allocateVector(vector, 0);
    offsets = repeated.getOffsetVector();
    assertEquals(ValueVector.MIN_ROW_COUNT + 1, offsets.getValueCapacity());
    data = (VariableWidthVector) repeated.getDataVector();
    offsets = data.getOffsetVector();
    assertEquals(ValueVector.MIN_ROW_COUNT + 1, offsets.getValueCapacity());
    assertEquals(ValueVector.MIN_ROW_COUNT, data.getValueCapacity());
    vector.clear();
  }

  empty.clear();
  rows.clear();
}
Use of org.apache.drill.exec.vector.complex.RepeatedValueVector in project drill by axbaretto.
Class TestRecordBatchSizer, method testSizerRepeatedMap.
/**
 * Sizes a batch with one repeated-map column ("map", members INT "key" and
 * VARCHAR "value") holding ten rows of two map entries each, then checks the
 * capacities produced when the sizer's column information is used to allocate
 * an empty vector for several row counts.
 */
@Test
public void testSizerRepeatedMap() {
  final BatchSchema schema = new SchemaBuilder()
      .addMapArray("map")
      .add("key", MinorType.INT)
      .add("value", MinorType.VARCHAR)
      .resumeSchema()
      .build();
  final RowSetBuilder builder = fixture.rowSetBuilder(schema);
  for (int row = 0; row < 10; row++) {
    final Object[] first = { 110, "a" };
    final Object[] second = { 120, "b" };
    builder.addRow((Object) new Object[] { first, second });
  }
  final RowSet rows = builder.build();

  // Measure the populated batch; it must report exactly one column.
  final RecordBatchSizer sizer = new RecordBatchSizer(rows.container());
  assertEquals(1, sizer.columns().size());

  /*
   * Expected figures (labels assumed to follow verifyColumnValues' argument
   * order - confirm against that helper):
   *   stdDataSize              54 = 50 + 4
   *   stdNetSize               62 = 50 + 4 + 4 + 4
   *   dataSizePerEntry         10 = (4 + 1) * 2
   *   netSizePerEntry          22 = 4*2 + 1*2 + 4*2 + 4
   *   totalDataSize           100 = 5 * 2 * 10
   *   totalNetSize            220 = 22 * 10
   *   valueCount               10
   *   elementCount             20
   *   estElementCountPerArray   2
   *   isVariableWidth       false
   */
  verifyColumnValues(sizer.columns().get("map"), 54, 62, 10, 22, 100, 220, 10, 20, 2, false);

  final SingleRowSet empty = fixture.rowSet(schema);
  final VectorAccessible vectors = empty.vectorAccessible();
  for (VectorWrapper<?> wrapper : vectors) {
    final ValueVector vector = wrapper.getValueVector();
    final RecordBatchSizer.ColumnSize colSize = sizer.getColumn(vector.getField().getName());

    // Case 1: testRowCount rows; capacities are expected to round up to a power
    // of two, with two map entries per row for the member vectors.
    colSize.allocateVector(vector, testRowCount);
    final RepeatedMapVector mapVector = (RepeatedMapVector) vector;
    final RepeatedValueVector repeated = (RepeatedValueVector) mapVector;
    UInt4Vector offsets = repeated.getOffsetVector();
    final int roundedRows = Integer.highestOneBit(testRowCount) << 1;
    assertEquals(roundedRows, offsets.getValueCapacity());
    ValueVector keys = mapVector.getChild("key");
    ValueVector texts = mapVector.getChild("value");
    assertEquals(roundedRows * 2, keys.getValueCapacity());
    offsets = ((VariableWidthVector) texts).getOffsetVector();
    assertEquals(roundedRows * 2, offsets.getValueCapacity());
    assertEquals(Integer.highestOneBit(testRowCount << 1) * 2 - 1, texts.getValueCapacity());

    // Case 2: one row short of testRowCountPowerTwo; the array offset vector is
    // expected to come out at exactly testRowCountPowerTwo. The vector is not
    // cleared between cases here.
    colSize.allocateVector(vector, testRowCountPowerTwo - 1);
    offsets = repeated.getOffsetVector();
    assertEquals(testRowCountPowerTwo, offsets.getValueCapacity());
    keys = mapVector.getChild("key");
    texts = mapVector.getChild("value");
    assertEquals(testRowCountPowerTwo * 2, keys.getValueCapacity());
    offsets = ((VariableWidthVector) texts).getOffsetVector();
    assertEquals(testRowCountPowerTwo * 2, offsets.getValueCapacity());
    assertEquals(testRowCountPowerTwo * 2 - 1, texts.getValueCapacity());

    // Case 3: one row short of the maximum row count.
    colSize.allocateVector(vector, ValueVector.MAX_ROW_COUNT - 1);
    offsets = repeated.getOffsetVector();
    assertEquals(ValueVector.MAX_ROW_COUNT, offsets.getValueCapacity());
    keys = mapVector.getChild("key");
    texts = mapVector.getChild("value");
    assertEquals(ValueVector.MAX_ROW_COUNT * 2, keys.getValueCapacity());
    offsets = ((VariableWidthVector) texts).getOffsetVector();
    assertEquals(ValueVector.MAX_ROW_COUNT * 2, offsets.getValueCapacity());
    assertEquals(ValueVector.MAX_ROW_COUNT * 2 - 1, texts.getValueCapacity());

    // Case 4: zero rows requested; room for at least one row is still expected.
    colSize.allocateVector(vector, 0);
    offsets = repeated.getOffsetVector();
    assertEquals(ValueVector.MIN_ROW_COUNT, offsets.getValueCapacity());
    keys = mapVector.getChild("key");
    texts = mapVector.getChild("value");
    assertEquals(ValueVector.MIN_ROW_COUNT, keys.getValueCapacity());
    offsets = ((VariableWidthVector) texts).getOffsetVector();
    assertEquals(ValueVector.MIN_ROW_COUNT + 1, offsets.getValueCapacity());
    assertEquals(ValueVector.MIN_ROW_COUNT, texts.getValueCapacity());
    vector.clear();
  }

  empty.clear();
  rows.clear();
}
Use of org.apache.drill.exec.vector.complex.RepeatedValueVector in project drill by apache.
Class ParquetColumnMetadata, method makeRepeatedFixedWidthReader.
/**
 * Builds the reader for a repeated fixed-width column.
 *
 * <p>The column's vector is viewed as a {@link RepeatedValueVector}; a fixed
 * column reader is created over its data vector and then wrapped, together
 * with the repeated vector itself, in a {@link FixedWidthRepeatedReader}.
 *
 * @param reader the Parquet record reader handed to both created readers
 * @return the repeated reader wrapping the element-level column reader
 * @throws Exception declared by this method; presumably propagated from the
 *         reader factory or constructor
 */
FixedWidthRepeatedReader makeRepeatedFixedWidthReader(ParquetRecordReader reader) throws Exception {
  final RepeatedValueVector repeated = RepeatedValueVector.class.cast(vector);

  // Element-level reader that fills the repeated vector's inner data vector.
  ColumnReader<?> elementReader = ColumnReaderFactory.createFixedColumnReader(
      reader,
      true,
      column,
      columnChunkMetaData,
      repeated.getDataVector(),
      se);

  return new FixedWidthRepeatedReader(
      reader,
      elementReader,
      getTypeLengthInBits(column.getType()),
      column,
      columnChunkMetaData,
      false,
      repeated,
      se);
}
Use of org.apache.drill.exec.vector.complex.RepeatedValueVector in project drill by apache.
Class TestScalarAccessors, method intArrayTester.
/**
 * Writes two rows into a single array column of the given type - an empty
 * array followed by {0, 20, 30} - then verifies both the underlying vector
 * state and the values seen through the row-set array reader.
 *
 * @param type minor type of the array column's elements
 */
private void intArrayTester(MinorType type) {
  final TupleMetadata schema = new SchemaBuilder().addArray("col", type).buildSchema();
  final SingleRowSet rowSet = fixture.rowSetBuilder(schema)
      .addSingleCol(new int[] {})
      .addSingleCol(new int[] { 0, 20, 30 })
      .build();
  assertEquals(2, rowSet.rowCount());

  // Vector state: one column, backed by a repeated vector with two rows.
  final VectorContainer container = rowSet.container();
  assertEquals(1, container.getNumberOfColumns());
  final ValueVector column = container.getValueVector(0).getValueVector();
  assertTrue(column instanceof RepeatedValueVector);
  final RepeatedValueVector repeated = (RepeatedValueVector) column;
  assertEquals(2, repeated.getAccessor().getValueCount());

  // Data vector: the three elements written above, each of the type's size.
  final ValueVector data = repeated.getDataVector();
  assertEquals(3, data.getAccessor().getValueCount());
  assertEquals(3 * BasicTypeHelper.getSize(Types.required(type)),
      ((BaseDataValueVector) data).getBuffer().writerIndex());

  // Offsets vector: one entry more than the row count, four bytes per entry.
  final UInt4Vector offsets = repeated.getOffsetVector();
  assertEquals(3, offsets.getAccessor().getValueCount());
  assertEquals(3 * 4, offsets.getBuffer().writerIndex());

  final RowSetReader reader = rowSet.reader();
  final ArrayReader arrayReader = reader.array(0);
  final ScalarReader element = arrayReader.scalar();
  assertEquals(ValueType.INTEGER, element.valueType());

  // Row 1: the empty array.
  assertTrue(reader.next());
  assertEquals(0, arrayReader.size());

  // Row 2: three elements, each checked through every accessor form.
  assertTrue(reader.next());
  assertEquals(3, arrayReader.size());
  for (int expected : new int[] { 0, 20, 30 }) {
    assertTrue(arrayReader.next());
    assertFalse(element.isNull());
    assertEquals(expected, element.getInt());
    assertEquals(expected, element.getObject());
    assertEquals(String.valueOf(expected), element.getAsString());
  }
  assertFalse(arrayReader.next());

  // Whole-array views of the same row.
  assertEquals("[0, 20, 30]", arrayReader.getAsString());
  assertEquals(Arrays.asList(0, 20, 30), arrayReader.getObject());

  assertFalse(reader.next());
  rowSet.clear();
}
Use of org.apache.drill.exec.vector.complex.RepeatedValueVector in project drill by apache.
Class TestRecordBatchSizer, method testSizerRepeatedMap.
/**
 * Sizes a batch with one repeated-map column ("map", members INT "key" and
 * VARCHAR "value") holding ten rows of two map entries each, then checks the
 * capacities produced when the sizer's column information is used to allocate
 * an empty vector for several row counts.
 */
@Test
public void testSizerRepeatedMap() {
  final TupleMetadata schema = new SchemaBuilder()
      .addMapArray("map")
      .add("key", MinorType.INT)
      .add("value", MinorType.VARCHAR)
      .resumeSchema()
      .buildSchema();
  final RowSetBuilder builder = fixture.rowSetBuilder(schema);
  for (int row = 0; row < 10; row++) {
    final Object[] first = { 110, "a" };
    final Object[] second = { 120, "b" };
    builder.addRow((Object) new Object[] { first, second });
  }
  final RowSet rows = builder.build();

  // Measure the populated batch; it must report exactly one column.
  final RecordBatchSizer sizer = new RecordBatchSizer(rows.container());
  assertEquals(1, sizer.columns().size());

  /*
   * Expected figures (labels assumed to follow verifyColumnValues' argument
   * order - confirm against that helper):
   *   stdDataSize        54 = 50 + 4
   *   stdNetSize         62 = 50 + 4 + 4 + 4
   *   dataSizePerEntry   10 = (4 + 1) * 2
   *   netSizePerEntry    22 = 4*2 + 1*2 + 4*2 + 4
   *   totalDataSize     100 = 5 * 2 * 10
   *   totalNetSize      220 = 22 * 10
   *   valueCount         10
   *   elementCount       20
   *   cardinality         2
   *   isVariableWidth false
   */
  verifyColumnValues(sizer.columns().get("map"), 54, 62, 10, 22, 100, 220, 10, 20, 2, false);

  final SingleRowSet empty = fixture.rowSet(schema);
  final VectorAccessible vectors = empty.vectorAccessible();
  for (VectorWrapper<?> wrapper : vectors) {
    final ValueVector vector = wrapper.getValueVector();
    final RecordBatchSizer.ColumnSize colSize = sizer.getColumn(vector.getField().getName());

    // Case 1: testRowCount rows; capacities are expected to round up to a power
    // of two, with two map entries per row for the member vectors.
    colSize.allocateVector(vector, testRowCount);
    final RepeatedMapVector mapVector = (RepeatedMapVector) vector;
    final RepeatedValueVector repeated = (RepeatedValueVector) mapVector;
    UInt4Vector offsets = repeated.getOffsetVector();
    final int roundedRows = Integer.highestOneBit(testRowCount) << 1;
    assertEquals(roundedRows, offsets.getValueCapacity());
    ValueVector keys = mapVector.getChild("key");
    ValueVector texts = mapVector.getChild("value");
    assertEquals(roundedRows * 2, keys.getValueCapacity());
    offsets = ((VariableWidthVector) texts).getOffsetVector();
    assertEquals(roundedRows * 2, offsets.getValueCapacity());
    assertEquals(Integer.highestOneBit(testRowCount << 1) * 2 - 1, texts.getValueCapacity());

    // Case 2: one row short of testRowCountPowerTwo; the array offset vector is
    // expected to come out at exactly testRowCountPowerTwo. The vector is not
    // cleared between cases here.
    colSize.allocateVector(vector, testRowCountPowerTwo - 1);
    offsets = repeated.getOffsetVector();
    assertEquals(testRowCountPowerTwo, offsets.getValueCapacity());
    keys = mapVector.getChild("key");
    texts = mapVector.getChild("value");
    assertEquals(testRowCountPowerTwo * 2, keys.getValueCapacity());
    offsets = ((VariableWidthVector) texts).getOffsetVector();
    assertEquals(testRowCountPowerTwo * 2, offsets.getValueCapacity());
    assertEquals(testRowCountPowerTwo * 2 - 1, texts.getValueCapacity());

    // Case 3: one row short of the maximum row count.
    colSize.allocateVector(vector, ValueVector.MAX_ROW_COUNT - 1);
    offsets = repeated.getOffsetVector();
    assertEquals(ValueVector.MAX_ROW_COUNT, offsets.getValueCapacity());
    keys = mapVector.getChild("key");
    texts = mapVector.getChild("value");
    assertEquals(ValueVector.MAX_ROW_COUNT * 2, keys.getValueCapacity());
    offsets = ((VariableWidthVector) texts).getOffsetVector();
    assertEquals(ValueVector.MAX_ROW_COUNT * 2, offsets.getValueCapacity());
    assertEquals(ValueVector.MAX_ROW_COUNT * 2 - 1, texts.getValueCapacity());

    // Case 4: zero rows requested; room for at least one row is still expected.
    colSize.allocateVector(vector, 0);
    offsets = repeated.getOffsetVector();
    assertEquals(ValueVector.MIN_ROW_COUNT, offsets.getValueCapacity());
    keys = mapVector.getChild("key");
    texts = mapVector.getChild("value");
    assertEquals(ValueVector.MIN_ROW_COUNT, keys.getValueCapacity());
    offsets = ((VariableWidthVector) texts).getOffsetVector();
    assertEquals(ValueVector.MIN_ROW_COUNT + 1, offsets.getValueCapacity());
    assertEquals(ValueVector.MIN_ROW_COUNT, texts.getValueCapacity());
    vector.clear();
  }

  empty.clear();
  rows.clear();
}
Aggregations