use of org.apache.drill.test.rowSet.RowSet.SingleRowSet in project drill by axbaretto.
the class TestRecordBatchSizer method testSizerVariableWidth.
/**
 * Verifies the sizes that {@code RecordBatchSizer} reports for a single
 * required VARCHAR column, then verifies the capacities produced by
 * {@code ColumnSize.allocateVector} for several requested row counts.
 * Both row sets are cleared in {@code finally} blocks so that they are
 * cleared even when an assertion fails.
 */
@Test
public void testSizerVariableWidth() {
  BatchSchema schema = new SchemaBuilder().add("a", MinorType.VARCHAR).build();
  RowSetBuilder builder = fixture.rowSetBuilder(schema);
  StringBuilder stringBuilder = new StringBuilder();
  // a, aa, aaa, ... aaaaaaaaaa. totalSize = (10*11)/2 = 55
  for (int i = 0; i < 10; i++) {
    stringBuilder.append("a");
    builder.addRow(stringBuilder.toString());
  }
  RowSet rows = builder.build();
  try {
    // Run the record batch sizer on the resulting batch.
    RecordBatchSizer sizer = new RecordBatchSizer(rows.container());
    assertEquals(1, sizer.columns().size());
    ColumnSize aColumn = sizer.columns().get("a");
    /*
     * Expected values, in argument order:
     * stdDataSize:50, stdNetSize:50+4,
     * dataSizePerEntry:6 (55/10 rounded up), netSizePerEntry:10 (95/10 rounded up),
     * totalDataSize:(10*11)/2, totalNetSize:(10*11)/2 + 4*10, valueCount:10,
     * elementCount:10, estElementCountPerArray:1, isVariableWidth:true
     */
    verifyColumnValues(aColumn, 50, 54, 6, 10, 55, 95, 10, 10, 1, true);

    SingleRowSet empty = fixture.rowSet(schema);
    try {
      VectorAccessible accessible = empty.vectorAccessible();
      UInt4Vector offsetVector;
      for (VectorWrapper<?> vw : accessible) {
        ValueVector v = vw.getValueVector();
        RecordBatchSizer.ColumnSize colSize = sizer.getColumn(v.getField().getName());

        // Allocates to nearest power of two
        colSize.allocateVector(v, testRowCount);
        offsetVector = ((VariableWidthVector) v).getOffsetVector();
        assertEquals((Integer.highestOneBit(testRowCount) << 1), offsetVector.getValueCapacity());
        assertEquals(Integer.highestOneBit(testRowCount << 1) - 1, v.getValueCapacity());
        v.clear();

        // Allocates the same as value passed since it is already power of two.
        // -1 is done for adjustment needed for offset vector.
        colSize.allocateVector(v, testRowCountPowerTwo - 1);
        offsetVector = ((VariableWidthVector) v).getOffsetVector();
        assertEquals(testRowCountPowerTwo, offsetVector.getValueCapacity());
        assertEquals(testRowCountPowerTwo - 1, v.getValueCapacity());
        v.clear();

        // Allocate for max rows.
        colSize.allocateVector(v, ValueVector.MAX_ROW_COUNT - 1);
        offsetVector = ((VariableWidthVector) v).getOffsetVector();
        assertEquals(ValueVector.MAX_ROW_COUNT, offsetVector.getValueCapacity());
        assertEquals(ValueVector.MAX_ROW_COUNT - 1, v.getValueCapacity());
        v.clear();

        // Allocate for 0 rows. Should at least do allocation for 1 row.
        colSize.allocateVector(v, 0);
        offsetVector = ((VariableWidthVector) v).getOffsetVector();
        assertEquals(ValueVector.MIN_ROW_COUNT + 1, offsetVector.getValueCapacity());
        assertEquals(ValueVector.MIN_ROW_COUNT, v.getValueCapacity());
        v.clear();
      }
    } finally {
      // Runs on assertion failure too, so the scratch row set is always cleared.
      empty.clear();
    }
  } finally {
    rows.clear();
  }
}
use of org.apache.drill.test.rowSet.RowSet.SingleRowSet in project drill by axbaretto.
the class TestRecordBatchSizer method testSizerRepeatedMap.
/**
 * Verifies the sizes that {@code RecordBatchSizer} reports for a repeated
 * map column with an INT "key" member and a VARCHAR "value" member (two map
 * entries per row, ten rows), then verifies the capacities produced by
 * {@code ColumnSize.allocateVector} for several requested row counts.
 * Both row sets are cleared in {@code finally} blocks so that they are
 * cleared even when an assertion fails.
 */
@Test
public void testSizerRepeatedMap() {
  BatchSchema schema = new SchemaBuilder()
      .addMapArray("map")
        .add("key", MinorType.INT)
        .add("value", MinorType.VARCHAR)
        .resumeSchema()
      .build();
  RowSetBuilder builder = fixture.rowSetBuilder(schema);
  for (int i = 0; i < 10; i++) {
    builder.addRow((Object) new Object[] { new Object[] { 110, "a" }, new Object[] { 120, "b" } });
  }
  RowSet rows = builder.build();
  try {
    // Run the record batch sizer on the resulting batch.
    RecordBatchSizer sizer = new RecordBatchSizer(rows.container());
    assertEquals(1, sizer.columns().size());
    /*
     * Expected values, in argument order:
     * stdDataSize:50+4, stdNetSize:50+4+4+4, dataSizePerEntry:(4+1)*2,
     * netSizePerEntry: 4*2+1*2+4*2+4,
     * totalDataSize:5*2*10, totalNetSize:netSizePerEntry*10,
     * valueCount:10,
     * elementCount:20, estElementCountPerArray:2, isVariableWidth:false
     */
    verifyColumnValues(sizer.columns().get("map"), 54, 62, 10, 22, 100, 220, 10, 20, 2, false);

    SingleRowSet empty = fixture.rowSet(schema);
    try {
      VectorAccessible accessible = empty.vectorAccessible();
      UInt4Vector offsetVector;
      for (VectorWrapper<?> vw : accessible) {
        ValueVector v = vw.getValueVector();
        RecordBatchSizer.ColumnSize colSize = sizer.getColumn(v.getField().getName());
        // v is never reassigned, so a single cast serves every phase below.
        RepeatedMapVector mapVector = (RepeatedMapVector) v;

        // Allocates to nearest power of two
        colSize.allocateVector(v, testRowCount);
        offsetVector = ((RepeatedValueVector) mapVector).getOffsetVector();
        assertEquals((Integer.highestOneBit(testRowCount) << 1), offsetVector.getValueCapacity());
        ValueVector keyVector = mapVector.getChild("key");
        ValueVector valueVector1 = mapVector.getChild("value");
        assertEquals(((Integer.highestOneBit(testRowCount) << 1) * 2), keyVector.getValueCapacity());
        offsetVector = ((VariableWidthVector) valueVector1).getOffsetVector();
        assertEquals((Integer.highestOneBit(testRowCount) << 1) * 2, offsetVector.getValueCapacity());
        assertEquals(Integer.highestOneBit(testRowCount << 1) * 2 - 1, valueVector1.getValueCapacity());
        // Clear between phases, consistent with the other allocation tests.
        v.clear();

        // Allocates the same as value passed since it is already power of two.
        colSize.allocateVector(v, testRowCountPowerTwo - 1);
        offsetVector = ((RepeatedValueVector) mapVector).getOffsetVector();
        assertEquals(testRowCountPowerTwo, offsetVector.getValueCapacity());
        keyVector = mapVector.getChild("key");
        valueVector1 = mapVector.getChild("value");
        assertEquals(testRowCountPowerTwo * 2, keyVector.getValueCapacity());
        offsetVector = ((VariableWidthVector) valueVector1).getOffsetVector();
        assertEquals(testRowCountPowerTwo * 2, offsetVector.getValueCapacity());
        assertEquals(testRowCountPowerTwo * 2 - 1, valueVector1.getValueCapacity());
        v.clear();

        // Allocate for max rows.
        colSize.allocateVector(v, ValueVector.MAX_ROW_COUNT - 1);
        offsetVector = ((RepeatedValueVector) mapVector).getOffsetVector();
        assertEquals(ValueVector.MAX_ROW_COUNT, offsetVector.getValueCapacity());
        keyVector = mapVector.getChild("key");
        valueVector1 = mapVector.getChild("value");
        assertEquals(ValueVector.MAX_ROW_COUNT * 2, keyVector.getValueCapacity());
        offsetVector = ((VariableWidthVector) valueVector1).getOffsetVector();
        assertEquals(ValueVector.MAX_ROW_COUNT * 2, offsetVector.getValueCapacity());
        assertEquals(ValueVector.MAX_ROW_COUNT * 2 - 1, valueVector1.getValueCapacity());
        v.clear();

        // Allocate for 0 rows. Should at least do allocation for 1 row.
        colSize.allocateVector(v, 0);
        offsetVector = ((RepeatedValueVector) mapVector).getOffsetVector();
        assertEquals(ValueVector.MIN_ROW_COUNT, offsetVector.getValueCapacity());
        keyVector = mapVector.getChild("key");
        valueVector1 = mapVector.getChild("value");
        assertEquals(ValueVector.MIN_ROW_COUNT, keyVector.getValueCapacity());
        offsetVector = ((VariableWidthVector) valueVector1).getOffsetVector();
        assertEquals(ValueVector.MIN_ROW_COUNT + 1, offsetVector.getValueCapacity());
        assertEquals(ValueVector.MIN_ROW_COUNT, valueVector1.getValueCapacity());
        v.clear();
      }
    } finally {
      // Runs on assertion failure too, so the scratch row set is always cleared.
      empty.clear();
    }
  } finally {
    rows.clear();
  }
}
use of org.apache.drill.test.rowSet.RowSet.SingleRowSet in project drill by axbaretto.
the class TestVectorContainer method testContainerMerge.
/**
 * Exercises merging of two schemas and of two vector containers.
 * The merge is "horizontal", in the manner of a row-by-row join:
 * each container is a list of vectors, so the merged result is
 * simply the two lists combined. Also checks that merging is
 * rejected when the left batch carries a selection vector.
 */
@Test
public void testContainerMerge() {

  // Left side: simulated data from a reader.
  BatchSchema readerSchema = new SchemaBuilder()
      .add("a", MinorType.INT)
      .addNullable("b", MinorType.VARCHAR)
      .build();
  SingleRowSet readerRows = fixture.rowSetBuilder(readerSchema)
      .addRow(10, "fred")
      .addRow(20, "barney")
      .addRow(30, "wilma")
      .build();

  // Right side: simulated "implicit" columns (row number and file name).
  BatchSchema implicitSchema = new SchemaBuilder()
      .add("x", MinorType.SMALLINT)
      .add("y", MinorType.VARCHAR)
      .build();
  SingleRowSet implicitRows = fixture.rowSetBuilder(implicitSchema)
      .addRow(1, "foo.txt")
      .addRow(2, "bar.txt")
      .addRow(3, "dino.txt")
      .build();

  // The merged batch we expect: left columns followed by right columns.
  BatchSchema mergedSchema = new SchemaBuilder()
      .add("a", MinorType.INT)
      .addNullable("b", MinorType.VARCHAR)
      .add("x", MinorType.SMALLINT)
      .add("y", MinorType.VARCHAR)
      .build();
  SingleRowSet expectedRows = fixture.rowSetBuilder(mergedSchema)
      .addRow(10, "fred", 1, "foo.txt")
      .addRow(20, "barney", 2, "bar.txt")
      .addRow(30, "wilma", 3, "dino.txt")
      .build();
  RowSetComparison verifier = new RowSetComparison(expectedRows);

  // Case 1: merge the containers (no selection vector) and wrap the result.
  RowSet wrappedMerge = fixture.wrap(readerRows.container().merge(implicitRows.container()));
  verifier.verify(wrappedMerge);

  // Case 2: merge again, this time going through the row set facade.
  RowSet facadeMerge = DirectRowSet.fromContainer(readerRows.container().merge(implicitRows.container()));
  verifier.verifyAndClearAll(facadeMerge);

  // Case 3: put a selection vector on the left side. In the present code,
  // merging is forbidden for batches that have a selection vector.
  SingleRowSet indirectReaderRows = readerRows.toIndirect();
  try {
    indirectReaderRows.container().merge(implicitRows.container());
    fail();
  } catch (IllegalArgumentException ignored) {
    // Expected: the merge must be rejected.
  }

  indirectReaderRows.clear();
  implicitRows.clear();
}
Aggregations