use of org.apache.drill.exec.vector.complex.RepeatedValueVector in project drill by apache.
the class TestRecordBatchSizer method testEmptyBatchRepeatedFixedWidth.
/**
 * Runs {@link RecordBatchSizer} over a batch with two repeated fixed-width
 * columns and no rows, checks the reported column sizes, and then checks the
 * offset/data vector capacities produced by {@code ColumnSize.allocateVector}
 * for several requested row counts.
 */
@Test
public void testEmptyBatchRepeatedFixedWidth() {
  TupleMetadata schema = new SchemaBuilder()
      .addArray("a", MinorType.BIGINT)
      .addArray("b", MinorType.FLOAT8)
      .buildSchema();
  RowSetBuilder builder = fixture.rowSetBuilder(schema);
  RowSet rows = builder.build();
  // try/finally so that rows.clear() still runs when an assertion throws.
  try {
    // Run the record batch sizer on the resulting batch.
    RecordBatchSizer sizer = new RecordBatchSizer(rows.container());
    assertEquals(2, sizer.columns().size());

    /*
     * stdDataSize:8*5, stdNetSize:8*5+4, dataSizePerEntry:0, netSizePerEntry:0,
     * totalDataSize:0, totalNetSize:0, valueCount:0,
     * elementCount:0, cardinality:0, isVariableWidth:false
     */
    verifyColumnValues(sizer.columns().get("a"), 40, 44, 0, 0, 0, 0, 0, 0, 0, false);
    verifyColumnValues(sizer.columns().get("b"), 40, 44, 0, 0, 0, 0, 0, 0, 0, false);

    // Verify memory allocation is done correctly based on std size for empty batch.
    SingleRowSet empty = fixture.rowSet(schema);
    try {
      VectorAccessible accessible = empty.vectorAccessible();
      for (VectorWrapper<?> vw : accessible) {
        ValueVector v = vw.getValueVector();
        RecordBatchSizer.ColumnSize colSize = sizer.getColumn(v.getField().getName());
        UInt4Vector offsetVector;
        ValueVector dataVector;

        // Allocates to nearest power of two.
        colSize.allocateVector(v, testRowCount);
        offsetVector = ((RepeatedValueVector) v).getOffsetVector();
        assertEquals(Integer.highestOneBit(testRowCount) << 1, offsetVector.getValueCapacity());
        dataVector = ((RepeatedValueVector) v).getDataVector();
        assertEquals(Integer.highestOneBit((testRowCount * STD_REPETITION_FACTOR) << 1),
            dataVector.getValueCapacity());
        v.clear();

        // Allocates the same as value passed since it is already power of two.
        // -1 is done for adjustment needed for offset vector.
        colSize.allocateVector(v, testRowCountPowerTwo - 1);
        offsetVector = ((RepeatedValueVector) v).getOffsetVector();
        assertEquals(testRowCountPowerTwo, offsetVector.getValueCapacity());
        dataVector = ((RepeatedValueVector) v).getDataVector();
        assertEquals(Integer.highestOneBit((testRowCountPowerTwo - 1) * STD_REPETITION_FACTOR) << 1,
            dataVector.getValueCapacity());
        v.clear();

        // Allocate for max rows.
        colSize.allocateVector(v, ValueVector.MAX_ROW_COUNT - 1);
        offsetVector = ((RepeatedValueVector) v).getOffsetVector();
        assertEquals(ValueVector.MAX_ROW_COUNT, offsetVector.getValueCapacity());
        dataVector = ((RepeatedValueVector) v).getDataVector();
        assertEquals(Integer.highestOneBit(((ValueVector.MAX_ROW_COUNT - 1) * STD_REPETITION_FACTOR) << 1),
            dataVector.getValueCapacity());
        v.clear();

        // Allocate for 0 rows. Should at least do allocation for 1 row.
        colSize.allocateVector(v, 0);
        offsetVector = ((RepeatedValueVector) v).getOffsetVector();
        assertEquals(ValueVector.MIN_ROW_COUNT + 1, offsetVector.getValueCapacity());
        dataVector = ((RepeatedValueVector) v).getDataVector();
        assertEquals(ValueVector.MIN_ROW_COUNT, dataVector.getValueCapacity());
        v.clear();
      }
    } finally {
      empty.clear();
    }
  } finally {
    rows.clear();
  }
}
use of org.apache.drill.exec.vector.complex.RepeatedValueVector in project drill by apache.
the class TestOutputBatchSize method testSizerRepeatedList.
/**
 * Reads ten JSON rows whose column "c" is a nested array
 * ({@code [ [1,2,3,4], [5,6,7,8] ]}), runs {@link RecordBatchSizer} over the
 * resulting batch, checks the reported sizes for "c", and then checks the
 * capacities of the nested vectors produced by
 * {@code ColumnSize.allocateVector} for several requested row counts.
 */
@Test
public void testSizerRepeatedList() throws Exception {
  List<String> inputJsonBatches = Lists.newArrayList();
  final String rowValue = "[ [1,2,3,4], [5,6,7,8] ]";

  // numRows comma-terminated rows plus one final row: numRows + 1 rows in total.
  numRows = 9;
  StringBuilder batchString = new StringBuilder();
  batchString.append("[");
  for (int i = 0; i < numRows; i++) {
    batchString.append("{\"c\" : ").append(rowValue).append("},");
  }
  batchString.append("{\"c\" : ").append(rowValue).append("}");
  batchString.append("]");
  inputJsonBatches.add(batchString.toString());

  // Create a dummy scanBatch to figure out the size.
  RecordBatch scanBatch = new ScanBatch(new MockPhysicalOperator(), fragContext,
      getReaderListForJsonBatches(inputJsonBatches, fragContext));
  VectorAccessible va = new BatchIterator(scanBatch).iterator().next();
  RecordBatchSizer sizer = new RecordBatchSizer(va);
  assertEquals(1, sizer.columns().size());
  RecordBatchSizer.ColumnSize column = sizer.columns().get("c");
  assertNotNull(column);

  /*
   * stdDataSize:8*5*5, stdNetSize:8*5*5 + 4*5 + 4*5 + 4,
   * dataSizePerEntry:8*8, netSizePerEntry:8*8 + 4*2 + 4,
   * totalDataSize:8*8*10, totalNetSize:netSizePerEntry*10, valueCount:10,
   * elementCount:20, cardinality:2, isVariableWidth:false
   */
  assertEquals(200, column.getStdDataSizePerEntry());
  assertEquals(244, column.getStdNetSizePerEntry());
  assertEquals(64, column.getDataSizePerEntry());
  assertEquals(76, column.getNetSizePerEntry());
  assertEquals(640, column.getTotalDataSize());
  assertEquals(760, column.getTotalNetSize());
  assertEquals(10, column.getValueCount());
  assertEquals(20, column.getElementCount());
  assertEquals(2, column.getCardinality(), 0.01);
  assertEquals(false, column.isVariableWidth());

  final int testRowCount = 1000;
  final int testRowCountPowerTwo = 2048;
  for (VectorWrapper<?> vw : va) {
    ValueVector v = vw.getValueVector();
    v.clear();
    RecordBatchSizer.ColumnSize colSize = sizer.getColumn(v.getField().getName());

    // Allocates to nearest power of two.
    // Expected: outer offsets ~ row count, inner data ~ 2 * 4 * row count,
    // inner offsets ~ 2 * row count (2 = outer cardinality, 4 = inner cardinality).
    colSize.allocateVector(v, testRowCount);
    assertRepeatedListCapacities(v,
        Integer.highestOneBit(testRowCount) << 1,
        Integer.highestOneBit((testRowCount * 8) << 1),
        Integer.highestOneBit(testRowCount * 2) << 1);
    v.clear();

    // Allocates the same as value passed since it is already power of two.
    // -1 is done for adjustment needed for offset vector.
    colSize.allocateVector(v, testRowCountPowerTwo - 1);
    assertRepeatedListCapacities(v,
        testRowCountPowerTwo,
        testRowCountPowerTwo * 8,
        testRowCountPowerTwo * 2);
    v.clear();

    // MAX ROW COUNT
    colSize.allocateVector(v, ValueVector.MAX_ROW_COUNT - 1);
    assertRepeatedListCapacities(v,
        ValueVector.MAX_ROW_COUNT,
        ValueVector.MAX_ROW_COUNT * 8,
        ValueVector.MAX_ROW_COUNT * 2);
    v.clear();

    // MIN ROW COUNT: outer offsets and inner data are expected to hold the
    // minimum row count, inner offsets 2 (outer array cardinality) * that.
    colSize.allocateVector(v, 0);
    assertRepeatedListCapacities(v,
        ValueVector.MIN_ROW_COUNT,
        ValueVector.MIN_ROW_COUNT,
        ValueVector.MIN_ROW_COUNT * 2);
    v.clear();
  }
}

/**
 * Asserts the value capacities of the three vectors inspected for a
 * repeated-list column, in this order: the outer offset vector, the data
 * vector of the inner repeated vector, and the inner offset vector.
 *
 * @param v repeated-list vector that has just been allocated
 * @param expectedOuterOffsets expected capacity of the outer offset vector
 * @param expectedInnerValues expected capacity of the inner vector's data vector
 * @param expectedInnerOffsets expected capacity of the inner vector's offset vector
 */
private static void assertRepeatedListCapacities(ValueVector v, int expectedOuterOffsets,
    int expectedInnerValues, int expectedInnerOffsets) {
  // Offset vector of the delegate vector, i.e. the outer array.
  UInt4Vector outerOffsets = ((RepeatedListVector) v).getOffsetVector();
  assertEquals(expectedOuterOffsets, outerOffsets.getValueCapacity());

  // Inner vector of the delegate vector.
  ValueVector inner = ((RepeatedValueVector) v).getDataVector();

  ValueVector innerData = ((RepeatedValueVector) inner).getDataVector();
  assertEquals(expectedInnerValues, innerData.getValueCapacity());

  UInt4Vector innerOffsets = ((RepeatedValueVector) inner).getOffsetVector();
  assertEquals(expectedInnerOffsets, innerOffsets.getValueCapacity());
}
use of org.apache.drill.exec.vector.complex.RepeatedValueVector in project drill by apache.
the class TestRecordBatchSizer method testSizerRepeatedFixedWidth.
/**
 * Builds ten rows with two repeated fixed-width columns of five elements
 * each, runs {@link RecordBatchSizer} over the batch, checks the reported
 * column sizes, and then checks the offset/data vector capacities produced by
 * {@code ColumnSize.allocateVector} for several requested row counts.
 */
@Test
public void testSizerRepeatedFixedWidth() {
  TupleMetadata schema = new SchemaBuilder()
      .addArray("a", MinorType.BIGINT)
      .addArray("b", MinorType.FLOAT8)
      .buildSchema();
  RowSetBuilder builder = fixture.rowSetBuilder(schema);
  for (long i = 0; i < 10; i++) {
    builder.addRow(
        new long[] { 1, 2, 3, 4, 5 },
        new double[] { i * 0.1, i * 0.1, i * 0.1, i * 0.2, i * 0.3 });
  }
  RowSet rows = builder.build();
  // try/finally so that rows.clear() still runs when an assertion throws.
  try {
    // Run the record batch sizer on the resulting batch.
    RecordBatchSizer sizer = new RecordBatchSizer(rows.container());
    assertEquals(2, sizer.columns().size());

    /*
     * stdDataSize:8*5, stdNetSize:8*5+4, dataSizePerEntry:5*8, netSizePerEntry:5*8+4,
     * totalDataSize:5*8*10, totalNetSize:(5*8+4)*10, valueCount:10,
     * elementCount:50, cardinality:5, isVariableWidth:false
     */
    verifyColumnValues(sizer.columns().get("a"), 40, 44, 40, 44, 400, 440, 10, 50, 5, false);
    verifyColumnValues(sizer.columns().get("b"), 40, 44, 40, 44, 400, 440, 10, 50, 5, false);

    SingleRowSet empty = fixture.rowSet(schema);
    try {
      VectorAccessible accessible = empty.vectorAccessible();
      for (VectorWrapper<?> vw : accessible) {
        ValueVector v = vw.getValueVector();
        RecordBatchSizer.ColumnSize colSize = sizer.getColumn(v.getField().getName());
        UInt4Vector offsetVector;
        ValueVector dataVector;

        // Allocates to nearest power of two.
        colSize.allocateVector(v, testRowCount);
        offsetVector = ((RepeatedValueVector) v).getOffsetVector();
        assertEquals(Integer.highestOneBit(testRowCount) << 1, offsetVector.getValueCapacity());
        dataVector = ((RepeatedValueVector) v).getDataVector();
        assertEquals(Integer.highestOneBit((testRowCount * 5) << 1), dataVector.getValueCapacity());
        v.clear();

        // Allocates the same as value passed since it is already power of two.
        // -1 is done for adjustment needed for offset vector.
        colSize.allocateVector(v, testRowCountPowerTwo - 1);
        offsetVector = ((RepeatedValueVector) v).getOffsetVector();
        assertEquals(testRowCountPowerTwo, offsetVector.getValueCapacity());
        dataVector = ((RepeatedValueVector) v).getDataVector();
        assertEquals(Integer.highestOneBit((testRowCountPowerTwo - 1) * 5) << 1,
            dataVector.getValueCapacity());
        v.clear();

        // Allocate for max rows.
        colSize.allocateVector(v, ValueVector.MAX_ROW_COUNT - 1);
        offsetVector = ((RepeatedValueVector) v).getOffsetVector();
        assertEquals(ValueVector.MAX_ROW_COUNT, offsetVector.getValueCapacity());
        dataVector = ((RepeatedValueVector) v).getDataVector();
        assertEquals(Integer.highestOneBit(((ValueVector.MAX_ROW_COUNT - 1) * 5) << 1),
            dataVector.getValueCapacity());
        v.clear();

        // Allocate for 0 rows. Should at least do allocation for 1 row.
        colSize.allocateVector(v, 0);
        offsetVector = ((RepeatedValueVector) v).getOffsetVector();
        assertEquals(ValueVector.MIN_ROW_COUNT + 1, offsetVector.getValueCapacity());
        dataVector = ((RepeatedValueVector) v).getDataVector();
        assertEquals(ValueVector.MIN_ROW_COUNT, dataVector.getValueCapacity());
        v.clear();
      }
    } finally {
      empty.clear();
    }
  } finally {
    rows.clear();
  }
}
use of org.apache.drill.exec.vector.complex.RepeatedValueVector in project drill by apache.
the class HyperReaderBuilder method buildDict.
/**
 * Builds the reader for a DICT column. When the column metadata reports an
 * array, the DICT reader is built over the data vector of the repeated vector
 * and then wrapped in an array reader; otherwise the DICT reader is returned
 * directly.
 *
 * @param va accessor for the column's vector
 * @param metadata metadata of the column; its tuple schema describes the members
 * @return the DICT reader, or an array reader wrapping it for array columns
 */
private AbstractObjectReader buildDict(VectorAccessor va, ColumnMetadata metadata) {
  final boolean repeated = metadata.isArray();
  // The vector is fetched up front regardless of whether the column is an array.
  final ValueVector topVector = va.vector();

  // For an array, the DICT entries live in the repeated vector's data vector.
  final VectorAccessor dictAccessor = repeated
      ? new VectorAccessors.SingleVectorAccessor(((RepeatedValueVector) topVector).getDataVector())
      : va;

  final List<AbstractObjectReader> memberReaders =
      buildMapMembers(dictAccessor, metadata.tupleSchema());
  final AbstractObjectReader dictReader =
      DictReaderImpl.build(metadata, dictAccessor, memberReaders);

  return repeated
      ? ArrayReaderImpl.buildTuple(metadata, va, dictReader)
      : dictReader;
}
use of org.apache.drill.exec.vector.complex.RepeatedValueVector in project drill by apache.
the class ColumnBuilder method buildPrimitive.
/**
 * Build a primitive column. If the column is projected (or creation is
 * otherwise allowed for the parent), obtain a vector from the parent's vector
 * cache; if not, the column has no backing vector and the writer is built
 * with a {@code null} vector. In both cases a writer, a vector state and the
 * column state that binds the two together are created.
 *
 * @param parent container state supplying the projection, the vector cache
 * and the loader for the new column
 * @param columnSchema schema of the new primitive column
 * @return column state for the new column
 */
private ColumnState buildPrimitive(ContainerState parent, ColumnMetadata columnSchema) {
  // Schema actually used for the writer and vector state; it may be replaced
  // below by a version merged with the cached vector's field.
  ColumnMetadata effectiveSchema = columnSchema;

  // Stays null when the column is not projected: no materialized backing.
  ValueVector vector = null;

  final boolean materialize =
      parent.projection().projection(columnSchema).isProjected || allowCreation(parent);
  if (materialize) {
    // Create the vector for the column.
    vector = parent.vectorCache().vectorFor(columnSchema.schema());

    // In permissive mode the cached vector's field may differ
    // from that requested. Update the schema to match.
    final boolean fieldDiffers = parent.vectorCache().isPermissive()
        && !vector.getField().isEquivalent(columnSchema.schema());
    if (fieldDiffers) {
      effectiveSchema = ((PrimitiveColumnMetadata) columnSchema).mergeWith(vector.getField());
    }
  }

  // Create the writer.
  final AbstractObjectWriter colWriter =
      ColumnWriterFactory.buildColumnWriter(effectiveSchema, vector);

  // Build the vector state which manages the vector. The order of the checks
  // matters: missing vector first, then array, then nullable, then plain.
  final VectorState vectorState;
  if (vector == null) {
    vectorState = new NullVectorState();
  } else if (effectiveSchema.isArray()) {
    vectorState = new RepeatedVectorState(colWriter.array(), (RepeatedValueVector) vector);
  } else if (effectiveSchema.isNullable()) {
    vectorState = new NullableVectorState(colWriter, (NullableVector) vector);
  } else {
    vectorState = SimpleVectorState.vectorState(effectiveSchema, colWriter.events(), vector);
  }

  // Create the column state which binds the vector and writer together.
  return new PrimitiveColumnState(parent.loader(), colWriter, vectorState);
}
Aggregations