Use of org.apache.drill.exec.vector.complex.RepeatedValueVector in project drill by axbaretto.
Class TestOutputBatchSize, method testSizerRepeatedRepeatedList.
/**
 * Checks the statistics RecordBatchSizer reports for a column holding a
 * three-level nested array, and then checks the capacities of the nested
 * offset/data vectors after ColumnSize.allocateVector is called with several
 * row counts (a non power of two, a power of two minus one, the maximum row
 * count minus one, and zero).
 */
@Test
public void testSizerRepeatedRepeatedList() throws Exception {
  // Every row carries the same value for column "c":
  // 2 outer arrays x 2 middle arrays x 4 leaf values = 16 leaf values per row.
  final String nestedValue = "[ [[1,2,3,4], [5,6,7,8]], [[1,2,3,4], [5,6,7,8]] ]";

  // numRows rows followed by a trailing comma, plus one final row: numRows + 1 = 10 rows in total.
  numRows = 9;
  StringBuilder batchString = new StringBuilder();
  batchString.append("[");
  for (int i = 0; i < numRows; i++) {
    batchString.append("{\"c\" : ").append(nestedValue).append("},");
  }
  batchString.append("{\"c\" : ").append(nestedValue).append("}");
  batchString.append("]");

  List<String> inputJsonBatches = Lists.newArrayList();
  inputJsonBatches.add(batchString.toString());

  // Create a dummy scanBatch to figure out the size.
  RecordBatch scanBatch = new ScanBatch(new MockPhysicalOperator(), fragContext, getReaderListForJsonBatches(inputJsonBatches, fragContext));
  VectorAccessible va = new BatchIterator(scanBatch).iterator().next();
  RecordBatchSizer sizer = new RecordBatchSizer(va);
  assertEquals(1, sizer.columns().size());
  RecordBatchSizer.ColumnSize column = sizer.columns().get("c");
  assertNotNull(column);

  /*
   * Values asserted below for the 10-row batch:
   *   stdDataSize       8*10*10*10 = 8000
   *   stdNetSize        8*10*10*10 + 8*10*10 + 8*10 + 4 = 8884
   *   dataSizePerEntry  16*8 = 128
   *   netSizePerEntry   156 (presumably 16*8 + 4*4 + 2*4 + 4 -- TODO confirm the breakdown)
   *   totalDataSize     128*10 = 1280
   *   totalNetSize      netSizePerEntry*10 = 1560
   *   valueCount 10, elementCount 20, cardinality 2, isVariableWidth false
   */
  assertEquals(8000, column.getStdDataSizePerEntry());
  assertEquals(8884, column.getStdNetSizePerEntry());
  assertEquals(128, column.getDataSizePerEntry());
  assertEquals(156, column.getNetSizePerEntry());
  assertEquals(1280, column.getTotalDataSize());
  assertEquals(1560, column.getTotalNetSize());
  assertEquals(10, column.getValueCount());
  assertEquals(20, column.getElementCount());
  assertEquals(2, column.getCardinality(), 0.01);
  assertEquals(false, column.isVariableWidth());

  final int testRowCount = 1000;
  final int testRowCountPowerTwo = 2048;

  for (VectorWrapper<?> vw : va) {
    ValueVector v = vw.getValueVector();
    v.clear();
    RecordBatchSizer.ColumnSize colSize = sizer.getColumn(v.getField().getName());

    // ---- Non power-of-two row count: allocates to nearest power of two ----
    colSize.allocateVector(v, testRowCount);

    // offset vector of delegate vector i.e. outer array should have row count number of values.
    UInt4Vector offsetVector = ((RepeatedListVector) v).getOffsetVector();
    assertEquals((Integer.highestOneBit(testRowCount) << 1), offsetVector.getValueCapacity());

    // Get data vector of delegate vector. This is repeated list again
    ValueVector dataVector = ((RepeatedListVector) v).getDataVector();

    // offset vector of delegate vector of the inner repeated list
    // This should have row count * 2 number of values.
    offsetVector = ((RepeatedListVector) dataVector).getOffsetVector();
    assertEquals((Integer.highestOneBit(testRowCount * 2) << 1), offsetVector.getValueCapacity());

    // The inner repeated list itself (dataVector) should report
    // row count * 2 number of values - 1 (for offset vector adjustment).
    ValueVector innerDataVector = ((RepeatedValueVector) dataVector).getDataVector();
    assertEquals((Integer.highestOneBit((testRowCount * 2) << 1) - 1), dataVector.getValueCapacity());

    // offset vector of inner vector should have
    // 2 (outer array cardinality) * 2 (inner array cardinality) * row count number of values.
    offsetVector = ((RepeatedValueVector) innerDataVector).getOffsetVector();
    assertEquals((Integer.highestOneBit(testRowCount * 4) << 1), offsetVector.getValueCapacity());

    // Leaf data vector capacity for this row count.
    dataVector = ((RepeatedValueVector) innerDataVector).getDataVector();
    assertEquals(Integer.highestOneBit(testRowCount << 1) * 16, dataVector.getValueCapacity());

    v.clear();

    // ---- Power-of-two row count ----
    // Allocates the same as value passed since it is already power of two.
    // -1 is done for adjustment needed for offset vector.
    colSize.allocateVector(v, testRowCountPowerTwo - 1);

    // offset vector of delegate vector i.e. outer array should have row count number of values.
    offsetVector = ((RepeatedListVector) v).getOffsetVector();
    assertEquals(testRowCountPowerTwo, offsetVector.getValueCapacity());

    // Get data vector of delegate vector. This is repeated list again
    dataVector = ((RepeatedListVector) v).getDataVector();

    // offset vector of delegate vector of the inner repeated list
    // This should have row count * 2 number of values.
    offsetVector = ((RepeatedListVector) dataVector).getOffsetVector();
    assertEquals(testRowCountPowerTwo * 2, offsetVector.getValueCapacity());

    // The inner repeated list itself (dataVector) should report
    // row count * 2 number of values - 1 (for offset vector adjustment).
    innerDataVector = ((RepeatedValueVector) dataVector).getDataVector();
    assertEquals(testRowCountPowerTwo * 2 - 1, dataVector.getValueCapacity());

    // offset vector of inner vector should have
    // 2 (outer array cardinality) * 2 (inner array cardinality) * row count number of values.
    offsetVector = ((RepeatedValueVector) innerDataVector).getOffsetVector();
    assertEquals(testRowCountPowerTwo * 4, offsetVector.getValueCapacity());

    // Leaf data vector capacity for this row count.
    dataVector = ((RepeatedValueVector) innerDataVector).getDataVector();
    assertEquals(testRowCountPowerTwo * 16, dataVector.getValueCapacity());

    v.clear();

    // ---- MAX ROW COUNT ----
    colSize.allocateVector(v, ValueVector.MAX_ROW_COUNT - 1);

    // offset vector of delegate vector i.e. outer array should have row count number of values.
    offsetVector = ((RepeatedListVector) v).getOffsetVector();
    assertEquals(ValueVector.MAX_ROW_COUNT, offsetVector.getValueCapacity());

    // Get data vector of delegate vector. This is repeated list again
    dataVector = ((RepeatedListVector) v).getDataVector();

    // offset vector of delegate vector of the inner repeated list
    // This should have row count * 2 number of values.
    offsetVector = ((RepeatedListVector) dataVector).getOffsetVector();
    assertEquals(ValueVector.MAX_ROW_COUNT * 2, offsetVector.getValueCapacity());

    // The inner repeated list itself (dataVector) should report
    // row count * 2 number of values - 1 (for offset vector adjustment).
    innerDataVector = ((RepeatedValueVector) dataVector).getDataVector();
    assertEquals(ValueVector.MAX_ROW_COUNT * 2 - 1, dataVector.getValueCapacity());

    // offset vector of inner vector should have
    // 2 (outer array cardinality) * 2 (inner array cardinality) * row count number of values.
    offsetVector = ((RepeatedValueVector) innerDataVector).getOffsetVector();
    assertEquals(ValueVector.MAX_ROW_COUNT * 4, offsetVector.getValueCapacity());

    // Leaf data vector capacity for this row count.
    dataVector = ((RepeatedValueVector) innerDataVector).getDataVector();
    assertEquals(ValueVector.MAX_ROW_COUNT * 16, dataVector.getValueCapacity());

    v.clear();

    // ---- MIN ROW COUNT ----
    colSize.allocateVector(v, 0);

    // offset vector of delegate vector i.e. outer array should have 1 value.
    offsetVector = ((RepeatedListVector) v).getOffsetVector();
    assertEquals(ValueVector.MIN_ROW_COUNT, offsetVector.getValueCapacity());

    // Get data vector of delegate vector. This is repeated list again
    dataVector = ((RepeatedListVector) v).getDataVector();

    // offset vector of delegate vector of the inner repeated list
    offsetVector = ((RepeatedListVector) dataVector).getOffsetVector();
    assertEquals(ValueVector.MIN_ROW_COUNT, offsetVector.getValueCapacity());

    // Re-fetch the innermost repeated vector after this allocation, as the other
    // sections do, instead of reusing the reference obtained before v.clear().
    innerDataVector = ((RepeatedValueVector) dataVector).getDataVector();

    // offset vector of inner vector should have
    // 2 (outer array cardinality) * 1.
    offsetVector = ((RepeatedValueVector) innerDataVector).getOffsetVector();
    assertEquals(ValueVector.MIN_ROW_COUNT * 2, offsetVector.getValueCapacity());

    // Data vector of inner vector should have 1 value.
    dataVector = ((RepeatedValueVector) innerDataVector).getDataVector();
    assertEquals(ValueVector.MIN_ROW_COUNT, dataVector.getValueCapacity());

    v.clear();
  }
}
Use of org.apache.drill.exec.vector.complex.RepeatedValueVector in project drill by apache.
Class SimpleReaderBuilder, method buildDict.
/**
 * Builds the reader for a dict column.
 * <p>
 * When the column metadata reports an array, the dict vector is taken from
 * the data vector of the given repeated vector and read through its own
 * {@code SingleVectorAccessor}; the resulting dict reader is then wrapped by
 * {@code ArrayReaderImpl.buildTuple}. Otherwise the given vector is itself
 * the dict vector and the supplied accessor is used directly.
 *
 * @param vector  the dict vector, or the repeated vector that contains it
 * @param va      accessor for {@code vector}
 * @param descrip description of the column, providing metadata and children
 * @return the dict reader, wrapped in an array reader for array columns
 */
private AbstractObjectReader buildDict(ValueVector vector, VectorAccessor va, VectorDescrip descrip) {
  final boolean repeated = descrip.metadata.isArray();

  // For an array column the actual dict lives one level down, in the data vector.
  final DictVector dict = repeated
      ? (DictVector) ((RepeatedValueVector) vector).getDataVector()
      : (DictVector) vector;
  final VectorAccessor accessor = repeated ? new SingleVectorAccessor(dict) : va;

  final List<AbstractObjectReader> memberReaders = buildMapMembers(dict, descrip.childProvider());
  final AbstractObjectReader dictReader = DictReaderImpl.build(descrip.metadata, accessor, memberReaders);

  return repeated
      ? ArrayReaderImpl.buildTuple(descrip.metadata, va, dictReader)
      : dictReader;
}
Use of org.apache.drill.exec.vector.complex.RepeatedValueVector in project drill by apache.
Class FlattenRecordBatch, method getFlattenFieldTransferPair.
/**
 * Creates the transfer pair used for the column being flattened.
 * <p>
 * Within a repeated field the actual values are laid out exactly as they are
 * in a scalar vector of the same SQL type: a repeated int vector, for
 * example, is a vector of offsets into a regular int vector. Because the two
 * layouts match, the flattened column does not need value-by-value copies;
 * the inner vector of the repeated field can be transferred to the scalar
 * output vector with vector copies. That transfer is completed once it is
 * known how many records fit (either the batch ends, or one of the other
 * vectors fills up while its data is copied alongside each flattened value).
 *
 * @param reference reference supplying the name of the output field
 * @return the transfer pair, or {@code null} when the repeated vector has no
 *         data vector yet
 */
private TransferPair getFlattenFieldTransferPair(FieldReference reference) {
  final TypedFieldId columnId = incoming.getValueVectorId(popConfig.getColumn());
  final Class<?> columnClass = incoming.getSchema().getColumn(columnId.getFieldIds()[0]).getValueClass();
  final ValueVector flattenField = incoming.getValueAccessorById(columnClass, columnId.getFieldIds()).getValueVector();

  // Repeated maps transfer into a single (non-repeated) map.
  if (flattenField instanceof AbstractRepeatedMapVector) {
    return ((AbstractRepeatedMapVector) flattenField)
        .getTransferPairToSingleMap(reference.getAsNamePart().getName(), oContext.getAllocator());
  }

  // Any other repeated vector: transfer its inner data vector directly.
  if (flattenField instanceof RepeatedValueVector) {
    final ValueVector elements = ((RepeatedValueVector) flattenField).getDataVector();
    // The data vector may be null because of fast schema return for repeated list vectors.
    return elements == null
        ? null
        : elements.getTransferPair(reference.getAsNamePart().getName(), oContext.getAllocator());
  }

  // Not a repeated vector at all: only tolerated for an empty incoming batch.
  if (incoming.getRecordCount() != 0) {
    throw UserException.unsupportedError().message("Flatten does not support inputs of non-list values.").build(logger);
  }
  logger.error("Cannot cast {} to RepeatedValueVector", flattenField);
  // When the incoming record count is 0 the type seen here is not solid yet,
  // so fall back to an empty repeated map vector instead of failing.
  final RepeatedValueVector placeholder = new RepeatedMapVector(flattenField.getField(), oContext.getAllocator(), null);
  return placeholder.getTransferPair(reference.getAsNamePart().getName(), oContext.getAllocator());
}
Use of org.apache.drill.exec.vector.complex.RepeatedValueVector in project drill by apache.
Class FlattenRecordBatch, method setFlattenVector.
/**
 * Looks up the incoming vector for the configured flatten column and hands
 * it to the flattener. If that vector is not a repeated vector, a non-empty
 * batch is rejected with an unsupported-operation error, while an empty
 * batch falls back to a fresh, empty repeated map vector.
 */
private void setFlattenVector() {
  final TypedFieldId columnId = incoming.getValueVectorId(popConfig.getColumn());
  final MaterializedField field = incoming.getSchema().getColumn(columnId.getFieldIds()[0]);
  final ValueVector incomingVector =
      incoming.getValueAccessorById(field.getValueClass(), columnId.getFieldIds()).getValueVector();

  // Common case: the column is already a repeated vector.
  if (incomingVector instanceof RepeatedValueVector) {
    final RepeatedValueVector repeated = (RepeatedValueVector) incomingVector;
    flattener.setFlattenField(repeated);
    return;
  }

  if (incoming.getRecordCount() != 0) {
    throw UserException.unsupportedError().message("Flatten does not support inputs of non-list values.").build(logger);
  }

  // With a record count of 0 the type seen here is not solid, so do not fail.
  logger.error("setFlattenVector cast failed and recordcount is 0, create empty vector anyway.");
  final RepeatedValueVector placeholder = new RepeatedMapVector(field, oContext.getAllocator(), null);
  flattener.setFlattenField(placeholder);
}
Use of org.apache.drill.exec.vector.complex.RepeatedValueVector in project drill by apache.
Class TestRecordBatchSizer, method testEmptyBatchRepeatedMap.
/**
 * Runs RecordBatchSizer over an empty batch containing a repeated map with
 * an INT "key" and a VARCHAR "value", then checks that
 * ColumnSize.allocateVector sizes the map's offset vector and child vectors
 * from the standard sizes for several requested row counts.
 */
@Test
public void testEmptyBatchRepeatedMap() {
  TupleMetadata schema = new SchemaBuilder()
      .addMapArray("map")
        .add("key", MinorType.INT)
        .add("value", MinorType.VARCHAR)
        .resumeSchema()
      .buildSchema();
  RowSetBuilder builder = fixture.rowSetBuilder(schema);
  RowSet rows = builder.build();
  SingleRowSet empty = null;

  // Release both row sets in a finally block so that a failing assertion
  // does not leave their buffers allocated.
  try {
    // Run the record batch sizer on the resulting batch.
    RecordBatchSizer sizer = new RecordBatchSizer(rows.container());
    assertEquals(1, sizer.columns().size());

    /*
     * Expected values for the empty batch (argument order of
     * verifyColumnValues presumably matches this list -- confirm against
     * the helper):
     * stdDataSize:50+4, stdNetSize:50+4+4+4, dataSizePerEntry:0,
     * netSizePerEntry:0,
     * totalDataSize:0, totalNetSize:0,
     * valueCount:0,
     * elementCount:0, cardinality:0, isVariableWidth:false
     */
    verifyColumnValues(sizer.columns().get("map"), 54, 62, 0, 0, 0, 0, 0, 0, 0, false);

    // Verify memory allocation is done correctly based on std size for empty batch.
    empty = fixture.rowSet(schema);
    VectorAccessible accessible = empty.vectorAccessible();
    UInt4Vector offsetVector;

    for (VectorWrapper<?> vw : accessible) {
      ValueVector v = vw.getValueVector();
      RecordBatchSizer.ColumnSize colSize = sizer.getColumn(v.getField().getName());

      // Allocates to nearest power of two
      colSize.allocateVector(v, testRowCount);
      RepeatedMapVector mapVector = (RepeatedMapVector) v;
      offsetVector = ((RepeatedValueVector) mapVector).getOffsetVector();
      assertEquals((Integer.highestOneBit(testRowCount) << 1), offsetVector.getValueCapacity());
      ValueVector keyVector = mapVector.getChild("key");
      ValueVector valueVector1 = mapVector.getChild("value");
      assertEquals(((Integer.highestOneBit(testRowCount * STD_REPETITION_FACTOR) << 1)), keyVector.getValueCapacity());
      offsetVector = ((VariableWidthVector) valueVector1).getOffsetVector();
      assertEquals((Integer.highestOneBit(testRowCount * STD_REPETITION_FACTOR) << 1), offsetVector.getValueCapacity());
      assertEquals(Integer.highestOneBit(testRowCount * STD_REPETITION_FACTOR << 1) - 1, valueVector1.getValueCapacity());

      // Allocates the same as value passed since it is already power of two.
      colSize.allocateVector(v, testRowCountPowerTwo - 1);
      mapVector = (RepeatedMapVector) v;
      offsetVector = ((RepeatedValueVector) mapVector).getOffsetVector();
      assertEquals(testRowCountPowerTwo, offsetVector.getValueCapacity());
      keyVector = mapVector.getChild("key");
      valueVector1 = mapVector.getChild("value");
      assertEquals(Integer.highestOneBit(testRowCountPowerTwo * STD_REPETITION_FACTOR) << 1, keyVector.getValueCapacity());
      offsetVector = ((VariableWidthVector) valueVector1).getOffsetVector();
      assertEquals(Integer.highestOneBit((int) (testRowCountPowerTwo * STD_REPETITION_FACTOR)) << 1, offsetVector.getValueCapacity());
      assertEquals((Integer.highestOneBit(testRowCountPowerTwo * STD_REPETITION_FACTOR << 1)) - 1, valueVector1.getValueCapacity());

      // Allocate for max rows.
      colSize.allocateVector(v, ValueVector.MAX_ROW_COUNT - 1);
      mapVector = (RepeatedMapVector) v;
      offsetVector = ((RepeatedValueVector) mapVector).getOffsetVector();
      assertEquals(ValueVector.MAX_ROW_COUNT, offsetVector.getValueCapacity());
      keyVector = mapVector.getChild("key");
      valueVector1 = mapVector.getChild("value");
      assertEquals(Integer.highestOneBit(ValueVector.MAX_ROW_COUNT * STD_REPETITION_FACTOR) << 1, keyVector.getValueCapacity());
      offsetVector = ((VariableWidthVector) valueVector1).getOffsetVector();
      assertEquals(Integer.highestOneBit(ValueVector.MAX_ROW_COUNT * STD_REPETITION_FACTOR) << 1, offsetVector.getValueCapacity());
      assertEquals((Integer.highestOneBit(ValueVector.MAX_ROW_COUNT * STD_REPETITION_FACTOR) << 1) - 1, valueVector1.getValueCapacity());

      // Allocate for 0 rows. Should at least do allocation for 1 row.
      colSize.allocateVector(v, 0);
      mapVector = (RepeatedMapVector) v;
      offsetVector = ((RepeatedValueVector) mapVector).getOffsetVector();
      assertEquals(ValueVector.MIN_ROW_COUNT, offsetVector.getValueCapacity());
      keyVector = mapVector.getChild("key");
      valueVector1 = mapVector.getChild("value");
      assertEquals(ValueVector.MIN_ROW_COUNT, keyVector.getValueCapacity());
      offsetVector = ((VariableWidthVector) valueVector1).getOffsetVector();
      assertEquals(ValueVector.MIN_ROW_COUNT + 1, offsetVector.getValueCapacity());
      assertEquals(ValueVector.MIN_ROW_COUNT, valueVector1.getValueCapacity());

      v.clear();
    }
  } finally {
    if (empty != null) {
      empty.clear();
    }
    rows.clear();
  }
}
Aggregations