Use of org.apache.drill.exec.vector.complex.RepeatedMapVector in the Apache Drill project.
Class TestResultSetLoaderMapArray, method testBasics.
/**
 * Exercises a result set loader whose schema holds an INT column "a" and a
 * repeated map "m" with members "c" (INT) and "d" (VARCHAR). The first batch
 * is written with the original schema; in the second batch a nullable VARCHAR
 * member "e" is added to the map after one row has already been written.
 */
@Test
public void testBasics() {
  TupleMetadata schema = new SchemaBuilder()
      .add("a", MinorType.INT)
      .addMapArray("m")
      .add("c", MinorType.INT)
      .add("d", MinorType.VARCHAR)
      .resumeSchema()
      .buildSchema();
  ResultSetLoaderImpl.ResultSetOptions loaderOptions = new ResultSetOptionBuilder()
      .readerSchema(schema)
      .build();
  ResultSetLoader rsLoader = new ResultSetLoaderImpl(fixture.allocator(), loaderOptions);
  RowSetLoader rootWriter = rsLoader.writer();

  // The writer must expose the same two-column layout, with column 1 being a map array.
  TupleMetadata writerSchema = rootWriter.tupleSchema();
  assertEquals(2, writerSchema.size());
  assertTrue(writerSchema.metadata(1).isArray());
  assertTrue(writerSchema.metadata(1).isMap());
  assertEquals(2, writerSchema.metadata("m").tupleSchema().size());
  assertEquals(2, writerSchema.column("m").getChildren().size());

  // The map writer and the root schema must share the very same metadata objects.
  TupleWriter mapWriter = rootWriter.array("m").tuple();
  TupleMetadata mapSchema = mapWriter.tupleSchema();
  assertSame(writerSchema.metadata("m").tupleSchema(), mapWriter.schema().tupleSchema());
  assertSame(mapSchema, mapWriter.schema().tupleSchema());
  assertSame(mapSchema.metadata(0), mapWriter.scalar(0).schema());
  assertSame(mapSchema.metadata(1), mapWriter.scalar(1).schema());

  // First batch: three rows holding two, zero and three map entries respectively.
  rsLoader.startBatch();
  rootWriter.addRow(10, mapArray(
      mapValue(110, "d1.1"),
      mapValue(120, "d2.2")));
  rootWriter.addRow(20, mapArray());
  rootWriter.addRow(30, mapArray(
      mapValue(310, "d3.1"),
      mapValue(320, "d3.2"),
      mapValue(330, "d3.3")));

  // Check the harvested vector's children against the writer's column schemas.
  RowSet firstBatch = fixture.wrap(rsLoader.harvest());
  RepeatedMapVector firstMapVector =
      (RepeatedMapVector) firstBatch.container().getValueVector(1).getValueVector();
  MaterializedField firstMapField = firstMapVector.getField();
  assertEquals(2, firstMapField.getChildren().size());
  Iterator<MaterializedField> childFields = firstMapField.getChildren().iterator();
  assertTrue(mapWriter.scalar(0).schema().schema().isEquivalent(childFields.next()));
  assertTrue(mapWriter.scalar(1).schema().schema().isEquivalent(childFields.next()));

  SingleRowSet expectedFirst = fixture.rowSetBuilder(schema)
      .addRow(10, mapArray(
          mapValue(110, "d1.1"),
          mapValue(120, "d2.2")))
      .addRow(20, mapArray())
      .addRow(30, mapArray(
          mapValue(310, "d3.1"),
          mapValue(320, "d3.2"),
          mapValue(330, "d3.3")))
      .build();
  RowSetUtilities.verify(expectedFirst, firstBatch);

  // Second batch: write one row, then add map member "e". The row written
  // before the addition is expected to read back with a null "e".
  rsLoader.startBatch();
  rootWriter.addRow(40, mapArray(
      mapValue(410, "d4.1"),
      mapValue(420, "d4.2")));
  mapWriter.addColumn(SchemaBuilder.columnSchema("e", MinorType.VARCHAR, DataMode.OPTIONAL));
  rootWriter.addRow(50, mapArray(
      mapValue(510, "d5.1", "e5.1"),
      mapValue(520, "d5.2", null)));
  rootWriter.addRow(60, mapArray(
      mapValue(610, "d6.1", "e6.1"),
      mapValue(620, "d6.2", null),
      mapValue(630, "d6.3", "e6.3")));

  // The harvested map vector must now carry three children.
  RowSet secondBatch = fixture.wrap(rsLoader.harvest());
  RepeatedMapVector secondMapVector =
      (RepeatedMapVector) secondBatch.container().getValueVector(1).getValueVector();
  MaterializedField secondMapField = secondMapVector.getField();
  assertEquals(3, secondMapField.getChildren().size());

  TupleMetadata expectedSchema = new SchemaBuilder()
      .add("a", MinorType.INT)
      .addMapArray("m")
      .add("c", MinorType.INT)
      .add("d", MinorType.VARCHAR)
      .addNullable("e", MinorType.VARCHAR)
      .resumeSchema()
      .buildSchema();
  SingleRowSet expectedSecond = fixture.rowSetBuilder(expectedSchema)
      .addRow(40, mapArray(
          mapValue(410, "d4.1", null),
          mapValue(420, "d4.2", null)))
      .addRow(50, mapArray(
          mapValue(510, "d5.1", "e5.1"),
          mapValue(520, "d5.2", null)))
      .addRow(60, mapArray(
          mapValue(610, "d6.1", "e6.1"),
          mapValue(620, "d6.2", null),
          mapValue(630, "d6.3", "e6.3")))
      .build();
  RowSetUtilities.verify(expectedSecond, secondBatch);

  rsLoader.close();
}
Use of org.apache.drill.exec.vector.complex.RepeatedMapVector in the Apache Drill project.
Class FlattenRecordBatch, method getFlattenFieldTransferPair.
/**
 * Creates the transfer pair for the column being flattened.
 *
 * <p>Inside a repeated field the actual values are laid out exactly as they
 * are in a scalar vector of the same SQL type: a repeated int vector, for
 * example, is a vector of offsets into a regular int vector. Because the
 * layout of the values is the same in both, the flattened column does not
 * need per-value copies; a vector-level transfer from the repeated field's
 * inner vector to the scalar output vector is enough. The transfer is
 * completed once we know how many records fit, since we stop either at a
 * batch end or at the end of one of the other vectors whose values are
 * copied alongside each flattened value.
 *
 * @param reference supplies the name passed to the transfer pair factory
 * @return the transfer pair, or {@code null} when the incoming repeated
 *         vector has no data vector
 */
private TransferPair getFlattenFieldTransferPair(FieldReference reference) {
  final TypedFieldId flattenId = incoming.getValueVectorId(popConfig.getColumn());
  final Class<?> flattenClass = incoming.getSchema().getColumn(flattenId.getFieldIds()[0]).getValueClass();
  final ValueVector flattenField = incoming.getValueAccessorById(flattenClass, flattenId.getFieldIds()).getValueVector();

  // Repeated maps transfer into a single (non-repeated) map.
  if (flattenField instanceof AbstractRepeatedMapVector) {
    final AbstractRepeatedMapVector repeatedMap = (AbstractRepeatedMapVector) flattenField;
    return repeatedMap.getTransferPairToSingleMap(reference.getAsNamePart().getName(), oContext.getAllocator());
  }

  // Any other repeated vector transfers from its inner data vector.
  if (flattenField instanceof RepeatedValueVector) {
    final ValueVector dataVector = ((RepeatedValueVector) flattenField).getDataVector();
    // dataVector may be null because of fast schema return for repeated list vectors
    return dataVector == null
        ? null
        : dataVector.getTransferPair(reference.getAsNamePart().getName(), oContext.getAllocator());
  }

  // Not a repeated vector at all: only tolerated for an empty incoming batch.
  if (incoming.getRecordCount() != 0) {
    throw UserException.unsupportedError().message("Flatten does not support inputs of non-list values.").build(logger);
  }
  logger.error("Cannot cast {} to RepeatedValueVector", flattenField);
  // when incoming recordCount is 0, don't throw exception since the type being seen here is not solid
  final RepeatedValueVector placeholder = new RepeatedMapVector(flattenField.getField(), oContext.getAllocator(), null);
  return placeholder.getTransferPair(reference.getAsNamePart().getName(), oContext.getAllocator());
}
Use of org.apache.drill.exec.vector.complex.RepeatedMapVector in the Apache Drill project.
Class FlattenRecordBatch, method setFlattenVector.
/**
 * Looks up the incoming vector for the configured flatten column and hands
 * it to the flattener. If that vector is not a {@code RepeatedValueVector},
 * a non-empty incoming batch is rejected with an unsupported-operation
 * error, while an empty one gets a freshly created {@code RepeatedMapVector}
 * built from the column's field instead.
 */
private void setFlattenVector() {
  final TypedFieldId flattenId = incoming.getValueVectorId(popConfig.getColumn());
  final MaterializedField field = incoming.getSchema().getColumn(flattenId.getFieldIds()[0]);
  final ValueVector inVV = incoming.getValueAccessorById(field.getValueClass(), flattenId.getFieldIds()).getValueVector();

  final RepeatedValueVector flattenVector;
  if (inVV instanceof RepeatedValueVector) {
    flattenVector = (RepeatedValueVector) inVV;
  } else {
    if (incoming.getRecordCount() != 0) {
      throw UserException.unsupportedError().message("Flatten does not support inputs of non-list values.").build(logger);
    }
    // when incoming recordCount is 0, don't throw exception since the type being seen here is not solid
    logger.error("setFlattenVector cast failed and recordcount is 0, create empty vector anyway.");
    flattenVector = new RepeatedMapVector(field, oContext.getAllocator(), null);
  }
  flattener.setFlattenField(flattenVector);
}
Use of org.apache.drill.exec.vector.complex.RepeatedMapVector in the Apache Drill project.
Class TestValueVector, method testVectors.
/**
 * Convenience method that runs a test against one instance each of several
 * {@link ValueVector vector} types: UInt4, Bit, VarChar, nullable VarChar,
 * repeated list, map and repeated map. All vectors are closed afterwards,
 * whether or not the test succeeds.
 *
 * @param test test function to execute
 * @throws Exception propagated from the test function or from closing the vectors
 */
private void testVectors(VectorVerifier test) throws Exception {
  // Create every field first, then every vector, in the same order.
  final MaterializedField uint4Field = MaterializedField.create(EMPTY_SCHEMA_PATH, UInt4Holder.TYPE);
  final MaterializedField bitField = MaterializedField.create(EMPTY_SCHEMA_PATH, BitHolder.TYPE);
  final MaterializedField varCharField = MaterializedField.create(EMPTY_SCHEMA_PATH, VarCharHolder.TYPE);
  final MaterializedField nullableVarCharField = MaterializedField.create(EMPTY_SCHEMA_PATH, NullableVarCharHolder.TYPE);
  final MaterializedField repeatedListField = MaterializedField.create(EMPTY_SCHEMA_PATH, RepeatedListVector.TYPE);
  final MaterializedField mapField = MaterializedField.create(EMPTY_SCHEMA_PATH, MapVector.TYPE);
  final MaterializedField repeatedMapField = MaterializedField.create(EMPTY_SCHEMA_PATH, RepeatedMapVector.TYPE);

  final ValueVector[] vectors = {
      new UInt4Vector(uint4Field, allocator),
      new BitVector(bitField, allocator),
      new VarCharVector(varCharField, allocator),
      new NullableVarCharVector(nullableVarCharField, allocator),
      new RepeatedListVector(repeatedListField, allocator, null),
      new MapVector(mapField, allocator, null),
      new RepeatedMapVector(repeatedMapField, allocator, null)
  };

  try {
    for (int i = 0; i < vectors.length; i++) {
      test.verify(vectors[i]);
    }
  } finally {
    AutoCloseables.close(vectors);
  }
}
Use of org.apache.drill.exec.vector.complex.RepeatedMapVector in the Apache Drill project.
Class TestRecordBatchSizer, method testEmptyBatchRepeatedMap.
/**
 * Runs {@link RecordBatchSizer} over an empty batch whose only column is a
 * repeated map "map" with members "key" (INT) and "value" (VARCHAR), checks
 * the reported column sizes, and then checks the vector capacities that
 * result from allocating through the column size for several row counts:
 * testRowCount, testRowCountPowerTwo - 1, ValueVector.MAX_ROW_COUNT - 1 and 0.
 */
@Test
public void testEmptyBatchRepeatedMap() {
TupleMetadata schema = new SchemaBuilder().addMapArray("map").add("key", MinorType.INT).add("value", MinorType.VARCHAR).resumeSchema().buildSchema();
RowSetBuilder builder = fixture.rowSetBuilder(schema);
// No rows are added, so the built row set is empty.
RowSet rows = builder.build();
// Run the record batch sizer on the resulting batch.
RecordBatchSizer sizer = new RecordBatchSizer(rows.container());
// Only the top-level "map" column is reported.
assertEquals(1, sizer.columns().size());
/*
 * Expected values for the "map" column:
 * stdDataSize:50+4, stdNetSize:50+4+4+4, dataSizePerEntry:0,
 * netSizePerEntry: 0,
 * totalDataSize:0, totalNetSize:0,
 * valueCount:0,
 * elementCount:0, cardinality:0, isVariableWidth:true
 *
 * NOTE(review): this list says isVariableWidth:true, but the last argument
 * passed below is false -- confirm which is intended against the
 * verifyColumnValues signature.
 */
verifyColumnValues(sizer.columns().get("map"), 54, 62, 0, 0, 0, 0, 0, 0, 0, false);
// Verify memory allocation is done correctly based on std size for empty batch.
SingleRowSet empty = fixture.rowSet(schema);
VectorAccessible accessible = empty.vectorAccessible();
// Reused below for both the map's own offset vector and the "value" child's offset vector.
UInt4Vector offsetVector;
for (VectorWrapper<?> vw : accessible) {
ValueVector v = vw.getValueVector();
RecordBatchSizer.ColumnSize colSize = sizer.getColumn(v.getField().getName());
// Case 1: allocate for testRowCount rows. Capacities are expected to be
// powers of two: highestOneBit(n) << 1 for the map's offset vector, and the
// same formula applied to n * STD_REPETITION_FACTOR for the child vectors.
colSize.allocateVector(v, testRowCount);
RepeatedMapVector mapVector = (RepeatedMapVector) v;
offsetVector = ((RepeatedValueVector) mapVector).getOffsetVector();
assertEquals((Integer.highestOneBit(testRowCount) << 1), offsetVector.getValueCapacity());
ValueVector keyVector = mapVector.getChild("key");
ValueVector valueVector1 = mapVector.getChild("value");
assertEquals(((Integer.highestOneBit(testRowCount * STD_REPETITION_FACTOR) << 1)), keyVector.getValueCapacity());
offsetVector = ((VariableWidthVector) valueVector1).getOffsetVector();
assertEquals((Integer.highestOneBit(testRowCount * STD_REPETITION_FACTOR) << 1), offsetVector.getValueCapacity());
// The variable-width child is expected to hold one value fewer than its offset vector has entries.
assertEquals(Integer.highestOneBit(testRowCount * STD_REPETITION_FACTOR << 1) - 1, valueVector1.getValueCapacity());
// Case 2: allocate for testRowCountPowerTwo - 1 rows. The map's offset
// vector is expected to end up with exactly testRowCountPowerTwo entries.
colSize.allocateVector(v, testRowCountPowerTwo - 1);
mapVector = (RepeatedMapVector) v;
offsetVector = ((RepeatedValueVector) mapVector).getOffsetVector();
assertEquals(testRowCountPowerTwo, offsetVector.getValueCapacity());
keyVector = mapVector.getChild("key");
valueVector1 = mapVector.getChild("value");
assertEquals(Integer.highestOneBit(testRowCountPowerTwo * STD_REPETITION_FACTOR) << 1, keyVector.getValueCapacity());
offsetVector = ((VariableWidthVector) valueVector1).getOffsetVector();
assertEquals(Integer.highestOneBit((int) (testRowCountPowerTwo * STD_REPETITION_FACTOR)) << 1, offsetVector.getValueCapacity());
assertEquals((Integer.highestOneBit(testRowCountPowerTwo * STD_REPETITION_FACTOR << 1)) - 1, valueVector1.getValueCapacity());
// Case 3: allocate for max rows (ValueVector.MAX_ROW_COUNT - 1 requested).
colSize.allocateVector(v, ValueVector.MAX_ROW_COUNT - 1);
mapVector = (RepeatedMapVector) v;
offsetVector = ((RepeatedValueVector) mapVector).getOffsetVector();
assertEquals(ValueVector.MAX_ROW_COUNT, offsetVector.getValueCapacity());
keyVector = mapVector.getChild("key");
valueVector1 = mapVector.getChild("value");
assertEquals(Integer.highestOneBit(ValueVector.MAX_ROW_COUNT * STD_REPETITION_FACTOR) << 1, keyVector.getValueCapacity());
offsetVector = ((VariableWidthVector) valueVector1).getOffsetVector();
assertEquals(Integer.highestOneBit(ValueVector.MAX_ROW_COUNT * STD_REPETITION_FACTOR) << 1, offsetVector.getValueCapacity());
assertEquals((Integer.highestOneBit(ValueVector.MAX_ROW_COUNT * STD_REPETITION_FACTOR) << 1) - 1, valueVector1.getValueCapacity());
// Case 4: allocate for 0 rows. Should at least do allocation for 1 row;
// the capacities below are expressed in terms of ValueVector.MIN_ROW_COUNT.
colSize.allocateVector(v, 0);
mapVector = (RepeatedMapVector) v;
offsetVector = ((RepeatedValueVector) mapVector).getOffsetVector();
assertEquals(ValueVector.MIN_ROW_COUNT, offsetVector.getValueCapacity());
keyVector = mapVector.getChild("key");
valueVector1 = mapVector.getChild("value");
assertEquals(ValueVector.MIN_ROW_COUNT, keyVector.getValueCapacity());
offsetVector = ((VariableWidthVector) valueVector1).getOffsetVector();
assertEquals(ValueVector.MIN_ROW_COUNT + 1, offsetVector.getValueCapacity());
assertEquals(ValueVector.MIN_ROW_COUNT, valueVector1.getValueCapacity());
// Clear the vector once all allocation cases have been checked.
v.clear();
}
// Clear both row sets created by this test.
empty.clear();
rows.clear();
}
Aggregations