Use of org.apache.drill.exec.vector.complex.MapVector in the project drill by axbaretto.
From the class HBaseRecordReader, method setup.
/**
 * Prepares this reader for scanning: records the operator context and output
 * mutator, opens the HBase table, registers the output vectors for the
 * requested columns and finally opens the result scanner.
 *
 * @param context operator context stored on this reader
 * @param output  mutator through which output vectors are registered
 * @throws ExecutionSetupException wrapping any {@link SchemaChangeException}
 *         or {@link IOException} raised while setting up
 */
@Override
public void setup(OperatorContext context, OutputMutator output) throws ExecutionSetupException {
  operatorContext = context;
  outputMutator = output;
  familyVectorMap = new HashMap<>();

  try {
    hTable = connection.getTable(hbaseTableName);

    // Add top-level vectors to the output in the order the columns were
    // specified when creating the reader (order of first appearance in query).
    for (SchemaPath requested : getColumns()) {
      if (!requested.equals(ROW_KEY_PATH)) {
        // Anything other than the row key is addressed by its column family.
        getOrCreateFamilyVector(requested.getRootSegment().getPath(), false);
        continue;
      }
      MaterializedField rowKeyField =
          MaterializedField.create(requested.getAsNamePart().getName(), ROW_KEY_TYPE);
      rowKeyVector = outputMutator.addField(rowKeyField, VarBinaryVector.class);
    }

    // Add map and child vectors for any HBase columns that are requested (in
    // order to avoid later creation of dummy NullableIntVectors for them).
    for (Map.Entry<byte[], NavigableSet<byte[]>> family
        : hbaseScanColumnsOnly.getFamilyMap().entrySet()) {
      String family_name = new String(family.getKey(), StandardCharsets.UTF_8);
      MapVector familyMap = getOrCreateFamilyVector(family_name, false);

      Set<byte[]> qualifiers = family.getValue();
      if (qualifiers == null) {
        // No individual columns listed for this family.
        continue;
      }
      for (byte[] qualifier : qualifiers) {
        getOrCreateColumnVector(familyMap, new String(qualifier, StandardCharsets.UTF_8));
      }
    }

    // Add map vectors for any HBase column families that are requested.
    for (String wholeFamily : completeFamilies) {
      getOrCreateFamilyVector(wholeFamily, false);
    }

    resultScanner = hTable.getScanner(hbaseScan);
  } catch (SchemaChangeException | IOException e) {
    throw new ExecutionSetupException(e);
  }
}
Use of org.apache.drill.exec.vector.complex.MapVector in the project drill by axbaretto.
From the class VectorContainerBuilder, method buildMap.
/**
 * Creates the output map vector for a map column, adds it and its (child-less)
 * schema to the parent tuple, then updates the new map with the columns of
 * the map's own state.
 *
 * @param parentTuple proxy for the tuple that receives the new map vector
 * @param colModel    state of the map column being built
 */
@SuppressWarnings("resource")
private void buildMap(TupleProxy parentTuple, BaseMapColumnState colModel) {
  // Creating the map vector will create its contained vectors if we
  // give it a materialized field with children. So, instead pass a clone
  // without children so we can add them.
  ColumnMetadata mapColSchema = colModel.schema().cloneEmpty();

  // Don't get the map vector from the vector cache. Map vectors may
  // have content that varies from batch to batch. Only the leaf
  // vectors can be cached.
  AbstractMapVector mapVector;
  if (mapColSchema.isArray()) {
    // A repeated map shares an offset vector with the internal
    // repeated map.
    UInt4Vector offsets = (UInt4Vector) colModel.vector();
    mapVector = new RepeatedMapVector(mapColSchema.schema(), offsets, null);
  } else {
    mapVector = new MapVector(mapColSchema.schema(), allocator(), null);
  }

  // Add the map vector and schema to the parent tuple
  parentTuple.add(mapVector);
  final int index = parentTuple.schema.addColumn(mapColSchema);

  // Vectors and schema columns are added in pairs, so their counts must
  // agree. (The previous check compared parentTuple.size() with itself and
  // therefore could never fail.)
  assert parentTuple.size() == parentTuple.schema.size();

  // Update the tuple, which will add the new columns in the map
  updateTuple(colModel.mapState(), parentTuple.mapProxy(index));
}
Use of org.apache.drill.exec.vector.complex.MapVector in the project drill by axbaretto.
From the class TestValueVector, method testVectors.
/**
 * Runs the given verifier once against each of a fixed set of
 * {@link ValueVector vector} instances (scalar, nullable, repeated list, map
 * and repeated map), and closes all of the vectors afterwards.
 *
 * @param test test function to execute
 * @throws Exception propagated from the verifier or from closing the vectors
 */
@SuppressWarnings("resource")
private void testVectors(VectorVerifier test) throws Exception {
  // One field per vector under test; positions line up with the array below.
  final MaterializedField[] schemas = {
      MaterializedField.create(EMPTY_SCHEMA_PATH, UInt4Holder.TYPE),
      MaterializedField.create(EMPTY_SCHEMA_PATH, BitHolder.TYPE),
      MaterializedField.create(EMPTY_SCHEMA_PATH, VarCharHolder.TYPE),
      MaterializedField.create(EMPTY_SCHEMA_PATH, NullableVarCharHolder.TYPE),
      MaterializedField.create(EMPTY_SCHEMA_PATH, RepeatedListVector.TYPE),
      MaterializedField.create(EMPTY_SCHEMA_PATH, MapVector.TYPE),
      MaterializedField.create(EMPTY_SCHEMA_PATH, RepeatedMapVector.TYPE)
  };

  final ValueVector[] instances = {
      new UInt4Vector(schemas[0], allocator),
      new BitVector(schemas[1], allocator),
      new VarCharVector(schemas[2], allocator),
      new NullableVarCharVector(schemas[3], allocator),
      new RepeatedListVector(schemas[4], allocator, null),
      new MapVector(schemas[5], allocator, null),
      new RepeatedMapVector(schemas[6], allocator, null)
  };

  try {
    for (int i = 0; i < instances.length; i++) {
      test.verify(instances[i]);
    }
  } finally {
    // Close every vector even when a verification fails.
    AutoCloseables.close(instances);
  }
}
Use of org.apache.drill.exec.vector.complex.MapVector in the project drill by axbaretto.
From the class TestRecordBatchSizer, method testSizerMap.
/**
 * Sizes a ten-row batch holding a single map column ("key" INT, "value"
 * VARCHAR), checks the figures the sizer reports for that column, and then
 * uses the reported column size to allocate an empty map vector for several
 * row counts, checking the capacity of the children after each allocation.
 */
@Test
public void testSizerMap() {
  BatchSchema mapSchema = new SchemaBuilder()
      .addMap("map")
        .add("key", MinorType.INT)
        .add("value", MinorType.VARCHAR)
        .resumeSchema()
      .build();

  RowSetBuilder rowBuilder = fixture.rowSetBuilder(mapSchema);
  for (int row = 0; row < 10; row++) {
    rowBuilder.addRow((Object) (new Object[] { 10, "a" }));
  }
  RowSet populated = rowBuilder.build();

  // Run the record batch sizer on the resulting batch.
  RecordBatchSizer sizer = new RecordBatchSizer(populated.container());
  assertEquals(1, sizer.columns().size());

  /*
   * Expected figures, in argument order:
   * stdDataSize:50+4, stdNetSize:50+4+4, dataSizePerEntry:4+1,
   * netSizePerEntry: 4+1+4,
   * totalDataSize:5*10, totalNetSize:4*10+4*10+1*10,
   * valueCount:10,
   * elementCount:10, estElementCountPerArray:1, isVariableWidth:false
   */
  verifyColumnValues(sizer.columns().get("map"), 54, 58, 5, 9, 50, 90, 10, 10, 1, false);

  SingleRowSet unallocated = fixture.rowSet(mapSchema);
  for (VectorWrapper<?> wrapper : unallocated.vectorAccessible()) {
    ValueVector vector = wrapper.getValueVector();
    RecordBatchSizer.ColumnSize mapSize = sizer.getColumn(vector.getField().getName());

    // Allocates to nearest power of two
    mapSize.allocateVector(vector, testRowCount);
    MapVector map = (MapVector) vector;
    ValueVector keys = map.getChild("key");
    ValueVector values = map.getChild("value");
    int roundedUp = Integer.highestOneBit(testRowCount) << 1;
    assertEquals(roundedUp, keys.getValueCapacity());
    UInt4Vector offsets = ((VariableWidthVector) values).getOffsetVector();
    assertEquals(roundedUp, offsets.getValueCapacity());
    assertEquals(Integer.highestOneBit(testRowCount << 1) - 1, values.getValueCapacity());

    // Allocates the same as value passed since it is already power of two.
    int justUnderPowerTwo = testRowCountPowerTwo - 1;
    mapSize.allocateVector(vector, justUnderPowerTwo);
    keys = map.getChild("key");
    values = map.getChild("value");
    assertEquals(Integer.highestOneBit(justUnderPowerTwo) << 1, keys.getValueCapacity());
    offsets = ((VariableWidthVector) values).getOffsetVector();
    assertEquals(testRowCountPowerTwo, offsets.getValueCapacity());
    assertEquals(Integer.highestOneBit(testRowCountPowerTwo) - 1, values.getValueCapacity());

    // Allocate for max rows.
    mapSize.allocateVector(vector, ValueVector.MAX_ROW_COUNT - 1);
    keys = map.getChild("key");
    values = map.getChild("value");
    assertEquals(ValueVector.MAX_ROW_COUNT, keys.getValueCapacity());
    offsets = ((VariableWidthVector) values).getOffsetVector();
    assertEquals(ValueVector.MAX_ROW_COUNT, offsets.getValueCapacity());
    assertEquals(ValueVector.MAX_ROW_COUNT - 1, values.getValueCapacity());

    // Allocate for 0 rows. Should at least do allocation for 1 row.
    mapSize.allocateVector(vector, 0);
    keys = map.getChild("key");
    values = map.getChild("value");
    assertEquals(ValueVector.MIN_ROW_COUNT, keys.getValueCapacity());
    offsets = ((VariableWidthVector) values).getOffsetVector();
    assertEquals(ValueVector.MIN_ROW_COUNT + 1, offsets.getValueCapacity());
    assertEquals(ValueVector.MIN_ROW_COUNT, values.getValueCapacity());

    vector.clear();
  }

  unallocated.clear();
  populated.clear();
}
Use of org.apache.drill.exec.vector.complex.MapVector in the project drill by axbaretto.
From the class RowSetTest, method testMapStructure.
/**
 * Test a simple map structure at the top level of a row.
 * <p>
 * The schema is an INT column "a" followed by a map "m" that contains an
 * INT array "b". The test checks the writer structure, writes three rows,
 * checks the reader structure and values, and finally compares the result
 * against a row set built directly from the expected values.
 */
@Test
public void testMapStructure() {
TupleMetadata schema = new SchemaBuilder().add("a", MinorType.INT).addMap("m").addArray("b", MinorType.INT).resumeSchema().buildSchema();
ExtendableRowSet rowSet = fixture.rowSet(schema);
RowSetWriter writer = rowSet.writer();
// Map and Int
// Test Invariants: columns are reachable by name and by index, and both
// routes report the same object type.
assertEquals(ObjectType.SCALAR, writer.column("a").type());
assertEquals(ObjectType.SCALAR, writer.column(0).type());
assertEquals(ObjectType.TUPLE, writer.column("m").type());
assertEquals(ObjectType.TUPLE, writer.column(1).type());
// The tuple shortcut must hand back the same writer as column(1).tuple().
assertSame(writer.column(1).tuple(), writer.tuple(1));
TupleWriter mapWriter = writer.column(1).tuple();
assertEquals(ObjectType.SCALAR, mapWriter.column("b").array().entry().type());
assertEquals(ObjectType.SCALAR, mapWriter.column("b").array().entryType());
ScalarWriter aWriter = writer.column("a").scalar();
ScalarWriter bWriter = writer.column("m").tuple().column("b").array().entry().scalar();
// Navigating by index must reach the same element writer as navigating by name.
assertSame(bWriter, writer.tuple(1).array(0).scalar());
assertEquals(ValueType.INTEGER, bWriter.valueType());
// A map column must refuse to be treated as a scalar...
try {
writer.column(1).scalar();
fail();
} catch (UnsupportedOperationException e) {
// Expected
}
// ...or as an array.
try {
writer.column(1).array();
fail();
} catch (UnsupportedOperationException e) {
// Expected
}
// Write data: three rows, each with one value for "a" and two values for
// the array "m.b" (see the reader assertions and expected row set below).
aWriter.setInt(10);
bWriter.setInt(11);
bWriter.setInt(12);
writer.save();
aWriter.setInt(20);
bWriter.setInt(21);
bWriter.setInt(22);
writer.save();
aWriter.setInt(30);
bWriter.setInt(31);
bWriter.setInt(32);
writer.save();
// Finish the row set and get a reader.
SingleRowSet actual = writer.done();
RowSetReader reader = actual.reader();
// The reader must expose the same structure as the writer did.
assertEquals(ObjectType.SCALAR, reader.column("a").type());
assertEquals(ObjectType.SCALAR, reader.column(0).type());
assertEquals(ObjectType.TUPLE, reader.column("m").type());
assertEquals(ObjectType.TUPLE, reader.column(1).type());
assertSame(reader.column(1).tuple(), reader.tuple(1));
ScalarReader aReader = reader.column(0).scalar();
TupleReader mReader = reader.column(1).tuple();
assertEquals(ObjectType.SCALAR, mReader.column("b").array().entryType());
ScalarElementReader bReader = mReader.column(0).elements();
assertEquals(ValueType.INTEGER, bReader.valueType());
// Read back the three rows in order and check every value written above.
assertTrue(reader.next());
assertEquals(10, aReader.getInt());
assertEquals(11, bReader.getInt(0));
assertEquals(12, bReader.getInt(1));
assertTrue(reader.next());
assertEquals(20, aReader.getInt());
assertEquals(21, bReader.getInt(0));
assertEquals(22, bReader.getInt(1));
assertTrue(reader.next());
assertEquals(30, aReader.getInt());
assertEquals(31, bReader.getInt(0));
assertEquals(32, bReader.getInt(1));
// No fourth row.
assertFalse(reader.next());
// Verify that the map accessor's value count was set.
@SuppressWarnings("resource") MapVector mapVector = (MapVector) actual.container().getValueVector(1).getValueVector();
assertEquals(actual.rowCount(), mapVector.getAccessor().getValueCount());
// Build the expected rows directly and compare; verifyAndClearAll is
// presumably responsible for releasing both row sets — confirm against RowSetComparison.
SingleRowSet expected = fixture.rowSetBuilder(schema).addRow(10, objArray(intArray(11, 12))).addRow(20, objArray(intArray(21, 22))).addRow(30, objArray(intArray(31, 32))).build();
new RowSetComparison(expected).verifyAndClearAll(actual);
}
Aggregations