Use of org.apache.drill.exec.vector.complex.MapVector in project drill by apache.
In the class TestRowSet, the method testMapStructure.
/**
* Test a simple map structure at the top level of a row.
*
* @throws VectorOverflowException should never occur
*/
@Test
public void testMapStructure() {
final TupleMetadata schema = new SchemaBuilder()
    .add("a", MinorType.INT)
    .addMap("m")
      .addArray("b", MinorType.INT)
      .resumeSchema()
    .buildSchema();
final ExtendableRowSet rowSet = fixture.rowSet(schema);
final RowSetWriter writer = rowSet.writer();
// Schema: an INT column "a" and a map "m" containing a repeated INT "b".
// Verify invariants of the writer structure.
assertEquals(ObjectType.SCALAR, writer.column("a").type());
assertEquals(ObjectType.SCALAR, writer.column(0).type());
assertEquals(ObjectType.TUPLE, writer.column("m").type());
assertEquals(ObjectType.TUPLE, writer.column(1).type());
assertSame(writer.column(1).tuple(), writer.tuple(1));
final TupleWriter mapWriter = writer.column(1).tuple();
assertEquals(ObjectType.SCALAR, mapWriter.column("b").array().entry().type());
assertEquals(ObjectType.SCALAR, mapWriter.column("b").array().entryType());
final ScalarWriter aWriter = writer.column("a").scalar();
final ScalarWriter bWriter = writer.column("m").tuple().column("b").array().entry().scalar();
assertSame(bWriter, writer.tuple(1).array(0).scalar());
assertEquals(ValueType.INTEGER, bWriter.valueType());
try {
writer.column(1).scalar();
fail();
} catch (final UnsupportedOperationException e) {
// Expected
}
try {
writer.column(1).array();
fail();
} catch (final UnsupportedOperationException e) {
// Expected
}
// Write data
aWriter.setInt(10);
bWriter.setInt(11);
bWriter.setInt(12);
writer.save();
aWriter.setInt(20);
bWriter.setInt(21);
bWriter.setInt(22);
writer.save();
aWriter.setInt(30);
bWriter.setInt(31);
bWriter.setInt(32);
writer.save();
// Finish the row set and get a reader.
final SingleRowSet actual = writer.done();
final RowSetReader reader = actual.reader();
assertEquals(ObjectType.SCALAR, reader.column("a").type());
assertEquals(ObjectType.SCALAR, reader.column(0).type());
assertEquals(ObjectType.TUPLE, reader.column("m").type());
assertEquals(ObjectType.TUPLE, reader.column(1).type());
assertSame(reader.column(1).tuple(), reader.tuple(1));
final ScalarReader aReader = reader.column(0).scalar();
final TupleReader mReader = reader.column(1).tuple();
final ArrayReader bArray = mReader.column("b").array();
assertEquals(ObjectType.SCALAR, bArray.entryType());
final ScalarReader bReader = bArray.scalar();
assertEquals(ValueType.INTEGER, bReader.valueType());
// Row 1: (10, {[11, 12]})
assertTrue(reader.next());
assertEquals(10, aReader.getInt());
assertFalse(mReader.isNull());
assertTrue(bArray.next());
assertFalse(bReader.isNull());
assertEquals(11, bReader.getInt());
assertTrue(bArray.next());
assertFalse(bReader.isNull());
assertEquals(12, bReader.getInt());
assertFalse(bArray.next());
// Row 2: (20, {[21, 22]})
assertTrue(reader.next());
assertEquals(20, aReader.getInt());
assertFalse(mReader.isNull());
assertTrue(bArray.next());
assertEquals(21, bReader.getInt());
assertTrue(bArray.next());
assertEquals(22, bReader.getInt());
// Row 3: (30, {[31, 32]})
assertTrue(reader.next());
assertEquals(30, aReader.getInt());
assertFalse(mReader.isNull());
assertTrue(bArray.next());
assertEquals(31, bReader.getInt());
assertTrue(bArray.next());
assertEquals(32, bReader.getInt());
assertFalse(reader.next());
// Verify that the map accessor's value count was set.
final MapVector mapVector = (MapVector) actual.container().getValueVector(1).getValueVector();
assertEquals(actual.rowCount(), mapVector.getAccessor().getValueCount());
final SingleRowSet expected = fixture.rowSetBuilder(schema)
    .addRow(10, objArray(intArray(11, 12)))
    .addRow(20, objArray(intArray(21, 22)))
    .addRow(30, objArray(intArray(31, 32)))
    .build();
RowSetUtilities.verify(expected, actual);
}
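The same structure can also be inspected directly through the MapVector API once the row set is built. A minimal sketch, reusing the actual row set from the test above; it assumes getChild("b") returns the repeated INT child that was populated through bWriter.
// Direct inspection of the map's child vector (illustrative only).
final MapVector m = (MapVector) actual.container().getValueVector(1).getValueVector();
final ValueVector b = m.getChild("b");
// The child created by the schema builder should exist, and the map's value count
// should match the row count, mirroring the assertion in the test above.
assertNotNull(b);
assertEquals(actual.rowCount(), m.getAccessor().getValueCount());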
Use of org.apache.drill.exec.vector.complex.MapVector in project drill by apache.
In the class TestRecordBatchSizer, the method testEmptyBatchMap.
@Test
public void testEmptyBatchMap() {
TupleMetadata schema = new SchemaBuilder()
    .addMap("map")
      .add("key", MinorType.INT)
      .add("value", MinorType.VARCHAR)
      .resumeSchema()
    .buildSchema();
RowSetBuilder builder = fixture.rowSetBuilder(schema);
RowSet rows = builder.build();
// Run the record batch sizer on the resulting batch.
RecordBatchSizer sizer = new RecordBatchSizer(rows.container());
assertEquals(1, sizer.columns().size());
/**
 * stdDataSize:50+4, stdNetSize:50+4+4, dataSizePerEntry:0,
 * netSizePerEntry:0,
 * totalDataSize:0, totalNetSize:0,
 * valueCount:0,
 * elementCount:0, cardinality:0, isVariableWidth:false
 */
verifyColumnValues(sizer.columns().get("map"), 54, 58, 0, 0, 0, 0, 0, 0, 0, false);
// Verify memory allocation is done correctly based on std size for empty batch.
SingleRowSet empty = fixture.rowSet(schema);
VectorAccessible accessible = empty.vectorAccessible();
for (VectorWrapper<?> vw : accessible) {
ValueVector v = vw.getValueVector();
RecordBatchSizer.ColumnSize colSize = sizer.getColumn(v.getField().getName());
// Allocates to nearest power of two
colSize.allocateVector(v, testRowCount);
MapVector mapVector = (MapVector) v;
ValueVector keyVector = mapVector.getChild("key");
ValueVector valueVector1 = mapVector.getChild("value");
assertEquals((Integer.highestOneBit(testRowCount) << 1), keyVector.getValueCapacity());
UInt4Vector offsetVector = ((VariableWidthVector) valueVector1).getOffsetVector();
assertEquals((Integer.highestOneBit(testRowCount) << 1), offsetVector.getValueCapacity());
assertEquals(Integer.highestOneBit(testRowCount << 1) - 1, valueVector1.getValueCapacity());
// Allocating one less than a power of two rounds the capacity up to that power of two.
colSize.allocateVector(v, testRowCountPowerTwo - 1);
mapVector = (MapVector) v;
keyVector = mapVector.getChild("key");
valueVector1 = mapVector.getChild("value");
assertEquals((Integer.highestOneBit(testRowCountPowerTwo - 1) << 1), keyVector.getValueCapacity());
offsetVector = ((VariableWidthVector) valueVector1).getOffsetVector();
assertEquals(testRowCountPowerTwo, offsetVector.getValueCapacity());
assertEquals(Integer.highestOneBit(testRowCountPowerTwo) - 1, valueVector1.getValueCapacity());
// Allocate for max rows.
colSize.allocateVector(v, ValueVector.MAX_ROW_COUNT - 1);
mapVector = (MapVector) v;
keyVector = mapVector.getChild("key");
valueVector1 = mapVector.getChild("value");
assertEquals(ValueVector.MAX_ROW_COUNT, keyVector.getValueCapacity());
offsetVector = ((VariableWidthVector) valueVector1).getOffsetVector();
assertEquals(ValueVector.MAX_ROW_COUNT, offsetVector.getValueCapacity());
assertEquals(ValueVector.MAX_ROW_COUNT - 1, valueVector1.getValueCapacity());
// Allocate for 0 rows; should allocate for at least 1 row.
colSize.allocateVector(v, 0);
mapVector = (MapVector) v;
keyVector = mapVector.getChild("key");
valueVector1 = mapVector.getChild("value");
assertEquals(ValueVector.MIN_ROW_COUNT, keyVector.getValueCapacity());
offsetVector = ((VariableWidthVector) valueVector1).getOffsetVector();
assertEquals(ValueVector.MIN_ROW_COUNT + 1, offsetVector.getValueCapacity());
assertEquals(ValueVector.MIN_ROW_COUNT, valueVector1.getValueCapacity());
v.clear();
}
empty.clear();
rows.clear();
}
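The power-of-two assertions above reduce to plain JDK arithmetic. A small sketch of that rounding; the literal 100 is a hypothetical stand-in for the fixture's testRowCount.
// Integer.highestOneBit(n) << 1 is the next power of two above n, which is what the
// capacity assertions above check after allocateVector(v, n).
int rowCount = 100;
int capacity = Integer.highestOneBit(rowCount) << 1;
System.out.println(capacity); // prints 128
// Passing one less than a power of two keeps that power of two as the capacity,
// which is why the test calls allocateVector(v, testRowCountPowerTwo - 1).
System.out.println(Integer.highestOneBit(128 - 1) << 1); // prints 128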
Use of org.apache.drill.exec.vector.complex.MapVector in project drill by apache.
In the class StatisticsAggBatch, the method createAggregatorInternal.
@Override
protected StreamingAggregator createAggregatorInternal() {
List<LogicalExpression> keyExprs = Lists.newArrayList();
List<LogicalExpression> valueExprs = Lists.newArrayList();
List<TypedFieldId> keyOutputIds = Lists.newArrayList();
String[] colMeta = new String[] { Statistic.COLNAME, Statistic.COLTYPE };
container.clear();
// For each implicit metadata column (column name, column type), build a map with one
// nested key column per incoming field, skipping implicit file/partition columns.
for (String col : colMeta) {
MapVector parent = new MapVector(col, oContext.getAllocator(), null);
container.add(parent);
for (MaterializedField mf : incoming.getSchema()) {
LogicalExpression expr;
if (col.equals(colMeta[0])) {
expr = ValueExpressions.getChar(SchemaPath.getSimplePath(mf.getName()).toString(), 0);
} else {
try {
expr = ValueExpressions.getChar(DrillStatsTable.getMapper().writeValueAsString(mf.getType()), 0);
} catch (JsonProcessingException e) {
throw UserException.dataWriteError(e).addContext("Failed to write statistics to JSON").build();
}
}
// Ignore implicit columns
if (!isImplicitFileOrPartitionColumn(mf, incoming.getContext().getOptions())) {
createNestedKeyColumn(parent, SchemaPath.getSimplePath(mf.getName()).toString(), expr, keyExprs, keyOutputIds);
}
}
}
// For each statistics function, build a map whose entries associate each eligible column
// with the computed statistic, e.g. for NDV: employee NDV = 500, salary NDV = 10.
for (String func : functions) {
MapVector parent = new MapVector(func, oContext.getAllocator(), null);
container.add(parent);
for (MaterializedField mf : incoming.getSchema()) {
// Skip columns whose minor type is unsupported; complex types such as MAP and LIST are not supported.
if (isColMinorTypeValid(mf) && !isImplicitFileOrPartitionColumn(mf, incoming.getContext().getOptions())) {
List<LogicalExpression> args = Lists.newArrayList();
args.add(SchemaPath.getSimplePath(mf.getName()));
LogicalExpression call = FunctionCallFactory.createExpression(func, args);
addMapVector(SchemaPath.getSimplePath(mf.getName()).toString(), parent, call, valueExprs);
}
}
}
// Now generate the code for the statistics aggregate
return codegenAggregator(keyExprs, valueExprs, keyOutputIds);
}
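For a single incoming column and one statistic, the expressions built by the loops above reduce to the following sketch. It reuses only calls already visible in the method; the column name "salary" and the function name "ndv" are illustrative assumptions, since the real names come from the incoming schema and the configured functions list.
// Key expression for the implicit column-name map: the column name as a CHAR literal.
String colName = SchemaPath.getSimplePath("salary").toString();
LogicalExpression keyExpr = ValueExpressions.getChar(colName, 0);
// Value expression for a statistics map: the aggregate function applied to the column.
List<LogicalExpression> args = Lists.newArrayList();
args.add(SchemaPath.getSimplePath("salary"));
LogicalExpression statCall = FunctionCallFactory.createExpression("ndv", args);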
Use of org.apache.drill.exec.vector.complex.MapVector in project drill by apache.
In the class ColumnMergedStatistic, the method setOutput.
@Override
public void setOutput(MapVector output) {
// Check that the vector passed in is a MAP vector.
assert (output.getField().getType().getMinorType() == TypeProtos.MinorType.MAP);
MapVector outputMap = (MapVector) output;
for (ValueVector outMapCol : outputMap) {
String colName = outMapCol.getField().getName();
VarCharVector vv = (VarCharVector) outMapCol;
vv.allocateNewSafe();
// Set column name in ValueVector
vv.getMutator().setSafe(0, colName.getBytes(), 0, colName.length());
}
// Now moving to COMPLETE state
state = State.COMPLETE;
}
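A short sketch of how the values written above could be read back for verification; it assumes the VarCharVector accessor's get(0) returns the stored bytes and that column names are ASCII, so the charset round-trip is lossless.
// Read back the column name written at index 0 of each child vector (illustrative only).
for (ValueVector outMapCol : outputMap) {
VarCharVector vv = (VarCharVector) outMapCol;
String stored = new String(vv.getAccessor().get(0), StandardCharsets.UTF_8);
assert stored.equals(outMapCol.getField().getName());
}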
Use of org.apache.drill.exec.vector.complex.MapVector in project drill by apache.
In the class HBaseRecordReader, the method setup.
@Override
public void setup(OperatorContext context, OutputMutator output) throws ExecutionSetupException {
this.operatorContext = context;
this.outputMutator = output;
familyVectorMap = new HashMap<>();
try {
hTable = connection.getTable(hbaseTableName);
// Add top-level column-family map vectors to the output in the order specified when
// creating the reader (order of first appearance in the query).
for (SchemaPath column : getColumns()) {
if (column.equals(ROW_KEY_PATH)) {
MaterializedField field = MaterializedField.create(column.getAsNamePart().getName(), ROW_KEY_TYPE);
rowKeyVector = outputMutator.addField(field, VarBinaryVector.class);
} else {
getOrCreateFamilyVector(column.getRootSegment().getPath(), false);
}
}
// Add map and child vectors for any HBase columns that are requested (in
// order to avoid later creation of dummy NullableIntVectors for them).
final Set<Map.Entry<byte[], NavigableSet<byte[]>>> familiesEntries = hbaseScanColumnsOnly.getFamilyMap().entrySet();
for (Map.Entry<byte[], NavigableSet<byte[]>> familyEntry : familiesEntries) {
final String familyName = new String(familyEntry.getKey(), StandardCharsets.UTF_8);
final MapVector familyVector = getOrCreateFamilyVector(familyName, false);
final Set<byte[]> children = familyEntry.getValue();
if (null != children) {
for (byte[] childNameBytes : children) {
final String childName = new String(childNameBytes, StandardCharsets.UTF_8);
getOrCreateColumnVector(familyVector, childName);
}
}
}
// Add map vectors for any HBase column families that are requested.
for (String familyName : completeFamilies) {
getOrCreateFamilyVector(familyName, false);
}
resultScanner = hTable.getScanner(hbaseScan);
} catch (SchemaChangeException | IOException e) {
throw new ExecutionSetupException(e);
}
}
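The helpers getOrCreateFamilyVector and getOrCreateColumnVector are not shown above. The following sketch shows what creating a child vector for one HBase qualifier is presumed to look like, based on MapVector's addOrGet API; the VARBINARY type and the allocate-only-on-first-creation behavior are assumptions, not code taken from this reader.
// Presumed child-vector creation for one qualifier inside a column-family map.
int oldSize = familyVector.size();
NullableVarBinaryVector child = familyVector.addOrGet(childName, Types.optional(MinorType.VARBINARY), NullableVarBinaryVector.class);
if (oldSize != familyVector.size()) {
// Allocate only when the child was newly created by addOrGet.
child.allocateNew();
}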