use of org.apache.arrow.vector.complex.MapVector in project aws-athena-query-federation by awslabs.
the class BlockTest method generateTestBlock.
/**
 * Creates a Block for the supplied schema and fills every field with deterministic test values.
 *
 * <p>Most scalar values are derived from the row index multiplied by a 1-based field counter (and a
 * per-type factor); BIT, STRUCT, LIST and MAP values depend on the row index only.
 *
 * @param expectedAllocator allocator used to create the Block
 * @param origSchema schema whose fields are created and populated
 * @param expectedRows number of rows written to each field; also set as the Block's row count
 * @return the populated Block
 * @throws UnsupportedOperationException if a field's vector has a minor type that is not handled here
 * @throws IllegalStateException if a MAP field is not laid out as one ENTRIES struct holding exactly
 *         a KEY child followed by a VALUE child
 */
public static Block generateTestBlock(BlockAllocatorImpl expectedAllocator, Schema origSchema, int expectedRows) throws UnsupportedOperationException {
    Block expectedBlock = expectedAllocator.createBlock(origSchema);
    // 1-based position of the field being populated; scales the values so that columns differ.
    int fieldCount = 1;
    for (Field next : origSchema.getFields()) {
        ValueVector vector = expectedBlock.getFieldVector(next.getName());
        switch (vector.getMinorType()) {
            case DATEDAY:
                DateDayVector dateDayVector = (DateDayVector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    dateDayVector.setSafe(i, i * fieldCount);
                }
                break;
            case UINT4:
                UInt4Vector uInt4Vector = (UInt4Vector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    uInt4Vector.setSafe(i, i * fieldCount * 2);
                }
                break;
            case INT:
                IntVector intVector = (IntVector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    intVector.setSafe(i, i * fieldCount * 3);
                }
                break;
            case FLOAT8:
                Float8Vector fVector = (Float8Vector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    fVector.setSafe(i, i * fieldCount * 1.1);
                }
                break;
            case VARCHAR:
                VarCharVector vVector = (VarCharVector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    vVector.setSafe(i, String.valueOf(i * fieldCount).getBytes(Charsets.UTF_8));
                }
                break;
            case DATEMILLI:
                DateMilliVector dateMilliVector = (DateMilliVector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    dateMilliVector.setSafe(i, i * fieldCount * 4);
                }
                break;
            case TINYINT:
                TinyIntVector tinyIntVector = (TinyIntVector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    tinyIntVector.setSafe(i, i * fieldCount * 5);
                }
                break;
            case UINT1:
                UInt1Vector uInt1Vector = (UInt1Vector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    uInt1Vector.setSafe(i, i * fieldCount * 6);
                }
                break;
            case SMALLINT:
                SmallIntVector smallIntVector = (SmallIntVector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    smallIntVector.setSafe(i, i * fieldCount * 7);
                }
                break;
            case UINT2:
                UInt2Vector uInt2Vector = (UInt2Vector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    uInt2Vector.setSafe(i, i * fieldCount * 8);
                }
                break;
            case UINT8:
                UInt8Vector uInt8Vector = (UInt8Vector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    uInt8Vector.setSafe(i, i * fieldCount * 9);
                }
                break;
            case BIGINT:
                BigIntVector bigIntVector = (BigIntVector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    bigIntVector.setSafe(i, i * fieldCount * 10);
                }
                break;
            case DECIMAL:
                DecimalVector decimalVector = (DecimalVector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    // The double is rounded to two decimal places before it is written.
                    BigDecimal bigDecimal = new BigDecimal((double) (i * fieldCount) * 1.01);
                    bigDecimal = bigDecimal.setScale(2, RoundingMode.HALF_UP);
                    decimalVector.setSafe(i, bigDecimal);
                }
                break;
            case FLOAT4:
                Float4Vector float4Vector = (Float4Vector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    float4Vector.setSafe(i, i * fieldCount * 9);
                }
                break;
            case VARBINARY:
                VarBinaryVector varBinaryVector = (VarBinaryVector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    // Explicit charset so the bytes never depend on the platform default encoding.
                    byte[] data = String.valueOf(i * fieldCount).getBytes(Charsets.UTF_8);
                    varBinaryVector.setSafe(i, data);
                }
                break;
            case BIT:
                BitVector bitVector = (BitVector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    bitVector.setSafe(i, i % 2);
                }
                break;
            case STRUCT:
                StructVector sVector = (StructVector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    final int seed = i;
                    BlockUtils.setComplexValue(sVector, i, (Field field, Object value) -> {
                        if (field.getName().equals("nestedBigInt")) {
                            return (long) seed;
                        }
                        if (field.getName().equals("nestedString")) {
                            return String.valueOf(1000 + seed);
                        }
                        return sharedNestedValue(field, seed);
                    }, new Object());
                }
                break;
            case LIST:
                ListVector listVector = (ListVector) vector;
                // Resolve the element type once instead of once per candidate type.
                Types.MinorType childType = Types.getMinorTypeForArrowType(vector.getField().getChildren().get(0).getType());
                switch (childType) {
                    case BIGINT:
                        for (int i = 0; i < expectedRows; i++) {
                            List<Long> values = new ArrayList<>();
                            values.add((long) i);
                            values.add(i + 1L);
                            values.add(i + 2L);
                            BlockUtils.setComplexValue(listVector, i, FieldResolver.DEFAULT, values);
                        }
                        break;
                    case VARCHAR:
                        for (int i = 0; i < expectedRows; i++) {
                            List<String> values = new ArrayList<>();
                            values.add(String.valueOf(1000 + i));
                            values.add(String.valueOf(1000 + i + 1));
                            values.add(String.valueOf(1000 + i + 2));
                            BlockUtils.setComplexValue(listVector, i, FieldResolver.DEFAULT, values);
                        }
                        break;
                    case SMALLINT:
                        for (int i = 0; i < expectedRows; i++) {
                            BlockUtils.setComplexValue(listVector, i, FieldResolver.DEFAULT, Collections.singletonList((short) (i + 1)));
                        }
                        break;
                    case INT:
                        for (int i = 0; i < expectedRows; i++) {
                            BlockUtils.setComplexValue(listVector, i, FieldResolver.DEFAULT, Collections.singletonList(i));
                        }
                        break;
                    case TINYINT:
                        for (int i = 0; i < expectedRows; i++) {
                            BlockUtils.setComplexValue(listVector, i, FieldResolver.DEFAULT, Collections.singletonList((byte) i));
                        }
                        break;
                    case FLOAT4:
                        for (int i = 0; i < expectedRows; i++) {
                            BlockUtils.setComplexValue(listVector, i, FieldResolver.DEFAULT, Collections.singletonList((i * 1.0F)));
                        }
                        break;
                    case FLOAT8:
                    case DECIMAL:
                        // Both element types are fed the same double value.
                        for (int i = 0; i < expectedRows; i++) {
                            BlockUtils.setComplexValue(listVector, i, FieldResolver.DEFAULT, Collections.singletonList((i * 1.0D)));
                        }
                        break;
                    case VARBINARY:
                        for (int i = 0; i < expectedRows; i++) {
                            BlockUtils.setComplexValue(listVector, i, FieldResolver.DEFAULT, Collections.singletonList(String.valueOf(i).getBytes(Charsets.UTF_8)));
                        }
                        break;
                    case BIT:
                        for (int i = 0; i < expectedRows; i++) {
                            BlockUtils.setComplexValue(listVector, i, FieldResolver.DEFAULT, Collections.singletonList(i % 2 == 1));
                        }
                        break;
                    default:
                        // Lists of any other element type are intentionally left unpopulated.
                        break;
                }
                break;
            case MAP:
                MapVector mapVector = (MapVector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    final int seed = i;
                    BlockUtils.setComplexValue(mapVector, i, (Field field, Object value) -> {
                        if (field.getName().equals("key")) {
                            return String.valueOf(1000 + seed);
                        }
                        if (field.getName().equals("value")) {
                            return seed;
                        }
                        return sharedNestedValue(field, seed);
                    }, new Object());
                }
                // As before, the schema layout is checked after the rows have been written.
                validateMapSchema(vector.getField());
                break;
            default:
                throw new UnsupportedOperationException(vector.getMinorType() + " is not supported");
        }
        fieldCount++;
    }
    expectedBlock.setRowCount(expectedRows);
    return expectedBlock;
}

/**
 * Supplies the test value for the nested field names that the STRUCT and MAP resolvers have in common.
 *
 * @param field nested field a value is requested for
 * @param seed row index the value is derived from
 * @return the value for the field
 * @throws RuntimeException if the field name is not one of the known nested test fields
 */
private static Object sharedNestedValue(Field field, int seed) {
    switch (field.getName()) {
        case "tinyintcol":
            return (byte) seed;
        case "smallintcol":
            return (short) seed;
        case "nestedList": {
            List<String> values = new ArrayList<>();
            values.add("val1");
            values.add("val2");
            return values;
        }
        case "nestedListDec": {
            List<Double> values = new ArrayList<>();
            values.add(2.0D);
            values.add(2.2D);
            return values;
        }
        case "float4Col":
            return seed * 1.0F;
        case "float8Col":
            return seed * 2.0D;
        case "shortDecCol":
            return seed * 3.0D;
        case "longDecCol":
            return seed * 4.0D;
        case "binaryCol":
            return String.valueOf(seed).getBytes(Charsets.UTF_8);
        case "bitCol":
            return seed % 2 == 1;
        case "nestedStruct":
            // it just needs to be non-null
            return new Object();
        default:
            throw new RuntimeException("Unexpected field " + field.getName());
    }
}

/**
 * Checks that a MAP field has exactly one ENTRIES struct child which in turn has exactly two
 * children named KEY and VALUE, in that order.
 *
 * @param mapField the MAP field to inspect
 * @throws IllegalStateException if the field does not have that layout
 */
private static void validateMapSchema(Field mapField) {
    List<Field> children = mapField.getChildren();
    if (children.size() != 1) {
        throw new IllegalStateException("Invalid Arrow Map schema: " + mapField);
    }
    Field keyValueStructField = children.get(0);
    if (!ENTRIES.equals(keyValueStructField.getName()) || !(keyValueStructField.getType() instanceof ArrowType.Struct)) {
        throw new IllegalStateException("Invalid Arrow Map schema: " + mapField);
    }
    List<Field> keyValueChildren = keyValueStructField.getChildren();
    if (keyValueChildren.size() != 2) {
        throw new IllegalStateException("Invalid Arrow Map schema: " + mapField);
    }
    if (!KEY.equals(keyValueChildren.get(0).getName()) || !VALUE.equals(keyValueChildren.get(1).getName())) {
        throw new IllegalStateException("Invalid Arrow Map schema: " + mapField);
    }
}
use of org.apache.arrow.vector.complex.MapVector in project aws-athena-query-federation by awslabs.
the class ExampleRecordHandler method makeFactory.
/**
 * Builds a custom FieldWriterFactory for the complex-typed fields of the example schema, because
 * GeneratedRowWriter does not yet support complex types. The writers produced here emit fixed sample
 * data and only work for the exact example schema:
 * <ul>
 * <li>LIST of LIST: one inner list holding the varchar values "1000", "1001" and "1002"</li>
 * <li>LIST of STRUCT: one struct with varchar field "varchar" = "chars" and bigint field "bigint" = 100</li>
 * <li>MAP: one entry with key "chars" and integer value 1001</li>
 * </ul>
 * Every writer marks the written row as not-null and reports success by returning true.
 *
 * @param field the field a factory is needed for
 * @param rowContext not used by this implementation
 * @return a factory producing the FieldWriter for the field
 * @throws IllegalArgumentException if the field type, or the child type of a LIST field, is not handled
 */
private FieldWriterFactory makeFactory(Field field, RowContext rowContext) {
    final Types.MinorType fieldType = Types.getMinorTypeForArrowType(field.getType());
    switch (fieldType) {
        case MAP:
            return (FieldVector vector, Extractor extractor, ConstraintProjector constraint) ->
                    (FieldWriter) (Object context, int rowNum) -> {
                        final MapVector mapVector = (MapVector) vector;
                        final UnionMapWriter mapWriter = mapVector.getWriter();
                        mapWriter.setPosition(rowNum);
                        mapWriter.startMap();
                        mapWriter.startEntry();
                        final byte[] keyBytes = "chars".getBytes(Charsets.UTF_8);
                        // The buffer is only needed while the key is handed to the writer.
                        try (ArrowBuf keyBuf = vector.getAllocator().buffer(keyBytes.length)) {
                            keyBuf.writeBytes(keyBytes);
                            mapWriter.key().varChar("key").writeVarChar(0, (int) (keyBuf.readableBytes()), keyBuf);
                        }
                        mapWriter.value().integer("value").writeInt(1001);
                        mapWriter.endEntry();
                        mapWriter.endMap();
                        mapVector.setNotNull(rowNum);
                        return true;
                    };
        case LIST:
            final Types.MinorType childType = Types.getMinorTypeForArrowType(field.getChildren().get(0).getType());
            switch (childType) {
                case STRUCT:
                    return (FieldVector vector, Extractor extractor, ConstraintProjector constraint) ->
                            (FieldWriter) (Object context, int rowNum) -> {
                                final ListVector listVector = (ListVector) vector;
                                final UnionListWriter listWriter = listVector.getWriter();
                                listWriter.setPosition(rowNum);
                                listWriter.startList();
                                final BaseWriter.StructWriter elementWriter = listWriter.struct();
                                elementWriter.start();
                                final byte[] textBytes = "chars".getBytes(Charsets.UTF_8);
                                try (ArrowBuf textBuf = vector.getAllocator().buffer(textBytes.length)) {
                                    textBuf.writeBytes(textBytes);
                                    elementWriter.varChar("varchar").writeVarChar(0, (int) (textBuf.readableBytes()), textBuf);
                                }
                                elementWriter.bigInt("bigint").writeBigInt(100L);
                                elementWriter.end();
                                listWriter.endList();
                                listVector.setNotNull(rowNum);
                                return true;
                            };
                case LIST:
                    return (FieldVector vector, Extractor extractor, ConstraintProjector constraint) ->
                            (FieldWriter) (Object context, int rowNum) -> {
                                final ListVector listVector = (ListVector) vector;
                                final UnionListWriter outerWriter = listVector.getWriter();
                                outerWriter.setPosition(rowNum);
                                outerWriter.startList();
                                final BaseWriter.ListWriter nestedWriter = outerWriter.list();
                                nestedWriter.startList();
                                // Three consecutive numbers starting at 1000, written as varchar.
                                for (int offset = 0; offset < 3; offset++) {
                                    final byte[] itemBytes = String.valueOf(1000 + offset).getBytes(Charsets.UTF_8);
                                    try (ArrowBuf itemBuf = vector.getAllocator().buffer(itemBytes.length)) {
                                        itemBuf.writeBytes(itemBytes);
                                        nestedWriter.varChar().writeVarChar(0, (int) (itemBuf.readableBytes()), itemBuf);
                                    }
                                }
                                nestedWriter.endList();
                                outerWriter.endList();
                                listVector.setNotNull(rowNum);
                                return true;
                            };
                default:
                    throw new IllegalArgumentException("Unsupported type " + childType);
            }
        default:
            throw new IllegalArgumentException("Unsupported type " + fieldType);
    }
}
use of org.apache.arrow.vector.complex.MapVector in project aws-athena-query-federation by awslabs.
the class BlockUtils method setComplexValue.
/**
 * Writes a complex value (Map, List or Struct) into the given FieldVector at the given row.
 *
 * @param vector The FieldVector that receives the value; must be a MapVector, ListVector or StructVector.
 * @param pos The row number to write to.
 * @param resolver The FieldResolver used to map the value onto the complex type (mostly for Structs, Maps).
 * @param value The value to write. For a ListVector a null value results in nothing being written.
 * @throws RuntimeException if the vector is none of the supported complex vector types.
 * @note This method incurs more Object overhead (heap churn) than using Arrow's native interface. Users of this
 * Utility should weigh their performance needs vs. the readability / ease of use.
 */
public static void setComplexValue(FieldVector vector, int pos, FieldResolver resolver, Object value) {
    // MapVector is tested before ListVector: in Arrow a MapVector is also a ListVector, so order matters.
    if (vector instanceof MapVector) {
        UnionMapWriter mapWriter = ((MapVector) vector).getWriter();
        mapWriter.setPosition(pos);
        writeMap(vector.getAllocator(), mapWriter, vector.getField(), pos, value, resolver);
        mapWriter.endMap();
        return;
    }

    if (vector instanceof ListVector) {
        if (value == null) {
            // Nothing to write for a null list; the row is left untouched.
            return;
        }
        ListVector listVector = (ListVector) vector;
        UnionListWriter listWriter = listVector.getWriter();
        listWriter.setPosition(pos);
        writeList(vector.getAllocator(), listWriter, vector.getField(), pos, ((List) value), resolver);
        listVector.setNotNull(pos);
        return;
    }

    if (vector instanceof StructVector) {
        StructWriter structWriter = ((StructVector) vector).getWriter();
        structWriter.setPosition(pos);
        writeStruct(vector.getAllocator(), structWriter, vector.getField(), pos, value, resolver);
        return;
    }

    throw new RuntimeException("Unsupported 'Complex' vector " + vector.getClass().getSimpleName() + " for field " + vector.getField().getName());
}
use of org.apache.arrow.vector.complex.MapVector in project aws-athena-query-federation by awslabs.
the class BlockUtils method unsetRow.
/**
 * Marks one row of every field vector in the Block as 'unset', so that a later processing stage can
 * treat the row as irrelevant. How a row is unset depends on the field type: most vectors have the row
 * set to null, while a LIST vector gets an empty list written at the row.
 *
 * @param row The row number to unset in the provided Block.
 * @param block The Block where we'd like to unset the specified row.
 * @throws IllegalArgumentException if the Block contains a vector whose type is not supported here.
 */
public static void unsetRow(int row, Block block) {
    for (FieldVector fieldVector : block.getFieldVectors()) {
        final Types.MinorType type = fieldVector.getMinorType();
        switch (type) {
            // Date and time types
            case DATEDAY:
                ((DateDayVector) fieldVector).setNull(row);
                break;
            case DATEMILLI:
                ((DateMilliVector) fieldVector).setNull(row);
                break;
            case TIMESTAMPMILLITZ:
                ((TimeStampMilliTZVector) fieldVector).setNull(row);
                break;

            // Signed integers
            case TINYINT:
                ((TinyIntVector) fieldVector).setNull(row);
                break;
            case SMALLINT:
                ((SmallIntVector) fieldVector).setNull(row);
                break;
            case INT:
                ((IntVector) fieldVector).setNull(row);
                break;
            case BIGINT:
                ((BigIntVector) fieldVector).setNull(row);
                break;

            // Unsigned integers
            case UINT1:
                ((UInt1Vector) fieldVector).setNull(row);
                break;
            case UINT2:
                ((UInt2Vector) fieldVector).setNull(row);
                break;
            case UINT4:
                ((UInt4Vector) fieldVector).setNull(row);
                break;
            case UINT8:
                ((UInt8Vector) fieldVector).setNull(row);
                break;

            // Floating point and decimal
            case FLOAT4:
                ((Float4Vector) fieldVector).setNull(row);
                break;
            case FLOAT8:
                ((Float8Vector) fieldVector).setNull(row);
                break;
            case DECIMAL:
                ((DecimalVector) fieldVector).setNull(row);
                break;

            // Text, binary and boolean
            case VARCHAR:
                ((VarCharVector) fieldVector).setNull(row);
                break;
            case VARBINARY:
                ((VarBinaryVector) fieldVector).setNull(row);
                break;
            case BIT:
                ((BitVector) fieldVector).setNull(row);
                break;

            // Complex types
            case STRUCT:
                ((StructVector) fieldVector).setNull(row);
                break;
            case MAP:
                ((MapVector) fieldVector).setNull(row);
                break;
            case LIST: {
                // Lists are not nulled: an empty list is written at the row and the writer's
                // value count is then set to 0.
                UnionListWriter emptyListWriter = ((ListVector) fieldVector).getWriter();
                emptyListWriter.setPosition(row);
                emptyListWriter.startList();
                emptyListWriter.endList();
                emptyListWriter.setValueCount(0);
                break;
            }

            default:
                throw new IllegalArgumentException("Unknown type " + type);
        }
    }
}
Aggregations