Use of org.apache.arrow.vector.ValueVector in project iceberg by apache.
Class TestHelpers, method assertEqualsBatch.
/**
 * Asserts that every row of {@code batch} matches the next record taken from {@code expected}.
 *
 * <p>Each field value is compared with {@code assertEqualsUnsafe}. When
 * {@code checkArrowValidityVector} is set, the null flag reported by the column's underlying
 * Arrow vector for the row must also agree with whether the expected value is null.
 *
 * @param struct schema whose fields drive the per-column comparison
 * @param expected iterator supplying one expected record per row of the batch
 * @param batch columnar batch under test
 * @param checkArrowValidityVector whether to additionally verify the Arrow vector's null flags
 */
public static void assertEqualsBatch(Types.StructType struct, Iterator<Record> expected, ColumnarBatch batch, boolean checkArrowValidityVector) {
    for (int rowIndex = 0; rowIndex < batch.numRows(); rowIndex++) {
        List<Types.NestedField> schemaFields = struct.fields();
        InternalRow actualRow = batch.getRow(rowIndex);
        Record expectedRecord = expected.next();

        int column = 0;
        for (Types.NestedField schemaField : schemaFields) {
            Type fieldType = schemaField.type();
            Object expectedValue = expectedRecord.get(column);

            Object actualValue;
            if (actualRow.isNullAt(column)) {
                actualValue = null;
            } else {
                actualValue = actualRow.get(column, convert(fieldType));
            }
            assertEqualsUnsafe(fieldType, expectedValue, actualValue);

            if (checkArrowValidityVector) {
                ColumnVector sparkVector = batch.column(column);
                IcebergArrowColumnVector icebergVector = (IcebergArrowColumnVector) sparkVector;
                ValueVector arrowVector = icebergVector.vectorAccessor().getVector();
                String message = "Nullability doesn't match of " + sparkVector.dataType();
                boolean expectedIsNull = expectedValue == null;
                // Fails when exactly one of "expected is null" / "Arrow marks the slot null" holds.
                Assert.assertFalse(message, expectedIsNull ^ arrowVector.isNull(rowIndex));
            }
            column++;
        }
    }
}
Use of org.apache.arrow.vector.ValueVector in project conquery by bakdata.
Class ArrowRenderer, method generateVectorFiller.
/**
 * Chooses the filler for {@code vector} based on its concrete Arrow vector class.
 *
 * <p>Every filler is handed an extractor that reads element {@code pos} of a result line and
 * casts it to the type the filler expects. Struct vectors get one nested filler per primitive
 * child vector (child {@code i} reads position {@code i}); list vectors get a single nested
 * filler for their data vector.
 *
 * @param pos index within the result line that this vector is filled from
 * @param vector the Arrow vector to create a filler for
 * @param settings print settings forwarded to {@code resultType} when rendering VarChar values
 * @param resultType result type used to print VarChar values; may be {@code null} for VarChar
 *     vectors, in which case {@code toString()} is used. It is cast to
 *     {@code ResultType.ListT} for list vectors.
 * @return the consumer produced by the matching {@code *VectorFiller} factory
 * @throws IllegalArgumentException if the vector class is none of the handled kinds
 */
private static RowConsumer generateVectorFiller(int pos, ValueVector vector, final PrintSettings settings, ResultType resultType) {
    // TODO When Pattern-matching lands, clean this up. (Think Java 12?)
    if (vector instanceof IntVector) {
        IntVector ints = (IntVector) vector;
        return intVectorFiller(ints, (row) -> (Integer) row[pos]);
    }

    if (vector instanceof VarCharVector) {
        VarCharVector strings = (VarCharVector) vector;
        return varCharVectorFiller(strings, (row) -> {
            // A missing value must stay null rather than being rendered as an empty string.
            if (row[pos] == null) {
                return null;
            }
            if (resultType == null) {
                return row[pos].toString();
            }
            return resultType.printNullable(settings, row[pos]);
        });
    }

    if (vector instanceof BitVector) {
        BitVector bits = (BitVector) vector;
        return bitVectorFiller(bits, (row) -> (Boolean) row[pos]);
    }

    if (vector instanceof Float4Vector) {
        Float4Vector floats = (Float4Vector) vector;
        return float4VectorFiller(floats, (row) -> (Number) row[pos]);
    }

    if (vector instanceof Float8Vector) {
        Float8Vector doubles = (Float8Vector) vector;
        return float8VectorFiller(doubles, (row) -> (Number) row[pos]);
    }

    if (vector instanceof DateDayVector) {
        DateDayVector days = (DateDayVector) vector;
        return dateDayVectorFiller(days, (row) -> (Number) row[pos]);
    }

    if (vector instanceof StructVector) {
        StructVector struct = (StructVector) vector;
        List<ValueVector> children = struct.getPrimitiveVectors();
        int childCount = children.size();
        RowConsumer[] childFillers = new RowConsumer[childCount];
        for (int child = 0; child < childCount; child++) {
            // Each child reads the element at its own index of the nested list.
            childFillers[child] = generateVectorFiller(child, children.get(child), settings, resultType);
        }
        return structVectorFiller(struct, childFillers, (row) -> (List<?>) row[pos]);
    }

    if (vector instanceof ListVector) {
        ListVector list = (ListVector) vector;
        ValueVector elements = list.getDataVector();
        // pos = 0 is a workaround for now
        RowConsumer elementFiller = generateVectorFiller(0, elements, settings, ((ResultType.ListT) resultType).getElementType());
        return listVectorFiller(list, elementFiller, (row) -> (List<?>) row[pos]);
    }

    throw new IllegalArgumentException("Unsupported vector type " + vector);
}
Use of org.apache.arrow.vector.ValueVector in project aws-athena-query-federation by awslabs.
Class BlockTest, method ListOfListsTest.
/**
 * Round-trips a block with a single list-of-lists column through {@code RecordBatchSerDe} and
 * {@code SchemaSerDe}, then checks that every deserialized row still carries the expected
 * nested string values.
 */
@Test
public void ListOfListsTest() throws Exception {
    BlockAllocatorImpl expectedAllocator = new BlockAllocatorImpl();

    // Build the schema: one "outerlist" list column whose element field is a list of VARCHAR.
    SchemaBuilder schemaBuilder = new SchemaBuilder();
    schemaBuilder.addField(FieldBuilder.newBuilder("outerlist", new ArrowType.List()).addListField("innerList", Types.MinorType.VARCHAR.getType()).build());
    Schema origSchema = schemaBuilder.build();

    // Populate the expected block: every row holds one inner list ["1000", "1001", "1002"].
    Block expectedBlock = expectedAllocator.createBlock(origSchema);
    int expectedRows = 200;
    for (Field next : origSchema.getFields()) {
        ValueVector vector = expectedBlock.getFieldVector(next.getName());
        switch (vector.getMinorType()) {
            case LIST:
                Field child = vector.getField().getChildren().get(0);
                // The child type does not change per row, so resolve it once.
                Types.MinorType childType = Types.getMinorTypeForArrowType(child.getType());
                for (int i = 0; i < expectedRows; i++) {
                    // For each row
                    List<List<String>> value = new ArrayList<>();
                    switch (childType) {
                        case LIST:
                            List<String> values = new ArrayList<>();
                            values.add(String.valueOf(1000));
                            values.add(String.valueOf(1001));
                            values.add(String.valueOf(1002));
                            value.add(values);
                            break;
                        default:
                            // Report the child type: it is the child, not the outer LIST, that is unsupported.
                            throw new UnsupportedOperationException(childType + " is not supported");
                    }
                    BlockUtils.setComplexValue((ListVector) vector, i, FieldResolver.DEFAULT, value);
                }
                break;
            default:
                throw new UnsupportedOperationException(vector.getMinorType() + " is not supported");
        }
    }
    expectedBlock.setRowCount(expectedRows);

    // Serialize the record batch, then release the source batch and block.
    RecordBatchSerDe expectSerDe = new RecordBatchSerDe(expectedAllocator);
    ByteArrayOutputStream blockOut = new ByteArrayOutputStream();
    ArrowRecordBatch expectedBatch = expectedBlock.getRecordBatch();
    expectSerDe.serialize(expectedBatch, blockOut);
    assertSerializationOverhead(blockOut);
    expectedBatch.close();
    expectedBlock.close();

    // Round-trip the schema through SchemaSerDe as well.
    ByteArrayOutputStream schemaOut = new ByteArrayOutputStream();
    SchemaSerDe schemaSerDe = new SchemaSerDe();
    schemaSerDe.serialize(origSchema, schemaOut);
    Schema actualSchema = schemaSerDe.deserialize(new ByteArrayInputStream(schemaOut.toByteArray()));

    // Deserialize the batch with a separate allocator and load it into a new block.
    BlockAllocatorImpl actualAllocator = new BlockAllocatorImpl();
    RecordBatchSerDe actualSerDe = new RecordBatchSerDe(actualAllocator);
    ArrowRecordBatch batch = actualSerDe.deserialize(blockOut.toByteArray());
    Block actualBlock = actualAllocator.createBlock(actualSchema);
    actualBlock.loadRecordBatch(batch);
    batch.close();

    for (int i = 0; i < actualBlock.getRowCount(); i++) {
        logger.info("ListOfList: util {}", BlockUtils.rowToString(actualBlock, i));
    }
    assertEquals("Row count missmatch", expectedRows, actualBlock.getRowCount());

    // Verify the nested values of every row and that no row came back empty.
    for (Field next : actualBlock.getFields()) {
        FieldReader vector = actualBlock.getFieldReader(next.getName());
        switch (vector.getMinorType()) {
            case LIST:
                // Number of rows that contained at least one inner value.
                int actual = 0;
                for (int i = 0; i < actualBlock.getRowCount(); i++) {
                    vector.setPosition(i);
                    int entryValues = 0;
                    while (vector.next()) {
                        FieldReader innerReader = vector.reader();
                        // Inner values were written as "1000", "1001", ... in order.
                        int j = 0;
                        while (innerReader.next()) {
                            entryValues++;
                            assertEquals(String.valueOf(1000 + j++), innerReader.reader().readText().toString());
                        }
                    }
                    if (entryValues > 0) {
                        actual++;
                    }
                }
                assertEquals("failed for " + vector.getField().getName(), actualBlock.getRowCount(), actual);
                break;
            default:
                throw new UnsupportedOperationException(next.getType().getTypeID() + " is not supported");
        }
    }
    actualBlock.close();
}
Use of org.apache.arrow.vector.ValueVector in project aws-athena-query-federation by awslabs.
Class BlockTest, method generateTestBlock.
/**
 * Creates a block for {@code origSchema} and fills {@code expectedRows} rows of every field
 * with deterministic test data.
 *
 * <p>Scalar columns receive values derived from the row index and the 1-based position of the
 * field in the schema. STRUCT and MAP columns are populated through a field resolver keyed on
 * well-known nested field names. LIST columns are populated according to their child type;
 * a LIST whose child type has no branch here is left unpopulated.
 *
 * @param expectedAllocator allocator used to create the block
 * @param origSchema schema describing the fields to populate
 * @param expectedRows number of rows to write into each field
 * @return the populated block with its row count set to {@code expectedRows}
 * @throws UnsupportedOperationException if a top-level field has a minor type not handled here
 * @throws IllegalStateException if a MAP field does not have the expected entries/key/value layout
 */
public static Block generateTestBlock(BlockAllocatorImpl expectedAllocator, Schema origSchema, int expectedRows) throws UnsupportedOperationException {
    Block expectedBlock = expectedAllocator.createBlock(origSchema);
    // 1-based position of the current field; used as a multiplier so columns differ.
    int fieldCount = 1;
    for (Field next : origSchema.getFields()) {
        ValueVector vector = expectedBlock.getFieldVector(next.getName());
        switch (vector.getMinorType()) {
            case DATEDAY:
                DateDayVector dateDayVector = (DateDayVector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    dateDayVector.setSafe(i, i * fieldCount);
                }
                break;
            case UINT4:
                UInt4Vector uInt4Vector = (UInt4Vector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    uInt4Vector.setSafe(i, i * fieldCount * 2);
                }
                break;
            case INT:
                IntVector intVector = (IntVector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    intVector.setSafe(i, i * fieldCount * 3);
                }
                break;
            case FLOAT8:
                Float8Vector fVector = (Float8Vector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    fVector.setSafe(i, i * fieldCount * 1.1);
                }
                break;
            case VARCHAR:
                VarCharVector vVector = (VarCharVector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    vVector.setSafe(i, String.valueOf(i * fieldCount).getBytes(Charsets.UTF_8));
                }
                break;
            case DATEMILLI:
                DateMilliVector dateMilliVector = (DateMilliVector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    dateMilliVector.setSafe(i, i * fieldCount * 4);
                }
                break;
            case TINYINT:
                TinyIntVector tinyIntVector = (TinyIntVector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    tinyIntVector.setSafe(i, i * fieldCount * 5);
                }
                break;
            case UINT1:
                UInt1Vector uInt1Vector = (UInt1Vector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    uInt1Vector.setSafe(i, i * fieldCount * 6);
                }
                break;
            case SMALLINT:
                SmallIntVector smallIntVector = (SmallIntVector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    smallIntVector.setSafe(i, i * fieldCount * 7);
                }
                break;
            case UINT2:
                UInt2Vector uInt2Vector = (UInt2Vector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    uInt2Vector.setSafe(i, i * fieldCount * 8);
                }
                break;
            case UINT8:
                UInt8Vector uInt8Vector = (UInt8Vector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    uInt8Vector.setSafe(i, i * fieldCount * 9);
                }
                break;
            case BIGINT:
                BigIntVector bigIntVector = (BigIntVector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    bigIntVector.setSafe(i, i * fieldCount * 10);
                }
                break;
            case DECIMAL:
                DecimalVector decimalVector = (DecimalVector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    BigDecimal bigDecimal = new BigDecimal((double) (i * fieldCount) * 1.01);
                    bigDecimal = bigDecimal.setScale(2, RoundingMode.HALF_UP);
                    decimalVector.setSafe(i, bigDecimal);
                }
                break;
            case FLOAT4:
                Float4Vector float4Vector = (Float4Vector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    float4Vector.setSafe(i, i * fieldCount * 9);
                }
                break;
            case VARBINARY:
                VarBinaryVector varBinaryVector = (VarBinaryVector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    // Explicit charset so the bytes do not depend on the platform default.
                    byte[] data = String.valueOf(i * fieldCount).getBytes(Charsets.UTF_8);
                    varBinaryVector.setSafe(i, data);
                }
                break;
            case BIT:
                BitVector bitVector = (BitVector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    bitVector.setSafe(i, i % 2);
                }
                break;
            case STRUCT:
                StructVector sVector = (StructVector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    final int seed = i;
                    BlockUtils.setComplexValue(sVector, i, (Field field, Object value) -> {
                        if (field.getName().equals("nestedBigInt")) {
                            return (long) seed;
                        }
                        if (field.getName().equals("nestedString")) {
                            return String.valueOf(1000 + seed);
                        }
                        return commonNestedFieldValue(field, seed);
                    }, new Object());
                }
                break;
            case LIST:
                Field child = vector.getField().getChildren().get(0);
                Types.MinorType childType = Types.getMinorTypeForArrowType(child.getType());
                // Child types without a branch below are intentionally left unpopulated.
                if (childType == Types.MinorType.BIGINT) {
                    for (int i = 0; i < expectedRows; i++) {
                        List<Long> values = new ArrayList<>();
                        values.add(Long.valueOf(i));
                        values.add(i + 1L);
                        values.add(i + 2L);
                        BlockUtils.setComplexValue((ListVector) vector, i, FieldResolver.DEFAULT, values);
                    }
                } else if (childType == Types.MinorType.VARCHAR) {
                    for (int i = 0; i < expectedRows; i++) {
                        List<String> values = new ArrayList<>();
                        values.add(String.valueOf(1000 + i));
                        values.add(String.valueOf(1000 + i + 1));
                        values.add(String.valueOf(1000 + i + 2));
                        BlockUtils.setComplexValue((ListVector) vector, i, FieldResolver.DEFAULT, values);
                    }
                } else if (childType == Types.MinorType.SMALLINT) {
                    for (int i = 0; i < expectedRows; i++) {
                        BlockUtils.setComplexValue((ListVector) vector, i, FieldResolver.DEFAULT, Collections.singletonList((short) (i + 1)));
                    }
                } else if (childType == Types.MinorType.INT) {
                    for (int i = 0; i < expectedRows; i++) {
                        BlockUtils.setComplexValue((ListVector) vector, i, FieldResolver.DEFAULT, Collections.singletonList(i));
                    }
                } else if (childType == Types.MinorType.TINYINT) {
                    for (int i = 0; i < expectedRows; i++) {
                        BlockUtils.setComplexValue((ListVector) vector, i, FieldResolver.DEFAULT, Collections.singletonList((byte) i));
                    }
                } else if (childType == Types.MinorType.FLOAT4) {
                    for (int i = 0; i < expectedRows; i++) {
                        BlockUtils.setComplexValue((ListVector) vector, i, FieldResolver.DEFAULT, Collections.singletonList((i * 1.0F)));
                    }
                } else if (childType == Types.MinorType.FLOAT8) {
                    for (int i = 0; i < expectedRows; i++) {
                        BlockUtils.setComplexValue((ListVector) vector, i, FieldResolver.DEFAULT, Collections.singletonList((i * 1.0D)));
                    }
                } else if (childType == Types.MinorType.DECIMAL) {
                    for (int i = 0; i < expectedRows; i++) {
                        BlockUtils.setComplexValue((ListVector) vector, i, FieldResolver.DEFAULT, Collections.singletonList((i * 1.0D)));
                    }
                } else if (childType == Types.MinorType.VARBINARY) {
                    for (int i = 0; i < expectedRows; i++) {
                        BlockUtils.setComplexValue((ListVector) vector, i, FieldResolver.DEFAULT, Collections.singletonList(String.valueOf(i).getBytes(Charsets.UTF_8)));
                    }
                } else if (childType == Types.MinorType.BIT) {
                    for (int i = 0; i < expectedRows; i++) {
                        BlockUtils.setComplexValue((ListVector) vector, i, FieldResolver.DEFAULT, Collections.singletonList(i % 2 == 1));
                    }
                }
                break;
            case MAP:
                MapVector mapVector = (MapVector) vector;
                for (int i = 0; i < expectedRows; i++) {
                    final int seed = i;
                    BlockUtils.setComplexValue(mapVector, i, (Field field, Object value) -> {
                        if (field.getName().equals("key")) {
                            return String.valueOf(1000 + seed);
                        }
                        if (field.getName().equals("value")) {
                            return seed;
                        }
                        return commonNestedFieldValue(field, seed);
                    }, new Object());
                }
                // Sanity-check the map layout: a single ENTRIES struct holding exactly KEY and VALUE.
                List<Field> children = vector.getField().getChildren();
                if (children.size() != 1) {
                    throw new IllegalStateException("Invalid Arrow Map schema: " + vector.getField());
                }
                Field keyValueStructField = children.get(0);
                if (!ENTRIES.equals(keyValueStructField.getName()) || !(keyValueStructField.getType() instanceof ArrowType.Struct)) {
                    throw new IllegalStateException("Invalid Arrow Map schema: " + vector.getField());
                }
                List<Field> keyValueChildren = keyValueStructField.getChildren();
                if (keyValueChildren.size() != 2) {
                    throw new IllegalStateException("Invalid Arrow Map schema: " + vector.getField());
                }
                Field keyField = keyValueChildren.get(0);
                Field valueField = keyValueChildren.get(1);
                if (!KEY.equals(keyField.getName()) || !VALUE.equals(valueField.getName())) {
                    throw new IllegalStateException("Invalid Arrow Map schema: " + vector.getField());
                }
                break;
            default:
                throw new UnsupportedOperationException(vector.getMinorType() + " is not supported");
        }
        fieldCount++;
    }
    expectedBlock.setRowCount(expectedRows);
    return expectedBlock;
}

/**
 * Produces the test value for the nested field names that the STRUCT and MAP resolvers share.
 *
 * @param field nested field being resolved
 * @param seed row index the value is derived from
 * @return the value for {@code field}
 * @throws RuntimeException if the field name is not one of the known nested names
 */
private static Object commonNestedFieldValue(Field field, int seed) {
    String fieldName = field.getName();
    if (fieldName.equals("tinyintcol")) {
        return (byte) seed;
    }
    if (fieldName.equals("smallintcol")) {
        return (short) seed;
    }
    if (fieldName.equals("nestedList")) {
        List<String> values = new ArrayList<>();
        values.add("val1");
        values.add("val2");
        return values;
    }
    if (fieldName.equals("nestedListDec")) {
        List<Double> values = new ArrayList<>();
        values.add(2.0D);
        values.add(2.2D);
        return values;
    }
    if (fieldName.equals("float4Col")) {
        return seed * 1.0F;
    }
    if (fieldName.equals("float8Col")) {
        return seed * 2.0D;
    }
    if (fieldName.equals("shortDecCol")) {
        return seed * 3.0D;
    }
    if (fieldName.equals("longDecCol")) {
        return seed * 4.0D;
    }
    if (fieldName.equals("binaryCol")) {
        return String.valueOf(seed).getBytes(Charsets.UTF_8);
    }
    if (fieldName.equals("bitCol")) {
        return seed % 2 == 1;
    }
    if (fieldName.equals("nestedStruct")) {
        // it just needs to be non-null
        return new Object();
    }
    throw new RuntimeException("Unexpected field " + fieldName);
}
Use of org.apache.arrow.vector.ValueVector in project TileDB-Spark by TileDB-Inc.
Class TileDBDataReaderPartitionScan, method createValueVectors.
/**
 * Creates the value vectors that are later used to create the Arrow buffers for the query.
 *
 * <p>Any previously held Arrow vectors are released first. For every entry in
 * {@code fieldNames} this then builds a data vector matching the field's datatype (a
 * {@link ListVector} when the field is variable-length), plus a companion {@link UInt1Vector}
 * for byte-wise validity values, allocates both, and hands them to
 * {@code createAndSetArrowBuffers}.
 *
 * @param readBufferSize the read buffer size; divided by the default record byte count of the
 *     vector class to derive the initial capacity in rows
 * @throws TileDBError propagated from the calls made while creating the buffers
 * @throws RuntimeException if a field has a fixed-length char type or an unhandled datatype
 */
private void createValueVectors(long readBufferSize) throws TileDBError {
    metricsUpdater.startTimer(queryAllocBufferTimerName);
    releaseArrowVectors();
    for (String fieldName : fieldNames) {
        // get the spark column name and match to array schema
        TypeInfo typeInfo = getTypeInfo(fieldName);
        RootAllocator allocator = ArrowUtils.rootAllocator();
        ValueVector valueVector;
        ValueVector validityValueVector = new UInt1Vector(fieldName, allocator);
        switch (typeInfo.datatype) {
            case CHAR:
            case ASCII:
                if (!typeInfo.isVarLen) {
                    throw new RuntimeException("Unhandled fixed-len char buffer for attribute " + fieldName);
                }
                valueVector = new VarCharVector(fieldName, allocator);
                break;
            case UINT8:
            case INT8:
                if (typeInfo.isVarLen) {
                    valueVector = newListVector(fieldName, allocator, new ArrowType.Int(8, true));
                } else {
                    valueVector = new TinyIntVector(fieldName, allocator);
                }
                break;
            case INT32:
                // INT32 is the only type that also becomes a list when flagged as an array.
                if (typeInfo.isVarLen || typeInfo.isArray) {
                    valueVector = newListVector(fieldName, allocator, new ArrowType.Int(32, true));
                } else {
                    valueVector = new IntVector(fieldName, allocator);
                }
                break;
            case FLOAT32:
                if (typeInfo.isVarLen) {
                    valueVector = newListVector(fieldName, allocator, new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE));
                } else {
                    valueVector = new Float4Vector(fieldName, allocator);
                }
                break;
            case FlOAT64:
                if (typeInfo.isVarLen) {
                    valueVector = newListVector(fieldName, allocator, new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE));
                } else {
                    valueVector = new Float8Vector(fieldName, allocator);
                }
                break;
            case INT16:
            case UINT16:
                if (typeInfo.isVarLen) {
                    valueVector = newListVector(fieldName, allocator, new ArrowType.Int(16, true));
                } else {
                    valueVector = new SmallIntVector(fieldName, allocator);
                }
                break;
            case LONG:
            case DATE:
                if (typeInfo.isVarLen) {
                    valueVector = newListVector(fieldName, allocator, new ArrowType.Int(64, true));
                } else {
                    valueVector = new BigIntVector(fieldName, allocator);
                }
                break;
            default:
                throw new RuntimeException("Unhandled datatype for Arrow buffer, attribute " + fieldName);
        }
        // Max number of rows is the read buffer size divided by the default record byte count
        // that util reports for this vector class.
        long maxRowsL = (readBufferSize / util.getDefaultRecordByteCount(valueVector.getClass()));
        int maxNumRows = util.longToInt(maxRowsL);
        // rare case when readbuffer size is set to a value smaller than the type
        if (maxNumRows == 0) {
            maxNumRows = 1;
        }
        if (valueVector instanceof ListVector) {
            ((ListVector) valueVector).setInitialCapacity(maxNumRows, 1);
        } else {
            valueVector.setInitialCapacity(maxNumRows);
        }
        validityValueVector.setInitialCapacity(maxNumRows);
        // The valueVector is the one holding the data and the corresponding validity and
        // offsetBuffers.
        // The validityValueVector is a help valueVector that holds the validity values in a byte
        // format which is the one expected from TileDB. The validity buffers in the main valueVector
        // is a bitmap instead!
        // A conversion between the two is needed when retrieving the data. See the code in the get()
        // method.
        valueVector.allocateNew();
        validityValueVector.allocateNew();
        createAndSetArrowBuffers(valueVector, validityValueVector, typeInfo, fieldName);
    }
    metricsUpdater.finish(queryAllocBufferTimerName);
}

/**
 * Creates an empty list vector named {@code fieldName} and registers a nullable child vector
 * of {@code elementType} on it.
 */
private ListVector newListVector(String fieldName, RootAllocator allocator, ArrowType elementType) {
    ListVector listVector = ListVector.empty(fieldName, allocator);
    listVector.addOrGetVector(FieldType.nullable(elementType));
    return listVector;
}
Aggregations