Use of org.apache.arrow.vector.ValueVector in project Mycat2 by MyCATApache.
The class TopKPlan, method buildCompare.
private VectorValueComparator buildCompare(SortPlan.SortColumn p) {
    FieldVector values = p.getValues();
    SortOptions options = p.getOptions();
    VectorValueComparator<FieldVector> defaultComparator =
            DefaultVectorComparators.createDefaultComparator(values);
    defaultComparator.attachVectors(values, values);
    return new VectorValueComparator() {

        @Override
        public int getValueWidth() {
            return super.getValueWidth();
        }

        @Override
        public void attachVector(ValueVector vector) {
            super.attachVector(vector);
        }

        @Override
        public void attachVectors(ValueVector vector1, ValueVector vector2) {
            super.attachVectors(vector1, vector2);
        }

        @Override
        public int compare(int index1, int index2) {
            boolean isNull1 = values.isNull(index1);
            boolean isNull2 = values.isNull(index2);
            if (isNull1 || isNull2) {
                if (isNull1 && isNull2) {
                    return 0;
                } else if (isNull1) {
                    if (options.nullsFirst) {
                        // null1 is smaller
                        return -1;
                    } else {
                        return 1;
                    }
                } else {
                    if (options.nullsFirst) {
                        // null2 is smaller
                        return 1;
                    } else {
                        return -1;
                    }
                }
            }
            return compareNotNull(index1, index2);
        }

        @Override
        public int compareNotNull(int index1, int index2) {
            return defaultComparator.compareNotNull(index1, index2);
        }

        @Override
        public VectorValueComparator createNew() {
            return this;
        }
    };
}
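For context, a minimal sketch of exercising a comparator like the one above against a standalone Arrow IntVector; the vector name, the sample values, and the standalone-main framing are illustrative assumptions and not part of the Mycat2 code:

import org.apache.arrow.algorithm.sort.DefaultVectorComparators;
import org.apache.arrow.algorithm.sort.VectorValueComparator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.IntVector;

public class NullAwareCompareSketch {
    public static void main(String[] args) {
        try (RootAllocator allocator = new RootAllocator();
             IntVector values = new IntVector("values", allocator)) { // hypothetical test vector
            values.allocateNew(3);
            values.set(0, 42);
            values.setNull(1); // row 1 is null
            values.set(2, 7);
            values.setValueCount(3);

            VectorValueComparator<IntVector> comparator =
                    DefaultVectorComparators.createDefaultComparator(values);
            comparator.attachVector(values);

            System.out.println(comparator.compare(0, 2)); // positive, since 42 > 7
            // The base comparator applies its own fixed null ordering; the anonymous
            // subclass in buildCompare makes that configurable via SortOptions.nullsFirst.
            System.out.println(comparator.compare(1, 0));
        }
    }
}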
Use of org.apache.arrow.vector.ValueVector in project TileDB-Spark by TileDB-Inc.
The class TileDBPartitionReader, method createValueVectors.
/**
 * Creates the value vectors that are later used to create the Arrow buffers for the query.
 *
 * @param readBufferSize the read buffer size in bytes
 * @throws TileDBError if a TileDB error occurs
 */
private void createValueVectors(long readBufferSize) throws TileDBError {
    metricsUpdater.startTimer(queryAllocBufferTimerName);
    // Create coordinate buffers
    int minDimSize = Integer.MAX_VALUE;
    Domain domain = arraySchema.getDomain();
    for (Dimension dimension : domain.getDimensions()) {
        int nativeSize = dimension.getType().getNativeSize();
        if (nativeSize < minDimSize)
            minDimSize = nativeSize;
        dimension.close();
    }
    domain.close();
    releaseArrowVectors();
    for (String fieldName : fieldNames) {
        // get the spark column name and match to array schema
        String name = fieldName;
        TypeInfo typeInfo = getTypeInfo(name);
        RootAllocator allocator = ArrowUtils.rootAllocator();
        ArrowType arrowType;
        ValueVector valueVector;
        // In theory we could try to replace the following UInt2Vector with UInt1Vector. However,
        // TileDB will throw an error that more validity cells are needed for the query. This
        // happens because apache-arrow rounds up the size of the data buffers, thus making it
        // necessary for us to provide more validity cells. This implementation provides double
        // the amount of validity cells necessary, which makes it safe.
        ValueVector validityValueVector = new UInt2Vector(fieldName, allocator);
        switch (typeInfo.datatype) {
            case CHAR:
            case ASCII:
                if (!typeInfo.isVarLen)
                    throw new RuntimeException("Unhandled fixed-len char buffer for attribute " + fieldName);
                valueVector = new VarCharVector(fieldName, allocator);
                break;
            case UINT8:
            case INT8:
                arrowType = new ArrowType.Int(8, true);
                if (typeInfo.isVarLen) {
                    ListVector lv = ListVector.empty(fieldName, allocator);
                    lv.addOrGetVector(FieldType.nullable(arrowType));
                    valueVector = lv;
                } else {
                    valueVector = new TinyIntVector(fieldName, allocator);
                }
                break;
            case INT32:
                arrowType = new ArrowType.Int(32, true);
                if (typeInfo.isVarLen || typeInfo.isArray) {
                    ListVector lv = ListVector.empty(fieldName, allocator);
                    lv.addOrGetVector(FieldType.nullable(arrowType));
                    valueVector = lv;
                } else {
                    valueVector = new IntVector(fieldName, allocator);
                }
                break;
            case FLOAT32:
                arrowType = new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE);
                if (typeInfo.isVarLen) {
                    ListVector lv = ListVector.empty(fieldName, allocator);
                    lv.addOrGetVector(FieldType.nullable(arrowType));
                    valueVector = lv;
                } else {
                    valueVector = new Float4Vector(fieldName, allocator);
                }
                break;
            case FlOAT64:
                arrowType = new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE);
                if (typeInfo.isVarLen) {
                    ListVector lv = ListVector.empty(fieldName, allocator);
                    lv.addOrGetVector(FieldType.nullable(arrowType));
                    valueVector = lv;
                } else {
                    valueVector = new Float8Vector(fieldName, allocator);
                }
                break;
            case INT16:
            case UINT16:
                arrowType = new ArrowType.Int(16, true);
                if (typeInfo.isVarLen) {
                    ListVector lv = ListVector.empty(fieldName, allocator);
                    lv.addOrGetVector(FieldType.nullable(arrowType));
                    valueVector = lv;
                } else {
                    valueVector = new SmallIntVector(fieldName, allocator);
                }
                break;
            case LONG:
            case DATE:
                arrowType = new ArrowType.Int(64, true);
                if (typeInfo.isVarLen) {
                    ListVector lv = ListVector.empty(fieldName, allocator);
                    lv.addOrGetVector(FieldType.nullable(arrowType));
                    valueVector = lv;
                } else {
                    valueVector = new BigIntVector(fieldName, allocator);
                }
                break;
            default:
                throw new RuntimeException("Unhandled datatype for Arrow buffer, attribute " + fieldName);
        }
        // Max number of rows is nbytes / sizeof(int32_t), i.e. the max number of offsets that can be
        // stored.
        long maxRowsL = (readBufferSize / util.getDefaultRecordByteCount(valueVector.getClass()));
        int maxNumRows = util.longToInt(maxRowsL);
        // rare case when readBufferSize is set to a value smaller than the type
        if (maxNumRows == 0)
            maxNumRows = 1;
        if (valueVector instanceof ListVector) {
            ((ListVector) valueVector).setInitialCapacity(maxNumRows, 1);
        } else {
            valueVector.setInitialCapacity(maxNumRows);
        }
        validityValueVector.setInitialCapacity(maxNumRows);
        // The valueVector is the one holding the data and the corresponding validity and
        // offset buffers.
        // The validityValueVector is a helper valueVector that holds the validity values in a byte
        // format, which is the one expected by TileDB. The validity buffer in the main valueVector
        // is a bitmap instead!
        // A conversion between the two is needed when retrieving the data. See the code in the get()
        // method.
        valueVector.allocateNew();
        validityValueVector.allocateNew();
        createAndSetArrowBuffers(valueVector, validityValueVector, typeInfo, name);
    }
    metricsUpdater.finish(queryAllocBufferTimerName);
}
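The closing comment points at the one non-obvious detail: Arrow tracks validity as a bitmap, while TileDB reports validity as one byte per cell in the helper UInt2Vector. A minimal sketch of the byte-to-bitmap direction needed when reading results back; the class and method names are made up for illustration and this is not the project's actual get() code:

import org.apache.arrow.vector.BitVectorHelper;
import org.apache.arrow.vector.UInt2Vector;
import org.apache.arrow.vector.ValueVector;

final class ValiditySketch {
    // Hypothetical helper: after TileDB fills the byte-per-cell validity vector,
    // mirror those flags into the data vector's bitmap-style validity buffer so
    // that isNull() and the Arrow readers report the correct nulls.
    static void applyValidity(UInt2Vector byteValidity, ValueVector dataVector, int valueCount) {
        for (int i = 0; i < valueCount; i++) {
            int isValid = byteValidity.get(i) == 0 ? 0 : 1;
            // BitVectorHelper writes a single bit of the bitmap validity buffer
            BitVectorHelper.setValidityBit(dataVector.getValidityBuffer(), i, isValid);
        }
        dataVector.setValueCount(valueCount);
    }
}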
Use of org.apache.arrow.vector.ValueVector in project deephaven-core by deephaven.
The class VectorSchemaRootAdapter, method of.
/**
 * Convert a {@code table} into a {@link VectorSchemaRoot}.
 *
 * @param table the table
 * @param allocator the allocator
 * @return the vector schema root
 */
public static VectorSchemaRoot of(NewTable table, BufferAllocator allocator) {
    final List<FieldVector> fieldVectors = new ArrayList<>(table.numColumns());
    for (Column<?> column : table) {
        fieldVectors.add(FieldVectorAdapter.of(column, allocator));
    }
    final Schema schema = new Schema(
            fieldVectors.stream().map(ValueVector::getField).collect(Collectors.toList()));
    return new VectorSchemaRoot(schema, fieldVectors, table.size());
}
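The same assembly pattern works with plain Arrow vectors when no Deephaven table is involved; a minimal sketch, with the column name and values chosen purely for illustration:

import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.types.pojo.Schema;

public class VectorSchemaRootSketch {
    public static void main(String[] args) {
        try (RootAllocator allocator = new RootAllocator()) {
            IntVector ids = new IntVector("id", allocator); // illustrative column
            ids.allocateNew(2);
            ids.set(0, 1);
            ids.set(1, 2);
            ids.setValueCount(2);

            List<FieldVector> vectors = Collections.singletonList(ids);
            // Derive the schema from the vectors' own fields, as the adapter above does
            Schema schema = new Schema(
                    vectors.stream().map(FieldVector::getField).collect(Collectors.toList()));
            try (VectorSchemaRoot root = new VectorSchemaRoot(schema, vectors, 2)) {
                System.out.println(root.contentToTSVString());
            }
        }
    }
}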
Use of org.apache.arrow.vector.ValueVector in project aws-athena-query-federation by awslabs.
The class BlockTest, method ListOfStructsTest.
@Test
public void ListOfStructsTest() throws Exception {
    BlockAllocatorImpl expectedAllocator = new BlockAllocatorImpl();
    /**
     * Generate and write the schema
     */
    SchemaBuilder schemaBuilder = new SchemaBuilder();
    schemaBuilder.addField(FieldBuilder.newBuilder("outerlist", new ArrowType.List())
            .addField(FieldBuilder.newBuilder("innerStruct", Types.MinorType.STRUCT.getType())
                    .addStringField("varchar")
                    .addBigIntField("bigint")
                    .build())
            .build());
    Schema origSchema = schemaBuilder.build();
    /**
     * Generate and write the block
     */
    Block expectedBlock = expectedAllocator.createBlock(origSchema);
    int expectedRows = 200;
    for (Field next : origSchema.getFields()) {
        ValueVector vector = expectedBlock.getFieldVector(next.getName());
        switch (vector.getMinorType()) {
            case LIST:
                Field child = vector.getField().getChildren().get(0);
                for (int i = 0; i < expectedRows; i++) {
                    // For each row
                    List<Map<String, Object>> value = new ArrayList<>();
                    switch (Types.getMinorTypeForArrowType(child.getType())) {
                        case STRUCT:
                            Map<String, Object> values = new HashMap<>();
                            values.put("varchar", "chars");
                            values.put("bigint", 100L);
                            value.add(values);
                            break;
                        default:
                            throw new UnsupportedOperationException(vector.getMinorType() + " is not supported");
                    }
                    BlockUtils.setComplexValue((ListVector) vector, i, FieldResolver.DEFAULT, value);
                }
                break;
            default:
                throw new UnsupportedOperationException(vector.getMinorType() + " is not supported");
        }
    }
    expectedBlock.setRowCount(expectedRows);
    RecordBatchSerDe expectSerDe = new RecordBatchSerDe(expectedAllocator);
    ByteArrayOutputStream blockOut = new ByteArrayOutputStream();
    ArrowRecordBatch expectedBatch = expectedBlock.getRecordBatch();
    expectSerDe.serialize(expectedBatch, blockOut);
    assertSerializationOverhead(blockOut);
    expectedBatch.close();
    expectedBlock.close();
    ByteArrayOutputStream schemaOut = new ByteArrayOutputStream();
    SchemaSerDe schemaSerDe = new SchemaSerDe();
    schemaSerDe.serialize(origSchema, schemaOut);
    Schema actualSchema = schemaSerDe.deserialize(new ByteArrayInputStream(schemaOut.toByteArray()));
    BlockAllocatorImpl actualAllocator = new BlockAllocatorImpl();
    RecordBatchSerDe actualSerDe = new RecordBatchSerDe(actualAllocator);
    ArrowRecordBatch batch = actualSerDe.deserialize(blockOut.toByteArray());
    /**
     * Read the block back and verify it
     */
    Block actualBlock = actualAllocator.createBlock(actualSchema);
    actualBlock.loadRecordBatch(batch);
    batch.close();
    for (int i = 0; i < actualBlock.getRowCount(); i++) {
        logger.info("ListOfList: util {}", BlockUtils.rowToString(actualBlock, i));
    }
    assertEquals("Row count mismatch", expectedRows, actualBlock.getRowCount());
    int actualFieldCount = 1;
    for (Field next : actualBlock.getFields()) {
        FieldReader vector = actualBlock.getFieldReader(next.getName());
        switch (vector.getMinorType()) {
            case LIST:
                int actual = 0;
                for (int i = 0; i < actualBlock.getRowCount(); i++) {
                    vector.setPosition(i);
                    int entryValues = 0;
                    while (vector.next()) {
                        entryValues++;
                        assertEquals("chars", vector.reader().reader("varchar").readText().toString());
                        assertEquals(Long.valueOf(100), vector.reader().reader("bigint").readLong());
                    }
                    if (entryValues > 0) {
                        actual++;
                    }
                }
                assertEquals("failed for " + vector.getField().getName(), actualBlock.getRowCount(), actual);
                break;
            default:
                throw new UnsupportedOperationException(next.getType().getTypeID() + " is not supported");
        }
        actualFieldCount++;
    }
    actualBlock.close();
}
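RecordBatchSerDe and SchemaSerDe above are Athena SDK classes. With vanilla Arrow, the same serialize-then-reload idea can be sketched with the IPC stream format, which carries the schema and the record batches in a single stream; the column name and values below are illustrative only:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.BigIntVector;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.ipc.ArrowStreamReader;
import org.apache.arrow.vector.ipc.ArrowStreamWriter;

public class RoundTripSketch {
    public static void main(String[] args) throws Exception {
        try (RootAllocator allocator = new RootAllocator()) {
            BigIntVector bigint = new BigIntVector("bigint", allocator); // illustrative column
            bigint.allocateNew(1);
            bigint.set(0, 100L);
            bigint.setValueCount(1);

            ByteArrayOutputStream out = new ByteArrayOutputStream();
            try (VectorSchemaRoot root = VectorSchemaRoot.of(bigint);
                 ArrowStreamWriter writer = new ArrowStreamWriter(root, null, out)) {
                writer.start();
                writer.writeBatch(); // serializes the schema plus one record batch
                writer.end();
            }

            // Deserialize into a fresh root owned by the reader
            try (ArrowStreamReader reader =
                         new ArrowStreamReader(new ByteArrayInputStream(out.toByteArray()), allocator)) {
                while (reader.loadNextBatch()) {
                    System.out.println(reader.getVectorSchemaRoot().contentToTSVString());
                }
            }
        }
    }
}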
Use of org.apache.arrow.vector.ValueVector in project aws-athena-query-federation by awslabs.
The class BlockTest, method structOfListsTest.
@Test
public void structOfListsTest() throws Exception {
    BlockAllocatorImpl expectedAllocator = new BlockAllocatorImpl();
    /**
     * Generate and write the schema
     */
    SchemaBuilder schemaBuilder = new SchemaBuilder();
    schemaBuilder.addField(FieldBuilder.newBuilder("innerStruct", Types.MinorType.STRUCT.getType())
            .addStringField("varchar")
            .addListField("list", Types.MinorType.VARCHAR.getType())
            .build());
    Schema origSchema = schemaBuilder.build();
    /**
     * Generate and write the block
     */
    Block expectedBlock = expectedAllocator.createBlock(origSchema);
    int expectedRows = 200;
    for (Field next : origSchema.getFields()) {
        ValueVector vector = expectedBlock.getFieldVector(next.getName());
        for (int i = 0; i < expectedRows; i++) {
            switch (vector.getMinorType()) {
                case STRUCT:
                    Map<String, Object> value = new HashMap<>();
                    value.put("varchar", "chars");
                    if (i % 2 == 0) {
                        List<String> listVal = new ArrayList<>();
                        listVal.add("value_0_" + i);
                        listVal.add("value_1_" + i);
                        value.put("list", listVal);
                    } else {
                        value.put("list", null);
                    }
                    BlockUtils.setComplexValue((StructVector) vector, i, FieldResolver.DEFAULT, value);
                    break;
                default:
                    throw new UnsupportedOperationException(vector.getMinorType() + " is not supported");
            }
        }
    }
    expectedBlock.setRowCount(expectedRows);
    RecordBatchSerDe expectSerDe = new RecordBatchSerDe(expectedAllocator);
    ByteArrayOutputStream blockOut = new ByteArrayOutputStream();
    ArrowRecordBatch expectedBatch = expectedBlock.getRecordBatch();
    expectSerDe.serialize(expectedBatch, blockOut);
    assertSerializationOverhead(blockOut);
    expectedBatch.close();
    expectedBlock.close();
    ByteArrayOutputStream schemaOut = new ByteArrayOutputStream();
    SchemaSerDe schemaSerDe = new SchemaSerDe();
    schemaSerDe.serialize(origSchema, schemaOut);
    Schema actualSchema = schemaSerDe.deserialize(new ByteArrayInputStream(schemaOut.toByteArray()));
    BlockAllocatorImpl actualAllocator = new BlockAllocatorImpl();
    RecordBatchSerDe actualSerDe = new RecordBatchSerDe(actualAllocator);
    ArrowRecordBatch batch = actualSerDe.deserialize(blockOut.toByteArray());
    /**
     * Read the block back and verify it
     */
    Block actualBlock = actualAllocator.createBlock(actualSchema);
    actualBlock.loadRecordBatch(batch);
    batch.close();
    for (int i = 0; i < actualBlock.getRowCount(); i++) {
        logger.info("ListOfList: util {}", BlockUtils.rowToString(actualBlock, i));
    }
    assertEquals("Row count mismatch", expectedRows, actualBlock.getRowCount());
    int actualListValues = 0;
    int emptyListValues = 0;
    for (Field next : actualBlock.getFields()) {
        FieldReader vector = actualBlock.getFieldReader(next.getName());
        for (int i = 0; i < actualBlock.getRowCount(); i++) {
            switch (vector.getMinorType()) {
                case STRUCT:
                    vector.setPosition(i);
                    assertEquals("chars", vector.reader("varchar").readText().toString());
                    FieldReader listReader = vector.reader("list");
                    int found = 0;
                    while (listReader.next()) {
                        assertEquals("value_" + found + "_" + i, listReader.reader().readText().toString());
                        found++;
                        actualListValues++;
                    }
                    if (found == 0) {
                        emptyListValues++;
                    }
                    break;
                default:
                    throw new UnsupportedOperationException(next.getType().getTypeID() + " is not supported");
            }
        }
    }
    actualBlock.close();
    assertEquals(200, actualListValues);
    assertEquals(100, emptyListValues);
    logger.info("structOfListsTest: actualListValues[{}] emptyListValues[{}]", actualListValues, emptyListValues);
}
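The final assertions follow directly from the population loop above: the even half of the 200 rows gets a two-element list and the odd half gets a null list. A tiny sketch of that arithmetic, mirroring the loop:

int expectedRows = 200;
int expectedListValues = 0;
int expectedEmptyLists = 0;
for (int i = 0; i < expectedRows; i++) {
    if (i % 2 == 0) {
        expectedListValues += 2;   // "value_0_<i>" and "value_1_<i>"
    } else {
        expectedEmptyLists++;      // a null list reads back with no entries
    }
}
// expectedListValues == 200, expectedEmptyLists == 100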