Use of org.apache.arrow.memory.ArrowBuf in project aws-athena-query-federation by awslabs.
From the class BlockUtils, the method writeStructValue:
/**
* Used to write a value into a specific child field within a Struct. Multiple calls to this method per-cell are
* expected in order to write to all N fields of a Struct.
*
* @param writer The FieldWriter (already positioned at the row and list entry number) that we want to write into.
* @param field The child field we are attempting to write into.
* @param allocator The BufferAllocator that can be used for allocating Arrow buffers for fields which require conversion
* to an ArrowBuf before being written.
* @param value The value to write.
* @note This method and its List complement violate the DRY mantra because ListWriter and StructWriter don't share
* a meaningful ancestor despite having identical methods. This requires us to either further wrap and abstract the writer
* or duplicate some code. In a future release we hope to have contributed a better option to Apache Arrow which allows
* us to simplify this method.
*/
@VisibleForTesting
protected static void writeStructValue(StructWriter writer, Field field, BufferAllocator allocator, Object value) {
if (value == null) {
return;
}
ArrowType type = field.getType();
try {
switch(Types.getMinorTypeForArrowType(type)) {
case TIMESTAMPMILLITZ:
long dateTimeWithZone;
if (value instanceof ZonedDateTime) {
dateTimeWithZone = DateTimeFormatterUtil.packDateTimeWithZone((ZonedDateTime) value);
} else if (value instanceof LocalDateTime) {
dateTimeWithZone = DateTimeFormatterUtil.packDateTimeWithZone(((LocalDateTime) value).atZone(UTC_ZONE_ID).toInstant().toEpochMilli(), UTC_ZONE_ID.getId());
} else if (value instanceof Date) {
long ldtInLong = Instant.ofEpochMilli(((Date) value).getTime()).atZone(UTC_ZONE_ID).toInstant().toEpochMilli();
dateTimeWithZone = DateTimeFormatterUtil.packDateTimeWithZone(ldtInLong, UTC_ZONE_ID.getId());
} else {
dateTimeWithZone = (long) value;
}
writer.timeStampMilliTZ(field.getName()).writeTimeStampMilliTZ(dateTimeWithZone);
break;
case DATEMILLI:
if (value instanceof Date) {
writer.dateMilli(field.getName()).writeDateMilli(((Date) value).getTime());
} else {
writer.dateMilli(field.getName()).writeDateMilli((long) value);
}
break;
case DATEDAY:
if (value instanceof Date) {
org.joda.time.Days days = org.joda.time.Days.daysBetween(EPOCH, new org.joda.time.DateTime(((Date) value).getTime()));
writer.dateDay(field.getName()).writeDateDay(days.getDays());
} else if (value instanceof LocalDate) {
int days = (int) ((LocalDate) value).toEpochDay();
writer.dateDay(field.getName()).writeDateDay(days);
} else if (value instanceof Long) {
writer.dateDay(field.getName()).writeDateDay(((Long) value).intValue());
} else {
writer.dateDay(field.getName()).writeDateDay((int) value);
}
break;
case FLOAT8:
writer.float8(field.getName()).writeFloat8((double) value);
break;
case FLOAT4:
writer.float4(field.getName()).writeFloat4((float) value);
break;
case INT:
if (value instanceof Long) {
// This may seem odd at first but many frameworks (like Presto) use long as the preferred
// native java type for representing integers. We do this to keep type conversions simple.
writer.integer(field.getName()).writeInt(((Long) value).intValue());
} else {
writer.integer(field.getName()).writeInt((int) value);
}
break;
case TINYINT:
writer.tinyInt(field.getName()).writeTinyInt((byte) value);
break;
case SMALLINT:
writer.smallInt(field.getName()).writeSmallInt((short) value);
break;
case UINT1:
writer.uInt1(field.getName()).writeUInt1((byte) value);
break;
case UINT2:
writer.uInt2(field.getName()).writeUInt2((char) value);
break;
case UINT4:
writer.uInt4(field.getName()).writeUInt4((int) value);
break;
case UINT8:
writer.uInt8(field.getName()).writeUInt8((long) value);
break;
case BIGINT:
writer.bigInt(field.getName()).writeBigInt((long) value);
break;
case VARBINARY:
if (value instanceof ArrowBuf) {
ArrowBuf buf = (ArrowBuf) value;
writer.varBinary(field.getName()).writeVarBinary(0, (int) (buf.capacity()), buf);
} else if (value instanceof byte[]) {
byte[] bytes = (byte[]) value;
try (ArrowBuf buf = allocator.buffer(bytes.length)) {
buf.writeBytes(bytes);
writer.varBinary(field.getName()).writeVarBinary(0, (int) (buf.readableBytes()), buf);
}
}
break;
case DECIMAL:
int scale = ((ArrowType.Decimal) type).getScale();
int precision = ((ArrowType.Decimal) type).getPrecision();
if (value instanceof Double) {
BigDecimal bdVal = new BigDecimal((double) value);
bdVal = bdVal.setScale(scale, RoundingMode.HALF_UP);
writer.decimal(field.getName(), scale, precision).writeDecimal(bdVal);
} else {
BigDecimal scaledValue = ((BigDecimal) value).setScale(scale, RoundingMode.HALF_UP);
writer.decimal(field.getName(), scale, precision).writeDecimal(scaledValue);
}
break;
case VARCHAR:
if (value instanceof String) {
byte[] bytes = ((String) value).getBytes(Charsets.UTF_8);
try (ArrowBuf buf = allocator.buffer(bytes.length)) {
buf.writeBytes(bytes);
writer.varChar(field.getName()).writeVarChar(0, (int) (buf.readableBytes()), buf);
}
} else if (value instanceof ArrowBuf) {
ArrowBuf buf = (ArrowBuf) value;
writer.varChar(field.getName()).writeVarChar(0, (int) (buf.readableBytes()), buf);
} else if (value instanceof byte[]) {
byte[] bytes = (byte[]) value;
try (ArrowBuf buf = allocator.buffer(bytes.length)) {
buf.writeBytes(bytes);
writer.varChar(field.getName()).writeVarChar(0, (int) (buf.readableBytes()), buf);
}
}
break;
case BIT:
if (value instanceof Integer && (int) value > 0) {
writer.bit(field.getName()).writeBit(1);
} else if (value instanceof Boolean && (boolean) value) {
writer.bit(field.getName()).writeBit(1);
} else {
writer.bit(field.getName()).writeBit(0);
}
break;
default:
throw new IllegalArgumentException("Unknown type " + type);
}
} catch (RuntimeException ex) {
throw new RuntimeException("Unable to write value for field " + field.getName() + " using value " + value, ex);
}
}
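For orientation, here is a minimal usage sketch (the field names and the structVector/allocator/rowNum variables are illustrative assumptions, not taken from the original source) showing the expected multiple per-cell calls, one per child of a STRUCT&lt;id INT, name VARCHAR&gt; field, invoked from code that can see the protected method (e.g. a test in the same package):

// Minimal sketch: position the struct writer on a row, then write each child field in turn.
NullableStructWriter structWriter = structVector.getWriter();
structWriter.setPosition(rowNum);
structWriter.start();
List<Field> children = structVector.getField().getChildren();
writeStructValue(structWriter, children.get(0), allocator, 42);        // INT child "id"
writeStructValue(structWriter, children.get(1), allocator, "example"); // VARCHAR child "name"
structWriter.end();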
Use of org.apache.arrow.memory.ArrowBuf in project aws-athena-query-federation by awslabs.
From the class BlockUtils, the method writeListValue:
/**
* Used to write an individual value into a List field. Multiple calls to this method per-cell are expected in order
* to write the N values of a list of size N.
*
* @param writer The FieldWriter (already positioned at the row and list entry number) that we want to write into.
* @param type The concrete type of the List's values.
* @param allocator The BufferAllocator that can be used for allocating Arrow buffers for fields which require conversion
* to an ArrowBuf before being written.
* @param value The value to write.
* @note This method and its Struct complement violate the DRY mantra because ListWriter and StructWriter don't share
* a meaningful ancestor despite having identical methods. This requires us to either further wrap and abstract the writer
* or duplicate some code. In a future release we hope to have contributed a better option to Apache Arrow which allows
* us to simplify this method.
*/
protected static void writeListValue(FieldWriter writer, ArrowType type, BufferAllocator allocator, Object value) {
if (value == null) {
return;
}
try {
// TODO: add all types
switch(Types.getMinorTypeForArrowType(type)) {
case TIMESTAMPMILLITZ:
long dateTimeWithZone;
if (value instanceof ZonedDateTime) {
dateTimeWithZone = DateTimeFormatterUtil.packDateTimeWithZone((ZonedDateTime) value);
} else if (value instanceof LocalDateTime) {
dateTimeWithZone = DateTimeFormatterUtil.packDateTimeWithZone(((LocalDateTime) value).atZone(UTC_ZONE_ID).toInstant().toEpochMilli(), UTC_ZONE_ID.getId());
} else if (value instanceof Date) {
long ldtInLong = Instant.ofEpochMilli(((Date) value).getTime()).atZone(UTC_ZONE_ID).toInstant().toEpochMilli();
dateTimeWithZone = DateTimeFormatterUtil.packDateTimeWithZone(ldtInLong, UTC_ZONE_ID.getId());
} else {
dateTimeWithZone = (long) value;
}
writer.writeTimeStampMilliTZ(dateTimeWithZone);
break;
case DATEMILLI:
if (value instanceof Date) {
writer.writeDateMilli(((Date) value).getTime());
} else {
writer.writeDateMilli((long) value);
}
break;
case DATEDAY:
if (value instanceof Date) {
org.joda.time.Days days = org.joda.time.Days.daysBetween(EPOCH, new org.joda.time.DateTime(((Date) value).getTime()));
writer.writeDateDay(days.getDays());
} else if (value instanceof LocalDate) {
int days = (int) ((LocalDate) value).toEpochDay();
writer.writeDateDay(days);
} else if (value instanceof Long) {
writer.writeDateDay(((Long) value).intValue());
} else {
writer.writeDateDay((int) value);
}
break;
case FLOAT8:
writer.float8().writeFloat8((double) value);
break;
case FLOAT4:
writer.float4().writeFloat4((float) value);
break;
case INT:
if (value instanceof Long) {
// This may seem odd at first but many frameworks (like Presto) use long as the preferred
// native java type for representing integers. We do this to keep type conversions simple.
writer.integer().writeInt(((Long) value).intValue());
} else {
writer.integer().writeInt((int) value);
}
break;
case TINYINT:
writer.tinyInt().writeTinyInt((byte) value);
break;
case SMALLINT:
writer.smallInt().writeSmallInt((short) value);
break;
case UINT1:
writer.uInt1().writeUInt1((byte) value);
break;
case UINT2:
writer.uInt2().writeUInt2((char) value);
break;
case UINT4:
writer.uInt4().writeUInt4((int) value);
break;
case UINT8:
writer.uInt8().writeUInt8((long) value);
break;
case BIGINT:
writer.bigInt().writeBigInt((long) value);
break;
case VARBINARY:
if (value instanceof ArrowBuf) {
ArrowBuf buf = (ArrowBuf) value;
writer.varBinary().writeVarBinary(0, (int) (buf.capacity()), buf);
} else if (value instanceof byte[]) {
byte[] bytes = (byte[]) value;
try (ArrowBuf buf = allocator.buffer(bytes.length)) {
buf.writeBytes(bytes);
writer.varBinary().writeVarBinary(0, (int) (buf.readableBytes()), buf);
}
}
break;
case DECIMAL:
int scale = ((ArrowType.Decimal) type).getScale();
if (value instanceof Double) {
BigDecimal bdVal = new BigDecimal((double) value);
bdVal = bdVal.setScale(scale, RoundingMode.HALF_UP);
writer.decimal().writeDecimal(bdVal);
} else {
BigDecimal scaledValue = ((BigDecimal) value).setScale(scale, RoundingMode.HALF_UP);
writer.decimal().writeDecimal(scaledValue);
}
break;
case VARCHAR:
if (value instanceof ArrowBuf) {
ArrowBuf buf = (ArrowBuf) value;
writer.varChar().writeVarChar(0, (int) (buf.readableBytes()), buf);
} else if (value instanceof byte[]) {
byte[] bytes = (byte[]) value;
try (ArrowBuf buf = allocator.buffer(bytes.length)) {
buf.writeBytes(bytes);
writer.varChar().writeVarChar(0, (int) (buf.readableBytes()), buf);
}
} else {
// always fall back to the object's toString()
byte[] bytes = value.toString().getBytes(Charsets.UTF_8);
try (ArrowBuf buf = allocator.buffer(bytes.length)) {
buf.writeBytes(bytes);
writer.varChar().writeVarChar(0, (int) (buf.readableBytes()), buf);
}
}
break;
case BIT:
if (value instanceof Integer && (int) value > 0) {
writer.bit().writeBit(1);
} else if (value instanceof Boolean && (boolean) value) {
writer.bit().writeBit(1);
} else {
writer.bit().writeBit(0);
}
break;
default:
throw new IllegalArgumentException("Unknown type " + type);
}
} catch (RuntimeException ex) {
String fieldName = (writer.getField() != null) ? writer.getField().getName() : "null_vector";
throw new RuntimeException("Unable to write value for field " + fieldName + " using value " + value, ex);
}
}
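Similarly, a minimal sketch (the listVector/allocator/rowNum variables are illustrative assumptions) of filling one LIST&lt;INT&gt; cell with three elements, one writeListValue call per element, again from code that can see the protected method:

// Minimal sketch: position the list writer on a row, open the list, write N elements, close it.
UnionListWriter listWriter = listVector.getWriter();
listWriter.setPosition(rowNum);
listWriter.startList();
for (int v : new int[] {1, 2, 3}) {
    writeListValue(listWriter, Types.MinorType.INT.getType(), allocator, v);
}
listWriter.endList();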
Use of org.apache.arrow.memory.ArrowBuf in project aws-athena-query-federation by awslabs.
From the class BlockAllocatorImpl, the method createBuffer:
/**
* Creates an ArrowBuf and registers it for later cleanup if the ArrowBuf isn't explicitly closed by the caller.
*
* @see com.amazonaws.athena.connector.lambda.data.BlockAllocator
*/
public ArrowBuf createBuffer(int size) {
ArrowBuf buffer = null;
try {
buffer = rootAllocator.buffer(size);
arrowBufs.add(buffer);
return buffer;
} catch (Exception ex) {
if (buffer != null) {
buffer.close();
}
throw ex;
}
}
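A short usage sketch (buffer size and contents are illustrative, and it assumes BlockAllocatorImpl's no-arg constructor and close() method): the caller may close the buffer itself, and anything left open is released when the allocator is closed.

// Minimal sketch: create a tracked buffer, use it, and rely on allocator close-time cleanup.
BlockAllocatorImpl blockAllocator = new BlockAllocatorImpl();
ArrowBuf buf = blockAllocator.createBuffer(64);
buf.writeBytes("hello".getBytes(Charsets.UTF_8));
// ... hand buf to a FieldWriter ...
blockAllocator.close();   // releases registered buffers that were not closed explicitly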
Use of org.apache.arrow.memory.ArrowBuf in project aws-athena-query-federation by awslabs.
From the class ExampleRecordHandler, the method makeFactory:
/**
* Since GeneratedRowWriter doesn't yet support complex types (STRUCT, LIST) we use this to
* create our own FieldWriters via a custom FieldWriterFactory. In this case we are producing
* FieldWriters that only work for our exact example schema. This will be enhanced with a more
* generic solution in a future release.
*/
private FieldWriterFactory makeFactory(Field field, RowContext rowContext) {
Types.MinorType fieldType = Types.getMinorTypeForArrowType(field.getType());
switch(fieldType) {
case LIST:
Field child = field.getChildren().get(0);
Types.MinorType childType = Types.getMinorTypeForArrowType(child.getType());
switch(childType) {
case LIST:
return (FieldVector vector, Extractor extractor, ConstraintProjector constraint) -> (FieldWriter) (Object context, int rowNum) -> {
UnionListWriter writer = ((ListVector) vector).getWriter();
writer.setPosition(rowNum);
writer.startList();
BaseWriter.ListWriter innerWriter = writer.list();
innerWriter.startList();
for (int i = 0; i < 3; i++) {
byte[] bytes = String.valueOf(1000 + i).getBytes(Charsets.UTF_8);
try (ArrowBuf buf = vector.getAllocator().buffer(bytes.length)) {
buf.writeBytes(bytes);
innerWriter.varChar().writeVarChar(0, (int) (buf.readableBytes()), buf);
}
}
innerWriter.endList();
writer.endList();
((ListVector) vector).setNotNull(rowNum);
return true;
};
case STRUCT:
return (FieldVector vector, Extractor extractor, ConstraintProjector constraint) -> (FieldWriter) (Object context, int rowNum) -> {
UnionListWriter writer = ((ListVector) vector).getWriter();
writer.setPosition(rowNum);
writer.startList();
BaseWriter.StructWriter structWriter = writer.struct();
structWriter.start();
byte[] bytes = "chars".getBytes(Charsets.UTF_8);
try (ArrowBuf buf = vector.getAllocator().buffer(bytes.length)) {
buf.writeBytes(bytes);
structWriter.varChar("varchar").writeVarChar(0, (int) (buf.readableBytes()), buf);
}
structWriter.bigInt("bigint").writeBigInt(100L);
structWriter.end();
writer.endList();
((ListVector) vector).setNotNull(rowNum);
return true;
};
default:
throw new IllegalArgumentException("Unsupported type " + childType);
}
case MAP:
return (FieldVector vector, Extractor extractor, ConstraintProjector constraint) -> (FieldWriter) (Object context, int rowNum) -> {
UnionMapWriter writer = ((MapVector) vector).getWriter();
writer.setPosition(rowNum);
writer.startMap();
writer.startEntry();
byte[] bytes = "chars".getBytes(Charsets.UTF_8);
try (ArrowBuf buf = vector.getAllocator().buffer(bytes.length)) {
buf.writeBytes(bytes);
writer.key().varChar("key").writeVarChar(0, (int) (buf.readableBytes()), buf);
}
writer.value().integer("value").writeInt(1001);
writer.endEntry();
writer.endMap();
((MapVector) vector).setNotNull(rowNum);
return true;
};
default:
throw new IllegalArgumentException("Unsupported type " + fieldType);
}
}
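For reference, a hypothetical sketch of exercising one of these factories directly for a single row; the method names create(...) and write(...) are assumed from the lambda shapes above, and listField/listVector/rowContext/rowNum are illustrative:

// Hypothetical sketch: build a FieldWriter for the LIST field and invoke it for one row.
FieldWriterFactory factory = makeFactory(listField, rowContext);
FieldWriter fieldWriter = factory.create(listVector, null /* extractor unused by these writers */, null /* no constraint */);
boolean wrote = fieldWriter.write(rowContext, rowNum);   // the lambdas above always return true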
Use of org.apache.arrow.memory.ArrowBuf in project hive by apache.
From the class Deserializer, the method readList:
private void readList(FieldVector arrowVector, ListColumnVector hiveVector, ListTypeInfo typeInfo) {
final int size = arrowVector.getValueCount();
hiveVector.ensureSize(size, false);
final ArrowBuf offsets = arrowVector.getOffsetBuffer();
final int OFFSET_WIDTH = 4;
read(arrowVector.getChildrenFromFields().get(0), hiveVector.child, typeInfo.getListElementTypeInfo());
for (int i = 0; i < size; i++) {
if (arrowVector.isNull(i)) {
VectorizedBatchUtil.setNullColIsNullValue(hiveVector, i);
} else {
hiveVector.isNull[i] = false;
final int offset = offsets.getInt(i * OFFSET_WIDTH);
hiveVector.offsets[i] = offset;
hiveVector.lengths[i] = offsets.getInt((i + 1) * OFFSET_WIDTH) - offset;
}
}
}
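To make the offset arithmetic concrete, a small worked example (the sample data is illustrative, not from the source):

// For an Arrow list vector holding [[10, 20], [], [30]], the 4-byte offset buffer contains {0, 2, 2, 3}.
// The loop above therefore sets, for the non-null rows:
//   hiveVector.offsets = {0, 2, 2}   // where each row's elements start in the child vector
//   hiveVector.lengths = {2, 0, 1}   // offsets.getInt((i + 1) * OFFSET_WIDTH) - offsets.getInt(i * OFFSET_WIDTH)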