Search in sources :

Example 1 with ArrowBuf

use of org.apache.arrow.memory.ArrowBuf in project aws-athena-query-federation by awslabs.

the class BlockUtils method writeStructValue.

/**
 * Used to write a value into a specific child field within a Struct. Multiple calls to this method per-cell are
 * expected in order to write to all N fields of a Struct. A null value is a no-op: nothing is written.
 *
 * @param writer The FieldWriter (already positioned at the row and list entry number) that we want to write into.
 * @param field The child field we are attempting to write into.
 * @param allocator The BufferAllocator that can be used for allocating Arrow Buffers for fields which require
 * conversion to Arrow Buff before being written.
 * @param value The value to write.
 * @throws RuntimeException wrapping any RuntimeException raised while writing, e.g. a ClassCastException when the
 * value's Java type is not one handled for the field's type, or the IllegalArgumentException raised for an
 * unsupported field type.
 * @note This method and its List complement violate the DRY mantra because ListWriter and StructWriter don't share
 * a meaningful ancestor despite having identical methods. This requires us to either further wrap and abstract the writer
 * or duplicate some code. In a future release we hope to have contributed a better option to Apache Arrow which allows
 * us to simplify this method.
 */
@VisibleForTesting
protected static void writeStructValue(StructWriter writer, Field field, BufferAllocator allocator, Object value) {
    if (value == null) {
        return;
    }
    ArrowType type = field.getType();
    try {
        switch(Types.getMinorTypeForArrowType(type)) {
            case TIMESTAMPMILLITZ:
                long dateTimeWithZone;
                if (value instanceof ZonedDateTime) {
                    dateTimeWithZone = DateTimeFormatterUtil.packDateTimeWithZone((ZonedDateTime) value);
                } else if (value instanceof LocalDateTime) {
                    dateTimeWithZone = DateTimeFormatterUtil.packDateTimeWithZone(((LocalDateTime) value).atZone(UTC_ZONE_ID).toInstant().toEpochMilli(), UTC_ZONE_ID.getId());
                } else if (value instanceof Date) {
                    long ldtInLong = Instant.ofEpochMilli(((Date) value).getTime()).atZone(UTC_ZONE_ID).toInstant().toEpochMilli();
                    dateTimeWithZone = DateTimeFormatterUtil.packDateTimeWithZone(ldtInLong, UTC_ZONE_ID.getId());
                } else {
                    // Anything else is expected to already be the packed long representation.
                    dateTimeWithZone = (long) value;
                }
                writer.timeStampMilliTZ(field.getName()).writeTimeStampMilliTZ(dateTimeWithZone);
                // Without this break the value fell through into DATEMILLI, which either failed with a
                // ClassCastException (ZonedDateTime/LocalDateTime) or issued a second, date-milli write.
                break;
            case DATEMILLI:
                if (value instanceof Date) {
                    writer.dateMilli(field.getName()).writeDateMilli(((Date) value).getTime());
                } else {
                    writer.dateMilli(field.getName()).writeDateMilli((long) value);
                }
                break;
            case DATEDAY:
                if (value instanceof Date) {
                    org.joda.time.Days days = org.joda.time.Days.daysBetween(EPOCH, new org.joda.time.DateTime(((Date) value).getTime()));
                    writer.dateDay(field.getName()).writeDateDay(days.getDays());
                } else if (value instanceof LocalDate) {
                    int days = (int) ((LocalDate) value).toEpochDay();
                    writer.dateDay(field.getName()).writeDateDay(days);
                } else if (value instanceof Long) {
                    writer.dateDay(field.getName()).writeDateDay(((Long) value).intValue());
                } else {
                    writer.dateDay(field.getName()).writeDateDay((int) value);
                }
                break;
            case FLOAT8:
                writer.float8(field.getName()).writeFloat8((double) value);
                break;
            case FLOAT4:
                writer.float4(field.getName()).writeFloat4((float) value);
                break;
            case INT:
                if (value instanceof Long) {
                    // This may seem odd at first but many frameworks (like Presto) use long as the preferred
                    // native java type for representing integers. We do this to keep type conversions simple.
                    writer.integer(field.getName()).writeInt(((Long) value).intValue());
                } else {
                    writer.integer(field.getName()).writeInt((int) value);
                }
                break;
            case TINYINT:
                writer.tinyInt(field.getName()).writeTinyInt((byte) value);
                break;
            case SMALLINT:
                writer.smallInt(field.getName()).writeSmallInt((short) value);
                break;
            case UINT1:
                writer.uInt1(field.getName()).writeUInt1((byte) value);
                break;
            case UINT2:
                writer.uInt2(field.getName()).writeUInt2((char) value);
                break;
            case UINT4:
                writer.uInt4(field.getName()).writeUInt4((int) value);
                break;
            case UINT8:
                writer.uInt8(field.getName()).writeUInt8((long) value);
                break;
            case BIGINT:
                writer.bigInt(field.getName()).writeBigInt((long) value);
                break;
            case VARBINARY:
                if (value instanceof ArrowBuf) {
                    ArrowBuf buf = (ArrowBuf) value;
                    // NOTE(review): this uses capacity() while the VARCHAR ArrowBuf branch uses readableBytes();
                    // confirm whether the full capacity is really intended here.
                    writer.varBinary(field.getName()).writeVarBinary(0, (int) (buf.capacity()), buf);
                } else if (value instanceof byte[]) {
                    byte[] bytes = (byte[]) value;
                    // Temporary buffer is released as soon as the writer has consumed it.
                    try (ArrowBuf buf = allocator.buffer(bytes.length)) {
                        buf.writeBytes(bytes);
                        writer.varBinary(field.getName()).writeVarBinary(0, (int) (buf.readableBytes()), buf);
                    }
                }
                break;
            case DECIMAL:
                int scale = ((ArrowType.Decimal) type).getScale();
                int precision = ((ArrowType.Decimal) type).getPrecision();
                if (value instanceof Double) {
                    BigDecimal bdVal = new BigDecimal((double) value);
                    bdVal = bdVal.setScale(scale, RoundingMode.HALF_UP);
                    writer.decimal(field.getName(), scale, precision).writeDecimal(bdVal);
                } else {
                    BigDecimal scaledValue = ((BigDecimal) value).setScale(scale, RoundingMode.HALF_UP);
                    writer.decimal(field.getName(), scale, precision).writeDecimal(scaledValue);
                }
                break;
            case VARCHAR:
                if (value instanceof String) {
                    byte[] bytes = ((String) value).getBytes(Charsets.UTF_8);
                    try (ArrowBuf buf = allocator.buffer(bytes.length)) {
                        buf.writeBytes(bytes);
                        writer.varChar(field.getName()).writeVarChar(0, (int) (buf.readableBytes()), buf);
                    }
                } else if (value instanceof ArrowBuf) {
                    ArrowBuf buf = (ArrowBuf) value;
                    writer.varChar(field.getName()).writeVarChar(0, (int) (buf.readableBytes()), buf);
                } else if (value instanceof byte[]) {
                    byte[] bytes = (byte[]) value;
                    try (ArrowBuf buf = allocator.buffer(bytes.length)) {
                        buf.writeBytes(bytes);
                        writer.varChar(field.getName()).writeVarChar(0, (int) (buf.readableBytes()), buf);
                    }
                }
                break;
            case BIT:
                // Positive Integer or Boolean true map to 1; every other value maps to 0.
                if (value instanceof Integer && (int) value > 0) {
                    writer.bit(field.getName()).writeBit(1);
                } else if (value instanceof Boolean && (boolean) value) {
                    writer.bit(field.getName()).writeBit(1);
                } else {
                    writer.bit(field.getName()).writeBit(0);
                }
                break;
            default:
                throw new IllegalArgumentException("Unknown type " + type);
        }
    } catch (RuntimeException ex) {
        throw new RuntimeException("Unable to write value for field " + field.getName() + " using value " + value, ex);
    }
}
Also used : LocalDateTime(java.time.LocalDateTime) ArrowBuf(org.apache.arrow.memory.ArrowBuf) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) LocalDate(java.time.LocalDate) Date(java.util.Date) LocalDate(java.time.LocalDate) BigDecimal(java.math.BigDecimal) BigDecimal(java.math.BigDecimal) ZonedDateTime(java.time.ZonedDateTime) VisibleForTesting(org.apache.arrow.util.VisibleForTesting)

Example 2 with ArrowBuf

use of org.apache.arrow.memory.ArrowBuf in project aws-athena-query-federation by awslabs.

the class BlockUtils method writeListValue.

/**
 * Used to write an individual value into a List field, multiple calls to this method per-cell are expected in order
 * to write the N values of a list of size N. A null value is a no-op: nothing is written.
 *
 * @param writer The FieldWriter (already positioned at the row and list entry number) that we want to write into.
 * @param type The concrete type of the List's values.
 * @param allocator The BufferAllocator that can be used for allocating Arrow Buffers for fields which require
 * conversion to Arrow Buff before being written.
 * @param value The value to write.
 * @throws RuntimeException wrapping any RuntimeException raised while writing, e.g. a ClassCastException when the
 * value's Java type is not one handled for the given type, or the IllegalArgumentException raised for an
 * unsupported type.
 * @note This method and its Struct complement violate the DRY mantra because ListWriter and StructWriter don't share
 * a meaningful ancestor despite having identical methods. This requires us to either further wrap and abstract the writer
 * or duplicate some code. In a future release we hope to have contributed a better option to Apache Arrow which allows
 * us to simplify this method.
 */
protected static void writeListValue(FieldWriter writer, ArrowType type, BufferAllocator allocator, Object value) {
    if (value == null) {
        return;
    }
    try {
        // TODO: add all types
        switch(Types.getMinorTypeForArrowType(type)) {
            case TIMESTAMPMILLITZ:
                long dateTimeWithZone;
                if (value instanceof ZonedDateTime) {
                    dateTimeWithZone = DateTimeFormatterUtil.packDateTimeWithZone((ZonedDateTime) value);
                } else if (value instanceof LocalDateTime) {
                    dateTimeWithZone = DateTimeFormatterUtil.packDateTimeWithZone(((LocalDateTime) value).atZone(UTC_ZONE_ID).toInstant().toEpochMilli(), UTC_ZONE_ID.getId());
                } else if (value instanceof Date) {
                    long ldtInLong = Instant.ofEpochMilli(((Date) value).getTime()).atZone(UTC_ZONE_ID).toInstant().toEpochMilli();
                    dateTimeWithZone = DateTimeFormatterUtil.packDateTimeWithZone(ldtInLong, UTC_ZONE_ID.getId());
                } else {
                    // Anything else is expected to already be the packed long representation.
                    dateTimeWithZone = (long) value;
                }
                writer.writeTimeStampMilliTZ(dateTimeWithZone);
                // Without this break the value fell through into DATEMILLI, which either failed with a
                // ClassCastException (ZonedDateTime/LocalDateTime) or issued a second, date-milli write.
                break;
            case DATEMILLI:
                if (value instanceof Date) {
                    writer.writeDateMilli(((Date) value).getTime());
                } else {
                    writer.writeDateMilli((long) value);
                }
                break;
            case DATEDAY:
                if (value instanceof Date) {
                    org.joda.time.Days days = org.joda.time.Days.daysBetween(EPOCH, new org.joda.time.DateTime(((Date) value).getTime()));
                    writer.writeDateDay(days.getDays());
                } else if (value instanceof LocalDate) {
                    int days = (int) ((LocalDate) value).toEpochDay();
                    writer.writeDateDay(days);
                } else if (value instanceof Long) {
                    writer.writeDateDay(((Long) value).intValue());
                } else {
                    writer.writeDateDay((int) value);
                }
                break;
            case FLOAT8:
                writer.float8().writeFloat8((double) value);
                break;
            case FLOAT4:
                writer.float4().writeFloat4((float) value);
                break;
            case INT:
                if (value instanceof Long) {
                    // This may seem odd at first but many frameworks (like Presto) use long as the preferred
                    // native java type for representing integers. We do this to keep type conversions simple.
                    writer.integer().writeInt(((Long) value).intValue());
                } else {
                    writer.integer().writeInt((int) value);
                }
                break;
            case TINYINT:
                writer.tinyInt().writeTinyInt((byte) value);
                break;
            case SMALLINT:
                writer.smallInt().writeSmallInt((short) value);
                break;
            case UINT1:
                writer.uInt1().writeUInt1((byte) value);
                break;
            case UINT2:
                writer.uInt2().writeUInt2((char) value);
                break;
            case UINT4:
                writer.uInt4().writeUInt4((int) value);
                break;
            case UINT8:
                writer.uInt8().writeUInt8((long) value);
                break;
            case BIGINT:
                writer.bigInt().writeBigInt((long) value);
                break;
            case VARBINARY:
                if (value instanceof ArrowBuf) {
                    ArrowBuf buf = (ArrowBuf) value;
                    // NOTE(review): this uses capacity() while the VARCHAR ArrowBuf branch uses readableBytes();
                    // confirm whether the full capacity is really intended here.
                    writer.varBinary().writeVarBinary(0, (int) (buf.capacity()), buf);
                } else if (value instanceof byte[]) {
                    byte[] bytes = (byte[]) value;
                    // Temporary buffer is released as soon as the writer has consumed it.
                    try (ArrowBuf buf = allocator.buffer(bytes.length)) {
                        buf.writeBytes(bytes);
                        writer.varBinary().writeVarBinary(0, (int) (buf.readableBytes()), buf);
                    }
                }
                break;
            case DECIMAL:
                int scale = ((ArrowType.Decimal) type).getScale();
                if (value instanceof Double) {
                    BigDecimal bdVal = new BigDecimal((double) value);
                    bdVal = bdVal.setScale(scale, RoundingMode.HALF_UP);
                    writer.decimal().writeDecimal(bdVal);
                } else {
                    BigDecimal scaledValue = ((BigDecimal) value).setScale(scale, RoundingMode.HALF_UP);
                    writer.decimal().writeDecimal(scaledValue);
                }
                break;
            case VARCHAR:
                if (value instanceof ArrowBuf) {
                    ArrowBuf buf = (ArrowBuf) value;
                    writer.varChar().writeVarChar(0, (int) (buf.readableBytes()), buf);
                } else if (value instanceof byte[]) {
                    byte[] bytes = (byte[]) value;
                    try (ArrowBuf buf = allocator.buffer(bytes.length)) {
                        buf.writeBytes(bytes);
                        writer.varChar().writeVarChar(0, (int) (buf.readableBytes()), buf);
                    }
                } else {
                    // always fall back to the object's toString()
                    byte[] bytes = value.toString().getBytes(Charsets.UTF_8);
                    try (ArrowBuf buf = allocator.buffer(bytes.length)) {
                        buf.writeBytes(bytes);
                        writer.varChar().writeVarChar(0, (int) (buf.readableBytes()), buf);
                    }
                }
                break;
            case BIT:
                // Positive Integer or Boolean true map to 1; every other value maps to 0.
                if (value instanceof Integer && (int) value > 0) {
                    writer.bit().writeBit(1);
                } else if (value instanceof Boolean && (boolean) value) {
                    writer.bit().writeBit(1);
                } else {
                    writer.bit().writeBit(0);
                }
                break;
            default:
                throw new IllegalArgumentException("Unknown type " + type);
        }
    } catch (RuntimeException ex) {
        String fieldName = (writer.getField() != null) ? writer.getField().getName() : "null_vector";
        throw new RuntimeException("Unable to write value for field " + fieldName + " using value " + value, ex);
    }
}
Also used : LocalDateTime(java.time.LocalDateTime) ArrowBuf(org.apache.arrow.memory.ArrowBuf) LocalDate(java.time.LocalDate) Date(java.util.Date) LocalDate(java.time.LocalDate) BigDecimal(java.math.BigDecimal) BigDecimal(java.math.BigDecimal) ZonedDateTime(java.time.ZonedDateTime)

Example 3 with ArrowBuf

use of org.apache.arrow.memory.ArrowBuf in project aws-athena-query-federation by awslabs.

the class BlockAllocatorImpl method createBuffer.

/**
 * Allocates an ArrowBuf of the requested size from the root allocator and adds it to the tracked buffer collection
 * so it can be cleaned up later if the caller never closes it explicitly.
 *
 * @see com.amazonaws.athena.connector.lambda.data.BlockAllocator
 */
public ArrowBuf createBuffer(int size) {
    ArrowBuf allocated = null;
    try {
        allocated = rootAllocator.buffer(size);
        arrowBufs.add(allocated);
    } catch (Exception failure) {
        // If allocation succeeded but tracking failed, release the buffer before propagating the failure.
        if (allocated != null) {
            allocated.close();
        }
        throw failure;
    }
    return allocated;
}
Also used : ArrowBuf(org.apache.arrow.memory.ArrowBuf)

Example 4 with ArrowBuf

use of org.apache.arrow.memory.ArrowBuf in project aws-athena-query-federation by awslabs.

the class ExampleRecordHandler method makeFactory.

/**
 * Since GeneratedRowWriter doesn't yet support complex types (STRUCT, LIST) we use this to
 * create our own FieldWriters via a custom FieldWriterFactory. In this case we are producing
 * FieldWriters that only work for our exact example schema. This will be enhanced with a more
 * generic solution in a future release.
 *
 * Supported shapes, all of which write fixed, hard-coded example data:
 * LIST of LIST (three varchar entries "1000", "1001", "1002" in one inner list),
 * LIST of STRUCT (one struct with varchar "chars" and bigint 100), and
 * MAP (one entry with varchar key "chars" and int value 1001).
 *
 * @param field The field to build a factory for; its type (and, for LIST, its first child's type) selects the writer.
 * @param rowContext Not referenced by this method.
 * @return A FieldWriterFactory whose FieldWriter ignores its extractor, constraint and context arguments
 * and always returns true.
 * @throws IllegalArgumentException if the field type, or the child type of a LIST field, is not one of the above.
 */
private FieldWriterFactory makeFactory(Field field, RowContext rowContext) {
    Types.MinorType fieldType = Types.getMinorTypeForArrowType(field.getType());
    switch(fieldType) {
        case LIST:
            // The list's element type is taken from its first child field.
            Field child = field.getChildren().get(0);
            Types.MinorType childType = Types.getMinorTypeForArrowType(child.getType());
            switch(childType) {
                case LIST:
                    return (FieldVector vector, Extractor extractor, ConstraintProjector constraint) -> (FieldWriter) (Object context, int rowNum) -> {
                        UnionListWriter writer = ((ListVector) vector).getWriter();
                        writer.setPosition(rowNum);
                        // Outer list containing a single inner list.
                        writer.startList();
                        BaseWriter.ListWriter innerWriter = writer.list();
                        innerWriter.startList();
                        for (int i = 0; i < 3; i++) {
                            byte[] bytes = String.valueOf(1000 + i).getBytes(Charsets.UTF_8);
                            // Temporary buffer is closed right after the varchar write.
                            try (ArrowBuf buf = vector.getAllocator().buffer(bytes.length)) {
                                buf.writeBytes(bytes);
                                innerWriter.varChar().writeVarChar(0, (int) (buf.readableBytes()), buf);
                            }
                        }
                        innerWriter.endList();
                        writer.endList();
                        // NOTE(review): presumably marks the row as non-null in the vector's validity data; confirm.
                        ((ListVector) vector).setNotNull(rowNum);
                        return true;
                    };
                case STRUCT:
                    return (FieldVector vector, Extractor extractor, ConstraintProjector constraint) -> (FieldWriter) (Object context, int rowNum) -> {
                        UnionListWriter writer = ((ListVector) vector).getWriter();
                        writer.setPosition(rowNum);
                        // List containing a single struct with a "varchar" and a "bigint" child.
                        writer.startList();
                        BaseWriter.StructWriter structWriter = writer.struct();
                        structWriter.start();
                        byte[] bytes = "chars".getBytes(Charsets.UTF_8);
                        try (ArrowBuf buf = vector.getAllocator().buffer(bytes.length)) {
                            buf.writeBytes(bytes);
                            structWriter.varChar("varchar").writeVarChar(0, (int) (buf.readableBytes()), buf);
                        }
                        structWriter.bigInt("bigint").writeBigInt(100L);
                        structWriter.end();
                        writer.endList();
                        ((ListVector) vector).setNotNull(rowNum);
                        return true;
                    };
                default:
                    throw new IllegalArgumentException("Unsupported type " + childType);
            }
        case MAP:
            return (FieldVector vector, Extractor extractor, ConstraintProjector constraint) -> (FieldWriter) (Object context, int rowNum) -> {
                UnionMapWriter writer = ((MapVector) vector).getWriter();
                writer.setPosition(rowNum);
                // Map containing a single key/value entry.
                writer.startMap();
                writer.startEntry();
                byte[] bytes = "chars".getBytes(Charsets.UTF_8);
                try (ArrowBuf buf = vector.getAllocator().buffer(bytes.length)) {
                    buf.writeBytes(bytes);
                    writer.key().varChar("key").writeVarChar(0, (int) (buf.readableBytes()), buf);
                }
                writer.value().integer("value").writeInt(1001);
                writer.endEntry();
                writer.endMap();
                ((MapVector) vector).setNotNull(rowNum);
                return true;
            };
        default:
            throw new IllegalArgumentException("Unsupported type " + fieldType);
    }
}
Also used : Types(org.apache.arrow.vector.types.Types) BaseWriter(org.apache.arrow.vector.complex.writer.BaseWriter) ArrowBuf(org.apache.arrow.memory.ArrowBuf) ConstraintProjector(com.amazonaws.athena.connector.lambda.domain.predicate.ConstraintProjector) FieldVector(org.apache.arrow.vector.FieldVector) FieldWriter(com.amazonaws.athena.connector.lambda.data.writers.fieldwriters.FieldWriter) Field(org.apache.arrow.vector.types.pojo.Field) UnionMapWriter(org.apache.arrow.vector.complex.impl.UnionMapWriter) UnionListWriter(org.apache.arrow.vector.complex.impl.UnionListWriter) ListVector(org.apache.arrow.vector.complex.ListVector) BigIntExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.BigIntExtractor) DecimalExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.DecimalExtractor) DateDayExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.DateDayExtractor) TinyIntExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.TinyIntExtractor) VarBinaryExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.VarBinaryExtractor) BitExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.BitExtractor) IntExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.IntExtractor) Extractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.Extractor) Float8Extractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.Float8Extractor) SmallIntExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.SmallIntExtractor) VarCharExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.VarCharExtractor) Float4Extractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.Float4Extractor) DateMilliExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.DateMilliExtractor) MapVector(org.apache.arrow.vector.complex.MapVector)

Example 5 with ArrowBuf

use of org.apache.arrow.memory.ArrowBuf in project hive by apache.

the class Deserializer method readList.

/**
 * Copies an Arrow list vector into a Hive ListColumnVector: the child values are read first, then each row's
 * null flag, start offset and length are filled in from the Arrow offset buffer.
 */
private void readList(FieldVector arrowVector, ListColumnVector hiveVector, ListTypeInfo typeInfo) {
    // Offset entries are read from the offset buffer at multiples of this width.
    final int bytesPerOffset = 4;
    final int rowCount = arrowVector.getValueCount();
    hiveVector.ensureSize(rowCount, false);
    final ArrowBuf offsetBuffer = arrowVector.getOffsetBuffer();
    read(arrowVector.getChildrenFromFields().get(0), hiveVector.child, typeInfo.getListElementTypeInfo());
    for (int row = 0; row < rowCount; row++) {
        if (!arrowVector.isNull(row)) {
            hiveVector.isNull[row] = false;
            final int start = offsetBuffer.getInt(row * bytesPerOffset);
            hiveVector.offsets[row] = start;
            // The length is the distance from this row's offset to the next entry's offset.
            final int end = offsetBuffer.getInt((row + 1) * bytesPerOffset);
            hiveVector.lengths[row] = end - start;
        } else {
            VectorizedBatchUtil.setNullColIsNullValue(hiveVector, row);
        }
    }
}
Also used : ArrowBuf(org.apache.arrow.memory.ArrowBuf)

Aggregations

ArrowBuf (org.apache.arrow.memory.ArrowBuf)13 BigDecimal (java.math.BigDecimal)3 LocalDate (java.time.LocalDate)3 LocalDateTime (java.time.LocalDateTime)3 ZonedDateTime (java.time.ZonedDateTime)3 Date (java.util.Date)3 RootAllocator (org.apache.arrow.memory.RootAllocator)3 VisibleForTesting (org.apache.arrow.util.VisibleForTesting)3 FlatBufferBuilder (com.google.flatbuffers.FlatBufferBuilder)2 Table (io.deephaven.engine.table.Table)2 SafeCloseable (io.deephaven.util.SafeCloseable)2 ByteBuffer (java.nio.ByteBuffer)2 ListVector (org.apache.arrow.vector.complex.ListVector)2 ArrowType (org.apache.arrow.vector.types.pojo.ArrowType)2 Test (org.junit.Test)2 BigIntExtractor (com.amazonaws.athena.connector.lambda.data.writers.extractors.BigIntExtractor)1 BitExtractor (com.amazonaws.athena.connector.lambda.data.writers.extractors.BitExtractor)1 DateDayExtractor (com.amazonaws.athena.connector.lambda.data.writers.extractors.DateDayExtractor)1 DateMilliExtractor (com.amazonaws.athena.connector.lambda.data.writers.extractors.DateMilliExtractor)1 DecimalExtractor (com.amazonaws.athena.connector.lambda.data.writers.extractors.DecimalExtractor)1