Search in sources :

Example 1 with UnionListWriter

use of org.apache.arrow.vector.complex.impl.UnionListWriter in project aws-athena-query-federation by awslabs.

In class ExampleRecordHandler, method makeFactory.

/**
 * Since GeneratedRowWriter doesn't yet support complex types (STRUCT, LIST) we use this to
 * create our own FieldWriters via a custom FieldWriterFactory. In this case we are producing
 * FieldWriters that only work for our exact example schema. This will be enhanced with a more
 * generic solution in a future release.
 *
 * <p>Every writer produced here emits fixed, hard-coded sample values; the {@code extractor},
 * {@code constraint} and {@code context} arguments handed to the lambdas are not consulted, and
 * each writer always returns {@code true}.
 *
 * @param field The Arrow field to build a factory for. Only LIST (whose first child is LIST or STRUCT)
 * and MAP are handled.
 * @param rowContext Not used by this method.
 * @return A FieldWriterFactory whose FieldWriter populates the given row of the vector with sample data.
 * @throws IllegalArgumentException if the field type, or the child type of a LIST field, is not one of
 * the supported combinations.
 */
private FieldWriterFactory makeFactory(Field field, RowContext rowContext) {
    Types.MinorType fieldType = Types.getMinorTypeForArrowType(field.getType());
    switch(fieldType) {
        case LIST:
            // The element type of the list is described by its first (index 0) child field.
            Field child = field.getChildren().get(0);
            Types.MinorType childType = Types.getMinorTypeForArrowType(child.getType());
            switch(childType) {
                case LIST:
                    // LIST of LIST: writes one inner list holding the strings "1000", "1001" and "1002".
                    return (FieldVector vector, Extractor extractor, ConstraintProjector constraint) -> (FieldWriter) (Object context, int rowNum) -> {
                        UnionListWriter writer = ((ListVector) vector).getWriter();
                        writer.setPosition(rowNum);
                        writer.startList();
                        BaseWriter.ListWriter innerWriter = writer.list();
                        innerWriter.startList();
                        for (int i = 0; i < 3; i++) {
                            byte[] bytes = String.valueOf(1000 + i).getBytes(Charsets.UTF_8);
                            // Temporary buffer is released by try-with-resources right after the write;
                            // presumably writeVarChar copies the bytes into the vector - TODO confirm.
                            try (ArrowBuf buf = vector.getAllocator().buffer(bytes.length)) {
                                buf.writeBytes(bytes);
                                innerWriter.varChar().writeVarChar(0, (int) (buf.readableBytes()), buf);
                            }
                        }
                        innerWriter.endList();
                        writer.endList();
                        ((ListVector) vector).setNotNull(rowNum);
                        return true;
                    };
                case STRUCT:
                    // LIST of STRUCT: writes a single struct element {varchar: "chars", bigint: 100}.
                    return (FieldVector vector, Extractor extractor, ConstraintProjector constraint) -> (FieldWriter) (Object context, int rowNum) -> {
                        UnionListWriter writer = ((ListVector) vector).getWriter();
                        writer.setPosition(rowNum);
                        writer.startList();
                        BaseWriter.StructWriter structWriter = writer.struct();
                        structWriter.start();
                        byte[] bytes = "chars".getBytes(Charsets.UTF_8);
                        try (ArrowBuf buf = vector.getAllocator().buffer(bytes.length)) {
                            buf.writeBytes(bytes);
                            structWriter.varChar("varchar").writeVarChar(0, (int) (buf.readableBytes()), buf);
                        }
                        structWriter.bigInt("bigint").writeBigInt(100L);
                        structWriter.end();
                        writer.endList();
                        ((ListVector) vector).setNotNull(rowNum);
                        return true;
                    };
                default:
                    throw new IllegalArgumentException("Unsupported type " + childType);
            }
        // No fall-through from LIST: every branch of the inner switch returns or throws.
        case MAP:
            // MAP: writes a single entry with key "chars" and integer value 1001.
            return (FieldVector vector, Extractor extractor, ConstraintProjector constraint) -> (FieldWriter) (Object context, int rowNum) -> {
                UnionMapWriter writer = ((MapVector) vector).getWriter();
                writer.setPosition(rowNum);
                writer.startMap();
                writer.startEntry();
                byte[] bytes = "chars".getBytes(Charsets.UTF_8);
                try (ArrowBuf buf = vector.getAllocator().buffer(bytes.length)) {
                    buf.writeBytes(bytes);
                    writer.key().varChar("key").writeVarChar(0, (int) (buf.readableBytes()), buf);
                }
                writer.value().integer("value").writeInt(1001);
                writer.endEntry();
                writer.endMap();
                ((MapVector) vector).setNotNull(rowNum);
                return true;
            };
        default:
            throw new IllegalArgumentException("Unsupported type " + fieldType);
    }
}
Also used : Types(org.apache.arrow.vector.types.Types) BaseWriter(org.apache.arrow.vector.complex.writer.BaseWriter) ArrowBuf(org.apache.arrow.memory.ArrowBuf) ConstraintProjector(com.amazonaws.athena.connector.lambda.domain.predicate.ConstraintProjector) FieldVector(org.apache.arrow.vector.FieldVector) FieldWriter(com.amazonaws.athena.connector.lambda.data.writers.fieldwriters.FieldWriter) Field(org.apache.arrow.vector.types.pojo.Field) UnionMapWriter(org.apache.arrow.vector.complex.impl.UnionMapWriter) UnionListWriter(org.apache.arrow.vector.complex.impl.UnionListWriter) ListVector(org.apache.arrow.vector.complex.ListVector) BigIntExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.BigIntExtractor) DecimalExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.DecimalExtractor) DateDayExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.DateDayExtractor) TinyIntExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.TinyIntExtractor) VarBinaryExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.VarBinaryExtractor) BitExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.BitExtractor) IntExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.IntExtractor) Extractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.Extractor) Float8Extractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.Float8Extractor) SmallIntExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.SmallIntExtractor) VarCharExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.VarCharExtractor) Float4Extractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.Float4Extractor) DateMilliExtractor(com.amazonaws.athena.connector.lambda.data.writers.extractors.DateMilliExtractor) MapVector(org.apache.arrow.vector.complex.MapVector)

Example 2 with UnionListWriter

use of org.apache.arrow.vector.complex.impl.UnionListWriter in project aws-athena-query-federation by awslabs.

In class BlockUtils, method setComplexValue.

/**
 * Writes a complex value (Map, List or Struct) into the given FieldVector at the requested row.
 *
 * <p>Dispatch is by the runtime class of {@code vector}. The MapVector test comes first; in Arrow,
 * MapVector is a subclass of ListVector, so testing ListVector first would capture map vectors too.
 * For list vectors a {@code null} value is skipped silently and the row is left untouched; the map and
 * struct paths hand {@code value} to their helper without a null check here.
 *
 * @param vector The FieldVector into which we should write the provided value.
 * @param pos The row number that the value should be written to.
 * @param resolver The FieldResolver that can be used to map your value to the complex type (mostly for Structs, Maps).
 * @param value The value to write.
 * @throws RuntimeException if the vector is not a MapVector, ListVector or StructVector.
 * @note This method incurs more Object overhead (heap churn) than using Arrow's native interface. Users of this Utility
 * should weigh their performance needs vs. the readability / ease of use.
 */
public static void setComplexValue(FieldVector vector, int pos, FieldResolver resolver, Object value) {
    // Maps: position the writer, delegate the entries, then close the map.
    if (vector instanceof MapVector) {
        MapVector mapVector = (MapVector) vector;
        UnionMapWriter mapWriter = mapVector.getWriter();
        mapWriter.setPosition(pos);
        writeMap(vector.getAllocator(), mapWriter, vector.getField(), pos, value, resolver);
        mapWriter.endMap();
        return;
    }

    // Lists: nothing is written for a null value; otherwise the row is marked not-null after writing.
    if (vector instanceof ListVector) {
        if (value == null) {
            return;
        }
        ListVector listVector = (ListVector) vector;
        UnionListWriter listWriter = listVector.getWriter();
        listWriter.setPosition(pos);
        writeList(vector.getAllocator(), listWriter, vector.getField(), pos, ((List) value), resolver);
        listVector.setNotNull(pos);
        return;
    }

    // Structs: position the writer and delegate the field-by-field write.
    if (vector instanceof StructVector) {
        StructVector structVector = (StructVector) vector;
        StructWriter structWriter = structVector.getWriter();
        structWriter.setPosition(pos);
        writeStruct(vector.getAllocator(), structWriter, vector.getField(), pos, value, resolver);
        return;
    }

    throw new RuntimeException("Unsupported 'Complex' vector " + vector.getClass().getSimpleName() + " for field " + vector.getField().getName());
}
Also used : StructWriter(org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter) StructVector(org.apache.arrow.vector.complex.StructVector) UnionMapWriter(org.apache.arrow.vector.complex.impl.UnionMapWriter) ListVector(org.apache.arrow.vector.complex.ListVector) UnionListWriter(org.apache.arrow.vector.complex.impl.UnionListWriter) MapVector(org.apache.arrow.vector.complex.MapVector)

Example 3 with UnionListWriter

use of org.apache.arrow.vector.complex.impl.UnionListWriter in project aws-athena-query-federation by awslabs.

In class BlockUtils, method unsetRow.

/**
 * Marks a row as 'unset' in every field vector of a Block, as a signal to a later processing stage
 * that the row is irrelevant (useful in some filtering situations). How a row is unset depends on
 * the field type, so not every field type is supported.
 *
 * <p>For all handled types except LIST the row is set to null on the typed vector. For LIST the
 * row is written as an empty list via the vector's writer and the writer's value count is then set
 * to 0. Vectors are processed in the order returned by the Block; if an unsupported type is
 * encountered, vectors visited before it have already been modified.
 *
 * @param row The row number to unset in the provided Block.
 * @param block The Block where we'd like to unset the specified row.
 * @throws IllegalArgumentException if a vector's minor type is not one of the handled types.
 */
public static void unsetRow(int row, Block block) {
    for (FieldVector column : block.getFieldVectors()) {
        switch(column.getMinorType()) {
            // Boolean
            case BIT:
                ((BitVector) column).setNull(row);
                break;

            // Signed integers
            case TINYINT:
                ((TinyIntVector) column).setNull(row);
                break;
            case SMALLINT:
                ((SmallIntVector) column).setNull(row);
                break;
            case INT:
                ((IntVector) column).setNull(row);
                break;
            case BIGINT:
                ((BigIntVector) column).setNull(row);
                break;

            // Unsigned integers
            case UINT1:
                ((UInt1Vector) column).setNull(row);
                break;
            case UINT2:
                ((UInt2Vector) column).setNull(row);
                break;
            case UINT4:
                ((UInt4Vector) column).setNull(row);
                break;
            case UINT8:
                ((UInt8Vector) column).setNull(row);
                break;

            // Floating point and decimal
            case FLOAT4:
                ((Float4Vector) column).setNull(row);
                break;
            case FLOAT8:
                ((Float8Vector) column).setNull(row);
                break;
            case DECIMAL:
                ((DecimalVector) column).setNull(row);
                break;

            // Dates and timestamps
            case DATEDAY:
                ((DateDayVector) column).setNull(row);
                break;
            case DATEMILLI:
                ((DateMilliVector) column).setNull(row);
                break;
            case TIMESTAMPMILLITZ:
                ((TimeStampMilliTZVector) column).setNull(row);
                break;

            // Variable-width values
            case VARCHAR:
                ((VarCharVector) column).setNull(row);
                break;
            case VARBINARY:
                ((VarBinaryVector) column).setNull(row);
                break;

            // Complex types
            case STRUCT:
                ((StructVector) column).setNull(row);
                break;
            case MAP:
                ((MapVector) column).setNull(row);
                break;
            case LIST: {
                // Lists are not nulled directly: an empty list is written at the row instead.
                // NOTE(review): the reason for resetting the value count to 0 is not evident here - confirm intent.
                UnionListWriter emptyListWriter = ((ListVector) column).getWriter();
                emptyListWriter.setPosition(row);
                emptyListWriter.startList();
                emptyListWriter.endList();
                emptyListWriter.setValueCount(0);
                break;
            }

            default:
                throw new IllegalArgumentException("Unknown type " + column.getMinorType());
        }
    }
}
Also used : BitVector(org.apache.arrow.vector.BitVector) StructVector(org.apache.arrow.vector.complex.StructVector) SmallIntVector(org.apache.arrow.vector.SmallIntVector) BigIntVector(org.apache.arrow.vector.BigIntVector) TinyIntVector(org.apache.arrow.vector.TinyIntVector) IntVector(org.apache.arrow.vector.IntVector) SmallIntVector(org.apache.arrow.vector.SmallIntVector) Float4Vector(org.apache.arrow.vector.Float4Vector) Float8Vector(org.apache.arrow.vector.Float8Vector) VarCharVector(org.apache.arrow.vector.VarCharVector) FieldVector(org.apache.arrow.vector.FieldVector) UInt8Vector(org.apache.arrow.vector.UInt8Vector) UInt4Vector(org.apache.arrow.vector.UInt4Vector) VarBinaryVector(org.apache.arrow.vector.VarBinaryVector) BigIntVector(org.apache.arrow.vector.BigIntVector) UnionListWriter(org.apache.arrow.vector.complex.impl.UnionListWriter) ListVector(org.apache.arrow.vector.complex.ListVector) DateMilliVector(org.apache.arrow.vector.DateMilliVector) DateDayVector(org.apache.arrow.vector.DateDayVector) UInt1Vector(org.apache.arrow.vector.UInt1Vector) UInt2Vector(org.apache.arrow.vector.UInt2Vector) DecimalVector(org.apache.arrow.vector.DecimalVector) TimeStampMilliTZVector(org.apache.arrow.vector.TimeStampMilliTZVector) TinyIntVector(org.apache.arrow.vector.TinyIntVector) MapVector(org.apache.arrow.vector.complex.MapVector)

Aggregations

ListVector (org.apache.arrow.vector.complex.ListVector)3 MapVector (org.apache.arrow.vector.complex.MapVector)3 UnionListWriter (org.apache.arrow.vector.complex.impl.UnionListWriter)3 FieldVector (org.apache.arrow.vector.FieldVector)2 StructVector (org.apache.arrow.vector.complex.StructVector)2 UnionMapWriter (org.apache.arrow.vector.complex.impl.UnionMapWriter)2 BigIntExtractor (com.amazonaws.athena.connector.lambda.data.writers.extractors.BigIntExtractor)1 BitExtractor (com.amazonaws.athena.connector.lambda.data.writers.extractors.BitExtractor)1 DateDayExtractor (com.amazonaws.athena.connector.lambda.data.writers.extractors.DateDayExtractor)1 DateMilliExtractor (com.amazonaws.athena.connector.lambda.data.writers.extractors.DateMilliExtractor)1 DecimalExtractor (com.amazonaws.athena.connector.lambda.data.writers.extractors.DecimalExtractor)1 Extractor (com.amazonaws.athena.connector.lambda.data.writers.extractors.Extractor)1 Float4Extractor (com.amazonaws.athena.connector.lambda.data.writers.extractors.Float4Extractor)1 Float8Extractor (com.amazonaws.athena.connector.lambda.data.writers.extractors.Float8Extractor)1 IntExtractor (com.amazonaws.athena.connector.lambda.data.writers.extractors.IntExtractor)1 SmallIntExtractor (com.amazonaws.athena.connector.lambda.data.writers.extractors.SmallIntExtractor)1 TinyIntExtractor (com.amazonaws.athena.connector.lambda.data.writers.extractors.TinyIntExtractor)1 VarBinaryExtractor (com.amazonaws.athena.connector.lambda.data.writers.extractors.VarBinaryExtractor)1 VarCharExtractor (com.amazonaws.athena.connector.lambda.data.writers.extractors.VarCharExtractor)1 FieldWriter (com.amazonaws.athena.connector.lambda.data.writers.fieldwriters.FieldWriter)1