Search in sources :

Example 6 with ListVector

use of org.apache.arrow.vector.complex.ListVector in project flink by apache.

In the class ArrowUtils, the method createArrowFieldWriterForRow.

/**
 * Selects the {@link ArrowFieldWriter} matching the runtime class of {@code vector}.
 *
 * <p>Scalar vectors map directly onto their {@code forRow} writer factories. For nested vectors
 * the writer is assembled recursively: a {@code ListVector} uses {@code fieldType} as an
 * {@code ArrayType} to resolve its element type, and a {@code StructVector} uses it as a
 * {@code RowType} to resolve the type of each child.
 *
 * @param vector the Arrow vector the returned writer will write into
 * @param fieldType the logical type describing {@code vector}; it is cast to
 *     {@code LocalZonedTimestampType}/{@code TimestampType}, {@code ArrayType} or {@code RowType}
 *     depending on the vector class
 * @return the writer for {@code vector}
 * @throws UnsupportedOperationException if no writer is available for the vector
 */
private static ArrowFieldWriter<RowData> createArrowFieldWriterForRow(ValueVector vector, LogicalType fieldType) {
    if (vector instanceof TinyIntVector) {
        return TinyIntWriter.forRow((TinyIntVector) vector);
    }
    if (vector instanceof SmallIntVector) {
        return SmallIntWriter.forRow((SmallIntVector) vector);
    }
    if (vector instanceof IntVector) {
        return IntWriter.forRow((IntVector) vector);
    }
    if (vector instanceof BigIntVector) {
        return BigIntWriter.forRow((BigIntVector) vector);
    }
    if (vector instanceof BitVector) {
        return BooleanWriter.forRow((BitVector) vector);
    }
    if (vector instanceof Float4Vector) {
        return FloatWriter.forRow((Float4Vector) vector);
    }
    if (vector instanceof Float8Vector) {
        return DoubleWriter.forRow((Float8Vector) vector);
    }
    if (vector instanceof VarCharVector) {
        return VarCharWriter.forRow((VarCharVector) vector);
    }
    if (vector instanceof VarBinaryVector) {
        return VarBinaryWriter.forRow((VarBinaryVector) vector);
    }
    if (vector instanceof DecimalVector) {
        DecimalVector decimals = (DecimalVector) vector;
        return DecimalWriter.forRow(decimals, getPrecision(decimals), decimals.getScale());
    }
    if (vector instanceof DateDayVector) {
        return DateWriter.forRow((DateDayVector) vector);
    }
    // All four time resolutions share a single writer.
    if (vector instanceof TimeSecVector
            || vector instanceof TimeMilliVector
            || vector instanceof TimeMicroVector
            || vector instanceof TimeNanoVector) {
        return TimeWriter.forRow(vector);
    }
    if (vector instanceof TimeStampVector) {
        ArrowType.Timestamp arrowTimestamp = (ArrowType.Timestamp) vector.getField().getType();
        // Only timestamps without a timezone are handled here; others fall through to the
        // remaining checks and end up in the unsupported-type error.
        if (arrowTimestamp.getTimezone() == null) {
            int precision = fieldType instanceof LocalZonedTimestampType
                    ? ((LocalZonedTimestampType) fieldType).getPrecision()
                    : ((TimestampType) fieldType).getPrecision();
            return TimestampWriter.forRow(vector, precision);
        }
    }
    if (vector instanceof ListVector) {
        ListVector list = (ListVector) vector;
        LogicalType elementType = ((ArrayType) fieldType).getElementType();
        ArrowFieldWriter<ArrayData> elementWriter = createArrowFieldWriterForArray(list.getDataVector(), elementType);
        return ArrayWriter.forRow(list, elementWriter);
    }
    if (vector instanceof StructVector) {
        RowType rowType = (RowType) fieldType;
        StructVector struct = (StructVector) vector;
        ArrowFieldWriter<RowData>[] childWriters = new ArrowFieldWriter[rowType.getFieldCount()];
        // Child vectors are looked up by the same index as the row type's fields.
        for (int pos = 0; pos < childWriters.length; pos++) {
            childWriters[pos] = createArrowFieldWriterForRow(struct.getVectorById(pos), rowType.getTypeAt(pos));
        }
        return RowWriter.forRow(struct, childWriters);
    }
    throw new UnsupportedOperationException(String.format("Unsupported type %s.", fieldType));
}
Also used : BitVector(org.apache.arrow.vector.BitVector) StructVector(org.apache.arrow.vector.complex.StructVector) Float4Vector(org.apache.arrow.vector.Float4Vector) TimeSecVector(org.apache.arrow.vector.TimeSecVector) TimeMilliVector(org.apache.arrow.vector.TimeMilliVector) LogicalType(org.apache.flink.table.types.logical.LogicalType) RowType(org.apache.flink.table.types.logical.RowType) ArrowFieldWriter(org.apache.flink.table.runtime.arrow.writers.ArrowFieldWriter) VarBinaryVector(org.apache.arrow.vector.VarBinaryVector) DateDayVector(org.apache.arrow.vector.DateDayVector) DecimalVector(org.apache.arrow.vector.DecimalVector) TimeNanoVector(org.apache.arrow.vector.TimeNanoVector) SmallIntVector(org.apache.arrow.vector.SmallIntVector) BigIntVector(org.apache.arrow.vector.BigIntVector) TinyIntVector(org.apache.arrow.vector.TinyIntVector) IntVector(org.apache.arrow.vector.IntVector) SmallIntVector(org.apache.arrow.vector.SmallIntVector) Float8Vector(org.apache.arrow.vector.Float8Vector) VarCharVector(org.apache.arrow.vector.VarCharVector) LocalZonedTimestampType(org.apache.flink.table.types.logical.LocalZonedTimestampType) BigIntVector(org.apache.arrow.vector.BigIntVector) TimeStampVector(org.apache.arrow.vector.TimeStampVector) TimeMicroVector(org.apache.arrow.vector.TimeMicroVector) ListVector(org.apache.arrow.vector.complex.ListVector) TinyIntVector(org.apache.arrow.vector.TinyIntVector)

Example 7 with ListVector

use of org.apache.arrow.vector.complex.ListVector in project beam by apache.

In the class ArrowConversionTest, the method rowIterator.

/**
 * Fills a {@link VectorSchemaRoot} with 16 rows covering every field of the Arrow schema below
 * and checks that {@code ArrowConversion.rowsFromRecordBatch} yields the equivalent Beam rows
 * in the same order.
 */
@Test
public void rowIterator() {
    org.apache.arrow.vector.types.pojo.Schema schema = new org.apache.arrow.vector.types.pojo.Schema(asList(field("int32", new ArrowType.Int(32, true)), field("float64", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)), field("string", new ArrowType.Utf8()), field("timestampMicroUTC", new ArrowType.Timestamp(TimeUnit.MICROSECOND, "UTC")), field("timestampMilliUTC", new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC")), field("int32_list", new ArrowType.List(), field("int32s", new ArrowType.Int(32, true))), field("boolean", new ArrowType.Bool()), field("fixed_size_binary", new ArrowType.FixedSizeBinary(3))));
    Schema beamSchema = ArrowConversion.ArrowSchemaTranslator.toBeamSchema(schema);
    final int rowCount = 16;
    // try-with-resources releases the Arrow buffers even when an assertion fails or a vector
    // write throws; previously close() was skipped on any failure path.
    try (VectorSchemaRoot expectedSchemaRoot = VectorSchemaRoot.create(schema, allocator)) {
        expectedSchemaRoot.allocateNew();
        expectedSchemaRoot.setRowCount(rowCount);
        IntVector intVector = (IntVector) expectedSchemaRoot.getFieldVectors().get(0);
        Float8Vector floatVector = (Float8Vector) expectedSchemaRoot.getFieldVectors().get(1);
        VarCharVector strVector = (VarCharVector) expectedSchemaRoot.getFieldVectors().get(2);
        TimeStampMicroTZVector timestampMicroUtcVector = (TimeStampMicroTZVector) expectedSchemaRoot.getFieldVectors().get(3);
        TimeStampMilliTZVector timeStampMilliTZVector = (TimeStampMilliTZVector) expectedSchemaRoot.getFieldVectors().get(4);
        ListVector int32ListVector = (ListVector) expectedSchemaRoot.getFieldVectors().get(5);
        IntVector int32ListElementVector = int32ListVector.<IntVector>addOrGetVector(new org.apache.arrow.vector.types.pojo.FieldType(false, new ArrowType.Int(32, true), null)).getVector();
        BitVector boolVector = (BitVector) expectedSchemaRoot.getFieldVectors().get(6);
        FixedSizeBinaryVector fixedSizeBinaryVector = (FixedSizeBinaryVector) expectedSchemaRoot.getFieldVectors().get(7);
        ArrayList<Row> expectedRows = new ArrayList<>();
        for (int i = 0; i < rowCount; i++) {
            DateTime dt = new DateTime(2019, 1, i + 1, i, i, i, DateTimeZone.UTC);
            // The expected Beam row and the Arrow vectors are populated from the same values.
            expectedRows.add(Row.withSchema(beamSchema).addValues(i, i + .1 * i, "" + i, dt, dt, ImmutableList.of(i), (i % 2) != 0, new byte[] { (byte) i, (byte) (i + 1), (byte) (i + 2) }).build());
            intVector.set(i, i);
            floatVector.set(i, i + .1 * i);
            strVector.set(i, new Text("" + i));
            // The microsecond vector takes the millisecond instant scaled by 1000.
            timestampMicroUtcVector.set(i, dt.getMillis() * 1000);
            timeStampMilliTZVector.set(i, dt.getMillis());
            // Each list entry holds exactly one element, stored at the same index as the row.
            int32ListVector.startNewValue(i);
            int32ListElementVector.set(i, i);
            int32ListVector.endValue(i, 1);
            boolVector.set(i, i % 2);
            fixedSizeBinaryVector.set(i, new byte[] { (byte) i, (byte) (i + 1), (byte) (i + 2) });
        }
        assertThat(ImmutableList.copyOf(ArrowConversion.rowsFromRecordBatch(beamSchema, expectedSchemaRoot)), IsIterableContainingInOrder.contains(expectedRows.stream().map((row) -> equalTo(row)).collect(ImmutableList.toImmutableList())));
    }
}
Also used : IsIterableContainingInOrder(org.hamcrest.collection.IsIterableContainingInOrder) DateTimeZone(org.joda.time.DateTimeZone) Float8Vector(org.apache.arrow.vector.Float8Vector) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) RunWith(org.junit.runner.RunWith) TimeUnit(org.apache.arrow.vector.types.TimeUnit) Text(org.apache.arrow.vector.util.Text) TimeStampMilliTZVector(org.apache.arrow.vector.TimeStampMilliTZVector) ArrayList(java.util.ArrayList) FloatingPointPrecision(org.apache.arrow.vector.types.FloatingPointPrecision) Arrays.asList(java.util.Arrays.asList) RootAllocator(org.apache.arrow.memory.RootAllocator) After(org.junit.After) ListVector(org.apache.arrow.vector.complex.ListVector) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) FixedSizeBinaryVector(org.apache.arrow.vector.FixedSizeBinaryVector) Row(org.apache.beam.sdk.values.Row) BufferAllocator(org.apache.arrow.memory.BufferAllocator) Before(org.junit.Before) Field(org.apache.beam.sdk.schemas.Schema.Field) IntVector(org.apache.arrow.vector.IntVector) TimeStampMicroTZVector(org.apache.arrow.vector.TimeStampMicroTZVector) VectorSchemaRoot(org.apache.arrow.vector.VectorSchemaRoot) DateTime(org.joda.time.DateTime) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) Schema(org.apache.beam.sdk.schemas.Schema) Matchers.equalTo(org.hamcrest.Matchers.equalTo) VarCharVector(org.apache.arrow.vector.VarCharVector) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) BitVector(org.apache.arrow.vector.BitVector) VectorSchemaRoot(org.apache.arrow.vector.VectorSchemaRoot) BitVector(org.apache.arrow.vector.BitVector) Schema(org.apache.beam.sdk.schemas.Schema) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) ArrayList(java.util.ArrayList) DateTime(org.joda.time.DateTime) TimeStampMicroTZVector(org.apache.arrow.vector.TimeStampMicroTZVector) 
TimeStampMilliTZVector(org.apache.arrow.vector.TimeStampMilliTZVector) IntVector(org.apache.arrow.vector.IntVector) Float8Vector(org.apache.arrow.vector.Float8Vector) VarCharVector(org.apache.arrow.vector.VarCharVector) Text(org.apache.arrow.vector.util.Text) FixedSizeBinaryVector(org.apache.arrow.vector.FixedSizeBinaryVector) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) ListVector(org.apache.arrow.vector.complex.ListVector) Row(org.apache.beam.sdk.values.Row) Test(org.junit.Test)

Example 8 with ListVector

use of org.apache.arrow.vector.complex.ListVector in project carbondata by apache.

In the class ArrowWriter, the method createFieldWriter.

/**
 * Creates the {@link ArrowFieldWriter} that corresponds to the runtime class of the given vector.
 *
 * <p>List and struct vectors are handled recursively: the writer for a {@code ListVector} wraps
 * the writer of its data vector, and the writer for a {@code StructVector} wraps one writer per
 * child, in ordinal order.
 *
 * @param valueVector the Arrow vector to create a writer for
 * @return a writer bound to {@code valueVector}
 * @throws UnsupportedOperationException if the vector class matches none of the supported kinds
 */
private static ArrowFieldWriter createFieldWriter(ValueVector valueVector) {
    if (valueVector instanceof BitVector) {
        return new BooleanWriter((BitVector) valueVector);
    }
    if (valueVector instanceof TinyIntVector) {
        return new ByteWriter((TinyIntVector) valueVector);
    }
    if (valueVector instanceof SmallIntVector) {
        return new ShortWriter((SmallIntVector) valueVector);
    }
    if (valueVector instanceof IntVector) {
        return new IntWriter((IntVector) valueVector);
    }
    if (valueVector instanceof BigIntVector) {
        return new LongWriter((BigIntVector) valueVector);
    }
    if (valueVector instanceof DecimalVector) {
        DecimalVector decimals = (DecimalVector) valueVector;
        // Precision and scale are read from the vector's Arrow field type.
        ArrowType.Decimal decimalType = (ArrowType.Decimal) decimals.getField().getType();
        return new DecimalWriter(decimals, decimalType.getPrecision(), decimalType.getScale());
    }
    if (valueVector instanceof VarCharVector) {
        return new StringWriter((VarCharVector) valueVector);
    }
    if (valueVector instanceof Float4Vector) {
        return new FloatWriter((Float4Vector) valueVector);
    }
    if (valueVector instanceof Float8Vector) {
        return new DoubleWriter((Float8Vector) valueVector);
    }
    if (valueVector instanceof ListVector) {
        ListVector list = (ListVector) valueVector;
        ArrowFieldWriter elementWriter = createFieldWriter(list.getDataVector());
        return new ArrayWriter(list, elementWriter);
    }
    if (valueVector instanceof StructVector) {
        StructVector struct = (StructVector) valueVector;
        int childCount = struct.size();
        ArrowFieldWriter[] childWriters = new ArrowFieldWriter[childCount];
        for (int ordinal = 0; ordinal < childCount; ordinal++) {
            childWriters[ordinal] = createFieldWriter(struct.getChildByOrdinal(ordinal));
        }
        return new StructWriter(struct, childWriters);
    }
    if (valueVector instanceof VarBinaryVector) {
        return new BinaryWriter((VarBinaryVector) valueVector);
    }
    if (valueVector instanceof DateDayVector) {
        return new DateWriter((DateDayVector) valueVector);
    }
    if (valueVector instanceof TimeStampMicroTZVector) {
        return new TimeStampWriter((TimeStampMicroTZVector) valueVector);
    }
    throw new UnsupportedOperationException("Invalid data type");
}
Also used : BitVector(org.apache.arrow.vector.BitVector) StructVector(org.apache.arrow.vector.complex.StructVector) Float4Vector(org.apache.arrow.vector.Float4Vector) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) ArrayList(java.util.ArrayList) VarBinaryVector(org.apache.arrow.vector.VarBinaryVector) Field(org.apache.arrow.vector.types.pojo.Field) TimeStampMicroTZVector(org.apache.arrow.vector.TimeStampMicroTZVector) DateDayVector(org.apache.arrow.vector.DateDayVector) DecimalVector(org.apache.arrow.vector.DecimalVector) SmallIntVector(org.apache.arrow.vector.SmallIntVector) IntVector(org.apache.arrow.vector.IntVector) SmallIntVector(org.apache.arrow.vector.SmallIntVector) BigIntVector(org.apache.arrow.vector.BigIntVector) TinyIntVector(org.apache.arrow.vector.TinyIntVector) Float8Vector(org.apache.arrow.vector.Float8Vector) VarCharVector(org.apache.arrow.vector.VarCharVector) BigIntVector(org.apache.arrow.vector.BigIntVector) ListVector(org.apache.arrow.vector.complex.ListVector) TinyIntVector(org.apache.arrow.vector.TinyIntVector)

Aggregations

ListVector (org.apache.arrow.vector.complex.ListVector)8 BitVector (org.apache.arrow.vector.BitVector)5 Float8Vector (org.apache.arrow.vector.Float8Vector)5 IntVector (org.apache.arrow.vector.IntVector)5 VarCharVector (org.apache.arrow.vector.VarCharVector)5 StructVector (org.apache.arrow.vector.complex.StructVector)5 BigIntVector (org.apache.arrow.vector.BigIntVector)4 DateDayVector (org.apache.arrow.vector.DateDayVector)4 DecimalVector (org.apache.arrow.vector.DecimalVector)4 Float4Vector (org.apache.arrow.vector.Float4Vector)4 SmallIntVector (org.apache.arrow.vector.SmallIntVector)4 TinyIntVector (org.apache.arrow.vector.TinyIntVector)4 VarBinaryVector (org.apache.arrow.vector.VarBinaryVector)4 TimeMicroVector (org.apache.arrow.vector.TimeMicroVector)3 TimeMilliVector (org.apache.arrow.vector.TimeMilliVector)3 TimeNanoVector (org.apache.arrow.vector.TimeNanoVector)3 TimeSecVector (org.apache.arrow.vector.TimeSecVector)3 TimeStampVector (org.apache.arrow.vector.TimeStampVector)3 RowType (org.apache.flink.table.types.logical.RowType)3 DataflowException (edu.uci.ics.texera.api.exception.DataflowException)2