Search in sources :

Example 6 with VarCharVector

Use of org.apache.arrow.vector.VarCharVector in the Apache Flink project.

Class ArrowUtils, method createArrowFieldWriterForRow.

/**
 * Selects the {@link ArrowFieldWriter} that writes a row field into the given Arrow vector.
 *
 * <p>The writer is chosen from the runtime class of {@code vector}. {@code fieldType} is only
 * consulted for the timestamp precision and for the element / field types of list and struct
 * vectors, whose child writers are created recursively.
 *
 * @param vector the Arrow vector the returned writer is bound to
 * @param fieldType the logical type of the field stored in {@code vector}
 * @return a writer for {@code vector}
 * @throws UnsupportedOperationException if none of the supported vector kinds matches
 */
private static ArrowFieldWriter<RowData> createArrowFieldWriterForRow(ValueVector vector, LogicalType fieldType) {
    // Integral, boolean and floating-point vectors.
    if (vector instanceof TinyIntVector) {
        return TinyIntWriter.forRow((TinyIntVector) vector);
    }
    if (vector instanceof SmallIntVector) {
        return SmallIntWriter.forRow((SmallIntVector) vector);
    }
    if (vector instanceof IntVector) {
        return IntWriter.forRow((IntVector) vector);
    }
    if (vector instanceof BigIntVector) {
        return BigIntWriter.forRow((BigIntVector) vector);
    }
    if (vector instanceof BitVector) {
        return BooleanWriter.forRow((BitVector) vector);
    }
    if (vector instanceof Float4Vector) {
        return FloatWriter.forRow((Float4Vector) vector);
    }
    if (vector instanceof Float8Vector) {
        return DoubleWriter.forRow((Float8Vector) vector);
    }

    // Variable-width vectors.
    if (vector instanceof VarCharVector) {
        return VarCharWriter.forRow((VarCharVector) vector);
    }
    if (vector instanceof VarBinaryVector) {
        return VarBinaryWriter.forRow((VarBinaryVector) vector);
    }

    // Decimal: precision and scale are taken from the vector itself.
    if (vector instanceof DecimalVector) {
        DecimalVector decimals = (DecimalVector) vector;
        return DecimalWriter.forRow(decimals, getPrecision(decimals), decimals.getScale());
    }

    // Date and time vectors; every time resolution shares one writer.
    if (vector instanceof DateDayVector) {
        return DateWriter.forRow((DateDayVector) vector);
    }
    if (vector instanceof TimeSecVector
            || vector instanceof TimeMilliVector
            || vector instanceof TimeMicroVector
            || vector instanceof TimeNanoVector) {
        return TimeWriter.forRow(vector);
    }

    // Only timestamp vectors whose Arrow type carries no timezone are handled here.
    if (vector instanceof TimeStampVector
            && ((ArrowType.Timestamp) vector.getField().getType()).getTimezone() == null) {
        final int precision = fieldType instanceof LocalZonedTimestampType
                ? ((LocalZonedTimestampType) fieldType).getPrecision()
                : ((TimestampType) fieldType).getPrecision();
        return TimestampWriter.forRow(vector, precision);
    }

    // Nested types: build the child writers first, then wrap them.
    if (vector instanceof ListVector) {
        ListVector list = (ListVector) vector;
        LogicalType elementType = ((ArrayType) fieldType).getElementType();
        return ArrayWriter.forRow(list, createArrowFieldWriterForArray(list.getDataVector(), elementType));
    }
    if (vector instanceof StructVector) {
        RowType rowType = (RowType) fieldType;
        StructVector struct = (StructVector) vector;
        ArrowFieldWriter<RowData>[] childWriters = new ArrowFieldWriter[rowType.getFieldCount()];
        for (int pos = 0; pos < childWriters.length; pos++) {
            childWriters[pos] = createArrowFieldWriterForRow(struct.getVectorById(pos), rowType.getTypeAt(pos));
        }
        return RowWriter.forRow(struct, childWriters);
    }

    throw new UnsupportedOperationException(String.format("Unsupported type %s.", fieldType));
}
Also used : BitVector(org.apache.arrow.vector.BitVector) StructVector(org.apache.arrow.vector.complex.StructVector) Float4Vector(org.apache.arrow.vector.Float4Vector) TimeSecVector(org.apache.arrow.vector.TimeSecVector) TimeMilliVector(org.apache.arrow.vector.TimeMilliVector) LogicalType(org.apache.flink.table.types.logical.LogicalType) RowType(org.apache.flink.table.types.logical.RowType) ArrowFieldWriter(org.apache.flink.table.runtime.arrow.writers.ArrowFieldWriter) VarBinaryVector(org.apache.arrow.vector.VarBinaryVector) DateDayVector(org.apache.arrow.vector.DateDayVector) DecimalVector(org.apache.arrow.vector.DecimalVector) TimeNanoVector(org.apache.arrow.vector.TimeNanoVector) SmallIntVector(org.apache.arrow.vector.SmallIntVector) BigIntVector(org.apache.arrow.vector.BigIntVector) TinyIntVector(org.apache.arrow.vector.TinyIntVector) IntVector(org.apache.arrow.vector.IntVector) SmallIntVector(org.apache.arrow.vector.SmallIntVector) Float8Vector(org.apache.arrow.vector.Float8Vector) VarCharVector(org.apache.arrow.vector.VarCharVector) LocalZonedTimestampType(org.apache.flink.table.types.logical.LocalZonedTimestampType) BigIntVector(org.apache.arrow.vector.BigIntVector) TimeStampVector(org.apache.arrow.vector.TimeStampVector) TimeMicroVector(org.apache.arrow.vector.TimeMicroVector) ListVector(org.apache.arrow.vector.complex.ListVector) TinyIntVector(org.apache.arrow.vector.TinyIntVector)

Example 7 with VarCharVector

Use of org.apache.arrow.vector.VarCharVector in the Apache Beam project.

Class ArrowConversionTest, method rowIterator.

/**
 * Checks that {@code ArrowConversion.rowsFromRecordBatch} returns, in order, Beam rows equal to
 * the values written into an Arrow record batch.
 *
 * <p>The batch has one column each of int32, float64, string, microsecond and millisecond UTC
 * timestamps, a list of int32, boolean and 3-byte fixed-size binary.
 */
@Test
public void rowIterator() {
    org.apache.arrow.vector.types.pojo.Schema schema =
            new org.apache.arrow.vector.types.pojo.Schema(
                    asList(
                            field("int32", new ArrowType.Int(32, true)),
                            field("float64", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)),
                            field("string", new ArrowType.Utf8()),
                            field("timestampMicroUTC", new ArrowType.Timestamp(TimeUnit.MICROSECOND, "UTC")),
                            field("timestampMilliUTC", new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC")),
                            field("int32_list", new ArrowType.List(), field("int32s", new ArrowType.Int(32, true))),
                            field("boolean", new ArrowType.Bool()),
                            field("fixed_size_binary", new ArrowType.FixedSizeBinary(3))));
    Schema beamSchema = ArrowConversion.ArrowSchemaTranslator.toBeamSchema(schema);
    final int rowCount = 16;
    // try-with-resources closes the root even when populating it or the assertion throws;
    // previously close() was only reached on the success path.
    try (VectorSchemaRoot expectedSchemaRoot = VectorSchemaRoot.create(schema, allocator)) {
        expectedSchemaRoot.allocateNew();
        expectedSchemaRoot.setRowCount(rowCount);
        // Column vectors, in the same order as the schema fields above.
        IntVector intVector = (IntVector) expectedSchemaRoot.getFieldVectors().get(0);
        Float8Vector floatVector = (Float8Vector) expectedSchemaRoot.getFieldVectors().get(1);
        VarCharVector strVector = (VarCharVector) expectedSchemaRoot.getFieldVectors().get(2);
        TimeStampMicroTZVector timestampMicroUtcVector = (TimeStampMicroTZVector) expectedSchemaRoot.getFieldVectors().get(3);
        TimeStampMilliTZVector timeStampMilliTZVector = (TimeStampMilliTZVector) expectedSchemaRoot.getFieldVectors().get(4);
        ListVector int32ListVector = (ListVector) expectedSchemaRoot.getFieldVectors().get(5);
        IntVector int32ListElementVector = int32ListVector.<IntVector>addOrGetVector(new org.apache.arrow.vector.types.pojo.FieldType(false, new ArrowType.Int(32, true), null)).getVector();
        BitVector boolVector = (BitVector) expectedSchemaRoot.getFieldVectors().get(6);
        FixedSizeBinaryVector fixedSizeBinaryVector = (FixedSizeBinaryVector) expectedSchemaRoot.getFieldVectors().get(7);
        ArrayList<Row> expectedRows = new ArrayList<>();
        for (int i = 0; i < rowCount; i++) {
            DateTime dt = new DateTime(2019, 1, i + 1, i, i, i, DateTimeZone.UTC);
            // Expected Beam row and the Arrow cells below are built from the same values.
            expectedRows.add(Row.withSchema(beamSchema).addValues(i, i + .1 * i, "" + i, dt, dt, ImmutableList.of(i), (i % 2) != 0, new byte[] { (byte) i, (byte) (i + 1), (byte) (i + 2) }).build());
            intVector.set(i, i);
            floatVector.set(i, i + .1 * i);
            strVector.set(i, new Text("" + i));
            timestampMicroUtcVector.set(i, dt.getMillis() * 1000);
            timeStampMilliTZVector.set(i, dt.getMillis());
            // Each list cell holds exactly one element, stored at the same index in the child vector.
            int32ListVector.startNewValue(i);
            int32ListElementVector.set(i, i);
            int32ListVector.endValue(i, 1);
            boolVector.set(i, i % 2);
            fixedSizeBinaryVector.set(i, new byte[] { (byte) i, (byte) (i + 1), (byte) (i + 2) });
        }
        assertThat(ImmutableList.copyOf(ArrowConversion.rowsFromRecordBatch(beamSchema, expectedSchemaRoot)), IsIterableContainingInOrder.contains(expectedRows.stream().map((row) -> equalTo(row)).collect(ImmutableList.toImmutableList())));
    }
}
Also used : IsIterableContainingInOrder(org.hamcrest.collection.IsIterableContainingInOrder) DateTimeZone(org.joda.time.DateTimeZone) Float8Vector(org.apache.arrow.vector.Float8Vector) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) RunWith(org.junit.runner.RunWith) TimeUnit(org.apache.arrow.vector.types.TimeUnit) Text(org.apache.arrow.vector.util.Text) TimeStampMilliTZVector(org.apache.arrow.vector.TimeStampMilliTZVector) ArrayList(java.util.ArrayList) FloatingPointPrecision(org.apache.arrow.vector.types.FloatingPointPrecision) Arrays.asList(java.util.Arrays.asList) RootAllocator(org.apache.arrow.memory.RootAllocator) After(org.junit.After) ListVector(org.apache.arrow.vector.complex.ListVector) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) FixedSizeBinaryVector(org.apache.arrow.vector.FixedSizeBinaryVector) Row(org.apache.beam.sdk.values.Row) BufferAllocator(org.apache.arrow.memory.BufferAllocator) Before(org.junit.Before) Field(org.apache.beam.sdk.schemas.Schema.Field) IntVector(org.apache.arrow.vector.IntVector) TimeStampMicroTZVector(org.apache.arrow.vector.TimeStampMicroTZVector) VectorSchemaRoot(org.apache.arrow.vector.VectorSchemaRoot) DateTime(org.joda.time.DateTime) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) Schema(org.apache.beam.sdk.schemas.Schema) Matchers.equalTo(org.hamcrest.Matchers.equalTo) VarCharVector(org.apache.arrow.vector.VarCharVector) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) BitVector(org.apache.arrow.vector.BitVector) VectorSchemaRoot(org.apache.arrow.vector.VectorSchemaRoot) BitVector(org.apache.arrow.vector.BitVector) Schema(org.apache.beam.sdk.schemas.Schema) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) ArrayList(java.util.ArrayList) DateTime(org.joda.time.DateTime) TimeStampMicroTZVector(org.apache.arrow.vector.TimeStampMicroTZVector) 
TimeStampMilliTZVector(org.apache.arrow.vector.TimeStampMilliTZVector) IntVector(org.apache.arrow.vector.IntVector) Float8Vector(org.apache.arrow.vector.Float8Vector) VarCharVector(org.apache.arrow.vector.VarCharVector) Text(org.apache.arrow.vector.util.Text) FixedSizeBinaryVector(org.apache.arrow.vector.FixedSizeBinaryVector) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) ListVector(org.apache.arrow.vector.complex.ListVector) Row(org.apache.beam.sdk.values.Row) Test(org.junit.Test)

Example 8 with VarCharVector

Use of org.apache.arrow.vector.VarCharVector in the twister2 project by DSC-SPIDAL.

Class ArrowAllToAll, method onReceive.

/**
 * Records one received buffer for the table being assembled from {@code source}.
 *
 * <p>Once the last buffer of the current column has arrived, the buffers are loaded into that
 * column's vector. Once every column of the schema root has been loaded, each vector is wrapped
 * in an {@link ArrowColumn}, the resulting table is handed to {@code recvCallback}, and the
 * pending state is cleared.
 *
 * @param source the source the buffer came from; used as the key into {@code receives}
 * @param buffer the received buffer; cast to {@code ArrowChannelBuffer}
 * @param length not used by this method
 * @throws RuntimeException if a vector is not one of the supported vector classes
 */
@Override
public void onReceive(int source, ChannelBuffer buffer, int length) {
    PendingReceiveTable pending = receives.get(source);
    receivedBuffers++;

    ArrowBuf arrowBuf = ((ArrowChannelBuffer) buffer).getArrowBuf();
    pending.buffers.add(arrowBuf);
    if (pending.bufferIndex == 0) {
        pending.fieldNodes.add(new ArrowFieldNode(pending.noArray, 0));
    }

    VectorSchemaRoot root = pending.root;
    List<FieldVector> vectors = root.getFieldVectors();

    // Still waiting for more buffers of the current array.
    if (pending.noBuffers != pending.bufferIndex + 1) {
        return;
    }

    // We received everything for this array.
    FieldVector loaded = vectors.get(pending.columnIndex);
    loadBuffers(loaded, loaded.getField(), pending.buffers.iterator(), pending.fieldNodes.iterator());
    pending.arrays.add(loaded);
    pending.buffers.clear();

    // Not every column has been loaded yet.
    if (pending.arrays.size() != root.getFieldVectors().size()) {
        return;
    }

    // Create the table: wrap each vector in its matching column type.
    List<ArrowColumn> columns = new ArrayList<>();
    for (FieldVector vec : vectors) {
        ArrowColumn column;
        if (vec instanceof IntVector) {
            column = new Int4Column((IntVector) vec);
        } else if (vec instanceof Float4Vector) {
            column = new Float4Column((Float4Vector) vec);
        } else if (vec instanceof Float8Vector) {
            column = new Float8Column((Float8Vector) vec);
        } else if (vec instanceof UInt8Vector) {
            column = new Int8Column((UInt8Vector) vec);
        } else if (vec instanceof UInt2Vector) {
            column = new UInt2Column((UInt2Vector) vec);
        } else if (vec instanceof VarCharVector) {
            column = new StringColumn((VarCharVector) vec);
        } else if (vec instanceof VarBinaryVector) {
            column = new BinaryColumn((VarBinaryVector) vec);
        } else {
            throw new RuntimeException("Un-supported type : " + vec.getClass().getName());
        }
        columns.add(column);
    }

    Table assembled = new ArrowTable(root.getSchema(), pending.noArray, columns);
    LOG.info("Received table from source " + source + " to " + pending.target + " count" + assembled.rowCount());
    recvCallback.onReceive(source, pending.target, assembled);
    pending.clear();
}
Also used : BaseFixedWidthVector(org.apache.arrow.vector.BaseFixedWidthVector) VectorSchemaRoot(org.apache.arrow.vector.VectorSchemaRoot) ArrowBuf(io.netty.buffer.ArrowBuf) Float4Vector(org.apache.arrow.vector.Float4Vector) BinaryColumn(edu.iu.dsc.tws.common.table.arrow.BinaryColumn) ArrayList(java.util.ArrayList) VarBinaryVector(org.apache.arrow.vector.VarBinaryVector) ArrowColumn(edu.iu.dsc.tws.common.table.ArrowColumn) BaseVariableWidthVector(org.apache.arrow.vector.BaseVariableWidthVector) ArrowFieldNode(org.apache.arrow.vector.ipc.message.ArrowFieldNode) Int8Column(edu.iu.dsc.tws.common.table.arrow.Int8Column) StringColumn(edu.iu.dsc.tws.common.table.arrow.StringColumn) Table(edu.iu.dsc.tws.common.table.Table) ArrowTable(edu.iu.dsc.tws.common.table.arrow.ArrowTable) IntVector(org.apache.arrow.vector.IntVector) UInt2Column(edu.iu.dsc.tws.common.table.arrow.UInt2Column) Float8Vector(org.apache.arrow.vector.Float8Vector) VarCharVector(org.apache.arrow.vector.VarCharVector) FieldVector(org.apache.arrow.vector.FieldVector) Float4Column(edu.iu.dsc.tws.common.table.arrow.Float4Column) UInt8Vector(org.apache.arrow.vector.UInt8Vector) Float8Column(edu.iu.dsc.tws.common.table.arrow.Float8Column) Int4Column(edu.iu.dsc.tws.common.table.arrow.Int4Column) ArrowTable(edu.iu.dsc.tws.common.table.arrow.ArrowTable) UInt2Vector(org.apache.arrow.vector.UInt2Vector)

Example 9 with VarCharVector

Use of org.apache.arrow.vector.VarCharVector in the Apache CarbonData project.

Class ArrowWriter, method createFieldWriter.

/**
 * Creates the {@link ArrowFieldWriter} matching the runtime class of the given Arrow vector.
 *
 * <p>List and struct vectors are handled recursively: a writer is first created for the list's
 * data vector, or for each child of the struct, and then wrapped.
 *
 * @param valueVector the Arrow vector the returned writer is bound to
 * @return a writer for {@code valueVector}
 * @throws UnsupportedOperationException if the vector is none of the supported classes
 */
private static ArrowFieldWriter createFieldWriter(ValueVector valueVector) {
    if (valueVector instanceof BitVector) {
        return new BooleanWriter((BitVector) valueVector);
    }
    if (valueVector instanceof TinyIntVector) {
        return new ByteWriter((TinyIntVector) valueVector);
    }
    if (valueVector instanceof SmallIntVector) {
        return new ShortWriter((SmallIntVector) valueVector);
    }
    if (valueVector instanceof IntVector) {
        return new IntWriter((IntVector) valueVector);
    }
    if (valueVector instanceof BigIntVector) {
        return new LongWriter((BigIntVector) valueVector);
    }
    if (valueVector instanceof DecimalVector) {
        // Precision and scale come from the Arrow type declared on the vector's field.
        DecimalVector decimals = (DecimalVector) valueVector;
        ArrowType.Decimal decimalType = (ArrowType.Decimal) decimals.getField().getType();
        return new DecimalWriter(decimals, decimalType.getPrecision(), decimalType.getScale());
    }
    if (valueVector instanceof VarCharVector) {
        return new StringWriter((VarCharVector) valueVector);
    }
    if (valueVector instanceof Float4Vector) {
        return new FloatWriter((Float4Vector) valueVector);
    }
    if (valueVector instanceof Float8Vector) {
        return new DoubleWriter((Float8Vector) valueVector);
    }
    if (valueVector instanceof ListVector) {
        ListVector list = (ListVector) valueVector;
        ArrowFieldWriter elementWriter = createFieldWriter(list.getDataVector());
        return new ArrayWriter(list, elementWriter);
    }
    if (valueVector instanceof StructVector) {
        StructVector struct = (StructVector) valueVector;
        // One child writer per struct child, in ordinal order.
        ArrowFieldWriter[] childWriters = new ArrowFieldWriter[struct.size()];
        for (int ordinal = 0; ordinal < childWriters.length; ordinal++) {
            childWriters[ordinal] = createFieldWriter(struct.getChildByOrdinal(ordinal));
        }
        return new StructWriter(struct, childWriters);
    }
    if (valueVector instanceof VarBinaryVector) {
        return new BinaryWriter((VarBinaryVector) valueVector);
    }
    if (valueVector instanceof DateDayVector) {
        return new DateWriter((DateDayVector) valueVector);
    }
    if (valueVector instanceof TimeStampMicroTZVector) {
        return new TimeStampWriter((TimeStampMicroTZVector) valueVector);
    }
    throw new UnsupportedOperationException("Invalid data type");
}
Also used : BitVector(org.apache.arrow.vector.BitVector) StructVector(org.apache.arrow.vector.complex.StructVector) Float4Vector(org.apache.arrow.vector.Float4Vector) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) ArrayList(java.util.ArrayList) VarBinaryVector(org.apache.arrow.vector.VarBinaryVector) Field(org.apache.arrow.vector.types.pojo.Field) TimeStampMicroTZVector(org.apache.arrow.vector.TimeStampMicroTZVector) DateDayVector(org.apache.arrow.vector.DateDayVector) DecimalVector(org.apache.arrow.vector.DecimalVector) SmallIntVector(org.apache.arrow.vector.SmallIntVector) IntVector(org.apache.arrow.vector.IntVector) SmallIntVector(org.apache.arrow.vector.SmallIntVector) BigIntVector(org.apache.arrow.vector.BigIntVector) TinyIntVector(org.apache.arrow.vector.TinyIntVector) Float8Vector(org.apache.arrow.vector.Float8Vector) VarCharVector(org.apache.arrow.vector.VarCharVector) BigIntVector(org.apache.arrow.vector.BigIntVector) ListVector(org.apache.arrow.vector.complex.ListVector) TinyIntVector(org.apache.arrow.vector.TinyIntVector)

Aggregations

VarCharVector (org.apache.arrow.vector.VarCharVector)9 Float8Vector (org.apache.arrow.vector.Float8Vector)7 IntVector (org.apache.arrow.vector.IntVector)7 BigIntVector (org.apache.arrow.vector.BigIntVector)6 BitVector (org.apache.arrow.vector.BitVector)6 Float4Vector (org.apache.arrow.vector.Float4Vector)6 VarBinaryVector (org.apache.arrow.vector.VarBinaryVector)6 DateDayVector (org.apache.arrow.vector.DateDayVector)5 SmallIntVector (org.apache.arrow.vector.SmallIntVector)5 TinyIntVector (org.apache.arrow.vector.TinyIntVector)5 ListVector (org.apache.arrow.vector.complex.ListVector)5 DecimalVector (org.apache.arrow.vector.DecimalVector)4 StructVector (org.apache.arrow.vector.complex.StructVector)4 ArrayList (java.util.ArrayList)3 TimeMicroVector (org.apache.arrow.vector.TimeMicroVector)3 TimeMilliVector (org.apache.arrow.vector.TimeMilliVector)3 TimeNanoVector (org.apache.arrow.vector.TimeNanoVector)3 TimeSecVector (org.apache.arrow.vector.TimeSecVector)3 TimeStampVector (org.apache.arrow.vector.TimeStampVector)3 VectorSchemaRoot (org.apache.arrow.vector.VectorSchemaRoot)3