Search in sources :

Example 6 with IntVector

Use of org.apache.arrow.vector.IntVector in the beam project by apache.

In class ArrowConversionTest, method rowIterator.

/**
 * Builds a 16-row Arrow record batch with eight columns (int32, float64, string, two UTC
 * timestamps, an int32 list, boolean, 3-byte fixed-size binary) and asserts that
 * {@code ArrowConversion.rowsFromRecordBatch} yields the matching Beam rows in order.
 */
@Test
public void rowIterator() {
    final int rowCount = 16;
    org.apache.arrow.vector.types.pojo.Schema schema =
        new org.apache.arrow.vector.types.pojo.Schema(
            asList(
                field("int32", new ArrowType.Int(32, true)),
                field("float64", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)),
                field("string", new ArrowType.Utf8()),
                field("timestampMicroUTC", new ArrowType.Timestamp(TimeUnit.MICROSECOND, "UTC")),
                field("timestampMilliUTC", new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC")),
                field("int32_list", new ArrowType.List(), field("int32s", new ArrowType.Int(32, true))),
                field("boolean", new ArrowType.Bool()),
                field("fixed_size_binary", new ArrowType.FixedSizeBinary(3))));
    Schema beamSchema = ArrowConversion.ArrowSchemaTranslator.toBeamSchema(schema);
    // try-with-resources closes the root even when population or the assertion below throws;
    // previously close() was only reached on the success path.
    try (VectorSchemaRoot expectedSchemaRoot = VectorSchemaRoot.create(schema, allocator)) {
        expectedSchemaRoot.allocateNew();
        expectedSchemaRoot.setRowCount(rowCount);
        // Column vectors, in the same order as the schema fields above.
        IntVector intVector = (IntVector) expectedSchemaRoot.getFieldVectors().get(0);
        Float8Vector floatVector = (Float8Vector) expectedSchemaRoot.getFieldVectors().get(1);
        VarCharVector strVector = (VarCharVector) expectedSchemaRoot.getFieldVectors().get(2);
        TimeStampMicroTZVector timestampMicroUtcVector = (TimeStampMicroTZVector) expectedSchemaRoot.getFieldVectors().get(3);
        TimeStampMilliTZVector timeStampMilliTZVector = (TimeStampMilliTZVector) expectedSchemaRoot.getFieldVectors().get(4);
        ListVector int32ListVector = (ListVector) expectedSchemaRoot.getFieldVectors().get(5);
        IntVector int32ListElementVector = int32ListVector.<IntVector>addOrGetVector(new org.apache.arrow.vector.types.pojo.FieldType(false, new ArrowType.Int(32, true), null)).getVector();
        BitVector boolVector = (BitVector) expectedSchemaRoot.getFieldVectors().get(6);
        FixedSizeBinaryVector fixedSizeBinaryVector = (FixedSizeBinaryVector) expectedSchemaRoot.getFieldVectors().get(7);
        ArrayList<Row> expectedRows = new ArrayList<>();
        for (int i = 0; i < rowCount; i++) {
            DateTime dt = new DateTime(2019, 1, i + 1, i, i, i, DateTimeZone.UTC);
            double floatValue = i + .1 * i;
            // The expected Beam row and the Arrow vectors are filled from the same values.
            expectedRows.add(Row.withSchema(beamSchema).addValues(i, floatValue, "" + i, dt, dt, ImmutableList.of(i), (i % 2) != 0, new byte[] { (byte) i, (byte) (i + 1), (byte) (i + 2) }).build());
            intVector.set(i, i);
            floatVector.set(i, floatValue);
            strVector.set(i, new Text("" + i));
            // The micro-precision column receives the millisecond instant scaled by 1000.
            timestampMicroUtcVector.set(i, dt.getMillis() * 1000);
            timeStampMilliTZVector.set(i, dt.getMillis());
            // Each list cell holds exactly one element: the row index.
            int32ListVector.startNewValue(i);
            int32ListElementVector.set(i, i);
            int32ListVector.endValue(i, 1);
            boolVector.set(i, i % 2);
            fixedSizeBinaryVector.set(i, new byte[] { (byte) i, (byte) (i + 1), (byte) (i + 2) });
        }
        assertThat(ImmutableList.copyOf(ArrowConversion.rowsFromRecordBatch(beamSchema, expectedSchemaRoot)), IsIterableContainingInOrder.contains(expectedRows.stream().map((row) -> equalTo(row)).collect(ImmutableList.toImmutableList())));
    }
}
Also used : IsIterableContainingInOrder(org.hamcrest.collection.IsIterableContainingInOrder) DateTimeZone(org.joda.time.DateTimeZone) Float8Vector(org.apache.arrow.vector.Float8Vector) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) RunWith(org.junit.runner.RunWith) TimeUnit(org.apache.arrow.vector.types.TimeUnit) Text(org.apache.arrow.vector.util.Text) TimeStampMilliTZVector(org.apache.arrow.vector.TimeStampMilliTZVector) ArrayList(java.util.ArrayList) FloatingPointPrecision(org.apache.arrow.vector.types.FloatingPointPrecision) Arrays.asList(java.util.Arrays.asList) RootAllocator(org.apache.arrow.memory.RootAllocator) After(org.junit.After) ListVector(org.apache.arrow.vector.complex.ListVector) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) FixedSizeBinaryVector(org.apache.arrow.vector.FixedSizeBinaryVector) Row(org.apache.beam.sdk.values.Row) BufferAllocator(org.apache.arrow.memory.BufferAllocator) Before(org.junit.Before) Field(org.apache.beam.sdk.schemas.Schema.Field) IntVector(org.apache.arrow.vector.IntVector) TimeStampMicroTZVector(org.apache.arrow.vector.TimeStampMicroTZVector) VectorSchemaRoot(org.apache.arrow.vector.VectorSchemaRoot) DateTime(org.joda.time.DateTime) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) Schema(org.apache.beam.sdk.schemas.Schema) Matchers.equalTo(org.hamcrest.Matchers.equalTo) VarCharVector(org.apache.arrow.vector.VarCharVector) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) BitVector(org.apache.arrow.vector.BitVector) VectorSchemaRoot(org.apache.arrow.vector.VectorSchemaRoot) BitVector(org.apache.arrow.vector.BitVector) Schema(org.apache.beam.sdk.schemas.Schema) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) ArrayList(java.util.ArrayList) DateTime(org.joda.time.DateTime) TimeStampMicroTZVector(org.apache.arrow.vector.TimeStampMicroTZVector) 
TimeStampMilliTZVector(org.apache.arrow.vector.TimeStampMilliTZVector) IntVector(org.apache.arrow.vector.IntVector) Float8Vector(org.apache.arrow.vector.Float8Vector) VarCharVector(org.apache.arrow.vector.VarCharVector) Text(org.apache.arrow.vector.util.Text) FixedSizeBinaryVector(org.apache.arrow.vector.FixedSizeBinaryVector) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) ListVector(org.apache.arrow.vector.complex.ListVector) Row(org.apache.beam.sdk.values.Row) Test(org.junit.Test)

Example 7 with IntVector

Use of org.apache.arrow.vector.IntVector in the twister2 project by DSC-SPIDAL.

In class ArrowAllToAll, method onReceive.

/**
 * Records a buffer received from {@code source}. Once every buffer of the current column has
 * arrived, the buffers are loaded into that column's vector; once every column of the pending
 * table has been loaded, the vectors are wrapped as columns of an {@code ArrowTable}, the table
 * is handed to {@code recvCallback}, and the pending state is cleared.
 *
 * @param source id of the sender, used to look up the pending table
 * @param buffer received buffer; cast to {@code ArrowChannelBuffer}
 * @param length not read by this method
 * @throws RuntimeException if a field vector has a type with no matching column wrapper
 */
@Override
public void onReceive(int source, ChannelBuffer buffer, int length) {
    PendingReceiveTable pending = receives.get(source);
    receivedBuffers++;
    ArrowBuf arrowBuf = ((ArrowChannelBuffer) buffer).getArrowBuf();
    pending.buffers.add(arrowBuf);
    // A field node is registered when the first buffer of a column shows up.
    if (pending.bufferIndex == 0) {
        pending.fieldNodes.add(new ArrowFieldNode(pending.noArray, 0));
    }
    VectorSchemaRoot root = pending.root;
    List<FieldVector> vectors = root.getFieldVectors();

    // Still waiting for more buffers of this array.
    if (pending.noBuffers != pending.bufferIndex + 1) {
        return;
    }

    // we received everything for this array
    FieldVector completed = vectors.get(pending.columnIndex);
    loadBuffers(completed, completed.getField(), pending.buffers.iterator(), pending.fieldNodes.iterator());
    pending.arrays.add(completed);
    pending.buffers.clear();

    // Still waiting for more columns of this table.
    if (pending.arrays.size() != root.getFieldVectors().size()) {
        return;
    }

    // create the table
    List<ArrowColumn> columns = new ArrayList<>();
    for (FieldVector vector : vectors) {
        ArrowColumn column;
        if (vector instanceof IntVector) {
            column = new Int4Column((IntVector) vector);
        } else if (vector instanceof Float4Vector) {
            column = new Float4Column((Float4Vector) vector);
        } else if (vector instanceof Float8Vector) {
            column = new Float8Column((Float8Vector) vector);
        } else if (vector instanceof UInt8Vector) {
            column = new Int8Column((UInt8Vector) vector);
        } else if (vector instanceof UInt2Vector) {
            column = new UInt2Column((UInt2Vector) vector);
        } else if (vector instanceof VarCharVector) {
            column = new StringColumn((VarCharVector) vector);
        } else if (vector instanceof VarBinaryVector) {
            column = new BinaryColumn((VarBinaryVector) vector);
        } else {
            throw new RuntimeException("Un-supported type : " + vector.getClass().getName());
        }
        columns.add(column);
    }
    Table received = new ArrowTable(root.getSchema(), pending.noArray, columns);
    LOG.info("Received table from source " + source + " to " + pending.target + " count" + received.rowCount());
    recvCallback.onReceive(source, pending.target, received);
    pending.clear();
}
Also used : BaseFixedWidthVector(org.apache.arrow.vector.BaseFixedWidthVector) VectorSchemaRoot(org.apache.arrow.vector.VectorSchemaRoot) ArrowBuf(io.netty.buffer.ArrowBuf) Float4Vector(org.apache.arrow.vector.Float4Vector) BinaryColumn(edu.iu.dsc.tws.common.table.arrow.BinaryColumn) ArrayList(java.util.ArrayList) VarBinaryVector(org.apache.arrow.vector.VarBinaryVector) ArrowColumn(edu.iu.dsc.tws.common.table.ArrowColumn) BaseVariableWidthVector(org.apache.arrow.vector.BaseVariableWidthVector) ArrowFieldNode(org.apache.arrow.vector.ipc.message.ArrowFieldNode) Int8Column(edu.iu.dsc.tws.common.table.arrow.Int8Column) StringColumn(edu.iu.dsc.tws.common.table.arrow.StringColumn) Table(edu.iu.dsc.tws.common.table.Table) ArrowTable(edu.iu.dsc.tws.common.table.arrow.ArrowTable) IntVector(org.apache.arrow.vector.IntVector) UInt2Column(edu.iu.dsc.tws.common.table.arrow.UInt2Column) Float8Vector(org.apache.arrow.vector.Float8Vector) VarCharVector(org.apache.arrow.vector.VarCharVector) FieldVector(org.apache.arrow.vector.FieldVector) Float4Column(edu.iu.dsc.tws.common.table.arrow.Float4Column) UInt8Vector(org.apache.arrow.vector.UInt8Vector) Float8Column(edu.iu.dsc.tws.common.table.arrow.Float8Column) Int4Column(edu.iu.dsc.tws.common.table.arrow.Int4Column) ArrowTable(edu.iu.dsc.tws.common.table.arrow.ArrowTable) UInt2Vector(org.apache.arrow.vector.UInt2Vector)

Example 8 with IntVector

Use of org.apache.arrow.vector.IntVector in the carbondata project by apache.

In class ArrowWriter, method createFieldWriter.

/**
 * Picks the {@code ArrowFieldWriter} implementation for the concrete type of the given vector.
 * List and struct vectors are handled recursively: a writer is first created for the list's
 * data vector, or for each struct child in ordinal order.
 *
 * @param valueVector the Arrow vector to wrap
 * @return a writer wrapping {@code valueVector}
 * @throws UnsupportedOperationException if the vector type matches none of the handled types
 */
private static ArrowFieldWriter createFieldWriter(ValueVector valueVector) {
    if (valueVector instanceof BitVector) {
        return new BooleanWriter((BitVector) valueVector);
    }
    if (valueVector instanceof TinyIntVector) {
        return new ByteWriter((TinyIntVector) valueVector);
    }
    if (valueVector instanceof SmallIntVector) {
        return new ShortWriter((SmallIntVector) valueVector);
    }
    if (valueVector instanceof IntVector) {
        return new IntWriter((IntVector) valueVector);
    }
    if (valueVector instanceof BigIntVector) {
        return new LongWriter((BigIntVector) valueVector);
    }
    if (valueVector instanceof DecimalVector) {
        // Precision and scale are read from the vector's own field type.
        DecimalVector decimalVector = (DecimalVector) valueVector;
        ArrowType.Decimal decimalType = (ArrowType.Decimal) decimalVector.getField().getType();
        return new DecimalWriter(decimalVector, decimalType.getPrecision(), decimalType.getScale());
    }
    if (valueVector instanceof VarCharVector) {
        return new StringWriter((VarCharVector) valueVector);
    }
    if (valueVector instanceof Float4Vector) {
        return new FloatWriter((Float4Vector) valueVector);
    }
    if (valueVector instanceof Float8Vector) {
        return new DoubleWriter((Float8Vector) valueVector);
    }
    if (valueVector instanceof ListVector) {
        ListVector listVector = (ListVector) valueVector;
        ArrowFieldWriter elementWriter = createFieldWriter(listVector.getDataVector());
        return new ArrayWriter(listVector, elementWriter);
    }
    if (valueVector instanceof StructVector) {
        StructVector structVector = (StructVector) valueVector;
        int childCount = structVector.size();
        ArrowFieldWriter[] childWriters = new ArrowFieldWriter[childCount];
        for (int ordinal = 0; ordinal < childCount; ordinal++) {
            childWriters[ordinal] = createFieldWriter(structVector.getChildByOrdinal(ordinal));
        }
        return new StructWriter(structVector, childWriters);
    }
    if (valueVector instanceof VarBinaryVector) {
        return new BinaryWriter((VarBinaryVector) valueVector);
    }
    if (valueVector instanceof DateDayVector) {
        return new DateWriter((DateDayVector) valueVector);
    }
    if (valueVector instanceof TimeStampMicroTZVector) {
        return new TimeStampWriter((TimeStampMicroTZVector) valueVector);
    }
    throw new UnsupportedOperationException("Invalid data type");
}
Also used : BitVector(org.apache.arrow.vector.BitVector) StructVector(org.apache.arrow.vector.complex.StructVector) Float4Vector(org.apache.arrow.vector.Float4Vector) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) ArrayList(java.util.ArrayList) VarBinaryVector(org.apache.arrow.vector.VarBinaryVector) Field(org.apache.arrow.vector.types.pojo.Field) TimeStampMicroTZVector(org.apache.arrow.vector.TimeStampMicroTZVector) DateDayVector(org.apache.arrow.vector.DateDayVector) DecimalVector(org.apache.arrow.vector.DecimalVector) SmallIntVector(org.apache.arrow.vector.SmallIntVector) IntVector(org.apache.arrow.vector.IntVector) SmallIntVector(org.apache.arrow.vector.SmallIntVector) BigIntVector(org.apache.arrow.vector.BigIntVector) TinyIntVector(org.apache.arrow.vector.TinyIntVector) Float8Vector(org.apache.arrow.vector.Float8Vector) VarCharVector(org.apache.arrow.vector.VarCharVector) BigIntVector(org.apache.arrow.vector.BigIntVector) ListVector(org.apache.arrow.vector.complex.ListVector) TinyIntVector(org.apache.arrow.vector.TinyIntVector)

Aggregations

Float8Vector (org.apache.arrow.vector.Float8Vector)8 IntVector (org.apache.arrow.vector.IntVector)8 VarCharVector (org.apache.arrow.vector.VarCharVector)7 BitVector (org.apache.arrow.vector.BitVector)6 Float4Vector (org.apache.arrow.vector.Float4Vector)6 VarBinaryVector (org.apache.arrow.vector.VarBinaryVector)6 BigIntVector (org.apache.arrow.vector.BigIntVector)5 DateDayVector (org.apache.arrow.vector.DateDayVector)5 SmallIntVector (org.apache.arrow.vector.SmallIntVector)5 TinyIntVector (org.apache.arrow.vector.TinyIntVector)5 ListVector (org.apache.arrow.vector.complex.ListVector)5 DecimalVector (org.apache.arrow.vector.DecimalVector)4 StructVector (org.apache.arrow.vector.complex.StructVector)4 ArrayList (java.util.ArrayList)3 TimeMicroVector (org.apache.arrow.vector.TimeMicroVector)3 TimeMilliVector (org.apache.arrow.vector.TimeMilliVector)3 TimeNanoVector (org.apache.arrow.vector.TimeNanoVector)3 TimeSecVector (org.apache.arrow.vector.TimeSecVector)3 TimeStampVector (org.apache.arrow.vector.TimeStampVector)3 RowType (org.apache.flink.table.types.logical.RowType)3