Search in sources :

Example 11 with VectorSchemaRoot

use of org.apache.arrow.vector.VectorSchemaRoot in project textdb by TextDB.

the class PythonUDFOpExec method readArrowStream.

/**
 * Reads every record batch of a Flight stream and appends its rows to {@code resultQueue}.
 *
 * <p>The flight is looked up through {@link FlightClient#getInfo} using {@code channel.name} as
 * the descriptor path; the ticket of its first endpoint is then opened with
 * {@link FlightClient#getStream(Ticket, CallOption...)}. This method is blocking: it returns only
 * after {@code stream.next()} reports that no more batches are available.
 *
 * <p>The stream is closed before returning, whether reading succeeded or failed. Any failure is
 * handed to {@code closeAndThrow(client, e)}.
 *
 * @param client      The FlightClient used to look up and open the stream.
 * @param channel     The channel whose {@code name} is the path of the flight on the Flight server.
 * @param resultQueue The queue that receives one tuple per row read. Must be empty when it is
 *                    passed here.
 */
private void readArrowStream(FlightClient client, Channel channel, Queue<Tuple> resultQueue) {
    try {
        FlightInfo info = client.getInfo(FlightDescriptor.path(Collections.singletonList(channel.name)));
        Ticket ticket = info.getEndpoints().get(0).getTicket();
        // try-with-resources: the stream must be closed even when reading a batch fails,
        // otherwise the underlying call and its buffers are never released.
        try (FlightStream stream = client.getStream(ticket)) {
            while (stream.next()) {
                // The root is reused by the stream for every batch; copy the rows out, then clear it.
                VectorSchemaRoot root = stream.getRoot();
                int rowCount = root.getRowCount();
                for (int i = 0; i < rowCount; i++) {
                    resultQueue.add(ArrowUtils.getTexeraTuple(i, root));
                }
                root.clear();
            }
        }
    } catch (RuntimeException e) {
        System.out.println("NO SUCH FLIGHT!!");
        closeAndThrow(client, e);
    } catch (Exception e) {
        // Only reachable from FlightStream.close(), which declares a checked Exception.
        // Wrapped so closeAndThrow receives the same exception type as on the path above.
        closeAndThrow(client, new RuntimeException(e));
    }
}
Also used : VectorSchemaRoot(org.apache.arrow.vector.VectorSchemaRoot)

Example 12 with VectorSchemaRoot

use of org.apache.arrow.vector.VectorSchemaRoot in project flink by apache.

the class ArrowUtilsTest method testCreateArrowWriter.

/**
 * Verifies that {@code ArrowUtils.createRowDataArrowWriter} produces, for each field of
 * {@code rowType}, a field writer whose class equals the one recorded in
 * {@code testFields.get(i).f3}.
 */
@Test
public void testCreateArrowWriter() {
    // try-with-resources: release whatever the root's vectors hold, even if an assertion fails.
    try (VectorSchemaRoot root = VectorSchemaRoot.create(ArrowUtils.toArrowSchema(rowType), allocator)) {
        ArrowWriter<RowData> writer = ArrowUtils.createRowDataArrowWriter(root, rowType);
        ArrowFieldWriter<RowData>[] fieldWriters = writer.getFieldWriters();
        for (int i = 0; i < fieldWriters.length; i++) {
            assertEquals(testFields.get(i).f3, fieldWriters[i].getClass());
        }
    }
}
Also used : VectorSchemaRoot(org.apache.arrow.vector.VectorSchemaRoot) GenericRowData(org.apache.flink.table.data.GenericRowData) RowData(org.apache.flink.table.data.RowData) ArrowFieldWriter(org.apache.flink.table.runtime.arrow.writers.ArrowFieldWriter) Test(org.junit.Test)

Example 13 with VectorSchemaRoot

use of org.apache.arrow.vector.VectorSchemaRoot in project flink by apache.

the class ArrowUtilsTest method testReadArrowBatches.

/**
 * Writes five empty rows as three Arrow record batches into an in-memory stream and verifies
 * that {@code ArrowUtils.readArrowBatches} returns exactly three batches when reading it back.
 *
 * @throws IOException if writing to or reading from the in-memory Arrow stream fails
 */
@Test
public void testReadArrowBatches() throws IOException {
    // try-with-resources: the root and the stream writer are released even if the test fails.
    try (VectorSchemaRoot root = VectorSchemaRoot.create(ArrowUtils.toArrowSchema(rowType), allocator)) {
        ArrowWriter<RowData> arrowWriter = ArrowUtils.createRowDataArrowWriter(root, rowType);
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        try (ArrowStreamWriter arrowStreamWriter = new ArrowStreamWriter(root, null, baos)) {
            arrowStreamWriter.start();
            List<RowData> testData = Arrays.asList(
                    new GenericRowData(rowType.getFieldCount()),
                    new GenericRowData(rowType.getFieldCount()),
                    new GenericRowData(rowType.getFieldCount()),
                    new GenericRowData(rowType.getFieldCount()),
                    new GenericRowData(rowType.getFieldCount()));
            int batches = 3;
            // Integer division: 5 / 3 + 1 == 2 rows per partition, giving partitions of 2, 2 and 1
            // rows, i.e. exactly `batches` partitions.
            List<List<RowData>> subLists = Lists.partition(testData, testData.size() / batches + 1);
            for (List<RowData> subList : subLists) {
                for (RowData value : subList) {
                    arrowWriter.write(value);
                }
                // Finish the current batch, emit it to the stream, then reset for the next one.
                arrowWriter.finish();
                arrowStreamWriter.writeBatch();
                arrowWriter.reset();
            }
            // The bytes are snapshotted here, before the stream writer is closed.
            assertEquals(batches, ArrowUtils.readArrowBatches(Channels.newChannel(new ByteArrayInputStream(baos.toByteArray()))).length);
        }
    }
}
Also used : VectorSchemaRoot(org.apache.arrow.vector.VectorSchemaRoot) GenericRowData(org.apache.flink.table.data.GenericRowData) RowData(org.apache.flink.table.data.RowData) ByteArrayInputStream(java.io.ByteArrayInputStream) GenericRowData(org.apache.flink.table.data.GenericRowData) List(java.util.List) ArrayList(java.util.ArrayList) ByteArrayOutputStream(java.io.ByteArrayOutputStream) ArrowStreamWriter(org.apache.arrow.vector.ipc.ArrowStreamWriter) Test(org.junit.Test)

Example 14 with VectorSchemaRoot

use of org.apache.arrow.vector.VectorSchemaRoot in project beam by apache.

the class ArrowConversionTest method rowIterator.

/**
 * Verifies that {@code ArrowConversion.rowsFromRecordBatch} yields, in order, Beam rows equal to
 * the values written into an Arrow {@code VectorSchemaRoot}.
 *
 * <p>The Arrow schema has eight columns (int32, float64, string, two UTC timestamps in micro- and
 * millisecond units, a list of int32, boolean and 3-byte fixed-size binary). Sixteen rows are
 * written to the Arrow vectors and, in parallel, the matching expected Beam {@code Row}s are
 * built using the Beam schema derived from the Arrow schema.
 */
@Test
public void rowIterator() {
    final int rowCount = 16;
    org.apache.arrow.vector.types.pojo.Schema schema =
            new org.apache.arrow.vector.types.pojo.Schema(
                    asList(
                            field("int32", new ArrowType.Int(32, true)),
                            field("float64", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)),
                            field("string", new ArrowType.Utf8()),
                            field("timestampMicroUTC", new ArrowType.Timestamp(TimeUnit.MICROSECOND, "UTC")),
                            field("timestampMilliUTC", new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC")),
                            field("int32_list", new ArrowType.List(), field("int32s", new ArrowType.Int(32, true))),
                            field("boolean", new ArrowType.Bool()),
                            field("fixed_size_binary", new ArrowType.FixedSizeBinary(3))));
    Schema beamSchema = ArrowConversion.ArrowSchemaTranslator.toBeamSchema(schema);
    // try-with-resources: the root is released even when the assertion below fails
    // (previously close() was only reached on success).
    try (VectorSchemaRoot expectedSchemaRoot = VectorSchemaRoot.create(schema, allocator)) {
        expectedSchemaRoot.allocateNew();
        expectedSchemaRoot.setRowCount(rowCount);
        // Vectors are fetched by position; the indices follow the field order of `schema` above.
        IntVector intVector = (IntVector) expectedSchemaRoot.getFieldVectors().get(0);
        Float8Vector floatVector = (Float8Vector) expectedSchemaRoot.getFieldVectors().get(1);
        VarCharVector strVector = (VarCharVector) expectedSchemaRoot.getFieldVectors().get(2);
        TimeStampMicroTZVector timestampMicroUtcVector = (TimeStampMicroTZVector) expectedSchemaRoot.getFieldVectors().get(3);
        TimeStampMilliTZVector timeStampMilliTZVector = (TimeStampMilliTZVector) expectedSchemaRoot.getFieldVectors().get(4);
        ListVector int32ListVector = (ListVector) expectedSchemaRoot.getFieldVectors().get(5);
        IntVector int32ListElementVector =
                int32ListVector
                        .<IntVector>addOrGetVector(
                                new org.apache.arrow.vector.types.pojo.FieldType(false, new ArrowType.Int(32, true), null))
                        .getVector();
        BitVector boolVector = (BitVector) expectedSchemaRoot.getFieldVectors().get(6);
        FixedSizeBinaryVector fixedSizeBinaryVector = (FixedSizeBinaryVector) expectedSchemaRoot.getFieldVectors().get(7);
        ArrayList<Row> expectedRows = new ArrayList<>();
        for (int i = 0; i < rowCount; i++) {
            // i doubles as day-1, hour, minute and second, so rowCount must stay small enough
            // for all of those to remain valid calendar fields (16 is).
            DateTime dt = new DateTime(2019, 1, i + 1, i, i, i, DateTimeZone.UTC);
            expectedRows.add(
                    Row.withSchema(beamSchema)
                            .addValues(
                                    i,
                                    i + .1 * i,
                                    "" + i,
                                    dt,
                                    dt,
                                    ImmutableList.of(i),
                                    (i % 2) != 0,
                                    new byte[] { (byte) i, (byte) (i + 1), (byte) (i + 2) })
                            .build());
            intVector.set(i, i);
            floatVector.set(i, i + .1 * i);
            strVector.set(i, new Text("" + i));
            // Millis scaled by 1000 for the column declared with TimeUnit.MICROSECOND.
            timestampMicroUtcVector.set(i, dt.getMillis() * 1000);
            timeStampMilliTZVector.set(i, dt.getMillis());
            // Each list cell holds exactly one element: the value i stored at element index i.
            int32ListVector.startNewValue(i);
            int32ListElementVector.set(i, i);
            int32ListVector.endValue(i, 1);
            boolVector.set(i, i % 2);
            fixedSizeBinaryVector.set(i, new byte[] { (byte) i, (byte) (i + 1), (byte) (i + 2) });
        }
        assertThat(
                ImmutableList.copyOf(ArrowConversion.rowsFromRecordBatch(beamSchema, expectedSchemaRoot)),
                IsIterableContainingInOrder.contains(
                        expectedRows.stream().map((row) -> equalTo(row)).collect(ImmutableList.toImmutableList())));
    }
}
Also used : IsIterableContainingInOrder(org.hamcrest.collection.IsIterableContainingInOrder) DateTimeZone(org.joda.time.DateTimeZone) Float8Vector(org.apache.arrow.vector.Float8Vector) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) RunWith(org.junit.runner.RunWith) TimeUnit(org.apache.arrow.vector.types.TimeUnit) Text(org.apache.arrow.vector.util.Text) TimeStampMilliTZVector(org.apache.arrow.vector.TimeStampMilliTZVector) ArrayList(java.util.ArrayList) FloatingPointPrecision(org.apache.arrow.vector.types.FloatingPointPrecision) Arrays.asList(java.util.Arrays.asList) RootAllocator(org.apache.arrow.memory.RootAllocator) After(org.junit.After) ListVector(org.apache.arrow.vector.complex.ListVector) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) FixedSizeBinaryVector(org.apache.arrow.vector.FixedSizeBinaryVector) Row(org.apache.beam.sdk.values.Row) BufferAllocator(org.apache.arrow.memory.BufferAllocator) Before(org.junit.Before) Field(org.apache.beam.sdk.schemas.Schema.Field) IntVector(org.apache.arrow.vector.IntVector) TimeStampMicroTZVector(org.apache.arrow.vector.TimeStampMicroTZVector) VectorSchemaRoot(org.apache.arrow.vector.VectorSchemaRoot) DateTime(org.joda.time.DateTime) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) Schema(org.apache.beam.sdk.schemas.Schema) Matchers.equalTo(org.hamcrest.Matchers.equalTo) VarCharVector(org.apache.arrow.vector.VarCharVector) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) BitVector(org.apache.arrow.vector.BitVector) VectorSchemaRoot(org.apache.arrow.vector.VectorSchemaRoot) BitVector(org.apache.arrow.vector.BitVector) Schema(org.apache.beam.sdk.schemas.Schema) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) ArrayList(java.util.ArrayList) DateTime(org.joda.time.DateTime) TimeStampMicroTZVector(org.apache.arrow.vector.TimeStampMicroTZVector) 
TimeStampMilliTZVector(org.apache.arrow.vector.TimeStampMilliTZVector) IntVector(org.apache.arrow.vector.IntVector) Float8Vector(org.apache.arrow.vector.Float8Vector) VarCharVector(org.apache.arrow.vector.VarCharVector) Text(org.apache.arrow.vector.util.Text) FixedSizeBinaryVector(org.apache.arrow.vector.FixedSizeBinaryVector) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) ListVector(org.apache.arrow.vector.complex.ListVector) Row(org.apache.beam.sdk.values.Row) Test(org.junit.Test)

Example 15 with VectorSchemaRoot

use of org.apache.arrow.vector.VectorSchemaRoot in project twister2 by DSC-SPIDAL.

the class ArrowAllToAll method onReceive.

/**
 * Handles one received buffer belonging to the pending table of {@code source}.
 *
 * <p>The buffer is appended to the pending table's buffer list. Once the last buffer of the
 * current column has arrived, the buffers are loaded into that column's vector. Once every
 * column of the schema root has been loaded, each vector is wrapped in its matching
 * {@code ArrowColumn}, an {@code ArrowTable} is built and passed to {@code recvCallback}, and
 * the pending state is cleared.
 *
 * @param source the id of the sender; key into {@code receives}
 * @param buffer the received buffer; cast to {@code ArrowChannelBuffer} to reach its ArrowBuf
 * @param length not used by this method
 * @throws RuntimeException if a column's vector type has no matching column wrapper
 */
@Override
public void onReceive(int source, ChannelBuffer buffer, int length) {
    PendingReceiveTable pending = receives.get(source);
    receivedBuffers++;
    ArrowBuf received = ((ArrowChannelBuffer) buffer).getArrowBuf();
    pending.buffers.add(received);
    // The first buffer of a column also registers the column's field node.
    if (pending.bufferIndex == 0) {
        pending.fieldNodes.add(new ArrowFieldNode(pending.noArray, 0));
    }
    VectorSchemaRoot root = pending.root;
    List<FieldVector> vectors = root.getFieldVectors();

    // Still waiting for more buffers of the current column.
    if (pending.noBuffers != pending.bufferIndex + 1) {
        return;
    }

    // All buffers of this column are here: load them into the column's vector.
    FieldVector current = vectors.get(pending.columnIndex);
    loadBuffers(current, current.getField(), pending.buffers.iterator(), pending.fieldNodes.iterator());
    pending.arrays.add(current);
    pending.buffers.clear();

    // Still waiting for more columns of the table.
    if (pending.arrays.size() != root.getFieldVectors().size()) {
        return;
    }

    // Every column is loaded: wrap each vector in the column type matching its class.
    List<ArrowColumn> wrapped = new ArrayList<>();
    for (FieldVector vector : vectors) {
        ArrowColumn column;
        if (vector instanceof IntVector) {
            column = new Int4Column((IntVector) vector);
        } else if (vector instanceof Float4Vector) {
            column = new Float4Column((Float4Vector) vector);
        } else if (vector instanceof Float8Vector) {
            column = new Float8Column((Float8Vector) vector);
        } else if (vector instanceof UInt8Vector) {
            column = new Int8Column((UInt8Vector) vector);
        } else if (vector instanceof UInt2Vector) {
            column = new UInt2Column((UInt2Vector) vector);
        } else if (vector instanceof VarCharVector) {
            column = new StringColumn((VarCharVector) vector);
        } else if (vector instanceof VarBinaryVector) {
            column = new BinaryColumn((VarBinaryVector) vector);
        } else {
            throw new RuntimeException("Un-supported type : " + vector.getClass().getName());
        }
        wrapped.add(column);
    }

    Table assembled = new ArrowTable(root.getSchema(), pending.noArray, wrapped);
    LOG.info("Received table from source " + source + " to " + pending.target + " count" + assembled.rowCount());
    recvCallback.onReceive(source, pending.target, assembled);
    pending.clear();
}
Also used : BaseFixedWidthVector(org.apache.arrow.vector.BaseFixedWidthVector) VectorSchemaRoot(org.apache.arrow.vector.VectorSchemaRoot) ArrowBuf(io.netty.buffer.ArrowBuf) Float4Vector(org.apache.arrow.vector.Float4Vector) BinaryColumn(edu.iu.dsc.tws.common.table.arrow.BinaryColumn) ArrayList(java.util.ArrayList) VarBinaryVector(org.apache.arrow.vector.VarBinaryVector) ArrowColumn(edu.iu.dsc.tws.common.table.ArrowColumn) BaseVariableWidthVector(org.apache.arrow.vector.BaseVariableWidthVector) ArrowFieldNode(org.apache.arrow.vector.ipc.message.ArrowFieldNode) Int8Column(edu.iu.dsc.tws.common.table.arrow.Int8Column) StringColumn(edu.iu.dsc.tws.common.table.arrow.StringColumn) Table(edu.iu.dsc.tws.common.table.Table) ArrowTable(edu.iu.dsc.tws.common.table.arrow.ArrowTable) IntVector(org.apache.arrow.vector.IntVector) UInt2Column(edu.iu.dsc.tws.common.table.arrow.UInt2Column) Float8Vector(org.apache.arrow.vector.Float8Vector) VarCharVector(org.apache.arrow.vector.VarCharVector) FieldVector(org.apache.arrow.vector.FieldVector) Float4Column(edu.iu.dsc.tws.common.table.arrow.Float4Column) UInt8Vector(org.apache.arrow.vector.UInt8Vector) Float8Column(edu.iu.dsc.tws.common.table.arrow.Float8Column) Int4Column(edu.iu.dsc.tws.common.table.arrow.Int4Column) ArrowTable(edu.iu.dsc.tws.common.table.arrow.ArrowTable) UInt2Vector(org.apache.arrow.vector.UInt2Vector)

Aggregations

VectorSchemaRoot (org.apache.arrow.vector.VectorSchemaRoot)15 RowData (org.apache.flink.table.data.RowData)4 ByteArrayOutputStream (java.io.ByteArrayOutputStream)3 IOException (java.io.IOException)3 ArrayList (java.util.ArrayList)3 FieldVector (org.apache.arrow.vector.FieldVector)3 VarCharVector (org.apache.arrow.vector.VarCharVector)3 GenericRowData (org.apache.flink.table.data.GenericRowData)3 Test (org.junit.Test)3 Float8Vector (org.apache.arrow.vector.Float8Vector)2 IntVector (org.apache.arrow.vector.IntVector)2 ArrowStreamWriter (org.apache.arrow.vector.ipc.ArrowStreamWriter)2 BytesColumnVector (org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector)2 ColumnVector (org.apache.hadoop.hive.ql.exec.vector.ColumnVector)2 DecimalColumnVector (org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector)2 DoubleColumnVector (org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector)2 IntervalDayTimeColumnVector (org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector)2 ListColumnVector (org.apache.hadoop.hive.ql.exec.vector.ListColumnVector)2 LongColumnVector (org.apache.hadoop.hive.ql.exec.vector.LongColumnVector)2 MapColumnVector (org.apache.hadoop.hive.ql.exec.vector.MapColumnVector)2