Search in sources :

Example 1 with Text

Use of org.apache.arrow.vector.util.Text in the Apache Beam project.

Class BigQueryIOStorageReadTest, method createResponseArrow.

/**
 * Builds a {@code ReadRowsResponse} carrying one serialized Arrow record batch with one row per
 * entry of {@code name}.
 *
 * <p>Field 0 of {@code arrowSchema} is cast to a {@code VarCharVector} and filled from
 * {@code name}; field 1 is cast to a {@code BigIntVector} and filled from {@code number}. The
 * populated vectors are unloaded into an Arrow record batch, serialized into an in-memory byte
 * stream, and wrapped in the response together with the supplied progress statistics.
 *
 * @param arrowSchema Arrow schema used to create the vectors; its first two fields must be
 *     compatible with the casts described above
 * @param name values for the string column; its size determines the number of rows
 * @param number values for the integer column; read at every index below {@code name.size()}
 * @param progressAtResponseStart value stored as the progress at the start of the response
 * @param progressAtResponseEnd value stored as the progress at the end of the response
 * @return the response containing the serialized batch, the row count and the progress stats
 * @throws RuntimeException if serializing the record batch fails with an {@code IOException}
 */
private ReadRowsResponse createResponseArrow(org.apache.arrow.vector.types.pojo.Schema arrowSchema, List<String> name, List<Long> number, double progressAtResponseStart, double progressAtResponseEnd) {
    int rowCount = name.size();
    ArrowRecordBatch serializedRecord;
    try (VectorSchemaRoot schemaRoot = VectorSchemaRoot.create(arrowSchema, allocator)) {
        schemaRoot.allocateNew();
        VarCharVector strVector = (VarCharVector) schemaRoot.getFieldVectors().get(0);
        BigIntVector bigIntVector = (BigIntVector) schemaRoot.getFieldVectors().get(1);
        for (int i = 0; i < rowCount; i++) {
            // setSafe grows the underlying buffers on demand, so inputs larger than the default
            // allocation cannot overrun them.
            bigIntVector.setSafe(i, number.get(i));
            strVector.setSafe(i, new Text(name.get(i)));
        }
        // Fix the row count only after all values are written (allocate -> mutate -> set count).
        schemaRoot.setRowCount(rowCount);
        VectorUnloader unLoader = new VectorUnloader(schemaRoot);
        try (org.apache.arrow.vector.ipc.message.ArrowRecordBatch records = unLoader.getRecordBatch();
                ByteArrayOutputStream os = new ByteArrayOutputStream()) {
            MessageSerializer.serialize(new WriteChannel(Channels.newChannel(os)), records);
            serializedRecord = ArrowRecordBatch.newBuilder().setRowCount(records.getLength()).setSerializedRecordBatch(ByteString.copyFrom(os.toByteArray())).build();
        } catch (IOException e) {
            throw new RuntimeException("Error writing to byte array output stream", e);
        }
    }
    return ReadRowsResponse.newBuilder().setArrowRecordBatch(serializedRecord).setRowCount(rowCount).setStats(StreamStats.newBuilder().setProgress(Progress.newBuilder().setAtResponseStart(progressAtResponseStart).setAtResponseEnd(progressAtResponseEnd))).build();
}
Also used : VectorSchemaRoot(org.apache.arrow.vector.VectorSchemaRoot) VarCharVector(org.apache.arrow.vector.VarCharVector) Text(org.apache.arrow.vector.util.Text) ByteArrayOutputStream(java.io.ByteArrayOutputStream) IOException(java.io.IOException) BigIntVector(org.apache.arrow.vector.BigIntVector) VectorUnloader(org.apache.arrow.vector.VectorUnloader) StatusRuntimeException(io.grpc.StatusRuntimeException) ArrowRecordBatch(com.google.cloud.bigquery.storage.v1.ArrowRecordBatch) WriteChannel(org.apache.arrow.vector.ipc.WriteChannel)

Example 2 with Text

Use of org.apache.arrow.vector.util.Text in the Apache Beam project.

Class ArrowConversionTest, method rowIterator.

/**
 * Checks that {@code ArrowConversion.rowsFromRecordBatch} yields, in order, the Beam rows that
 * correspond to the values written into an Arrow {@code VectorSchemaRoot}.
 *
 * <p>The Arrow schema has eight fields (int32, float64, string, two UTC timestamps, an int32 list,
 * boolean and a 3-byte fixed-size binary). For each row index the same values are written to the
 * Arrow vectors and added to the expected Beam row, so the two representations can be compared.
 */
@Test
public void rowIterator() {
    org.apache.arrow.vector.types.pojo.Schema schema = new org.apache.arrow.vector.types.pojo.Schema(asList(field("int32", new ArrowType.Int(32, true)), field("float64", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)), field("string", new ArrowType.Utf8()), field("timestampMicroUTC", new ArrowType.Timestamp(TimeUnit.MICROSECOND, "UTC")), field("timestampMilliUTC", new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC")), field("int32_list", new ArrowType.List(), field("int32s", new ArrowType.Int(32, true))), field("boolean", new ArrowType.Bool()), field("fixed_size_binary", new ArrowType.FixedSizeBinary(3))));
    Schema beamSchema = ArrowConversion.ArrowSchemaTranslator.toBeamSchema(schema);
    final int rowCount = 16;
    // try-with-resources releases the root even when the assertion below throws; previously
    // close() was skipped on a failing assertion.
    try (VectorSchemaRoot expectedSchemaRoot = VectorSchemaRoot.create(schema, allocator)) {
        expectedSchemaRoot.allocateNew();
        expectedSchemaRoot.setRowCount(rowCount);
        IntVector intVector = (IntVector) expectedSchemaRoot.getFieldVectors().get(0);
        Float8Vector floatVector = (Float8Vector) expectedSchemaRoot.getFieldVectors().get(1);
        VarCharVector strVector = (VarCharVector) expectedSchemaRoot.getFieldVectors().get(2);
        TimeStampMicroTZVector timestampMicroUtcVector = (TimeStampMicroTZVector) expectedSchemaRoot.getFieldVectors().get(3);
        TimeStampMilliTZVector timeStampMilliTZVector = (TimeStampMilliTZVector) expectedSchemaRoot.getFieldVectors().get(4);
        ListVector int32ListVector = (ListVector) expectedSchemaRoot.getFieldVectors().get(5);
        IntVector int32ListElementVector = int32ListVector.<IntVector>addOrGetVector(new org.apache.arrow.vector.types.pojo.FieldType(false, new ArrowType.Int(32, true), null)).getVector();
        BitVector boolVector = (BitVector) expectedSchemaRoot.getFieldVectors().get(6);
        FixedSizeBinaryVector fixedSizeBinaryVector = (FixedSizeBinaryVector) expectedSchemaRoot.getFieldVectors().get(7);
        ArrayList<Row> expectedRows = new ArrayList<>();
        for (int i = 0; i < rowCount; i++) {
            DateTime dt = new DateTime(2019, 1, i + 1, i, i, i, DateTimeZone.UTC);
            expectedRows.add(Row.withSchema(beamSchema).addValues(i, i + .1 * i, "" + i, dt, dt, ImmutableList.of(i), (i % 2) != 0, new byte[] { (byte) i, (byte) (i + 1), (byte) (i + 2) }).build());
            intVector.set(i, i);
            floatVector.set(i, i + .1 * i);
            strVector.set(i, new Text("" + i));
            // The microsecond column stores the same instant as the millisecond column, scaled by 1000.
            timestampMicroUtcVector.set(i, dt.getMillis() * 1000);
            timeStampMilliTZVector.set(i, dt.getMillis());
            // Each list value holds exactly one element: the row index itself.
            int32ListVector.startNewValue(i);
            int32ListElementVector.set(i, i);
            int32ListVector.endValue(i, 1);
            boolVector.set(i, i % 2);
            fixedSizeBinaryVector.set(i, new byte[] { (byte) i, (byte) (i + 1), (byte) (i + 2) });
        }
        assertThat(ImmutableList.copyOf(ArrowConversion.rowsFromRecordBatch(beamSchema, expectedSchemaRoot)), IsIterableContainingInOrder.contains(expectedRows.stream().map((row) -> equalTo(row)).collect(ImmutableList.toImmutableList())));
    }
}
Also used : IsIterableContainingInOrder(org.hamcrest.collection.IsIterableContainingInOrder) DateTimeZone(org.joda.time.DateTimeZone) Float8Vector(org.apache.arrow.vector.Float8Vector) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) RunWith(org.junit.runner.RunWith) TimeUnit(org.apache.arrow.vector.types.TimeUnit) Text(org.apache.arrow.vector.util.Text) TimeStampMilliTZVector(org.apache.arrow.vector.TimeStampMilliTZVector) ArrayList(java.util.ArrayList) FloatingPointPrecision(org.apache.arrow.vector.types.FloatingPointPrecision) Arrays.asList(java.util.Arrays.asList) RootAllocator(org.apache.arrow.memory.RootAllocator) After(org.junit.After) ListVector(org.apache.arrow.vector.complex.ListVector) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) FixedSizeBinaryVector(org.apache.arrow.vector.FixedSizeBinaryVector) Row(org.apache.beam.sdk.values.Row) BufferAllocator(org.apache.arrow.memory.BufferAllocator) Before(org.junit.Before) Field(org.apache.beam.sdk.schemas.Schema.Field) IntVector(org.apache.arrow.vector.IntVector) TimeStampMicroTZVector(org.apache.arrow.vector.TimeStampMicroTZVector) VectorSchemaRoot(org.apache.arrow.vector.VectorSchemaRoot) DateTime(org.joda.time.DateTime) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) Schema(org.apache.beam.sdk.schemas.Schema) Matchers.equalTo(org.hamcrest.Matchers.equalTo) VarCharVector(org.apache.arrow.vector.VarCharVector) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) BitVector(org.apache.arrow.vector.BitVector) VectorSchemaRoot(org.apache.arrow.vector.VectorSchemaRoot) BitVector(org.apache.arrow.vector.BitVector) Schema(org.apache.beam.sdk.schemas.Schema) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) ArrayList(java.util.ArrayList) DateTime(org.joda.time.DateTime) TimeStampMicroTZVector(org.apache.arrow.vector.TimeStampMicroTZVector) 
TimeStampMilliTZVector(org.apache.arrow.vector.TimeStampMilliTZVector) IntVector(org.apache.arrow.vector.IntVector) Float8Vector(org.apache.arrow.vector.Float8Vector) VarCharVector(org.apache.arrow.vector.VarCharVector) Text(org.apache.arrow.vector.util.Text) FixedSizeBinaryVector(org.apache.arrow.vector.FixedSizeBinaryVector) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) ListVector(org.apache.arrow.vector.complex.ListVector) Row(org.apache.beam.sdk.values.Row) Test(org.junit.Test)

Aggregations

VarCharVector (org.apache.arrow.vector.VarCharVector)2 VectorSchemaRoot (org.apache.arrow.vector.VectorSchemaRoot)2 Text (org.apache.arrow.vector.util.Text)2 ArrowRecordBatch (com.google.cloud.bigquery.storage.v1.ArrowRecordBatch)1 StatusRuntimeException (io.grpc.StatusRuntimeException)1 ByteArrayOutputStream (java.io.ByteArrayOutputStream)1 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 Arrays.asList (java.util.Arrays.asList)1 BufferAllocator (org.apache.arrow.memory.BufferAllocator)1 RootAllocator (org.apache.arrow.memory.RootAllocator)1 BigIntVector (org.apache.arrow.vector.BigIntVector)1 BitVector (org.apache.arrow.vector.BitVector)1 FixedSizeBinaryVector (org.apache.arrow.vector.FixedSizeBinaryVector)1 Float8Vector (org.apache.arrow.vector.Float8Vector)1 IntVector (org.apache.arrow.vector.IntVector)1 TimeStampMicroTZVector (org.apache.arrow.vector.TimeStampMicroTZVector)1 TimeStampMilliTZVector (org.apache.arrow.vector.TimeStampMilliTZVector)1 VectorUnloader (org.apache.arrow.vector.VectorUnloader)1 ListVector (org.apache.arrow.vector.complex.ListVector)1