Search in sources :

Example 1 with FixedSizeBinaryVector

Use of org.apache.arrow.vector.FixedSizeBinaryVector in the Apache Beam project.

From the class ArrowConversionTest, method rowIterator.

/**
 * Checks that {@code ArrowConversion.rowsFromRecordBatch} yields, in order, Beam rows equal to
 * the values written into an Arrow {@link VectorSchemaRoot}.
 *
 * <p>The Arrow schema under test has eight columns: a signed 32-bit int, a double-precision
 * float, a UTF-8 string, a microsecond and a millisecond UTC timestamp, a list of signed 32-bit
 * ints, a boolean, and a 3-byte fixed-size binary.
 */
@Test
public void rowIterator() {
    org.apache.arrow.vector.types.pojo.Schema schema =
        new org.apache.arrow.vector.types.pojo.Schema(
            asList(
                field("int32", new ArrowType.Int(32, true)),
                field("float64", new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)),
                field("string", new ArrowType.Utf8()),
                field("timestampMicroUTC", new ArrowType.Timestamp(TimeUnit.MICROSECOND, "UTC")),
                field("timestampMilliUTC", new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC")),
                field("int32_list", new ArrowType.List(), field("int32s", new ArrowType.Int(32, true))),
                field("boolean", new ArrowType.Bool()),
                field("fixed_size_binary", new ArrowType.FixedSizeBinary(3))));
    Schema beamSchema = ArrowConversion.ArrowSchemaTranslator.toBeamSchema(schema);

    // Single source of truth for the number of rows written and expected.
    final int rowCount = 16;

    // try-with-resources: the root is closed even when population or the assertion throws.
    // NOTE(review): presumably this also keeps a teardown that closes `allocator` from
    // reporting a leak on top of the real failure -- confirm against the class's @After method.
    try (VectorSchemaRoot expectedSchemaRoot = VectorSchemaRoot.create(schema, allocator)) {
        expectedSchemaRoot.allocateNew();
        expectedSchemaRoot.setRowCount(rowCount);

        // Field vectors are fetched by position, in the same order the fields were declared above.
        IntVector intVector = (IntVector) expectedSchemaRoot.getFieldVectors().get(0);
        Float8Vector floatVector = (Float8Vector) expectedSchemaRoot.getFieldVectors().get(1);
        VarCharVector strVector = (VarCharVector) expectedSchemaRoot.getFieldVectors().get(2);
        TimeStampMicroTZVector timestampMicroUtcVector =
            (TimeStampMicroTZVector) expectedSchemaRoot.getFieldVectors().get(3);
        TimeStampMilliTZVector timeStampMilliTZVector =
            (TimeStampMilliTZVector) expectedSchemaRoot.getFieldVectors().get(4);
        ListVector int32ListVector = (ListVector) expectedSchemaRoot.getFieldVectors().get(5);
        IntVector int32ListElementVector =
            int32ListVector
                .<IntVector>addOrGetVector(
                    new org.apache.arrow.vector.types.pojo.FieldType(
                        false, new ArrowType.Int(32, true), null))
                .getVector();
        BitVector boolVector = (BitVector) expectedSchemaRoot.getFieldVectors().get(6);
        FixedSizeBinaryVector fixedSizeBinaryVector =
            (FixedSizeBinaryVector) expectedSchemaRoot.getFieldVectors().get(7);

        ArrayList<Row> expectedRows = new ArrayList<>();
        for (int i = 0; i < rowCount; i++) {
            // Row i gets the timestamp 2019-01-(i+1) i:i:i UTC, so every row is distinct.
            DateTime dt = new DateTime(2019, 1, i + 1, i, i, i, DateTimeZone.UTC);

            // Expected Beam row, built from the same values written to the Arrow vectors below.
            expectedRows.add(
                Row.withSchema(beamSchema)
                    .addValues(
                        i,
                        i + .1 * i,
                        "" + i,
                        dt,
                        dt,
                        ImmutableList.of(i),
                        (i % 2) != 0,
                        new byte[] { (byte) i, (byte) (i + 1), (byte) (i + 2) })
                    .build());

            intVector.set(i, i);
            floatVector.set(i, i + .1 * i);
            strVector.set(i, new Text("" + i));
            // The microsecond column stores epoch micros, hence millis * 1000.
            timestampMicroUtcVector.set(i, dt.getMillis() * 1000);
            timeStampMilliTZVector.set(i, dt.getMillis());
            // Each list value holds exactly one element: the row index itself.
            int32ListVector.startNewValue(i);
            int32ListElementVector.set(i, i);
            int32ListVector.endValue(i, 1);
            // i % 2 is 1 for odd i and 0 for even i, mirroring the expected (i % 2) != 0.
            boolVector.set(i, i % 2);
            fixedSizeBinaryVector.set(i, new byte[] { (byte) i, (byte) (i + 1), (byte) (i + 2) });
        }

        assertThat(
            ImmutableList.copyOf(ArrowConversion.rowsFromRecordBatch(beamSchema, expectedSchemaRoot)),
            IsIterableContainingInOrder.contains(
                expectedRows.stream()
                    .map((row) -> equalTo(row))
                    .collect(ImmutableList.toImmutableList())));
    }
}
Also used : IsIterableContainingInOrder(org.hamcrest.collection.IsIterableContainingInOrder) DateTimeZone(org.joda.time.DateTimeZone) Float8Vector(org.apache.arrow.vector.Float8Vector) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) RunWith(org.junit.runner.RunWith) TimeUnit(org.apache.arrow.vector.types.TimeUnit) Text(org.apache.arrow.vector.util.Text) TimeStampMilliTZVector(org.apache.arrow.vector.TimeStampMilliTZVector) ArrayList(java.util.ArrayList) FloatingPointPrecision(org.apache.arrow.vector.types.FloatingPointPrecision) Arrays.asList(java.util.Arrays.asList) RootAllocator(org.apache.arrow.memory.RootAllocator) After(org.junit.After) ListVector(org.apache.arrow.vector.complex.ListVector) MatcherAssert.assertThat(org.hamcrest.MatcherAssert.assertThat) FixedSizeBinaryVector(org.apache.arrow.vector.FixedSizeBinaryVector) Row(org.apache.beam.sdk.values.Row) BufferAllocator(org.apache.arrow.memory.BufferAllocator) Before(org.junit.Before) Field(org.apache.beam.sdk.schemas.Schema.Field) IntVector(org.apache.arrow.vector.IntVector) TimeStampMicroTZVector(org.apache.arrow.vector.TimeStampMicroTZVector) VectorSchemaRoot(org.apache.arrow.vector.VectorSchemaRoot) DateTime(org.joda.time.DateTime) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) Test(org.junit.Test) JUnit4(org.junit.runners.JUnit4) Schema(org.apache.beam.sdk.schemas.Schema) Matchers.equalTo(org.hamcrest.Matchers.equalTo) VarCharVector(org.apache.arrow.vector.VarCharVector) ImmutableList(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.collect.ImmutableList) BitVector(org.apache.arrow.vector.BitVector) VectorSchemaRoot(org.apache.arrow.vector.VectorSchemaRoot) BitVector(org.apache.arrow.vector.BitVector) Schema(org.apache.beam.sdk.schemas.Schema) ArrowType(org.apache.arrow.vector.types.pojo.ArrowType) ArrayList(java.util.ArrayList) DateTime(org.joda.time.DateTime) TimeStampMicroTZVector(org.apache.arrow.vector.TimeStampMicroTZVector) 
TimeStampMilliTZVector(org.apache.arrow.vector.TimeStampMilliTZVector) IntVector(org.apache.arrow.vector.IntVector) Float8Vector(org.apache.arrow.vector.Float8Vector) VarCharVector(org.apache.arrow.vector.VarCharVector) Text(org.apache.arrow.vector.util.Text) FixedSizeBinaryVector(org.apache.arrow.vector.FixedSizeBinaryVector) FieldType(org.apache.beam.sdk.schemas.Schema.FieldType) ListVector(org.apache.arrow.vector.complex.ListVector) Row(org.apache.beam.sdk.values.Row) Test(org.junit.Test)

Aggregations

ArrayList (java.util.ArrayList)1 Arrays.asList (java.util.Arrays.asList)1 BufferAllocator (org.apache.arrow.memory.BufferAllocator)1 RootAllocator (org.apache.arrow.memory.RootAllocator)1 BitVector (org.apache.arrow.vector.BitVector)1 FixedSizeBinaryVector (org.apache.arrow.vector.FixedSizeBinaryVector)1 Float8Vector (org.apache.arrow.vector.Float8Vector)1 IntVector (org.apache.arrow.vector.IntVector)1 TimeStampMicroTZVector (org.apache.arrow.vector.TimeStampMicroTZVector)1 TimeStampMilliTZVector (org.apache.arrow.vector.TimeStampMilliTZVector)1 VarCharVector (org.apache.arrow.vector.VarCharVector)1 VectorSchemaRoot (org.apache.arrow.vector.VectorSchemaRoot)1 ListVector (org.apache.arrow.vector.complex.ListVector)1 FloatingPointPrecision (org.apache.arrow.vector.types.FloatingPointPrecision)1 TimeUnit (org.apache.arrow.vector.types.TimeUnit)1 ArrowType (org.apache.arrow.vector.types.pojo.ArrowType)1 Text (org.apache.arrow.vector.util.Text)1 Schema (org.apache.beam.sdk.schemas.Schema)1 Field (org.apache.beam.sdk.schemas.Schema.Field)1 FieldType (org.apache.beam.sdk.schemas.Schema.FieldType)1