Search in sources :

Example 6 with ArrowStreamWriter

Use of org.apache.arrow.vector.ipc.ArrowStreamWriter in the Apache Flink project.

From class ArrowUtilsTest, method testReadArrowBatches.

@Test
public void testReadArrowBatches() throws IOException {
    // Round-trips several RowData rows through the Arrow IPC stream format and
    // verifies that readArrowBatches recovers the expected number of batches.
    //
    // Fixes vs. previous version: VectorSchemaRoot is AutoCloseable (it owns
    // direct memory from `allocator`) and must be closed, and the stream writer
    // must call end() so the IPC end-of-stream marker is written before reading.
    try (VectorSchemaRoot root =
            VectorSchemaRoot.create(ArrowUtils.toArrowSchema(rowType), allocator)) {
        ArrowWriter<RowData> arrowWriter = ArrowUtils.createRowDataArrowWriter(root, rowType);
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        ArrowStreamWriter arrowStreamWriter = new ArrowStreamWriter(root, null, baos);
        arrowStreamWriter.start();
        // Five empty rows; with partition size 5 / 3 + 1 = 2 they split into
        // sub-lists of sizes 2, 2, 1 — i.e. exactly `batches` batches.
        List<RowData> testData =
            Arrays.asList(
                new GenericRowData(rowType.getFieldCount()),
                new GenericRowData(rowType.getFieldCount()),
                new GenericRowData(rowType.getFieldCount()),
                new GenericRowData(rowType.getFieldCount()),
                new GenericRowData(rowType.getFieldCount()));
        int batches = 3;
        List<List<RowData>> subLists = Lists.partition(testData, testData.size() / batches + 1);
        for (List<RowData> subList : subLists) {
            for (RowData value : subList) {
                arrowWriter.write(value);
            }
            // finish() seals the current batch in the root so writeBatch() can emit it.
            arrowWriter.finish();
            arrowStreamWriter.writeBatch();
            arrowWriter.reset();
        }
        // Write the end-of-stream marker so the reader sees a well-formed IPC stream.
        arrowStreamWriter.end();
        assertEquals(
            batches,
            ArrowUtils.readArrowBatches(
                    Channels.newChannel(new ByteArrayInputStream(baos.toByteArray())))
                .length);
    }
}
Also used : VectorSchemaRoot(org.apache.arrow.vector.VectorSchemaRoot) GenericRowData(org.apache.flink.table.data.GenericRowData) RowData(org.apache.flink.table.data.RowData) ByteArrayInputStream(java.io.ByteArrayInputStream) GenericRowData(org.apache.flink.table.data.GenericRowData) List(java.util.List) ArrayList(java.util.ArrayList) ByteArrayOutputStream(java.io.ByteArrayOutputStream) ArrowStreamWriter(org.apache.arrow.vector.ipc.ArrowStreamWriter) Test(org.junit.Test)

Example 7 with ArrowStreamWriter

Use of org.apache.arrow.vector.ipc.ArrowStreamWriter in the Apache Flink project.

From class ArrowSourceFunctionTestBase, method createTestArrowSourceFunction.

/**
 * Builds an {@link ArrowSourceFunction} whose input is {@code testData} serialized into
 * {@code batches} Arrow record batches, with a mocked runtime context attached.
 */
private ArrowSourceFunction createTestArrowSourceFunction(List<RowData> testData, int batches) throws IOException {
    final ArrowWriter<RowData> writer = createArrowWriter();
    final ByteArrayOutputStream out = new ByteArrayOutputStream();
    final ArrowStreamWriter streamWriter = new ArrowStreamWriter(root, null, out);
    streamWriter.start();
    // Ceiling-style partition size yields at most `batches` sub-lists.
    final int partitionSize = testData.size() / batches + 1;
    for (List<RowData> chunk : Lists.partition(testData, partitionSize)) {
        for (RowData row : chunk) {
            writer.write(row);
        }
        // Seal the batch in the shared root, emit it to the stream, then clear for the next one.
        writer.finish();
        streamWriter.writeBatch();
        writer.reset();
    }
    ArrowSourceFunction sourceFunction =
        createArrowSourceFunction(
            ArrowUtils.readArrowBatches(
                Channels.newChannel(new ByteArrayInputStream(out.toByteArray()))));
    sourceFunction.setRuntimeContext(Mockito.mock(RuntimeContext.class));
    return sourceFunction;
}
Also used : RowData(org.apache.flink.table.data.RowData) ByteArrayInputStream(java.io.ByteArrayInputStream) ArrayList(java.util.ArrayList) List(java.util.List) ByteArrayOutputStream(java.io.ByteArrayOutputStream) RuntimeContext(org.apache.flink.api.common.functions.RuntimeContext) ArrowStreamWriter(org.apache.arrow.vector.ipc.ArrowStreamWriter)

Example 8 with ArrowStreamWriter

Use of org.apache.arrow.vector.ipc.ArrowStreamWriter in the Apache Flink project.

From class ArrowReaderWriterTestBase, method testBasicFunctionality.

@Test
public void testBasicFunctionality() throws Exception {
    // Writes the test data as a single Arrow batch and checks that reading it
    // back yields values deeply equal to the originals.
    //
    // Fix: the previous catch-Exception-then-fail(e.getMessage()) pattern lost
    // the stack trace in the JUnit report (and getMessage() can be null).
    // Declaring `throws Exception` lets JUnit surface the full failure cause.
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    Tuple2<ArrowWriter<T>, ArrowStreamWriter> tuple2 = createArrowWriter(baos);
    ArrowWriter<T> arrowWriter = tuple2.f0;
    ArrowStreamWriter arrowStreamWriter = tuple2.f1;
    T[] testData = getTestData();
    for (T value : testData) {
        arrowWriter.write(value);
    }
    // Seal the batch and emit it to the in-memory stream.
    arrowWriter.finish();
    arrowStreamWriter.writeBatch();
    ArrowReader arrowReader = createArrowReader(new ByteArrayInputStream(baos.toByteArray()));
    for (int i = 0; i < testData.length; i++) {
        RowData deserialized = arrowReader.read(i);
        assertThat(
            "Deserialized value is wrong.",
            deserialized,
            CustomEqualityMatcher.deeplyEquals(testData[i]).withChecker(checker));
    }
}
Also used : RowData(org.apache.flink.table.data.RowData) ByteArrayInputStream(java.io.ByteArrayInputStream) ByteArrayOutputStream(java.io.ByteArrayOutputStream) ArrowStreamWriter(org.apache.arrow.vector.ipc.ArrowStreamWriter) IOException(java.io.IOException) Test(org.junit.Test)

Aggregations

ArrowStreamWriter (org.apache.arrow.vector.ipc.ArrowStreamWriter)8 RowData (org.apache.flink.table.data.RowData)5 ByteArrayOutputStream (java.io.ByteArrayOutputStream)4 ByteArrayInputStream (java.io.ByteArrayInputStream)3 VectorSchemaRoot (org.apache.arrow.vector.VectorSchemaRoot)3 ArrayList (java.util.ArrayList)2 List (java.util.List)2 GenericRowData (org.apache.flink.table.data.GenericRowData)2 Test (org.junit.Test)2 IOException (java.io.IOException)1 Iterator (java.util.Iterator)1 BufferAllocator (org.apache.arrow.memory.BufferAllocator)1 ArrowStreamReader (org.apache.arrow.vector.ipc.ArrowStreamReader)1 RuntimeContext (org.apache.flink.api.common.functions.RuntimeContext)1 BinaryRowData (org.apache.flink.table.data.binary.BinaryRowData)1 DataFormatConverters (org.apache.flink.table.data.util.DataFormatConverters)1 DataType (org.apache.flink.table.types.DataType)1 RowType (org.apache.flink.table.types.logical.RowType)1 Row (org.apache.flink.types.Row)1 ArrowWrapperWritable (org.apache.hadoop.hive.ql.io.arrow.ArrowWrapperWritable)1