Use of org.apache.arrow.vector.ipc.ArrowStreamWriter in project flink (by apache):
class ArrowUtils, method collectAsPandasDataFrame.
/**
 * Converts a Flink {@link Table} into a stream of Arrow-serialized byte batches suitable for
 * reconstruction as a Pandas DataFrame on the Python side.
 *
 * <p>Each call to {@code next()} on the returned iterator serializes up to
 * {@code maxArrowBatchSize} rows into one Arrow record batch and returns its bytes. For a
 * non-append-only table, retract rows are filtered out first. The schema root and allocator
 * are released after the final batch has been emitted.
 *
 * @param table the table whose rows are collected and serialized
 * @param maxArrowBatchSize maximum number of rows per emitted Arrow record batch
 * @return an iterator over serialized Arrow record batches
 * @throws Exception if collecting the table results or starting the Arrow stream fails
 */
public static CustomIterator<byte[]> collectAsPandasDataFrame(Table table, int maxArrowBatchSize) throws Exception {
    checkArrowUsable();
    BufferAllocator allocator = getRootAllocator().newChildAllocator("collectAsPandasDataFrame", 0, Long.MAX_VALUE);
    RowType rowType = (RowType) table.getResolvedSchema().toSourceRowDataType().getLogicalType();
    DataType defaultRowDataType = TypeConversions.fromLogicalToDataType(rowType);
    VectorSchemaRoot root = VectorSchemaRoot.create(ArrowUtils.toArrowSchema(rowType), allocator);
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    ArrowStreamWriter arrowStreamWriter = new ArrowStreamWriter(root, null, baos);
    // start() is invoked once up front; the same stream accumulates all batches into baos,
    // which is drained and reset after each next() call.
    arrowStreamWriter.start();
    Iterator<Row> results = table.execute().collect();
    Iterator<Row> appendOnlyResults;
    if (isAppendOnlyTable(table)) {
        appendOnlyResults = results;
    } else {
        appendOnlyResults = filterOutRetractRows(results);
    }
    ArrowWriter<RowData> arrowWriter = createRowDataArrowWriter(root, rowType);
    // The converter depends only on the data type, so build it once rather than on every
    // next() call as the original code did.
    DataFormatConverters.DataFormatConverter converter =
            DataFormatConverters.getConverterForDataType(defaultRowDataType);
    // Adapt the external Row iterator into an internal RowData iterator.
    Iterator<RowData> convertedResults = new Iterator<RowData>() {
        @Override
        public boolean hasNext() {
            return appendOnlyResults.hasNext();
        }

        @Override
        public RowData next() {
            return (RowData) converter.toInternal(appendOnlyResults.next());
        }
    };
    return new CustomIterator<byte[]>() {
        @Override
        public boolean hasNext() {
            return convertedResults.hasNext();
        }

        @Override
        public byte[] next() {
            try {
                // Fill at most maxArrowBatchSize rows into the current batch.
                int rowCount = 0;
                while (convertedResults.hasNext() && rowCount < maxArrowBatchSize) {
                    rowCount++;
                    arrowWriter.write(convertedResults.next());
                }
                arrowWriter.finish();
                arrowStreamWriter.writeBatch();
                return baos.toByteArray();
            } catch (Throwable t) {
                String msg = "Failed to serialize the data of the table";
                LOG.error(msg, t);
                throw new RuntimeException(msg, t);
            } finally {
                // Reset writer state and buffer for the next batch; release Arrow resources
                // once the source is exhausted.
                arrowWriter.reset();
                baos.reset();
                if (!hasNext()) {
                    root.close();
                    allocator.close();
                }
            }
        }
    };
}
Use of org.apache.arrow.vector.ipc.ArrowStreamWriter in project flink (by apache):
class ArrowReaderWriterTest, method createArrowWriter.
/**
 * Creates the pair of writers used by the test: an {@link ArrowWriter} that populates the
 * vector schema root and an {@link ArrowStreamWriter} that serializes it to the given stream.
 * The stream writer is started before being returned.
 */
@Override
public Tuple2<ArrowWriter<RowData>, ArrowStreamWriter> createArrowWriter(OutputStream outputStream) throws IOException {
    final VectorSchemaRoot schemaRoot = VectorSchemaRoot.create(ArrowUtils.toArrowSchema(rowType), allocator);
    final ArrowWriter<RowData> rowDataWriter = ArrowUtils.createRowDataArrowWriter(schemaRoot, rowType);
    final ArrowStreamWriter streamWriter = new ArrowStreamWriter(schemaRoot, null, outputStream);
    streamWriter.start();
    return Tuple2.of(rowDataWriter, streamWriter);
}
Use of org.apache.arrow.vector.ipc.ArrowStreamWriter in project hive (by apache):
class LlapArrowRecordWriter, method write.
/**
 * Writes one Arrow batch to the output. On the first call, the stream writer and related
 * state are lazily initialized from the incoming {@link ArrowWrapperWritable}; on subsequent
 * calls the (reused) schema root's row count is refreshed before the batch is written.
 */
@Override
public void write(K key, V value) throws IOException {
    final ArrowWrapperWritable wrapper = (ArrowWrapperWritable) value;
    if (arrowStreamWriter != null) {
        // The root is reused by the stream writer, so the row count must be
        // refreshed from the current root vector before every batch.
        vectorSchemaRoot.setRowCount(rootVector.getValueCount());
    } else {
        // First batch: capture the root/allocator/root-vector from the wrapper.
        vectorSchemaRoot = wrapper.getVectorSchemaRoot();
        arrowStreamWriter = new ArrowStreamWriter(vectorSchemaRoot, null, out);
        allocator = wrapper.getAllocator();
        this.out.setAllocator(allocator);
        rootVector = wrapper.getRootVector();
    }
    arrowStreamWriter.writeBatch();
}
Use of org.apache.arrow.vector.ipc.ArrowStreamWriter in project flink (by apache):
class ArrowSerializer, method resetWriter.
/**
 * Discards the current Arrow stream writer and starts a fresh one over the same
 * schema root and output stream.
 */
public void resetWriter() throws IOException {
// Assign first, then start: callers see the new writer even if start() throws.
arrowStreamWriter = new ArrowStreamWriter(rootWriter, null, baos);
arrowStreamWriter.start();
}
Use of org.apache.arrow.vector.ipc.ArrowStreamWriter in project flink (by apache):
class ArrowSerializer, method open.
/**
 * Initializes the serializer: wires the input/output streams, creates a child allocator,
 * and sets up the Arrow stream reader and writer.
 *
 * @param bais stream the Arrow stream reader consumes
 * @param baos stream the Arrow stream writer serializes into
 */
public void open(InputStream bais, OutputStream baos) throws Exception {
this.bais = bais;
this.baos = baos;
allocator = ArrowUtils.getRootAllocator().newChildAllocator("allocator", 0, Long.MAX_VALUE);
arrowStreamReader = new ArrowStreamReader(bais, allocator);
rootWriter = VectorSchemaRoot.create(ArrowUtils.toArrowSchema(inputType), allocator);
// NOTE(review): createArrowWriter() presumably reads the rootWriter field assigned just
// above — keep this call after that assignment; confirm against ArrowSerializer.
arrowWriter = createArrowWriter();
arrowStreamWriter = new ArrowStreamWriter(rootWriter, null, baos);
arrowStreamWriter.start();
}
Aggregations