Use of com.google.cloud.spark.bigquery.ArrowSchemaConverter in the spark-bigquery-connector project by GoogleCloudDataproc.

The next() method of the ArrowColumnBatchPartitionReaderContext class. It advances the underlying Arrow reader to the next record batch and exposes it to Spark as a ColumnarBatch, creating the ColumnarBatch wrapper only once and reusing it across batches.
public boolean next() throws IOException {
  tracer.nextBatchNeeded();
  if (closed) {
    return false;
  }
  tracer.rowsParseStarted();
  // Advance the Arrow reader; once no batch remains, mark this reader closed.
  closed = !reader.loadNextBatch();
  if (closed) {
    return false;
  }
  VectorSchemaRoot root = reader.root();
  if (currentBatch == null) {
    // trying to verify from dev@spark but this object
    // should only need to get created once. The underlying
    // vectors should stay the same.
    // Wrap each Arrow vector in an ArrowSchemaConverter so Spark can read it
    // as a ColumnVector; namesInOrder fixes the column order, and
    // userProvidedFieldMap supplies any user-provided schema field for the column.
    ColumnVector[] columns =
        namesInOrder.stream()
            .map(root::getVector)
            .map(
                vector ->
                    new ArrowSchemaConverter(
                        vector, userProvidedFieldMap.get(vector.getName())))
            .toArray(ColumnVector[]::new);
    currentBatch = new ColumnarBatch(columns);
  }
  currentBatch.setNumRows(root.getRowCount());
  tracer.rowsParseFinished(currentBatch.numRows());
  return true;
}
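
For context, a minimal sketch of how a caller might drive this method, assuming access to the connector-internal ArrowColumnBatchPartitionReaderContext. The consumeAllBatches and getCurrentBatch names are illustrative, not part of the connector's API; in Spark's DataSourceV2 API, a PartitionReader<ColumnarBatch> would surface the batch via its get() method.

import java.io.IOException;
import org.apache.spark.sql.vectorized.ColumnarBatch;

class BatchConsumerSketch {
  // Illustrative driver loop: next() returns true while a fresh Arrow batch
  // was loaded, and false once the stream is exhausted or the reader closed.
  static void consumeAllBatches(ArrowColumnBatchPartitionReaderContext context)
      throws IOException {
    while (context.next()) {
      // Hypothetical accessor for the ColumnarBatch that next() populates.
      ColumnarBatch batch = context.getCurrentBatch();
      // Process the batch while it is valid; next() reuses the same
      // ColumnarBatch and the underlying Arrow vectors, so do not retain
      // references to it or its columns across calls.
      System.out.println("read batch of " + batch.numRows() + " rows");
    }
  }
}

Note the design choice the snippet's comment hints at: because the Arrow vectors backing the ColumnarBatch stay the same across loadNextBatch() calls, only the row count needs updating per batch, avoiding per-batch allocation of the column wrappers.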