Use of com.google.cloud.bigquery.connector.common.BigQueryTracerFactory in the spark-bigquery-connector project by GoogleCloudDataproc:
the planBatchInputPartitionContexts method of the BigQueryDataSourceReaderContext class.
public Stream<InputPartitionContext<ColumnarBatch>> planBatchInputPartitionContexts() {
  if (!enableBatchRead()) {
    throw new IllegalStateException("Batch reads should be enabled");
  }
  // Project only the columns Spark requires; fall back to all known fields.
  ImmutableList<String> selectedFields =
      schema
          .map(requiredSchema -> ImmutableList.copyOf(requiredSchema.fieldNames()))
          .orElse(ImmutableList.copyOf(fields.keySet()));
  Optional<String> filter = getCombinedFilter();
  ReadSessionResponse readSessionResponse = readSessionCreator.create(tableId, selectedFields, filter);
  ReadSession readSession = readSessionResponse.getReadSession();
  logger.info("Created read session for {}: {} for application id: {}",
      tableId.toString(), readSession.getName(), applicationId);
  if (selectedFields.isEmpty()) {
    // An empty projection means SELECT *; expand to the table's full schema.
    Schema tableSchema = SchemaConverters.getSchemaWithPseudoColumns(readSessionResponse.getReadTableInfo());
    selectedFields =
        tableSchema.getFields().stream().map(Field::getName).collect(ImmutableList.toImmutableList());
  }
  ImmutableList<String> partitionSelectedFields = selectedFields;
  // Group the session's read streams into partitions of streamsPerPartition() streams;
  // each partition becomes an ArrowInputPartitionContext that carries the tracer factory.
  return Streams.stream(
          Iterables.partition(readSession.getStreamsList(), readSessionCreatorConfig.streamsPerPartition()))
      .map(streams ->
          new ArrowInputPartitionContext(
              bigQueryReadClientFactory,
              bigQueryTracerFactory,
              streams.stream().map(ReadStream::getName).collect(Collectors.toCollection(ArrayList::new)),
              readSessionCreatorConfig.toReadRowsHelperOptions(),
              partitionSelectedFields,
              readSessionResponse,
              userProvidedSchema));
}
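For orientation, below is a minimal sketch of how a caller might consume the returned stream. The readerContext variable and its configuration are assumptions for illustration, not part of the project code; BigQueryTracerFactory is injected into each ArrowInputPartitionContext so the per-partition readers can report read metrics for their streams.

// Illustrative only: 'readerContext' stands in for a hypothetical, fully
// configured BigQueryDataSourceReaderContext instance.
List<InputPartitionContext<ColumnarBatch>> partitions =
    readerContext.planBatchInputPartitionContexts()
        .collect(Collectors.toList());
// Each context wraps up to streamsPerPartition() BigQuery read streams and is
// later used to build a columnar-batch reader for one Spark partition.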