Use of com.google.cloud.bigquery.connector.common.ReadSessionResponse in project spark-bigquery-connector by GoogleCloudDataproc.
In the class BigQueryDataSourceReaderContext, the method createConverter:
private ReadRowsResponseToInternalRowIteratorConverter createConverter(
    ImmutableList<String> selectedFields,
    ReadSessionResponse readSessionResponse,
    Optional<StructType> userProvidedSchema) {
  DataFormat format = readSessionCreatorConfig.getReadDataFormat();
  if (format == DataFormat.AVRO) {
    Schema schema =
        SchemaConverters.getSchemaWithPseudoColumns(readSessionResponse.getReadTableInfo());
    if (selectedFields.isEmpty()) {
      // empty selection means SELECT *: expand to all table fields
      selectedFields = schema.getFields().stream()
          .map(Field::getName)
          .collect(ImmutableList.toImmutableList());
    } else {
      // narrow the schema to the requested columns
      Set<String> requiredColumnSet = ImmutableSet.copyOf(selectedFields);
      schema = Schema.of(schema.getFields().stream()
          .filter(field -> requiredColumnSet.contains(field.getName()))
          .collect(Collectors.toList()));
    }
    return ReadRowsResponseToInternalRowIteratorConverter.avro(
        schema,
        selectedFields,
        readSessionResponse.getReadSession().getAvroSchema().getSchema(),
        userProvidedSchema);
  }
  throw new IllegalArgumentException(
      "No known converter for " + readSessionCreatorConfig.getReadDataFormat());
}
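For orientation, here is a minimal, self-contained sketch of the same projection logic in isolation, assuming a made-up table schema; the field names and types below are illustrative stand-ins for what getSchemaWithPseudoColumns would return, not taken from the connector:

import com.google.cloud.bigquery.Field;
import com.google.cloud.bigquery.Schema;
import com.google.cloud.bigquery.StandardSQLTypeName;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import java.util.stream.Collectors;

public class ProjectionSketch {
  public static void main(String[] args) {
    // hypothetical table schema standing in for the real table's fields
    Schema schema = Schema.of(
        Field.of("name", StandardSQLTypeName.STRING),
        Field.of("age", StandardSQLTypeName.INT64),
        Field.of("_PARTITIONTIME", StandardSQLTypeName.TIMESTAMP));

    // the projection pushed down by Spark; empty would mean SELECT *
    ImmutableList<String> selectedFields = ImmutableList.of("name");
    if (selectedFields.isEmpty()) {
      // SELECT *: expand the selection to every field in the table
      selectedFields = schema.getFields().stream()
          .map(Field::getName)
          .collect(ImmutableList.toImmutableList());
    } else {
      // otherwise narrow the schema down to the requested columns
      ImmutableSet<String> required = ImmutableSet.copyOf(selectedFields);
      schema = Schema.of(schema.getFields().stream()
          .filter(field -> required.contains(field.getName()))
          .collect(Collectors.toList()));
    }
    System.out.println(selectedFields + " -> " + schema.getFields());
  }
}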
Use of com.google.cloud.bigquery.connector.common.ReadSessionResponse in project spark-bigquery-connector by GoogleCloudDataproc.
In the class BigQueryDataSourceReaderContext, the method planBatchInputPartitionContexts:
public Stream<InputPartitionContext<ColumnarBatch>> planBatchInputPartitionContexts() {
  if (!enableBatchRead()) {
    throw new IllegalStateException("Batch reads must be enabled");
  }
  ImmutableList<String> selectedFields =
      schema.map(requiredSchema -> ImmutableList.copyOf(requiredSchema.fieldNames()))
          .orElse(ImmutableList.copyOf(fields.keySet()));
  Optional<String> filter = getCombinedFilter();
  ReadSessionResponse readSessionResponse =
      readSessionCreator.create(tableId, selectedFields, filter);
  ReadSession readSession = readSessionResponse.getReadSession();
  logger.info("Created read session for {}: {} for application id: {}",
      tableId.toString(), readSession.getName(), applicationId);
  if (selectedFields.isEmpty()) {
    // empty selection means SELECT *: expand to all table fields
    Schema tableSchema =
        SchemaConverters.getSchemaWithPseudoColumns(readSessionResponse.getReadTableInfo());
    selectedFields = tableSchema.getFields().stream()
        .map(Field::getName).collect(ImmutableList.toImmutableList());
  }
  // effectively-final copy for capture in the lambda below
  ImmutableList<String> partitionSelectedFields = selectedFields;
  return Streams.stream(Iterables.partition(
          readSession.getStreamsList(), readSessionCreatorConfig.streamsPerPartition()))
      .map(streams -> new ArrowInputPartitionContext(
          bigQueryReadClientFactory,
          bigQueryTracerFactory,
          streams.stream().map(ReadStream::getName)
              .collect(Collectors.toCollection(ArrayList::new)),
          readSessionCreatorConfig.toReadRowsHelperOptions(),
          partitionSelectedFields,
          readSessionResponse,
          userProvidedSchema));
}
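The partitioning step above leans on Guava's Iterables.partition to group the session's streams into fixed-size chunks, one chunk per Spark input partition. A standalone sketch, with plain strings standing in for ReadStream names and an arbitrary example value for streamsPerPartition:

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Streams;
import java.util.List;

public class StreamPartitionSketch {
  public static void main(String[] args) {
    // stand-ins for readSession.getStreamsList()
    List<String> streamNames =
        ImmutableList.of("stream-0", "stream-1", "stream-2", "stream-3", "stream-4");
    int streamsPerPartition = 2; // example value for readSessionCreatorConfig.streamsPerPartition()

    // each inner list would become one input partition reading its streams
    Streams.stream(Iterables.partition(streamNames, streamsPerPartition))
        .forEach(streams -> System.out.println("partition reads " + streams));
    // prints three partitions: [stream-0, stream-1], [stream-2, stream-3], [stream-4]
  }
}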
Use of com.google.cloud.bigquery.connector.common.ReadSessionResponse in project spark-bigquery-connector by GoogleCloudDataproc.
In the class BigQueryDataSourceReaderContext, the method planInputPartitionContexts:
public Stream<InputPartitionContext<InternalRow>> planInputPartitionContexts() {
  if (isEmptySchema()) {
    // create empty projection
    return createEmptyProjectionPartitions();
  }
  ImmutableList<String> selectedFields =
      schema.map(requiredSchema -> ImmutableList.copyOf(requiredSchema.fieldNames()))
          .orElse(ImmutableList.copyOf(fields.keySet()));
  Optional<String> filter = getCombinedFilter();
  ReadSessionResponse readSessionResponse =
      readSessionCreator.create(tableId, selectedFields, filter);
  ReadSession readSession = readSessionResponse.getReadSession();
  logger.info("Created read session for {}: {} for application id: {}",
      tableId.toString(), readSession.getName(), applicationId);
  // one input partition per read stream
  return readSession.getStreamsList().stream()
      .map(stream -> new BigQueryInputPartitionContext(
          bigQueryReadClientFactory,
          stream.getName(),
          readSessionCreatorConfig.toReadRowsHelperOptions(),
          createConverter(selectedFields, readSessionResponse, userProvidedSchema)));
}
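For context, readSessionCreator.create ultimately calls the BigQuery Storage Read API. The following is a rough standalone sketch of creating a read session and reading a single stream directly with BigQueryReadClient, assuming a placeholder project, a public sample table, and valid application-default credentials; it mirrors the pattern in Google's Storage Read API samples rather than the connector's exact internals, and error handling is omitted:

import com.google.api.gax.rpc.ServerStream;
import com.google.cloud.bigquery.storage.v1.BigQueryReadClient;
import com.google.cloud.bigquery.storage.v1.CreateReadSessionRequest;
import com.google.cloud.bigquery.storage.v1.DataFormat;
import com.google.cloud.bigquery.storage.v1.ReadRowsRequest;
import com.google.cloud.bigquery.storage.v1.ReadRowsResponse;
import com.google.cloud.bigquery.storage.v1.ReadSession;

public class DirectReadSketch {
  public static void main(String[] args) throws Exception {
    String parent = "projects/my-project"; // placeholder billing project
    String table = "projects/bigquery-public-data/datasets/samples/tables/shakespeare";

    try (BigQueryReadClient client = BigQueryReadClient.create()) {
      // column projection, analogous to what the connector pushes down
      ReadSession.TableReadOptions options = ReadSession.TableReadOptions.newBuilder()
          .addSelectedFields("word")
          .build();
      ReadSession session = client.createReadSession(
          CreateReadSessionRequest.newBuilder()
              .setParent(parent)
              .setReadSession(ReadSession.newBuilder()
                  .setTable(table)
                  .setDataFormat(DataFormat.AVRO)
                  .setReadOptions(options))
              .setMaxStreamCount(1)
              .build());

      // the connector maps each such stream to one input partition
      ReadRowsRequest request = ReadRowsRequest.newBuilder()
          .setReadStream(session.getStreams(0).getName())
          .build();
      ServerStream<ReadRowsResponse> rows = client.readRowsCallable().call(request);
      for (ReadRowsResponse response : rows) {
        System.out.println("got " + response.getRowCount() + " Avro-encoded rows");
      }
    }
  }
}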