Use of com.google.cloud.spark.bigquery.v2.context.DataSourceWriterContext in project spark-bigquery-connector by GoogleCloudDataproc.
The class BigQueryDataSourceV2, method createWriter.
/**
 * Returns a DataSourceWriter for the specified parameters.
 *
 * <p>Before any writer is built, the destination table is looked up and checked against the
 * requested save mode and the configured create disposition, so that a write which cannot
 * succeed is rejected up front.
 *
 * @param writeUUID identifier of this write, forwarded to the writer module
 * @param schema schema of the data to be written
 * @param mode the save mode, checked against the existence of the destination table
 * @param options the data source options, passed to the injector as a map
 * @return the writer wrapped in an Optional, or {@code Optional.empty()} when the table already
 *     exists and the save mode is {@code Ignore}
 * @throws IllegalArgumentException if the table exists and the save mode is {@code
 *     ErrorIfExists}, if the table does not exist and the create disposition is {@code
 *     CREATE_NEVER}, or if the configured write method is neither {@code DIRECT} nor {@code
 *     INDIRECT}
 */
@Override
public Optional<DataSourceWriter> createWriter(
    String writeUUID, StructType schema, SaveMode mode, DataSourceOptions options) {
  Injector injector =
      createInjector(
          schema, options.asMap(), new BigQueryDataSourceWriterModule(writeUUID, schema, mode));
  // first verify if we need to do anything at all, based on the table existence and the save
  // mode.
  BigQueryClient bigQueryClient = injector.getInstance(BigQueryClient.class);
  SparkBigQueryConfig config = injector.getInstance(SparkBigQueryConfig.class);
  TableInfo table = bigQueryClient.getTable(config.getTableId());
  if (table != null) {
    // table already exists
    if (mode == SaveMode.Ignore) {
      return Optional.empty();
    }
    if (mode == SaveMode.ErrorIfExists) {
      throw new IllegalArgumentException(
          String.format(
              "SaveMode is set to ErrorIfExists and table '%s' already exists. Did you want "
                  + "to add data to the table by setting the SaveMode to Append? Example: "
                  + "df.write.format.options.mode(\"append\").save()",
              BigQueryUtil.friendlyTableName(table.getTableId())));
    }
  } else {
    // table does not exist
    // If the CreateDisposition is CREATE_NEVER, and the table does not exist,
    // there's no point in writing the data to GCS in the first place as it is going
    // to fail on the BigQuery side.
    boolean createNever =
        config
            .getCreateDisposition()
            .map(createDisposition -> createDisposition == JobInfo.CreateDisposition.CREATE_NEVER)
            .orElse(false);
    if (createNever) {
      throw new IllegalArgumentException(
          String.format(
              "For table %s Create Disposition is CREATE_NEVER and the table does not exists."
                  + " Aborting the insert",
              BigQueryUtil.friendlyTableName(config.getTableId())));
    }
  }
  // Every branch either assigns the context or throws, so a null context can never be handed
  // to the writer below.
  final DataSourceWriterContext dataSourceWriterContext;
  switch (config.getWriteMethod()) {
    case DIRECT:
      dataSourceWriterContext = injector.getInstance(BigQueryDirectDataSourceWriterContext.class);
      break;
    case INDIRECT:
      dataSourceWriterContext =
          injector.getInstance(BigQueryIndirectDataSourceWriterContext.class);
      break;
    default:
      // Fail fast here instead of building a writer around a null context.
      throw new IllegalArgumentException(
          "Unsupported write method: " + config.getWriteMethod());
  }
  return Optional.of(new BigQueryDataSourceWriter(dataSourceWriterContext));
}
Aggregations