use of org.apache.spark.sql.sources.v2.writer.DataSourceWriter in project spark-bigquery-connector by GoogleCloudDataproc.
the class BigQueryDataSourceV2 method createWriter.
/**
 * Returns a DataSourceWriter for the specified parameters. In case the table already exists and
 * the SaveMode is "Ignore", an Optional.empty() is returned.
 */
@Override
public Optional<DataSourceWriter> createWriter(
    String writeUUID, StructType schema, SaveMode mode, DataSourceOptions options) {
  Injector injector =
      createInjector(
          schema, options.asMap(), new BigQueryDataSourceWriterModule(writeUUID, schema, mode));
  // first verify if we need to do anything at all, based on the table existence and the save
  // mode.
  BigQueryClient bigQueryClient = injector.getInstance(BigQueryClient.class);
  SparkBigQueryConfig config = injector.getInstance(SparkBigQueryConfig.class);
  TableInfo table = bigQueryClient.getTable(config.getTableId());
  if (table != null) {
    // table already exists
    if (mode == SaveMode.Ignore) {
      return Optional.empty();
    }
    if (mode == SaveMode.ErrorIfExists) {
      throw new IllegalArgumentException(
          String.format(
              "SaveMode is set to ErrorIfExists and table '%s' already exists. Did you want "
                  + "to add data to the table by setting the SaveMode to Append? Example: "
                  + "df.write.format.options.mode(\"append\").save()",
              BigQueryUtil.friendlyTableName(table.getTableId())));
    }
  } else {
    // Table does not exist. If the CreateDisposition is CREATE_NEVER, there is no point in
    // writing the data to GCS in the first place, as it is going to fail on the BigQuery side.
    boolean createNever =
        config
            .getCreateDisposition()
            .map(createDisposition -> createDisposition == JobInfo.CreateDisposition.CREATE_NEVER)
            .orElse(false);
    if (createNever) {
      throw new IllegalArgumentException(
          String.format(
              "For table %s Create Disposition is CREATE_NEVER and the table does not exists."
                  + " Aborting the insert",
              BigQueryUtil.friendlyTableName(config.getTableId())));
    }
  }
  DataSourceWriterContext dataSourceWriterContext = null;
  switch (config.getWriteMethod()) {
    case DIRECT:
      dataSourceWriterContext = injector.getInstance(BigQueryDirectDataSourceWriterContext.class);
      break;
    case INDIRECT:
      dataSourceWriterContext =
          injector.getInstance(BigQueryIndirectDataSourceWriterContext.class);
      break;
  }
  return Optional.of(new BigQueryDataSourceWriter(dataSourceWriterContext));
}
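A minimal sketch of how this createWriter path is exercised from the Spark side. The dataset, table, bucket, and input path names are hypothetical; "temporaryGcsBucket" is only relevant to the INDIRECT write method, which stages data in GCS before loading it into BigQuery.
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;

public class BigQueryWriteExample {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder().appName("bq-write").getOrCreate();
    Dataset<Row> df = spark.read().json("gs://my-bucket/input.json"); // hypothetical input

    // SaveMode.Append avoids the IllegalArgumentException thrown above when the table exists;
    // SaveMode.Ignore would make createWriter return Optional.empty() and skip the write.
    df.write()
        .format("bigquery")
        .option("table", "my_dataset.my_table") // hypothetical dataset.table
        .option("temporaryGcsBucket", "my-temp-bucket") // used by the INDIRECT write method
        .mode(SaveMode.Append)
        .save();
  }
}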
use of org.apache.spark.sql.sources.v2.writer.DataSourceWriter in project iceberg by apache.
the class IcebergSource method createWriter.
@Override
public Optional<DataSourceWriter> createWriter(
    String jobId, StructType dsStruct, SaveMode mode, DataSourceOptions options) {
  Preconditions.checkArgument(
      mode == SaveMode.Append || mode == SaveMode.Overwrite,
      "Save mode %s is not supported", mode);
  Configuration conf = new Configuration(lazyBaseConf());
  Table table = getTableAndResolveHadoopConfiguration(options, conf);
  SparkWriteConf writeConf = new SparkWriteConf(lazySparkSession(), table, options.asMap());
  Preconditions.checkArgument(
      writeConf.handleTimestampWithoutZone() || !SparkUtil.hasTimestampWithoutZone(table.schema()),
      SparkUtil.TIMESTAMP_WITHOUT_TIMEZONE_ERROR);
  Schema writeSchema = SparkSchemaUtil.convert(table.schema(), dsStruct);
  TypeUtil.validateWriteSchema(
      table.schema(), writeSchema, writeConf.checkNullability(), writeConf.checkOrdering());
  SparkUtil.validatePartitionTransforms(table.spec());
  String appId = lazySparkSession().sparkContext().applicationId();
  String wapId = writeConf.wapId();
  boolean replacePartitions = mode == SaveMode.Overwrite;
  return Optional.of(
      new Writer(lazySparkSession(), table, writeConf, replacePartitions, appId, wapId,
          writeSchema, dsStruct));
}
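A minimal sketch of a write that reaches this createWriter in Spark 2.4's DataSourceV2 path, assuming a Hadoop path-based Iceberg table; the source table name and warehouse location are hypothetical, and how the save() argument resolves to a table depends on catalog configuration. Only Append and Overwrite pass the Preconditions check above; Overwrite is translated into replacePartitions = true.
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;

public class IcebergWriteExample {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder().appName("iceberg-write").getOrCreate();
    Dataset<Row> df = spark.table("source_table"); // hypothetical source

    // SaveMode.Overwrite would replace the partitions covered by the incoming data instead.
    df.write()
        .format("iceberg")
        .mode(SaveMode.Append)
        .save("hdfs://namenode:8020/warehouse/db/table"); // hypothetical table location
  }
}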