Use of com.google.cloud.spark.bigquery.SparkBigQueryConfig in the project OpenLineage by OpenLineage.
Class MockBigQueryRelationProvider, method createRelationInternal.
/**
 * Builds a {@link MockBigQueryRelation} from objects supplied by the Guice injector that
 * {@code INJECTOR} creates for the given context, parameters and schema.
 *
 * <p>The table metadata is looked up through the injected {@link BigQueryClient} using the read
 * options derived from the injected {@link SparkBigQueryConfig}; the rows backing the relation
 * are whatever {@code Dataset<Row>} the injector has bound.
 *
 * @param sqlContext the SQL context handed to the injector and to the resulting relation
 * @param parameters the data source options handed to the injector
 * @param schema the optional user-supplied schema handed to the injector
 * @return a mock relation wrapping the resolved config, table info and injected records
 */
@Override
public BigQueryRelation createRelationInternal(SQLContext sqlContext, Map<String, String> parameters, Option<StructType> schema) {
    // Anonymous subclass so that the generic type Dataset<Row> is captured by the key.
    Key<Dataset<Row>> recordsKey = new Key<Dataset<Row>>() {
    };
    Injector guice = INJECTOR.createGuiceInjector(sqlContext, parameters, schema);

    SparkBigQueryConfig bigQueryConfig = guice.getInstance(SparkBigQueryConfig.class);
    BigQueryClient client = guice.getInstance(BigQueryClient.class);
    TableInfo readTable = client.getReadTable(bigQueryConfig.toReadTableOptions());
    Dataset<Row> records = guice.getInstance(recordsKey);

    return new MockBigQueryRelation(bigQueryConfig, readTable, sqlContext, records);
}
Use of com.google.cloud.spark.bigquery.SparkBigQueryConfig in the project spark-bigquery-connector by GoogleCloudDataproc.
Class BigQueryDataSourceV2, method createWriter.
/**
 * Returns a DataSourceWriter for the specified parameters.
 *
 * <p>Before any writer is created, the target table's existence is checked against the requested
 * {@link SaveMode} and the configured create disposition, so that writes that cannot succeed are
 * rejected up front.
 *
 * @param writeUUID identifier of this write, passed to the writer module
 * @param schema schema of the data to be written
 * @param mode the save mode requested by the caller
 * @param options the data source options; converted to a map and used to build the injector
 * @return the writer wrapped in an Optional, or {@code Optional.empty()} when the table already
 *     exists and the SaveMode is "Ignore"
 * @throws IllegalArgumentException if the table exists and the mode is {@code ErrorIfExists}, if
 *     the table does not exist and the create disposition is {@code CREATE_NEVER}, or if the
 *     configured write method is neither {@code DIRECT} nor {@code INDIRECT}
 */
@Override
public Optional<DataSourceWriter> createWriter(String writeUUID, StructType schema, SaveMode mode, DataSourceOptions options) {
    Injector injector = createInjector(schema, options.asMap(), new BigQueryDataSourceWriterModule(writeUUID, schema, mode));
    // first verify if we need to do anything at all, based on the table existence and the save
    // mode.
    BigQueryClient bigQueryClient = injector.getInstance(BigQueryClient.class);
    SparkBigQueryConfig config = injector.getInstance(SparkBigQueryConfig.class);
    TableInfo table = bigQueryClient.getTable(config.getTableId());
    if (table != null) {
        // table already exists
        if (mode == SaveMode.Ignore) {
            return Optional.empty();
        }
        if (mode == SaveMode.ErrorIfExists) {
            throw new IllegalArgumentException(String.format("SaveMode is set to ErrorIfExists and table '%s' already exists. Did you want " + "to add data to the table by setting the SaveMode to Append? Example: " + "df.write.format.options.mode(\"append\").save()", BigQueryUtil.friendlyTableName(table.getTableId())));
        }
    } else {
        // table does not exist
        // If the CreateDisposition is CREATE_NEVER, and the table does not exist,
        // there's no point in writing the data to GCS in the first place as it is going
        // to fail on the BigQuery side.
        boolean createNever = config.getCreateDisposition().map(createDisposition -> createDisposition == JobInfo.CreateDisposition.CREATE_NEVER).orElse(false);
        if (createNever) {
            throw new IllegalArgumentException(String.format("For table %s Create Disposition is CREATE_NEVER and the table does not exists." + " Aborting the insert", BigQueryUtil.friendlyTableName(config.getTableId())));
        }
    }
    // Declared final without an initializer so the compiler verifies that every path through the
    // switch either assigns a context or throws; a null context can no longer reach the writer.
    final DataSourceWriterContext dataSourceWriterContext;
    switch (config.getWriteMethod()) {
        case DIRECT:
            dataSourceWriterContext = injector.getInstance(BigQueryDirectDataSourceWriterContext.class);
            break;
        case INDIRECT:
            dataSourceWriterContext = injector.getInstance(BigQueryIndirectDataSourceWriterContext.class);
            break;
        default:
            // Fail fast instead of handing a null context to the writer.
            throw new IllegalArgumentException("Unsupported write method: " + config.getWriteMethod());
    }
    return Optional.of(new BigQueryDataSourceWriter(dataSourceWriterContext));
}
Use of com.google.cloud.spark.bigquery.SparkBigQueryConfig in the project OpenLineage by OpenLineage.
Class LogicalPlanSerializerTest, method testSerializeBigQueryPlan.
/**
 * Serializes a {@link LogicalRelation} backed by a {@link BigQueryRelation}, and an
 * {@link InsertIntoDataSourceCommand} built on top of it, then compares both serialized nodes
 * with the JSON fixtures under {@code src/test/resources/test_data/serde}. The {@code exprId}
 * key is passed to the matcher as the single key to leave out of the comparison.
 *
 * @throws IOException if a serialized node or a fixture file cannot be parsed
 */
@Test
public void testSerializeBigQueryPlan() throws IOException {
    System.setProperty("GOOGLE_CLOUD_PROJECT", "test_serialization");

    // Build the BigQuery relation under test.
    String sql = "SELECT date FROM bigquery-public-data.google_analytics_sample.test";
    ImmutableMap<String, String> sparkOptions =
        ImmutableMap.of(
            "query", sql,
            "dataset", "test-dataset",
            "maxparallelism", "2",
            "partitionexpirationms", "2");
    SparkBigQueryConfig bigQueryConfig =
        SparkBigQueryConfig.from(
            sparkOptions, ImmutableMap.of(), new Configuration(), 10, SQLConf.get(), "", Optional.empty());
    TableId tableId = TableId.of("dataset", "test");
    TableInfo tableInfo = TableInfo.newBuilder(tableId, new TestTableDefinition()).build();
    BigQueryRelation relation = new BigQueryRelation(bigQueryConfig, tableInfo, mock(SQLContext.class));

    // Wrap it in a logical relation exposing a single "name" attribute, and in an insert command.
    AttributeReference nameAttribute =
        new AttributeReference(
            "name",
            StringType$.MODULE$,
            false,
            Metadata.empty(),
            ExprId.apply(1L),
            Seq$.MODULE$.<String>empty());
    LogicalRelation logicalRelation =
        new LogicalRelation(
            relation,
            Seq$.MODULE$.<AttributeReference>newBuilder().$plus$eq(nameAttribute).result(),
            Option.empty(),
            false);
    InsertIntoDataSourceCommand insertCommand =
        new InsertIntoDataSourceCommand(logicalRelation, logicalRelation, false);

    // Actual serialized output, parsed back into generic maps.
    Map<String, Object> actualCommand =
        objectMapper.readValue(logicalPlanSerializer.serialize(insertCommand), mapTypeReference);
    Map<String, Object> actualRelation =
        objectMapper.readValue(logicalPlanSerializer.serialize(logicalRelation), mapTypeReference);

    // Expected output from the checked-in fixtures.
    Path serdeDir = Paths.get("src", "test", "resources", "test_data", "serde");
    Map<String, Object> expectedCommand =
        objectMapper.readValue(serdeDir.resolve("insertintods-node.json").toFile(), mapTypeReference);
    Map<String, Object> expectedRelation =
        objectMapper.readValue(serdeDir.resolve("bigqueryrelation-node.json").toFile(), mapTypeReference);

    assertThat(actualCommand)
        .satisfies(new MatchesMapRecursively(expectedCommand, Collections.singleton("exprId")));
    assertThat(actualRelation)
        .satisfies(new MatchesMapRecursively(expectedRelation, Collections.singleton("exprId")));
}
Aggregations