Use of org.apache.spark.sql.sources.v2.DataSourceOptions in project spark-bigquery-connector by GoogleCloudDataproc: class SparkBigQueryConfigTest, method testConfigFromGlobalOptions.
@Test
public void testConfigFromGlobalOptions() {
  Configuration hadoopConfiguration = new Configuration();
  DataSourceOptions options =
      new DataSourceOptions(
          ImmutableMap.<String, String>builder().put("table", "dataset.table").build());
  ImmutableMap<String, String> globalOptions =
      ImmutableMap.<String, String>builder()
          .put("viewsEnabled", "true")
          .put("spark.datasource.bigquery.temporaryGcsBucket", "bucket")
          .put("bqStorageReadEndpoint", "ep")
          .put("bqEncodedCreateReadSessionRequest", "ec")
          .put("writeMethod", "direct")
          .build();
  SparkBigQueryConfig config =
      SparkBigQueryConfig.from(
          options.asMap(),
          globalOptions,
          hadoopConfiguration,
          DEFAULT_PARALLELISM,
          new SQLConf(),
          SPARK_VERSION,
          Optional.empty());
  assertThat(config.isViewsEnabled()).isTrue();
  assertThat(config.getTemporaryGcsBucket()).isEqualTo(Optional.of("bucket"));
  assertThat(config.toReadSessionCreatorConfig().endpoint().get()).isEqualTo("ep");
  assertThat(config.toReadSessionCreatorConfig().getRequestEncodedBase().get()).isEqualTo("ec");
  assertThat(config.getWriteMethod()).isEqualTo(SparkBigQueryConfig.WriteMethod.DIRECT);
}
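As the assertions show, connector options can be supplied globally as well as per invocation, and keys carrying the spark.datasource.bigquery. prefix are resolved to their unprefixed names (temporaryGcsBucket above). A minimal sketch of setting the same options globally on a Spark session; the session wiring below is illustrative and not part of the test class:

import org.apache.spark.sql.SparkSession;

// Illustrative setup (assumption): global connector options on the session config.
// The "spark.datasource.bigquery." prefix is stripped by the connector, so this
// is equivalent to passing temporaryGcsBucket=bucket on each write.
SparkSession spark =
    SparkSession.builder()
        .appName("bigquery-config-demo") // hypothetical app name
        .config("spark.datasource.bigquery.temporaryGcsBucket", "bucket")
        .config("viewsEnabled", "true")
        .getOrCreate();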
Use of org.apache.spark.sql.sources.v2.DataSourceOptions in project spark-bigquery-connector by GoogleCloudDataproc: class SparkBigQueryConfigTest, method testDefaults.
@Test
public void testDefaults() {
  Configuration hadoopConfiguration = new Configuration();
  DataSourceOptions options = new DataSourceOptions(defaultOptions);
  SparkBigQueryConfig config =
      SparkBigQueryConfig.from(
          options.asMap(),
          ImmutableMap.of(),
          hadoopConfiguration,
          DEFAULT_PARALLELISM,
          new SQLConf(),
          SPARK_VERSION,
          Optional.empty());
  assertThat(config.getTableId()).isEqualTo(TableId.of("dataset", "table"));
  assertThat(config.getFilter()).isEqualTo(Optional.empty());
  assertThat(config.getSchema()).isEqualTo(Optional.empty());
  assertThat(config.getMaxParallelism()).isEqualTo(OptionalInt.empty());
  assertThat(config.getTemporaryGcsBucket()).isEqualTo(Optional.empty());
  assertThat(config.getIntermediateFormat())
      .isEqualTo(SparkBigQueryConfig.DEFAULT_INTERMEDIATE_FORMAT);
  assertThat(config.getReadDataFormat()).isEqualTo(SparkBigQueryConfig.DEFAULT_READ_DATA_FORMAT);
  assertThat(config.getMaterializationProject()).isEqualTo(Optional.empty());
  assertThat(config.getMaterializationDataset()).isEqualTo(Optional.empty());
  assertThat(config.getPartitionField()).isEqualTo(Optional.empty());
  assertThat(config.getPartitionExpirationMs()).isEqualTo(OptionalLong.empty());
  assertThat(config.getPartitionRequireFilter()).isEqualTo(Optional.empty());
  assertThat(config.getPartitionType()).isEqualTo(Optional.empty());
  assertThat(config.getClusteredFields()).isEqualTo(Optional.empty());
  assertThat(config.getCreateDisposition()).isEqualTo(Optional.empty());
  assertThat(config.getLoadSchemaUpdateOptions()).isEqualTo(ImmutableList.of());
  assertThat(config.getMaterializationExpirationTimeInMinutes()).isEqualTo(24 * 60);
  assertThat(config.getMaxReadRowsRetries()).isEqualTo(3);
  assertThat(config.isUseAvroLogicalTypes()).isFalse();
  assertThat(config.getBigQueryClientConnectTimeout()).isEqualTo(60 * 1000);
  assertThat(config.getBigQueryClientReadTimeout()).isEqualTo(60 * 1000);
  assertThat(config.getBigQueryClientRetrySettings().getMaxAttempts()).isEqualTo(10);
  assertThat(config.getArrowCompressionCodec()).isEqualTo(CompressionCodec.COMPRESSION_UNSPECIFIED);
  assertThat(config.getWriteMethod()).isEqualTo(SparkBigQueryConfig.WriteMethod.INDIRECT);
  assertThat(config.getCacheExpirationTimeInMinutes())
      .isEqualTo(SparkBigQueryConfig.DEFAULT_CACHE_EXPIRATION_IN_MINUTES);
  assertThat(config.getTraceId().isPresent()).isFalse();
  assertThat(config.getBigQueryJobLabels()).isEmpty();
}
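Both this test and testSerializability below rely on a defaultOptions fixture defined elsewhere in SparkBigQueryConfigTest. Given the table id asserted above, a plausible reconstruction (an assumption, not the verbatim field) is:

import com.google.common.collect.ImmutableMap;

// Reconstructed fixture (assumption): the minimal options these tests need.
public static final ImmutableMap<String, String> defaultOptions =
    ImmutableMap.of("table", "dataset.table");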
Use of org.apache.spark.sql.sources.v2.DataSourceOptions in project spark-bigquery-connector by GoogleCloudDataproc: class SparkBigQueryConfigTest, method testInvalidCompressionCodec.
@Test
public void testInvalidCompressionCodec() {
  Configuration hadoopConfiguration = new Configuration();
  DataSourceOptions options =
      new DataSourceOptions(
          ImmutableMap.<String, String>builder()
              .put("table", "test_t")
              .put("dataset", "test_d")
              .put("project", "test_p")
              .put("arrowCompressionCodec", "randomCompression")
              .build());
  IllegalArgumentException exception =
      Assert.assertThrows(
          IllegalArgumentException.class,
          () ->
              SparkBigQueryConfig.from(
                  options.asMap(),
                  ImmutableMap.of(),
                  hadoopConfiguration,
                  DEFAULT_PARALLELISM,
                  new SQLConf(),
                  SPARK_VERSION,
                  Optional.empty()));
  assertThat(exception)
      .hasMessageThat()
      .contains(
          "Compression codec 'RANDOMCOMPRESSION' for Arrow is not supported."
              + " Supported formats are "
              + Arrays.toString(CompressionCodec.values()));
}
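The upper-cased 'RANDOMCOMPRESSION' in the expected message suggests the connector upper-cases the option before matching it against the Arrow CompressionCodec enum, so valid values appear to be matched case-insensitively. A hedged sketch of a configuration that should parse, using ZSTD as an example codec:

// Sketch: same options as above but with a codec name the enum defines,
// e.g. COMPRESSION_UNSPECIFIED, LZ4_FRAME, or ZSTD.
DataSourceOptions valid =
    new DataSourceOptions(
        ImmutableMap.of(
            "table", "test_t",
            "dataset", "test_d",
            "project", "test_p",
            "arrowCompressionCodec", "ZSTD"));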
Use of org.apache.spark.sql.sources.v2.DataSourceOptions in project spark-bigquery-connector by GoogleCloudDataproc: class SparkBigQueryConfigTest, method testSerializability.
// "project", "test_project"); // to remove the need for default project
@Test
public void testSerializability() throws IOException {
  Configuration hadoopConfiguration = new Configuration();
  DataSourceOptions options = new DataSourceOptions(defaultOptions);
  // Test to make sure all members can be serialized.
  new ObjectOutputStream(new ByteArrayOutputStream())
      .writeObject(
          SparkBigQueryConfig.from(
              options.asMap(),
              ImmutableMap.of(),
              hadoopConfiguration,
              DEFAULT_PARALLELISM,
              new SQLConf(),
              SPARK_VERSION,
              Optional.empty()));
}
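Serializability matters here because the config is shipped from the Spark driver to executors. The test only verifies that writing succeeds; a slightly stronger variant (an illustrative extension, not the project's code) would read the object back as well:

// Illustrative extension: full serialize/deserialize round trip.
// Note: ObjectInputStream.readObject() also throws ClassNotFoundException.
ByteArrayOutputStream bytes = new ByteArrayOutputStream();
new ObjectOutputStream(bytes).writeObject(config); // config built as above
Object restored =
    new ObjectInputStream(new ByteArrayInputStream(bytes.toByteArray())).readObject();
assertThat(restored).isInstanceOf(SparkBigQueryConfig.class);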
Use of org.apache.spark.sql.sources.v2.DataSourceOptions in project spark-bigquery-connector by GoogleCloudDataproc: class SparkBigQueryConfigTest, method testGetTableIdWithoutThePartition_PartitionExists.
@Test
public void testGetTableIdWithoutThePartition_PartitionExists() {
  Configuration hadoopConfiguration = new Configuration();
  DataSourceOptions options =
      new DataSourceOptions(ImmutableMap.of("table", "dataset.table", "datePartition", "20201010"));
  SparkBigQueryConfig config =
      SparkBigQueryConfig.from(
          options.asMap(),
          ImmutableMap.of(),
          hadoopConfiguration,
          DEFAULT_PARALLELISM,
          new SQLConf(),
          SPARK_VERSION,
          Optional.empty());
  assertThat(config.getTableId().getTable()).isEqualTo("table$20201010");
  assertThat(config.getTableIdWithoutThePartition().getTable()).isEqualTo("table");
  assertThat(config.getTableIdWithoutThePartition().getDataset())
      .isEqualTo(config.getTableId().getDataset());
  assertThat(config.getTableIdWithoutThePartition().getProject())
      .isEqualTo(config.getTableId().getProject());
}
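BigQuery addresses a single partition of a date-partitioned table with the table$YYYYMMDD decorator, which is why the datePartition option is folded into the table name here. A minimal sketch of the decorator stripping that getTableIdWithoutThePartition() implies (an illustration, not the connector's implementation):

// Illustrative helper: drop a trailing "$<partition>" decorator, if present.
static String withoutPartitionDecorator(String table) {
  int dollar = table.indexOf('$');
  return dollar < 0 ? table : table.substring(0, dollar);
}
// withoutPartitionDecorator("table$20201010") returns "table".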