Use of org.apache.spark.sql.internal.SQLConf in the spark-bigquery-connector project by GoogleCloudDataproc.
Class SparkBigQueryConfigTest, method testGetTableIdWithoutThePartition_PartitionMissing:
@Test
public void testGetTableIdWithoutThePartition_PartitionMissing() {
  Configuration hadoopConfiguration = new Configuration();
  DataSourceOptions options = new DataSourceOptions(defaultOptions);
  SparkBigQueryConfig config =
      SparkBigQueryConfig.from(
          options.asMap(),
          ImmutableMap.of(),
          hadoopConfiguration,
          DEFAULT_PARALLELISM,
          new SQLConf(),
          SPARK_VERSION,
          Optional.empty());
  assertThat(config.getTableIdWithoutThePartition().getTable())
      .isEqualTo(config.getTableId().getTable());
  assertThat(config.getTableIdWithoutThePartition().getDataset())
      .isEqualTo(config.getTableId().getDataset());
  assertThat(config.getTableIdWithoutThePartition().getProject())
      .isEqualTo(config.getTableId().getProject());
}
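The fixtures referenced above (defaultOptions, DEFAULT_PARALLELISM, SPARK_VERSION) are defined elsewhere in SparkBigQueryConfigTest and are not reproduced on this page. A minimal sketch of plausible values, assuming the fixture only needs to name a table so that getTableId() and getTableIdWithoutThePartition() resolve to the same TableId:

// Hypothetical fixture values; the real test class defines these elsewhere.
private static final int DEFAULT_PARALLELISM = 10;
private static final String SPARK_VERSION = "2.4.0";
private static final ImmutableMap<String, String> defaultOptions =
    ImmutableMap.of("table", "dataset.table"); // no partition decorator, e.g. no "$20240101" suffix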
Class SparkBigQueryConfigTest, method testConfigFromOptions:
@Test
public void testConfigFromOptions() {
  Configuration hadoopConfiguration = new Configuration();
  DataSourceOptions options =
      new DataSourceOptions(
          ImmutableMap.<String, String>builder()
              .put("table", "test_t")
              .put("dataset", "test_d")
              .put("project", "test_p")
              .put("filter", "test > 0")
              .put("parentProject", "test_pp")
              .put("maxParallelism", "99")
              .put("viewsEnabled", "true")
              .put("viewMaterializationProject", "vmp")
              .put("viewMaterializationDataset", "vmd")
              .put("materializationExpirationTimeInMinutes", "100")
              .put("readDataFormat", "ARROW")
              .put("optimizedEmptyProjection", "false")
              .put("createDisposition", "CREATE_NEVER")
              .put("temporaryGcsBucket", "some_bucket")
              .put("intermediateFormat", "ORC")
              .put("useAvroLogicalTypes", "true")
              .put("partitionRequireFilter", "true")
              .put("partitionType", "HOUR")
              .put("partitionField", "some_field")
              .put("partitionExpirationMs", "999")
              .put("clusteredFields", "field1,field2")
              .put("allowFieldAddition", "true")
              .put("allowFieldRelaxation", "true")
              .put("httpConnectTimeout", "10000")
              .put("httpReadTimeout", "20000")
              .put("httpMaxRetry", "5")
              .put("arrowCompressionCodec", "ZSTD")
              .put("writeMethod", "direct")
              .put("cacheExpirationTimeInMinutes", "100")
              .put("traceJobId", "traceJobId")
              .put("traceApplicationName", "traceApplicationName")
              .put("bigQueryJobLabel.foo", "bar")
              .build());
  SparkBigQueryConfig config =
      SparkBigQueryConfig.from(
          options.asMap(),
          ImmutableMap.of(),
          hadoopConfiguration,
          DEFAULT_PARALLELISM,
          new SQLConf(),
          SPARK_VERSION,
          Optional.empty());
  assertThat(config.getTableId()).isEqualTo(TableId.of("test_p", "test_d", "test_t"));
  assertThat(config.getFilter()).isEqualTo(Optional.of("test > 0"));
  assertThat(config.getSchema()).isEqualTo(Optional.empty());
  assertThat(config.getMaxParallelism()).isEqualTo(OptionalInt.of(99));
  assertThat(config.getTemporaryGcsBucket()).isEqualTo(Optional.of("some_bucket"));
  assertThat(config.getIntermediateFormat())
      .isEqualTo(SparkBigQueryConfig.IntermediateFormat.ORC);
  assertThat(config.getReadDataFormat()).isEqualTo(DataFormat.ARROW);
  assertThat(config.getMaterializationProject()).isEqualTo(Optional.of("vmp"));
  assertThat(config.getMaterializationDataset()).isEqualTo(Optional.of("vmd"));
  assertThat(config.getPartitionType()).isEqualTo(Optional.of(TimePartitioning.Type.HOUR));
  assertThat(config.getPartitionField()).isEqualTo(Optional.of("some_field"));
  assertThat(config.getPartitionExpirationMs()).isEqualTo(OptionalLong.of(999));
  assertThat(config.getPartitionRequireFilter()).isEqualTo(Optional.of(true));
  assertThat(config.getClusteredFields().get()).isEqualTo(ImmutableList.of("field1", "field2"));
  assertThat(config.getCreateDisposition())
      .isEqualTo(Optional.of(JobInfo.CreateDisposition.CREATE_NEVER));
  assertThat(config.getLoadSchemaUpdateOptions())
      .isEqualTo(
          ImmutableList.of(
              JobInfo.SchemaUpdateOption.ALLOW_FIELD_ADDITION,
              JobInfo.SchemaUpdateOption.ALLOW_FIELD_RELAXATION));
  assertThat(config.getMaterializationExpirationTimeInMinutes()).isEqualTo(100);
  assertThat(config.getMaxReadRowsRetries()).isEqualTo(3);
  assertThat(config.isUseAvroLogicalTypes()).isTrue();
  assertThat(config.getBigQueryClientConnectTimeout()).isEqualTo(10000);
  assertThat(config.getBigQueryClientReadTimeout()).isEqualTo(20000);
  assertThat(config.getBigQueryClientRetrySettings().getMaxAttempts()).isEqualTo(5);
  assertThat(config.getArrowCompressionCodec()).isEqualTo(CompressionCodec.ZSTD);
  assertThat(config.getWriteMethod()).isEqualTo(SparkBigQueryConfig.WriteMethod.DIRECT);
  assertThat(config.getCacheExpirationTimeInMinutes()).isEqualTo(100);
  assertThat(config.getTraceId()).isEqualTo(Optional.of("Spark:traceApplicationName:traceJobId"));
  assertThat(config.getBigQueryJobLabels()).hasSize(1);
  assertThat(config.getBigQueryJobLabels()).containsEntry("foo", "bar");
}
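For context, the option keys exercised by this test are normally supplied through the public DataFrame reader API rather than by calling SparkBigQueryConfig.from directly. A minimal read-side sketch (not part of the test; the project, dataset, and table names are placeholders):

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class BigQueryReadOptionsExample {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder().appName("bq-read-options").getOrCreate();
    // The option keys mirror the ones asserted above; the values here are illustrative only.
    Dataset<Row> df =
        spark
            .read()
            .format("bigquery")
            .option("table", "test_p.test_d.test_t")
            .option("readDataFormat", "ARROW")
            .option("filter", "test > 0")
            .option("maxParallelism", "99")
            .load();
    df.show();
  }
}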
Class SparkBigQueryProxyAndHttpConfigTest, method testConfigViaSparkBigQueryConfigWithAllThreeParameters:
@Test
public void testConfigViaSparkBigQueryConfigWithAllThreeParameters() throws URISyntaxException {
  HashMap<String, String> sparkConfigOptions = new HashMap<>(defaultOptions);
  sparkConfigOptions.put("table", "dataset.table");
  ImmutableMap<String, String> globalOptions =
      SparkBigQueryConfig.normalizeConf(defaultGlobalOptions);
  DataSourceOptions options = new DataSourceOptions(sparkConfigOptions);
  SparkBigQueryConfig sparkConfig =
      SparkBigQueryConfig.from(
          options.asMap(),
          globalOptions,
          defaultHadoopConfiguration,
          10,
          new SQLConf(),
          "2.4.0",
          Optional.empty());
  SparkBigQueryProxyAndHttpConfig config =
      (SparkBigQueryProxyAndHttpConfig) sparkConfig.getBigQueryProxyConfig();
  assertThat(config.getProxyUri())
      .isEqualTo(Optional.of(getURI("http", "bq-connector-host", 1234)));
  assertThat(config.getProxyUsername()).isEqualTo(Optional.of("bq-connector-user"));
  assertThat(config.getProxyPassword()).isEqualTo(Optional.of("bq-connector-password"));
  assertThat(config.getHttpMaxRetry()).isEqualTo(Optional.of(10));
  assertThat(config.getHttpConnectTimeout()).isEqualTo(Optional.of(10000));
  assertThat(config.getHttpReadTimeout()).isEqualTo(Optional.of(20000));
}
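defaultOptions, defaultGlobalOptions, defaultHadoopConfiguration, and getURI are fixtures defined elsewhere in SparkBigQueryProxyAndHttpConfigTest. A hypothetical reconstruction of defaultOptions, inferred from the values asserted above (when connector options, global options, and Hadoop configuration are all present, the connector-level options win):

// Hypothetical reconstruction of the defaultOptions fixture, inferred from the asserted values.
ImmutableMap<String, String> defaultOptions =
    ImmutableMap.<String, String>builder()
        .put("proxyAddress", "http://bq-connector-host:1234")
        .put("proxyUsername", "bq-connector-user")
        .put("proxyPassword", "bq-connector-password")
        .put("httpMaxRetry", "10")
        .put("httpConnectTimeout", "10000")
        .put("httpReadTimeout", "20000")
        .build();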
Class SparkBigQueryProxyAndHttpConfigTest, method testConfigViaSparkBigQueryConfigWithGlobalOptionsAndHadoopConfiguration:
@Test
public void testConfigViaSparkBigQueryConfigWithGlobalOptionsAndHadoopConfiguration()
    throws URISyntaxException {
  HashMap<String, String> sparkConfigOptions = new HashMap<>();
  sparkConfigOptions.put("table", "dataset.table");
  ImmutableMap<String, String> globalOptions =
      SparkBigQueryConfig.normalizeConf(defaultGlobalOptions);
  DataSourceOptions options = new DataSourceOptions(sparkConfigOptions);
  SparkBigQueryConfig sparkConfig =
      SparkBigQueryConfig.from(
          options.asMap(), // contains only one key, "table"
          globalOptions,
          defaultHadoopConfiguration,
          10,
          new SQLConf(),
          "2.4.0",
          Optional.empty());
  SparkBigQueryProxyAndHttpConfig config =
      (SparkBigQueryProxyAndHttpConfig) sparkConfig.getBigQueryProxyConfig();
  assertThat(config.getProxyUri())
      .isEqualTo(Optional.of(getURI("http", "bq-connector-host-global", 1234)));
  assertThat(config.getProxyUsername()).isEqualTo(Optional.of("bq-connector-user-global"));
  assertThat(config.getProxyPassword()).isEqualTo(Optional.of("bq-connector-password-global"));
  assertThat(config.getHttpMaxRetry()).isEqualTo(Optional.of(20));
  assertThat(config.getHttpConnectTimeout()).isEqualTo(Optional.of(20000));
  assertThat(config.getHttpReadTimeout()).isEqualTo(Optional.of(30000));
}
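Here the connector-level options are dropped, so the asserted values come from the global options rather than the Hadoop configuration. A hypothetical reconstruction of defaultGlobalOptions, inferred from the asserted "-global" values and assuming the documented spark.datasource.bigquery. prefix that SparkBigQueryConfig.normalizeConf strips:

// Hypothetical reconstruction of the defaultGlobalOptions fixture; the key names assume the
// "spark.datasource.bigquery." prefix used when connector options are set at the Spark-conf level.
ImmutableMap<String, String> defaultGlobalOptions =
    ImmutableMap.<String, String>builder()
        .put("spark.datasource.bigquery.proxyAddress", "http://bq-connector-host-global:1234")
        .put("spark.datasource.bigquery.proxyUsername", "bq-connector-user-global")
        .put("spark.datasource.bigquery.proxyPassword", "bq-connector-password-global")
        .put("spark.datasource.bigquery.httpMaxRetry", "20")
        .put("spark.datasource.bigquery.httpConnectTimeout", "20000")
        .put("spark.datasource.bigquery.httpReadTimeout", "30000")
        .build();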