
Example 11 with SQLConf

Use of org.apache.spark.sql.internal.SQLConf in the project spark-bigquery-connector by GoogleCloudDataproc.

From the class SparkBigQueryConfigTest, method testGetTableIdWithoutThePartition_PartitionMissing:

@Test
public void testGetTableIdWithoutThePartition_PartitionMissing() {
    Configuration hadoopConfiguration = new Configuration();
    DataSourceOptions options = new DataSourceOptions(defaultOptions);
    SparkBigQueryConfig config =
        SparkBigQueryConfig.from(
            options.asMap(),
            ImmutableMap.of(),
            hadoopConfiguration,
            DEFAULT_PARALLELISM,
            new SQLConf(),
            SPARK_VERSION,
            Optional.empty());
    assertThat(config.getTableIdWithoutThePartition().getTable()).isEqualTo(config.getTableId().getTable());
    assertThat(config.getTableIdWithoutThePartition().getDataset()).isEqualTo(config.getTableId().getDataset());
    assertThat(config.getTableIdWithoutThePartition().getProject()).isEqualTo(config.getTableId().getProject());
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), DataSourceOptions (org.apache.spark.sql.sources.v2.DataSourceOptions), SQLConf (org.apache.spark.sql.internal.SQLConf), Test (org.junit.Test)
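
The defaultOptions fixture is defined elsewhere in SparkBigQueryConfigTest and not shown here. A minimal sketch of a map this test could run against, with a hypothetical table reference and, crucially, no partition decorator (no "$" suffix such as table$20220101), which is why getTableIdWithoutThePartition() is expected to equal getTableId():

// Hypothetical fixture, for illustration only; the real defaultOptions map
// lives elsewhere in SparkBigQueryConfigTest. ImmutableMap is
// com.google.common.collect.ImmutableMap.
private static final ImmutableMap<String, String> defaultOptions =
    ImmutableMap.of(
        // plain table reference, no $partition decorator
        "table", "dataset.table",
        "project", "some_project");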

Example 12 with SQLConf

Use of org.apache.spark.sql.internal.SQLConf in the project spark-bigquery-connector by GoogleCloudDataproc.

From the class SparkBigQueryConfigTest, method testConfigFromOptions:

@Test
public void testConfigFromOptions() {
    Configuration hadoopConfiguration = new Configuration();
    DataSourceOptions options =
        new DataSourceOptions(
            ImmutableMap.<String, String>builder()
                .put("table", "test_t").put("dataset", "test_d").put("project", "test_p")
                .put("filter", "test > 0").put("parentProject", "test_pp")
                .put("maxParallelism", "99")
                .put("viewsEnabled", "true")
                .put("viewMaterializationProject", "vmp").put("viewMaterializationDataset", "vmd")
                .put("materializationExpirationTimeInMinutes", "100")
                .put("readDataFormat", "ARROW").put("optimizedEmptyProjection", "false")
                .put("createDisposition", "CREATE_NEVER")
                .put("temporaryGcsBucket", "some_bucket").put("intermediateFormat", "ORC")
                .put("useAvroLogicalTypes", "true")
                .put("partitionRequireFilter", "true").put("partitionType", "HOUR")
                .put("partitionField", "some_field").put("partitionExpirationMs", "999")
                .put("clusteredFields", "field1,field2")
                .put("allowFieldAddition", "true").put("allowFieldRelaxation", "true")
                .put("httpConnectTimeout", "10000").put("httpReadTimeout", "20000")
                .put("httpMaxRetry", "5")
                .put("arrowCompressionCodec", "ZSTD")
                .put("writeMethod", "direct")
                .put("cacheExpirationTimeInMinutes", "100")
                .put("traceJobId", "traceJobId").put("traceApplicationName", "traceApplicationName")
                .put("bigQueryJobLabel.foo", "bar")
                .build());
    SparkBigQueryConfig config =
        SparkBigQueryConfig.from(
            options.asMap(),
            ImmutableMap.of(),
            hadoopConfiguration,
            DEFAULT_PARALLELISM,
            new SQLConf(),
            SPARK_VERSION,
            Optional.empty());
    assertThat(config.getTableId()).isEqualTo(TableId.of("test_p", "test_d", "test_t"));
    assertThat(config.getFilter()).isEqualTo(Optional.of("test > 0"));
    assertThat(config.getSchema()).isEqualTo(Optional.empty());
    assertThat(config.getMaxParallelism()).isEqualTo(OptionalInt.of(99));
    assertThat(config.getTemporaryGcsBucket()).isEqualTo(Optional.of("some_bucket"));
    assertThat(config.getIntermediateFormat()).isEqualTo(SparkBigQueryConfig.IntermediateFormat.ORC);
    assertThat(config.getReadDataFormat()).isEqualTo(DataFormat.ARROW);
    assertThat(config.getMaterializationProject()).isEqualTo(Optional.of("vmp"));
    assertThat(config.getMaterializationDataset()).isEqualTo(Optional.of("vmd"));
    assertThat(config.getPartitionType()).isEqualTo(Optional.of(TimePartitioning.Type.HOUR));
    assertThat(config.getPartitionField()).isEqualTo(Optional.of("some_field"));
    assertThat(config.getPartitionExpirationMs()).isEqualTo(OptionalLong.of(999));
    assertThat(config.getPartitionRequireFilter()).isEqualTo(Optional.of(true));
    assertThat(config.getClusteredFields().get()).isEqualTo(ImmutableList.of("field1", "field2"));
    assertThat(config.getCreateDisposition()).isEqualTo(Optional.of(JobInfo.CreateDisposition.CREATE_NEVER));
    assertThat(config.getLoadSchemaUpdateOptions()).isEqualTo(ImmutableList.of(JobInfo.SchemaUpdateOption.ALLOW_FIELD_ADDITION, JobInfo.SchemaUpdateOption.ALLOW_FIELD_RELAXATION));
    assertThat(config.getMaterializationExpirationTimeInMinutes()).isEqualTo(100);
    assertThat(config.getMaxReadRowsRetries()).isEqualTo(3);
    assertThat(config.isUseAvroLogicalTypes()).isTrue();
    assertThat(config.getBigQueryClientConnectTimeout()).isEqualTo(10000);
    assertThat(config.getBigQueryClientReadTimeout()).isEqualTo(20000);
    assertThat(config.getBigQueryClientRetrySettings().getMaxAttempts()).isEqualTo(5);
    assertThat(config.getArrowCompressionCodec()).isEqualTo(CompressionCodec.ZSTD);
    assertThat(config.getWriteMethod()).isEqualTo(SparkBigQueryConfig.WriteMethod.DIRECT);
    assertThat(config.getCacheExpirationTimeInMinutes()).isEqualTo(100);
    assertThat(config.getTraceId()).isEqualTo(Optional.of("Spark:traceApplicationName:traceJobId"));
    assertThat(config.getBigQueryJobLabels()).hasSize(1);
    assertThat(config.getBigQueryJobLabels()).containsEntry("foo", "bar");
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), DataSourceOptions (org.apache.spark.sql.sources.v2.DataSourceOptions), SQLConf (org.apache.spark.sql.internal.SQLConf), Test (org.junit.Test)
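
For context, outside of a unit test these options usually reach SparkBigQueryConfig through the Spark DataFrame reader rather than a hand-built DataSourceOptions. A minimal sketch, assuming a live SparkSession and an illustrative table name ("bigquery" is the connector's registered data source format; the imports are org.apache.spark.sql.SparkSession, Dataset, and Row):

// Minimal sketch: a few of the option keys exercised above, supplied through
// the DataFrame API. Session setup and the table name are illustrative.
public Dataset<Row> readExample() {
    SparkSession spark = SparkSession.builder().appName("bq-read").getOrCreate();
    return spark.read().format("bigquery")
        .option("table", "test_p.test_d.test_t")
        .option("readDataFormat", "ARROW")
        .option("viewsEnabled", "true")
        .option("filter", "test > 0")
        .load();
}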

Example 13 with SQLConf

Use of org.apache.spark.sql.internal.SQLConf in the project spark-bigquery-connector by GoogleCloudDataproc.

From the class SparkBigQueryProxyAndHttpConfigTest, method testConfigViaSparkBigQueryConfigWithAllThreeParameters:

@Test
public void testConfigViaSparkBigQueryConfigWithAllThreeParameters() throws URISyntaxException {
    HashMap<String, String> sparkConfigOptions = new HashMap<>(defaultOptions);
    sparkConfigOptions.put("table", "dataset.table");
    ImmutableMap<String, String> globalOptions = SparkBigQueryConfig.normalizeConf(defaultGlobalOptions);
    DataSourceOptions options = new DataSourceOptions(sparkConfigOptions);
    SparkBigQueryConfig sparkConfig =
        SparkBigQueryConfig.from(
            options.asMap(),
            globalOptions,
            defaultHadoopConfiguration,
            10,
            new SQLConf(),
            "2.4.0",
            Optional.empty());
    SparkBigQueryProxyAndHttpConfig config = (SparkBigQueryProxyAndHttpConfig) sparkConfig.getBigQueryProxyConfig();
    assertThat(config.getProxyUri()).isEqualTo(Optional.of(getURI("http", "bq-connector-host", 1234)));
    assertThat(config.getProxyUsername()).isEqualTo(Optional.of("bq-connector-user"));
    assertThat(config.getProxyPassword()).isEqualTo(Optional.of("bq-connector-password"));
    assertThat(config.getHttpMaxRetry()).isEqualTo(Optional.of(10));
    assertThat(config.getHttpConnectTimeout()).isEqualTo(Optional.of(10000));
    assertThat(config.getHttpReadTimeout()).isEqualTo(Optional.of(20000));
}
Also used: DataSourceOptions (org.apache.spark.sql.sources.v2.DataSourceOptions), SQLConf (org.apache.spark.sql.internal.SQLConf), Test (org.junit.Test)
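
Here too, defaultOptions is a fixture defined outside the snippet. A hypothetical reconstruction, inferred purely from the assertions above (the key names proxyAddress, proxyUsername, proxyPassword, httpMaxRetry, httpConnectTimeout, and httpReadTimeout are the connector's connection-level options; the values are assumptions that make the asserts pass):

// Hypothetical reconstruction of the defaultOptions fixture, inferred from the
// assertions; the actual map lives elsewhere in SparkBigQueryProxyAndHttpConfigTest.
ImmutableMap<String, String> defaultOptions =
    ImmutableMap.<String, String>builder()
        .put("proxyAddress", "http://bq-connector-host:1234")
        .put("proxyUsername", "bq-connector-user")
        .put("proxyPassword", "bq-connector-password")
        .put("httpMaxRetry", "10")
        .put("httpConnectTimeout", "10000")
        .put("httpReadTimeout", "20000")
        .build();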

Example 14 with SQLConf

Use of org.apache.spark.sql.internal.SQLConf in the project spark-bigquery-connector by GoogleCloudDataproc.

From the class SparkBigQueryProxyAndHttpConfigTest, method testConfigViaSparkBigQueryConfigWithGlobalOptionsAndHadoopConfiguration:

@Test
public void testConfigViaSparkBigQueryConfigWithGlobalOptionsAndHadoopConfiguration() throws URISyntaxException {
    HashMap<String, String> sparkConfigOptions = new HashMap<>();
    sparkConfigOptions.put("table", "dataset.table");
    ImmutableMap<String, String> globalOptions = SparkBigQueryConfig.normalizeConf(defaultGlobalOptions);
    DataSourceOptions options = new DataSourceOptions(sparkConfigOptions);
    SparkBigQueryConfig sparkConfig =
        SparkBigQueryConfig.from(
            options.asMap(), // contains only one key, "table"
            globalOptions,
            defaultHadoopConfiguration,
            10,
            new SQLConf(),
            "2.4.0",
            Optional.empty());
    SparkBigQueryProxyAndHttpConfig config = (SparkBigQueryProxyAndHttpConfig) sparkConfig.getBigQueryProxyConfig();
    assertThat(config.getProxyUri()).isEqualTo(Optional.of(getURI("http", "bq-connector-host-global", 1234)));
    assertThat(config.getProxyUsername()).isEqualTo(Optional.of("bq-connector-user-global"));
    assertThat(config.getProxyPassword()).isEqualTo(Optional.of("bq-connector-password-global"));
    assertThat(config.getHttpMaxRetry()).isEqualTo(Optional.of(20));
    assertThat(config.getHttpConnectTimeout()).isEqualTo(Optional.of(20000));
    assertThat(config.getHttpReadTimeout()).isEqualTo(Optional.of(30000));
}
Also used: DataSourceOptions (org.apache.spark.sql.sources.v2.DataSourceOptions), SQLConf (org.apache.spark.sql.internal.SQLConf), Test (org.junit.Test)
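
Taken together with the previous example, this test demonstrates the resolution order: per-read options win when present (Example 13 asserts the non-global values even though all three sources are supplied), and global options win over the Hadoop configuration when the per-read options are absent, hence the -global values and larger timeouts here. A sketch of how such global options are commonly supplied, assuming (not confirmed by this snippet) that the spark.datasource.bigquery. prefix is what SparkBigQueryConfig.normalizeConf strips:

// Sketch only: global connector options set on the Spark configuration
// (org.apache.spark.SparkConf). The "spark.datasource.bigquery." prefix is an
// assumption about normalizeConf; the values mirror the -global fixtures
// asserted above.
SparkConf conf = new SparkConf()
    .set("spark.datasource.bigquery.proxyAddress", "http://bq-connector-host-global:1234")
    .set("spark.datasource.bigquery.httpMaxRetry", "20")
    .set("spark.datasource.bigquery.httpConnectTimeout", "20000")
    .set("spark.datasource.bigquery.httpReadTimeout", "30000");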

Aggregations

SQLConf (org.apache.spark.sql.internal.SQLConf): 14 usages
DataSourceOptions (org.apache.spark.sql.sources.v2.DataSourceOptions): 10 usages
Test (org.junit.Test): 10 usages
Configuration (org.apache.hadoop.conf.Configuration): 7 usages
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 1 usage
ObjectOutputStream (java.io.ObjectOutputStream): 1 usage
Dataset (org.apache.spark.sql.Dataset): 1 usage
Row (org.apache.spark.sql.Row): 1 usage
SparkSession (org.apache.spark.sql.SparkSession): 1 usage
LogicalPlan (org.apache.spark.sql.catalyst.plans.logical.LogicalPlan): 1 usage
Distribution (org.apache.spark.sql.connector.distributions.Distribution): 1 usage
SortOrder (org.apache.spark.sql.connector.expressions.SortOrder): 1 usage