Example 1 with SQLConf

Use of org.apache.spark.sql.internal.SQLConf in project iceberg by apache.

Class IcebergSourceBenchmark, method withSQLConf.

protected void withSQLConf(Map<String, String> conf, Action action) {
    SQLConf sqlConf = SQLConf.get();
    // Remember the current value of every key we are about to override.
    Map<String, String> currentConfValues = Maps.newHashMap();
    conf.keySet().forEach(confKey -> {
        if (sqlConf.contains(confKey)) {
            String currentConfValue = sqlConf.getConfString(confKey);
            currentConfValues.put(confKey, currentConfValue);
        }
    });
    // Apply the overrides, rejecting static configs, which cannot change at runtime.
    conf.forEach((confKey, confValue) -> {
        if (SQLConf.isStaticConfigKey(confKey)) {
            throw new RuntimeException("Cannot modify the value of a static config: " + confKey);
        }
        sqlConf.setConfString(confKey, confValue);
    });
    try {
        action.invoke();
    } finally {
        // Restore each key to its previous value, or unset it if it was not set before.
        conf.forEach((confKey, confValue) -> {
            if (currentConfValues.containsKey(confKey)) {
                sqlConf.setConfString(confKey, currentConfValues.get(confKey));
            } else {
                sqlConf.unsetConf(confKey);
            }
        });
    }
}
Also used: SQLConf (org.apache.spark.sql.internal.SQLConf)
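
A hedged sketch of how a benchmark might call this helper; the spark session, the conf key, and the table name below are illustrative, and the lambda assumes Action is a functional interface (see the sketch after Example 2):

withSQLConf(
    ImmutableMap.of("spark.sql.parquet.enableVectorizedReader", "true"),
    () -> {
        // Hypothetical benchmark body: read an Iceberg table with vectorized Parquet reads on.
        // The previous session value of the conf key is restored in the finally block above.
        spark.read().format("iceberg").load("db.table").count();
    });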

Example 2 with SQLConf

Use of org.apache.spark.sql.internal.SQLConf in project iceberg by apache.

Class AvroDataTest, method withSQLConf. The body is identical to Example 1; only the throws IOException clause in the signature differs.

protected void withSQLConf(Map<String, String> conf, Action action) throws IOException {
    SQLConf sqlConf = SQLConf.get();
    Map<String, String> currentConfValues = Maps.newHashMap();
    conf.keySet().forEach(confKey -> {
        if (sqlConf.contains(confKey)) {
            String currentConfValue = sqlConf.getConfString(confKey);
            currentConfValues.put(confKey, currentConfValue);
        }
    });
    conf.forEach((confKey, confValue) -> {
        if (SQLConf.isStaticConfigKey(confKey)) {
            throw new RuntimeException("Cannot modify the value of a static config: " + confKey);
        }
        sqlConf.setConfString(confKey, confValue);
    });
    try {
        action.invoke();
    } finally {
        conf.forEach((confKey, confValue) -> {
            if (currentConfValues.containsKey(confKey)) {
                sqlConf.setConfString(confKey, currentConfValues.get(confKey));
            } else {
                sqlConf.unsetConf(confKey);
            }
        });
    }
}
Also used: SQLConf (org.apache.spark.sql.internal.SQLConf)
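
Neither example shows the Action type these helpers accept. A minimal sketch inferred from the action.invoke() calls above; this is an assumption, not Iceberg's actual definition:

@FunctionalInterface
interface Action {
    // Example 1 calls invoke() without handling checked exceptions, so it is unchecked here;
    // the AvroDataTest variant, whose enclosing method throws IOException, presumably
    // declares a matching throws clause on its own Action type.
    void invoke();
}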

Example 3 with SQLConf

Use of org.apache.spark.sql.internal.SQLConf in project spark-bigquery-connector by GoogleCloudDataproc.

Class SparkBigQueryConfigTest, method testConfigFromGlobalOptions.

@Test
public void testConfigFromGlobalOptions() {
    Configuration hadoopConfiguration = new Configuration();
    DataSourceOptions options = new DataSourceOptions(
        ImmutableMap.<String, String>builder().put("table", "dataset.table").build());
    // Global options may use the bare name or the spark.datasource.bigquery. prefix.
    ImmutableMap<String, String> globalOptions = ImmutableMap.<String, String>builder()
        .put("viewsEnabled", "true")
        .put("spark.datasource.bigquery.temporaryGcsBucket", "bucket")
        .put("bqStorageReadEndpoint", "ep")
        .put("bqEncodedCreateReadSessionRequest", "ec")
        .put("writeMethod", "direct")
        .build();
    SparkBigQueryConfig config = SparkBigQueryConfig.from(options.asMap(), globalOptions,
        hadoopConfiguration, DEFAULT_PARALLELISM, new SQLConf(), SPARK_VERSION, Optional.empty());
    assertThat(config.isViewsEnabled()).isTrue();
    assertThat(config.getTemporaryGcsBucket()).isEqualTo(Optional.of("bucket"));
    assertThat(config.toReadSessionCreatorConfig().endpoint().get()).isEqualTo("ep");
    assertThat(config.toReadSessionCreatorConfig().getRequestEncodedBase().get()).isEqualTo("ec");
    assertThat(config.getWriteMethod()).isEqualTo(SparkBigQueryConfig.WriteMethod.DIRECT);
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), DataSourceOptions (org.apache.spark.sql.sources.v2.DataSourceOptions), SQLConf (org.apache.spark.sql.internal.SQLConf), Test (org.junit.Test)
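
Note how temporaryGcsBucket is supplied with the spark.datasource.bigquery. prefix yet resolves like a bare option. A minimal sketch of such prefix normalization, assuming a plain prefix strip; this helper is illustrative, not the connector's actual code:

// Illustrative helper (assumes java.util.Map / java.util.HashMap): collapse prefixed
// global option keys onto their bare names so both spellings configure the same setting.
static Map<String, String> normalizeGlobalOptions(Map<String, String> globalOptions) {
    String prefix = "spark.datasource.bigquery.";
    Map<String, String> normalized = new HashMap<>();
    globalOptions.forEach((key, value) ->
        normalized.put(key.startsWith(prefix) ? key.substring(prefix.length()) : key, value));
    return normalized;
}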

Example 4 with SQLConf

Use of org.apache.spark.sql.internal.SQLConf in project spark-bigquery-connector by GoogleCloudDataproc.

Class SparkBigQueryConfigTest, method testDefaults.

@Test
public void testDefaults() {
    Configuration hadoopConfiguration = new Configuration();
    DataSourceOptions options = new DataSourceOptions(defaultOptions);
    SparkBigQueryConfig config = SparkBigQueryConfig.from(options.asMap(), ImmutableMap.of(),
        hadoopConfiguration, DEFAULT_PARALLELISM, new SQLConf(), SPARK_VERSION, Optional.empty());
    assertThat(config.getTableId()).isEqualTo(TableId.of("dataset", "table"));
    assertThat(config.getFilter()).isEqualTo(Optional.empty());
    assertThat(config.getSchema()).isEqualTo(Optional.empty());
    assertThat(config.getMaxParallelism()).isEqualTo(OptionalInt.empty());
    assertThat(config.getTemporaryGcsBucket()).isEqualTo(Optional.empty());
    assertThat(config.getIntermediateFormat()).isEqualTo(SparkBigQueryConfig.DEFAULT_INTERMEDIATE_FORMAT);
    assertThat(config.getReadDataFormat()).isEqualTo(SparkBigQueryConfig.DEFAULT_READ_DATA_FORMAT);
    assertThat(config.getMaterializationProject()).isEqualTo(Optional.empty());
    assertThat(config.getMaterializationDataset()).isEqualTo(Optional.empty());
    assertThat(config.getPartitionField()).isEqualTo(Optional.empty());
    assertThat(config.getPartitionExpirationMs()).isEqualTo(OptionalLong.empty());
    assertThat(config.getPartitionRequireFilter()).isEqualTo(Optional.empty());
    assertThat(config.getPartitionType()).isEqualTo(Optional.empty());
    assertThat(config.getClusteredFields()).isEqualTo(Optional.empty());
    assertThat(config.getCreateDisposition()).isEqualTo(Optional.empty());
    assertThat(config.getLoadSchemaUpdateOptions()).isEqualTo(ImmutableList.of());
    assertThat(config.getMaterializationExpirationTimeInMinutes()).isEqualTo(24 * 60);
    assertThat(config.getMaxReadRowsRetries()).isEqualTo(3);
    assertThat(config.isUseAvroLogicalTypes()).isFalse();
    assertThat(config.getBigQueryClientConnectTimeout()).isEqualTo(60 * 1000);
    assertThat(config.getBigQueryClientReadTimeout()).isEqualTo(60 * 1000);
    assertThat(config.getBigQueryClientRetrySettings().getMaxAttempts()).isEqualTo(10);
    assertThat(config.getArrowCompressionCodec()).isEqualTo(CompressionCodec.COMPRESSION_UNSPECIFIED);
    assertThat(config.getWriteMethod()).isEqualTo(SparkBigQueryConfig.WriteMethod.INDIRECT);
    assertThat(config.getCacheExpirationTimeInMinutes()).isEqualTo(SparkBigQueryConfig.DEFAULT_CACHE_EXPIRATION_IN_MINUTES);
    assertThat(config.getTraceId().isPresent()).isFalse();
    assertThat(config.getBigQueryJobLabels()).isEmpty();
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), DataSourceOptions (org.apache.spark.sql.sources.v2.DataSourceOptions), SQLConf (org.apache.spark.sql.internal.SQLConf), Test (org.junit.Test)
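
For contrast, a hedged sketch of a test that overrides one of those defaults; it reuses only option keys and assertions already shown on this page, and the test name is hypothetical:

@Test
public void testWriteMethodOverridesDefault() {
    Configuration hadoopConfiguration = new Configuration();
    // "writeMethod" = "direct" is the same override exercised in Example 3.
    DataSourceOptions options = new DataSourceOptions(ImmutableMap.<String, String>builder()
        .put("table", "dataset.table").put("writeMethod", "direct").build());
    SparkBigQueryConfig config = SparkBigQueryConfig.from(options.asMap(), ImmutableMap.of(),
        hadoopConfiguration, DEFAULT_PARALLELISM, new SQLConf(), SPARK_VERSION, Optional.empty());
    // INDIRECT is the default (Example 4); the explicit option flips it to DIRECT.
    assertThat(config.getWriteMethod()).isEqualTo(SparkBigQueryConfig.WriteMethod.DIRECT);
}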

Example 5 with SQLConf

Use of org.apache.spark.sql.internal.SQLConf in project spark-bigquery-connector by GoogleCloudDataproc.

Class SparkBigQueryConfigTest, method testInvalidCompressionCodec.

@Test
public void testInvalidCompressionCodec() {
    Configuration hadoopConfiguration = new Configuration();
    DataSourceOptions options = new DataSourceOptions(ImmutableMap.<String, String>builder()
        .put("table", "test_t").put("dataset", "test_d").put("project", "test_p")
        .put("arrowCompressionCodec", "randomCompression").build());
    IllegalArgumentException exception = Assert.assertThrows(IllegalArgumentException.class,
        () -> SparkBigQueryConfig.from(options.asMap(), ImmutableMap.of(), hadoopConfiguration,
            DEFAULT_PARALLELISM, new SQLConf(), SPARK_VERSION, Optional.empty()));
    // The connector upper-cases the supplied codec name before matching, hence 'RANDOMCOMPRESSION'.
    assertThat(exception).hasMessageThat().contains(
        "Compression codec 'RANDOMCOMPRESSION' for Arrow is not supported."
            + " Supported formats are " + Arrays.toString(CompressionCodec.values()));
}
Also used: Configuration (org.apache.hadoop.conf.Configuration), DataSourceOptions (org.apache.spark.sql.sources.v2.DataSourceOptions), SQLConf (org.apache.spark.sql.internal.SQLConf), Test (org.junit.Test)
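
The positive case would look like the sketch below. It assumes the CompressionCodec enum exposes LZ4_FRAME, as in the BigQuery Storage API's ArrowSerializationOptions; that member is not confirmed by this page, so treat it as an assumption:

@Test
public void testValidCompressionCodec() {
    Configuration hadoopConfiguration = new Configuration();
    DataSourceOptions options = new DataSourceOptions(ImmutableMap.<String, String>builder()
        .put("table", "test_t").put("dataset", "test_d").put("project", "test_p")
        .put("arrowCompressionCodec", "LZ4_FRAME").build());
    SparkBigQueryConfig config = SparkBigQueryConfig.from(options.asMap(), ImmutableMap.of(),
        hadoopConfiguration, DEFAULT_PARALLELISM, new SQLConf(), SPARK_VERSION, Optional.empty());
    // Assumption: LZ4_FRAME is a member of CompressionCodec.values().
    assertThat(config.getArrowCompressionCodec()).isEqualTo(CompressionCodec.LZ4_FRAME);
}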

Aggregations

SQLConf (org.apache.spark.sql.internal.SQLConf): 14
DataSourceOptions (org.apache.spark.sql.sources.v2.DataSourceOptions): 10
Test (org.junit.Test): 10
Configuration (org.apache.hadoop.conf.Configuration): 7
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 1
ObjectOutputStream (java.io.ObjectOutputStream): 1
Dataset (org.apache.spark.sql.Dataset): 1
Row (org.apache.spark.sql.Row): 1
SparkSession (org.apache.spark.sql.SparkSession): 1
LogicalPlan (org.apache.spark.sql.catalyst.plans.logical.LogicalPlan): 1
Distribution (org.apache.spark.sql.connector.distributions.Distribution): 1
SortOrder (org.apache.spark.sql.connector.expressions.SortOrder): 1