use of org.apache.spark.sql.sources.v2.DataSourceOptions in project iceberg by apache.
From the class IcebergSource, method createWriter:
@Override
public Optional<DataSourceWriter> createWriter(String jobId, StructType dsStruct, SaveMode mode,
                                               DataSourceOptions options) {
  Preconditions.checkArgument(mode == SaveMode.Append || mode == SaveMode.Overwrite,
      "Save mode %s is not supported", mode);

  Configuration conf = new Configuration(lazyBaseConf());
  Table table = getTableAndResolveHadoopConfiguration(options, conf);
  SparkWriteConf writeConf = new SparkWriteConf(lazySparkSession(), table, options.asMap());

  Preconditions.checkArgument(
      writeConf.handleTimestampWithoutZone() || !SparkUtil.hasTimestampWithoutZone(table.schema()),
      SparkUtil.TIMESTAMP_WITHOUT_TIMEZONE_ERROR);

  Schema writeSchema = SparkSchemaUtil.convert(table.schema(), dsStruct);
  TypeUtil.validateWriteSchema(table.schema(), writeSchema, writeConf.checkNullability(), writeConf.checkOrdering());
  SparkUtil.validatePartitionTransforms(table.spec());

  String appId = lazySparkSession().sparkContext().applicationId();
  String wapId = writeConf.wapId();
  boolean replacePartitions = mode == SaveMode.Overwrite;

  return Optional.of(new Writer(
      lazySparkSession(), table, writeConf, replacePartitions, appId, wapId, writeSchema, dsStruct));
}
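For context, a minimal usage sketch (not taken from the Iceberg sources): when a DataFrame is written through the V2 API, the options set on the DataFrameWriter are what Spark packs into the DataSourceOptions argument above. Here df and tableLocation are hypothetical; the Dataset is assumed to match the table schema.

// Hedged usage sketch: "df" and "tableLocation" are assumptions, not from the source.
// Any option set here reaches createWriter via options.asMap() and SparkWriteConf.
df.write()
    .format("iceberg")
    .mode(SaveMode.Append)              // SaveMode.Overwrite would set replacePartitions = true
    .option("write-format", "parquet")  // example write option surfaced through SparkWriteConf
    .save(tableLocation);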
use of org.apache.spark.sql.sources.v2.DataSourceOptions in project iceberg by apache.
From the class IcebergSource, method createReader:
@Override
public DataSourceReader createReader(StructType readSchema, DataSourceOptions options) {
  Configuration conf = new Configuration(lazyBaseConf());
  Table table = getTableAndResolveHadoopConfiguration(options, conf);
  String caseSensitive = lazySparkSession().conf().get("spark.sql.caseSensitive");

  Reader reader = new Reader(lazySparkSession(), table, Boolean.parseBoolean(caseSensitive), options);
  if (readSchema != null) {
    // convert() will fail if readSchema contains fields not in reader.snapshotSchema()
    SparkSchemaUtil.convert(reader.snapshotSchema(), readSchema);
    reader.pruneColumns(readSchema);
  }

  return reader;
}
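A brief, hedged sketch of calling this overload directly with a projection schema; the column names "id" and "data" and the table location are assumptions for illustration. Passing a non-null readSchema exercises the pruning branch above.

// Minimal sketch (hypothetical columns and location): the projection is validated
// against the snapshot schema and then used to prune the reader's columns.
IcebergSource source = new IcebergSource();
DataSourceOptions options = new DataSourceOptions(ImmutableMap.of("path", tableLocation));
StructType projection = new StructType()
    .add("id", DataTypes.LongType)
    .add("data", DataTypes.StringType);
DataSourceReader reader = source.createReader(projection, options);  // readSchema != null, columns pruned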
use of org.apache.spark.sql.sources.v2.DataSourceOptions in project iceberg by apache.
From the class TestFilteredScan, method testInFilter:
@Test
public void testInFilter() {
  File location = buildPartitionedTable("partitioned_by_data", PARTITION_BY_DATA, "data_ident", "data");

  DataSourceOptions options = new DataSourceOptions(ImmutableMap.of("path", location.toString()));
  IcebergSource source = new IcebergSource();
  DataSourceReader reader = source.createReader(options);
  pushFilters(reader, new In("data", new String[] { "foo", "junction", "brush", null }));

  Assert.assertEquals(2, reader.planInputPartitions().size());
}
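The pushFilters helper is not shown above; a plausible sketch of it, assuming the reader implements Spark's SupportsPushDownFilters mix-in (the actual helper in TestFilteredScan may differ):

// Hedged sketch of the test helper: cast the reader to the push-down mix-in
// and hand it the filters so they reach the underlying Iceberg scan.
private void pushFilters(DataSourceReader reader, Filter... filters) {
  Assert.assertTrue(reader instanceof SupportsPushDownFilters);
  SupportsPushDownFilters filterable = (SupportsPushDownFilters) reader;
  filterable.pushFilters(filters);
}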
use of org.apache.spark.sql.sources.v2.DataSourceOptions in project iceberg by apache.
From the class TestFilteredScan, method testInFilterForTimestamp:
@Test
public void testInFilterForTimestamp() {
  File location = buildPartitionedTable("partitioned_by_hour", PARTITION_BY_HOUR, "ts_hour", "ts");

  DataSourceOptions options = new DataSourceOptions(ImmutableMap.of("path", location.toString()));
  IcebergSource source = new IcebergSource();
  DataSourceReader reader = source.createReader(options);
  pushFilters(reader, new In("ts", new Timestamp[] {
      new Timestamp(instant("2017-12-22T00:00:00.123+00:00") / 1000),
      new Timestamp(instant("2017-12-22T09:20:44.294+00:00") / 1000),
      new Timestamp(instant("2017-12-22T00:34:00.184+00:00") / 1000),
      new Timestamp(instant("2017-12-21T15:15:16.230+00:00") / 1000),
      null
  }));

  Assert.assertEquals("Should create 1 task for 2017-12-21: 15", 1, reader.planInputPartitions().size());
}
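The instant helper is also not shown; the division by 1000 suggests it returns microseconds since the epoch, which java.sql.Timestamp's millisecond constructor then consumes. A minimal equivalent sketch using java.time.OffsetDateTime and java.util.concurrent.TimeUnit (an assumption, not the test's actual implementation):

// Hypothetical stand-in for instant(...): parse an ISO-8601 offset timestamp
// and return microseconds since the epoch (millisecond fractions preserved).
private static long instant(String timestampString) {
  Instant parsed = OffsetDateTime.parse(timestampString).toInstant();
  return TimeUnit.SECONDS.toMicros(parsed.getEpochSecond())
      + TimeUnit.NANOSECONDS.toMicros(parsed.getNano());
}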
use of org.apache.spark.sql.sources.v2.DataSourceOptions in project TileDB-Spark by TileDB-Inc.
From the class TileDBDataSourceOptionsTest, method testTileDBSchemaExtent:
@Test
public void testTileDBSchemaExtent() throws Exception {
  HashMap<String, String> optionMap = new HashMap<>();
  optionMap.put("uri", "s3://foo/bar");
  optionMap.put("schema.dim.0.extent", "10");
  optionMap.put("schema.dim.1.extent", "1025.34");
  TileDBDataSourceOptions options = new TileDBDataSourceOptions(new DataSourceOptions(optionMap));

  Optional<Long> dim0Extent = options.getSchemaDimensionExtentLong(0);
  Optional<Double> dim1Extent = options.getSchemaDimensionExtentDouble(1);

  Assert.assertTrue(dim0Extent.isPresent());
  Assert.assertEquals(Long.valueOf(10), dim0Extent.get());

  Assert.assertTrue(dim1Extent.isPresent());
  Assert.assertEquals(Double.parseDouble("1025.34"), (double) dim1Extent.get(), 0.001);

  Assert.assertFalse(options.getSchemaDimensionExtentLong(2).isPresent());
  Assert.assertFalse(options.getSchemaDimensionExtentLong(1).isPresent());
}
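For illustration, a hedged sketch of how such a getter could be built over the option map (the actual TileDBDataSourceOptions implementation may differ); it also shows why the "1025.34" extent is present as a Double but absent as a Long:

// Hypothetical parsing of a "schema.dim.<i>.extent" option into an Optional<Long>.
static Optional<Long> schemaDimensionExtentLong(Map<String, String> options, int dimIdx) {
  String value = options.get("schema.dim." + dimIdx + ".extent");
  if (value == null) {
    return Optional.empty();  // dimension 2 has no extent option in the test above
  }
  try {
    return Optional.of(Long.parseLong(value));
  } catch (NumberFormatException e) {
    return Optional.empty();  // "1025.34" is not a valid long, so dimension 1 is absent here
  }
}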