Use of org.apache.spark.sql.sources.v2.reader.DataSourceReader in the Apache Iceberg project: class IcebergSource, method createReader.
@Override
public DataSourceReader createReader(StructType readSchema, DataSourceOptions options) {
  // Layer option-supplied settings over the base Hadoop configuration, then resolve the table.
  Configuration conf = new Configuration(lazyBaseConf());
  Table table = getTableAndResolveHadoopConfiguration(options, conf);

  // Honor the Spark session's case-sensitivity setting when resolving column names.
  boolean caseSensitive = Boolean.parseBoolean(lazySparkSession().conf().get("spark.sql.caseSensitive"));
  Reader reader = new Reader(lazySparkSession(), table, caseSensitive, options);

  if (readSchema != null) {
    // convert() will fail if readSchema contains fields not in reader.snapshotSchema()
    SparkSchemaUtil.convert(reader.snapshotSchema(), readSchema);
    reader.pruneColumns(readSchema);
  }

  return reader;
}
Use of org.apache.spark.sql.sources.v2.reader.DataSourceReader in the Apache Iceberg project: class TestFilteredScan, method pushFilters.
// Test helper: applies the given filters to a reader that must support push-down.
private void pushFilters(DataSourceReader reader, Filter... filters) {
  // Guard: the reader implementation is expected to support filter push-down.
  Assertions.assertThat(reader).isInstanceOf(SupportsPushDownFilters.class);
  ((SupportsPushDownFilters) reader).pushFilters(filters);
}
Use of org.apache.spark.sql.sources.v2.reader.DataSourceReader in the Apache Iceberg project: class TestFilteredScan, method testInFilter.
@Test
public void testInFilter() {
  File location = buildPartitionedTable("partitioned_by_data", PARTITION_BY_DATA, "data_ident", "data");

  IcebergSource source = new IcebergSource();
  DataSourceReader reader =
      source.createReader(new DataSourceOptions(ImmutableMap.of("path", location.toString())));

  // Push an IN filter containing a null element; the reader should tolerate the null
  // and plan tasks only for the matching partitions.
  pushFilters(reader, new In("data", new String[] { "foo", "junction", "brush", null }));

  Assert.assertEquals(2, reader.planInputPartitions().size());
}
Use of org.apache.spark.sql.sources.v2.reader.DataSourceReader in the Apache Iceberg project: class TestFilteredScan, method testInFilterForTimestamp.
@Test
public void testInFilterForTimestamp() {
  File location = buildPartitionedTable("partitioned_by_hour", PARTITION_BY_HOUR, "ts_hour", "ts");

  IcebergSource source = new IcebergSource();
  DataSourceReader reader =
      source.createReader(new DataSourceOptions(ImmutableMap.of("path", location.toString())));

  // NOTE(review): instant(...) presumably returns microseconds — the / 1000 converts to the
  // milliseconds that java.sql.Timestamp expects; confirm against the instant() helper.
  Timestamp[] inValues = new Timestamp[] {
      new Timestamp(instant("2017-12-22T00:00:00.123+00:00") / 1000),
      new Timestamp(instant("2017-12-22T09:20:44.294+00:00") / 1000),
      new Timestamp(instant("2017-12-22T00:34:00.184+00:00") / 1000),
      new Timestamp(instant("2017-12-21T15:15:16.230+00:00") / 1000),
      null
  };

  // The IN filter includes a null element; only one hourly partition should match.
  pushFilters(reader, new In("ts", inValues));

  Assert.assertEquals("Should create 1 task for 2017-12-21: 15", 1, reader.planInputPartitions().size());
}
Aggregations