Example use of org.apache.spark.sql.util.CaseInsensitiveStringMap in the iceberg project by apache.
From the class TestFilteredScan, method testPartitionedByIdNotStartsWith.
/**
 * Pushes {@code NOT (data STARTS WITH "junc")} into a scan of a table built with
 * {@code PARTITION_BY_ID} and asserts that 9 input partitions are planned.
 */
@Test
public void testPartitionedByIdNotStartsWith() {
  Table partitionedTable = buildPartitionedTable("partitioned_by_id", PARTITION_BY_ID, "id_ident", "id");

  // The scan is addressed by the table location through the "path" option.
  CaseInsensitiveStringMap readOptions =
      new CaseInsensitiveStringMap(ImmutableMap.of("path", partitionedTable.location()));
  SparkScanBuilder scanBuilder =
      new SparkScanBuilder(spark, TABLES.load(readOptions.get("path")), readOptions);

  pushFilters(scanBuilder, new Not(new StringStartsWith("data", "junc")));

  int plannedTaskCount = scanBuilder.build().toBatch().planInputPartitions().length;
  Assert.assertEquals(9, plannedTaskCount);
}
Example use of org.apache.spark.sql.util.CaseInsensitiveStringMap in the iceberg project by apache.
From the class TestFilteredScan, method testHourPartitionedTimestampFilters.
/**
 * Checks task planning and row filtering for timestamp predicates on a table built with
 * {@code PARTITION_BY_HOUR}.
 *
 * <p>Three scans are planned, each from its own {@code SparkScanBuilder}:
 * <ul>
 *   <li>no filter: 9 tasks expected;</li>
 *   <li>{@code ts < 2017-12-22T00:00:00+00:00}: 4 tasks expected;</li>
 *   <li>{@code 2017-12-22T06:00:00+00:00 < ts < 2017-12-22T08:00:00+00:00}: 2 tasks expected.</li>
 * </ul>
 * For the two filtered cases the rows returned by {@code read(...)} with the equivalent SQL
 * predicate are also compared against {@code expected(...)}.
 */
@Test
public void testHourPartitionedTimestampFilters() {
  Table table = buildPartitionedTable("partitioned_by_hour", PARTITION_BY_HOUR, "ts_hour", "ts");
  CaseInsensitiveStringMap options = new CaseInsensitiveStringMap(ImmutableMap.of("path", table.location()));

  // Baseline: with nothing pushed down, every task is planned.
  Batch unfiltered = new SparkScanBuilder(spark, TABLES.load(options.get("path")), options).build().toBatch();
  Assert.assertEquals("Unfiltered table should create 9 read tasks", 9, unfiltered.planInputPartitions().length);

  // Case 1: everything strictly before 2017-12-22.
  SparkScanBuilder beforeDayBuilder = new SparkScanBuilder(spark, TABLES.load(options.get("path")), options);
  pushFilters(beforeDayBuilder, LessThan.apply("ts", "2017-12-22T00:00:00+00:00"));
  InputPartition[] beforeDayTasks = beforeDayBuilder.build().toBatch().planInputPartitions();
  Assert.assertEquals("Should create 4 tasks for 2017-12-21: 15, 17, 21, 22", 4, beforeDayTasks.length);
  assertEqualsSafe(
      SCHEMA.asStruct(),
      expected(8, 9, 7, 6, 5),
      read(table.location(), vectorized, "ts < cast('2017-12-22 00:00:00+00:00' as timestamp)"));

  // Case 2: open interval (06:00, 08:00) on 2017-12-22.
  SparkScanBuilder rangeBuilder = new SparkScanBuilder(spark, TABLES.load(options.get("path")), options);
  pushFilters(
      rangeBuilder,
      And.apply(
          GreaterThan.apply("ts", "2017-12-22T06:00:00+00:00"),
          LessThan.apply("ts", "2017-12-22T08:00:00+00:00")));
  InputPartition[] rangeTasks = rangeBuilder.build().toBatch().planInputPartitions();
  Assert.assertEquals("Should create 2 tasks for 2017-12-22: 6, 7", 2, rangeTasks.length);
  assertEqualsSafe(
      SCHEMA.asStruct(),
      expected(2, 1),
      read(
          table.location(),
          vectorized,
          "ts > cast('2017-12-22 06:00:00+00:00' as timestamp) and "
              + "ts < cast('2017-12-22 08:00:00+00:00' as timestamp)"));
}
Example use of org.apache.spark.sql.util.CaseInsensitiveStringMap in the iceberg project by apache.
From the class TestFilteredScan, method testUnpartitionedIDFilters.
/**
 * For each id in {@code [0, 10)}, pushes {@code id = i} into a scan of the unpartitioned table,
 * asserts that exactly one input partition is planned, and compares the rows returned by
 * {@code read(...)} for the same predicate against {@code expected(i)}.
 */
@Test
public void testUnpartitionedIDFilters() {
  CaseInsensitiveStringMap options = new CaseInsensitiveStringMap(ImmutableMap.of("path", unpartitioned.toString()));

  for (int i = 0; i < 10; i += 1) {
    // Build a fresh scan builder per id so that a filter pushed in an earlier iteration can
    // never affect this one; the test then does not rely on pushFilters replacing prior state.
    SparkScanBuilder builder = new SparkScanBuilder(spark, TABLES.load(options.get("path")), options);
    pushFilters(builder, EqualTo.apply("id", i));

    Batch scan = builder.build().toBatch();
    InputPartition[] partitions = scan.planInputPartitions();
    Assert.assertEquals("Should only create one task for a small file", 1, partitions.length);

    // validate row filtering
    assertEqualsSafe(SCHEMA.asStruct(), expected(i), read(unpartitioned.toString(), vectorized, "id = " + i));
  }
}
Example use of org.apache.spark.sql.util.CaseInsensitiveStringMap in the iceberg project by apache.
From the class TestFilteredScan, method testPartitionedByDataStartsWithFilter.
/**
 * Pushes {@code data STARTS WITH "junc"} into a scan of a table built with
 * {@code PARTITION_BY_DATA} and asserts that a single input partition is planned.
 */
@Test
public void testPartitionedByDataStartsWithFilter() {
  Table dataPartitioned = buildPartitionedTable("partitioned_by_data", PARTITION_BY_DATA, "data_ident", "data");

  CaseInsensitiveStringMap scanOptions =
      new CaseInsensitiveStringMap(ImmutableMap.of("path", dataPartitioned.location()));
  SparkScanBuilder scanBuilder =
      new SparkScanBuilder(spark, TABLES.load(scanOptions.get("path")), scanOptions);

  // Prefix predicate on the same column the table is partitioned by.
  pushFilters(scanBuilder, new StringStartsWith("data", "junc"));

  int taskCount = scanBuilder.build().toBatch().planInputPartitions().length;
  Assert.assertEquals(1, taskCount);
}
Example use of org.apache.spark.sql.util.CaseInsensitiveStringMap in the iceberg project by apache.
From the class TestFilteredScan, method testPartitionedByIdStartsWith.
/**
 * Pushes {@code data STARTS WITH "junc"} into a scan of a table built with
 * {@code PARTITION_BY_ID} and asserts that a single input partition is planned.
 */
@Test
public void testPartitionedByIdStartsWith() {
  Table idPartitioned = buildPartitionedTable("partitioned_by_id", PARTITION_BY_ID, "id_ident", "id");

  CaseInsensitiveStringMap pathOptions =
      new CaseInsensitiveStringMap(ImmutableMap.of("path", idPartitioned.location()));
  SparkScanBuilder filteredBuilder =
      new SparkScanBuilder(spark, TABLES.load(pathOptions.get("path")), pathOptions);

  // The predicate targets "data" while the table is built with the id-based partition spec.
  pushFilters(filteredBuilder, new StringStartsWith("data", "junc"));

  int plannedCount = filteredBuilder.build().toBatch().planInputPartitions().length;
  Assert.assertEquals(1, plannedCount);
}
Aggregations