
Example 1 with StringStartsWith

Use of org.apache.spark.sql.sources.StringStartsWith in project iceberg by apache.

From class TestFilteredScan, method testPartitionedByIdNotStartsWith:

@Test
public void testPartitionedByIdNotStartsWith() {
    Table table = buildPartitionedTable("partitioned_by_id", PARTITION_BY_ID, "id_ident", "id");
    CaseInsensitiveStringMap options = new CaseInsensitiveStringMap(ImmutableMap.of("path", table.location()));
    SparkScanBuilder builder = new SparkScanBuilder(spark, TABLES.load(options.get("path")), options);
    // push NOT(startsWith) on the non-partition column "data"
    pushFilters(builder, new Not(new StringStartsWith("data", "junc")));
    Batch scan = builder.build().toBatch();
    // this fixture plans 9 input partitions once the filter is applied
    Assert.assertEquals(9, scan.planInputPartitions().length);
}
Also used: Not (org.apache.spark.sql.sources.Not), Table (org.apache.iceberg.Table), StringStartsWith (org.apache.spark.sql.sources.StringStartsWith), Batch (org.apache.spark.sql.connector.read.Batch), CaseInsensitiveStringMap (org.apache.spark.sql.util.CaseInsensitiveStringMap), Test (org.junit.Test)
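
For context, a pushed-down filter like this usually originates from an ordinary DataFrame predicate. A minimal sketch, assuming a SparkSession named spark and the same table location, of a read whose predicate Spark translates into Not(StringStartsWith("data", "junc")) before offering it to the source:

import static org.apache.spark.sql.functions.col;
import static org.apache.spark.sql.functions.not;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;

// Hypothetical read; Spark rewrites the Column predicate into the data
// source filter Not(StringStartsWith("data", "junc")) for pushdown.
Dataset<Row> df = spark.read()
    .format("iceberg")
    .load(table.location())
    .filter(not(col("data").startsWith("junc")));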

Example 2 with StringStartsWith

Use of org.apache.spark.sql.sources.StringStartsWith in project iceberg by apache.

From class TestFilteredScan, method testPartitionedByDataStartsWithFilter:

@Test
public void testPartitionedByDataStartsWithFilter() {
    Table table = buildPartitionedTable("partitioned_by_data", PARTITION_BY_DATA, "data_ident", "data");
    CaseInsensitiveStringMap options = new CaseInsensitiveStringMap(ImmutableMap.of("path", table.location()));
    SparkScanBuilder builder = new SparkScanBuilder(spark, TABLES.load(options.get("path")), options);
    // push startsWith on the partition column "data"
    pushFilters(builder, new StringStartsWith("data", "junc"));
    Batch scan = builder.build().toBatch();
    // partition pruning leaves a single input partition
    Assert.assertEquals(1, scan.planInputPartitions().length);
}
Also used: Table (org.apache.iceberg.Table), StringStartsWith (org.apache.spark.sql.sources.StringStartsWith), Batch (org.apache.spark.sql.connector.read.Batch), CaseInsensitiveStringMap (org.apache.spark.sql.util.CaseInsensitiveStringMap), Test (org.junit.Test)
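
PARTITION_BY_DATA is not shown on this page. A sketch of the kind of spec it presumably represents, built with Iceberg's PartitionSpec API (the schema and field ids here are assumptions); an identity partition on data is what lets startsWith prune down to a single input partition, and the same projection also works for a truncate transform:

import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.types.Types;

// Assumed schema: the tests reference "id" and "data" columns.
Schema schema = new Schema(
    Types.NestedField.optional(1, "id", Types.LongType.get()),
    Types.NestedField.optional(2, "data", Types.StringType.get()));

// Identity partitioning on "data": startsWith("data", "junc") can skip
// every partition whose value does not begin with the prefix.
PartitionSpec byData = PartitionSpec.builderFor(schema).identity("data").build();

// startsWith also projects onto truncate transforms such as truncate(data, 4).
PartitionSpec byTruncatedData = PartitionSpec.builderFor(schema).truncate("data", 4).build();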

Example 3 with StringStartsWith

Use of org.apache.spark.sql.sources.StringStartsWith in project iceberg by apache.

From class TestFilteredScan, method testPartitionedByIdStartsWith:

@Test
public void testPartitionedByIdStartsWith() {
    Table table = buildPartitionedTable("partitioned_by_id", PARTITION_BY_ID, "id_ident", "id");
    CaseInsensitiveStringMap options = new CaseInsensitiveStringMap(ImmutableMap.of("path", table.location()));
    SparkScanBuilder builder = new SparkScanBuilder(spark, TABLES.load(options.get("path")), options);
    // the table is partitioned by "id", but the filter on "data" can still
    // prune files using per-file column bounds
    pushFilters(builder, new StringStartsWith("data", "junc"));
    Batch scan = builder.build().toBatch();
    Assert.assertEquals(1, scan.planInputPartitions().length);
}
Also used: Table (org.apache.iceberg.Table), StringStartsWith (org.apache.spark.sql.sources.StringStartsWith), Batch (org.apache.spark.sql.connector.read.Batch), CaseInsensitiveStringMap (org.apache.spark.sql.util.CaseInsensitiveStringMap), Test (org.junit.Test)
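
Note that this table is partitioned by id, yet a startsWith filter on data still narrows the scan to one input partition. Iceberg evaluates expressions against per-file column bounds as well as partition values, so the converted expression can skip data files whose data range cannot contain the prefix. The Iceberg-side equivalent of the pushed filter (see SparkFilters.convert in Example 5):

import static org.apache.iceberg.expressions.Expressions.startsWith;

import org.apache.iceberg.expressions.Expression;

// File-level lower/upper bounds on "data" are enough to skip files that
// cannot contain values starting with "junc".
Expression expr = startsWith("data", "junc");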

Example 4 with StringStartsWith

Use of org.apache.spark.sql.sources.StringStartsWith in project spark-cassandra-bulkreader by jberragan.

From class DataLayerUnsupportedPushDownFiltersTest, method testUnsupportedFilters:

@Test
public void testUnsupportedFilters() {
    runTest((partitioner, dir, bridge) -> {
        final TestSchema schema = TestSchema.basic(bridge);
        final List<Path> dataFiles = getFileType(dir, DataLayer.FileType.DATA).collect(Collectors.toList());
        final TestDataLayer dataLayer = new TestDataLayer(bridge, dataFiles, schema.buildSchema());
        List<Filter> unsupportedFilterList = Arrays.asList(
            new EqualNullSafe("a", 5),
            new GreaterThan("a", 5),
            new GreaterThanOrEqual("a", 5),
            new LessThan("a", 5),
            new LessThanOrEqual("a", 5),
            new IsNull("a"),
            new IsNotNull("a"),
            new And(new EqualTo("a", 5), new EqualTo("b", 6)),
            new Or(new EqualTo("a", 5), new EqualTo("b", 6)),
            new Not(new In("a", new Object[] { 5, 6, 7 })),
            new StringStartsWith("a", "abc"),
            new StringEndsWith("a", "abc"),
            new StringContains("a", "abc"));
        for (Filter unsupportedFilter : unsupportedFilterList) {
            Filter[] allFilters = { unsupportedFilter };
            Filter[] unsupportedFilters = dataLayer.unsupportedPushDownFilters(allFilters);
            assertNotNull(unsupportedFilters);
            // every filter in the list should be reported back as unsupported
            assertEquals(1, unsupportedFilters.length);
        }
    });
}
Also used: Path (java.nio.file.Path), Or (org.apache.spark.sql.sources.Or), StringStartsWith (org.apache.spark.sql.sources.StringStartsWith), EqualNullSafe (org.apache.spark.sql.sources.EqualNullSafe), LessThanOrEqual (org.apache.spark.sql.sources.LessThanOrEqual), In (org.apache.spark.sql.sources.In), StringEndsWith (org.apache.spark.sql.sources.StringEndsWith), TestSchema (org.apache.cassandra.spark.TestSchema), GreaterThanOrEqual (org.apache.spark.sql.sources.GreaterThanOrEqual), EqualTo (org.apache.spark.sql.sources.EqualTo), LessThan (org.apache.spark.sql.sources.LessThan), IsNotNull (org.apache.spark.sql.sources.IsNotNull), Not (org.apache.spark.sql.sources.Not), Filter (org.apache.spark.sql.sources.Filter), GreaterThan (org.apache.spark.sql.sources.GreaterThan), And (org.apache.spark.sql.sources.And), TestDataLayer (org.apache.cassandra.spark.TestDataLayer), IsNull (org.apache.spark.sql.sources.IsNull), StringContains (org.apache.spark.sql.sources.StringContains), TestUtils.runTest (org.apache.cassandra.spark.TestUtils.runTest), Test (org.junit.Test)
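
The DataLayer implementation itself is not shown here. A minimal sketch of how unsupportedPushDownFilters might be written, under the hypothetical assumption that this source handles only EqualTo and In and echoes everything else, including StringStartsWith, back to Spark for post-scan evaluation:

import java.util.Arrays;
import org.apache.spark.sql.sources.EqualTo;
import org.apache.spark.sql.sources.Filter;
import org.apache.spark.sql.sources.In;

// Hypothetical sketch: return the subset of filters the source cannot
// evaluate; Spark re-applies exactly these after the scan.
public Filter[] unsupportedPushDownFilters(Filter[] filters) {
    return Arrays.stream(filters)
                 .filter(f -> !(f instanceof EqualTo || f instanceof In))
                 .toArray(Filter[]::new);
}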

Example 5 with StringStartsWith

Use of org.apache.spark.sql.sources.StringStartsWith in project iceberg by apache.

From class SparkFilters, method convert:

public static Expression convert(Filter filter) {
    // avoid using a chain of if instanceof statements by mapping to the expression enum.
    Operation op = FILTERS.get(filter.getClass());
    if (op != null) {
        switch(op) {
            case TRUE:
                return Expressions.alwaysTrue();
            case FALSE:
                return Expressions.alwaysFalse();
            case IS_NULL:
                IsNull isNullFilter = (IsNull) filter;
                return isNull(unquote(isNullFilter.attribute()));
            case NOT_NULL:
                IsNotNull notNullFilter = (IsNotNull) filter;
                return notNull(unquote(notNullFilter.attribute()));
            case LT:
                LessThan lt = (LessThan) filter;
                return lessThan(unquote(lt.attribute()), convertLiteral(lt.value()));
            case LT_EQ:
                LessThanOrEqual ltEq = (LessThanOrEqual) filter;
                return lessThanOrEqual(unquote(ltEq.attribute()), convertLiteral(ltEq.value()));
            case GT:
                GreaterThan gt = (GreaterThan) filter;
                return greaterThan(unquote(gt.attribute()), convertLiteral(gt.value()));
            case GT_EQ:
                GreaterThanOrEqual gtEq = (GreaterThanOrEqual) filter;
                return greaterThanOrEqual(unquote(gtEq.attribute()), convertLiteral(gtEq.value()));
            // used for both eq and null-safe-eq
            case EQ:
                if (filter instanceof EqualTo) {
                    EqualTo eq = (EqualTo) filter;
                    // comparison with null in normal equality is always null. this is probably a mistake.
                    Preconditions.checkNotNull(eq.value(), "Expression is always false (eq is not null-safe): %s", filter);
                    return handleEqual(unquote(eq.attribute()), eq.value());
                } else {
                    EqualNullSafe eq = (EqualNullSafe) filter;
                    if (eq.value() == null) {
                        return isNull(unquote(eq.attribute()));
                    } else {
                        return handleEqual(unquote(eq.attribute()), eq.value());
                    }
                }
            case IN:
                In inFilter = (In) filter;
                return in(unquote(inFilter.attribute()), Stream.of(inFilter.values()).filter(Objects::nonNull).map(SparkFilters::convertLiteral).collect(Collectors.toList()));
            case NOT:
                Not notFilter = (Not) filter;
                Filter childFilter = notFilter.child();
                Operation childOp = FILTERS.get(childFilter.getClass());
                if (childOp == Operation.IN) {
                    // infer an extra notNull predicate for Spark NOT IN filters
                    // as Iceberg expressions don't follow the 3-value SQL boolean logic
                    // col NOT IN (1, 2) in Spark is equivalent to notNull(col) && notIn(col, 1, 2) in Iceberg
                    In childInFilter = (In) childFilter;
                    Expression notIn = notIn(unquote(childInFilter.attribute()), Stream.of(childInFilter.values()).map(SparkFilters::convertLiteral).collect(Collectors.toList()));
                    return and(notNull(childInFilter.attribute()), notIn);
                } else if (hasNoInFilter(childFilter)) {
                    Expression child = convert(childFilter);
                    if (child != null) {
                        return not(child);
                    }
                }
                return null;
            case AND:
                {
                    And andFilter = (And) filter;
                    Expression left = convert(andFilter.left());
                    Expression right = convert(andFilter.right());
                    if (left != null && right != null) {
                        return and(left, right);
                    }
                    return null;
                }
            case OR:
                {
                    Or orFilter = (Or) filter;
                    Expression left = convert(orFilter.left());
                    Expression right = convert(orFilter.right());
                    if (left != null && right != null) {
                        return or(left, right);
                    }
                    return null;
                }
            case STARTS_WITH:
                {
                    StringStartsWith stringStartsWith = (StringStartsWith) filter;
                    return startsWith(unquote(stringStartsWith.attribute()), stringStartsWith.value());
                }
        }
    }
    return null;
}
Also used: Or (org.apache.spark.sql.sources.Or), StringStartsWith (org.apache.spark.sql.sources.StringStartsWith), LessThanOrEqual (org.apache.spark.sql.sources.LessThanOrEqual), EqualNullSafe (org.apache.spark.sql.sources.EqualNullSafe), Expressions.notIn (org.apache.iceberg.expressions.Expressions.notIn), In (org.apache.spark.sql.sources.In), Operation (org.apache.iceberg.expressions.Expression.Operation), GreaterThanOrEqual (org.apache.spark.sql.sources.GreaterThanOrEqual), EqualTo (org.apache.spark.sql.sources.EqualTo), IsNotNull (org.apache.spark.sql.sources.IsNotNull), LessThan (org.apache.spark.sql.sources.LessThan), Not (org.apache.spark.sql.sources.Not), GreaterThan (org.apache.spark.sql.sources.GreaterThan), Filter (org.apache.spark.sql.sources.Filter), Expression (org.apache.iceberg.expressions.Expression), And (org.apache.spark.sql.sources.And), IsNull (org.apache.spark.sql.sources.IsNull)
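
A short usage sketch tying this back to the tests above: convert returns null for anything it cannot translate, so callers must be prepared to keep the filter on the Spark side:

import org.apache.iceberg.expressions.Expression;
import org.apache.spark.sql.sources.StringStartsWith;

// Equivalent to Expressions.startsWith("data", "junc"); a null result means
// the filter was not convertible and must remain a Spark-side filter.
Expression expr = SparkFilters.convert(new StringStartsWith("data", "junc"));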

Aggregations

StringStartsWith (org.apache.spark.sql.sources.StringStartsWith): 6
Test (org.junit.Test): 5
Table (org.apache.iceberg.Table): 4
Batch (org.apache.spark.sql.connector.read.Batch): 4
Not (org.apache.spark.sql.sources.Not): 4
CaseInsensitiveStringMap (org.apache.spark.sql.util.CaseInsensitiveStringMap): 4
And (org.apache.spark.sql.sources.And): 2
EqualNullSafe (org.apache.spark.sql.sources.EqualNullSafe): 2
EqualTo (org.apache.spark.sql.sources.EqualTo): 2
Filter (org.apache.spark.sql.sources.Filter): 2
GreaterThan (org.apache.spark.sql.sources.GreaterThan): 2
GreaterThanOrEqual (org.apache.spark.sql.sources.GreaterThanOrEqual): 2
In (org.apache.spark.sql.sources.In): 2
IsNotNull (org.apache.spark.sql.sources.IsNotNull): 2
IsNull (org.apache.spark.sql.sources.IsNull): 2
LessThan (org.apache.spark.sql.sources.LessThan): 2
LessThanOrEqual (org.apache.spark.sql.sources.LessThanOrEqual): 2
Or (org.apache.spark.sql.sources.Or): 2
Path (java.nio.file.Path): 1
TestDataLayer (org.apache.cassandra.spark.TestDataLayer): 1