Search in sources:

Example 1 with And

use of org.apache.spark.sql.sources.And in project Gaffer by gchq.

The method testSpecifyPropertyFilters of the class FilterToOperationConverterTest.

/**
 * Checks that Spark property filters are converted into post-aggregation filter functions on the
 * view of the resulting {@code GetRDDOfAllElements} operation.
 * <p>
 * Three cases are exercised in turn, each with a single-element filter array:
 * <ul>
 * <li>{@link GreaterThan} on {@code property1}: checked on the entity group and every edge group;</li>
 * <li>{@link LessThan} on {@code property4}: checked on {@code ENTITY_GROUP} and {@code EDGE_GROUP};</li>
 * <li>{@link And} of two {@link GreaterThan} filters: both selections and both predicates are checked,
 * in left-then-right order, on {@code ENTITY_GROUP} and {@code EDGE_GROUP}.</li>
 * </ul>
 */
@Test
public void testSpecifyPropertyFilters() {
    final Schema schema = getSchema();
    // NOTE(review): the session is not referenced below; the call is kept because the provider may
    // initialise shared Spark state that the converter relies on - confirm before removing.
    final SparkSession sparkSession = SparkSessionProvider.getSparkSession();
    final Filter[] filters = new Filter[1];
    // GreaterThan
    filters[0] = new GreaterThan("property1", 5);
    FiltersToOperationConverter converter = new FiltersToOperationConverter(getViewFromSchema(schema), schema, filters);
    Operation operation = converter.getOperation();
    assertTrue(operation instanceof GetRDDOfAllElements);
    View opView = ((GraphFilters) operation).getView();
    List<TupleAdaptedPredicate<String, ?>> entityPostAggFilters = opView.getEntity(ENTITY_GROUP).getPostAggregationFilterFunctions();
    assertThat(entityPostAggFilters).hasSize(1);
    assertArrayEquals(new String[] { "property1" }, entityPostAggFilters.get(0).getSelection());
    assertEquals(new IsMoreThan(5, false), entityPostAggFilters.get(0).getPredicate());
    for (final String edgeGroup : EDGE_GROUPS) {
        final List<TupleAdaptedPredicate<String, ?>> edgePostAggFilters = opView.getEdge(edgeGroup).getPostAggregationFilterFunctions();
        assertThat(edgePostAggFilters).hasSize(1);
        assertArrayEquals(new String[] { "property1" }, edgePostAggFilters.get(0).getSelection());
        assertEquals(new IsMoreThan(5, false), edgePostAggFilters.get(0).getPredicate());
    }
    // LessThan
    filters[0] = new LessThan("property4", 8L);
    converter = new FiltersToOperationConverter(getViewFromSchema(schema), schema, filters);
    operation = converter.getOperation();
    assertTrue(operation instanceof GetRDDOfAllElements);
    // Only groups ENTITY_GROUP and EDGE_GROUP should be in the view as only they have property4
    opView = ((GraphFilters) operation).getView();
    entityPostAggFilters = opView.getEntity(ENTITY_GROUP).getPostAggregationFilterFunctions();
    assertThat(entityPostAggFilters).hasSize(1);
    assertArrayEquals(new String[] { "property4" }, entityPostAggFilters.get(0).getSelection());
    assertEquals(new IsLessThan(8L, false), entityPostAggFilters.get(0).getPredicate());
    List<TupleAdaptedPredicate<String, ?>> edgePostAggFilters = opView.getEdge(EDGE_GROUP).getPostAggregationFilterFunctions();
    assertThat(edgePostAggFilters).hasSize(1);
    assertArrayEquals(new String[] { "property4" }, edgePostAggFilters.get(0).getSelection());
    assertEquals(new IsLessThan(8L, false), edgePostAggFilters.get(0).getPredicate());
    // And
    final Filter left = new GreaterThan("property1", 5);
    final Filter right = new GreaterThan("property4", 8L);
    filters[0] = new And(left, right);
    converter = new FiltersToOperationConverter(getViewFromSchema(schema), schema, filters);
    operation = converter.getOperation();
    assertTrue(operation instanceof GetRDDOfAllElements);
    // Only groups ENTITY_GROUP and EDGE_GROUP should be in the view as only they have property1 and property4
    opView = ((GraphFilters) operation).getView();
    // Expected selections and predicates, in the order left operand then right operand of the And
    final List<String> expectedProperties = new ArrayList<>();
    expectedProperties.add("property1");
    expectedProperties.add("property4");
    final List<Predicate<?>> expectedFunctions = new ArrayList<>();
    expectedFunctions.add(new IsMoreThan(5, false));
    expectedFunctions.add(new IsMoreThan(8L, false));
    entityPostAggFilters = opView.getEntity(ENTITY_GROUP).getPostAggregationFilterFunctions();
    assertThat(entityPostAggFilters).hasSize(2);
    assertThat(entityPostAggFilters.get(0).getSelection()).hasSize(1);
    assertEquals(expectedProperties.get(0), entityPostAggFilters.get(0).getSelection()[0]);
    assertThat(entityPostAggFilters.get(1).getSelection()).hasSize(1);
    assertEquals(expectedProperties.get(1), entityPostAggFilters.get(1).getSelection()[0]);
    assertEquals(expectedFunctions.get(0), entityPostAggFilters.get(0).getPredicate());
    assertEquals(expectedFunctions.get(1), entityPostAggFilters.get(1).getPredicate());
    edgePostAggFilters = opView.getEdge(EDGE_GROUP).getPostAggregationFilterFunctions();
    assertThat(edgePostAggFilters).hasSize(2);
    assertThat(edgePostAggFilters.get(0).getSelection()).hasSize(1);
    assertEquals(expectedProperties.get(0), edgePostAggFilters.get(0).getSelection()[0]);
    assertThat(edgePostAggFilters.get(1).getSelection()).hasSize(1);
    assertEquals(expectedProperties.get(1), edgePostAggFilters.get(1).getSelection()[0]);
    // The edge group must carry the same predicates as the entity group, not just the same selections
    assertEquals(expectedFunctions.get(0), edgePostAggFilters.get(0).getPredicate());
    assertEquals(expectedFunctions.get(1), edgePostAggFilters.get(1).getPredicate());
}
Also used : TupleAdaptedPredicate(uk.gov.gchq.koryphe.tuple.predicate.TupleAdaptedPredicate) SparkSession(org.apache.spark.sql.SparkSession) Schema(uk.gov.gchq.gaffer.store.schema.Schema) ArrayList(java.util.ArrayList) GraphFilters(uk.gov.gchq.gaffer.operation.graph.GraphFilters) Operation(uk.gov.gchq.gaffer.operation.Operation) View(uk.gov.gchq.gaffer.data.elementdefinition.view.View) TupleAdaptedPredicate(uk.gov.gchq.koryphe.tuple.predicate.TupleAdaptedPredicate) Predicate(java.util.function.Predicate) IsLessThan(uk.gov.gchq.koryphe.impl.predicate.IsLessThan) LessThan(org.apache.spark.sql.sources.LessThan) IsLessThan(uk.gov.gchq.koryphe.impl.predicate.IsLessThan) Filter(org.apache.spark.sql.sources.Filter) GreaterThan(org.apache.spark.sql.sources.GreaterThan) GetRDDOfAllElements(uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfAllElements) And(org.apache.spark.sql.sources.And) IsMoreThan(uk.gov.gchq.koryphe.impl.predicate.IsMoreThan) Test(org.junit.jupiter.api.Test)

Example 2 with And

use of org.apache.spark.sql.sources.And in project Gaffer by gchq.

The method getFunctionsFromFilter of the class FiltersToOperationConverter.

/**
 * Converts a Spark {@link Filter} to a map from group to a list of Gaffer {@link TupleAdaptedPredicate}s.
 * <p>
 * Note that Spark also applies all the filters provided to the {@code buildScan(String[], Filter[])} method
 * so not implementing some of the provided {@link Filter}s in Gaffer will not cause errors. However, as many as
 * possible should be implemented so that as much filtering as possible happens in iterators running in Accumulo's
 * tablet servers (this avoids unnecessary data transfer from Accumulo to Spark).
 * <p>
 * Handled filter types are {@link EqualNullSafe}, {@link GreaterThan}, {@link GreaterThanOrEqual},
 * {@link LessThan}, {@link LessThanOrEqual}, {@link In}, {@link IsNull}, {@link IsNotNull} and {@link And}.
 * Any other filter type (including {@link EqualTo}) produces an empty map. For {@link And}, both operands are
 * converted recursively and their predicates are concatenated per group, left operand first.
 *
 * @param filter The {@link Filter} to transform.
 * @return A map from {@link String} to {@link TupleAdaptedPredicate}s implementing the provided {@link Filter}.
 */
private Map<String, List<TupleAdaptedPredicate<String, ?>>> getFunctionsFromFilter(final Filter filter) {
    final Map<String, List<TupleAdaptedPredicate<String, ?>>> map = new HashMap<>();
    if (filter instanceof EqualTo) {
        // Not dealt with as requires a Predicate<?> that returns null if either the controlValue or the
        // test value is null - the API of Predicate<?> doesn't permit this.
    } else if (filter instanceof EqualNullSafe) {
        final EqualNullSafe equalNullSafe = (EqualNullSafe) filter;
        final Predicate<?> isEqual = new IsEqual(equalNullSafe.value());
        addPredicateForRelevantGroups(map, filter, isEqual, equalNullSafe.attribute());
        LOGGER.debug("Converted {} to IsEqual ({})", filter, equalNullSafe.attribute());
    } else if (filter instanceof GreaterThan) {
        final GreaterThan greaterThan = (GreaterThan) filter;
        final Predicate<?> isMoreThan = new IsMoreThan((Comparable<?>) greaterThan.value(), false);
        addPredicateForRelevantGroups(map, filter, isMoreThan, greaterThan.attribute());
        LOGGER.debug("Converted {} to isMoreThan ({})", filter, greaterThan.attribute());
    } else if (filter instanceof GreaterThanOrEqual) {
        final GreaterThanOrEqual greaterThan = (GreaterThanOrEqual) filter;
        final Predicate<?> isMoreThan = new IsMoreThan((Comparable<?>) greaterThan.value(), true);
        addPredicateForRelevantGroups(map, filter, isMoreThan, greaterThan.attribute());
        LOGGER.debug("Converted {} to IsMoreThan ({})", filter, greaterThan.attribute());
    } else if (filter instanceof LessThan) {
        final LessThan lessThan = (LessThan) filter;
        final Predicate<?> isLessThan = new IsLessThan((Comparable<?>) lessThan.value(), false);
        addPredicateForRelevantGroups(map, filter, isLessThan, lessThan.attribute());
        LOGGER.debug("Converted {} to IsLessThan ({})", filter, lessThan.attribute());
    } else if (filter instanceof LessThanOrEqual) {
        final LessThanOrEqual lessThan = (LessThanOrEqual) filter;
        final Predicate<?> isLessThan = new IsLessThan((Comparable<?>) lessThan.value(), true);
        addPredicateForRelevantGroups(map, filter, isLessThan, lessThan.attribute());
        LOGGER.debug("Converted {} to LessThanOrEqual ({})", filter, lessThan.attribute());
    } else if (filter instanceof In) {
        final In in = (In) filter;
        final Predicate<?> isIn = new IsIn(new HashSet<>(Arrays.asList(in.values())));
        addPredicateForRelevantGroups(map, filter, isIn, in.attribute());
        LOGGER.debug("Converted {} to IsIn ({})", filter, in.attribute());
    } else if (filter instanceof IsNull) {
        final IsNull isNull = (IsNull) filter;
        final Predicate<?> doesntExist = new Not<>(new Exists());
        addPredicateForRelevantGroups(map, filter, doesntExist, isNull.attribute());
        LOGGER.debug("Converted {} to Not(Exists) ({})", filter, isNull.attribute());
    } else if (filter instanceof IsNotNull) {
        final IsNotNull isNotNull = (IsNotNull) filter;
        final Predicate<?> exists = new Exists();
        addPredicateForRelevantGroups(map, filter, exists, isNotNull.attribute());
        LOGGER.debug("Converted {} to Exists ({})", filter, isNotNull.attribute());
    } else if (filter instanceof And) {
        final And and = (And) filter;
        // An operand that cannot be converted yields an empty map and so contributes no predicates.
        final Map<String, List<TupleAdaptedPredicate<String, ?>>> left = getFunctionsFromFilter(and.left());
        final Map<String, List<TupleAdaptedPredicate<String, ?>>> right = getFunctionsFromFilter(and.right());
        final Set<String> relevantGroups = getGroupsFromFilter(filter);
        if (null != relevantGroups) {
            for (final String group : relevantGroups) {
                // Every relevant group gets an entry, even if neither operand produced predicates for it.
                final List<TupleAdaptedPredicate<String, ?>> groupFilters = map.computeIfAbsent(group, k -> new ArrayList<>());
                final List<TupleAdaptedPredicate<String, ?>> leftFilters = left.get(group);
                if (null != leftFilters) {
                    groupFilters.addAll(leftFilters);
                }
                final List<TupleAdaptedPredicate<String, ?>> rightFilters = right.get(group);
                if (null != rightFilters) {
                    groupFilters.addAll(rightFilters);
                }
            }
        }
        // Guarded so the join over all map entries is only computed when it will actually be logged.
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("Converted {} to list of filters ({})", filter, StringUtils.join(map.entrySet(), ','));
        }
    }
    return map;
}

/**
 * Adds a predicate on a single attribute to every group that is relevant to the given filter.
 * <p>
 * The relevant groups are obtained from {@code getGroupsFromFilter(filter)}; if that returns {@code null}
 * the map is left untouched. A new {@link TupleAdaptedPredicate} wrapping the shared {@code predicate} is
 * created for each group and appended to that group's list, which is created if absent.
 *
 * @param map       The map from group to predicates to add to.
 * @param filter    The {@link Filter} used to determine the relevant groups.
 * @param predicate The predicate to apply to the attribute.
 * @param attribute The name of the attribute the predicate selects.
 */
private void addPredicateForRelevantGroups(final Map<String, List<TupleAdaptedPredicate<String, ?>>> map,
                                           final Filter filter,
                                           final Predicate<?> predicate,
                                           final String attribute) {
    final Set<String> relevantGroups = getGroupsFromFilter(filter);
    if (null == relevantGroups) {
        return;
    }
    for (final String group : relevantGroups) {
        map.computeIfAbsent(group, k -> new ArrayList<>())
                .add(new TupleAdaptedPredicate<>(predicate, new String[] { attribute }));
    }
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) EqualNullSafe(org.apache.spark.sql.sources.EqualNullSafe) LessThanOrEqual(org.apache.spark.sql.sources.LessThanOrEqual) HashMap(java.util.HashMap) In(org.apache.spark.sql.sources.In) IsIn(uk.gov.gchq.koryphe.impl.predicate.IsIn) ArrayList(java.util.ArrayList) GreaterThanOrEqual(org.apache.spark.sql.sources.GreaterThanOrEqual) TupleAdaptedPredicate(uk.gov.gchq.koryphe.tuple.predicate.TupleAdaptedPredicate) Predicate(java.util.function.Predicate) IsLessThan(uk.gov.gchq.koryphe.impl.predicate.IsLessThan) LessThan(org.apache.spark.sql.sources.LessThan) IsNotNull(org.apache.spark.sql.sources.IsNotNull) IsLessThan(uk.gov.gchq.koryphe.impl.predicate.IsLessThan) GreaterThan(org.apache.spark.sql.sources.GreaterThan) ArrayList(java.util.ArrayList) List(java.util.List) TupleAdaptedPredicate(uk.gov.gchq.koryphe.tuple.predicate.TupleAdaptedPredicate) IsIn(uk.gov.gchq.koryphe.impl.predicate.IsIn) EqualTo(org.apache.spark.sql.sources.EqualTo) IsEqual(uk.gov.gchq.koryphe.impl.predicate.IsEqual) Exists(uk.gov.gchq.koryphe.impl.predicate.Exists) And(org.apache.spark.sql.sources.And) IsNull(org.apache.spark.sql.sources.IsNull) IsMoreThan(uk.gov.gchq.koryphe.impl.predicate.IsMoreThan) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

ArrayList (java.util.ArrayList)2 Predicate (java.util.function.Predicate)2 And (org.apache.spark.sql.sources.And)2 GreaterThan (org.apache.spark.sql.sources.GreaterThan)2 LessThan (org.apache.spark.sql.sources.LessThan)2 IsLessThan (uk.gov.gchq.koryphe.impl.predicate.IsLessThan)2 IsMoreThan (uk.gov.gchq.koryphe.impl.predicate.IsMoreThan)2 TupleAdaptedPredicate (uk.gov.gchq.koryphe.tuple.predicate.TupleAdaptedPredicate)2 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 List (java.util.List)1 Map (java.util.Map)1 Set (java.util.Set)1 SparkSession (org.apache.spark.sql.SparkSession)1 EqualNullSafe (org.apache.spark.sql.sources.EqualNullSafe)1 EqualTo (org.apache.spark.sql.sources.EqualTo)1 Filter (org.apache.spark.sql.sources.Filter)1 GreaterThanOrEqual (org.apache.spark.sql.sources.GreaterThanOrEqual)1 In (org.apache.spark.sql.sources.In)1 IsNotNull (org.apache.spark.sql.sources.IsNotNull)1