Search in sources :

Example 1 with In

Use of org.apache.spark.sql.sources.In in project Gaffer by gchq.

The class FiltersToOperationConverter, method getFunctionsFromFilter.

/**
 * Converts a Spark {@link Filter} to a map from group to a list of Gaffer {@link ConsumerFunctionContext}s.
 * <p>
 * Note that Spark also applies all the filters provided to the <code>buildScan(String[], Filter[])</code> method
 * so not implementing some of the provided {@link Filter}s in Gaffer will not cause errors. However, as many as
 * possible should be implemented so that as much filtering as possible happens in iterators running in Accumulo's
 * tablet servers (this avoids unnecessary data transfer from Accumulo to Spark).
 * <p>
 * The filters handled here are {@link EqualNullSafe}, {@link GreaterThan}, {@link GreaterThanOrEqual},
 * {@link LessThan}, {@link LessThanOrEqual}, {@link In}, {@link IsNull}, {@link IsNotNull} and {@link And}
 * (whose two sides are converted recursively and concatenated per group). Any other filter, including
 * {@link EqualTo}, yields an empty map.
 *
 * @param filter The {@link Filter} to transform.
 * @return A map from {@link String} to {@link ConsumerFunctionContext}s implementing the provided {@link Filter}.
 * The map is empty (never <code>null</code>) if the filter is not converted.
 */
private Map<String, List<ConsumerFunctionContext<String, FilterFunction>>> getFunctionsFromFilter(final Filter filter) {
    final Map<String, List<ConsumerFunctionContext<String, FilterFunction>>> map = new HashMap<>();
    if (filter instanceof EqualTo) {
        // Not dealt with as requires a FilterFunction that returns null if either the controlValue or the
        // test value is null - the API of FilterFunction doesn't permit this.
    } else if (filter instanceof EqualNullSafe) {
        final EqualNullSafe equalNullSafe = (EqualNullSafe) filter;
        addFunctionToRelevantGroups(map, filter, new IsEqual(equalNullSafe.value()), equalNullSafe.attribute());
        LOGGER.debug("Converted {} to IsEqual ({})", filter, equalNullSafe.attribute());
    } else if (filter instanceof GreaterThan) {
        final GreaterThan greaterThan = (GreaterThan) filter;
        // The boolean flag distinguishes the strict comparison (false) from the "or equal" variant (true).
        addFunctionToRelevantGroups(map, filter, new IsMoreThan((Comparable<?>) greaterThan.value(), false), greaterThan.attribute());
        LOGGER.debug("Converted {} to IsMoreThan ({})", filter, greaterThan.attribute());
    } else if (filter instanceof GreaterThanOrEqual) {
        final GreaterThanOrEqual greaterThanOrEqual = (GreaterThanOrEqual) filter;
        addFunctionToRelevantGroups(map, filter, new IsMoreThan((Comparable<?>) greaterThanOrEqual.value(), true), greaterThanOrEqual.attribute());
        LOGGER.debug("Converted {} to IsMoreThan ({})", filter, greaterThanOrEqual.attribute());
    } else if (filter instanceof LessThan) {
        final LessThan lessThan = (LessThan) filter;
        addFunctionToRelevantGroups(map, filter, new IsLessThan((Comparable<?>) lessThan.value(), false), lessThan.attribute());
        LOGGER.debug("Converted {} to IsLessThan ({})", filter, lessThan.attribute());
    } else if (filter instanceof LessThanOrEqual) {
        final LessThanOrEqual lessThanOrEqual = (LessThanOrEqual) filter;
        addFunctionToRelevantGroups(map, filter, new IsLessThan((Comparable<?>) lessThanOrEqual.value(), true), lessThanOrEqual.attribute());
        LOGGER.debug("Converted {} to IsLessThan ({})", filter, lessThanOrEqual.attribute());
    } else if (filter instanceof In) {
        final In in = (In) filter;
        addFunctionToRelevantGroups(map, filter, new IsIn(new HashSet<>(Arrays.asList(in.values()))), in.attribute());
        LOGGER.debug("Converted {} to IsIn ({})", filter, in.attribute());
    } else if (filter instanceof IsNull) {
        final IsNull isNull = (IsNull) filter;
        addFunctionToRelevantGroups(map, filter, new Not(new Exists()), isNull.attribute());
        LOGGER.debug("Converted {} to Not(Exists) ({})", filter, isNull.attribute());
    } else if (filter instanceof IsNotNull) {
        final IsNotNull isNotNull = (IsNotNull) filter;
        addFunctionToRelevantGroups(map, filter, new Exists(), isNotNull.attribute());
        LOGGER.debug("Converted {} to Exists ({})", filter, isNotNull.attribute());
    } else if (filter instanceof And) {
        final And and = (And) filter;
        final Map<String, List<ConsumerFunctionContext<String, FilterFunction>>> left = getFunctionsFromFilter(and.left());
        final Map<String, List<ConsumerFunctionContext<String, FilterFunction>>> right = getFunctionsFromFilter(and.right());
        final Set<String> relevantGroups = getGroupsFromFilter(filter);
        if (relevantGroups != null) {
            for (final String group : relevantGroups) {
                // Every relevant group gets an entry, even if neither side produced a function for it.
                final List<ConsumerFunctionContext<String, FilterFunction>> functions = functionsForGroup(map, group);
                final List<ConsumerFunctionContext<String, FilterFunction>> leftFunctions = left.get(group);
                if (leftFunctions != null) {
                    functions.addAll(leftFunctions);
                }
                final List<ConsumerFunctionContext<String, FilterFunction>> rightFunctions = right.get(group);
                if (rightFunctions != null) {
                    functions.addAll(rightFunctions);
                }
            }
        }
        // Joining the whole map is only worth doing when the message will actually be written.
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("Converted {} to list of filters ({})", filter, StringUtils.join(map.entrySet(), ','));
        }
    }
    return map;
}

/**
 * Registers a single-property function against every group returned by <code>getGroupsFromFilter</code> for the
 * given filter. Does nothing if that call returns <code>null</code>.
 *
 * @param map      The map from group to functions that is being built up.
 * @param filter   The {@link Filter} used to look up the relevant groups.
 * @param function The function to register; the same instance is shared by all groups.
 * @param property The name of the single property the function is applied to.
 */
private void addFunctionToRelevantGroups(final Map<String, List<ConsumerFunctionContext<String, FilterFunction>>> map,
                                         final Filter filter,
                                         final FilterFunction function,
                                         final String property) {
    final Set<String> relevantGroups = getGroupsFromFilter(filter);
    if (relevantGroups == null) {
        return;
    }
    final List<String> properties = Collections.singletonList(property);
    for (final String group : relevantGroups) {
        functionsForGroup(map, group).add(new ConsumerFunctionContext<>(function, properties));
    }
}

/**
 * Returns the list of functions stored against the given group, creating and storing an empty list first if the
 * group has no entry yet.
 *
 * @param map   The map from group to functions.
 * @param group The group to look up.
 * @return The (possibly newly created) mutable list held in the map for the group.
 */
private static List<ConsumerFunctionContext<String, FilterFunction>> functionsForGroup(
        final Map<String, List<ConsumerFunctionContext<String, FilterFunction>>> map, final String group) {
    List<ConsumerFunctionContext<String, FilterFunction>> functions = map.get(group);
    if (functions == null) {
        functions = new ArrayList<>();
        map.put(group, functions);
    }
    return functions;
}
Also used : FilterFunction(uk.gov.gchq.gaffer.function.FilterFunction) HashSet(java.util.HashSet) Set(java.util.Set) EqualNullSafe(org.apache.spark.sql.sources.EqualNullSafe) LessThanOrEqual(org.apache.spark.sql.sources.LessThanOrEqual) HashMap(java.util.HashMap) IsIn(uk.gov.gchq.gaffer.function.filter.IsIn) In(org.apache.spark.sql.sources.In) ArrayList(java.util.ArrayList) GreaterThanOrEqual(org.apache.spark.sql.sources.GreaterThanOrEqual) LessThan(org.apache.spark.sql.sources.LessThan) IsLessThan(uk.gov.gchq.gaffer.function.filter.IsLessThan) IsNotNull(org.apache.spark.sql.sources.IsNotNull) ConsumerFunctionContext(uk.gov.gchq.gaffer.function.context.ConsumerFunctionContext) IsLessThan(uk.gov.gchq.gaffer.function.filter.IsLessThan) GreaterThan(org.apache.spark.sql.sources.GreaterThan) ArrayList(java.util.ArrayList) List(java.util.List) IsIn(uk.gov.gchq.gaffer.function.filter.IsIn) EqualTo(org.apache.spark.sql.sources.EqualTo) IsEqual(uk.gov.gchq.gaffer.function.filter.IsEqual) Not(uk.gov.gchq.gaffer.function.filter.Not) Exists(uk.gov.gchq.gaffer.function.filter.Exists) And(org.apache.spark.sql.sources.And) IsNull(org.apache.spark.sql.sources.IsNull) IsMoreThan(uk.gov.gchq.gaffer.function.filter.IsMoreThan) HashMap(java.util.HashMap) Map(java.util.Map)

Example 2 with In

Use of org.apache.spark.sql.sources.In in project Gaffer by gchq.

The class FiltersToOperationConverter, method checkForGroups.

/**
 * Returns the set of all groups in the filter, if the filter specifies that the group must be equal to a certain
 * value.
 * <p>
 * The following filters on the {@link SchemaToStructTypeConverter#GROUP} attribute are recognised:
 * {@link EqualTo}, {@link In}, and {@link Or} where both sides are themselves recognised (so nested
 * <code>Or</code>s such as <code>a OR b OR c</code> are handled; the result is the union of both sides).
 * A filter whose group value is neither a {@link String} nor <code>null</code> is treated as not specifying
 * any groups rather than causing a {@link ClassCastException}.
 *
 * @param filter The {@link Filter} that will be checked for groups.
 * @return A set of strings containing the required groups, <code>null</code> if no groups are specified in the
 * filter.
 */
private Set<String> checkForGroups(final Filter filter) {
    return collectRequiredGroups(filter, true);
}

/**
 * Extracts the groups required by the filter, optionally logging what was found.
 *
 * @param filter The {@link Filter} to inspect.
 * @param log    Whether to log the result at info level; recursive calls for the sides of an {@link Or} pass
 *               <code>false</code> so that only the top-level filter is reported.
 * @return The required groups, or <code>null</code> if the filter does not restrict the group.
 */
private Set<String> collectRequiredGroups(final Filter filter, final boolean log) {
    if (filter instanceof EqualTo) {
        final EqualTo equalTo = (EqualTo) filter;
        final Object value = equalTo.value();
        if (equalTo.attribute().equals(SchemaToStructTypeConverter.GROUP) && (value == null || value instanceof String)) {
            if (log) {
                LOGGER.info("Filter {} specifies that {} should be {}", filter, SchemaToStructTypeConverter.GROUP, value);
            }
            return Collections.singleton((String) value);
        }
    } else if (filter instanceof Or) {
        final Or or = (Or) filter;
        // The Or only restricts the group if BOTH sides do; otherwise any group could satisfy the other side.
        final Set<String> left = collectRequiredGroups(or.left(), false);
        final Set<String> right = (left == null) ? null : collectRequiredGroups(or.right(), false);
        if (right != null) {
            final Set<String> groups = new HashSet<>(left);
            groups.addAll(right);
            if (log) {
                LOGGER.info("Filter {} specifies that {} should be {} or {}", filter, SchemaToStructTypeConverter.GROUP, StringUtils.join(left, ','), StringUtils.join(right, ','));
            }
            return groups;
        }
    } else if (filter instanceof In) {
        final In in = (In) filter;
        if (in.attribute().equals(SchemaToStructTypeConverter.GROUP)) {
            final Set<String> groups = new HashSet<>();
            for (final Object value : in.values()) {
                if (value != null && !(value instanceof String)) {
                    // Cannot interpret this value as a group name; report no restriction and leave the
                    // filtering to whatever applies the filter downstream.
                    return null;
                }
                groups.add((String) value);
            }
            if (log) {
                LOGGER.info("Filter {} specifies that {} should be in {}", filter, SchemaToStructTypeConverter.GROUP, StringUtils.join(in.values(), ','));
            }
            return groups;
        }
    }
    return null;
}
Also used : Or(org.apache.spark.sql.sources.Or) HashSet(java.util.HashSet) Set(java.util.Set) IsIn(uk.gov.gchq.gaffer.function.filter.IsIn) In(org.apache.spark.sql.sources.In) EqualTo(org.apache.spark.sql.sources.EqualTo) HashSet(java.util.HashSet)

Aggregations

HashSet (java.util.HashSet)2 Set (java.util.Set)2 EqualTo (org.apache.spark.sql.sources.EqualTo)2 In (org.apache.spark.sql.sources.In)2 IsIn (uk.gov.gchq.gaffer.function.filter.IsIn)2 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Map (java.util.Map)1 And (org.apache.spark.sql.sources.And)1 EqualNullSafe (org.apache.spark.sql.sources.EqualNullSafe)1 GreaterThan (org.apache.spark.sql.sources.GreaterThan)1 GreaterThanOrEqual (org.apache.spark.sql.sources.GreaterThanOrEqual)1 IsNotNull (org.apache.spark.sql.sources.IsNotNull)1 IsNull (org.apache.spark.sql.sources.IsNull)1 LessThan (org.apache.spark.sql.sources.LessThan)1 LessThanOrEqual (org.apache.spark.sql.sources.LessThanOrEqual)1 Or (org.apache.spark.sql.sources.Or)1 FilterFunction (uk.gov.gchq.gaffer.function.FilterFunction)1 ConsumerFunctionContext (uk.gov.gchq.gaffer.function.context.ConsumerFunctionContext)1