Search in sources :

Example 1 with IsNotNull

Use of org.apache.spark.sql.sources.IsNotNull in the project Gaffer by gchq.

The example is the method getFunctionsFromFilter of the class FiltersToOperationConverter.

/**
 * Converts a Spark {@link Filter} to a map from group to a list of Gaffer {@link ConsumerFunctionContext}s.
 * <p>
 * Note that Spark also applies all the filters provided to the <code>buildScan(String[], Filter[])</code> method
 * so not implementing some of the provided {@link Filter}s in Gaffer will not cause errors. However, as many as
 * possible should be implemented so that as much filtering as possible happens in iterators running in Accumulo's
 * tablet servers (this avoids unnecessary data transfer from Accumulo to Spark).
 * <p>
 * Filter types without a branch below (and {@link EqualTo}, which is deliberately skipped) contribute nothing,
 * so the returned map may be empty. An {@link And} filter is converted recursively: the functions produced for
 * its left and right operands are concatenated per group.
 *
 * @param filter The {@link Filter} to transform.
 * @return A map from {@link String} to {@link ConsumerFunctionContext}s implementing the provided {@link Filter}.
 */
private Map<String, List<ConsumerFunctionContext<String, FilterFunction>>> getFunctionsFromFilter(final Filter filter) {
    final Map<String, List<ConsumerFunctionContext<String, FilterFunction>>> map = new HashMap<>();
    if (filter instanceof EqualTo) {
        // Not dealt with as requires a FilterFunction that returns null if either the controlValue or the
        // test value is null - the API of FilterFunction doesn't permit this.
    } else if (filter instanceof EqualNullSafe) {
        final EqualNullSafe equalNullSafe = (EqualNullSafe) filter;
        addFunctionForRelevantGroups(map, filter, new IsEqual(equalNullSafe.value()), equalNullSafe.attribute());
        LOGGER.debug("Converted {} to IsEqual ({})", filter, equalNullSafe.attribute());
    } else if (filter instanceof GreaterThan) {
        final GreaterThan greaterThan = (GreaterThan) filter;
        addFunctionForRelevantGroups(map, filter,
                new IsMoreThan((Comparable<?>) greaterThan.value(), false), greaterThan.attribute());
        LOGGER.debug("Converted {} to IsMoreThan ({})", filter, greaterThan.attribute());
    } else if (filter instanceof GreaterThanOrEqual) {
        final GreaterThanOrEqual greaterThanOrEqual = (GreaterThanOrEqual) filter;
        addFunctionForRelevantGroups(map, filter,
                new IsMoreThan((Comparable<?>) greaterThanOrEqual.value(), true), greaterThanOrEqual.attribute());
        LOGGER.debug("Converted {} to IsMoreThan ({})", filter, greaterThanOrEqual.attribute());
    } else if (filter instanceof LessThan) {
        final LessThan lessThan = (LessThan) filter;
        addFunctionForRelevantGroups(map, filter,
                new IsLessThan((Comparable<?>) lessThan.value(), false), lessThan.attribute());
        LOGGER.debug("Converted {} to IsLessThan ({})", filter, lessThan.attribute());
    } else if (filter instanceof LessThanOrEqual) {
        final LessThanOrEqual lessThanOrEqual = (LessThanOrEqual) filter;
        addFunctionForRelevantGroups(map, filter,
                new IsLessThan((Comparable<?>) lessThanOrEqual.value(), true), lessThanOrEqual.attribute());
        // The Gaffer function created is IsLessThan (inclusive), so that is what gets reported.
        LOGGER.debug("Converted {} to IsLessThan ({})", filter, lessThanOrEqual.attribute());
    } else if (filter instanceof In) {
        final In in = (In) filter;
        addFunctionForRelevantGroups(map, filter, new IsIn(new HashSet<>(Arrays.asList(in.values()))), in.attribute());
        LOGGER.debug("Converted {} to IsIn ({})", filter, in.attribute());
    } else if (filter instanceof IsNull) {
        final IsNull isNull = (IsNull) filter;
        addFunctionForRelevantGroups(map, filter, new Not(new Exists()), isNull.attribute());
        LOGGER.debug("Converted {} to Not(Exists) ({})", filter, isNull.attribute());
    } else if (filter instanceof IsNotNull) {
        final IsNotNull isNotNull = (IsNotNull) filter;
        addFunctionForRelevantGroups(map, filter, new Exists(), isNotNull.attribute());
        LOGGER.debug("Converted {} to Exists ({})", filter, isNotNull.attribute());
    } else if (filter instanceof And) {
        final And and = (And) filter;
        final Map<String, List<ConsumerFunctionContext<String, FilterFunction>>> left = getFunctionsFromFilter(and.left());
        final Map<String, List<ConsumerFunctionContext<String, FilterFunction>>> right = getFunctionsFromFilter(and.right());
        final Set<String> relevantGroups = getGroupsFromFilter(filter);
        if (relevantGroups != null) {
            for (final String group : relevantGroups) {
                // Every relevant group gets an entry, even when neither operand produced a function for it.
                List<ConsumerFunctionContext<String, FilterFunction>> combined = map.get(group);
                if (combined == null) {
                    combined = new ArrayList<>();
                    map.put(group, combined);
                }
                final List<ConsumerFunctionContext<String, FilterFunction>> fromLeft = left.get(group);
                if (fromLeft != null) {
                    combined.addAll(fromLeft);
                }
                final List<ConsumerFunctionContext<String, FilterFunction>> fromRight = right.get(group);
                if (fromRight != null) {
                    combined.addAll(fromRight);
                }
            }
        }
        LOGGER.debug("Converted {} to list of filters ({})", filter, StringUtils.join(map.entrySet(), ','));
    }
    return map;
}

/**
 * Appends a {@link ConsumerFunctionContext} applying <code>function</code> to <code>attribute</code> to the list
 * of every group returned by <code>getGroupsFromFilter(filter)</code>, creating the list for a group on first
 * use. Does nothing if no groups are returned (i.e. the result is <code>null</code>).
 *
 * @param functionsByGroup The map from group to function contexts that is updated in place.
 * @param filter           The Spark {@link Filter} used to look up the relevant groups.
 * @param function         The Gaffer {@link FilterFunction} implementing the filter.
 * @param attribute        The name of the single property the function is applied to.
 */
private void addFunctionForRelevantGroups(final Map<String, List<ConsumerFunctionContext<String, FilterFunction>>> functionsByGroup,
                                          final Filter filter,
                                          final FilterFunction function,
                                          final String attribute) {
    final Set<String> relevantGroups = getGroupsFromFilter(filter);
    if (relevantGroups == null) {
        return;
    }
    // One selection list is shared by the contexts of all groups.
    final List<String> properties = Collections.singletonList(attribute);
    for (final String group : relevantGroups) {
        List<ConsumerFunctionContext<String, FilterFunction>> functions = functionsByGroup.get(group);
        if (functions == null) {
            functions = new ArrayList<>();
            functionsByGroup.put(group, functions);
        }
        functions.add(new ConsumerFunctionContext<String, FilterFunction>(function, properties));
    }
}
Also used : FilterFunction(uk.gov.gchq.gaffer.function.FilterFunction) HashSet(java.util.HashSet) Set(java.util.Set) EqualNullSafe(org.apache.spark.sql.sources.EqualNullSafe) LessThanOrEqual(org.apache.spark.sql.sources.LessThanOrEqual) HashMap(java.util.HashMap) IsIn(uk.gov.gchq.gaffer.function.filter.IsIn) In(org.apache.spark.sql.sources.In) ArrayList(java.util.ArrayList) GreaterThanOrEqual(org.apache.spark.sql.sources.GreaterThanOrEqual) LessThan(org.apache.spark.sql.sources.LessThan) IsLessThan(uk.gov.gchq.gaffer.function.filter.IsLessThan) IsNotNull(org.apache.spark.sql.sources.IsNotNull) ConsumerFunctionContext(uk.gov.gchq.gaffer.function.context.ConsumerFunctionContext) IsLessThan(uk.gov.gchq.gaffer.function.filter.IsLessThan) GreaterThan(org.apache.spark.sql.sources.GreaterThan) ArrayList(java.util.ArrayList) List(java.util.List) IsIn(uk.gov.gchq.gaffer.function.filter.IsIn) EqualTo(org.apache.spark.sql.sources.EqualTo) IsEqual(uk.gov.gchq.gaffer.function.filter.IsEqual) Not(uk.gov.gchq.gaffer.function.filter.Not) Exists(uk.gov.gchq.gaffer.function.filter.Exists) And(org.apache.spark.sql.sources.And) IsNull(org.apache.spark.sql.sources.IsNull) IsMoreThan(uk.gov.gchq.gaffer.function.filter.IsMoreThan) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 List (java.util.List)1 Map (java.util.Map)1 Set (java.util.Set)1 And (org.apache.spark.sql.sources.And)1 EqualNullSafe (org.apache.spark.sql.sources.EqualNullSafe)1 EqualTo (org.apache.spark.sql.sources.EqualTo)1 GreaterThan (org.apache.spark.sql.sources.GreaterThan)1 GreaterThanOrEqual (org.apache.spark.sql.sources.GreaterThanOrEqual)1 In (org.apache.spark.sql.sources.In)1 IsNotNull (org.apache.spark.sql.sources.IsNotNull)1 IsNull (org.apache.spark.sql.sources.IsNull)1 LessThan (org.apache.spark.sql.sources.LessThan)1 LessThanOrEqual (org.apache.spark.sql.sources.LessThanOrEqual)1 FilterFunction (uk.gov.gchq.gaffer.function.FilterFunction)1 ConsumerFunctionContext (uk.gov.gchq.gaffer.function.context.ConsumerFunctionContext)1 Exists (uk.gov.gchq.gaffer.function.filter.Exists)1 IsEqual (uk.gov.gchq.gaffer.function.filter.IsEqual)1