Search in sources :

Example 6 with TupleAdaptedPredicate

use of uk.gov.gchq.koryphe.tuple.predicate.TupleAdaptedPredicate in project Gaffer by gchq.

the class FiltersToOperationConverter method getFunctionsFromFilter.

/**
 * Converts a Spark {@link Filter} to a map from group to a list of Gaffer {@link TupleAdaptedPredicate}s.
 * <p>
 * Note that Spark also applies all the filters provided to the {@code buildScan(String[], Filter[])} method
 * so not implementing some of the provided {@link Filter}s in Gaffer will not cause errors. However, as many as
 * possible should be implemented so that as much filtering as possible happens in iterators running in Accumulo's
 * tablet servers (this avoids unnecessary data transfer from Accumulo to Spark).
 * <p>
 * Filters that are not handled here (including {@link EqualTo}, see the comment in the body) produce an
 * empty map. For an {@link And} filter both sides are converted recursively and their predicates are
 * concatenated, left side first, for every group returned by {@code getGroupsFromFilter} for the
 * {@link And} filter itself.
 *
 * @param filter The {@link Filter} to transform.
 * @return A map from {@link String} to {@link TupleAdaptedPredicate}s implementing the provided {@link Filter}.
 *         Never {@code null}; empty if the filter could not be converted.
 */
private Map<String, List<TupleAdaptedPredicate<String, ?>>> getFunctionsFromFilter(final Filter filter) {
    final Map<String, List<TupleAdaptedPredicate<String, ?>>> map = new HashMap<>();
    if (filter instanceof EqualTo) {
        // Not dealt with as requires a Predicate<?> that returns null if either the controlValue or the
        // test value is null - the API of Predicate<?> doesn't permit this.
    } else if (filter instanceof EqualNullSafe) {
        final EqualNullSafe equalNullSafe = (EqualNullSafe) filter;
        addPredicateForRelevantGroups(map, filter, new IsEqual(equalNullSafe.value()), equalNullSafe.attribute());
        LOGGER.debug("Converted {} to IsEqual ({})", filter, equalNullSafe.attribute());
    } else if (filter instanceof GreaterThan) {
        final GreaterThan greaterThan = (GreaterThan) filter;
        addPredicateForRelevantGroups(map, filter,
                new IsMoreThan((Comparable<?>) greaterThan.value(), false), greaterThan.attribute());
        LOGGER.debug("Converted {} to isMoreThan ({})", filter, greaterThan.attribute());
    } else if (filter instanceof GreaterThanOrEqual) {
        final GreaterThanOrEqual greaterThan = (GreaterThanOrEqual) filter;
        addPredicateForRelevantGroups(map, filter,
                new IsMoreThan((Comparable<?>) greaterThan.value(), true), greaterThan.attribute());
        LOGGER.debug("Converted {} to IsMoreThan ({})", filter, greaterThan.attribute());
    } else if (filter instanceof LessThan) {
        final LessThan lessThan = (LessThan) filter;
        addPredicateForRelevantGroups(map, filter,
                new IsLessThan((Comparable<?>) lessThan.value(), false), lessThan.attribute());
        LOGGER.debug("Converted {} to IsLessThan ({})", filter, lessThan.attribute());
    } else if (filter instanceof LessThanOrEqual) {
        final LessThanOrEqual lessThan = (LessThanOrEqual) filter;
        addPredicateForRelevantGroups(map, filter,
                new IsLessThan((Comparable<?>) lessThan.value(), true), lessThan.attribute());
        LOGGER.debug("Converted {} to LessThanOrEqual ({})", filter, lessThan.attribute());
    } else if (filter instanceof In) {
        final In in = (In) filter;
        addPredicateForRelevantGroups(map, filter,
                new IsIn(new HashSet<>(Arrays.asList(in.values()))), in.attribute());
        LOGGER.debug("Converted {} to IsIn ({})", filter, in.attribute());
    } else if (filter instanceof IsNull) {
        final IsNull isNull = (IsNull) filter;
        addPredicateForRelevantGroups(map, filter, new Not<>(new Exists()), isNull.attribute());
        LOGGER.debug("Converted {} to Not(Exists) ({})", filter, isNull.attribute());
    } else if (filter instanceof IsNotNull) {
        final IsNotNull isNotNull = (IsNotNull) filter;
        addPredicateForRelevantGroups(map, filter, new Exists(), isNotNull.attribute());
        LOGGER.debug("Converted {} to Exists ({})", filter, isNotNull.attribute());
    } else if (filter instanceof And) {
        final And and = (And) filter;
        final Map<String, List<TupleAdaptedPredicate<String, ?>>> left = getFunctionsFromFilter(and.left());
        final Map<String, List<TupleAdaptedPredicate<String, ?>>> right = getFunctionsFromFilter(and.right());
        final Set<String> relevantGroups = getGroupsFromFilter(filter);
        if (null != relevantGroups) {
            for (final String group : relevantGroups) {
                // The group is registered even when neither side contributed a predicate for it.
                final List<TupleAdaptedPredicate<String, ?>> groupPredicates =
                        map.computeIfAbsent(group, key -> new ArrayList<>());
                final List<TupleAdaptedPredicate<String, ?>> leftPredicates = left.get(group);
                if (null != leftPredicates) {
                    groupPredicates.addAll(leftPredicates);
                }
                final List<TupleAdaptedPredicate<String, ?>> rightPredicates = right.get(group);
                if (null != rightPredicates) {
                    groupPredicates.addAll(rightPredicates);
                }
            }
        }
        LOGGER.debug("Converted {} to list of filters ({})", filter, StringUtils.join(map.entrySet(), ','));
    }
    return map;
}

/**
 * Appends {@code predicate}, adapted to select the single property {@code attribute}, to the predicate
 * list of every group that {@code getGroupsFromFilter} returns for {@code filter}. Does nothing when
 * that method returns {@code null}.
 *
 * @param map       The map from group to predicates being built; modified in place.
 * @param filter    The Spark {@link Filter} used to look up the relevant groups.
 * @param predicate The Gaffer predicate implementing the filter.
 * @param attribute The name of the attribute the predicate should be applied to.
 */
private void addPredicateForRelevantGroups(final Map<String, List<TupleAdaptedPredicate<String, ?>>> map,
                                           final Filter filter,
                                           final Predicate<?> predicate,
                                           final String attribute) {
    final Set<String> relevantGroups = getGroupsFromFilter(filter);
    if (null == relevantGroups) {
        return;
    }
    for (final String group : relevantGroups) {
        // A fresh adapter is created per group so that no adapter instance is shared between groups.
        map.computeIfAbsent(group, key -> new ArrayList<>())
                .add(new TupleAdaptedPredicate<>(predicate, new String[] { attribute }));
    }
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) EqualNullSafe(org.apache.spark.sql.sources.EqualNullSafe) LessThanOrEqual(org.apache.spark.sql.sources.LessThanOrEqual) HashMap(java.util.HashMap) In(org.apache.spark.sql.sources.In) IsIn(uk.gov.gchq.koryphe.impl.predicate.IsIn) ArrayList(java.util.ArrayList) GreaterThanOrEqual(org.apache.spark.sql.sources.GreaterThanOrEqual) TupleAdaptedPredicate(uk.gov.gchq.koryphe.tuple.predicate.TupleAdaptedPredicate) Predicate(java.util.function.Predicate) IsLessThan(uk.gov.gchq.koryphe.impl.predicate.IsLessThan) LessThan(org.apache.spark.sql.sources.LessThan) IsNotNull(org.apache.spark.sql.sources.IsNotNull) IsLessThan(uk.gov.gchq.koryphe.impl.predicate.IsLessThan) GreaterThan(org.apache.spark.sql.sources.GreaterThan) ArrayList(java.util.ArrayList) List(java.util.List) TupleAdaptedPredicate(uk.gov.gchq.koryphe.tuple.predicate.TupleAdaptedPredicate) IsIn(uk.gov.gchq.koryphe.impl.predicate.IsIn) EqualTo(org.apache.spark.sql.sources.EqualTo) IsEqual(uk.gov.gchq.koryphe.impl.predicate.IsEqual) Exists(uk.gov.gchq.koryphe.impl.predicate.Exists) And(org.apache.spark.sql.sources.And) IsNull(org.apache.spark.sql.sources.IsNull) IsMoreThan(uk.gov.gchq.koryphe.impl.predicate.IsMoreThan) HashMap(java.util.HashMap) Map(java.util.Map)

Example 7 with TupleAdaptedPredicate

use of uk.gov.gchq.koryphe.tuple.predicate.TupleAdaptedPredicate in project Gaffer by gchq.

the class QueryGenerator method getPredicateFromView.

/**
 * Builds a Parquet {@link FilterPredicate} from the pre-aggregation filter functions that the view
 * defines for the given group.
 * <p>
 * TODO raise issue saying that could optimise so that only the filters that have not been fully applied
 * are reapplied, and it should be able to return the fact that all filters have been applied.
 * <p>
 * The result is one of:
 * <ul>
 * <li>a filter that fully applies the view</li>
 * <li>a filter that doesn't fully apply the view</li>
 * <li>no filter, and the view is not fully applied</li>
 * <li>no filter, and the view is fully applied</li>
 * </ul>
 *
 * @param view          the view to convert; may be {@code null}
 * @param group         the group whose element definition is read from the view
 * @param isEntityGroup not read by this method
 * @return a pair of the combined filter ({@code null} if there is none) and a Boolean indicating
 *         whether the logic was fully applied
 * @throws SerialisationException declared by this method; presumably raised by the predicate conversion
 * @throws OperationException     declared by this method
 */
private Pair<FilterPredicate, Boolean> getPredicateFromView(final View view, final String group, final boolean isEntityGroup) throws SerialisationException, OperationException {
    // Nothing to convert: no view at all.
    if (null == view) {
        return new Pair<>(null, true);
    }
    // Nothing to convert: the view has no definition for this group.
    final ViewElementDefinition elementDefinition = view.getElement(group);
    if (null == elementDefinition) {
        return new Pair<>(null, true);
    }
    // Nothing to convert: the definition carries no pre-aggregation filters.
    final List<TupleAdaptedPredicate<String, ?>> preAggPredicates = elementDefinition.getPreAggregationFilterFunctions();
    if (null == preAggPredicates) {
        return new Pair<>(null, true);
    }

    FilterPredicate combined = null;
    boolean everythingApplied = true;
    for (final TupleAdaptedPredicate<String, ?> adapted : preAggPredicates) {
        final JavaPredicateToParquetPredicate converter = new JavaPredicateToParquetPredicate(schemaUtils, adapted.getPredicate(), adapted.getSelection(), group);
        combined = FilterPredicateUtils.and(combined, converter.getParquetPredicate());
        // Once any converter reports a partial application the overall flag stays false.
        everythingApplied &= converter.isFullyApplied();
    }
    return new Pair<>(combined, everythingApplied);
}
Also used : TupleAdaptedPredicate(uk.gov.gchq.koryphe.tuple.predicate.TupleAdaptedPredicate) ViewElementDefinition(uk.gov.gchq.gaffer.data.elementdefinition.view.ViewElementDefinition) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) Pair(uk.gov.gchq.gaffer.commonutil.pair.Pair)

Example 8 with TupleAdaptedPredicate

use of uk.gov.gchq.koryphe.tuple.predicate.TupleAdaptedPredicate in project Gaffer by gchq.

the class JavaPredicateToParquetPredicate method getOrFilter.

/**
 * Converts each predicate in the list to a Parquet {@link FilterPredicate} and combines the results
 * with {@code FilterPredicateUtils.or}.
 * <p>
 * A {@link TupleAdaptedPredicate} is unwrapped first: its integer selection is treated as indices into
 * {@code selection} to build the selection for the wrapped predicate. If any converted predicate is
 * not fully applied, this object's {@code fullyApplied} field is set to {@code false}.
 *
 * @param predicateList the predicates to combine
 * @param selection     the selection the predicates apply to
 * @param group         the group passed to each converter
 * @param schemaUtils   the schema utilities passed to each converter
 * @return the combined filter, or {@code null} if {@code predicateList} is empty
 * @throws SerialisationException declared by this method; presumably raised by the predicate conversion
 */
public FilterPredicate getOrFilter(final List<Predicate> predicateList, final String[] selection, final String group, final SchemaUtils schemaUtils) throws SerialisationException {
    FilterPredicate result = null;
    for (final Predicate candidate : predicateList) {
        final Predicate unwrapped;
        final String[] resolvedSelection;
        if (!(candidate instanceof TupleAdaptedPredicate)) {
            unwrapped = candidate;
            resolvedSelection = selection;
        } else {
            final TupleAdaptedPredicate adapted = (TupleAdaptedPredicate) candidate;
            unwrapped = adapted.getPredicate();
            // Map the adapter's positional selection onto the outer selection's names.
            final Integer[] positions = (Integer[]) adapted.getSelection();
            resolvedSelection = new String[positions.length];
            int target = 0;
            for (final Integer position : positions) {
                resolvedSelection[target++] = selection[position];
            }
        }
        final JavaPredicateToParquetPredicate converter = new JavaPredicateToParquetPredicate(schemaUtils, unwrapped, resolvedSelection, group);
        final FilterPredicate converted = converter.getParquetPredicate();
        if (!converter.fullyApplied) {
            fullyApplied = false;
        }
        result = FilterPredicateUtils.or(result, converted);
    }
    return result;
}
Also used : TupleAdaptedPredicate(uk.gov.gchq.koryphe.tuple.predicate.TupleAdaptedPredicate) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) TupleAdaptedPredicate(uk.gov.gchq.koryphe.tuple.predicate.TupleAdaptedPredicate) Predicate(java.util.function.Predicate)

Example 9 with TupleAdaptedPredicate

use of uk.gov.gchq.koryphe.tuple.predicate.TupleAdaptedPredicate in project Gaffer by gchq.

the class JavaPredicateToParquetPredicate method getAndFilter.

/**
 * Converts each predicate in the list to a Parquet {@link FilterPredicate} and combines the results
 * with {@code FilterPredicateUtils.and}.
 * <p>
 * A {@link TupleAdaptedPredicate} is unwrapped first: its integer selection is treated as indices into
 * {@code selection} to build the selection for the wrapped predicate. If any converted predicate is
 * not fully applied, this object's {@code fullyApplied} field is set to {@code false}.
 *
 * @param predicateList the predicates to combine
 * @param selection     the selection the predicates apply to
 * @param group         the group passed to each converter
 * @param schemaUtils   the schema utilities passed to each converter
 * @return the combined filter, or {@code null} if {@code predicateList} is empty
 * @throws SerialisationException declared by this method; presumably raised by the predicate conversion
 */
public FilterPredicate getAndFilter(final List<Predicate> predicateList, final String[] selection, final String group, final SchemaUtils schemaUtils) throws SerialisationException {
    FilterPredicate result = null;
    for (final Predicate candidate : predicateList) {
        final Predicate unwrapped;
        final String[] resolvedSelection;
        if (!(candidate instanceof TupleAdaptedPredicate)) {
            unwrapped = candidate;
            resolvedSelection = selection;
        } else {
            final TupleAdaptedPredicate adapted = (TupleAdaptedPredicate) candidate;
            unwrapped = adapted.getPredicate();
            // Map the adapter's positional selection onto the outer selection's names.
            final Integer[] positions = (Integer[]) adapted.getSelection();
            resolvedSelection = new String[positions.length];
            int target = 0;
            for (final Integer position : positions) {
                resolvedSelection[target++] = selection[position];
            }
        }
        final JavaPredicateToParquetPredicate converter = new JavaPredicateToParquetPredicate(schemaUtils, unwrapped, resolvedSelection, group);
        final FilterPredicate converted = converter.getParquetPredicate();
        if (!converter.fullyApplied) {
            fullyApplied = false;
        }
        result = FilterPredicateUtils.and(result, converted);
    }
    return result;
}
Also used : TupleAdaptedPredicate(uk.gov.gchq.koryphe.tuple.predicate.TupleAdaptedPredicate) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) FilterPredicate(org.apache.parquet.filter2.predicate.FilterPredicate) TupleAdaptedPredicate(uk.gov.gchq.koryphe.tuple.predicate.TupleAdaptedPredicate) Predicate(java.util.function.Predicate)

Example 10 with TupleAdaptedPredicate

use of uk.gov.gchq.koryphe.tuple.predicate.TupleAdaptedPredicate in project Gaffer by gchq.

the class ViewElementDefinitionTest method shouldJsonSerialiseAndDeserialise.

/**
 * Round-trips a fully populated {@link ViewElementDefinition} through JSON and checks that the
 * properties, the three filters, the aggregator and the transformer all survive deserialisation.
 */
@Test
public void shouldJsonSerialiseAndDeserialise() throws SerialisationException {
    // Given
    final ViewElementDefinition elementDef = new ViewElementDefinition.Builder()
            .transientProperty(TestPropertyNames.PROP_1, String.class)
            .transientProperty(TestPropertyNames.PROP_2, String.class)
            .properties(TestPropertyNames.COUNT, TestPropertyNames.DATE)
            .preAggregationFilter(new ElementFilter.Builder()
                    .select(TestPropertyNames.COUNT)
                    .execute(new IsMoreThan(5))
                    .build())
            .aggregator(new ElementAggregator.Builder()
                    .select(TestPropertyNames.COUNT)
                    .execute(new Max())
                    .build())
            .postAggregationFilter(new ElementFilter.Builder()
                    .select(TestPropertyNames.COUNT)
                    .execute(new IsLessThan(10))
                    .build())
            .transformer(new ElementTransformer.Builder()
                    .select(TestPropertyNames.COUNT)
                    .execute(new TestTransform())
                    .project(TestPropertyNames.PROP_1)
                    .build())
            .postTransformFilter(new ElementFilter.Builder()
                    .select(TestPropertyNames.PROP_1)
                    .execute(new IsEqual("9"))
                    .build())
            .build();

    // When
    final byte[] serialised = JSONSerialiser.serialise(elementDef, true);
    final ViewElementDefinition roundTripped = JSONSerialiser.deserialise(serialised, ViewElementDefinition.class);

    // Then
    assertEquals(Sets.newHashSet(TestPropertyNames.COUNT, TestPropertyNames.DATE), roundTripped.getProperties());
    assertNull(roundTripped.getExcludeProperties());

    // Pre-aggregation filter
    final List<TupleAdaptedPredicate<String, ?>> preAggFilters = roundTripped.getPreAggregationFilter().getComponents();
    assertThat(preAggFilters).hasSize(1);
    assertArrayEquals(new String[] { TestPropertyNames.COUNT }, preAggFilters.get(0).getSelection());
    assertEquals(new IsMoreThan(5), preAggFilters.get(0).getPredicate());

    // Aggregator
    final List<TupleAdaptedBinaryOperator<String, ?>> aggregators = roundTripped.getAggregator().getComponents();
    assertThat(aggregators).hasSize(1);
    assertArrayEquals(new String[] { TestPropertyNames.COUNT }, aggregators.get(0).getSelection());
    assertEquals(new Max(), aggregators.get(0).getBinaryOperator());

    // Post-aggregation filter
    final List<TupleAdaptedPredicate<String, ?>> postAggFilters = roundTripped.getPostAggregationFilter().getComponents();
    assertThat(postAggFilters).hasSize(1);
    assertArrayEquals(new String[] { TestPropertyNames.COUNT }, postAggFilters.get(0).getSelection());
    assertEquals(new IsLessThan(10), postAggFilters.get(0).getPredicate());

    // Transformer
    final List<TupleAdaptedFunction<String, ?, ?>> transforms = roundTripped.getTransformer().getComponents();
    assertThat(transforms).hasSize(1);
    assertArrayEquals(new String[] { TestPropertyNames.COUNT }, transforms.get(0).getSelection());
    assertEquals(new TestTransform(), transforms.get(0).getFunction());
    assertArrayEquals(new String[] { TestPropertyNames.PROP_1 }, transforms.get(0).getProjection());

    // Post-transform filter
    final List<TupleAdaptedPredicate<String, ?>> postTransformFilters = roundTripped.getPostTransformFilter().getComponents();
    assertThat(postTransformFilters).hasSize(1);
    assertArrayEquals(new String[] { TestPropertyNames.PROP_1 }, postTransformFilters.get(0).getSelection());
    assertEquals(new IsEqual("9"), postTransformFilters.get(0).getPredicate());
}
Also used : TupleAdaptedPredicate(uk.gov.gchq.koryphe.tuple.predicate.TupleAdaptedPredicate) Max(uk.gov.gchq.koryphe.impl.binaryoperator.Max) ElementTransformer(uk.gov.gchq.gaffer.data.element.function.ElementTransformer) TupleAdaptedFunction(uk.gov.gchq.koryphe.tuple.function.TupleAdaptedFunction) IsEqual(uk.gov.gchq.koryphe.impl.predicate.IsEqual) IsLessThan(uk.gov.gchq.koryphe.impl.predicate.IsLessThan) ElementFilter(uk.gov.gchq.gaffer.data.element.function.ElementFilter) TupleAdaptedBinaryOperator(uk.gov.gchq.koryphe.tuple.binaryoperator.TupleAdaptedBinaryOperator) IsMoreThan(uk.gov.gchq.koryphe.impl.predicate.IsMoreThan) Test(org.junit.jupiter.api.Test)

Aggregations

TupleAdaptedPredicate (uk.gov.gchq.koryphe.tuple.predicate.TupleAdaptedPredicate)13 Test (org.junit.jupiter.api.Test)8 Predicate (java.util.function.Predicate)7 IsMoreThan (uk.gov.gchq.koryphe.impl.predicate.IsMoreThan)7 ArrayList (java.util.ArrayList)6 IsLessThan (uk.gov.gchq.koryphe.impl.predicate.IsLessThan)6 GreaterThan (org.apache.spark.sql.sources.GreaterThan)5 LessThan (org.apache.spark.sql.sources.LessThan)5 View (uk.gov.gchq.gaffer.data.elementdefinition.view.View)5 SparkSession (org.apache.spark.sql.SparkSession)4 Filter (org.apache.spark.sql.sources.Filter)4 Operation (uk.gov.gchq.gaffer.operation.Operation)4 GraphFilters (uk.gov.gchq.gaffer.operation.graph.GraphFilters)4 Schema (uk.gov.gchq.gaffer.store.schema.Schema)4 HashSet (java.util.HashSet)3 FilterPredicate (org.apache.parquet.filter2.predicate.FilterPredicate)3 EqualTo (org.apache.spark.sql.sources.EqualTo)3 And (org.apache.spark.sql.sources.And)2 ElementFilter (uk.gov.gchq.gaffer.data.element.function.ElementFilter)2 EntityId (uk.gov.gchq.gaffer.data.element.id.EntityId)2