Search in sources :

Example 1 with Filter

use of org.apache.spark.sql.sources.Filter in project Gaffer by gchq.

the class FiltersToOperationConverter method applyPropertyFilters.

/**
 * Narrows the view of the supplied operation using the filters held by this converter.
 *
 * <p>The groups returned by {@code getGroupsFromFilter} for each filter are intersected with the
 * entity and edge groups of {@code derivedView}. For every group that survives the intersection
 * and has filter functions (as returned by {@code getFunctionsFromFilter}), a new view element
 * definition is built by merging the existing definition with those functions as post-aggregation
 * filters. If at least one definition was built, the operation receives a view containing only
 * the rebuilt definitions; otherwise it receives {@code derivedView} unchanged.
 *
 * @param derivedView the view whose groups and element definitions are the starting point
 * @param operation the operation whose view is set
 * @return the same {@code operation} instance that was passed in, after its view has been set
 */
private AbstractGetRDD<?> applyPropertyFilters(final View derivedView, final AbstractGetRDD<?> operation) {
    // Step 1: record, for each filter, the non-empty set of groups it relates to.
    final List<Set<String>> groupSetsPerFilter = new ArrayList<>();
    for (final Filter filter : filters) {
        final Set<String> relatedGroups = getGroupsFromFilter(filter);
        final boolean hasGroups = relatedGroups != null && !relatedGroups.isEmpty();
        if (hasGroups) {
            groupSetsPerFilter.add(relatedGroups);
        }
        LOGGER.info("Groups {} are related to filter {}", StringUtils.join(relatedGroups, ','), filter);
    }
    LOGGER.info("Groups related to filters are: {}", StringUtils.join(groupSetsPerFilter, ','));

    // Step 2: start from every group in the view and keep only those common to all recorded sets.
    final Set<String> candidateGroups = new HashSet<>();
    candidateGroups.addAll(derivedView.getEntityGroups());
    candidateGroups.addAll(derivedView.getEdgeGroups());
    for (final Set<String> groupSet : groupSetsPerFilter) {
        candidateGroups.retainAll(groupSet);
    }
    LOGGER.info("Groups that can be returned are: {}", StringUtils.join(candidateGroups, ','));

    // Step 3: accumulate the filter functions produced by every filter, keyed by group.
    final Map<String, List<ConsumerFunctionContext<String, FilterFunction>>> functionsByGroup = new HashMap<>();
    for (final Filter filter : filters) {
        final Map<String, List<ConsumerFunctionContext<String, FilterFunction>>> fromFilter = getFunctionsFromFilter(filter);
        for (final Entry<String, List<ConsumerFunctionContext<String, FilterFunction>>> perGroup : fromFilter.entrySet()) {
            List<ConsumerFunctionContext<String, FilterFunction>> accumulated = functionsByGroup.get(perGroup.getKey());
            if (accumulated == null) {
                accumulated = new ArrayList<>();
                functionsByGroup.put(perGroup.getKey(), accumulated);
            }
            accumulated.addAll(perGroup.getValue());
        }
    }
    LOGGER.info("The following functions will be applied for the given group:");
    for (final Entry<String, List<ConsumerFunctionContext<String, FilterFunction>>> perGroup : functionsByGroup.entrySet()) {
        LOGGER.info("Group = {}: ", perGroup.getKey());
        for (final ConsumerFunctionContext<String, FilterFunction> context : perGroup.getValue()) {
            LOGGER.info("\t{} {}", StringUtils.join(context.getSelection(), ','), context.getFunction());
        }
    }

    // Step 4: rebuild the definition of each surviving group that has functions to apply.
    boolean viewChanged = false;
    View.Builder viewBuilder = new View.Builder();
    for (final String group : derivedView.getEntityGroups()) {
        if (!candidateGroups.contains(group)) {
            continue;
        }
        final List<ConsumerFunctionContext<String, FilterFunction>> functions = functionsByGroup.get(group);
        if (functions == null) {
            LOGGER.info("Not adding any filter functions to the view for group {}", group);
            continue;
        }
        final ViewElementDefinition entityDefinition = new ViewElementDefinition.Builder()
                .merge(derivedView.getEntity(group))
                .postAggregationFilterFunctions(functions)
                .build();
        LOGGER.info("Adding the following filter functions to the view for group {}:", group);
        for (final ConsumerFunctionContext<String, FilterFunction> context : functions) {
            LOGGER.info("\t{} {}", StringUtils.join(context.getSelection(), ','), context.getFunction());
        }
        viewBuilder = viewBuilder.entity(group, entityDefinition);
        viewChanged = true;
    }
    for (final String group : derivedView.getEdgeGroups()) {
        if (!candidateGroups.contains(group)) {
            continue;
        }
        final List<ConsumerFunctionContext<String, FilterFunction>> functions = functionsByGroup.get(group);
        if (functions == null) {
            LOGGER.info("Not adding any filter functions to the view for group {}", group);
            continue;
        }
        final ViewElementDefinition edgeDefinition = new ViewElementDefinition.Builder()
                .merge(derivedView.getEdge(group))
                .postAggregationFilterFunctions(functions)
                .build();
        LOGGER.info("Adding the following filter functions to the view for group {}:", group);
        for (final ConsumerFunctionContext<String, FilterFunction> context : functions) {
            LOGGER.info("\t{} {}", StringUtils.join(context.getSelection(), ','), context.getFunction());
        }
        viewBuilder = viewBuilder.edge(group, edgeDefinition);
        viewChanged = true;
    }

    // Use the rebuilt view only when at least one definition was added to it.
    operation.setView(viewChanged ? viewBuilder.build() : derivedView);
    return operation;
}
Also used : FilterFunction(uk.gov.gchq.gaffer.function.FilterFunction) HashSet(java.util.HashSet) Set(java.util.Set) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) ViewElementDefinition(uk.gov.gchq.gaffer.data.elementdefinition.view.ViewElementDefinition) View(uk.gov.gchq.gaffer.data.elementdefinition.view.View) ConsumerFunctionContext(uk.gov.gchq.gaffer.function.context.ConsumerFunctionContext) Filter(org.apache.spark.sql.sources.Filter) ArrayList(java.util.ArrayList) List(java.util.List) HashSet(java.util.HashSet)

Example 2 with Filter

use of org.apache.spark.sql.sources.Filter in project Gaffer by gchq.

the class FilterToOperationConverterTest method testSingleGroupNotInSchema.

/**
 * Checks that an equality filter on the group column with the value {@code "random"} (which,
 * per the test name, is not a group in the schema) makes the converter return a null operation.
 */
@Test
public void testSingleGroupNotInSchema() {
    final Schema schema = getSchema();
    // The returned session is not used here; presumably the call initialises the shared Spark session - TODO confirm.
    SparkSessionProvider.getSparkSession();
    final Filter[] groupFilter = {new EqualTo(SchemaToStructTypeConverter.GROUP, "random")};

    final FiltersToOperationConverter converter = new FiltersToOperationConverter(getViewFromSchema(schema), schema, groupFilter);

    assertNull(converter.getOperation());
}
Also used : SparkSession(org.apache.spark.sql.SparkSession) Filter(org.apache.spark.sql.sources.Filter) Schema(uk.gov.gchq.gaffer.store.schema.Schema) Operation(uk.gov.gchq.gaffer.operation.Operation) EqualTo(org.apache.spark.sql.sources.EqualTo) Test(org.junit.jupiter.api.Test)

Example 3 with Filter

use of org.apache.spark.sql.sources.Filter in project Gaffer by gchq.

the class FilterToOperationConverterTest method testSingleGroup.

/**
 * Checks that an equality filter on the group column selecting {@code ENTITY_GROUP} yields a
 * {@code GetRDDOfAllElements} whose view contains exactly that entity group and no edge groups.
 */
@Test
public void testSingleGroup() {
    final Schema schema = getSchema();
    // The returned session is not used here; presumably the call initialises the shared Spark session - TODO confirm.
    SparkSessionProvider.getSparkSession();
    final Filter[] groupFilter = {new EqualTo(SchemaToStructTypeConverter.GROUP, ENTITY_GROUP)};

    final Operation operation = new FiltersToOperationConverter(getViewFromSchema(schema), schema, groupFilter).getOperation();

    assertTrue(operation instanceof GetRDDOfAllElements);
    final GraphFilters graphFilters = (GraphFilters) operation;
    assertEquals(Collections.singleton(ENTITY_GROUP), graphFilters.getView().getEntityGroups());
    assertEquals(0, graphFilters.getView().getEdgeGroups().size());
}
Also used : SparkSession(org.apache.spark.sql.SparkSession) Filter(org.apache.spark.sql.sources.Filter) GetRDDOfAllElements(uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfAllElements) Schema(uk.gov.gchq.gaffer.store.schema.Schema) GraphFilters(uk.gov.gchq.gaffer.operation.graph.GraphFilters) Operation(uk.gov.gchq.gaffer.operation.Operation) EqualTo(org.apache.spark.sql.sources.EqualTo) Test(org.junit.jupiter.api.Test)

Example 4 with Filter

use of org.apache.spark.sql.sources.Filter in project Gaffer by gchq.

the class FilterToOperationConverterTest method testSpecifyVertexAndPropertyFilter.

/**
 * Checks conversion when a vertex equality filter is combined with property filters.
 *
 * <p>Two scenarios are exercised: one property filter (property1 &gt; 5) and two property
 * filters (property1 &gt; 5, property4 &lt; 8). In both, the operation must be a
 * {@code GetRDDOfElements} seeded with the single entity seed {@code "0"}, its view must contain
 * one entity group and no edge groups, and the post-aggregation filters of {@code ENTITY_GROUP}
 * must match the property filters in order.
 */
@Test
public void testSpecifyVertexAndPropertyFilter() {
    final Schema schema = getSchema();
    // The returned session is not used here; presumably the call initialises the shared Spark session - TODO confirm.
    SparkSessionProvider.getSparkSession();

    // Scenario 1: vertex plus a filter on property1
    final Filter[] oneProperty = {
        new GreaterThan("property1", 5),
        new EqualTo(SchemaToStructTypeConverter.VERTEX_COL_NAME, "0")
    };
    final Operation firstOp = new FiltersToOperationConverter(getViewFromSchema(schema), schema, oneProperty).getOperation();
    assertTrue(firstOp instanceof GetRDDOfElements);
    final View firstView = ((GraphFilters) firstOp).getView();
    assertEquals(1, firstView.getEntityGroups().size());
    assertEquals(0, firstView.getEdgeGroups().size());
    final Set<EntityId> firstSeeds = new HashSet<>();
    for (final Object seed : ((GetRDDOfElements) firstOp).getInput()) {
        firstSeeds.add((EntitySeed) seed);
    }
    assertEquals(Collections.singleton(new EntitySeed("0")), firstSeeds);
    final List<TupleAdaptedPredicate<String, ?>> firstFilters = firstView.getEntity(ENTITY_GROUP).getPostAggregationFilterFunctions();
    assertThat(firstFilters).hasSize(1);
    assertThat(firstFilters.get(0).getSelection()).hasSize(1);
    assertEquals("property1", firstFilters.get(0).getSelection()[0]);
    assertEquals(new IsMoreThan(5, false), firstFilters.get(0).getPredicate());

    // Scenario 2: vertex plus filters on property1 and property4
    final Filter[] twoProperties = {
        new GreaterThan("property1", 5),
        new EqualTo(SchemaToStructTypeConverter.VERTEX_COL_NAME, "0"),
        new LessThan("property4", 8)
    };
    final Operation secondOp = new FiltersToOperationConverter(getViewFromSchema(schema), schema, twoProperties).getOperation();
    assertTrue(secondOp instanceof GetRDDOfElements);
    final View secondView = ((GraphFilters) secondOp).getView();
    assertEquals(1, secondView.getEntityGroups().size());
    assertEquals(0, secondView.getEdgeGroups().size());
    final Set<EntityId> secondSeeds = new HashSet<>();
    for (final Object seed : ((GetRDDOfElements) secondOp).getInput()) {
        secondSeeds.add((EntitySeed) seed);
    }
    assertEquals(Collections.singleton(new EntitySeed("0")), secondSeeds);
    final List<TupleAdaptedPredicate<String, ?>> secondFilters = secondView.getEntity(ENTITY_GROUP).getPostAggregationFilterFunctions();
    assertThat(secondFilters).hasSize(2);
    assertThat(secondFilters.get(0).getSelection()).hasSize(1);
    assertEquals("property1", secondFilters.get(0).getSelection()[0]);
    assertThat(secondFilters.get(1).getSelection()).hasSize(1);
    assertEquals("property4", secondFilters.get(1).getSelection()[0]);
    assertEquals(new IsMoreThan(5, false), secondFilters.get(0).getPredicate());
    assertEquals(new IsLessThan(8, false), secondFilters.get(1).getPredicate());
}
Also used : SparkSession(org.apache.spark.sql.SparkSession) Schema(uk.gov.gchq.gaffer.store.schema.Schema) ArrayList(java.util.ArrayList) GraphFilters(uk.gov.gchq.gaffer.operation.graph.GraphFilters) Operation(uk.gov.gchq.gaffer.operation.Operation) TupleAdaptedPredicate(uk.gov.gchq.koryphe.tuple.predicate.TupleAdaptedPredicate) Predicate(java.util.function.Predicate) IsLessThan(uk.gov.gchq.koryphe.impl.predicate.IsLessThan) LessThan(org.apache.spark.sql.sources.LessThan) IsLessThan(uk.gov.gchq.koryphe.impl.predicate.IsLessThan) GreaterThan(org.apache.spark.sql.sources.GreaterThan) GetRDDOfElements(uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfElements) HashSet(java.util.HashSet) TupleAdaptedPredicate(uk.gov.gchq.koryphe.tuple.predicate.TupleAdaptedPredicate) View(uk.gov.gchq.gaffer.data.elementdefinition.view.View) EqualTo(org.apache.spark.sql.sources.EqualTo) EntityId(uk.gov.gchq.gaffer.data.element.id.EntityId) Filter(org.apache.spark.sql.sources.Filter) EntitySeed(uk.gov.gchq.gaffer.operation.data.EntitySeed) IsMoreThan(uk.gov.gchq.koryphe.impl.predicate.IsMoreThan) Test(org.junit.jupiter.api.Test)

Example 5 with Filter

use of org.apache.spark.sql.sources.Filter in project Gaffer by gchq.

the class FilterToOperationConverterTest method testSpecifyPropertyFilters.

/**
 * Checks conversion of pure property filters into post-aggregation filter functions.
 *
 * <p>Three scenarios are exercised, each expected to yield a {@code GetRDDOfAllElements}:
 * a single {@code GreaterThan} on property1, a single {@code LessThan} on property4, and an
 * {@code And} of two {@code GreaterThan} filters on property1 and property4. The
 * post-aggregation filters on the view's entity and edge definitions are compared with the
 * expected selections and predicates.
 */
@Test
public void testSpecifyPropertyFilters() {
    final Schema schema = getSchema();
    // The returned session is not used here; presumably the call initialises the shared Spark session - TODO confirm.
    SparkSessionProvider.getSparkSession();

    // GreaterThan
    final Filter[] greaterThanFilters = {new GreaterThan("property1", 5)};
    final Operation greaterThanOp = new FiltersToOperationConverter(getViewFromSchema(schema), schema, greaterThanFilters).getOperation();
    assertTrue(greaterThanOp instanceof GetRDDOfAllElements);
    final View greaterThanView = ((GraphFilters) greaterThanOp).getView();
    final List<TupleAdaptedPredicate<String, ?>> greaterThanEntityFilters = greaterThanView.getEntity(ENTITY_GROUP).getPostAggregationFilterFunctions();
    assertThat(greaterThanEntityFilters).hasSize(1);
    assertArrayEquals(new String[] {"property1"}, greaterThanEntityFilters.get(0).getSelection());
    assertEquals(new IsMoreThan(5, false), greaterThanEntityFilters.get(0).getPredicate());
    // Every edge group is expected to carry the same single filter.
    for (final String edgeGroup : EDGE_GROUPS) {
        final List<TupleAdaptedPredicate<String, ?>> perEdgeFilters = greaterThanView.getEdge(edgeGroup).getPostAggregationFilterFunctions();
        assertThat(perEdgeFilters).hasSize(1);
        assertArrayEquals(new String[] {"property1"}, perEdgeFilters.get(0).getSelection());
        assertEquals(new IsMoreThan(5, false), perEdgeFilters.get(0).getPredicate());
    }

    // LessThan
    final Filter[] lessThanFilters = {new LessThan("property4", 8L)};
    final Operation lessThanOp = new FiltersToOperationConverter(getViewFromSchema(schema), schema, lessThanFilters).getOperation();
    assertTrue(lessThanOp instanceof GetRDDOfAllElements);
    // Only groups ENTITY_GROUP and EDGE_GROUP should be in the view as only they have property4
    final View lessThanView = ((GraphFilters) lessThanOp).getView();
    final List<TupleAdaptedPredicate<String, ?>> lessThanEntityFilters = lessThanView.getEntity(ENTITY_GROUP).getPostAggregationFilterFunctions();
    assertThat(lessThanEntityFilters).hasSize(1);
    assertArrayEquals(new String[] {"property4"}, lessThanEntityFilters.get(0).getSelection());
    assertEquals(new IsLessThan(8L, false), lessThanEntityFilters.get(0).getPredicate());
    final List<TupleAdaptedPredicate<String, ?>> lessThanEdgeFilters = lessThanView.getEdge(EDGE_GROUP).getPostAggregationFilterFunctions();
    assertThat(lessThanEdgeFilters).hasSize(1);
    assertArrayEquals(new String[] {"property4"}, lessThanEdgeFilters.get(0).getSelection());
    assertEquals(new IsLessThan(8L, false), lessThanEdgeFilters.get(0).getPredicate());

    // And
    final Filter[] andFilters = {new And(new GreaterThan("property1", 5), new GreaterThan("property4", 8L))};
    final Operation andOp = new FiltersToOperationConverter(getViewFromSchema(schema), schema, andFilters).getOperation();
    assertTrue(andOp instanceof GetRDDOfAllElements);
    // Only groups ENTITY_GROUP and EDGE_GROUP should be in the view as only they have property1 and property4
    final View andView = ((GraphFilters) andOp).getView();
    final List<TupleAdaptedPredicate<String, ?>> andEntityFilters = andView.getEntity(ENTITY_GROUP).getPostAggregationFilterFunctions();
    assertThat(andEntityFilters).hasSize(2);
    assertThat(andEntityFilters.get(0).getSelection()).hasSize(1);
    assertEquals("property1", andEntityFilters.get(0).getSelection()[0]);
    assertThat(andEntityFilters.get(1).getSelection()).hasSize(1);
    assertEquals("property4", andEntityFilters.get(1).getSelection()[0]);
    assertEquals(new IsMoreThan(5, false), andEntityFilters.get(0).getPredicate());
    assertEquals(new IsMoreThan(8L, false), andEntityFilters.get(1).getPredicate());
    // For the edge group only the selections are checked, not the predicates.
    final List<TupleAdaptedPredicate<String, ?>> andEdgeFilters = andView.getEdge(EDGE_GROUP).getPostAggregationFilterFunctions();
    assertThat(andEdgeFilters).hasSize(2);
    assertThat(andEdgeFilters.get(0).getSelection()).hasSize(1);
    assertEquals("property1", andEdgeFilters.get(0).getSelection()[0]);
    assertThat(andEdgeFilters.get(1).getSelection()).hasSize(1);
    assertEquals("property4", andEdgeFilters.get(1).getSelection()[0]);
}
Also used : TupleAdaptedPredicate(uk.gov.gchq.koryphe.tuple.predicate.TupleAdaptedPredicate) SparkSession(org.apache.spark.sql.SparkSession) Schema(uk.gov.gchq.gaffer.store.schema.Schema) ArrayList(java.util.ArrayList) GraphFilters(uk.gov.gchq.gaffer.operation.graph.GraphFilters) Operation(uk.gov.gchq.gaffer.operation.Operation) View(uk.gov.gchq.gaffer.data.elementdefinition.view.View) TupleAdaptedPredicate(uk.gov.gchq.koryphe.tuple.predicate.TupleAdaptedPredicate) Predicate(java.util.function.Predicate) IsLessThan(uk.gov.gchq.koryphe.impl.predicate.IsLessThan) LessThan(org.apache.spark.sql.sources.LessThan) IsLessThan(uk.gov.gchq.koryphe.impl.predicate.IsLessThan) Filter(org.apache.spark.sql.sources.Filter) GreaterThan(org.apache.spark.sql.sources.GreaterThan) GetRDDOfAllElements(uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfAllElements) And(org.apache.spark.sql.sources.And) IsMoreThan(uk.gov.gchq.koryphe.impl.predicate.IsMoreThan) Test(org.junit.jupiter.api.Test)

Aggregations

Filter (org.apache.spark.sql.sources.Filter)17 Test (org.junit.jupiter.api.Test)13 Schema (uk.gov.gchq.gaffer.store.schema.Schema)13 SparkSession (org.apache.spark.sql.SparkSession)12 EqualTo (org.apache.spark.sql.sources.EqualTo)11 Operation (uk.gov.gchq.gaffer.operation.Operation)11 GraphFilters (uk.gov.gchq.gaffer.operation.graph.GraphFilters)10 View (uk.gov.gchq.gaffer.data.elementdefinition.view.View)9 HashSet (java.util.HashSet)8 ArrayList (java.util.ArrayList)7 EntitySeed (uk.gov.gchq.gaffer.operation.data.EntitySeed)6 GetRDDOfElements (uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfElements)6 GreaterThan (org.apache.spark.sql.sources.GreaterThan)5 EntityId (uk.gov.gchq.gaffer.data.element.id.EntityId)5 GetRDDOfAllElements (uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfAllElements)5 LessThan (org.apache.spark.sql.sources.LessThan)4 IsLessThan (uk.gov.gchq.koryphe.impl.predicate.IsLessThan)4 IsMoreThan (uk.gov.gchq.koryphe.impl.predicate.IsMoreThan)4 TupleAdaptedPredicate (uk.gov.gchq.koryphe.tuple.predicate.TupleAdaptedPredicate)4 Set (java.util.Set)3