Search in sources:

Example 1 with EqualTo

use of org.apache.spark.sql.sources.EqualTo in project Gaffer by gchq.

the class FiltersToOperationConverter method getFunctionsFromFilter.

/**
 * Converts a Spark {@link Filter} to a map from group to a list of Gaffer {@link ConsumerFunctionContext}s.
 * <p>
 * Note that Spark also applies all the filters provided to the <code>buildScan(String[], Filter[])</code> method
 * so not implementing some of the provided {@link Filter}s in Gaffer will not cause errors. However, as many as
 * possible should be implemented so that as much filtering as possible happens in iterators running in Accumulo's
 * tablet servers (this avoids unnecessary data transfer from Accumulo to Spark).
 * <p>
 * {@link EqualTo} and any filter type without a branch below are not converted; for those an empty map is
 * returned. An {@link And} filter is converted by recursively converting both sides and concatenating, per
 * group, the functions from the left side followed by those from the right side.
 *
 * @param filter The {@link Filter} to transform.
 * @return A map from {@link String} to {@link ConsumerFunctionContext}s implementing the provided {@link Filter}.
 * Never <code>null</code>, but possibly empty.
 */
private Map<String, List<ConsumerFunctionContext<String, FilterFunction>>> getFunctionsFromFilter(final Filter filter) {
    final Map<String, List<ConsumerFunctionContext<String, FilterFunction>>> map = new HashMap<>();
    if (filter instanceof EqualTo) {
        // Not dealt with as requires a FilterFunction that returns null if either the controlValue or the
        // test value is null - the API of FilterFunction doesn't permit this.
    } else if (filter instanceof EqualNullSafe) {
        final EqualNullSafe equalNullSafe = (EqualNullSafe) filter;
        addFunctionToRelevantGroups(map, filter, new IsEqual(equalNullSafe.value()),
                equalNullSafe.attribute(), "IsEqual");
    } else if (filter instanceof GreaterThan) {
        final GreaterThan greaterThan = (GreaterThan) filter;
        // The boolean flag is false for the strict comparison and true for the "or equal" variant below.
        addFunctionToRelevantGroups(map, filter, new IsMoreThan((Comparable<?>) greaterThan.value(), false),
                greaterThan.attribute(), "IsMoreThan");
    } else if (filter instanceof GreaterThanOrEqual) {
        final GreaterThanOrEqual greaterThanOrEqual = (GreaterThanOrEqual) filter;
        addFunctionToRelevantGroups(map, filter, new IsMoreThan((Comparable<?>) greaterThanOrEqual.value(), true),
                greaterThanOrEqual.attribute(), "IsMoreThan");
    } else if (filter instanceof LessThan) {
        final LessThan lessThan = (LessThan) filter;
        addFunctionToRelevantGroups(map, filter, new IsLessThan((Comparable<?>) lessThan.value(), false),
                lessThan.attribute(), "IsLessThan");
    } else if (filter instanceof LessThanOrEqual) {
        final LessThanOrEqual lessThanOrEqual = (LessThanOrEqual) filter;
        addFunctionToRelevantGroups(map, filter, new IsLessThan((Comparable<?>) lessThanOrEqual.value(), true),
                lessThanOrEqual.attribute(), "IsLessThan");
    } else if (filter instanceof In) {
        final In in = (In) filter;
        addFunctionToRelevantGroups(map, filter, new IsIn(new HashSet<>(Arrays.asList(in.values()))),
                in.attribute(), "IsIn");
    } else if (filter instanceof IsNull) {
        final IsNull isNull = (IsNull) filter;
        addFunctionToRelevantGroups(map, filter, new Not(new Exists()), isNull.attribute(), "Not(Exists)");
    } else if (filter instanceof IsNotNull) {
        final IsNotNull isNotNull = (IsNotNull) filter;
        addFunctionToRelevantGroups(map, filter, new Exists(), isNotNull.attribute(), "Exists");
    } else if (filter instanceof And) {
        final And and = (And) filter;
        // Convert both sides first, then merge their per-group functions (left before right).
        final Map<String, List<ConsumerFunctionContext<String, FilterFunction>>> left = getFunctionsFromFilter(and.left());
        final Map<String, List<ConsumerFunctionContext<String, FilterFunction>>> right = getFunctionsFromFilter(and.right());
        final Set<String> relevantGroups = getGroupsFromFilter(filter);
        if (relevantGroups != null) {
            for (final String group : relevantGroups) {
                // A group that is relevant but has no converted functions still gets an (empty) entry.
                final List<ConsumerFunctionContext<String, FilterFunction>> groupFunctions = getOrCreateGroupFunctions(map, group);
                final List<ConsumerFunctionContext<String, FilterFunction>> leftFunctions = left.get(group);
                if (leftFunctions != null) {
                    groupFunctions.addAll(leftFunctions);
                }
                final List<ConsumerFunctionContext<String, FilterFunction>> rightFunctions = right.get(group);
                if (rightFunctions != null) {
                    groupFunctions.addAll(rightFunctions);
                }
            }
        }
        LOGGER.debug("Converted {} to list of filters ({})", filter, StringUtils.join(map.entrySet(), ','));
    }
    return map;
}

/**
 * Registers a single-property {@link FilterFunction} against every group that is relevant to the given
 * {@link Filter} (as reported by <code>getGroupsFromFilter</code>) and logs the conversion. If no groups
 * are reported (<code>null</code>) the map is left untouched, but the conversion is still logged.
 *
 * @param map          The map from group to function contexts to add to.
 * @param filter       The Spark {@link Filter} being converted; used to find the relevant groups and for logging.
 * @param function     The Gaffer function implementing the filter; the same instance is shared by all groups.
 * @param attribute    The name of the property the function should be applied to.
 * @param functionName The human readable name of the Gaffer function, used only in the debug log message.
 */
private void addFunctionToRelevantGroups(final Map<String, List<ConsumerFunctionContext<String, FilterFunction>>> map,
                                         final Filter filter,
                                         final FilterFunction function,
                                         final String attribute,
                                         final String functionName) {
    final List<String> properties = Collections.singletonList(attribute);
    final Set<String> relevantGroups = getGroupsFromFilter(filter);
    if (relevantGroups != null) {
        for (final String group : relevantGroups) {
            getOrCreateGroupFunctions(map, group).add(new ConsumerFunctionContext<>(function, properties));
        }
    }
    LOGGER.debug("Converted {} to {} ({})", filter, functionName, attribute);
}

/**
 * Returns the list of function contexts registered for the given group, creating and registering an empty
 * list first if the group has none yet.
 *
 * @param map   The map from group to function contexts.
 * @param group The group to look up.
 * @return The (possibly newly created) mutable list held in the map for the group.
 */
private List<ConsumerFunctionContext<String, FilterFunction>> getOrCreateGroupFunctions(
        final Map<String, List<ConsumerFunctionContext<String, FilterFunction>>> map,
        final String group) {
    List<ConsumerFunctionContext<String, FilterFunction>> functions = map.get(group);
    if (functions == null) {
        functions = new ArrayList<>();
        map.put(group, functions);
    }
    return functions;
}
Also used : FilterFunction(uk.gov.gchq.gaffer.function.FilterFunction) HashSet(java.util.HashSet) Set(java.util.Set) EqualNullSafe(org.apache.spark.sql.sources.EqualNullSafe) LessThanOrEqual(org.apache.spark.sql.sources.LessThanOrEqual) HashMap(java.util.HashMap) IsIn(uk.gov.gchq.gaffer.function.filter.IsIn) In(org.apache.spark.sql.sources.In) ArrayList(java.util.ArrayList) GreaterThanOrEqual(org.apache.spark.sql.sources.GreaterThanOrEqual) LessThan(org.apache.spark.sql.sources.LessThan) IsLessThan(uk.gov.gchq.gaffer.function.filter.IsLessThan) IsNotNull(org.apache.spark.sql.sources.IsNotNull) ConsumerFunctionContext(uk.gov.gchq.gaffer.function.context.ConsumerFunctionContext) IsLessThan(uk.gov.gchq.gaffer.function.filter.IsLessThan) GreaterThan(org.apache.spark.sql.sources.GreaterThan) ArrayList(java.util.ArrayList) List(java.util.List) IsIn(uk.gov.gchq.gaffer.function.filter.IsIn) EqualTo(org.apache.spark.sql.sources.EqualTo) IsEqual(uk.gov.gchq.gaffer.function.filter.IsEqual) Not(uk.gov.gchq.gaffer.function.filter.Not) Exists(uk.gov.gchq.gaffer.function.filter.Exists) And(org.apache.spark.sql.sources.And) IsNull(org.apache.spark.sql.sources.IsNull) IsMoreThan(uk.gov.gchq.gaffer.function.filter.IsMoreThan) HashMap(java.util.HashMap) Map(java.util.Map)

Example 2 with EqualTo

use of org.apache.spark.sql.sources.EqualTo in project Gaffer by gchq.

the class FilterToOperationConverterTest method testSpecifyVertexAndPropertyFilter.

/**
 * Checks that an {@link EqualTo} filter on the vertex column combined with property filters is converted to a
 * {@link GetRDDOfElements} seeded with that vertex, restricted to the entity group, and carrying one
 * post-aggregation filter function per property filter (in the order the filters were supplied).
 *
 * @throws OperationException declared by the test signature; not expected to be thrown.
 */
@Test
public void testSpecifyVertexAndPropertyFilter() throws OperationException {
    final Schema schema = getSchema();
    final SQLContext sqlContext = getSqlContext("testSpecifyVertexAndPropertyFilter");
    try {
        // Specify vertex and a filter on property1
        Filter[] filters = new Filter[2];
        filters[0] = new GreaterThan("property1", 5);
        filters[1] = new EqualTo(SchemaToStructTypeConverter.VERTEX_COL_NAME, "0");
        FiltersToOperationConverter converter = new FiltersToOperationConverter(sqlContext, getViewFromSchema(schema), schema, filters);
        AbstractGetRDD<?> operation = converter.getOperation();
        assertTrue(operation instanceof GetRDDOfElements);
        assertEquals(1, operation.getView().getEntityGroups().size());
        assertEquals(0, operation.getView().getEdgeGroups().size());
        final Set<EntitySeed> seeds = new HashSet<>();
        for (final Object seed : ((GetRDDOfElements) operation).getSeeds()) {
            seeds.add((EntitySeed) seed);
        }
        assertEquals(Collections.singleton(new EntitySeed("0")), seeds);
        View opView = operation.getView();
        List<ConsumerFunctionContext<String, FilterFunction>> entityPostAggFilters = opView.getEntity(ENTITY_GROUP).getPostAggregationFilterFunctions();
        assertEquals(1, entityPostAggFilters.size());
        final List<String> expectedProperties = new ArrayList<>();
        expectedProperties.add("property1");
        assertEquals(1, entityPostAggFilters.get(0).getSelection().size());
        assertEquals(expectedProperties.get(0), entityPostAggFilters.get(0).getSelection().get(0));
        final List<FilterFunction> expectedFunctions = new ArrayList<>();
        expectedFunctions.add(new IsMoreThan(5, false));
        assertEquals(expectedFunctions.get(0), entityPostAggFilters.get(0).getFunction());

        // Specify vertex and filters on properties property1 and property4
        filters = new Filter[3];
        filters[0] = new GreaterThan("property1", 5);
        filters[1] = new EqualTo(SchemaToStructTypeConverter.VERTEX_COL_NAME, "0");
        filters[2] = new LessThan("property4", 8);
        converter = new FiltersToOperationConverter(sqlContext, getViewFromSchema(schema), schema, filters);
        operation = converter.getOperation();
        assertTrue(operation instanceof GetRDDOfElements);
        assertEquals(1, operation.getView().getEntityGroups().size());
        assertEquals(0, operation.getView().getEdgeGroups().size());
        seeds.clear();
        for (final Object seed : ((GetRDDOfElements) operation).getSeeds()) {
            seeds.add((EntitySeed) seed);
        }
        assertEquals(Collections.singleton(new EntitySeed("0")), seeds);
        opView = operation.getView();
        entityPostAggFilters = opView.getEntity(ENTITY_GROUP).getPostAggregationFilterFunctions();
        assertEquals(2, entityPostAggFilters.size());
        expectedProperties.clear();
        expectedProperties.add("property1");
        expectedProperties.add("property4");
        assertEquals(1, entityPostAggFilters.get(0).getSelection().size());
        assertEquals(expectedProperties.get(0), entityPostAggFilters.get(0).getSelection().get(0));
        assertEquals(1, entityPostAggFilters.get(1).getSelection().size());
        assertEquals(expectedProperties.get(1), entityPostAggFilters.get(1).getSelection().get(0));
        expectedFunctions.clear();
        expectedFunctions.add(new IsMoreThan(5, false));
        expectedFunctions.add(new IsLessThan(8, false));
        assertEquals(expectedFunctions.get(0), entityPostAggFilters.get(0).getFunction());
        assertEquals(expectedFunctions.get(1), entityPostAggFilters.get(1).getFunction());
    } finally {
        // Stop the context even when an assertion fails, so a failing test does not leave it running.
        sqlContext.sparkContext().stop();
    }
}
Also used : FilterFunction(uk.gov.gchq.gaffer.function.FilterFunction) Schema(uk.gov.gchq.gaffer.store.schema.Schema) ArrayList(java.util.ArrayList) View(uk.gov.gchq.gaffer.data.elementdefinition.view.View) EqualTo(org.apache.spark.sql.sources.EqualTo) LessThan(org.apache.spark.sql.sources.LessThan) IsLessThan(uk.gov.gchq.gaffer.function.filter.IsLessThan) ConsumerFunctionContext(uk.gov.gchq.gaffer.function.context.ConsumerFunctionContext) IsLessThan(uk.gov.gchq.gaffer.function.filter.IsLessThan) Filter(org.apache.spark.sql.sources.Filter) GreaterThan(org.apache.spark.sql.sources.GreaterThan) EntitySeed(uk.gov.gchq.gaffer.operation.data.EntitySeed) IsMoreThan(uk.gov.gchq.gaffer.function.filter.IsMoreThan) SQLContext(org.apache.spark.sql.SQLContext) GetRDDOfElements(uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfElements) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 3 with EqualTo

use of org.apache.spark.sql.sources.EqualTo in project Gaffer by gchq.

the class FilterToOperationConverterTest method testSingleGroup.

/**
 * Checks that an {@link EqualTo} filter on the group column naming the entity group is converted to a
 * {@link GetRDDOfAllElements} whose view contains only that entity group and no edge groups.
 *
 * @throws OperationException declared by the test signature; not expected to be thrown.
 */
@Test
public void testSingleGroup() throws OperationException {
    final Schema schema = getSchema();
    final SQLContext sqlContext = getSqlContext("testSingleGroup");
    try {
        final Filter[] filters = new Filter[1];
        filters[0] = new EqualTo(SchemaToStructTypeConverter.GROUP, ENTITY_GROUP);
        final FiltersToOperationConverter converter = new FiltersToOperationConverter(sqlContext, getViewFromSchema(schema), schema, filters);
        final AbstractGetRDD<?> operation = converter.getOperation();
        assertTrue(operation instanceof GetRDDOfAllElements);
        assertEquals(Collections.singleton(ENTITY_GROUP), operation.getView().getEntityGroups());
        assertEquals(0, operation.getView().getEdgeGroups().size());
    } finally {
        // Stop the context even when an assertion fails, so a failing test does not leave it running.
        sqlContext.sparkContext().stop();
    }
}
Also used : Filter(org.apache.spark.sql.sources.Filter) GetRDDOfAllElements(uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfAllElements) Schema(uk.gov.gchq.gaffer.store.schema.Schema) SQLContext(org.apache.spark.sql.SQLContext) EqualTo(org.apache.spark.sql.sources.EqualTo) Test(org.junit.Test)

Example 4 with EqualTo

use of org.apache.spark.sql.sources.EqualTo in project Gaffer by gchq.

the class FilterToOperationConverterTest method testSingleGroupNotInSchema.

/**
 * Checks that an {@link EqualTo} filter on the group column naming a group that is not the entity or edge
 * groups used elsewhere in these tests ("random") results in no operation being produced, i.e.
 * <code>getOperation()</code> returns <code>null</code>.
 *
 * @throws OperationException declared by the test signature; not expected to be thrown.
 */
@Test
public void testSingleGroupNotInSchema() throws OperationException {
    final Schema schema = getSchema();
    final SQLContext sqlContext = getSqlContext("testSingleGroupNotInSchema");
    try {
        final Filter[] filters = new Filter[1];
        filters[0] = new EqualTo(SchemaToStructTypeConverter.GROUP, "random");
        final FiltersToOperationConverter converter = new FiltersToOperationConverter(sqlContext, getViewFromSchema(schema), schema, filters);
        final AbstractGetRDD<?> operation = converter.getOperation();
        assertNull(operation);
    } finally {
        // Stop the context even when an assertion fails, so a failing test does not leave it running.
        sqlContext.sparkContext().stop();
    }
}
Also used : Filter(org.apache.spark.sql.sources.Filter) Schema(uk.gov.gchq.gaffer.store.schema.Schema) SQLContext(org.apache.spark.sql.SQLContext) EqualTo(org.apache.spark.sql.sources.EqualTo) Test(org.junit.Test)

Example 5 with EqualTo

use of org.apache.spark.sql.sources.EqualTo in project Gaffer by gchq.

the class FilterToOperationConverterTest method testSpecifySourceOrDestinationAndPropertyFilter.

/**
 * Checks that an {@link EqualTo} filter on the source column combined with property filters is converted to a
 * {@link GetRDDOfElements} seeded with that vertex, restricted to edge groups, and carrying one
 * post-aggregation filter function per property filter (in the order the filters were supplied).
 *
 * @throws OperationException declared by the test signature; not expected to be thrown.
 */
@Test
public void testSpecifySourceOrDestinationAndPropertyFilter() throws OperationException {
    final Schema schema = getSchema();
    // Pass this test's own name, matching what the other tests do (it was previously copy-pasted from another test).
    final SQLContext sqlContext = getSqlContext("testSpecifySourceOrDestinationAndPropertyFilter");
    try {
        // Specify src and a filter on property1
        Filter[] filters = new Filter[2];
        filters[0] = new GreaterThan("property1", 5);
        filters[1] = new EqualTo(SchemaToStructTypeConverter.SRC_COL_NAME, "0");
        FiltersToOperationConverter converter = new FiltersToOperationConverter(sqlContext, getViewFromSchema(schema), schema, filters);
        AbstractGetRDD<?> operation = converter.getOperation();
        assertTrue(operation instanceof GetRDDOfElements);
        assertEquals(0, operation.getView().getEntityGroups().size());
        assertEquals(2, operation.getView().getEdgeGroups().size());
        final Set<EntitySeed> seeds = new HashSet<>();
        for (final Object seed : ((GetRDDOfElements) operation).getSeeds()) {
            seeds.add((EntitySeed) seed);
        }
        assertEquals(Collections.singleton(new EntitySeed("0")), seeds);
        View opView = operation.getView();
        for (final String edgeGroup : EDGE_GROUPS) {
            final List<ConsumerFunctionContext<String, FilterFunction>> groupPostAggFilters = opView.getEdge(edgeGroup).getPostAggregationFilterFunctions();
            assertEquals(1, groupPostAggFilters.size());
            assertEquals(new ArrayList<>(Collections.singleton("property1")), groupPostAggFilters.get(0).getSelection());
            assertEquals(new IsMoreThan(5, false), groupPostAggFilters.get(0).getFunction());
        }

        // Specify src and filters on property1 and property4
        filters = new Filter[3];
        filters[0] = new GreaterThan("property1", 5);
        filters[1] = new EqualTo(SchemaToStructTypeConverter.SRC_COL_NAME, "0");
        filters[2] = new LessThan("property4", 8);
        converter = new FiltersToOperationConverter(sqlContext, getViewFromSchema(schema), schema, filters);
        operation = converter.getOperation();
        assertTrue(operation instanceof GetRDDOfElements);
        assertEquals(0, operation.getView().getEntityGroups().size());
        assertEquals(1, operation.getView().getEdgeGroups().size());
        seeds.clear();
        for (final Object seed : ((GetRDDOfElements) operation).getSeeds()) {
            seeds.add((EntitySeed) seed);
        }
        assertEquals(Collections.singleton(new EntitySeed("0")), seeds);
        opView = operation.getView();
        final List<ConsumerFunctionContext<String, FilterFunction>> edgePostAggFilters = opView.getEdge(EDGE_GROUP).getPostAggregationFilterFunctions();
        assertEquals(2, edgePostAggFilters.size());
        final List<String> expectedProperties = new ArrayList<>();
        expectedProperties.add("property1");
        expectedProperties.add("property4");
        assertEquals(1, edgePostAggFilters.get(0).getSelection().size());
        assertEquals(expectedProperties.get(0), edgePostAggFilters.get(0).getSelection().get(0));
        assertEquals(new IsMoreThan(5, false), edgePostAggFilters.get(0).getFunction());
        assertEquals(1, edgePostAggFilters.get(1).getSelection().size());
        assertEquals(expectedProperties.get(1), edgePostAggFilters.get(1).getSelection().get(0));
        assertEquals(new IsLessThan(8, false), edgePostAggFilters.get(1).getFunction());
    } finally {
        // Stop the context even when an assertion fails, so a failing test does not leave it running.
        sqlContext.sparkContext().stop();
    }
}
Also used : Schema(uk.gov.gchq.gaffer.store.schema.Schema) ArrayList(java.util.ArrayList) View(uk.gov.gchq.gaffer.data.elementdefinition.view.View) EqualTo(org.apache.spark.sql.sources.EqualTo) LessThan(org.apache.spark.sql.sources.LessThan) IsLessThan(uk.gov.gchq.gaffer.function.filter.IsLessThan) ConsumerFunctionContext(uk.gov.gchq.gaffer.function.context.ConsumerFunctionContext) IsLessThan(uk.gov.gchq.gaffer.function.filter.IsLessThan) Filter(org.apache.spark.sql.sources.Filter) GreaterThan(org.apache.spark.sql.sources.GreaterThan) EntitySeed(uk.gov.gchq.gaffer.operation.data.EntitySeed) IsMoreThan(uk.gov.gchq.gaffer.function.filter.IsMoreThan) SQLContext(org.apache.spark.sql.SQLContext) GetRDDOfElements(uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfElements) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

EqualTo (org.apache.spark.sql.sources.EqualTo)12 Filter (org.apache.spark.sql.sources.Filter)10 SQLContext (org.apache.spark.sql.SQLContext)9 Test (org.junit.Test)9 Schema (uk.gov.gchq.gaffer.store.schema.Schema)9 HashSet (java.util.HashSet)7 EntitySeed (uk.gov.gchq.gaffer.operation.data.EntitySeed)6 GetRDDOfElements (uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfElements)6 ArrayList (java.util.ArrayList)3 GreaterThan (org.apache.spark.sql.sources.GreaterThan)3 LessThan (org.apache.spark.sql.sources.LessThan)3 View (uk.gov.gchq.gaffer.data.elementdefinition.view.View)3 ConsumerFunctionContext (uk.gov.gchq.gaffer.function.context.ConsumerFunctionContext)3 IsLessThan (uk.gov.gchq.gaffer.function.filter.IsLessThan)3 IsMoreThan (uk.gov.gchq.gaffer.function.filter.IsMoreThan)3 GetRDDOfAllElements (uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfAllElements)3 Set (java.util.Set)2 In (org.apache.spark.sql.sources.In)2 Or (org.apache.spark.sql.sources.Or)2 FilterFunction (uk.gov.gchq.gaffer.function.FilterFunction)2