
Example 1 with GetRDDOfElements

Use of uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfElements in project Gaffer by gchq.

Taken from the class GetRDDOfElementsHandlerTest, method checkGetCorrectElementsInRDDForEntitySeed.
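Stripped of the setup and assertions, the query pattern this test exercises looks roughly like the sketch below. It reuses graph1, sparkContext, configurationString and user exactly as they are prepared in the test body; only the local names query and matched are new here.

    // Build a GetRDDOfElements seeded with a single EntitySeed, attach the
    // serialised Hadoop configuration as an operation option, then execute
    // the operation against the graph and collect the matching elements.
    final GetRDDOfElements<EntitySeed> query = new GetRDDOfElements.Builder<EntitySeed>()
            .sparkContext(sparkContext)
            .seeds(Collections.singleton(new EntitySeed("1")))
            .build();
    query.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    final RDD<Element> rdd = graph1.execute(query, user);
    final Element[] matched = (Element[]) rdd.collect();

The full test repeats this query with a view restricted to the entity group, a view restricted to the edge group, and a pair of seeds ("1" and "5").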

@Test
public void checkGetCorrectElementsInRDDForEntitySeed() throws OperationException, IOException {
    final Graph graph1 = new Graph.Builder()
            .addSchema(getClass().getResourceAsStream("/schema/dataSchema.json"))
            .addSchema(getClass().getResourceAsStream("/schema/dataTypes.json"))
            .addSchema(getClass().getResourceAsStream("/schema/storeTypes.json"))
            .storeProperties(getClass().getResourceAsStream("/store.properties"))
            .build();
    final List<Element> elements = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
        final Entity entity = new Entity(ENTITY_GROUP);
        entity.setVertex("" + i);
        final Edge edge1 = new Edge(EDGE_GROUP);
        edge1.setSource("" + i);
        edge1.setDestination("B");
        edge1.setDirected(false);
        edge1.putProperty("count", 2);
        final Edge edge2 = new Edge(EDGE_GROUP);
        edge2.setSource("" + i);
        edge2.setDestination("C");
        edge2.setDirected(false);
        edge2.putProperty("count", 4);
        elements.add(edge1);
        elements.add(edge2);
        elements.add(entity);
    }
    final User user = new User();
    graph1.execute(new AddElements(elements), user);
    final SparkConf sparkConf = new SparkConf()
            .setMaster("local")
            .setAppName("testCheckGetCorrectElementsInRDDForEntitySeed")
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .set("spark.kryo.registrator", "uk.gov.gchq.gaffer.spark.serialisation.kryo.Registrator")
            .set("spark.driver.allowMultipleContexts", "true");
    final SparkContext sparkContext = new SparkContext(sparkConf);
    // Create Hadoop configuration and serialise to a string
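    // (The serialised string is attached to each GetRDDOfElements operation
    // below via the AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY option.)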
    final Configuration configuration = new Configuration();
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    configuration.write(new DataOutputStream(baos));
    final String configurationString = new String(baos.toByteArray(), CommonConstants.UTF_8);
    // Check get correct edges for "1"
    GetRDDOfElements<EntitySeed> rddQuery = new GetRDDOfElements.Builder<EntitySeed>().sparkContext(sparkContext).seeds(Collections.singleton(new EntitySeed("1"))).build();
    rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    RDD<Element> rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    Set<Element> results = new HashSet<>();
    // NB: IDE suggests the cast in the following line is unnecessary but compilation fails without it
    Element[] returnedElements = (Element[]) rdd.collect();
    for (int i = 0; i < returnedElements.length; i++) {
        results.add(returnedElements[i]);
    }
    final Set<Element> expectedElements = new HashSet<>();
    final Entity entity1 = new Entity(ENTITY_GROUP);
    entity1.setVertex("1");
    final Edge edge1B = new Edge(EDGE_GROUP);
    edge1B.setSource("1");
    edge1B.setDestination("B");
    edge1B.setDirected(false);
    edge1B.putProperty("count", 2);
    final Edge edge1C = new Edge(EDGE_GROUP);
    edge1C.setSource("1");
    edge1C.setDestination("C");
    edge1C.setDirected(false);
    edge1C.putProperty("count", 4);
    expectedElements.add(entity1);
    expectedElements.add(edge1B);
    expectedElements.add(edge1C);
    assertEquals(expectedElements, results);
    // Check get correct edges for "1" when specify entities only
    rddQuery = new GetRDDOfElements.Builder<EntitySeed>().sparkContext(sparkContext).seeds(Collections.singleton(new EntitySeed("1"))).view(new View.Builder().entity(ENTITY_GROUP).build()).build();
    rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    results.clear();
    returnedElements = (Element[]) rdd.collect();
    for (int i = 0; i < returnedElements.length; i++) {
        results.add(returnedElements[i]);
    }
    expectedElements.clear();
    expectedElements.add(entity1);
    assertEquals(expectedElements, results);
    // Check get correct edges for "1" when specify edges only
    rddQuery = new GetRDDOfElements.Builder<EntitySeed>().sparkContext(sparkContext).seeds(Collections.singleton(new EntitySeed("1"))).view(new View.Builder().edge(EDGE_GROUP).build()).build();
    rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    results.clear();
    returnedElements = (Element[]) rdd.collect();
    for (int i = 0; i < returnedElements.length; i++) {
        results.add(returnedElements[i]);
    }
    expectedElements.clear();
    expectedElements.add(edge1B);
    expectedElements.add(edge1C);
    assertEquals(expectedElements, results);
    // Check get correct edges for "1" and "5"
    Set<EntitySeed> seeds = new HashSet<>();
    seeds.add(new EntitySeed("1"));
    seeds.add(new EntitySeed("5"));
    rddQuery = new GetRDDOfElements.Builder<EntitySeed>().sparkContext(sparkContext).seeds(seeds).build();
    rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    results.clear();
    returnedElements = (Element[]) rdd.collect();
    for (int i = 0; i < returnedElements.length; i++) {
        results.add(returnedElements[i]);
    }
    final Entity entity5 = new Entity(ENTITY_GROUP);
    entity5.setVertex("5");
    final Edge edge5B = new Edge(EDGE_GROUP);
    edge5B.setSource("5");
    edge5B.setDestination("B");
    edge5B.setDirected(false);
    edge5B.putProperty("count", 2);
    final Edge edge5C = new Edge(EDGE_GROUP);
    edge5C.setSource("5");
    edge5C.setDestination("C");
    edge5C.setDirected(false);
    edge5C.putProperty("count", 4);
    expectedElements.clear();
    expectedElements.add(entity1);
    expectedElements.add(edge1B);
    expectedElements.add(edge1C);
    expectedElements.add(entity5);
    expectedElements.add(edge5B);
    expectedElements.add(edge5C);
    assertEquals(expectedElements, results);
    sparkContext.stop();
}
Also used : AddElements(uk.gov.gchq.gaffer.operation.impl.add.AddElements) Entity(uk.gov.gchq.gaffer.data.element.Entity) User(uk.gov.gchq.gaffer.user.User) Configuration(org.apache.hadoop.conf.Configuration) DataOutputStream(java.io.DataOutputStream) Element(uk.gov.gchq.gaffer.data.element.Element) ArrayList(java.util.ArrayList) GetRDDOfElements(uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfElements) HashSet(java.util.HashSet) ByteArrayOutputStream(org.apache.commons.io.output.ByteArrayOutputStream) View(uk.gov.gchq.gaffer.data.elementdefinition.view.View) Graph(uk.gov.gchq.gaffer.graph.Graph) SparkContext(org.apache.spark.SparkContext) EntitySeed(uk.gov.gchq.gaffer.operation.data.EntitySeed) Edge(uk.gov.gchq.gaffer.data.element.Edge) SparkConf(org.apache.spark.SparkConf) Test(org.junit.Test)

Example 2 with GetRDDOfElements

Use of uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfElements in project Gaffer by gchq.

Taken from the class GetRDDOfElementsHandlerTest, method checkGetCorrectElementsInRDDForEdgeSeed.
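The EdgeSeed variant below follows the same shape: the seed names a source, a destination and a directed flag, and includeEntities/includeEdges control which element types are returned. A minimal sketch, reusing graph1, sparkContext, configurationString and user from the test body (only the local variable names are new):

    // Query for the undirected edge 1 -> B, returning edges only.
    final GetRDDOfElements<EdgeSeed> query = new GetRDDOfElements.Builder<EdgeSeed>()
            .sparkContext(sparkContext)
            .seeds(Collections.singleton(new EdgeSeed("1", "B", false)))
            .includeEdges(GetOperation.IncludeEdgeType.ALL)
            .includeEntities(false)
            .build();
    query.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    final RDD<Element> rdd = graph1.execute(query, user);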

@Test
public void checkGetCorrectElementsInRDDForEdgeSeed() throws OperationException, IOException {
    final Graph graph1 = new Graph.Builder()
            .addSchema(getClass().getResourceAsStream("/schema/dataSchema.json"))
            .addSchema(getClass().getResourceAsStream("/schema/dataTypes.json"))
            .addSchema(getClass().getResourceAsStream("/schema/storeTypes.json"))
            .storeProperties(getClass().getResourceAsStream("/store.properties"))
            .build();
    final List<Element> elements = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
        final Entity entity = new Entity(ENTITY_GROUP);
        entity.setVertex("" + i);
        final Edge edge1 = new Edge(EDGE_GROUP);
        edge1.setSource("" + i);
        edge1.setDestination("B");
        edge1.setDirected(false);
        edge1.putProperty("count", 2);
        final Edge edge2 = new Edge(EDGE_GROUP);
        edge2.setSource("" + i);
        edge2.setDestination("C");
        edge2.setDirected(false);
        edge2.putProperty("count", 4);
        elements.add(edge1);
        elements.add(edge2);
        elements.add(entity);
    }
    final User user = new User();
    graph1.execute(new AddElements(elements), user);
    final SparkConf sparkConf = new SparkConf()
            .setMaster("local")
            .setAppName("testCheckGetCorrectElementsInRDDForEdgeSeed")
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .set("spark.kryo.registrator", "uk.gov.gchq.gaffer.spark.serialisation.kryo.Registrator")
            .set("spark.driver.allowMultipleContexts", "true");
    final SparkContext sparkContext = new SparkContext(sparkConf);
    // Create Hadoop configuration and serialise to a string
    final Configuration configuration = new Configuration();
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    configuration.write(new DataOutputStream(baos));
    final String configurationString = new String(baos.toByteArray(), CommonConstants.UTF_8);
    // Check get correct edges for EdgeSeed 1 -> B
    GetRDDOfElements<EdgeSeed> rddQuery = new GetRDDOfElements.Builder<EdgeSeed>().sparkContext(sparkContext).seeds(Collections.singleton(new EdgeSeed("1", "B", false))).includeEdges(GetOperation.IncludeEdgeType.ALL).includeEntities(false).build();
    rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    RDD<Element> rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    Set<Element> results = new HashSet<>();
    // NB: IDE suggests the cast in the following line is unnecessary but compilation fails without it
    Element[] returnedElements = (Element[]) rdd.collect();
    for (int i = 0; i < returnedElements.length; i++) {
        results.add(returnedElements[i]);
    }
    final Set<Element> expectedElements = new HashSet<>();
    final Edge edge1B = new Edge(EDGE_GROUP);
    edge1B.setSource("1");
    edge1B.setDestination("B");
    edge1B.setDirected(false);
    edge1B.putProperty("count", 2);
    expectedElements.add(edge1B);
    assertEquals(expectedElements, results);
    // Check get entity for 1 when query for 1 -> B and specify entities only
    rddQuery = new GetRDDOfElements.Builder<EdgeSeed>().sparkContext(sparkContext).seeds(Collections.singleton(new EdgeSeed("1", "B", false))).includeEntities(true).includeEdges(GetOperation.IncludeEdgeType.NONE).build();
    rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    results.clear();
    returnedElements = (Element[]) rdd.collect();
    for (int i = 0; i < returnedElements.length; i++) {
        results.add(returnedElements[i]);
    }
    expectedElements.clear();
    final Entity entity1 = new Entity(ENTITY_GROUP);
    entity1.setVertex("1");
    expectedElements.add(entity1);
    assertEquals(expectedElements, results);
    // Check get correct edges for 1 -> B when specify edges only
    rddQuery = new GetRDDOfElements.Builder<EdgeSeed>().sparkContext(sparkContext).seeds(Collections.singleton(new EdgeSeed("1", "B", false))).view(new View.Builder().edge(EDGE_GROUP).build()).includeEntities(false).includeEdges(GetOperation.IncludeEdgeType.ALL).build();
    rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    results.clear();
    returnedElements = (Element[]) rdd.collect();
    for (int i = 0; i < returnedElements.length; i++) {
        results.add(returnedElements[i]);
    }
    expectedElements.clear();
    expectedElements.add(edge1B);
    assertEquals(expectedElements, results);
    // Check get correct edges for 1 -> B and 5 -> C
    Set<EdgeSeed> seeds = new HashSet<>();
    seeds.add(new EdgeSeed("1", "B", false));
    seeds.add(new EdgeSeed("5", "C", false));
    rddQuery = new GetRDDOfElements.Builder<EdgeSeed>().sparkContext(sparkContext).includeEntities(false).seeds(seeds).build();
    rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    results.clear();
    returnedElements = (Element[]) rdd.collect();
    for (int i = 0; i < returnedElements.length; i++) {
        results.add(returnedElements[i]);
    }
    final Edge edge5C = new Edge(EDGE_GROUP);
    edge5C.setSource("5");
    edge5C.setDestination("C");
    edge5C.setDirected(false);
    edge5C.putProperty("count", 4);
    expectedElements.clear();
    expectedElements.add(edge1B);
    expectedElements.add(edge5C);
    assertEquals(expectedElements, results);
    sparkContext.stop();
}
Also used : AddElements(uk.gov.gchq.gaffer.operation.impl.add.AddElements) Entity(uk.gov.gchq.gaffer.data.element.Entity) User(uk.gov.gchq.gaffer.user.User) Configuration(org.apache.hadoop.conf.Configuration) DataOutputStream(java.io.DataOutputStream) Element(uk.gov.gchq.gaffer.data.element.Element) ArrayList(java.util.ArrayList) GetRDDOfElements(uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfElements) HashSet(java.util.HashSet) ByteArrayOutputStream(org.apache.commons.io.output.ByteArrayOutputStream) View(uk.gov.gchq.gaffer.data.elementdefinition.view.View) Graph(uk.gov.gchq.gaffer.graph.Graph) SparkContext(org.apache.spark.SparkContext) EdgeSeed(uk.gov.gchq.gaffer.operation.data.EdgeSeed) Edge(uk.gov.gchq.gaffer.data.element.Edge) SparkConf(org.apache.spark.SparkConf) Test(org.junit.Test)

Example 3 with GetRDDOfElements

Use of uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfElements in project Gaffer by gchq.

Taken from the class FilterToOperationConverterTest, method testSpecifyVertexAndPropertyFilter.
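The test below checks that a pair of Spark SQL filters, an EqualTo on the vertex column plus a GreaterThan on a property, is converted into a seeded GetRDDOfElements whose view carries the property filter as a post-aggregation predicate. A minimal sketch of the conversion step, assuming the same schema fixture and getViewFromSchema helper the test uses:

    // Spark SQL filters: vertex == "0" and property1 > 5.
    final Filter[] filters = new Filter[] {
            new GreaterThan("property1", 5),
            new EqualTo(SchemaToStructTypeConverter.VERTEX_COL_NAME, "0")
    };
    final FiltersToOperationConverter converter =
            new FiltersToOperationConverter(getViewFromSchema(schema), schema, filters);
    // Expected result: a GetRDDOfElements seeded with new EntitySeed("0").
    final Operation operation = converter.getOperation();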

@Test
public void testSpecifyVertexAndPropertyFilter() {
    final Schema schema = getSchema();
    final SparkSession sparkSession = SparkSessionProvider.getSparkSession();
    // Specify vertex and a filter on property1
    Filter[] filters = new Filter[2];
    filters[0] = new GreaterThan("property1", 5);
    filters[1] = new EqualTo(SchemaToStructTypeConverter.VERTEX_COL_NAME, "0");
    FiltersToOperationConverter converter = new FiltersToOperationConverter(getViewFromSchema(schema), schema, filters);
    Operation operation = converter.getOperation();
    assertTrue(operation instanceof GetRDDOfElements);
    assertEquals(1, ((GraphFilters) operation).getView().getEntityGroups().size());
    assertEquals(0, ((GraphFilters) operation).getView().getEdgeGroups().size());
    final Set<EntityId> seeds = new HashSet<>();
    for (final Object seed : ((GetRDDOfElements) operation).getInput()) {
        seeds.add((EntitySeed) seed);
    }
    assertEquals(Collections.singleton(new EntitySeed("0")), seeds);
    View opView = ((GraphFilters) operation).getView();
    List<TupleAdaptedPredicate<String, ?>> entityPostAggFilters = opView.getEntity(ENTITY_GROUP).getPostAggregationFilterFunctions();
    assertThat(entityPostAggFilters).hasSize(1);
    final ArrayList<String> expectedProperties = new ArrayList<>();
    expectedProperties.add("property1");
    assertThat(entityPostAggFilters.get(0).getSelection()).hasSize(1);
    assertEquals(expectedProperties.get(0), entityPostAggFilters.get(0).getSelection()[0]);
    final ArrayList<Predicate> expectedFunctions = new ArrayList<>();
    expectedFunctions.add(new IsMoreThan(5, false));
    assertEquals(expectedFunctions.get(0), entityPostAggFilters.get(0).getPredicate());
    // Specify vertex and filters on properties property1 and property4
    filters = new Filter[3];
    filters[0] = new GreaterThan("property1", 5);
    filters[1] = new EqualTo(SchemaToStructTypeConverter.VERTEX_COL_NAME, "0");
    filters[2] = new LessThan("property4", 8);
    converter = new FiltersToOperationConverter(getViewFromSchema(schema), schema, filters);
    operation = converter.getOperation();
    assertTrue(operation instanceof GetRDDOfElements);
    assertEquals(1, ((GraphFilters) operation).getView().getEntityGroups().size());
    assertEquals(0, ((GraphFilters) operation).getView().getEdgeGroups().size());
    seeds.clear();
    for (final Object seed : ((GetRDDOfElements) operation).getInput()) {
        seeds.add((EntitySeed) seed);
    }
    assertEquals(Collections.singleton(new EntitySeed("0")), seeds);
    opView = ((GraphFilters) operation).getView();
    entityPostAggFilters = opView.getEntity(ENTITY_GROUP).getPostAggregationFilterFunctions();
    assertThat(entityPostAggFilters).hasSize(2);
    expectedProperties.clear();
    expectedProperties.add("property1");
    expectedProperties.add("property4");
    assertThat(entityPostAggFilters.get(0).getSelection()).hasSize(1);
    assertEquals(expectedProperties.get(0), entityPostAggFilters.get(0).getSelection()[0]);
    assertThat(entityPostAggFilters.get(1).getSelection()).hasSize(1);
    assertEquals(expectedProperties.get(1), entityPostAggFilters.get(1).getSelection()[0]);
    expectedFunctions.clear();
    expectedFunctions.add(new IsMoreThan(5, false));
    expectedFunctions.add(new IsLessThan(8, false));
    assertEquals(expectedFunctions.get(0), entityPostAggFilters.get(0).getPredicate());
    assertEquals(expectedFunctions.get(1), entityPostAggFilters.get(1).getPredicate());
}
Also used : SparkSession(org.apache.spark.sql.SparkSession) Schema(uk.gov.gchq.gaffer.store.schema.Schema) ArrayList(java.util.ArrayList) GraphFilters(uk.gov.gchq.gaffer.operation.graph.GraphFilters) Operation(uk.gov.gchq.gaffer.operation.Operation) TupleAdaptedPredicate(uk.gov.gchq.koryphe.tuple.predicate.TupleAdaptedPredicate) Predicate(java.util.function.Predicate) IsLessThan(uk.gov.gchq.koryphe.impl.predicate.IsLessThan) LessThan(org.apache.spark.sql.sources.LessThan) IsLessThan(uk.gov.gchq.koryphe.impl.predicate.IsLessThan) GreaterThan(org.apache.spark.sql.sources.GreaterThan) GetRDDOfElements(uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfElements) HashSet(java.util.HashSet) TupleAdaptedPredicate(uk.gov.gchq.koryphe.tuple.predicate.TupleAdaptedPredicate) View(uk.gov.gchq.gaffer.data.elementdefinition.view.View) EqualTo(org.apache.spark.sql.sources.EqualTo) EntityId(uk.gov.gchq.gaffer.data.element.id.EntityId) Filter(org.apache.spark.sql.sources.Filter) EntitySeed(uk.gov.gchq.gaffer.operation.data.EntitySeed) IsMoreThan(uk.gov.gchq.koryphe.impl.predicate.IsMoreThan) Test(org.junit.jupiter.api.Test)

Example 4 with GetRDDOfElements

Use of uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfElements in project Gaffer by gchq.

Taken from the class FilterToOperationConverterTest, method testSpecifyVertex.

@Test
public void testSpecifyVertex() {
    final Schema schema = getSchema();
    final SparkSession sparkSession = SparkSessionProvider.getSparkSession();
    final Filter[] filters = new Filter[1];
    filters[0] = new EqualTo(SchemaToStructTypeConverter.VERTEX_COL_NAME, "0");
    final FiltersToOperationConverter converter = new FiltersToOperationConverter(getViewFromSchema(schema), schema, filters);
    final Operation operation = converter.getOperation();
    assertTrue(operation instanceof GetRDDOfElements);
    assertEquals(Collections.singleton(ENTITY_GROUP), ((GraphFilters) operation).getView().getEntityGroups());
    assertEquals(0, ((GraphFilters) operation).getView().getEdgeGroups().size());
    final Set<EntityId> seeds = new HashSet<>();
    for (final Object seed : ((GetRDDOfElements) operation).getInput()) {
        seeds.add((EntitySeed) seed);
    }
    assertEquals(Collections.singleton(new EntitySeed("0")), seeds);
}
Also used : SparkSession(org.apache.spark.sql.SparkSession) Schema(uk.gov.gchq.gaffer.store.schema.Schema) GraphFilters(uk.gov.gchq.gaffer.operation.graph.GraphFilters) Operation(uk.gov.gchq.gaffer.operation.Operation) EqualTo(org.apache.spark.sql.sources.EqualTo) EntityId(uk.gov.gchq.gaffer.data.element.id.EntityId) Filter(org.apache.spark.sql.sources.Filter) EntitySeed(uk.gov.gchq.gaffer.operation.data.EntitySeed) GetRDDOfElements(uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfElements) HashSet(java.util.HashSet) Test(org.junit.jupiter.api.Test)

Example 5 with GetRDDOfElements

Use of uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfElements in project Gaffer by gchq.

Taken from the class FilterToOperationConverterTest, method testSpecifySourceOrDestinationAndPropertyFilter.
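This variant filters on the source column rather than the vertex column, so the converted GetRDDOfElements ends up with a view over the edge groups and no entity groups. A minimal sketch, again assuming the test's schema fixture and getViewFromSchema helper:

    // src == "0" and property1 > 5: converts to a seeded GetRDDOfElements
    // whose view contains edge groups only.
    final Filter[] filters = new Filter[] {
            new GreaterThan("property1", 5),
            new EqualTo(SchemaToStructTypeConverter.SRC_COL_NAME, "0")
    };
    final Operation operation =
            new FiltersToOperationConverter(getViewFromSchema(schema), schema, filters).getOperation();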

@Test
public void testSpecifySourceOrDestinationAndPropertyFilter() {
    final Schema schema = getSchema();
    final SparkSession sparkSession = SparkSessionProvider.getSparkSession();
    // Specify src and a filter on property1
    Filter[] filters = new Filter[2];
    filters[0] = new GreaterThan("property1", 5);
    filters[1] = new EqualTo(SchemaToStructTypeConverter.SRC_COL_NAME, "0");
    FiltersToOperationConverter converter = new FiltersToOperationConverter(getViewFromSchema(schema), schema, filters);
    Operation operation = converter.getOperation();
    assertTrue(operation instanceof GetRDDOfElements);
    assertEquals(0, ((GraphFilters) operation).getView().getEntityGroups().size());
    assertEquals(2, ((GraphFilters) operation).getView().getEdgeGroups().size());
    final Set<EntityId> seeds = new HashSet<>();
    for (final Object seed : ((GetRDDOfElements) operation).getInput()) {
        seeds.add((EntitySeed) seed);
    }
    assertEquals(Collections.singleton(new EntitySeed("0")), seeds);
    View opView = ((GraphFilters) operation).getView();
    for (final String edgeGroup : EDGE_GROUPS) {
        final List<TupleAdaptedPredicate<String, ?>> edgePostAggFilters = opView.getEdge(edgeGroup).getPostAggregationFilterFunctions();
        assertThat(edgePostAggFilters).hasSize(1);
        assertArrayEquals(new String[] { "property1" }, edgePostAggFilters.get(0).getSelection());
        assertEquals(new IsMoreThan(5, false), edgePostAggFilters.get(0).getPredicate());
    }
    // Specify src and filters on property1 and property4
    filters = new Filter[3];
    filters[0] = new GreaterThan("property1", 5);
    filters[1] = new EqualTo(SchemaToStructTypeConverter.SRC_COL_NAME, "0");
    filters[2] = new LessThan("property4", 8);
    converter = new FiltersToOperationConverter(getViewFromSchema(schema), schema, filters);
    operation = converter.getOperation();
    assertTrue(operation instanceof GetRDDOfElements);
    assertEquals(0, ((GraphFilters) operation).getView().getEntityGroups().size());
    assertEquals(1, ((GraphFilters) operation).getView().getEdgeGroups().size());
    seeds.clear();
    for (final Object seed : ((GetRDDOfElements) operation).getInput()) {
        seeds.add((EntitySeed) seed);
    }
    assertEquals(Collections.singleton(new EntitySeed("0")), seeds);
    opView = ((GraphFilters) operation).getView();
    final List<TupleAdaptedPredicate<String, ?>> entityPostAggFilters = opView.getEdge(EDGE_GROUP).getPostAggregationFilterFunctions();
    assertThat(entityPostAggFilters).hasSize(2);
    final List<String> expectedProperties = new ArrayList<>();
    expectedProperties.add("property1");
    expectedProperties.add("property4");
    assertThat(entityPostAggFilters.get(0).getSelection()).hasSize(1);
    assertEquals(expectedProperties.get(0), entityPostAggFilters.get(0).getSelection()[0]);
    assertEquals(new IsMoreThan(5, false), entityPostAggFilters.get(0).getPredicate());
    assertThat(entityPostAggFilters.get(1).getSelection()).hasSize(1);
    assertEquals(expectedProperties.get(1), entityPostAggFilters.get(1).getSelection()[0]);
    assertEquals(new IsLessThan(8, false), entityPostAggFilters.get(1).getPredicate());
}
Also used : TupleAdaptedPredicate(uk.gov.gchq.koryphe.tuple.predicate.TupleAdaptedPredicate) SparkSession(org.apache.spark.sql.SparkSession) Schema(uk.gov.gchq.gaffer.store.schema.Schema) ArrayList(java.util.ArrayList) GraphFilters(uk.gov.gchq.gaffer.operation.graph.GraphFilters) Operation(uk.gov.gchq.gaffer.operation.Operation) View(uk.gov.gchq.gaffer.data.elementdefinition.view.View) EqualTo(org.apache.spark.sql.sources.EqualTo) EntityId(uk.gov.gchq.gaffer.data.element.id.EntityId) IsLessThan(uk.gov.gchq.koryphe.impl.predicate.IsLessThan) LessThan(org.apache.spark.sql.sources.LessThan) IsLessThan(uk.gov.gchq.koryphe.impl.predicate.IsLessThan) Filter(org.apache.spark.sql.sources.Filter) GreaterThan(org.apache.spark.sql.sources.GreaterThan) EntitySeed(uk.gov.gchq.gaffer.operation.data.EntitySeed) IsMoreThan(uk.gov.gchq.koryphe.impl.predicate.IsMoreThan) GetRDDOfElements(uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfElements) HashSet(java.util.HashSet) Test(org.junit.jupiter.api.Test)

Aggregations

GetRDDOfElements (uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfElements) 11
HashSet (java.util.HashSet) 9
Test (org.junit.jupiter.api.Test) 8
EntitySeed (uk.gov.gchq.gaffer.operation.data.EntitySeed) 8
View (uk.gov.gchq.gaffer.data.elementdefinition.view.View) 7
ArrayList (java.util.ArrayList) 6
EqualTo (org.apache.spark.sql.sources.EqualTo) 6
Filter (org.apache.spark.sql.sources.Filter) 6
Configuration (org.apache.hadoop.conf.Configuration) 5
SparkSession (org.apache.spark.sql.SparkSession) 5
Element (uk.gov.gchq.gaffer.data.element.Element) 5
EntityId (uk.gov.gchq.gaffer.data.element.id.EntityId) 5
Graph (uk.gov.gchq.gaffer.graph.Graph) 5
Operation (uk.gov.gchq.gaffer.operation.Operation) 5
GraphFilters (uk.gov.gchq.gaffer.operation.graph.GraphFilters) 5
Schema (uk.gov.gchq.gaffer.store.schema.Schema) 5
User (uk.gov.gchq.gaffer.user.User) 5
Edge (uk.gov.gchq.gaffer.data.element.Edge) 4
Entity (uk.gov.gchq.gaffer.data.element.Entity) 4
AddElements (uk.gov.gchq.gaffer.operation.impl.add.AddElements) 4