Search in sources :

Example 6 with GetRDDOfElements

use of uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfElements in project Gaffer by gchq.

the class GetRDDOfElementsHandlerTest method checkHadoopConfIsPassedThrough.

/**
 * Runs a {@code GetRDDOfElements} query carrying a serialised Hadoop configuration as an
 * operation option, then checks that the option is unchanged on the operation and that the
 * custom entry can be read back from the Hadoop configuration of the returned RDD's Spark context.
 */
@Test
public void checkHadoopConfIsPassedThrough() throws OperationException, IOException {
    final GraphConfig graphConfig = new GraphConfig.Builder()
            .graphId("graphId")
            .build();
    final Graph graph = new Graph.Builder()
            .config(graphConfig)
            .addSchema(getClass().getResourceAsStream("/schema/elements.json"))
            .addSchema(getClass().getResourceAsStream("/schema/types.json"))
            .addSchema(getClass().getResourceAsStream("/schema/serialisation.json"))
            .storeProperties(PROPERTIES)
            .build();
    final User testUser = new User();

    // Hadoop configuration holding a single marker entry, serialised for use as an operation option
    final Configuration hadoopConf = new Configuration();
    hadoopConf.set("AN_OPTION", "A_VALUE");
    final String serialisedConf = AbstractGetRDDHandler.convertConfigurationToString(hadoopConf);

    final GetRDDOfElements query = new GetRDDOfElements.Builder()
            .input(new EdgeSeed("1", "B", false))
            .option(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, serialisedConf)
            .build();
    final RDD<Element> result = graph.execute(query, testUser);

    // The option on the operation must be untouched by execution
    assertEquals(serialisedConf, query.getOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY));
    // The marker entry must be visible via the RDD's Spark context
    assertEquals("A_VALUE", result.sparkContext().hadoopConfiguration().get("AN_OPTION"));
}
Also used : Graph(uk.gov.gchq.gaffer.graph.Graph) User(uk.gov.gchq.gaffer.user.User) Configuration(org.apache.hadoop.conf.Configuration) EdgeSeed(uk.gov.gchq.gaffer.operation.data.EdgeSeed) Element(uk.gov.gchq.gaffer.data.element.Element) GetRDDOfElements(uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfElements) Test(org.junit.jupiter.api.Test)

Example 7 with GetRDDOfElements

use of uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfElements in project Gaffer by gchq.

the class GetRDDOfElementsHandlerTest method checkGetCorrectElementsInRDDForEdgeId.

/**
 * Loads a small graph and checks the elements returned by {@code GetRDDOfElements} when it is
 * seeded with {@code EdgeSeed}s, under an edge-only view, an entity-only view, and with two seeds.
 */
@Test
public void checkGetCorrectElementsInRDDForEdgeId() throws OperationException, IOException {
    final Graph graph1 = new Graph.Builder()
            .config(new GraphConfig.Builder().graphId("graphId").build())
            .addSchema(getClass().getResourceAsStream("/schema/elements.json"))
            .addSchema(getClass().getResourceAsStream("/schema/types.json"))
            .addSchema(getClass().getResourceAsStream("/schema/serialisation.json"))
            .storeProperties(PROPERTIES)
            .build();

    // For each i in 0..9: an entity i plus undirected edges i-B (count 2) and i-C (count 4)
    final List<Element> elements = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
        final String vertex = String.valueOf(i);
        final Entity entity = new Entity.Builder().group(TestGroups.ENTITY).vertex(vertex).build();
        final Edge toB = new Edge.Builder().group(TestGroups.EDGE).source(vertex).dest("B").directed(false).property("count", 2).build();
        final Edge toC = new Edge.Builder().group(TestGroups.EDGE).source(vertex).dest("C").directed(false).property("count", 4).build();
        elements.add(toB);
        elements.add(toC);
        elements.add(entity);
    }
    final User user = new User();
    graph1.execute(new AddElements.Builder().input(elements).build(), user);

    // Serialised (default) Hadoop configuration, attached to every query below
    final String configurationString = AbstractGetRDDHandler.convertConfigurationToString(new Configuration());

    // Elements the assertions below refer to
    final Edge edge1B = new Edge.Builder().group(TestGroups.EDGE).source("1").dest("B").directed(false).property("count", 2).build();
    final Edge edge5C = new Edge.Builder().group(TestGroups.EDGE).source("5").dest("C").directed(false).property("count", 4).build();
    final Entity entity1 = new Entity.Builder().group(TestGroups.ENTITY).vertex("1").build();

    // --- EdgeSeed 1 -> B with an edge-only view: expect just the edge 1 -> B
    final GetRDDOfElements edgeQuery = new GetRDDOfElements.Builder()
            .input(new EdgeSeed("1", "B", false))
            .view(new View.Builder().edge(EDGE_GROUP).build())
            .build();
    edgeQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    final RDD<Element> edgeRdd = graph1.execute(edgeQuery, user);
    if (edgeRdd == null) {
        fail("No RDD returned");
    }
    // NB: IDE suggests the cast in the following line is unnecessary but compilation fails without it
    final Set<Element> edgeResults = new HashSet<>(Arrays.asList((Element[]) edgeRdd.collect()));
    final Set<Element> edgeExpected = new HashSet<>();
    edgeExpected.add(edge1B);
    assertEquals(edgeExpected, edgeResults);

    // --- EdgeSeed 1 -> B with an entity-only view: expect just the entity for vertex 1
    final GetRDDOfElements entityQuery = new GetRDDOfElements.Builder()
            .input(new EdgeSeed("1", "B", false))
            .view(new View.Builder().entity(ENTITY_GROUP).build())
            .build();
    entityQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    final RDD<Element> entityRdd = graph1.execute(entityQuery, user);
    if (entityRdd == null) {
        fail("No RDD returned");
    }
    final Set<Element> entityResults = new HashSet<>(Arrays.asList((Element[]) entityRdd.collect()));
    final Set<Element> entityExpected = new HashSet<>();
    entityExpected.add(entity1);
    assertEquals(entityExpected, entityResults);

    // --- EdgeSeed 1 -> B with an edge-only view again: expect just the edge 1 -> B
    final GetRDDOfElements edgeOnlyQuery = new GetRDDOfElements.Builder()
            .input(new EdgeSeed("1", "B", false))
            .view(new View.Builder().edge(EDGE_GROUP).build())
            .build();
    edgeOnlyQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    final RDD<Element> edgeOnlyRdd = graph1.execute(edgeOnlyQuery, user);
    if (edgeOnlyRdd == null) {
        fail("No RDD returned");
    }
    final Set<Element> edgeOnlyResults = new HashSet<>(Arrays.asList((Element[]) edgeOnlyRdd.collect()));
    final Set<Element> edgeOnlyExpected = new HashSet<>();
    edgeOnlyExpected.add(edge1B);
    assertEquals(edgeOnlyExpected, edgeOnlyResults);

    // --- EdgeSeeds 1 -> B and 5 -> C with an edge-only view: expect exactly those two edges
    final GetRDDOfElements twoSeedQuery = new GetRDDOfElements.Builder()
            .view(new View.Builder().edge(EDGE_GROUP).build())
            .input(new EdgeSeed("1", "B", false), new EdgeSeed("5", "C", false))
            .build();
    twoSeedQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    final RDD<Element> twoSeedRdd = graph1.execute(twoSeedQuery, user);
    if (twoSeedRdd == null) {
        fail("No RDD returned");
    }
    final Set<Element> twoSeedResults = new HashSet<>(Arrays.asList((Element[]) twoSeedRdd.collect()));
    final Set<Element> twoSeedExpected = new HashSet<>();
    twoSeedExpected.add(edge1B);
    twoSeedExpected.add(edge5C);
    assertEquals(twoSeedExpected, twoSeedResults);
}
Also used : AddElements(uk.gov.gchq.gaffer.operation.impl.add.AddElements) Entity(uk.gov.gchq.gaffer.data.element.Entity) User(uk.gov.gchq.gaffer.user.User) Configuration(org.apache.hadoop.conf.Configuration) Element(uk.gov.gchq.gaffer.data.element.Element) ArrayList(java.util.ArrayList) View(uk.gov.gchq.gaffer.data.elementdefinition.view.View) Graph(uk.gov.gchq.gaffer.graph.Graph) EdgeSeed(uk.gov.gchq.gaffer.operation.data.EdgeSeed) Edge(uk.gov.gchq.gaffer.data.element.Edge) GetRDDOfElements(uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfElements) HashSet(java.util.HashSet) Test(org.junit.jupiter.api.Test)

Example 8 with GetRDDOfElements

use of uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfElements in project Gaffer by gchq.

the class GetRDDOfElementsHandlerTest method checkGetCorrectElementsInRDDForEntityId.

/**
 * Loads a small graph and checks the elements returned by {@code GetRDDOfElements} when it is
 * seeded with {@code EntitySeed}s: with no view, with an entity-only view, with an edge-only
 * view, and with two seeds at once.
 */
@Test
public void checkGetCorrectElementsInRDDForEntityId() throws OperationException, IOException {
    final Graph graph1 = new Graph.Builder()
            .config(new GraphConfig.Builder().graphId("graphId").build())
            .addSchema(getClass().getResourceAsStream("/schema/elements.json"))
            .addSchema(getClass().getResourceAsStream("/schema/types.json"))
            .addSchema(getClass().getResourceAsStream("/schema/serialisation.json"))
            .storeProperties(PROPERTIES)
            .build();

    // For each i in 0..9: an entity i plus undirected edges i-B (count 2) and i-C (count 4)
    final List<Element> elements = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
        final String vertex = String.valueOf(i);
        final Entity entity = new Entity.Builder().group(TestGroups.ENTITY).vertex(vertex).build();
        final Edge toB = new Edge.Builder().group(TestGroups.EDGE).source(vertex).dest("B").directed(false).property("count", 2).build();
        final Edge toC = new Edge.Builder().group(TestGroups.EDGE).source(vertex).dest("C").directed(false).property("count", 4).build();
        elements.add(toB);
        elements.add(toC);
        elements.add(entity);
    }
    final User user = new User();
    graph1.execute(new AddElements.Builder().input(elements).build(), user);

    // Serialised (default) Hadoop configuration, attached to every query below
    final String configurationString = AbstractGetRDDHandler.convertConfigurationToString(new Configuration());

    // Elements the assertions below refer to
    final Entity entity1 = new Entity.Builder().group(TestGroups.ENTITY).vertex("1").build();
    final Edge edge1B = new Edge.Builder().group(TestGroups.EDGE).source("1").dest("B").directed(false).property("count", 2).build();
    final Edge edge1C = new Edge.Builder().group(TestGroups.EDGE).source("1").dest("C").directed(false).property("count", 4).build();
    final Entity entity5 = new Entity.Builder().group(TestGroups.ENTITY).vertex("5").build();
    final Edge edge5B = new Edge.Builder().group(TestGroups.EDGE).source("5").dest("B").directed(false).property("count", 2).build();
    final Edge edge5C = new Edge.Builder().group(TestGroups.EDGE).source("5").dest("C").directed(false).property("count", 4).build();

    // --- Seed "1", no view: expect the entity and both edges for vertex 1
    final GetRDDOfElements allQuery = new GetRDDOfElements.Builder()
            .input(new EntitySeed("1"))
            .build();
    allQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    final RDD<Element> allRdd = graph1.execute(allQuery, user);
    if (allRdd == null) {
        fail("No RDD returned");
    }
    // NB: IDE suggests the cast in the following line is unnecessary but compilation fails without it
    final Set<Element> allResults = new HashSet<>(Arrays.asList((Element[]) allRdd.collect()));
    final Set<Element> allExpected = new HashSet<>();
    allExpected.add(entity1);
    allExpected.add(edge1B);
    allExpected.add(edge1C);
    assertEquals(allExpected, allResults);

    // --- Seed "1", entity-only view: expect just the entity for vertex 1
    final GetRDDOfElements entityQuery = new GetRDDOfElements.Builder()
            .input(new EntitySeed("1"))
            .view(new View.Builder().entity(ENTITY_GROUP).build())
            .build();
    entityQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    final RDD<Element> entityRdd = graph1.execute(entityQuery, user);
    if (entityRdd == null) {
        fail("No RDD returned");
    }
    final Set<Element> entityResults = new HashSet<>(Arrays.asList((Element[]) entityRdd.collect()));
    final Set<Element> entityExpected = new HashSet<>();
    entityExpected.add(entity1);
    assertEquals(entityExpected, entityResults);

    // --- Seed "1", edge-only view: expect just the two edges for vertex 1
    final GetRDDOfElements edgeQuery = new GetRDDOfElements.Builder()
            .input(new EntitySeed("1"))
            .view(new View.Builder().edge(EDGE_GROUP).build())
            .build();
    edgeQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    final RDD<Element> edgeRdd = graph1.execute(edgeQuery, user);
    if (edgeRdd == null) {
        fail("No RDD returned");
    }
    final Set<Element> edgeResults = new HashSet<>(Arrays.asList((Element[]) edgeRdd.collect()));
    final Set<Element> edgeExpected = new HashSet<>();
    edgeExpected.add(edge1B);
    edgeExpected.add(edge1C);
    assertEquals(edgeExpected, edgeResults);

    // --- Seeds "1" and "5", no view: expect the entity and both edges for each vertex.
    // The seed set is parameterised (it was a raw Set) so the input(...) call is type-checked.
    final Set<EntitySeed> seeds = new HashSet<>();
    seeds.add(new EntitySeed("1"));
    seeds.add(new EntitySeed("5"));
    final GetRDDOfElements twoSeedQuery = new GetRDDOfElements.Builder()
            .input(seeds)
            .build();
    twoSeedQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    final RDD<Element> twoSeedRdd = graph1.execute(twoSeedQuery, user);
    if (twoSeedRdd == null) {
        fail("No RDD returned");
    }
    final Set<Element> twoSeedResults = new HashSet<>(Arrays.asList((Element[]) twoSeedRdd.collect()));
    final Set<Element> twoSeedExpected = new HashSet<>();
    twoSeedExpected.add(entity1);
    twoSeedExpected.add(edge1B);
    twoSeedExpected.add(edge1C);
    twoSeedExpected.add(entity5);
    twoSeedExpected.add(edge5B);
    twoSeedExpected.add(edge5C);
    assertEquals(twoSeedExpected, twoSeedResults);
}
Also used : AddElements(uk.gov.gchq.gaffer.operation.impl.add.AddElements) Entity(uk.gov.gchq.gaffer.data.element.Entity) User(uk.gov.gchq.gaffer.user.User) HashSet(java.util.HashSet) Set(java.util.Set) Configuration(org.apache.hadoop.conf.Configuration) Element(uk.gov.gchq.gaffer.data.element.Element) ArrayList(java.util.ArrayList) View(uk.gov.gchq.gaffer.data.elementdefinition.view.View) Graph(uk.gov.gchq.gaffer.graph.Graph) EntitySeed(uk.gov.gchq.gaffer.operation.data.EntitySeed) Edge(uk.gov.gchq.gaffer.data.element.Edge) GetRDDOfElements(uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfElements) HashSet(java.util.HashSet) Test(org.junit.jupiter.api.Test)

Example 9 with GetRDDOfElements

use of uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfElements in project Gaffer by gchq.

the class FiltersToOperationConverter method applyVertexSourceDestinationFilters.

/**
 * Picks the operation to run based on the first {@code EqualTo} filter that targets the vertex,
 * source or destination column.
 *
 * <p>If such a filter is found, a {@code GetRDDOfElements} seeded with the filter's value is
 * returned, using a view restricted to the entity groups (vertex column) or the edge groups
 * (source/destination column) of the supplied view. If no such filter exists, a
 * {@code GetRDDOfAllElements} over a clone of the supplied view is returned.
 *
 * @param view the view whose groups are used to build the operation's view
 * @return the operation to execute
 */
private Output<RDD<Element>> applyVertexSourceDestinationFilters(final View view) {
    final View fallbackView = view.clone();
    for (final Filter candidate : filters) {
        if (!(candidate instanceof EqualTo)) {
            continue;
        }
        final EqualTo equality = (EqualTo) candidate;
        final String column = equality.attribute();
        final boolean onVertex = column.equals(SchemaToStructTypeConverter.VERTEX_COL_NAME);
        if (!onVertex
                && !column.equals(SchemaToStructTypeConverter.SRC_COL_NAME)
                && !column.equals(SchemaToStructTypeConverter.DST_COL_NAME)) {
            // EqualTo on some other column: not usable as a seed, keep looking
            continue;
        }

        View.Builder narrowed;
        if (onVertex) {
            // Only entities are relevant, so the narrowed view carries entity groups only
            LOGGER.info("Found EqualTo filter with attribute {}, setting views to only contain entity groups", column);
            narrowed = new View.Builder();
            for (final String group : view.getEntityGroups()) {
                narrowed = narrowed.entity(group);
            }
        } else {
            // Only edges are relevant, so the narrowed view carries edge groups only
            LOGGER.info("Found EqualTo filter with attribute {}, setting views to only contain edge groups", column);
            narrowed = new View.Builder();
            for (final String group : view.getEdgeGroups()) {
                narrowed = narrowed.edge(group);
            }
        }
        final View seededView = narrowed.build();

        // The first matching filter decides the operation; later filters are not inspected
        LOGGER.info("Setting operation to GetRDDOfElements");
        return new GetRDDOfElements.Builder()
                .input(new EntitySeed(equality.value()))
                .view(seededView)
                .build();
    }

    LOGGER.debug("Setting operation to GetRDDOfAllElements");
    return new GetRDDOfAllElements.Builder()
            .view(fallbackView)
            .build();
}
Also used : View(uk.gov.gchq.gaffer.data.elementdefinition.view.View) EqualTo(org.apache.spark.sql.sources.EqualTo) RDD(org.apache.spark.rdd.RDD) Filter(org.apache.spark.sql.sources.Filter) GetRDDOfAllElements(uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfAllElements) EntitySeed(uk.gov.gchq.gaffer.operation.data.EntitySeed) GetRDDOfElements(uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfElements)

Example 10 with GetRDDOfElements

use of uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfElements in project Gaffer by gchq.

the class FilterToOperationConverterTest method testSpecifySource.

/**
 * Checks that a single {@code EqualTo} filter on the source column is converted into a
 * {@code GetRDDOfElements} whose view has no entity groups, has the expected edge groups,
 * and whose input is the single {@code EntitySeed} "0".
 */
@Test
public void testSpecifySource() {
    final Schema schema = getSchema();
    // Result is unused here; the call is retained from the original test
    final SparkSession sparkSession = SparkSessionProvider.getSparkSession();
    final Filter[] filters = {new EqualTo(SchemaToStructTypeConverter.SRC_COL_NAME, "0")};

    final FiltersToOperationConverter converter =
            new FiltersToOperationConverter(getViewFromSchema(schema), schema, filters);
    final Operation operation = converter.getOperation();

    assertTrue(operation instanceof GetRDDOfElements);

    // The view must have been narrowed to edge groups only
    final GraphFilters filtered = (GraphFilters) operation;
    assertEquals(0, filtered.getView().getEntityGroups().size());
    assertEquals(EDGE_GROUPS, filtered.getView().getEdgeGroups());

    // The filter value must have become the only seed
    final GetRDDOfElements getElements = (GetRDDOfElements) operation;
    final Set<EntityId> actualSeeds = new HashSet<>();
    for (final Object seed : getElements.getInput()) {
        actualSeeds.add((EntitySeed) seed);
    }
    assertEquals(Collections.singleton(new EntitySeed("0")), actualSeeds);
}
Also used : SparkSession(org.apache.spark.sql.SparkSession) Schema(uk.gov.gchq.gaffer.store.schema.Schema) GraphFilters(uk.gov.gchq.gaffer.operation.graph.GraphFilters) Operation(uk.gov.gchq.gaffer.operation.Operation) EqualTo(org.apache.spark.sql.sources.EqualTo) EntityId(uk.gov.gchq.gaffer.data.element.id.EntityId) Filter(org.apache.spark.sql.sources.Filter) EntitySeed(uk.gov.gchq.gaffer.operation.data.EntitySeed) GetRDDOfElements(uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfElements) HashSet(java.util.HashSet) Test(org.junit.jupiter.api.Test)

Aggregations

GetRDDOfElements (uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfElements)11 HashSet (java.util.HashSet)9 Test (org.junit.jupiter.api.Test)8 EntitySeed (uk.gov.gchq.gaffer.operation.data.EntitySeed)8 View (uk.gov.gchq.gaffer.data.elementdefinition.view.View)7 ArrayList (java.util.ArrayList)6 EqualTo (org.apache.spark.sql.sources.EqualTo)6 Filter (org.apache.spark.sql.sources.Filter)6 Configuration (org.apache.hadoop.conf.Configuration)5 SparkSession (org.apache.spark.sql.SparkSession)5 Element (uk.gov.gchq.gaffer.data.element.Element)5 EntityId (uk.gov.gchq.gaffer.data.element.id.EntityId)5 Graph (uk.gov.gchq.gaffer.graph.Graph)5 Operation (uk.gov.gchq.gaffer.operation.Operation)5 GraphFilters (uk.gov.gchq.gaffer.operation.graph.GraphFilters)5 Schema (uk.gov.gchq.gaffer.store.schema.Schema)5 User (uk.gov.gchq.gaffer.user.User)5 Edge (uk.gov.gchq.gaffer.data.element.Edge)4 Entity (uk.gov.gchq.gaffer.data.element.Entity)4 AddElements (uk.gov.gchq.gaffer.operation.impl.add.AddElements)4