
Example 21 with ByteArrayOutputStream

Use of org.apache.commons.io.output.ByteArrayOutputStream in project Gaffer by gchq.

Class GetJavaRDDOfAllElementsHandlerTest, method checkGetAllElementsInJavaRDDWithVisibility:

@Test
public void checkGetAllElementsInJavaRDDWithVisibility() throws OperationException, IOException {
    final Graph graph1 = new Graph.Builder()
            .addSchema(getClass().getResourceAsStream("/schema/dataSchemaWithVisibility.json"))
            .addSchema(getClass().getResourceAsStream("/schema/dataTypes.json"))
            .addSchema(getClass().getResourceAsStream("/schema/storeTypes.json"))
            .storeProperties(getClass().getResourceAsStream("/store.properties"))
            .build();
    final List<Element> elements = new ArrayList<>();
    for (int i = 0; i < 1; i++) {
        final Entity entity = new Entity(TestGroups.ENTITY);
        entity.setVertex("" + i);
        entity.putProperty("visibility", "public");
        final Edge edge1 = new Edge(TestGroups.EDGE);
        edge1.setSource("" + i);
        edge1.setDestination("B");
        edge1.setDirected(false);
        edge1.putProperty(TestPropertyNames.COUNT, 2);
        edge1.putProperty("visibility", "private");
        final Edge edge2 = new Edge(TestGroups.EDGE);
        edge2.setSource("" + i);
        edge2.setDestination("C");
        edge2.setDirected(false);
        edge2.putProperty(TestPropertyNames.COUNT, 4);
        edge2.putProperty("visibility", "public");
        elements.add(edge1);
        elements.add(edge2);
        elements.add(entity);
    }
    final User user = new User("user", Collections.singleton("public"));
    graph1.execute(new AddElements(elements), user);
    final SparkConf sparkConf = new SparkConf()
            .setMaster("local")
            .setAppName("testCheckGetAllElementsInJavaRDDWithVisibility")
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .set("spark.kryo.registrator", "uk.gov.gchq.gaffer.spark.serialisation.kryo.Registrator")
            .set("spark.driver.allowMultipleContexts", "true");
    final JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);
    // Create Hadoop configuration and serialise to a string
    final Configuration configuration = new Configuration();
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    configuration.write(new DataOutputStream(baos));
    final String configurationString = new String(baos.toByteArray(), CommonConstants.UTF_8);
    // Create user with just public auth, and user with both private and public
    final Set<String> publicNotPrivate = new HashSet<>();
    publicNotPrivate.add("public");
    final User userWithPublicNotPrivate = new User("user1", publicNotPrivate);
    final Set<String> privateAuth = new HashSet<>();
    privateAuth.add("public");
    privateAuth.add("private");
    final User userWithPrivate = new User("user2", privateAuth);
    // Calculate correct results for 2 users
    final Set<Element> expectedElementsPublicNotPrivate = new HashSet<>();
    final Set<Element> expectedElementsPrivate = new HashSet<>();
    for (final Element element : elements) {
        expectedElementsPrivate.add(element);
        if (element.getProperty("visibility").equals("public")) {
            expectedElementsPublicNotPrivate.add(element);
        }
    }
    // Check the correct elements are returned for the user with only the public auth
    GetJavaRDDOfAllElements rddQuery = new GetJavaRDDOfAllElements.Builder().javaSparkContext(sparkContext).build();
    rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    JavaRDD<Element> rdd = graph1.execute(rddQuery, userWithPublicNotPrivate);
    if (rdd == null) {
        fail("No RDD returned");
    }
    final Set<Element> results = new HashSet<>(rdd.collect());
    assertEquals(expectedElementsPublicNotPrivate, results);
    // Check the correct elements are returned for the user with both private and public auths
    rddQuery = new GetJavaRDDOfAllElements.Builder().javaSparkContext(sparkContext).build();
    rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    rdd = graph1.execute(rddQuery, userWithPrivate);
    if (rdd == null) {
        fail("No RDD returned");
    }
    results.clear();
    results.addAll(rdd.collect());
    assertEquals(expectedElementsPrivate, results);
    sparkContext.stop();
}
Also used: AddElements (uk.gov.gchq.gaffer.operation.impl.add.AddElements), Entity (uk.gov.gchq.gaffer.data.element.Entity), User (uk.gov.gchq.gaffer.user.User), Configuration (org.apache.hadoop.conf.Configuration), DataOutputStream (java.io.DataOutputStream), Element (uk.gov.gchq.gaffer.data.element.Element), ArrayList (java.util.ArrayList), GetJavaRDDOfAllElements (uk.gov.gchq.gaffer.spark.operation.javardd.GetJavaRDDOfAllElements), ByteArrayOutputStream (org.apache.commons.io.output.ByteArrayOutputStream), Graph (uk.gov.gchq.gaffer.graph.Graph), JavaSparkContext (org.apache.spark.api.java.JavaSparkContext), Edge (uk.gov.gchq.gaffer.data.element.Edge), SparkConf (org.apache.spark.SparkConf), HashSet (java.util.HashSet), Test (org.junit.Test)
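
The serialise-to-string step above is why ByteArrayOutputStream appears in these tests: a Hadoop Configuration implements Writable, so it can be written to an in-memory stream and handed to the operation handler as a single option value. Below is a minimal sketch of the round trip; the helper class is hypothetical (not part of the Gaffer tests), StandardCharsets.UTF_8 stands in for Gaffer's CommonConstants.UTF_8, and the reverse direction assumes the handler decodes the string with the same charset, mirroring what the tests rely on.

import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.hadoop.conf.Configuration;

// Hypothetical helper mirroring the serialisation idiom used in the tests above.
public final class ConfigurationStrings {

    private ConfigurationStrings() {
    }

    // Configuration implements Writable, so write(DataOutput) captures its entries.
    public static String toConfigurationString(final Configuration configuration) throws IOException {
        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
        configuration.write(new DataOutputStream(baos));
        return new String(baos.toByteArray(), StandardCharsets.UTF_8);
    }

    // The reverse: readFields(DataInput) repopulates a fresh Configuration.
    public static Configuration fromConfigurationString(final String configurationString) throws IOException {
        final Configuration configuration = new Configuration();
        configuration.readFields(new DataInputStream(
                new ByteArrayInputStream(configurationString.getBytes(StandardCharsets.UTF_8))));
        return configuration;
    }
}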

Example 22 with ByteArrayOutputStream

Use of org.apache.commons.io.output.ByteArrayOutputStream in project Gaffer by gchq.

Class GetJavaRDDOfAllElementsHandlerTest, method checkGetAllElementsInJavaRDD:

@Test
public void checkGetAllElementsInJavaRDD() throws OperationException, IOException {
    final Graph graph1 = new Graph.Builder()
            .addSchema(getClass().getResourceAsStream("/schema/dataSchema.json"))
            .addSchema(getClass().getResourceAsStream("/schema/dataTypes.json"))
            .addSchema(getClass().getResourceAsStream("/schema/storeTypes.json"))
            .storeProperties(getClass().getResourceAsStream("/store.properties"))
            .build();
    final List<Element> elements = new ArrayList<>();
    final Set<Element> expectedElements = new HashSet<>();
    for (int i = 0; i < 10; i++) {
        final Entity entity = new Entity(TestGroups.ENTITY);
        entity.setVertex("" + i);
        final Edge edge1 = new Edge(TestGroups.EDGE);
        edge1.setSource("" + i);
        edge1.setDestination("B");
        edge1.setDirected(false);
        edge1.putProperty(TestPropertyNames.COUNT, 2);
        final Edge edge2 = new Edge(TestGroups.EDGE);
        edge2.setSource("" + i);
        edge2.setDestination("C");
        edge2.setDirected(false);
        edge2.putProperty(TestPropertyNames.COUNT, 4);
        elements.add(edge1);
        elements.add(edge2);
        elements.add(entity);
        expectedElements.add(edge1);
        expectedElements.add(edge2);
        expectedElements.add(entity);
    }
    final User user = new User();
    graph1.execute(new AddElements(elements), user);
    final SparkConf sparkConf = new SparkConf()
            .setMaster("local")
            .setAppName("testCheckGetAllElementsInJavaRDD")
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .set("spark.kryo.registrator", "uk.gov.gchq.gaffer.spark.serialisation.kryo.Registrator")
            .set("spark.driver.allowMultipleContexts", "true");
    final JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);
    // Create Hadoop configuration and serialise to a string
    final Configuration configuration = new Configuration();
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    configuration.write(new DataOutputStream(baos));
    final String configurationString = new String(baos.toByteArray(), CommonConstants.UTF_8);
    // Check that all elements are returned
    final GetJavaRDDOfAllElements rddQuery = new GetJavaRDDOfAllElements.Builder().javaSparkContext(sparkContext).build();
    rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    final JavaRDD<Element> rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    final Set<Element> results = new HashSet<>(rdd.collect());
    assertEquals(expectedElements, results);
    sparkContext.stop();
}
Also used: AddElements (uk.gov.gchq.gaffer.operation.impl.add.AddElements), Entity (uk.gov.gchq.gaffer.data.element.Entity), User (uk.gov.gchq.gaffer.user.User), Configuration (org.apache.hadoop.conf.Configuration), DataOutputStream (java.io.DataOutputStream), Element (uk.gov.gchq.gaffer.data.element.Element), ArrayList (java.util.ArrayList), GetJavaRDDOfAllElements (uk.gov.gchq.gaffer.spark.operation.javardd.GetJavaRDDOfAllElements), ByteArrayOutputStream (org.apache.commons.io.output.ByteArrayOutputStream), Graph (uk.gov.gchq.gaffer.graph.Graph), JavaSparkContext (org.apache.spark.api.java.JavaSparkContext), Edge (uk.gov.gchq.gaffer.data.element.Edge), SparkConf (org.apache.spark.SparkConf), HashSet (java.util.HashSet), Test (org.junit.Test)
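
Every example repeats the same null-check, collect and compare sequence after executing the RDD operation. As a sketch only (this helper is hypothetical, not part of the Gaffer test code), the pattern collapses into one JUnit assertion:

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;

import java.util.HashSet;
import java.util.Set;
import org.apache.spark.api.java.JavaRDD;
import uk.gov.gchq.gaffer.data.element.Element;

// Hypothetical helper: fail fast if no RDD came back, then compare contents as a set.
public final class RddAssert {

    private RddAssert() {
    }

    public static void assertRddContainsExactly(final Set<Element> expected, final JavaRDD<Element> rdd) {
        assertNotNull("No RDD returned", rdd);
        assertEquals(expected, new HashSet<>(rdd.collect()));
    }
}

With this, each check reduces to a single call such as assertRddContainsExactly(expectedElements, graph1.execute(rddQuery, user)).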

Example 23 with ByteArrayOutputStream

Use of org.apache.commons.io.output.ByteArrayOutputStream in project Gaffer by gchq.

Class GetJavaRDDOfElementsHandlerTest, method checkGetCorrectElementsInRDDForEdgeSeed:

@Test
public void checkGetCorrectElementsInRDDForEdgeSeed() throws OperationException, IOException {
    final Graph graph1 = new Graph.Builder()
            .addSchema(getClass().getResourceAsStream("/schema/dataSchema.json"))
            .addSchema(getClass().getResourceAsStream("/schema/dataTypes.json"))
            .addSchema(getClass().getResourceAsStream("/schema/storeTypes.json"))
            .storeProperties(getClass().getResourceAsStream("/store.properties"))
            .build();
    final List<Element> elements = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
        final Entity entity = new Entity(ENTITY_GROUP);
        entity.setVertex("" + i);
        final Edge edge1 = new Edge(EDGE_GROUP);
        edge1.setSource("" + i);
        edge1.setDestination("B");
        edge1.setDirected(false);
        edge1.putProperty("count", 2);
        final Edge edge2 = new Edge(EDGE_GROUP);
        edge2.setSource("" + i);
        edge2.setDestination("C");
        edge2.setDirected(false);
        edge2.putProperty("count", 4);
        elements.add(edge1);
        elements.add(edge2);
        elements.add(entity);
    }
    final User user = new User();
    graph1.execute(new AddElements(elements), user);
    final SparkConf sparkConf = new SparkConf()
            .setMaster("local")
            .setAppName("testCheckGetCorrectElementsInJavaRDDForEdgeSeed")
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .set("spark.kryo.registrator", "uk.gov.gchq.gaffer.spark.serialisation.kryo.Registrator")
            .set("spark.driver.allowMultipleContexts", "true");
    final JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);
    // Create Hadoop configuration and serialise to a string
    final Configuration configuration = new Configuration();
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    configuration.write(new DataOutputStream(baos));
    final String configurationString = new String(baos.toByteArray(), CommonConstants.UTF_8);
    // Check the correct edges are returned for EdgeSeed 1 -> B
    GetJavaRDDOfElements<EdgeSeed> rddQuery = new GetJavaRDDOfElements.Builder<EdgeSeed>()
            .javaSparkContext(sparkContext)
            .seeds(Collections.singleton(new EdgeSeed("1", "B", false)))
            .setIncludeEdges(GetOperation.IncludeEdgeType.ALL)
            .setIncludeEntities(false)
            .build();
    rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    JavaRDD<Element> rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    final Set<Element> results = new HashSet<>();
    results.addAll(rdd.collect());
    final Set<Element> expectedElements = new HashSet<>();
    final Edge edge1B = new Edge(EDGE_GROUP);
    edge1B.setSource("1");
    edge1B.setDestination("B");
    edge1B.setDirected(false);
    edge1B.putProperty("count", 2);
    expectedElements.add(edge1B);
    assertEquals(expectedElements, results);
    // Check the entity for 1 is returned when querying for 1 -> B with entities only
    rddQuery = new GetJavaRDDOfElements.Builder<EdgeSeed>()
            .javaSparkContext(sparkContext)
            .seeds(Collections.singleton(new EdgeSeed("1", "B", false)))
            .setIncludeEntities(true)
            .setIncludeEdges(GetOperation.IncludeEdgeType.NONE)
            .build();
    rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    results.clear();
    results.addAll(rdd.collect());
    expectedElements.clear();
    final Entity entity1 = new Entity(ENTITY_GROUP);
    entity1.setVertex("1");
    expectedElements.add(entity1);
    assertEquals(expectedElements, results);
    // Check the correct edges are returned for 1 -> B when edges only are specified
    rddQuery = new GetJavaRDDOfElements.Builder<EdgeSeed>()
            .javaSparkContext(sparkContext)
            .seeds(Collections.singleton(new EdgeSeed("1", "B", false)))
            .view(new View.Builder().edge(EDGE_GROUP).build())
            .setIncludeEntities(false)
            .setIncludeEdges(GetOperation.IncludeEdgeType.ALL)
            .build();
    rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    results.clear();
    results.addAll(rdd.collect());
    expectedElements.clear();
    expectedElements.add(edge1B);
    assertEquals(expectedElements, results);
    // Check the correct edges are returned for 1 -> B and 5 -> C
    Set<EdgeSeed> seeds = new HashSet<>();
    seeds.add(new EdgeSeed("1", "B", false));
    seeds.add(new EdgeSeed("5", "C", false));
    rddQuery = new GetJavaRDDOfElements.Builder<EdgeSeed>().javaSparkContext(sparkContext).setIncludeEntities(false).seeds(seeds).build();
    rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    results.clear();
    results.addAll(rdd.collect());
    final Edge edge5C = new Edge(EDGE_GROUP);
    edge5C.setSource("5");
    edge5C.setDestination("C");
    edge5C.setDirected(false);
    edge5C.putProperty("count", 4);
    expectedElements.clear();
    expectedElements.add(edge1B);
    expectedElements.add(edge5C);
    assertEquals(expectedElements, results);
    sparkContext.stop();
}
Also used: AddElements (uk.gov.gchq.gaffer.operation.impl.add.AddElements), Entity (uk.gov.gchq.gaffer.data.element.Entity), User (uk.gov.gchq.gaffer.user.User), Configuration (org.apache.hadoop.conf.Configuration), DataOutputStream (java.io.DataOutputStream), Element (uk.gov.gchq.gaffer.data.element.Element), ArrayList (java.util.ArrayList), GetJavaRDDOfElements (uk.gov.gchq.gaffer.spark.operation.javardd.GetJavaRDDOfElements), JavaSparkContext (org.apache.spark.api.java.JavaSparkContext), HashSet (java.util.HashSet), ByteArrayOutputStream (org.apache.commons.io.output.ByteArrayOutputStream), View (uk.gov.gchq.gaffer.data.elementdefinition.view.View), Graph (uk.gov.gchq.gaffer.graph.Graph), EdgeSeed (uk.gov.gchq.gaffer.operation.data.EdgeSeed), Edge (uk.gov.gchq.gaffer.data.element.Edge), SparkConf (org.apache.spark.SparkConf), Test (org.junit.Test)
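
Each test constructs its own local SparkConf and relies on spark.driver.allowMultipleContexts to tolerate several live contexts in one JVM. A hypothetical alternative, not taken by these tests, is to share one context per test class through JUnit 4 lifecycle hooks:

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.junit.AfterClass;
import org.junit.BeforeClass;

// Hypothetical base class: one JavaSparkContext shared by all tests in a class.
public abstract class SharedSparkContextTest {

    protected static JavaSparkContext sparkContext;

    @BeforeClass
    public static void startSpark() {
        final SparkConf sparkConf = new SparkConf()
                .setMaster("local")
                .setAppName("GetRDDHandlerTests")
                .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
                .set("spark.kryo.registrator", "uk.gov.gchq.gaffer.spark.serialisation.kryo.Registrator");
        sparkContext = new JavaSparkContext(sparkConf);
    }

    @AfterClass
    public static void stopSpark() {
        sparkContext.stop();
    }
}

Because only one context is alive at a time, the allowMultipleContexts flag then becomes unnecessary.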

Example 24 with ByteArrayOutputStream

Use of org.apache.commons.io.output.ByteArrayOutputStream in project Gaffer by gchq.

Class GetRDDOfElementsHandlerTest, method checkGetCorrectElementsInRDDForEntitySeed:

@Test
public void checkGetCorrectElementsInRDDForEntitySeed() throws OperationException, IOException {
    final Graph graph1 = new Graph.Builder()
            .addSchema(getClass().getResourceAsStream("/schema/dataSchema.json"))
            .addSchema(getClass().getResourceAsStream("/schema/dataTypes.json"))
            .addSchema(getClass().getResourceAsStream("/schema/storeTypes.json"))
            .storeProperties(getClass().getResourceAsStream("/store.properties"))
            .build();
    final List<Element> elements = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
        final Entity entity = new Entity(ENTITY_GROUP);
        entity.setVertex("" + i);
        final Edge edge1 = new Edge(EDGE_GROUP);
        edge1.setSource("" + i);
        edge1.setDestination("B");
        edge1.setDirected(false);
        edge1.putProperty("count", 2);
        final Edge edge2 = new Edge(EDGE_GROUP);
        edge2.setSource("" + i);
        edge2.setDestination("C");
        edge2.setDirected(false);
        edge2.putProperty("count", 4);
        elements.add(edge1);
        elements.add(edge2);
        elements.add(entity);
    }
    final User user = new User();
    graph1.execute(new AddElements(elements), user);
    final SparkConf sparkConf = new SparkConf()
            .setMaster("local")
            .setAppName("testCheckGetCorrectElementsInRDDForEntitySeed")
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .set("spark.kryo.registrator", "uk.gov.gchq.gaffer.spark.serialisation.kryo.Registrator")
            .set("spark.driver.allowMultipleContexts", "true");
    final SparkContext sparkContext = new SparkContext(sparkConf);
    // Create Hadoop configuration and serialise to a string
    final Configuration configuration = new Configuration();
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    configuration.write(new DataOutputStream(baos));
    final String configurationString = new String(baos.toByteArray(), CommonConstants.UTF_8);
    // Check the correct elements are returned for "1"
    GetRDDOfElements<EntitySeed> rddQuery = new GetRDDOfElements.Builder<EntitySeed>()
            .sparkContext(sparkContext)
            .seeds(Collections.singleton(new EntitySeed("1")))
            .build();
    rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    RDD<Element> rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    Set<Element> results = new HashSet<>();
    // NB: IDE suggests the cast in the following line is unnecessary but compilation fails without it
    Element[] returnedElements = (Element[]) rdd.collect();
    Collections.addAll(results, returnedElements);
    final Set<Element> expectedElements = new HashSet<>();
    final Entity entity1 = new Entity(ENTITY_GROUP);
    entity1.setVertex("1");
    final Edge edge1B = new Edge(EDGE_GROUP);
    edge1B.setSource("1");
    edge1B.setDestination("B");
    edge1B.setDirected(false);
    edge1B.putProperty("count", 2);
    final Edge edge1C = new Edge(EDGE_GROUP);
    edge1C.setSource("1");
    edge1C.setDestination("C");
    edge1C.setDirected(false);
    edge1C.putProperty("count", 4);
    expectedElements.add(entity1);
    expectedElements.add(edge1B);
    expectedElements.add(edge1C);
    assertEquals(expectedElements, results);
    // Check the correct entity is returned for "1" when the view specifies entities only
    rddQuery = new GetRDDOfElements.Builder<EntitySeed>()
            .sparkContext(sparkContext)
            .seeds(Collections.singleton(new EntitySeed("1")))
            .view(new View.Builder().entity(ENTITY_GROUP).build())
            .build();
    rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    results.clear();
    returnedElements = (Element[]) rdd.collect();
    Collections.addAll(results, returnedElements);
    expectedElements.clear();
    expectedElements.add(entity1);
    assertEquals(expectedElements, results);
    // Check the correct edges are returned for "1" when the view specifies edges only
    rddQuery = new GetRDDOfElements.Builder<EntitySeed>()
            .sparkContext(sparkContext)
            .seeds(Collections.singleton(new EntitySeed("1")))
            .view(new View.Builder().edge(EDGE_GROUP).build())
            .build();
    rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    results.clear();
    returnedElements = (Element[]) rdd.collect();
    Collections.addAll(results, returnedElements);
    expectedElements.clear();
    expectedElements.add(edge1B);
    expectedElements.add(edge1C);
    assertEquals(expectedElements, results);
    // Check the correct elements are returned for "1" and "5"
    Set<EntitySeed> seeds = new HashSet<>();
    seeds.add(new EntitySeed("1"));
    seeds.add(new EntitySeed("5"));
    rddQuery = new GetRDDOfElements.Builder<EntitySeed>().sparkContext(sparkContext).seeds(seeds).build();
    rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    results.clear();
    returnedElements = (Element[]) rdd.collect();
    Collections.addAll(results, returnedElements);
    final Entity entity5 = new Entity(ENTITY_GROUP);
    entity5.setVertex("5");
    final Edge edge5B = new Edge(EDGE_GROUP);
    edge5B.setSource("5");
    edge5B.setDestination("B");
    edge5B.setDirected(false);
    edge5B.putProperty("count", 2);
    final Edge edge5C = new Edge(EDGE_GROUP);
    edge5C.setSource("5");
    edge5C.setDestination("C");
    edge5C.setDirected(false);
    edge5C.putProperty("count", 4);
    expectedElements.clear();
    expectedElements.add(entity1);
    expectedElements.add(edge1B);
    expectedElements.add(edge1C);
    expectedElements.add(entity5);
    expectedElements.add(edge5B);
    expectedElements.add(edge5C);
    assertEquals(expectedElements, results);
    sparkContext.stop();
}
Also used: AddElements (uk.gov.gchq.gaffer.operation.impl.add.AddElements), Entity (uk.gov.gchq.gaffer.data.element.Entity), User (uk.gov.gchq.gaffer.user.User), Configuration (org.apache.hadoop.conf.Configuration), DataOutputStream (java.io.DataOutputStream), Element (uk.gov.gchq.gaffer.data.element.Element), ArrayList (java.util.ArrayList), GetRDDOfElements (uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfElements), HashSet (java.util.HashSet), ByteArrayOutputStream (org.apache.commons.io.output.ByteArrayOutputStream), View (uk.gov.gchq.gaffer.data.elementdefinition.view.View), Graph (uk.gov.gchq.gaffer.graph.Graph), SparkContext (org.apache.spark.SparkContext), EntitySeed (uk.gov.gchq.gaffer.operation.data.EntitySeed), Edge (uk.gov.gchq.gaffer.data.element.Edge), SparkConf (org.apache.spark.SparkConf), Test (org.junit.Test)
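
The cast flagged in the comments above is needed because the Scala signature RDD.collect(): Array[T] erases to a method returning Object when called from Java. Assuming the Spark version in use provides RDD.toJavaRDD() (available throughout Spark 1.x), a sketch of a hypothetical helper that avoids both the cast and the manual copy loop:

import java.util.HashSet;
import java.util.Set;
import org.apache.spark.rdd.RDD;
import uk.gov.gchq.gaffer.data.element.Element;

// Hypothetical helper: bridge to the Java API so collect() returns a typed List.
public final class ScalaRddCollect {

    private ScalaRddCollect() {
    }

    public static Set<Element> collectToSet(final RDD<Element> rdd) {
        // toJavaRDD() wraps the Scala RDD; JavaRDD.collect() returns List<Element>.
        return new HashSet<>(rdd.toJavaRDD().collect());
    }
}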

Example 25 with ByteArrayOutputStream

Use of org.apache.commons.io.output.ByteArrayOutputStream in project Gaffer by gchq.

Class GetRDDOfElementsHandlerTest, method checkGetCorrectElementsInRDDForEdgeSeed:

@Test
public void checkGetCorrectElementsInRDDForEdgeSeed() throws OperationException, IOException {
    final Graph graph1 = new Graph.Builder()
            .addSchema(getClass().getResourceAsStream("/schema/dataSchema.json"))
            .addSchema(getClass().getResourceAsStream("/schema/dataTypes.json"))
            .addSchema(getClass().getResourceAsStream("/schema/storeTypes.json"))
            .storeProperties(getClass().getResourceAsStream("/store.properties"))
            .build();
    final List<Element> elements = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
        final Entity entity = new Entity(ENTITY_GROUP);
        entity.setVertex("" + i);
        final Edge edge1 = new Edge(EDGE_GROUP);
        edge1.setSource("" + i);
        edge1.setDestination("B");
        edge1.setDirected(false);
        edge1.putProperty("count", 2);
        final Edge edge2 = new Edge(EDGE_GROUP);
        edge2.setSource("" + i);
        edge2.setDestination("C");
        edge2.setDirected(false);
        edge2.putProperty("count", 4);
        elements.add(edge1);
        elements.add(edge2);
        elements.add(entity);
    }
    final User user = new User();
    graph1.execute(new AddElements(elements), user);
    final SparkConf sparkConf = new SparkConf()
            .setMaster("local")
            .setAppName("testCheckGetCorrectElementsInRDDForEdgeSeed")
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .set("spark.kryo.registrator", "uk.gov.gchq.gaffer.spark.serialisation.kryo.Registrator")
            .set("spark.driver.allowMultipleContexts", "true");
    final SparkContext sparkContext = new SparkContext(sparkConf);
    // Create Hadoop configuration and serialise to a string
    final Configuration configuration = new Configuration();
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    configuration.write(new DataOutputStream(baos));
    final String configurationString = new String(baos.toByteArray(), CommonConstants.UTF_8);
    // Check the correct edges are returned for EdgeSeed 1 -> B
    GetRDDOfElements<EdgeSeed> rddQuery = new GetRDDOfElements.Builder<EdgeSeed>()
            .sparkContext(sparkContext)
            .seeds(Collections.singleton(new EdgeSeed("1", "B", false)))
            .includeEdges(GetOperation.IncludeEdgeType.ALL)
            .includeEntities(false)
            .build();
    rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    RDD<Element> rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    Set<Element> results = new HashSet<>();
    // NB: IDE suggests the cast in the following line is unnecessary but compilation fails without it
    Element[] returnedElements = (Element[]) rdd.collect();
    Collections.addAll(results, returnedElements);
    final Set<Element> expectedElements = new HashSet<>();
    final Edge edge1B = new Edge(EDGE_GROUP);
    edge1B.setSource("1");
    edge1B.setDestination("B");
    edge1B.setDirected(false);
    edge1B.putProperty("count", 2);
    expectedElements.add(edge1B);
    assertEquals(expectedElements, results);
    // Check the entity for 1 is returned when querying for 1 -> B with entities only
    rddQuery = new GetRDDOfElements.Builder<EdgeSeed>()
            .sparkContext(sparkContext)
            .seeds(Collections.singleton(new EdgeSeed("1", "B", false)))
            .includeEntities(true)
            .includeEdges(GetOperation.IncludeEdgeType.NONE)
            .build();
    rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    results.clear();
    returnedElements = (Element[]) rdd.collect();
    Collections.addAll(results, returnedElements);
    expectedElements.clear();
    final Entity entity1 = new Entity(ENTITY_GROUP);
    entity1.setVertex("1");
    expectedElements.add(entity1);
    assertEquals(expectedElements, results);
    // Check the correct edges are returned for 1 -> B when edges only are specified
    rddQuery = new GetRDDOfElements.Builder<EdgeSeed>()
            .sparkContext(sparkContext)
            .seeds(Collections.singleton(new EdgeSeed("1", "B", false)))
            .view(new View.Builder().edge(EDGE_GROUP).build())
            .includeEntities(false)
            .includeEdges(GetOperation.IncludeEdgeType.ALL)
            .build();
    rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    results.clear();
    returnedElements = (Element[]) rdd.collect();
    Collections.addAll(results, returnedElements);
    expectedElements.clear();
    expectedElements.add(edge1B);
    assertEquals(expectedElements, results);
    // Check the correct edges are returned for 1 -> B and 5 -> C
    Set<EdgeSeed> seeds = new HashSet<>();
    seeds.add(new EdgeSeed("1", "B", false));
    seeds.add(new EdgeSeed("5", "C", false));
    rddQuery = new GetRDDOfElements.Builder<EdgeSeed>().sparkContext(sparkContext).includeEntities(false).seeds(seeds).build();
    rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    results.clear();
    returnedElements = (Element[]) rdd.collect();
    Collections.addAll(results, returnedElements);
    final Edge edge5C = new Edge(EDGE_GROUP);
    edge5C.setSource("5");
    edge5C.setDestination("C");
    edge5C.setDirected(false);
    edge5C.putProperty("count", 4);
    expectedElements.clear();
    expectedElements.add(edge1B);
    expectedElements.add(edge5C);
    assertEquals(expectedElements, results);
    sparkContext.stop();
}
Also used: AddElements (uk.gov.gchq.gaffer.operation.impl.add.AddElements), Entity (uk.gov.gchq.gaffer.data.element.Entity), User (uk.gov.gchq.gaffer.user.User), Configuration (org.apache.hadoop.conf.Configuration), DataOutputStream (java.io.DataOutputStream), Element (uk.gov.gchq.gaffer.data.element.Element), ArrayList (java.util.ArrayList), GetRDDOfElements (uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfElements), HashSet (java.util.HashSet), ByteArrayOutputStream (org.apache.commons.io.output.ByteArrayOutputStream), View (uk.gov.gchq.gaffer.data.elementdefinition.view.View), Graph (uk.gov.gchq.gaffer.graph.Graph), SparkContext (org.apache.spark.SparkContext), EdgeSeed (uk.gov.gchq.gaffer.operation.data.EdgeSeed), Edge (uk.gov.gchq.gaffer.data.element.Edge), SparkConf (org.apache.spark.SparkConf), Test (org.junit.Test)
