
Example 1 with GetJavaRDDOfAllElements

Use of uk.gov.gchq.gaffer.spark.operation.javardd.GetJavaRDDOfAllElements in project Gaffer by gchq.

From the class GetJavaRDDOfAllElementsHandlerTest, the method checkGetAllElementsInJavaRDDWithVisibility:

@Test
public void checkGetAllElementsInJavaRDDWithVisibility() throws OperationException, IOException {
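    // Build a graph whose schemas define a visibility property on its elements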
    final Graph graph1 = new Graph.Builder()
            .addSchema(getClass().getResourceAsStream("/schema/dataSchemaWithVisibility.json"))
            .addSchema(getClass().getResourceAsStream("/schema/dataTypes.json"))
            .addSchema(getClass().getResourceAsStream("/schema/storeTypes.json"))
            .storeProperties(getClass().getResourceAsStream("/store.properties"))
            .build();
    final List<Element> elements = new ArrayList<>();
    for (int i = 0; i < 1; i++) {
        final Entity entity = new Entity(TestGroups.ENTITY);
        entity.setVertex("" + i);
        entity.putProperty("visibility", "public");
        final Edge edge1 = new Edge(TestGroups.EDGE);
        edge1.setSource("" + i);
        edge1.setDestination("B");
        edge1.setDirected(false);
        edge1.putProperty(TestPropertyNames.COUNT, 2);
        edge1.putProperty("visibility", "private");
        final Edge edge2 = new Edge(TestGroups.EDGE);
        edge2.setSource("" + i);
        edge2.setDestination("C");
        edge2.setDirected(false);
        edge2.putProperty(TestPropertyNames.COUNT, 4);
        edge2.putProperty("visibility", "public");
        elements.add(edge1);
        elements.add(edge2);
        elements.add(entity);
    }
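    // Add the elements as a user holding the "public" auth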
    final User user = new User("user", Collections.singleton("public"));
    graph1.execute(new AddElements(elements), user);
    final SparkConf sparkConf = new SparkConf()
            .setMaster("local")
            .setAppName("testCheckGetCorrectElementsInJavaRDDForEntitySeed")
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .set("spark.kryo.registrator", "uk.gov.gchq.gaffer.spark.serialisation.kryo.Registrator")
            .set("spark.driver.allowMultipleContexts", "true");
    final JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);
    // Create Hadoop configuration and serialise to a string
    final Configuration configuration = new Configuration();
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    configuration.write(new DataOutputStream(baos));
    final String configurationString = new String(baos.toByteArray(), CommonConstants.UTF_8);
    // Create user with just public auth, and user with both private and public
    final Set<String> publicNotPrivate = new HashSet<>();
    publicNotPrivate.add("public");
    final User userWithPublicNotPrivate = new User("user1", publicNotPrivate);
    final Set<String> privateAuth = new HashSet<>();
    privateAuth.add("public");
    privateAuth.add("private");
    final User userWithPrivate = new User("user2", privateAuth);
    // Calculate correct results for 2 users
    final Set<Element> expectedElementsPublicNotPrivate = new HashSet<>();
    final Set<Element> expectedElementsPrivate = new HashSet<>();
    for (final Element element : elements) {
        expectedElementsPrivate.add(element);
        if (element.getProperty("visibility").equals("public")) {
            expectedElementsPublicNotPrivate.add(element);
        }
    }
    // Check get correct edges for user with just public
    GetJavaRDDOfAllElements rddQuery = new GetJavaRDDOfAllElements.Builder().javaSparkContext(sparkContext).build();
    rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    JavaRDD<Element> rdd = graph1.execute(rddQuery, userWithPublicNotPrivate);
    if (rdd == null) {
        fail("No RDD returned");
    }
    final Set<Element> results = new HashSet<>(rdd.collect());
    assertEquals(expectedElementsPublicNotPrivate, results);
    // Check get correct edges for user with both private and public
    rddQuery = new GetJavaRDDOfAllElements.Builder().javaSparkContext(sparkContext).build();
    rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    rdd = graph1.execute(rddQuery, userWithPrivate);
    if (rdd == null) {
        fail("No RDD returned");
    }
    results.clear();
    results.addAll(rdd.collect());
    assertEquals(expectedElementsPrivate, results);
    sparkContext.stop();
}
Also used: AddElements(uk.gov.gchq.gaffer.operation.impl.add.AddElements) Entity(uk.gov.gchq.gaffer.data.element.Entity) User(uk.gov.gchq.gaffer.user.User) Configuration(org.apache.hadoop.conf.Configuration) DataOutputStream(java.io.DataOutputStream) Element(uk.gov.gchq.gaffer.data.element.Element) ArrayList(java.util.ArrayList) GetJavaRDDOfAllElements(uk.gov.gchq.gaffer.spark.operation.javardd.GetJavaRDDOfAllElements) ByteArrayOutputStream(org.apache.commons.io.output.ByteArrayOutputStream) Graph(uk.gov.gchq.gaffer.graph.Graph) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Edge(uk.gov.gchq.gaffer.data.element.Edge) SparkConf(org.apache.spark.SparkConf) HashSet(java.util.HashSet) Test(org.junit.Test)
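
The Hadoop-configuration serialisation step recurs in each of these tests. A small helper can capture it; the method below is a sketch (the name serialiseConfiguration is ours, not Gaffer's), built only from the calls already used inline above:

// Hypothetical helper: serialises a Hadoop Configuration to a UTF-8 string,
// exactly as the tests above do inline.
private static String serialiseConfiguration(final Configuration configuration) throws IOException {
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    configuration.write(new DataOutputStream(baos));
    return new String(baos.toByteArray(), CommonConstants.UTF_8);
}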

Example 2 with GetJavaRDDOfAllElements

Use of uk.gov.gchq.gaffer.spark.operation.javardd.GetJavaRDDOfAllElements in project Gaffer by gchq.

From the class GetJavaRDDOfAllElementsHandlerTest, the method checkGetAllElementsInJavaRDD:

@Test
public void checkGetAllElementsInJavaRDD() throws OperationException, IOException {
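    // Build a graph from the test schemas and store properties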
    final Graph graph1 = new Graph.Builder()
            .addSchema(getClass().getResourceAsStream("/schema/dataSchema.json"))
            .addSchema(getClass().getResourceAsStream("/schema/dataTypes.json"))
            .addSchema(getClass().getResourceAsStream("/schema/storeTypes.json"))
            .storeProperties(getClass().getResourceAsStream("/store.properties"))
            .build();
    final List<Element> elements = new ArrayList<>();
    final Set<Element> expectedElements = new HashSet<>();
    for (int i = 0; i < 10; i++) {
        final Entity entity = new Entity(TestGroups.ENTITY);
        entity.setVertex("" + i);
        final Edge edge1 = new Edge(TestGroups.EDGE);
        edge1.setSource("" + i);
        edge1.setDestination("B");
        edge1.setDirected(false);
        edge1.putProperty(TestPropertyNames.COUNT, 2);
        final Edge edge2 = new Edge(TestGroups.EDGE);
        edge2.setSource("" + i);
        edge2.setDestination("C");
        edge2.setDirected(false);
        edge2.putProperty(TestPropertyNames.COUNT, 4);
        elements.add(edge1);
        elements.add(edge2);
        elements.add(entity);
        expectedElements.add(edge1);
        expectedElements.add(edge2);
        expectedElements.add(entity);
    }
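    // Add the elements as a user with no auths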
    final User user = new User();
    graph1.execute(new AddElements(elements), user);
    final SparkConf sparkConf = new SparkConf()
            .setMaster("local")
            .setAppName("testCheckGetCorrectElementsInJavaRDDForEntitySeed")
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .set("spark.kryo.registrator", "uk.gov.gchq.gaffer.spark.serialisation.kryo.Registrator")
            .set("spark.driver.allowMultipleContexts", "true");
    final JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);
    // Create Hadoop configuration and serialise to a string
    final Configuration configuration = new Configuration();
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    configuration.write(new DataOutputStream(baos));
    final String configurationString = new String(baos.toByteArray(), CommonConstants.UTF_8);
    // Check get correct edges for "1"
    final GetJavaRDDOfAllElements rddQuery = new GetJavaRDDOfAllElements.Builder().javaSparkContext(sparkContext).build();
    rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    final JavaRDD<Element> rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    final Set<Element> results = new HashSet<>(rdd.collect());
    assertEquals(expectedElements, results);
    sparkContext.stop();
}
Also used: AddElements(uk.gov.gchq.gaffer.operation.impl.add.AddElements) Entity(uk.gov.gchq.gaffer.data.element.Entity) User(uk.gov.gchq.gaffer.user.User) Configuration(org.apache.hadoop.conf.Configuration) DataOutputStream(java.io.DataOutputStream) Element(uk.gov.gchq.gaffer.data.element.Element) ArrayList(java.util.ArrayList) GetJavaRDDOfAllElements(uk.gov.gchq.gaffer.spark.operation.javardd.GetJavaRDDOfAllElements) ByteArrayOutputStream(org.apache.commons.io.output.ByteArrayOutputStream) Graph(uk.gov.gchq.gaffer.graph.Graph) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Edge(uk.gov.gchq.gaffer.data.element.Edge) SparkConf(org.apache.spark.SparkConf) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 3 with GetJavaRDDOfAllElements

Use of uk.gov.gchq.gaffer.spark.operation.javardd.GetJavaRDDOfAllElements in project Gaffer by gchq.

From the class GetJavaRDDOfAllElementsExample, the method getJavaRddOfAllElements:

public void getJavaRddOfAllElements(final JavaSparkContext sc, final Graph graph) throws OperationException {
    ROOT_LOGGER.setLevel(Level.INFO);
    // Avoid using getMethodNameAsSentence as it messes up the formatting of the "RDD" part
    log("#### get Java RDD of elements\n");
    printGraph();
    ROOT_LOGGER.setLevel(Level.OFF);
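    // Build the operation with the supplied JavaSparkContext and execute it as user01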
    final GetJavaRDDOfAllElements operation = new GetJavaRDDOfAllElements.Builder().javaSparkContext(sc).build();
    final JavaRDD<Element> rdd = graph.execute(operation, new User("user01"));
    final List<Element> elements = rdd.collect();
    ROOT_LOGGER.setLevel(Level.INFO);
    printJava("GetJavaRDDOfAllElements<ElementSeed> operation = new GetJavaRDDOfAllElements.Builder<>()\n"
            + "                .javaSparkContext(sc)\n"
            + "                .build();\n"
            + "JavaRDD<Element> rdd = graph.execute(operation, new User(\"user01\"));\n"
            + "List<Element> elements = rdd.collect();");
    log("The results are:");
    log("```");
    for (final Element e : elements) {
        log(e.toString());
    }
    log("```");
    ROOT_LOGGER.setLevel(Level.OFF);
}
Also used: User(uk.gov.gchq.gaffer.user.User) Element(uk.gov.gchq.gaffer.data.element.Element) GetJavaRDDOfAllElements(uk.gov.gchq.gaffer.spark.operation.javardd.GetJavaRDDOfAllElements)
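
Pulling the pattern together, here is a minimal sketch of running the operation outside a test harness. It assumes an existing Graph named graph, a JavaSparkContext named sc, and a configurationString serialised as in the tests above, which pass the Hadoop configuration through the operation option shown:

// Minimal usage sketch; graph, sc and configurationString are assumed to exist.
final GetJavaRDDOfAllElements operation = new GetJavaRDDOfAllElements.Builder()
        .javaSparkContext(sc)
        .option(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString)
        .build();
final JavaRDD<Element> rdd = graph.execute(operation, new User("user01"));
final List<Element> elements = rdd.collect();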

Example 4 with GetJavaRDDOfAllElements

Use of uk.gov.gchq.gaffer.spark.operation.javardd.GetJavaRDDOfAllElements in project Gaffer by gchq.

From the class ImportJavaRDDOfElementsHandlerTest, the method checkImportJavaRDDOfElements:

@Test
public void checkImportJavaRDDOfElements() throws OperationException, IOException, InterruptedException {
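    // Build a graph from the test schemas and store properties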
    final Graph graph1 = new Graph.Builder()
            .addSchema(getClass().getResourceAsStream("/schema/dataSchema.json"))
            .addSchema(getClass().getResourceAsStream("/schema/dataTypes.json"))
            .addSchema(getClass().getResourceAsStream("/schema/storeTypes.json"))
            .storeProperties(getClass().getResourceAsStream("/store.properties"))
            .build();
    final List<Element> elements = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
        final Entity entity = new Entity(TestGroups.ENTITY);
        entity.setVertex("" + i);
        final Edge edge1 = new Edge(TestGroups.EDGE);
        edge1.setSource("" + i);
        edge1.setDestination("B");
        edge1.setDirected(false);
        edge1.putProperty(TestPropertyNames.COUNT, 2);
        final Edge edge2 = new Edge(TestGroups.EDGE);
        edge2.setSource("" + i);
        edge2.setDestination("C");
        edge2.setDirected(false);
        edge2.putProperty(TestPropertyNames.COUNT, 4);
        elements.add(edge1);
        elements.add(edge2);
        elements.add(entity);
    }
    final User user = new User();
    final SparkConf sparkConf = new SparkConf()
            .setMaster("local")
            .setAppName("testCheckGetCorrectElementsInJavaRDDForEntitySeed")
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .set("spark.kryo.registrator", "uk.gov.gchq.gaffer.spark.serialisation.kryo.Registrator")
            .set("spark.driver.allowMultipleContexts", "true");
    final JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);
    // Create Hadoop configuration and serialise to a string
    final Configuration configuration = new Configuration();
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    configuration.write(new DataOutputStream(baos));
    final String configurationString = new String(baos.toByteArray(), CommonConstants.UTF_8);
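    // Paths for the bulk import's output and failure directories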
    final String outputPath = this.getClass().getResource("/").getPath() + "load";
    final String failurePath = this.getClass().getResource("/").getPath() + "failure";
    final File file = new File(outputPath);
    if (file.exists()) {
        FileUtils.forceDelete(file);
    }
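    // Parallelise the elements into a JavaRDD and import them into the graph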
    final JavaRDD<Element> elementJavaRDD = sparkContext.parallelize(elements);
    final ImportJavaRDDOfElements addRdd = new ImportJavaRDDOfElements.Builder()
            .javaSparkContext(sparkContext)
            .input(elementJavaRDD)
            .option("outputPath", outputPath)
            .option("failurePath", failurePath)
            .build();
    graph1.execute(addRdd, user);
    FileUtils.forceDelete(file);
    // Check all elements were added
    final GetJavaRDDOfAllElements rddQuery = new GetJavaRDDOfAllElements.Builder()
            .javaSparkContext(sparkContext)
            .option(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString)
            .build();
    final JavaRDD<Element> rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    final Set<Element> results = new HashSet<>(rdd.collect());
    assertEquals(elements.size(), results.size());
    sparkContext.stop();
}
Also used: Entity(uk.gov.gchq.gaffer.data.element.Entity) User(uk.gov.gchq.gaffer.user.User) Configuration(org.apache.hadoop.conf.Configuration) DataOutputStream(java.io.DataOutputStream) Element(uk.gov.gchq.gaffer.data.element.Element) ArrayList(java.util.ArrayList) GetJavaRDDOfAllElements(uk.gov.gchq.gaffer.spark.operation.javardd.GetJavaRDDOfAllElements) ByteArrayOutputStream(org.apache.commons.io.output.ByteArrayOutputStream) Graph(uk.gov.gchq.gaffer.graph.Graph) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) ImportJavaRDDOfElements(uk.gov.gchq.gaffer.spark.operation.javardd.ImportJavaRDDOfElements) Edge(uk.gov.gchq.gaffer.data.element.Edge) SparkConf(org.apache.spark.SparkConf) File(java.io.File) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 5 with GetJavaRDDOfAllElements

Use of uk.gov.gchq.gaffer.spark.operation.javardd.GetJavaRDDOfAllElements in project Gaffer by gchq.

From the class ImportKeyValueJavaPairRDDToAccumuloHandlerTest, the method checkImportKeyValueJavaPairRDD:

@Test
public void checkImportKeyValueJavaPairRDD() throws OperationException, IOException, InterruptedException {
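    // Build a graph from the test schemas (including the store schema) and store properties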
    final Graph graph1 = new Graph.Builder()
            .addSchema(getClass().getResourceAsStream("/schema/dataSchema.json"))
            .addSchema(getClass().getResourceAsStream("/schema/dataTypes.json"))
            .addSchema(getClass().getResourceAsStream("/schema/storeSchema.json"))
            .addSchema(getClass().getResourceAsStream("/schema/storeTypes.json"))
            .storeProperties(getClass().getResourceAsStream("/store.properties"))
            .build();
    final List<Element> elements = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
        final Entity entity = new Entity(TestGroups.ENTITY);
        entity.setVertex("" + i);
        final Edge edge1 = new Edge(TestGroups.EDGE);
        edge1.setSource("" + i);
        edge1.setDestination("B");
        edge1.setDirected(false);
        edge1.putProperty(TestPropertyNames.COUNT, 2);
        final Edge edge2 = new Edge(TestGroups.EDGE);
        edge2.setSource("" + i);
        edge2.setDestination("C");
        edge2.setDirected(false);
        edge2.putProperty(TestPropertyNames.COUNT, 4);
        elements.add(edge1);
        elements.add(edge2);
        elements.add(entity);
    }
    final User user = new User();
    final SparkConf sparkConf = new SparkConf()
            .setMaster("local")
            .setAppName("testCheckGetCorrectElementsInJavaRDDForEntitySeed")
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .set("spark.kryo.registrator", "uk.gov.gchq.gaffer.spark.serialisation.kryo.Registrator")
            .set("spark.driver.allowMultipleContexts", "true");
    final JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);
    // Create Hadoop configuration and serialise to a string
    final Configuration configuration = new Configuration();
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    configuration.write(new DataOutputStream(baos));
    final String configurationString = new String(baos.toByteArray(), CommonConstants.UTF_8);
    final String outputPath = this.getClass().getResource("/").getPath() + "load";
    final String failurePath = this.getClass().getResource("/").getPath() + "failure";
    final File file = new File(outputPath);
    if (file.exists()) {
        FileUtils.forceDelete(file);
    }
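    // Convert the elements to Accumulo key-value pairs and bulk import them into the graph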
    final ElementConverterFunction func = new ElementConverterFunction(
            sparkContext.broadcast(new ByteEntityAccumuloElementConverter(graph1.getSchema())));
    final JavaPairRDD<Key, Value> elementJavaRDD = sparkContext.parallelize(elements).flatMapToPair(func);
    final ImportKeyValueJavaPairRDDToAccumulo addRdd = new ImportKeyValueJavaPairRDDToAccumulo.Builder()
            .input(elementJavaRDD)
            .outputPath(outputPath)
            .failurePath(failurePath)
            .build();
    graph1.execute(addRdd, user);
    FileUtils.forceDelete(file);
    // Check all elements were added
    final GetJavaRDDOfAllElements rddQuery = new GetJavaRDDOfAllElements.Builder()
            .javaSparkContext(sparkContext)
            .option(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString)
            .build();
    final JavaRDD<Element> rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    final Set<Element> results = new HashSet<>(rdd.collect());
    assertEquals(elements.size(), results.size());
    sparkContext.stop();
}
Also used: Entity(uk.gov.gchq.gaffer.data.element.Entity) User(uk.gov.gchq.gaffer.user.User) Configuration(org.apache.hadoop.conf.Configuration) DataOutputStream(java.io.DataOutputStream) Element(uk.gov.gchq.gaffer.data.element.Element) ArrayList(java.util.ArrayList) GetJavaRDDOfAllElements(uk.gov.gchq.gaffer.spark.operation.javardd.GetJavaRDDOfAllElements) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) HashSet(java.util.HashSet) ByteArrayOutputStream(org.apache.commons.io.output.ByteArrayOutputStream) ImportKeyValueJavaPairRDDToAccumulo(uk.gov.gchq.gaffer.sparkaccumulo.operation.javardd.ImportKeyValueJavaPairRDDToAccumulo) Graph(uk.gov.gchq.gaffer.graph.Graph) Value(org.apache.accumulo.core.data.Value) ByteEntityAccumuloElementConverter(uk.gov.gchq.gaffer.accumulostore.key.core.impl.byteEntity.ByteEntityAccumuloElementConverter) ElementConverterFunction(uk.gov.gchq.gaffer.sparkaccumulo.operation.utils.java.ElementConverterFunction) Edge(uk.gov.gchq.gaffer.data.element.Edge) SparkConf(org.apache.spark.SparkConf) File(java.io.File) Key(org.apache.accumulo.core.data.Key) Test(org.junit.Test)

Aggregations

Element (uk.gov.gchq.gaffer.data.element.Element): 6
GetJavaRDDOfAllElements (uk.gov.gchq.gaffer.spark.operation.javardd.GetJavaRDDOfAllElements): 6
User (uk.gov.gchq.gaffer.user.User): 6
DataOutputStream (java.io.DataOutputStream): 4
ArrayList (java.util.ArrayList): 4
HashSet (java.util.HashSet): 4
ByteArrayOutputStream (org.apache.commons.io.output.ByteArrayOutputStream): 4
Configuration (org.apache.hadoop.conf.Configuration): 4
SparkConf (org.apache.spark.SparkConf): 4
JavaSparkContext (org.apache.spark.api.java.JavaSparkContext): 4
Test (org.junit.Test): 4
Edge (uk.gov.gchq.gaffer.data.element.Edge): 4
Entity (uk.gov.gchq.gaffer.data.element.Entity): 4
Graph (uk.gov.gchq.gaffer.graph.Graph): 4
File (java.io.File): 2
AddElements (uk.gov.gchq.gaffer.operation.impl.add.AddElements): 2
Key (org.apache.accumulo.core.data.Key): 1
Value (org.apache.accumulo.core.data.Value): 1
ByteEntityAccumuloElementConverter (uk.gov.gchq.gaffer.accumulostore.key.core.impl.byteEntity.ByteEntityAccumuloElementConverter): 1
ImportJavaRDDOfElements (uk.gov.gchq.gaffer.spark.operation.javardd.ImportJavaRDDOfElements): 1