Use of uk.gov.gchq.gaffer.spark.operation.javardd.GetJavaRDDOfAllElements in project Gaffer by gchq.
From the class GetJavaRDDOfAllElementsHandlerTest, method checkGetAllElementsInJavaRDDWithVisibility:
@Test
public void checkGetAllElementsInJavaRDDWithVisibility() throws OperationException, IOException {
    final Graph graph1 = new Graph.Builder()
            .addSchema(getClass().getResourceAsStream("/schema/dataSchemaWithVisibility.json"))
            .addSchema(getClass().getResourceAsStream("/schema/dataTypes.json"))
            .addSchema(getClass().getResourceAsStream("/schema/storeTypes.json"))
            .storeProperties(getClass().getResourceAsStream("/store.properties"))
            .build();
    final List<Element> elements = new ArrayList<>();
    for (int i = 0; i < 1; i++) {
        final Entity entity = new Entity(TestGroups.ENTITY);
        entity.setVertex("" + i);
        entity.putProperty("visibility", "public");
        final Edge edge1 = new Edge(TestGroups.EDGE);
        edge1.setSource("" + i);
        edge1.setDestination("B");
        edge1.setDirected(false);
        edge1.putProperty(TestPropertyNames.COUNT, 2);
        edge1.putProperty("visibility", "private");
        final Edge edge2 = new Edge(TestGroups.EDGE);
        edge2.setSource("" + i);
        edge2.setDestination("C");
        edge2.setDirected(false);
        edge2.putProperty(TestPropertyNames.COUNT, 4);
        edge2.putProperty("visibility", "public");
        elements.add(edge1);
        elements.add(edge2);
        elements.add(entity);
    }
    final User user = new User("user", Collections.singleton("public"));
    graph1.execute(new AddElements(elements), user);
    final SparkConf sparkConf = new SparkConf()
            .setMaster("local")
            .setAppName("testCheckGetCorrectElementsInJavaRDDForEntitySeed")
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .set("spark.kryo.registrator", "uk.gov.gchq.gaffer.spark.serialisation.kryo.Registrator")
            .set("spark.driver.allowMultipleContexts", "true");
    final JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);
    // Create a Hadoop configuration and serialise it to a string
    final Configuration configuration = new Configuration();
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    configuration.write(new DataOutputStream(baos));
    final String configurationString = new String(baos.toByteArray(), CommonConstants.UTF_8);
    // Create a user with just the "public" auth, and a user with both the "private" and "public" auths
    final Set<String> publicNotPrivate = new HashSet<>();
    publicNotPrivate.add("public");
    final User userWithPublicNotPrivate = new User("user1", publicNotPrivate);
    final Set<String> privateAuth = new HashSet<>();
    privateAuth.add("public");
    privateAuth.add("private");
    final User userWithPrivate = new User("user2", privateAuth);
    // Calculate the correct results for the two users
    final Set<Element> expectedElementsPublicNotPrivate = new HashSet<>();
    final Set<Element> expectedElementsPrivate = new HashSet<>();
    for (final Element element : elements) {
        expectedElementsPrivate.add(element);
        if (element.getProperty("visibility").equals("public")) {
            expectedElementsPublicNotPrivate.add(element);
        }
    }
    // Check the correct elements are returned for the user with just the "public" auth
    GetJavaRDDOfAllElements rddQuery = new GetJavaRDDOfAllElements.Builder()
            .javaSparkContext(sparkContext)
            .build();
    rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    JavaRDD<Element> rdd = graph1.execute(rddQuery, userWithPublicNotPrivate);
    if (rdd == null) {
        fail("No RDD returned");
    }
    final Set<Element> results = new HashSet<>(rdd.collect());
    assertEquals(expectedElementsPublicNotPrivate, results);
    // Check the correct elements are returned for the user with both auths
    rddQuery = new GetJavaRDDOfAllElements.Builder()
            .javaSparkContext(sparkContext)
            .build();
    rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    rdd = graph1.execute(rddQuery, userWithPrivate);
    if (rdd == null) {
        fail("No RDD returned");
    }
    results.clear();
    results.addAll(rdd.collect());
    assertEquals(expectedElementsPrivate, results);
    sparkContext.stop();
}
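
The configuration-to-string step above recurs in every test in this section. Below is a minimal sketch of a helper that factors it out; the method name is hypothetical, not part of Gaffer, and it assumes the same Hadoop Configuration and Gaffer CommonConstants imports the tests already use.

// Hypothetical helper: serialises a Hadoop Configuration to a String so it can be
// passed to the RDD handler via AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY.
private static String serialiseConfiguration(final Configuration configuration) throws IOException {
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    configuration.write(new DataOutputStream(baos));
    return new String(baos.toByteArray(), CommonConstants.UTF_8);
}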
Use of uk.gov.gchq.gaffer.spark.operation.javardd.GetJavaRDDOfAllElements in project Gaffer by gchq.
From the class GetJavaRDDOfAllElementsHandlerTest, method checkGetAllElementsInJavaRDD:
@Test
public void checkGetAllElementsInJavaRDD() throws OperationException, IOException {
    final Graph graph1 = new Graph.Builder()
            .addSchema(getClass().getResourceAsStream("/schema/dataSchema.json"))
            .addSchema(getClass().getResourceAsStream("/schema/dataTypes.json"))
            .addSchema(getClass().getResourceAsStream("/schema/storeTypes.json"))
            .storeProperties(getClass().getResourceAsStream("/store.properties"))
            .build();
    final List<Element> elements = new ArrayList<>();
    final Set<Element> expectedElements = new HashSet<>();
    for (int i = 0; i < 10; i++) {
        final Entity entity = new Entity(TestGroups.ENTITY);
        entity.setVertex("" + i);
        final Edge edge1 = new Edge(TestGroups.EDGE);
        edge1.setSource("" + i);
        edge1.setDestination("B");
        edge1.setDirected(false);
        edge1.putProperty(TestPropertyNames.COUNT, 2);
        final Edge edge2 = new Edge(TestGroups.EDGE);
        edge2.setSource("" + i);
        edge2.setDestination("C");
        edge2.setDirected(false);
        edge2.putProperty(TestPropertyNames.COUNT, 4);
        elements.add(edge1);
        elements.add(edge2);
        elements.add(entity);
        expectedElements.add(edge1);
        expectedElements.add(edge2);
        expectedElements.add(entity);
    }
    final User user = new User();
    graph1.execute(new AddElements(elements), user);
    final SparkConf sparkConf = new SparkConf()
            .setMaster("local")
            .setAppName("testCheckGetCorrectElementsInJavaRDDForEntitySeed")
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .set("spark.kryo.registrator", "uk.gov.gchq.gaffer.spark.serialisation.kryo.Registrator")
            .set("spark.driver.allowMultipleContexts", "true");
    final JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);
    // Create a Hadoop configuration and serialise it to a string
    final Configuration configuration = new Configuration();
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    configuration.write(new DataOutputStream(baos));
    final String configurationString = new String(baos.toByteArray(), CommonConstants.UTF_8);
    // Check all the elements are returned
    final GetJavaRDDOfAllElements rddQuery = new GetJavaRDDOfAllElements.Builder()
            .javaSparkContext(sparkContext)
            .build();
    rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
    final JavaRDD<Element> rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    final Set<Element> results = new HashSet<>(rdd.collect());
    assertEquals(expectedElements, results);
    sparkContext.stop();
}
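
The SparkConf set-up is likewise identical across these tests: the Kryo serialiser and Gaffer's Registrator are configured so that Gaffer types serialise correctly in Spark. A sketch of that set-up in isolation follows; the factory method name is illustrative only, not part of Gaffer.

// Illustrative factory: builds a local JavaSparkContext configured the same way
// as the tests above, with the Kryo serialiser and Gaffer's Registrator.
private static JavaSparkContext createLocalSparkContext(final String appName) {
    final SparkConf sparkConf = new SparkConf()
            .setMaster("local")
            .setAppName(appName)
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .set("spark.kryo.registrator", "uk.gov.gchq.gaffer.spark.serialisation.kryo.Registrator")
            .set("spark.driver.allowMultipleContexts", "true");
    return new JavaSparkContext(sparkConf);
}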
Use of uk.gov.gchq.gaffer.spark.operation.javardd.GetJavaRDDOfAllElements in project Gaffer by gchq.
From the class GetJavaRDDOfAllElementsExample, method getJavaRddOfAllElements:
public void getJavaRddOfAllElements(final JavaSparkContext sc, final Graph graph) throws OperationException {
    ROOT_LOGGER.setLevel(Level.INFO);
    // Avoid using getMethodNameAsSentence as it messes up the formatting of the "RDD" part
    log("#### get Java RDD of elements\n");
    printGraph();
    ROOT_LOGGER.setLevel(Level.OFF);
    final GetJavaRDDOfAllElements operation = new GetJavaRDDOfAllElements.Builder()
            .javaSparkContext(sc)
            .build();
    final JavaRDD<Element> rdd = graph.execute(operation, new User("user01"));
    final List<Element> elements = rdd.collect();
    ROOT_LOGGER.setLevel(Level.INFO);
    printJava("GetJavaRDDOfAllElements<ElementSeed> operation = new GetJavaRDDOfAllElements.Builder<>()\n"
            + " .javaSparkContext(sc)\n"
            + " .build();\n"
            + "JavaRDD<Element> rdd = graph.execute(operation, new User(\"user01\"));\n"
            + "List<Element> elements = rdd.collect();");
    log("The results are:");
    log("```");
    for (final Element e : elements) {
        log(e.toString());
    }
    log("```");
    ROOT_LOGGER.setLevel(Level.OFF);
}
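
Stripped of the logging and documentation scaffolding, the core usage this example prints is just the following; it assumes an existing Graph and JavaSparkContext, as passed into the method above.

// Build the operation, execute it against the graph, and collect the results.
final GetJavaRDDOfAllElements operation = new GetJavaRDDOfAllElements.Builder()
        .javaSparkContext(sc)
        .build();
final JavaRDD<Element> rdd = graph.execute(operation, new User("user01"));
final List<Element> elements = rdd.collect();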
Use of uk.gov.gchq.gaffer.spark.operation.javardd.GetJavaRDDOfAllElements in project Gaffer by gchq.
From the class ImportJavaRDDOfElementsHandlerTest, method checkImportJavaRDDOfElements:
@Test
public void checkImportJavaRDDOfElements() throws OperationException, IOException, InterruptedException {
    final Graph graph1 = new Graph.Builder()
            .addSchema(getClass().getResourceAsStream("/schema/dataSchema.json"))
            .addSchema(getClass().getResourceAsStream("/schema/dataTypes.json"))
            .addSchema(getClass().getResourceAsStream("/schema/storeTypes.json"))
            .storeProperties(getClass().getResourceAsStream("/store.properties"))
            .build();
    final List<Element> elements = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
        final Entity entity = new Entity(TestGroups.ENTITY);
        entity.setVertex("" + i);
        final Edge edge1 = new Edge(TestGroups.EDGE);
        edge1.setSource("" + i);
        edge1.setDestination("B");
        edge1.setDirected(false);
        edge1.putProperty(TestPropertyNames.COUNT, 2);
        final Edge edge2 = new Edge(TestGroups.EDGE);
        edge2.setSource("" + i);
        edge2.setDestination("C");
        edge2.setDirected(false);
        edge2.putProperty(TestPropertyNames.COUNT, 4);
        elements.add(edge1);
        elements.add(edge2);
        elements.add(entity);
    }
    final User user = new User();
    final SparkConf sparkConf = new SparkConf()
            .setMaster("local")
            .setAppName("testCheckGetCorrectElementsInJavaRDDForEntitySeed")
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .set("spark.kryo.registrator", "uk.gov.gchq.gaffer.spark.serialisation.kryo.Registrator")
            .set("spark.driver.allowMultipleContexts", "true");
    final JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);
    // Create a Hadoop configuration and serialise it to a string
    final Configuration configuration = new Configuration();
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    configuration.write(new DataOutputStream(baos));
    final String configurationString = new String(baos.toByteArray(), CommonConstants.UTF_8);
    final String outputPath = this.getClass().getResource("/").getPath().toString() + "load";
    final String failurePath = this.getClass().getResource("/").getPath().toString() + "failure";
    final File file = new File(outputPath);
    if (file.exists()) {
        FileUtils.forceDelete(file);
    }
    final JavaRDD<Element> elementJavaRDD = sparkContext.parallelize(elements);
    final ImportJavaRDDOfElements addRdd = new ImportJavaRDDOfElements.Builder()
            .javaSparkContext(sparkContext)
            .input(elementJavaRDD)
            .option("outputPath", outputPath)
            .option("failurePath", failurePath)
            .build();
    graph1.execute(addRdd, user);
    FileUtils.forceDelete(file);
    // Check all elements were added
    final GetJavaRDDOfAllElements rddQuery = new GetJavaRDDOfAllElements.Builder()
            .javaSparkContext(sparkContext)
            .option(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString)
            .build();
    final JavaRDD<Element> rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    final Set<Element> results = new HashSet<>(rdd.collect());
    assertEquals(elements.size(), results.size());
    sparkContext.stop();
}
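
Note that the assertion above compares only the sizes of the input list and the returned RDD. A slightly stronger check, sketched below under the assumption that store aggregation does not merge any of the distinct test elements, would compare the actual sets.

// Sketch: compare the returned elements against the input as sets rather than by size.
final Set<Element> expected = new HashSet<>(elements);
assertEquals(expected, new HashSet<>(rdd.collect()));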
Use of uk.gov.gchq.gaffer.spark.operation.javardd.GetJavaRDDOfAllElements in project Gaffer by gchq.
From the class ImportKeyValueJavaPairRDDToAccumuloHandlerTest, method checkImportKeyValueJavaPairRDD:
@Test
public void checkImportKeyValueJavaPairRDD() throws OperationException, IOException, InterruptedException {
    final Graph graph1 = new Graph.Builder()
            .addSchema(getClass().getResourceAsStream("/schema/dataSchema.json"))
            .addSchema(getClass().getResourceAsStream("/schema/dataTypes.json"))
            .addSchema(getClass().getResourceAsStream("/schema/storeSchema.json"))
            .addSchema(getClass().getResourceAsStream("/schema/storeTypes.json"))
            .storeProperties(getClass().getResourceAsStream("/store.properties"))
            .build();
    final List<Element> elements = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
        final Entity entity = new Entity(TestGroups.ENTITY);
        entity.setVertex("" + i);
        final Edge edge1 = new Edge(TestGroups.EDGE);
        edge1.setSource("" + i);
        edge1.setDestination("B");
        edge1.setDirected(false);
        edge1.putProperty(TestPropertyNames.COUNT, 2);
        final Edge edge2 = new Edge(TestGroups.EDGE);
        edge2.setSource("" + i);
        edge2.setDestination("C");
        edge2.setDirected(false);
        edge2.putProperty(TestPropertyNames.COUNT, 4);
        elements.add(edge1);
        elements.add(edge2);
        elements.add(entity);
    }
    final User user = new User();
    final SparkConf sparkConf = new SparkConf()
            .setMaster("local")
            .setAppName("testCheckGetCorrectElementsInJavaRDDForEntitySeed")
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .set("spark.kryo.registrator", "uk.gov.gchq.gaffer.spark.serialisation.kryo.Registrator")
            .set("spark.driver.allowMultipleContexts", "true");
    final JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);
    // Create a Hadoop configuration and serialise it to a string
    final Configuration configuration = new Configuration();
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    configuration.write(new DataOutputStream(baos));
    final String configurationString = new String(baos.toByteArray(), CommonConstants.UTF_8);
    final String outputPath = this.getClass().getResource("/").getPath().toString() + "load";
    final String failurePath = this.getClass().getResource("/").getPath().toString() + "failure";
    final File file = new File(outputPath);
    if (file.exists()) {
        FileUtils.forceDelete(file);
    }
    final ElementConverterFunction func = new ElementConverterFunction(
            sparkContext.broadcast(new ByteEntityAccumuloElementConverter(graph1.getSchema())));
    final JavaPairRDD<Key, Value> elementJavaRDD = sparkContext.parallelize(elements).flatMapToPair(func);
    final ImportKeyValueJavaPairRDDToAccumulo addRdd = new ImportKeyValueJavaPairRDDToAccumulo.Builder()
            .input(elementJavaRDD)
            .outputPath(outputPath)
            .failurePath(failurePath)
            .build();
    graph1.execute(addRdd, user);
    FileUtils.forceDelete(file);
    // Check all elements were added
    final GetJavaRDDOfAllElements rddQuery = new GetJavaRDDOfAllElements.Builder()
            .javaSparkContext(sparkContext)
            .option(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString)
            .build();
    final JavaRDD<Element> rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    final Set<Element> results = new HashSet<>(rdd.collect());
    assertEquals(elements.size(), results.size());
    sparkContext.stop();
}
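
The step that distinguishes this test from the previous one is the explicit conversion of each Element into Accumulo Key/Value pairs before the bulk import. In isolation, that conversion is the following, taken directly from the test above.

// Broadcast a single element converter to the executors, then flat-map each
// Element to its Accumulo Key/Value pairs ready for ImportKeyValueJavaPairRDDToAccumulo.
final ElementConverterFunction func = new ElementConverterFunction(
        sparkContext.broadcast(new ByteEntityAccumuloElementConverter(graph1.getSchema())));
final JavaPairRDD<Key, Value> keyValuePairs = sparkContext.parallelize(elements).flatMapToPair(func);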