use of uk.gov.gchq.gaffer.data.elementdefinition.view.View in project Gaffer by gchq.
the class GetJavaRDDOfElementsHandlerTest method checkGetCorrectElementsInRDDForEdgeSeed.
/**
 * Checks that a {@code GetJavaRDDOfElements} operation seeded with {@code EdgeSeed}s returns
 * exactly the expected elements for four queries: edges only, entities only, an edge-group
 * view, and two seeds at once.
 *
 * <p>The Spark context is stopped in a {@code finally} block so that a failing assertion does
 * not leave a running context behind for subsequent tests.
 *
 * @throws OperationException if executing an operation on the graph fails
 * @throws IOException if the Hadoop configuration cannot be serialised
 */
@Test
public void checkGetCorrectElementsInRDDForEdgeSeed() throws OperationException, IOException {
    final Graph graph1 = new Graph.Builder()
            .addSchema(getClass().getResourceAsStream("/schema/dataSchema.json"))
            .addSchema(getClass().getResourceAsStream("/schema/dataTypes.json"))
            .addSchema(getClass().getResourceAsStream("/schema/storeTypes.json"))
            .storeProperties(getClass().getResourceAsStream("/store.properties"))
            .build();

    // Populate the graph: for each vertex 0..9 add one entity plus undirected edges to "B" and "C"
    final List<Element> elements = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
        final String vertex = String.valueOf(i);
        final Entity entity = new Entity(ENTITY_GROUP);
        entity.setVertex(vertex);
        final Edge edge1 = new Edge(EDGE_GROUP);
        edge1.setSource(vertex);
        edge1.setDestination("B");
        edge1.setDirected(false);
        edge1.putProperty("count", 2);
        final Edge edge2 = new Edge(EDGE_GROUP);
        edge2.setSource(vertex);
        edge2.setDestination("C");
        edge2.setDirected(false);
        edge2.putProperty("count", 4);
        elements.add(edge1);
        elements.add(edge2);
        elements.add(entity);
    }
    final User user = new User();
    graph1.execute(new AddElements(elements), user);

    // Elements the queries below are expected to return
    final Entity entity1 = new Entity(ENTITY_GROUP);
    entity1.setVertex("1");
    final Edge edge1B = new Edge(EDGE_GROUP);
    edge1B.setSource("1");
    edge1B.setDestination("B");
    edge1B.setDirected(false);
    edge1B.putProperty("count", 2);
    final Edge edge5C = new Edge(EDGE_GROUP);
    edge5C.setSource("5");
    edge5C.setDestination("C");
    edge5C.setDirected(false);
    edge5C.putProperty("count", 4);

    final SparkConf sparkConf = new SparkConf()
            .setMaster("local")
            .setAppName("testCheckGetCorrectElementsInJavaRDDForEdgeSeed")
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .set("spark.kryo.registrator", "uk.gov.gchq.gaffer.spark.serialisation.kryo.Registrator")
            .set("spark.driver.allowMultipleContexts", "true");
    final JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);
    try {
        // Create Hadoop configuration and serialise to a string
        final Configuration configuration = new Configuration();
        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
        configuration.write(new DataOutputStream(baos));
        final String configurationString = new String(baos.toByteArray(), CommonConstants.UTF_8);

        final Set<Element> results = new HashSet<>();
        final Set<Element> expectedElements = new HashSet<>();

        // Check get correct edges for EdgeSeed 1 -> B
        GetJavaRDDOfElements<EdgeSeed> rddQuery = new GetJavaRDDOfElements.Builder<EdgeSeed>()
                .javaSparkContext(sparkContext)
                .seeds(Collections.singleton(new EdgeSeed("1", "B", false)))
                .setIncludeEdges(GetOperation.IncludeEdgeType.ALL)
                .setIncludeEntities(false)
                .build();
        rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
        JavaRDD<Element> rdd = graph1.execute(rddQuery, user);
        if (rdd == null) {
            fail("No RDD returned");
        }
        results.addAll(rdd.collect());
        expectedElements.add(edge1B);
        assertEquals(expectedElements, results);

        // Check get entity for 1 when query for 1 -> B and specify entities only
        rddQuery = new GetJavaRDDOfElements.Builder<EdgeSeed>()
                .javaSparkContext(sparkContext)
                .seeds(Collections.singleton(new EdgeSeed("1", "B", false)))
                .setIncludeEntities(true)
                .setIncludeEdges(GetOperation.IncludeEdgeType.NONE)
                .build();
        rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
        rdd = graph1.execute(rddQuery, user);
        if (rdd == null) {
            fail("No RDD returned");
        }
        results.clear();
        results.addAll(rdd.collect());
        expectedElements.clear();
        expectedElements.add(entity1);
        assertEquals(expectedElements, results);

        // Check get correct edges for 1 -> B when specify edges only
        rddQuery = new GetJavaRDDOfElements.Builder<EdgeSeed>()
                .javaSparkContext(sparkContext)
                .seeds(Collections.singleton(new EdgeSeed("1", "B", false)))
                .view(new View.Builder().edge(EDGE_GROUP).build())
                .setIncludeEntities(false)
                .setIncludeEdges(GetOperation.IncludeEdgeType.ALL)
                .build();
        rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
        rdd = graph1.execute(rddQuery, user);
        if (rdd == null) {
            fail("No RDD returned");
        }
        results.clear();
        results.addAll(rdd.collect());
        expectedElements.clear();
        expectedElements.add(edge1B);
        assertEquals(expectedElements, results);

        // Check get correct edges for 1 -> B and 5 -> C
        final Set<EdgeSeed> seeds = new HashSet<>();
        seeds.add(new EdgeSeed("1", "B", false));
        seeds.add(new EdgeSeed("5", "C", false));
        rddQuery = new GetJavaRDDOfElements.Builder<EdgeSeed>()
                .javaSparkContext(sparkContext)
                .setIncludeEntities(false)
                .seeds(seeds)
                .build();
        rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
        rdd = graph1.execute(rddQuery, user);
        if (rdd == null) {
            fail("No RDD returned");
        }
        results.clear();
        results.addAll(rdd.collect());
        expectedElements.clear();
        expectedElements.add(edge1B);
        expectedElements.add(edge5C);
        assertEquals(expectedElements, results);
    } finally {
        // Always release the Spark context, even when an assertion above fails
        sparkContext.stop();
    }
}
use of uk.gov.gchq.gaffer.data.elementdefinition.view.View in project Gaffer by gchq.
the class GetRDDOfElementsHandlerTest method checkGetCorrectElementsInRDDForEntitySeed.
/**
 * Checks that a {@code GetRDDOfElements} operation seeded with {@code EntitySeed}s returns
 * exactly the expected elements for four queries: no view, an entity-group view, an
 * edge-group view, and two seeds at once.
 *
 * <p>The Spark context is stopped in a {@code finally} block so that a failing assertion does
 * not leave a running context behind for subsequent tests.
 *
 * @throws OperationException if executing an operation on the graph fails
 * @throws IOException if the Hadoop configuration cannot be serialised
 */
@Test
public void checkGetCorrectElementsInRDDForEntitySeed() throws OperationException, IOException {
    final Graph graph1 = new Graph.Builder()
            .addSchema(getClass().getResourceAsStream("/schema/dataSchema.json"))
            .addSchema(getClass().getResourceAsStream("/schema/dataTypes.json"))
            .addSchema(getClass().getResourceAsStream("/schema/storeTypes.json"))
            .storeProperties(getClass().getResourceAsStream("/store.properties"))
            .build();

    // Populate the graph: for each vertex 0..9 add one entity plus undirected edges to "B" and "C"
    final List<Element> elements = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
        final String vertex = String.valueOf(i);
        final Entity entity = new Entity(ENTITY_GROUP);
        entity.setVertex(vertex);
        final Edge edge1 = new Edge(EDGE_GROUP);
        edge1.setSource(vertex);
        edge1.setDestination("B");
        edge1.setDirected(false);
        edge1.putProperty("count", 2);
        final Edge edge2 = new Edge(EDGE_GROUP);
        edge2.setSource(vertex);
        edge2.setDestination("C");
        edge2.setDirected(false);
        edge2.putProperty("count", 4);
        elements.add(edge1);
        elements.add(edge2);
        elements.add(entity);
    }
    final User user = new User();
    graph1.execute(new AddElements(elements), user);

    // Elements the queries below are expected to return
    final Entity entity1 = new Entity(ENTITY_GROUP);
    entity1.setVertex("1");
    final Edge edge1B = new Edge(EDGE_GROUP);
    edge1B.setSource("1");
    edge1B.setDestination("B");
    edge1B.setDirected(false);
    edge1B.putProperty("count", 2);
    final Edge edge1C = new Edge(EDGE_GROUP);
    edge1C.setSource("1");
    edge1C.setDestination("C");
    edge1C.setDirected(false);
    edge1C.putProperty("count", 4);
    final Entity entity5 = new Entity(ENTITY_GROUP);
    entity5.setVertex("5");
    final Edge edge5B = new Edge(EDGE_GROUP);
    edge5B.setSource("5");
    edge5B.setDestination("B");
    edge5B.setDirected(false);
    edge5B.putProperty("count", 2);
    final Edge edge5C = new Edge(EDGE_GROUP);
    edge5C.setSource("5");
    edge5C.setDestination("C");
    edge5C.setDirected(false);
    edge5C.putProperty("count", 4);

    final SparkConf sparkConf = new SparkConf()
            .setMaster("local")
            .setAppName("testCheckGetCorrectElementsInRDDForEntitySeed")
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .set("spark.kryo.registrator", "uk.gov.gchq.gaffer.spark.serialisation.kryo.Registrator")
            .set("spark.driver.allowMultipleContexts", "true");
    final SparkContext sparkContext = new SparkContext(sparkConf);
    try {
        // Create Hadoop configuration and serialise to a string
        final Configuration configuration = new Configuration();
        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
        configuration.write(new DataOutputStream(baos));
        final String configurationString = new String(baos.toByteArray(), CommonConstants.UTF_8);

        final Set<Element> results = new HashSet<>();
        final Set<Element> expectedElements = new HashSet<>();

        // Check get correct elements (entity and both edges) for "1"
        GetRDDOfElements<EntitySeed> rddQuery = new GetRDDOfElements.Builder<EntitySeed>()
                .sparkContext(sparkContext)
                .seeds(Collections.singleton(new EntitySeed("1")))
                .build();
        rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
        RDD<Element> rdd = graph1.execute(rddQuery, user);
        if (rdd == null) {
            fail("No RDD returned");
        }
        // NB: IDE suggests the cast in the following line is unnecessary but compilation fails without it
        Element[] returnedElements = (Element[]) rdd.collect();
        Collections.addAll(results, returnedElements);
        expectedElements.add(entity1);
        expectedElements.add(edge1B);
        expectedElements.add(edge1C);
        assertEquals(expectedElements, results);

        // Check only the entity for "1" is returned when the view is restricted to the entity group
        rddQuery = new GetRDDOfElements.Builder<EntitySeed>()
                .sparkContext(sparkContext)
                .seeds(Collections.singleton(new EntitySeed("1")))
                .view(new View.Builder().entity(ENTITY_GROUP).build())
                .build();
        rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
        rdd = graph1.execute(rddQuery, user);
        if (rdd == null) {
            fail("No RDD returned");
        }
        results.clear();
        returnedElements = (Element[]) rdd.collect();
        Collections.addAll(results, returnedElements);
        expectedElements.clear();
        expectedElements.add(entity1);
        assertEquals(expectedElements, results);

        // Check get correct edges for "1" when specify edges only
        rddQuery = new GetRDDOfElements.Builder<EntitySeed>()
                .sparkContext(sparkContext)
                .seeds(Collections.singleton(new EntitySeed("1")))
                .view(new View.Builder().edge(EDGE_GROUP).build())
                .build();
        rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
        rdd = graph1.execute(rddQuery, user);
        if (rdd == null) {
            fail("No RDD returned");
        }
        results.clear();
        returnedElements = (Element[]) rdd.collect();
        Collections.addAll(results, returnedElements);
        expectedElements.clear();
        expectedElements.add(edge1B);
        expectedElements.add(edge1C);
        assertEquals(expectedElements, results);

        // Check get correct elements for "1" and "5"
        final Set<EntitySeed> seeds = new HashSet<>();
        seeds.add(new EntitySeed("1"));
        seeds.add(new EntitySeed("5"));
        rddQuery = new GetRDDOfElements.Builder<EntitySeed>()
                .sparkContext(sparkContext)
                .seeds(seeds)
                .build();
        rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
        rdd = graph1.execute(rddQuery, user);
        if (rdd == null) {
            fail("No RDD returned");
        }
        results.clear();
        returnedElements = (Element[]) rdd.collect();
        Collections.addAll(results, returnedElements);
        expectedElements.clear();
        expectedElements.add(entity1);
        expectedElements.add(edge1B);
        expectedElements.add(edge1C);
        expectedElements.add(entity5);
        expectedElements.add(edge5B);
        expectedElements.add(edge5C);
        assertEquals(expectedElements, results);
    } finally {
        // Always release the Spark context, even when an assertion above fails
        sparkContext.stop();
    }
}
use of uk.gov.gchq.gaffer.data.elementdefinition.view.View in project Gaffer by gchq.
the class GetRDDOfElementsHandlerTest method checkGetCorrectElementsInRDDForEdgeSeed.
/**
 * Checks that a {@code GetRDDOfElements} operation seeded with {@code EdgeSeed}s returns
 * exactly the expected elements for four queries: edges only, entities only, an edge-group
 * view, and two seeds at once.
 *
 * <p>The Spark context is stopped in a {@code finally} block so that a failing assertion does
 * not leave a running context behind for subsequent tests.
 *
 * @throws OperationException if executing an operation on the graph fails
 * @throws IOException if the Hadoop configuration cannot be serialised
 */
@Test
public void checkGetCorrectElementsInRDDForEdgeSeed() throws OperationException, IOException {
    final Graph graph1 = new Graph.Builder()
            .addSchema(getClass().getResourceAsStream("/schema/dataSchema.json"))
            .addSchema(getClass().getResourceAsStream("/schema/dataTypes.json"))
            .addSchema(getClass().getResourceAsStream("/schema/storeTypes.json"))
            .storeProperties(getClass().getResourceAsStream("/store.properties"))
            .build();

    // Populate the graph: for each vertex 0..9 add one entity plus undirected edges to "B" and "C"
    final List<Element> elements = new ArrayList<>();
    for (int i = 0; i < 10; i++) {
        final String vertex = String.valueOf(i);
        final Entity entity = new Entity(ENTITY_GROUP);
        entity.setVertex(vertex);
        final Edge edge1 = new Edge(EDGE_GROUP);
        edge1.setSource(vertex);
        edge1.setDestination("B");
        edge1.setDirected(false);
        edge1.putProperty("count", 2);
        final Edge edge2 = new Edge(EDGE_GROUP);
        edge2.setSource(vertex);
        edge2.setDestination("C");
        edge2.setDirected(false);
        edge2.putProperty("count", 4);
        elements.add(edge1);
        elements.add(edge2);
        elements.add(entity);
    }
    final User user = new User();
    graph1.execute(new AddElements(elements), user);

    // Elements the queries below are expected to return
    final Entity entity1 = new Entity(ENTITY_GROUP);
    entity1.setVertex("1");
    final Edge edge1B = new Edge(EDGE_GROUP);
    edge1B.setSource("1");
    edge1B.setDestination("B");
    edge1B.setDirected(false);
    edge1B.putProperty("count", 2);
    final Edge edge5C = new Edge(EDGE_GROUP);
    edge5C.setSource("5");
    edge5C.setDestination("C");
    edge5C.setDirected(false);
    edge5C.putProperty("count", 4);

    final SparkConf sparkConf = new SparkConf()
            .setMaster("local")
            .setAppName("testCheckGetCorrectElementsInRDDForEdgeSeed")
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .set("spark.kryo.registrator", "uk.gov.gchq.gaffer.spark.serialisation.kryo.Registrator")
            .set("spark.driver.allowMultipleContexts", "true");
    final SparkContext sparkContext = new SparkContext(sparkConf);
    try {
        // Create Hadoop configuration and serialise to a string
        final Configuration configuration = new Configuration();
        final ByteArrayOutputStream baos = new ByteArrayOutputStream();
        configuration.write(new DataOutputStream(baos));
        final String configurationString = new String(baos.toByteArray(), CommonConstants.UTF_8);

        final Set<Element> results = new HashSet<>();
        final Set<Element> expectedElements = new HashSet<>();

        // Check get correct edges for EdgeSeed 1 -> B
        GetRDDOfElements<EdgeSeed> rddQuery = new GetRDDOfElements.Builder<EdgeSeed>()
                .sparkContext(sparkContext)
                .seeds(Collections.singleton(new EdgeSeed("1", "B", false)))
                .includeEdges(GetOperation.IncludeEdgeType.ALL)
                .includeEntities(false)
                .build();
        rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
        RDD<Element> rdd = graph1.execute(rddQuery, user);
        if (rdd == null) {
            fail("No RDD returned");
        }
        // NB: IDE suggests the cast in the following line is unnecessary but compilation fails without it
        Element[] returnedElements = (Element[]) rdd.collect();
        Collections.addAll(results, returnedElements);
        expectedElements.add(edge1B);
        assertEquals(expectedElements, results);

        // Check get entity for 1 when query for 1 -> B and specify entities only
        rddQuery = new GetRDDOfElements.Builder<EdgeSeed>()
                .sparkContext(sparkContext)
                .seeds(Collections.singleton(new EdgeSeed("1", "B", false)))
                .includeEntities(true)
                .includeEdges(GetOperation.IncludeEdgeType.NONE)
                .build();
        rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
        rdd = graph1.execute(rddQuery, user);
        if (rdd == null) {
            fail("No RDD returned");
        }
        results.clear();
        returnedElements = (Element[]) rdd.collect();
        Collections.addAll(results, returnedElements);
        expectedElements.clear();
        expectedElements.add(entity1);
        assertEquals(expectedElements, results);

        // Check get correct edges for 1 -> B when specify edges only
        rddQuery = new GetRDDOfElements.Builder<EdgeSeed>()
                .sparkContext(sparkContext)
                .seeds(Collections.singleton(new EdgeSeed("1", "B", false)))
                .view(new View.Builder().edge(EDGE_GROUP).build())
                .includeEntities(false)
                .includeEdges(GetOperation.IncludeEdgeType.ALL)
                .build();
        rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
        rdd = graph1.execute(rddQuery, user);
        if (rdd == null) {
            fail("No RDD returned");
        }
        results.clear();
        returnedElements = (Element[]) rdd.collect();
        Collections.addAll(results, returnedElements);
        expectedElements.clear();
        expectedElements.add(edge1B);
        assertEquals(expectedElements, results);

        // Check get correct edges for 1 -> B and 5 -> C
        final Set<EdgeSeed> seeds = new HashSet<>();
        seeds.add(new EdgeSeed("1", "B", false));
        seeds.add(new EdgeSeed("5", "C", false));
        rddQuery = new GetRDDOfElements.Builder<EdgeSeed>()
                .sparkContext(sparkContext)
                .includeEntities(false)
                .seeds(seeds)
                .build();
        rddQuery.addOption(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString);
        rdd = graph1.execute(rddQuery, user);
        if (rdd == null) {
            fail("No RDD returned");
        }
        results.clear();
        returnedElements = (Element[]) rdd.collect();
        Collections.addAll(results, returnedElements);
        expectedElements.clear();
        expectedElements.add(edge1B);
        expectedElements.add(edge5C);
        assertEquals(expectedElements, results);
    } finally {
        // Always release the Spark context, even when an assertion above fails
        sparkContext.stop();
    }
}
use of uk.gov.gchq.gaffer.data.elementdefinition.view.View in project Gaffer by gchq.
the class GetDataFrameOfElementsExample method getDataFrameOfElementsWithEntityGroup.
/**
 * Runs a {@code GetDataFrameOfElements} operation restricted to the "entity" group and logs
 * three renderings of the result: the full data frame, the rows whose vertex is 1 or 2, and
 * the rows whose count is greater than 1. Each rendering is preceded by the Java snippet
 * that produces it.
 *
 * <p>The root logger is switched off while the operation and the Spark calls run and
 * switched back to INFO only around the logging calls; it is left OFF on return.
 *
 * @param sqlc the SQL context handed to the operation
 * @param graph the graph the operation is executed against
 * @throws OperationException if executing the operation fails
 */
public void getDataFrameOfElementsWithEntityGroup(final SQLContext sqlc, final Graph graph) throws OperationException {
    ROOT_LOGGER.setLevel(Level.INFO);
    log("#### " + getMethodNameAsSentence(0) + "\n");
    printGraph();
    ROOT_LOGGER.setLevel(Level.OFF);
    final GetDataFrameOfElements operation = new GetDataFrameOfElements.Builder()
            .view(new View.Builder().entity("entity").build())
            .sqlContext(sqlc)
            .build();
    final Dataset<Row> df = graph.execute(operation, new User("user01"));

    // Show
    String result = df.showString(100, 20);
    ROOT_LOGGER.setLevel(Level.INFO);
    // The stray '.' that used to follow ".build())" made the printed snippet invalid Java
    printJava("GetDataFrameOfElements operation = new GetDataFrameOfElements.Builder()\n"
            + " .view(new View.Builder()\n"
            + " .entity(\"entity\")\n"
            + " .build())\n"
            + " .sqlContext(sqlc)\n"
            + " .build();\n"
            + "Dataset<Row> df = getGraph().execute(operation, new User(\"user01\"));\n"
            + "df.show();");
    log("The results are:");
    log("```");
    // Drop the last two characters of the rendered table before logging it
    log(result.substring(0, result.length() - 2));
    log("```");
    ROOT_LOGGER.setLevel(Level.OFF);

    // Restrict to entities involving certain vertices
    final Dataset<Row> seeded = df.filter("vertex = 1 OR vertex = 2");
    result = seeded.showString(100, 20);
    ROOT_LOGGER.setLevel(Level.INFO);
    printJava("df.filter(\"vertex = 1 OR vertex = 2\").show();");
    log("The results are:");
    log("```");
    log(result.substring(0, result.length() - 2));
    log("```");
    ROOT_LOGGER.setLevel(Level.OFF);

    // Filter by property
    final Dataset<Row> filtered = df.filter("count > 1");
    result = filtered.showString(100, 20);
    ROOT_LOGGER.setLevel(Level.INFO);
    printJava("df.filter(\"count > 1\").show();");
    log("The results are:");
    log("```");
    log(result.substring(0, result.length() - 2));
    log("```");
    ROOT_LOGGER.setLevel(Level.OFF);
}
use of uk.gov.gchq.gaffer.data.elementdefinition.view.View in project Gaffer by gchq.
the class FilterToOperationConverterTest method testSpecifySourceOrDestinationAndPropertyFilter.
/**
 * Checks that {@code FiltersToOperationConverter} turns a source-column equality filter plus
 * property filters into a {@code GetRDDOfElements} operation that is seeded with the source
 * vertex and whose view carries the property filters as post-aggregation filters on the
 * edge groups.
 *
 * <p>The Spark context behind the SQL context is stopped in a {@code finally} block so that a
 * failing assertion does not leave a running context behind for subsequent tests.
 *
 * @throws OperationException declared by the signature; not thrown directly by this method
 */
@Test
public void testSpecifySourceOrDestinationAndPropertyFilter() throws OperationException {
    final Schema schema = getSchema();
    final SQLContext sqlContext = getSqlContext("testSpecifyVertexAndPropertyFilter");
    try {
        // Specify src and a filter on property1
        Filter[] filters = new Filter[2];
        filters[0] = new GreaterThan("property1", 5);
        filters[1] = new EqualTo(SchemaToStructTypeConverter.SRC_COL_NAME, "0");
        FiltersToOperationConverter converter = new FiltersToOperationConverter(sqlContext, getViewFromSchema(schema), schema, filters);
        AbstractGetRDD<?> operation = converter.getOperation();
        assertTrue(operation instanceof GetRDDOfElements);
        assertEquals(0, operation.getView().getEntityGroups().size());
        assertEquals(2, operation.getView().getEdgeGroups().size());
        final Set<EntitySeed> seeds = new HashSet<>();
        for (final Object seed : ((GetRDDOfElements) operation).getSeeds()) {
            seeds.add((EntitySeed) seed);
        }
        assertEquals(Collections.singleton(new EntitySeed("0")), seeds);
        View opView = operation.getView();
        // Every edge group must carry exactly the property1 filter
        for (final String edgeGroup : EDGE_GROUPS) {
            final List<ConsumerFunctionContext<String, FilterFunction>> edgePostAggFilters = opView.getEdge(edgeGroup).getPostAggregationFilterFunctions();
            assertEquals(1, edgePostAggFilters.size());
            assertEquals(new ArrayList<>(Collections.singleton("property1")), edgePostAggFilters.get(0).getSelection());
            assertEquals(new IsMoreThan(5, false), edgePostAggFilters.get(0).getFunction());
        }

        // Specify src and filters on property1 and property4
        filters = new Filter[3];
        filters[0] = new GreaterThan("property1", 5);
        filters[1] = new EqualTo(SchemaToStructTypeConverter.SRC_COL_NAME, "0");
        filters[2] = new LessThan("property4", 8);
        converter = new FiltersToOperationConverter(sqlContext, getViewFromSchema(schema), schema, filters);
        operation = converter.getOperation();
        assertTrue(operation instanceof GetRDDOfElements);
        assertEquals(0, operation.getView().getEntityGroups().size());
        assertEquals(1, operation.getView().getEdgeGroups().size());
        seeds.clear();
        for (final Object seed : ((GetRDDOfElements) operation).getSeeds()) {
            seeds.add((EntitySeed) seed);
        }
        assertEquals(Collections.singleton(new EntitySeed("0")), seeds);
        opView = operation.getView();
        // These are the filters on the single remaining edge group, in filter order
        final List<ConsumerFunctionContext<String, FilterFunction>> edgeGroupFilters = opView.getEdge(EDGE_GROUP).getPostAggregationFilterFunctions();
        assertEquals(2, edgeGroupFilters.size());
        assertEquals(1, edgeGroupFilters.get(0).getSelection().size());
        assertEquals("property1", edgeGroupFilters.get(0).getSelection().get(0));
        assertEquals(new IsMoreThan(5, false), edgeGroupFilters.get(0).getFunction());
        assertEquals(1, edgeGroupFilters.get(1).getSelection().size());
        assertEquals("property4", edgeGroupFilters.get(1).getSelection().get(0));
        assertEquals(new IsLessThan(8, false), edgeGroupFilters.get(1).getFunction());
    } finally {
        // Always release the Spark context, even when an assertion above fails
        sqlContext.sparkContext().stop();
    }
}
Aggregations