use of org.apache.spark.sql.SQLContext in project Gaffer by gchq.
the class GetDataFrameOfElementsHandlerTest method checkGetCorrectElementsInDataFrameWithProjectionAndFiltering.
/**
 * Checks that filtering the DataFrame built over {@code EDGE_GROUP} on {@code property2 > 4.0}
 * yields the expected rows, first with every column and then with a projection onto
 * {@code property2} and {@code property3}.
 *
 * @throws OperationException propagated from the graph set-up or from executing the operation
 */
@Test
public void checkGetCorrectElementsInDataFrameWithProjectionAndFiltering() throws OperationException {
    final Graph graph = getGraph("/schema-DataFrame/dataSchema.json", getElements());
    final SQLContext sqlContext = getSqlContext("checkGetCorrectElementsInDataFrameWithProjectionAndFiltering");
    // The try/finally guarantees the SparkContext is stopped even when an assertion fails;
    // otherwise the context would be left running after a failed test.
    try {
        // Get DataFrame
        final GetDataFrameOfElements dfOperation = new GetDataFrameOfElements.Builder()
                .sqlContext(sqlContext)
                .view(new View.Builder().edge(EDGE_GROUP).build())
                .build();
        final Dataset<Row> dataFrame = graph.execute(dfOperation, new User());

        // Check get correct rows when ask for all columns but only rows where property2 > 4.0
        Set<Row> results = new HashSet<>(dataFrame.filter("property2 > 4.0").collectAsList());
        final Set<Row> expectedRows = new HashSet<>();
        for (int i = 0; i < NUM_ELEMENTS; i++) {
            final MutableList<Object> fields = new MutableList<>();
            fields.appendElem(EDGE_GROUP);
            fields.appendElem("" + i);
            fields.appendElem("C");
            fields.appendElem(6);
            fields.appendElem(7);
            fields.appendElem(8.0F);
            fields.appendElem(9.0D);
            fields.appendElem(10L);
            fields.appendElem(i * 200L);
            expectedRows.add(Row$.MODULE$.fromSeq(fields));
        }
        assertEquals(expectedRows, results);

        // Check get correct rows when ask for columns property2 and property3 but only rows where property2 > 4.0
        results = new HashSet<>(dataFrame.select("property2", "property3").filter("property2 > 4.0").collectAsList());
        expectedRows.clear();
        // Every iteration builds the same two-field row, so the set ends up holding a single row.
        for (int i = 0; i < NUM_ELEMENTS; i++) {
            final MutableList<Object> fields = new MutableList<>();
            fields.appendElem(8.0F);
            fields.appendElem(9.0D);
            expectedRows.add(Row$.MODULE$.fromSeq(fields));
        }
        assertEquals(expectedRows, results);
    } finally {
        sqlContext.sparkContext().stop();
    }
}
use of org.apache.spark.sql.SQLContext in project Gaffer by gchq.
the class FilterToOperationConverterTest method testSpecifyVertexAndPropertyFilter.
/**
 * Checks that an {@code EqualTo} filter on the vertex column combined with property filters is
 * converted into a {@code GetRDDOfElements} operation seeded with {@code EntitySeed("0")}, whose
 * view contains one entity group, no edge groups, and one post-aggregation filter per property
 * filter.
 *
 * @throws OperationException propagated from the converter
 */
@Test
public void testSpecifyVertexAndPropertyFilter() throws OperationException {
    final Schema schema = getSchema();
    final SQLContext sqlContext = getSqlContext("testSpecifyVertexAndPropertyFilter");
    // The try/finally guarantees the SparkContext is stopped even when an assertion fails;
    // otherwise the context would be left running after a failed test.
    try {
        // Specify vertex and a filter on property1
        Filter[] filters = new Filter[2];
        filters[0] = new GreaterThan("property1", 5);
        filters[1] = new EqualTo(SchemaToStructTypeConverter.VERTEX_COL_NAME, "0");
        FiltersToOperationConverter converter = new FiltersToOperationConverter(sqlContext, getViewFromSchema(schema), schema, filters);
        AbstractGetRDD<?> operation = converter.getOperation();
        assertTrue(operation instanceof GetRDDOfElements);
        assertEquals(1, operation.getView().getEntityGroups().size());
        assertEquals(0, operation.getView().getEdgeGroups().size());
        final Set<EntitySeed> seeds = new HashSet<>();
        for (final Object seed : ((GetRDDOfElements) operation).getSeeds()) {
            seeds.add((EntitySeed) seed);
        }
        assertEquals(Collections.singleton(new EntitySeed("0")), seeds);
        View opView = operation.getView();
        List<ConsumerFunctionContext<String, FilterFunction>> entityPostAggFilters = opView.getEntity(ENTITY_GROUP).getPostAggregationFilterFunctions();
        assertEquals(1, entityPostAggFilters.size());
        final List<String> expectedProperties = new ArrayList<>();
        expectedProperties.add("property1");
        assertEquals(1, entityPostAggFilters.get(0).getSelection().size());
        assertEquals(expectedProperties.get(0), entityPostAggFilters.get(0).getSelection().get(0));
        final List<FilterFunction> expectedFunctions = new ArrayList<>();
        expectedFunctions.add(new IsMoreThan(5, false));
        assertEquals(expectedFunctions.get(0), entityPostAggFilters.get(0).getFunction());

        // Specify vertex and filters on properties property1 and property4
        filters = new Filter[3];
        filters[0] = new GreaterThan("property1", 5);
        filters[1] = new EqualTo(SchemaToStructTypeConverter.VERTEX_COL_NAME, "0");
        filters[2] = new LessThan("property4", 8);
        converter = new FiltersToOperationConverter(sqlContext, getViewFromSchema(schema), schema, filters);
        operation = converter.getOperation();
        assertTrue(operation instanceof GetRDDOfElements);
        assertEquals(1, operation.getView().getEntityGroups().size());
        assertEquals(0, operation.getView().getEdgeGroups().size());
        seeds.clear();
        for (final Object seed : ((GetRDDOfElements) operation).getSeeds()) {
            seeds.add((EntitySeed) seed);
        }
        assertEquals(Collections.singleton(new EntitySeed("0")), seeds);
        opView = operation.getView();
        entityPostAggFilters = opView.getEntity(ENTITY_GROUP).getPostAggregationFilterFunctions();
        assertEquals(2, entityPostAggFilters.size());
        expectedProperties.clear();
        expectedProperties.add("property1");
        expectedProperties.add("property4");
        assertEquals(1, entityPostAggFilters.get(0).getSelection().size());
        assertEquals(expectedProperties.get(0), entityPostAggFilters.get(0).getSelection().get(0));
        assertEquals(1, entityPostAggFilters.get(1).getSelection().size());
        assertEquals(expectedProperties.get(1), entityPostAggFilters.get(1).getSelection().get(0));
        expectedFunctions.clear();
        expectedFunctions.add(new IsMoreThan(5, false));
        expectedFunctions.add(new IsLessThan(8, false));
        assertEquals(expectedFunctions.get(0), entityPostAggFilters.get(0).getFunction());
        assertEquals(expectedFunctions.get(1), entityPostAggFilters.get(1).getFunction());
    } finally {
        sqlContext.sparkContext().stop();
    }
}
use of org.apache.spark.sql.SQLContext in project Gaffer by gchq.
the class FilterToOperationConverterTest method testSingleGroup.
/**
 * Checks that a single {@code EqualTo} filter on the group column naming {@code ENTITY_GROUP}
 * is converted into a {@code GetRDDOfAllElements} operation whose view contains exactly that
 * entity group and no edge groups.
 *
 * @throws OperationException propagated from the converter
 */
@Test
public void testSingleGroup() throws OperationException {
    final Schema schema = getSchema();
    final SQLContext sqlContext = getSqlContext("testSingleGroup");
    // The try/finally guarantees the SparkContext is stopped even when an assertion fails;
    // otherwise the context would be left running after a failed test.
    try {
        final Filter[] filters = new Filter[1];
        filters[0] = new EqualTo(SchemaToStructTypeConverter.GROUP, ENTITY_GROUP);
        final FiltersToOperationConverter converter = new FiltersToOperationConverter(sqlContext, getViewFromSchema(schema), schema, filters);
        final AbstractGetRDD<?> operation = converter.getOperation();
        assertTrue(operation instanceof GetRDDOfAllElements);
        assertEquals(Collections.singleton(ENTITY_GROUP), operation.getView().getEntityGroups());
        assertEquals(0, operation.getView().getEdgeGroups().size());
    } finally {
        sqlContext.sparkContext().stop();
    }
}
use of org.apache.spark.sql.SQLContext in project Gaffer by gchq.
the class FilterToOperationConverterTest method testSpecifyPropertyFilters.
/**
 * Checks that filters on properties only ({@code GreaterThan}, {@code LessThan} and an
 * {@code And} of two {@code GreaterThan}s) are converted into {@code GetRDDOfAllElements}
 * operations whose views carry the matching post-aggregation filter functions.
 *
 * @throws OperationException propagated from the converter
 */
@Test
public void testSpecifyPropertyFilters() throws OperationException {
    final Schema schema = getSchema();
    final SQLContext sqlContext = getSqlContext("testSpecifyPropertyFilters");
    // The try/finally guarantees the SparkContext is stopped even when an assertion fails;
    // otherwise the context would be left running after a failed test.
    try {
        final Filter[] filters = new Filter[1];

        // GreaterThan
        filters[0] = new GreaterThan("property1", 5);
        FiltersToOperationConverter converter = new FiltersToOperationConverter(sqlContext, getViewFromSchema(schema), schema, filters);
        AbstractGetRDD<?> operation = converter.getOperation();
        assertTrue(operation instanceof GetRDDOfAllElements);
        View opView = operation.getView();
        List<ConsumerFunctionContext<String, FilterFunction>> entityPostAggFilters = opView.getEntity(ENTITY_GROUP).getPostAggregationFilterFunctions();
        assertEquals(1, entityPostAggFilters.size());
        assertEquals(new ArrayList<>(Collections.singleton("property1")), entityPostAggFilters.get(0).getSelection());
        assertEquals(new IsMoreThan(5, false), entityPostAggFilters.get(0).getFunction());
        for (final String edgeGroup : EDGE_GROUPS) {
            final List<ConsumerFunctionContext<String, FilterFunction>> edgePostAggFilters = opView.getEdge(edgeGroup).getPostAggregationFilterFunctions();
            assertEquals(1, edgePostAggFilters.size());
            assertEquals(new ArrayList<>(Collections.singleton("property1")), edgePostAggFilters.get(0).getSelection());
            assertEquals(new IsMoreThan(5, false), edgePostAggFilters.get(0).getFunction());
        }

        // LessThan
        filters[0] = new LessThan("property4", 8L);
        converter = new FiltersToOperationConverter(sqlContext, getViewFromSchema(schema), schema, filters);
        operation = converter.getOperation();
        assertTrue(operation instanceof GetRDDOfAllElements);
        // Only groups ENTITY_GROUP and EDGE_GROUP should be in the view as only they have property4
        opView = operation.getView();
        entityPostAggFilters = opView.getEntity(ENTITY_GROUP).getPostAggregationFilterFunctions();
        assertEquals(1, entityPostAggFilters.size());
        assertEquals(new ArrayList<>(Collections.singleton("property4")), entityPostAggFilters.get(0).getSelection());
        assertEquals(new IsLessThan(8L, false), entityPostAggFilters.get(0).getFunction());
        List<ConsumerFunctionContext<String, FilterFunction>> edgePostAggFilters = opView.getEdge(EDGE_GROUP).getPostAggregationFilterFunctions();
        assertEquals(1, edgePostAggFilters.size());
        assertEquals(new ArrayList<>(Collections.singleton("property4")), edgePostAggFilters.get(0).getSelection());
        assertEquals(new IsLessThan(8L, false), edgePostAggFilters.get(0).getFunction());

        // And
        final Filter left = new GreaterThan("property1", 5);
        final Filter right = new GreaterThan("property4", 8L);
        filters[0] = new And(left, right);
        converter = new FiltersToOperationConverter(sqlContext, getViewFromSchema(schema), schema, filters);
        operation = converter.getOperation();
        assertTrue(operation instanceof GetRDDOfAllElements);
        // Only groups ENTITY_GROUP and EDGE_GROUP should be in the view as only they have property1 and property4
        opView = operation.getView();
        entityPostAggFilters = opView.getEntity(ENTITY_GROUP).getPostAggregationFilterFunctions();
        assertEquals(2, entityPostAggFilters.size());
        final List<String> expectedProperties = new ArrayList<>();
        expectedProperties.add("property1");
        expectedProperties.add("property4");
        assertEquals(1, entityPostAggFilters.get(0).getSelection().size());
        assertEquals(expectedProperties.get(0), entityPostAggFilters.get(0).getSelection().get(0));
        assertEquals(1, entityPostAggFilters.get(1).getSelection().size());
        assertEquals(expectedProperties.get(1), entityPostAggFilters.get(1).getSelection().get(0));
        final List<FilterFunction> expectedFunctions = new ArrayList<>();
        expectedFunctions.add(new IsMoreThan(5, false));
        expectedFunctions.add(new IsMoreThan(8L, false));
        assertEquals(expectedFunctions.get(0), entityPostAggFilters.get(0).getFunction());
        assertEquals(expectedFunctions.get(1), entityPostAggFilters.get(1).getFunction());
        // For the edge group only the selected properties are checked, not the functions.
        edgePostAggFilters = opView.getEdge(EDGE_GROUP).getPostAggregationFilterFunctions();
        assertEquals(2, edgePostAggFilters.size());
        assertEquals(1, edgePostAggFilters.get(0).getSelection().size());
        assertEquals(expectedProperties.get(0), edgePostAggFilters.get(0).getSelection().get(0));
        assertEquals(1, edgePostAggFilters.get(1).getSelection().size());
        assertEquals(expectedProperties.get(1), edgePostAggFilters.get(1).getSelection().get(0));
    } finally {
        sqlContext.sparkContext().stop();
    }
}
use of org.apache.spark.sql.SQLContext in project Gaffer by gchq.
the class FilterToOperationConverterTest method testSingleGroupNotInSchema.
/**
 * Checks that an {@code EqualTo} filter on the group column with the value {@code "random"}
 * makes the converter return {@code null} instead of an operation.
 *
 * @throws OperationException propagated from the converter
 */
@Test
public void testSingleGroupNotInSchema() throws OperationException {
    final Schema schema = getSchema();
    final SQLContext sqlContext = getSqlContext("testSingleGroupNotInSchema");
    // The try/finally guarantees the SparkContext is stopped even when an assertion fails;
    // otherwise the context would be left running after a failed test.
    try {
        final Filter[] filters = new Filter[1];
        filters[0] = new EqualTo(SchemaToStructTypeConverter.GROUP, "random");
        final FiltersToOperationConverter converter = new FiltersToOperationConverter(sqlContext, getViewFromSchema(schema), schema, filters);
        final AbstractGetRDD<?> operation = converter.getOperation();
        assertNull(operation);
    } finally {
        sqlContext.sparkContext().stop();
    }
}
Aggregations