Search in sources :

Example 6 with SQLContext

use of org.apache.spark.sql.SQLContext in project Gaffer by gchq.

the class GetDataFrameOfElementsHandlerTest method checkGetCorrectElementsInDataFrameWithProjectionAndFiltering.

@Test
public void checkGetCorrectElementsInDataFrameWithProjectionAndFiltering() throws OperationException {
    final Graph graph = getGraph("/schema-DataFrame/dataSchema.json", getElements());
    final SQLContext sqlContext = getSqlContext("checkGetCorrectElementsInDataFrameWithProjectionAndFiltering");
    // Get DataFrame
    final GetDataFrameOfElements dfOperation = new GetDataFrameOfElements.Builder().sqlContext(sqlContext).view(new View.Builder().edge(EDGE_GROUP).build()).build();
    final Dataset<Row> dataFrame = graph.execute(dfOperation, new User());
    // Check get correct rows when ask for all columns but only rows where property2 > 4.0
    Set<Row> results = new HashSet<>(dataFrame.filter("property2 > 4.0").collectAsList());
    final Set<Row> expectedRows = new HashSet<>();
    for (int i = 0; i < NUM_ELEMENTS; i++) {
        final MutableList<Object> fields = new MutableList<>();
        fields.appendElem(EDGE_GROUP);
        fields.appendElem("" + i);
        fields.appendElem("C");
        fields.appendElem(6);
        fields.appendElem(7);
        fields.appendElem(8.0F);
        fields.appendElem(9.0D);
        fields.appendElem(10L);
        fields.appendElem(i * 200L);
        expectedRows.add(Row$.MODULE$.fromSeq(fields));
    }
    assertEquals(expectedRows, results);
    // Check get correct rows when ask for columns property2 and property3 but only rows where property2 > 4.0
    results = new HashSet<>(dataFrame.select("property2", "property3").filter("property2 > 4.0").collectAsList());
    expectedRows.clear();
    for (int i = 0; i < NUM_ELEMENTS; i++) {
        final MutableList<Object> fields = new MutableList<>();
        fields.appendElem(8.0F);
        fields.appendElem(9.0D);
        expectedRows.add(Row$.MODULE$.fromSeq(fields));
    }
    assertEquals(expectedRows, results);
    sqlContext.sparkContext().stop();
}
Also used : GetDataFrameOfElements(uk.gov.gchq.gaffer.spark.operation.dataframe.GetDataFrameOfElements) User(uk.gov.gchq.gaffer.user.User) Graph(uk.gov.gchq.gaffer.graph.Graph) MutableList(scala.collection.mutable.MutableList) Row(org.apache.spark.sql.Row) SQLContext(org.apache.spark.sql.SQLContext) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 7 with SQLContext

use of org.apache.spark.sql.SQLContext in project Gaffer by gchq.

the class FilterToOperationConverterTest method testSpecifyVertexAndPropertyFilter.

@Test
public void testSpecifyVertexAndPropertyFilter() throws OperationException {
    final Schema schema = getSchema();
    final SQLContext sqlContext = getSqlContext("testSpecifyVertexAndPropertyFilter");
    // Specify vertex and a filter on property1
    Filter[] filters = new Filter[2];
    filters[0] = new GreaterThan("property1", 5);
    filters[1] = new EqualTo(SchemaToStructTypeConverter.VERTEX_COL_NAME, "0");
    FiltersToOperationConverter converter = new FiltersToOperationConverter(sqlContext, getViewFromSchema(schema), schema, filters);
    AbstractGetRDD<?> operation = converter.getOperation();
    assertTrue(operation instanceof GetRDDOfElements);
    assertEquals(1, operation.getView().getEntityGroups().size());
    assertEquals(0, operation.getView().getEdgeGroups().size());
    final Set<EntitySeed> seeds = new HashSet<>();
    for (final Object seed : ((GetRDDOfElements) operation).getSeeds()) {
        seeds.add((EntitySeed) seed);
    }
    assertEquals(Collections.singleton(new EntitySeed("0")), seeds);
    View opView = operation.getView();
    List<ConsumerFunctionContext<String, FilterFunction>> entityPostAggFilters = opView.getEntity(ENTITY_GROUP).getPostAggregationFilterFunctions();
    assertEquals(1, entityPostAggFilters.size());
    final ArrayList<String> expectedProperties = new ArrayList<>();
    expectedProperties.add("property1");
    assertEquals(1, entityPostAggFilters.get(0).getSelection().size());
    assertEquals(expectedProperties.get(0), entityPostAggFilters.get(0).getSelection().get(0));
    final ArrayList<FilterFunction> expectedFunctions = new ArrayList<>();
    expectedFunctions.add(new IsMoreThan(5, false));
    assertEquals(expectedFunctions.get(0), entityPostAggFilters.get(0).getFunction());
    // Specify vertex and filters on properties property1 and property4
    filters = new Filter[3];
    filters[0] = new GreaterThan("property1", 5);
    filters[1] = new EqualTo(SchemaToStructTypeConverter.VERTEX_COL_NAME, "0");
    filters[2] = new LessThan("property4", 8);
    converter = new FiltersToOperationConverter(sqlContext, getViewFromSchema(schema), schema, filters);
    operation = converter.getOperation();
    assertTrue(operation instanceof GetRDDOfElements);
    assertEquals(1, operation.getView().getEntityGroups().size());
    assertEquals(0, operation.getView().getEdgeGroups().size());
    seeds.clear();
    for (final Object seed : ((GetRDDOfElements) operation).getSeeds()) {
        seeds.add((EntitySeed) seed);
    }
    assertEquals(Collections.singleton(new EntitySeed("0")), seeds);
    opView = operation.getView();
    entityPostAggFilters = opView.getEntity(ENTITY_GROUP).getPostAggregationFilterFunctions();
    assertEquals(2, entityPostAggFilters.size());
    expectedProperties.clear();
    expectedProperties.add("property1");
    expectedProperties.add("property4");
    assertEquals(1, entityPostAggFilters.get(0).getSelection().size());
    assertEquals(expectedProperties.get(0), entityPostAggFilters.get(0).getSelection().get(0));
    assertEquals(1, entityPostAggFilters.get(1).getSelection().size());
    assertEquals(expectedProperties.get(1), entityPostAggFilters.get(1).getSelection().get(0));
    expectedFunctions.clear();
    expectedFunctions.add(new IsMoreThan(5, false));
    expectedFunctions.add(new IsLessThan(8, false));
    assertEquals(expectedFunctions.get(0), entityPostAggFilters.get(0).getFunction());
    assertEquals(expectedFunctions.get(1), entityPostAggFilters.get(1).getFunction());
    sqlContext.sparkContext().stop();
}
Also used : FilterFunction(uk.gov.gchq.gaffer.function.FilterFunction) Schema(uk.gov.gchq.gaffer.store.schema.Schema) ArrayList(java.util.ArrayList) View(uk.gov.gchq.gaffer.data.elementdefinition.view.View) EqualTo(org.apache.spark.sql.sources.EqualTo) LessThan(org.apache.spark.sql.sources.LessThan) IsLessThan(uk.gov.gchq.gaffer.function.filter.IsLessThan) ConsumerFunctionContext(uk.gov.gchq.gaffer.function.context.ConsumerFunctionContext) IsLessThan(uk.gov.gchq.gaffer.function.filter.IsLessThan) Filter(org.apache.spark.sql.sources.Filter) GreaterThan(org.apache.spark.sql.sources.GreaterThan) EntitySeed(uk.gov.gchq.gaffer.operation.data.EntitySeed) IsMoreThan(uk.gov.gchq.gaffer.function.filter.IsMoreThan) SQLContext(org.apache.spark.sql.SQLContext) GetRDDOfElements(uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfElements) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 8 with SQLContext

use of org.apache.spark.sql.SQLContext in project Gaffer by gchq.

the class FilterToOperationConverterTest method testSingleGroup.

@Test
public void testSingleGroup() throws OperationException {
    final Schema schema = getSchema();
    final SQLContext sqlContext = getSqlContext("testSingleGroup");
    final Filter[] filters = new Filter[1];
    filters[0] = new EqualTo(SchemaToStructTypeConverter.GROUP, ENTITY_GROUP);
    final FiltersToOperationConverter converter = new FiltersToOperationConverter(sqlContext, getViewFromSchema(schema), schema, filters);
    final AbstractGetRDD<?> operation = converter.getOperation();
    assertTrue(operation instanceof GetRDDOfAllElements);
    assertEquals(Collections.singleton(ENTITY_GROUP), operation.getView().getEntityGroups());
    assertEquals(0, operation.getView().getEdgeGroups().size());
    sqlContext.sparkContext().stop();
}
Also used : Filter(org.apache.spark.sql.sources.Filter) GetRDDOfAllElements(uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfAllElements) Schema(uk.gov.gchq.gaffer.store.schema.Schema) SQLContext(org.apache.spark.sql.SQLContext) EqualTo(org.apache.spark.sql.sources.EqualTo) Test(org.junit.Test)

Example 9 with SQLContext

use of org.apache.spark.sql.SQLContext in project Gaffer by gchq.

the class FilterToOperationConverterTest method testSpecifyPropertyFilters.

@Test
public void testSpecifyPropertyFilters() throws OperationException {
    final Schema schema = getSchema();
    final SQLContext sqlContext = getSqlContext("testSpecifyPropertyFilters");
    final Filter[] filters = new Filter[1];
    // GreaterThan
    filters[0] = new GreaterThan("property1", 5);
    FiltersToOperationConverter converter = new FiltersToOperationConverter(sqlContext, getViewFromSchema(schema), schema, filters);
    AbstractGetRDD<?> operation = converter.getOperation();
    assertTrue(operation instanceof GetRDDOfAllElements);
    View opView = operation.getView();
    List<ConsumerFunctionContext<String, FilterFunction>> entityPostAggFilters = opView.getEntity(ENTITY_GROUP).getPostAggregationFilterFunctions();
    assertEquals(1, entityPostAggFilters.size());
    assertEquals(new ArrayList<>(Collections.singleton("property1")), entityPostAggFilters.get(0).getSelection());
    assertEquals(new IsMoreThan(5, false), entityPostAggFilters.get(0).getFunction());
    for (final String edgeGroup : EDGE_GROUPS) {
        final List<ConsumerFunctionContext<String, FilterFunction>> edgePostAggFilters = opView.getEdge(edgeGroup).getPostAggregationFilterFunctions();
        assertEquals(1, edgePostAggFilters.size());
        assertEquals(new ArrayList<>(Collections.singleton("property1")), edgePostAggFilters.get(0).getSelection());
        assertEquals(new IsMoreThan(5, false), edgePostAggFilters.get(0).getFunction());
    }
    // LessThan
    filters[0] = new LessThan("property4", 8L);
    converter = new FiltersToOperationConverter(sqlContext, getViewFromSchema(schema), schema, filters);
    operation = converter.getOperation();
    assertTrue(operation instanceof GetRDDOfAllElements);
    // Only groups ENTITY_GROUP and EDGE_GROUP should be in the view as only they have property4
    opView = operation.getView();
    entityPostAggFilters = opView.getEntity(ENTITY_GROUP).getPostAggregationFilterFunctions();
    assertEquals(1, entityPostAggFilters.size());
    assertEquals(new ArrayList<>(Collections.singleton("property4")), entityPostAggFilters.get(0).getSelection());
    assertEquals(new IsLessThan(8L, false), entityPostAggFilters.get(0).getFunction());
    List<ConsumerFunctionContext<String, FilterFunction>> edgePostAggFilters = opView.getEdge(EDGE_GROUP).getPostAggregationFilterFunctions();
    assertEquals(1, edgePostAggFilters.size());
    assertEquals(new ArrayList<>(Collections.singleton("property4")), edgePostAggFilters.get(0).getSelection());
    assertEquals(new IsLessThan(8L, false), edgePostAggFilters.get(0).getFunction());
    // And
    final Filter left = new GreaterThan("property1", 5);
    final Filter right = new GreaterThan("property4", 8L);
    filters[0] = new And(left, right);
    converter = new FiltersToOperationConverter(sqlContext, getViewFromSchema(schema), schema, filters);
    operation = converter.getOperation();
    assertTrue(operation instanceof GetRDDOfAllElements);
    // Only groups ENTITY_GROUP and EDGE_GROUP should be in the view as only they have property1 and property4
    opView = operation.getView();
    entityPostAggFilters = opView.getEntity(ENTITY_GROUP).getPostAggregationFilterFunctions();
    assertEquals(2, entityPostAggFilters.size());
    final ArrayList<String> expectedProperties = new ArrayList<>();
    expectedProperties.add("property1");
    expectedProperties.add("property4");
    assertEquals(1, entityPostAggFilters.get(0).getSelection().size());
    assertEquals(expectedProperties.get(0), entityPostAggFilters.get(0).getSelection().get(0));
    assertEquals(1, entityPostAggFilters.get(1).getSelection().size());
    assertEquals(expectedProperties.get(1), entityPostAggFilters.get(1).getSelection().get(0));
    final ArrayList<FilterFunction> expectedFunctions = new ArrayList<>();
    expectedFunctions.add(new IsMoreThan(5, false));
    expectedFunctions.add(new IsMoreThan(8L, false));
    assertEquals(expectedFunctions.get(0), entityPostAggFilters.get(0).getFunction());
    assertEquals(expectedFunctions.get(1), entityPostAggFilters.get(1).getFunction());
    edgePostAggFilters = opView.getEdge(EDGE_GROUP).getPostAggregationFilterFunctions();
    assertEquals(2, edgePostAggFilters.size());
    assertEquals(1, edgePostAggFilters.get(0).getSelection().size());
    assertEquals(expectedProperties.get(0), edgePostAggFilters.get(0).getSelection().get(0));
    assertEquals(1, edgePostAggFilters.get(1).getSelection().size());
    assertEquals(expectedProperties.get(1), edgePostAggFilters.get(1).getSelection().get(0));
    sqlContext.sparkContext().stop();
}
Also used : FilterFunction(uk.gov.gchq.gaffer.function.FilterFunction) Schema(uk.gov.gchq.gaffer.store.schema.Schema) ArrayList(java.util.ArrayList) View(uk.gov.gchq.gaffer.data.elementdefinition.view.View) LessThan(org.apache.spark.sql.sources.LessThan) IsLessThan(uk.gov.gchq.gaffer.function.filter.IsLessThan) ConsumerFunctionContext(uk.gov.gchq.gaffer.function.context.ConsumerFunctionContext) IsLessThan(uk.gov.gchq.gaffer.function.filter.IsLessThan) Filter(org.apache.spark.sql.sources.Filter) GreaterThan(org.apache.spark.sql.sources.GreaterThan) GetRDDOfAllElements(uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfAllElements) And(org.apache.spark.sql.sources.And) IsMoreThan(uk.gov.gchq.gaffer.function.filter.IsMoreThan) SQLContext(org.apache.spark.sql.SQLContext) Test(org.junit.Test)

Example 10 with SQLContext

use of org.apache.spark.sql.SQLContext in project Gaffer by gchq.

the class FilterToOperationConverterTest method testSingleGroupNotInSchema.

@Test
public void testSingleGroupNotInSchema() throws OperationException {
    final Schema schema = getSchema();
    final SQLContext sqlContext = getSqlContext("testSingleGroupNotInSchema");
    final Filter[] filters = new Filter[1];
    filters[0] = new EqualTo(SchemaToStructTypeConverter.GROUP, "random");
    final FiltersToOperationConverter converter = new FiltersToOperationConverter(sqlContext, getViewFromSchema(schema), schema, filters);
    final AbstractGetRDD<?> operation = converter.getOperation();
    assertNull(operation);
    sqlContext.sparkContext().stop();
}
Also used : Filter(org.apache.spark.sql.sources.Filter) Schema(uk.gov.gchq.gaffer.store.schema.Schema) SQLContext(org.apache.spark.sql.SQLContext) EqualTo(org.apache.spark.sql.sources.EqualTo) Test(org.junit.Test)

Aggregations

SQLContext (org.apache.spark.sql.SQLContext)34 Test (org.junit.Test)20 HashSet (java.util.HashSet)15 Schema (uk.gov.gchq.gaffer.store.schema.Schema)14 Row (org.apache.spark.sql.Row)12 Filter (org.apache.spark.sql.sources.Filter)11 User (uk.gov.gchq.gaffer.user.User)11 View (uk.gov.gchq.gaffer.data.elementdefinition.view.View)10 EqualTo (org.apache.spark.sql.sources.EqualTo)9 Graph (uk.gov.gchq.gaffer.graph.Graph)9 GetDataFrameOfElements (uk.gov.gchq.gaffer.spark.operation.dataframe.GetDataFrameOfElements)8 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)7 MutableList (scala.collection.mutable.MutableList)7 ArrayList (java.util.ArrayList)6 DataFrame (org.apache.spark.sql.DataFrame)5 IsMoreThan (uk.gov.gchq.gaffer.function.filter.IsMoreThan)5 EntitySeed (uk.gov.gchq.gaffer.operation.data.EntitySeed)5 GetRDDOfElements (uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfElements)5 SparkConf (org.apache.spark.SparkConf)4 SparkContext (org.apache.spark.SparkContext)4