Search in sources :

Example 6 with GetRDDOfAllElements

use of uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfAllElements in project Gaffer by gchq.

the class FilterToOperationConverterTest method testSpecifyMultiplePropertyFilters.

/**
 * Checks that a <code>GreaterThan</code> filter on "property1" combined with a
 * <code>LessThan</code> filter on "property4" is converted into a
 * {@link GetRDDOfAllElements} operation whose view carries one post-aggregation
 * filter per property for both <code>ENTITY_GROUP</code> and <code>EDGE_GROUP</code>.
 *
 * @throws OperationException not expected; declared so that it surfaces as a test failure.
 */
@Test
public void testSpecifyMultiplePropertyFilters() throws OperationException {
    final Schema schema = getSchema();
    final SQLContext sqlContext = getSqlContext("testSpecifyMultiplePropertyFilters");
    // Stop the SparkContext in a finally block: if an assertion fails the context would
    // otherwise be left running and could interfere with tests that run afterwards.
    try {
        final Filter[] filters = new Filter[2];
        filters[0] = new GreaterThan("property1", 5);
        filters[1] = new LessThan("property4", 8L);
        final FiltersToOperationConverter converter = new FiltersToOperationConverter(sqlContext, getViewFromSchema(schema), schema, filters);
        final AbstractGetRDD<?> operation = converter.getOperation();
        assertTrue(operation instanceof GetRDDOfAllElements);

        // Only groups ENTITY_GROUP and EDGE_GROUP should be in the view as only they have property1 and property4
        final View opView = operation.getView();
        final List<String> expectedProperties = new ArrayList<>();
        expectedProperties.add("property1");
        expectedProperties.add("property4");
        final List<FilterFunction> expectedFunctions = new ArrayList<>();
        expectedFunctions.add(new IsMoreThan(5, false));
        expectedFunctions.add(new IsLessThan(8L, false));

        // Entity group: each filter selects exactly one property and uses the expected function.
        final List<ConsumerFunctionContext<String, FilterFunction>> entityPostAggFilters = opView.getEntity(ENTITY_GROUP).getPostAggregationFilterFunctions();
        assertEquals(2, entityPostAggFilters.size());
        assertEquals(1, entityPostAggFilters.get(0).getSelection().size());
        assertEquals(expectedProperties.get(0), entityPostAggFilters.get(0).getSelection().get(0));
        assertEquals(1, entityPostAggFilters.get(1).getSelection().size());
        assertEquals(expectedProperties.get(1), entityPostAggFilters.get(1).getSelection().get(0));
        assertEquals(expectedFunctions.get(0), entityPostAggFilters.get(0).getFunction());
        assertEquals(expectedFunctions.get(1), entityPostAggFilters.get(1).getFunction());

        // Edge group: each filter selects exactly one property, in the same order.
        final List<ConsumerFunctionContext<String, FilterFunction>> edgePostAggFilters = opView.getEdge(EDGE_GROUP).getPostAggregationFilterFunctions();
        assertEquals(2, edgePostAggFilters.size());
        assertEquals(1, edgePostAggFilters.get(0).getSelection().size());
        assertEquals(expectedProperties.get(0), edgePostAggFilters.get(0).getSelection().get(0));
        assertEquals(1, edgePostAggFilters.get(1).getSelection().size());
        assertEquals(expectedProperties.get(1), edgePostAggFilters.get(1).getSelection().get(0));
    } finally {
        sqlContext.sparkContext().stop();
    }
}
Also used : FilterFunction(uk.gov.gchq.gaffer.function.FilterFunction) Schema(uk.gov.gchq.gaffer.store.schema.Schema) ArrayList(java.util.ArrayList) View(uk.gov.gchq.gaffer.data.elementdefinition.view.View) LessThan(org.apache.spark.sql.sources.LessThan) IsLessThan(uk.gov.gchq.gaffer.function.filter.IsLessThan) ConsumerFunctionContext(uk.gov.gchq.gaffer.function.context.ConsumerFunctionContext) Filter(org.apache.spark.sql.sources.Filter) GreaterThan(org.apache.spark.sql.sources.GreaterThan) GetRDDOfAllElements(uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfAllElements) IsMoreThan(uk.gov.gchq.gaffer.function.filter.IsMoreThan) SQLContext(org.apache.spark.sql.SQLContext) Test(org.junit.Test)

Example 7 with GetRDDOfAllElements

use of uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfAllElements in project Gaffer by gchq.

the class FiltersToOperationConverter method applyVertexSourceDestinationFilters.

/**
 * Picks the operation to run based on the first <code>EqualTo</code> filter found on the
 * vertex, source or destination column.
 * <p>
 * An <code>EqualTo</code> on the vertex column yields a {@link GetRDDOfElements} seeded with the
 * filter value and a view holding only the entity groups of <code>view</code>. An
 * <code>EqualTo</code> on the source or destination column yields the same operation with a
 * view holding only the edge groups. If no such filter exists, a {@link GetRDDOfAllElements}
 * with a clone of <code>view</code> is returned.
 *
 * @param view the view whose groups are used to build the view of the returned operation.
 * @return the operation, with its view already set.
 */
private AbstractGetRDD<?> applyVertexSourceDestinationFilters(final View view) {
    final View fullView = view.clone();
    for (final Filter candidate : filters) {
        if (!(candidate instanceof EqualTo)) {
            continue;
        }
        final EqualTo equality = (EqualTo) candidate;
        final String column = equality.attribute();
        final boolean onVertex = column.equals(SchemaToStructTypeConverter.VERTEX_COL_NAME);
        final boolean onEdgeEnd = !onVertex
                && (column.equals(SchemaToStructTypeConverter.SRC_COL_NAME)
                || column.equals(SchemaToStructTypeConverter.DST_COL_NAME));
        if (!onVertex && !onEdgeEnd) {
            continue;
        }

        View.Builder groupsOnly;
        if (onVertex) {
            // Only entities are relevant, so the edge groups are left out of the view
            LOGGER.info("Found EqualTo filter with attribute {}, setting views to only contain entity groups", column);
            groupsOnly = new View.Builder();
            for (final String group : view.getEntityGroups()) {
                groupsOnly = groupsOnly.entity(group);
            }
        } else {
            // Only edges are relevant, so the entity groups are left out of the view
            LOGGER.info("Found EqualTo filter with attribute {}, setting views to only contain edge groups", column);
            groupsOnly = new View.Builder();
            for (final String group : view.getEdgeGroups()) {
                groupsOnly = groupsOnly.edge(group);
            }
        }
        final View restrictedView = groupsOnly.build();

        // The first matching filter decides the operation; later filters are not inspected.
        LOGGER.info("Setting operation to GetRDDOfElements");
        final AbstractGetRDD<?> seeded = new GetRDDOfElements<>(sqlContext.sparkContext(), new EntitySeed(equality.value()));
        seeded.setView(restrictedView);
        return seeded;
    }

    // No vertex, source or destination equality filter was found: fall back to a full scan.
    LOGGER.debug("Setting operation to GetRDDOfAllElements");
    final AbstractGetRDD<?> allElements = new GetRDDOfAllElements(sqlContext.sparkContext());
    allElements.setView(fullView);
    return allElements;
}
Also used : Filter(org.apache.spark.sql.sources.Filter) GetRDDOfAllElements(uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfAllElements) EntitySeed(uk.gov.gchq.gaffer.operation.data.EntitySeed) View(uk.gov.gchq.gaffer.data.elementdefinition.view.View) EqualTo(org.apache.spark.sql.sources.EqualTo) GetRDDOfElements(uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfElements)

Example 8 with GetRDDOfAllElements

use of uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfAllElements in project Gaffer by gchq.

the class AccumuloStoreRelation method buildScan.

/**
 * Builds an {@link RDD} of {@link Row}s by executing a {@link GetRDDOfAllElements} operation
 * with this relation's view and converting every returned {@link Element} into a {@link Row}.
 *
 * @return An {@link RDD} of {@link Row}s converted from the {@link Element}s returned by the
 *         operation, or <code>null</code> if executing the operation throws an
 *         {@link OperationException}.
 */
@Override
public RDD<Row> buildScan() {
    try {
        LOGGER.info("Building GetRDDOfAllElements with view set to groups {}", StringUtils.join(groups, ','));
        final GetRDDOfAllElements operation = new GetRDDOfAllElements(sqlContext.sparkContext());
        operation.setView(view);
        final RDD<Element> rdd = store.execute(operation, user);
        return rdd.map(new ConvertElementToRow(usedProperties, propertyNeedsConversion, converterByProperty), ClassTagConstants.ROW_CLASS_TAG);
    } catch (final OperationException e) {
        // Fill the placeholder with the message and pass the exception itself as the trailing
        // argument, so both the message text and the stack trace are logged.
        // NOTE(review): assumes LOGGER is an SLF4J-style logger - confirm.
        LOGGER.error("OperationException while executing operation: {}", e.getMessage(), e);
        return null;
    }
}
Also used : GetRDDOfAllElements(uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfAllElements) Element(uk.gov.gchq.gaffer.data.element.Element) OperationException(uk.gov.gchq.gaffer.operation.OperationException) ConvertElementToRow(uk.gov.gchq.gaffer.spark.operation.dataframe.ConvertElementToRow)

Example 9 with GetRDDOfAllElements

use of uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfAllElements in project Gaffer by gchq.

the class FilterToOperationConverterTest method testTwoGroups.

/**
 * Checks that an <code>Or</code> of two <code>EqualTo</code> filters on the group column
 * (<code>ENTITY_GROUP</code> or <code>EDGE_GROUP2</code>) is converted into a
 * {@link GetRDDOfAllElements} operation whose view contains exactly those two groups.
 *
 * @throws OperationException not expected; declared so that it surfaces as a test failure.
 */
@Test
public void testTwoGroups() throws OperationException {
    final Schema schema = getSchema();
    final SQLContext sqlContext = getSqlContext("testTwoGroups");
    // Stop the SparkContext in a finally block: if an assertion fails the context would
    // otherwise be left running and could interfere with tests that run afterwards.
    try {
        final Filter left = new EqualTo(SchemaToStructTypeConverter.GROUP, ENTITY_GROUP);
        final Filter right = new EqualTo(SchemaToStructTypeConverter.GROUP, EDGE_GROUP2);
        final Filter[] filters = new Filter[1];
        filters[0] = new Or(left, right);
        final FiltersToOperationConverter converter = new FiltersToOperationConverter(sqlContext, getViewFromSchema(schema), schema, filters);
        final AbstractGetRDD<?> operation = converter.getOperation();
        assertTrue(operation instanceof GetRDDOfAllElements);
        assertEquals(Collections.singleton(ENTITY_GROUP), operation.getView().getEntityGroups());
        assertEquals(Collections.singleton(EDGE_GROUP2), operation.getView().getEdgeGroups());
    } finally {
        sqlContext.sparkContext().stop();
    }
}
Also used : Or(org.apache.spark.sql.sources.Or) Filter(org.apache.spark.sql.sources.Filter) GetRDDOfAllElements(uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfAllElements) Schema(uk.gov.gchq.gaffer.store.schema.Schema) SQLContext(org.apache.spark.sql.SQLContext) EqualTo(org.apache.spark.sql.sources.EqualTo) Test(org.junit.Test)

Aggregations

GetRDDOfAllElements (uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfAllElements) 9, Test (org.junit.Test) 6, Filter (org.apache.spark.sql.sources.Filter) 5, SQLContext (org.apache.spark.sql.SQLContext) 4, Element (uk.gov.gchq.gaffer.data.element.Element) 4, Schema (uk.gov.gchq.gaffer.store.schema.Schema) 4, EqualTo (org.apache.spark.sql.sources.EqualTo) 3, View (uk.gov.gchq.gaffer.data.elementdefinition.view.View) 3, DataOutputStream (java.io.DataOutputStream) 2, File (java.io.File) 2, ArrayList (java.util.ArrayList) 2, HashSet (java.util.HashSet) 2, ByteArrayOutputStream (org.apache.commons.io.output.ByteArrayOutputStream) 2, Configuration (org.apache.hadoop.conf.Configuration) 2, SparkConf (org.apache.spark.SparkConf) 2, SparkContext (org.apache.spark.SparkContext) 2, GreaterThan (org.apache.spark.sql.sources.GreaterThan) 2, LessThan (org.apache.spark.sql.sources.LessThan) 2, ArrayBuffer (scala.collection.mutable.ArrayBuffer) 2, Edge (uk.gov.gchq.gaffer.data.element.Edge) 2