use of uk.gov.gchq.gaffer.data.elementdefinition.view.View in project Gaffer by gchq.
the class AccumuloStoreRelationTest method testBuildScanRestrictViewToOneGroup.
/**
 * Delegates to {@code testBuildScanWithView} with a view that holds only the edge group and a
 * predicate that accepts exactly the elements belonging to that group.
 */
@Test
public void testBuildScanRestrictViewToOneGroup() throws OperationException, StoreException {
    final String edgeGroup = GetDataFrameOfElementsHandlerTest.EDGE_GROUP;
    final View edgeOnlyView = new View.Builder()
            .edge(edgeGroup)
            .build();
    // An element is expected back only if its group is the single group named in the view.
    final Predicate<Element> inEdgeGroup = element -> element.getGroup().equals(edgeGroup);
    testBuildScanWithView("testBuildScanRestrictViewToOneGroup", edgeOnlyView, inEdgeGroup);
}
use of uk.gov.gchq.gaffer.data.elementdefinition.view.View in project Gaffer by gchq.
the class FiltersToOperationConverter method getOperation.
/**
 * Builds an operation that returns an RDD for which Gaffer has already done as much filtering as
 * it can inside Accumulo's tablet servers, before any data reaches a Spark executor.
 * <p>
 * When the result feeds an operation that returns a Dataframe, Spark applies the filters again
 * itself, so pushing down every filter is not required for correctness. Pushing down as many as
 * possible simply reduces the volume of data shipped from the data store to Spark's executors.
 * <p>
 * The operation and its view are derived in three steps, each discarding data as early as possible:
 * <ul>
 * <li>If the filters require a particular group or groups, the view is restricted to those groups.</li>
 * <li>If the filters fix a value for the vertex, source or destination, an operation that fetches
 * those directly is created (a GetRDDOfElements operation rather than a GetRDDOfAllElements
 * operation), and the view is limited to only entities or only edges as appropriate.</li>
 * <li>Any other filters are converted to Gaffer filters and applied to the view.</li>
 * </ul>
 *
 * @return an operation to return the required data, or {@code null} when {@code applyGroupFilters}
 * yields no view (presumably because the group filters cannot be satisfied - confirm against
 * {@code applyGroupFilters}).
 */
public AbstractGetRDD<?> getOperation() {
    // Step 1: narrow the view to the groups named by the filters, if any.
    final View groupFilteredView = applyGroupFilters(view);
    if (groupFilteredView == null) {
        return null;
    }
    // Step 2: choose the operation according to any vertex, source or destination value in the filters.
    final AbstractGetRDD<?> seededOperation = applyVertexSourceDestinationFilters(groupFilteredView);
    // Step 3: apply property filters against the view carried by the chosen operation, so that
    // groups which do not contain a filtered property can be ignored.
    return applyPropertyFilters(seededOperation.getView(), seededOperation);
}
use of uk.gov.gchq.gaffer.data.elementdefinition.view.View in project Gaffer by gchq.
the class FiltersToOperationConverter method applyVertexSourceDestinationFilters.
/**
 * Chooses the RDD operation based on the first {@code EqualTo} filter that targets the vertex,
 * source or destination column.
 * <p>
 * A match on the vertex column produces a {@code GetRDDOfElements} seeded with the filter value
 * and a view holding only the entity groups of {@code view}. A match on the source or destination
 * column does the same with only the edge groups. If no filter matches, a
 * {@code GetRDDOfAllElements} is returned carrying a clone of {@code view}.
 *
 * @param view the view whose groups are used to build the view set on the returned operation
 * @return the operation to run, with its view already set
 */
private AbstractGetRDD<?> applyVertexSourceDestinationFilters(final View view) {
    // Cloned up front; only used when no filter pins a vertex, source or destination.
    final View unrestrictedView = view.clone();
    for (final Filter candidate : filters) {
        if (!(candidate instanceof EqualTo)) {
            continue;
        }
        final EqualTo equality = (EqualTo) candidate;
        final String column = equality.attribute();
        final boolean onVertex = column.equals(SchemaToStructTypeConverter.VERTEX_COL_NAME);
        final boolean onSourceOrDestination = !onVertex
                && (column.equals(SchemaToStructTypeConverter.SRC_COL_NAME)
                || column.equals(SchemaToStructTypeConverter.DST_COL_NAME));
        if (!onVertex && !onSourceOrDestination) {
            continue;
        }

        final View restrictedView;
        if (onVertex) {
            // Only entities are relevant, so the new view carries entity groups alone.
            LOGGER.info("Found EqualTo filter with attribute {}, setting views to only contain entity groups", column);
            View.Builder entityOnly = new View.Builder();
            for (final String group : view.getEntityGroups()) {
                entityOnly = entityOnly.entity(group);
            }
            restrictedView = entityOnly.build();
        } else {
            // Only edges are relevant, so the new view carries edge groups alone.
            LOGGER.info("Found EqualTo filter with attribute {}, setting views to only contain edge groups", column);
            View.Builder edgeOnly = new View.Builder();
            for (final String group : view.getEdgeGroups()) {
                edgeOnly = edgeOnly.edge(group);
            }
            restrictedView = edgeOnly.build();
        }

        LOGGER.info("Setting operation to GetRDDOfElements");
        final AbstractGetRDD<?> seeded = new GetRDDOfElements<>(sqlContext.sparkContext(), new EntitySeed(equality.value()));
        seeded.setView(restrictedView);
        // The first matching filter decides the operation; later filters are not inspected here.
        return seeded;
    }

    LOGGER.debug("Setting operation to GetRDDOfAllElements");
    final AbstractGetRDD<?> scanAll = new GetRDDOfAllElements(sqlContext.sparkContext());
    scanAll.setView(unrestrictedView);
    return scanAll;
}
use of uk.gov.gchq.gaffer.data.elementdefinition.view.View in project Gaffer by gchq.
the class GetDataFrameOfElementsHandlerTest method checkCanDealWithNonStandardProperties.
/**
 * Executes {@code GetDataFrameOfElements} against a graph built from the non-standard-types schema
 * and checks the collected rows, first for the edge group and then for the entity group. The
 * expected rows contain a frequency map and a {@code HyperLogLogPlus} cardinality.
 * <p>
 * The Spark context is stopped in a {@code finally} block so that a failed assertion or a failed
 * execution does not leave a running context behind for subsequent tests.
 *
 * @throws OperationException if executing the operation on the graph fails
 */
@Test
public void checkCanDealWithNonStandardProperties() throws OperationException {
    final Graph graph = getGraph("/schema-DataFrame/dataSchemaNonstandardTypes.json", getElementsWithNonStandardProperties());
    final SQLContext sqlContext = getSqlContext("checkCanDealWithNonStandardProperties");
    try {
        // Edges group - check get correct edges
        GetDataFrameOfElements dfOperation = new GetDataFrameOfElements.Builder()
                .sqlContext(sqlContext)
                .view(new View.Builder().edge(EDGE_GROUP).build())
                .build();
        Dataset<Row> dataFrame = graph.execute(dfOperation, new User());
        final Set<Row> results = new HashSet<>(dataFrame.collectAsList());
        final Set<Row> expectedRows = new HashSet<>();
        // Expected values are appended in the order they are compared against the row.
        final MutableList<Object> edgeFields = new MutableList<>();
        final Map<String, Long> edgeFreqMap = Map$.MODULE$.empty();
        edgeFreqMap.put("Y", 1000L);
        edgeFreqMap.put("Z", 10000L);
        edgeFields.appendElem(EDGE_GROUP);
        edgeFields.appendElem("B");
        edgeFields.appendElem("C");
        edgeFields.appendElem(edgeFreqMap);
        final HyperLogLogPlus edgeSketch = new HyperLogLogPlus(5, 5);
        edgeSketch.offer("AAA");
        edgeSketch.offer("BBB");
        // The row is expected to hold the sketch's cardinality, not the sketch itself.
        edgeFields.appendElem(edgeSketch.cardinality());
        expectedRows.add(Row$.MODULE$.fromSeq(edgeFields));
        assertEquals(expectedRows, results);

        // Entities group - check get correct entities
        dfOperation = new GetDataFrameOfElements.Builder()
                .sqlContext(sqlContext)
                .view(new View.Builder().entity(ENTITY_GROUP).build())
                .build();
        dataFrame = graph.execute(dfOperation, new User());
        results.clear();
        results.addAll(dataFrame.collectAsList());
        expectedRows.clear();
        // Fresh collections for the entity phase rather than clearing and reusing the edge ones.
        final MutableList<Object> entityFields = new MutableList<>();
        final Map<String, Long> entityFreqMap = Map$.MODULE$.empty();
        entityFreqMap.put("W", 10L);
        entityFreqMap.put("X", 100L);
        entityFields.appendElem(ENTITY_GROUP);
        entityFields.appendElem("A");
        entityFields.appendElem(entityFreqMap);
        final HyperLogLogPlus entitySketch = new HyperLogLogPlus(5, 5);
        entitySketch.offer("AAA");
        entityFields.appendElem(entitySketch.cardinality());
        expectedRows.add(Row$.MODULE$.fromSeq(entityFields));
        assertEquals(expectedRows, results);
    } finally {
        // Always release the Spark context, including when an assertion above fails.
        sqlContext.sparkContext().stop();
    }
}
use of uk.gov.gchq.gaffer.data.elementdefinition.view.View in project Gaffer by gchq.
the class GetDataFrameOfElementsHandlerTest method checkGetCorrectElementsInDataFrame.
/**
 * Executes {@code GetDataFrameOfElements} against a graph built from the standard data schema and
 * checks the collected rows, first for the edge group and then for the entity group. Two edge rows
 * and one entity row are expected for each index below {@code NUM_ELEMENTS}.
 * <p>
 * The Spark context is stopped in a {@code finally} block so that a failed assertion or a failed
 * execution does not leave a running context behind for subsequent tests.
 *
 * @throws OperationException if executing the operation on the graph fails
 */
@Test
public void checkGetCorrectElementsInDataFrame() throws OperationException {
    final Graph graph = getGraph("/schema-DataFrame/dataSchema.json", getElements());
    final SQLContext sqlContext = getSqlContext("checkGetCorrectElementsInDataFrame");
    try {
        // Edges group - check get correct edges
        GetDataFrameOfElements dfOperation = new GetDataFrameOfElements.Builder()
                .sqlContext(sqlContext)
                .view(new View.Builder().edge(EDGE_GROUP).build())
                .build();
        Dataset<Row> dataFrame = graph.execute(dfOperation, new User());
        final Set<Row> results = new HashSet<>(dataFrame.collectAsList());
        final Set<Row> expectedRows = new HashSet<>();
        for (int i = 0; i < NUM_ELEMENTS; i++) {
            // First expected edge row for index i; values are appended in row order.
            final MutableList<Object> fields1 = new MutableList<>();
            fields1.appendElem(EDGE_GROUP);
            fields1.appendElem("" + i);
            fields1.appendElem("B");
            fields1.appendElem(1);
            fields1.appendElem(2);
            fields1.appendElem(3.0F);
            fields1.appendElem(4.0D);
            fields1.appendElem(5L);
            fields1.appendElem(100L);
            expectedRows.add(Row$.MODULE$.fromSeq(fields1));
            // Second expected edge row for index i; its last value scales with i.
            final MutableList<Object> fields2 = new MutableList<>();
            fields2.appendElem(EDGE_GROUP);
            fields2.appendElem("" + i);
            fields2.appendElem("C");
            fields2.appendElem(6);
            fields2.appendElem(7);
            fields2.appendElem(8.0F);
            fields2.appendElem(9.0D);
            fields2.appendElem(10L);
            fields2.appendElem(i * 200L);
            expectedRows.add(Row$.MODULE$.fromSeq(fields2));
        }
        assertEquals(expectedRows, results);

        // Entities group - check get correct entities
        dfOperation = new GetDataFrameOfElements.Builder()
                .sqlContext(sqlContext)
                .view(new View.Builder().entity(ENTITY_GROUP).build())
                .build();
        dataFrame = graph.execute(dfOperation, new User());
        results.clear();
        results.addAll(dataFrame.collectAsList());
        expectedRows.clear();
        for (int i = 0; i < NUM_ELEMENTS; i++) {
            // One expected entity row per index; the list is new each iteration, so no clear is needed.
            final MutableList<Object> entityFields = new MutableList<>();
            entityFields.appendElem(ENTITY_GROUP);
            entityFields.appendElem("" + i);
            entityFields.appendElem(1);
            entityFields.appendElem(i);
            entityFields.appendElem(3.0F);
            entityFields.appendElem(4.0D);
            entityFields.appendElem(5L);
            entityFields.appendElem(6);
            expectedRows.add(Row$.MODULE$.fromSeq(entityFields));
        }
        assertEquals(expectedRows, results);
    } finally {
        // Always release the Spark context, including when an assertion above fails.
        sqlContext.sparkContext().stop();
    }
}
Aggregations