Use of org.apache.spark.sql.SQLContext in project Gaffer by gchq: the class GetDataFrameOfElementsHandlerTest, method checkGetCorrectElementsInDataFrame.
@Test
public void checkGetCorrectElementsInDataFrame() throws OperationException {
    final Graph graph = getGraph("/schema-DataFrame/dataSchema.json", getElements());
    final SQLContext sqlContext = getSqlContext("checkGetCorrectElementsInDataFrame");

    // Edges group - check that the correct edges are returned
    GetDataFrameOfElements dfOperation = new GetDataFrameOfElements.Builder()
            .sqlContext(sqlContext)
            .view(new View.Builder()
                    .edge(EDGE_GROUP)
                    .build())
            .build();
    Dataset<Row> dataFrame = graph.execute(dfOperation, new User());
    Set<Row> results = new HashSet<>(dataFrame.collectAsList());
    final Set<Row> expectedRows = new HashSet<>();
    for (int i = 0; i < NUM_ELEMENTS; i++) {
        final MutableList<Object> fields1 = new MutableList<>();
        fields1.appendElem(EDGE_GROUP);
        fields1.appendElem("" + i);
        fields1.appendElem("B");
        fields1.appendElem(1);
        fields1.appendElem(2);
        fields1.appendElem(3.0F);
        fields1.appendElem(4.0D);
        fields1.appendElem(5L);
        fields1.appendElem(100L);
        expectedRows.add(Row$.MODULE$.fromSeq(fields1));

        final MutableList<Object> fields2 = new MutableList<>();
        fields2.appendElem(EDGE_GROUP);
        fields2.appendElem("" + i);
        fields2.appendElem("C");
        fields2.appendElem(6);
        fields2.appendElem(7);
        fields2.appendElem(8.0F);
        fields2.appendElem(9.0D);
        fields2.appendElem(10L);
        fields2.appendElem(i * 200L);
        expectedRows.add(Row$.MODULE$.fromSeq(fields2));
    }
    assertEquals(expectedRows, results);

    // Entities group - check that the correct entities are returned
    dfOperation = new GetDataFrameOfElements.Builder()
            .sqlContext(sqlContext)
            .view(new View.Builder()
                    .entity(ENTITY_GROUP)
                    .build())
            .build();
    dataFrame = graph.execute(dfOperation, new User());
    results.clear();
    results.addAll(dataFrame.collectAsList());
    expectedRows.clear();
    for (int i = 0; i < NUM_ELEMENTS; i++) {
        final MutableList<Object> fields1 = new MutableList<>();
        fields1.appendElem(ENTITY_GROUP);
        fields1.appendElem("" + i);
        fields1.appendElem(1);
        fields1.appendElem(i);
        fields1.appendElem(3.0F);
        fields1.appendElem(4.0D);
        fields1.appendElem(5L);
        fields1.appendElem(6);
        expectedRows.add(Row$.MODULE$.fromSeq(fields1));
    }
    assertEquals(expectedRows, results);

    sqlContext.sparkContext().stop();
}
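The tests in this class rely on a getSqlContext(name) helper that is not shown on this page. A minimal sketch of such a helper, assuming a local-mode Spark context is sufficient (the real helper in Gaffer's test class may set further properties, e.g. Kryo serialisation):

// Sketch only: a local-mode SQLContext for tests. Requires org.apache.spark.SparkConf,
// org.apache.spark.SparkContext and org.apache.spark.sql.SQLContext.
private SQLContext getSqlContext(final String appName) {
    final SparkConf sparkConf = new SparkConf()
            .setMaster("local")
            .setAppName(appName);
    return new SQLContext(new SparkContext(sparkConf));
}

Each test stops the underlying SparkContext when it finishes, which is why every test builds its own SQLContext.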
Use of org.apache.spark.sql.SQLContext in project Gaffer by gchq: the class GetDataFrameOfElementsHandlerTest, method checkViewIsRespected.
@Test
public void checkViewIsRespected() throws OperationException {
    final Graph graph = getGraph("/schema-DataFrame/dataSchema.json", getElements());
    final SQLContext sqlContext = getSqlContext("checkViewIsRespected");

    // Edges group - check that only edges passing the view's filter are returned
    GetDataFrameOfElements dfOperation = new GetDataFrameOfElements.Builder()
            .sqlContext(sqlContext)
            .view(new View.Builder()
                    .edge(EDGE_GROUP, new ViewElementDefinition.Builder()
                            .preAggregationFilter(new ElementFilter.Builder()
                                    .select("count")
                                    .execute(new IsMoreThan(800L))
                                    .build())
                            .build())
                    .build())
            .build();
    Dataset<Row> dataFrame = graph.execute(dfOperation, new User());
    Set<Row> results = new HashSet<>(dataFrame.collectAsList());
    final Set<Row> expectedRows = new HashSet<>();
    for (int i = 0; i < NUM_ELEMENTS; i++) {
        if (i * 200L > 800L) {
            final MutableList<Object> fields2 = new MutableList<>();
            fields2.appendElem(EDGE_GROUP);
            fields2.appendElem("" + i);
            fields2.appendElem("C");
            fields2.appendElem(6);
            fields2.appendElem(7);
            fields2.appendElem(8.0F);
            fields2.appendElem(9.0D);
            fields2.appendElem(10L);
            fields2.appendElem(i * 200L);
            expectedRows.add(Row$.MODULE$.fromSeq(fields2));
        }
    }
    assertEquals(expectedRows, results);

    // Entities group - check that only entities passing the view's filter are returned
    dfOperation = new GetDataFrameOfElements.Builder()
            .sqlContext(sqlContext)
            .view(new View.Builder()
                    .entity(ENTITY_GROUP, new ViewElementDefinition.Builder()
                            .postAggregationFilter(new ElementFilter.Builder()
                                    .select("property1")
                                    .execute(new IsMoreThan(1))
                                    .build())
                            .build())
                    .build())
            .build();
    dataFrame = graph.execute(dfOperation, new User());
    results.clear();
    results.addAll(dataFrame.collectAsList());
    expectedRows.clear();
    for (int i = 2; i < NUM_ELEMENTS; i++) {
        final MutableList<Object> fields1 = new MutableList<>();
        fields1.appendElem(ENTITY_GROUP);
        fields1.appendElem("" + i);
        fields1.appendElem(1);
        fields1.appendElem(i);
        fields1.appendElem(3.0F);
        fields1.appendElem(4.0D);
        fields1.appendElem(5L);
        fields1.appendElem(6);
        expectedRows.add(Row$.MODULE$.fromSeq(fields1));
    }
    assertEquals(expectedRows, results);

    sqlContext.sparkContext().stop();
}
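Here the filter lives inside the Gaffer View, so it is applied in the store before any data reaches Spark. The same selection can also be expressed on the Spark side once the DataFrame has been built; a sketch, assuming the edge schema exposes a count column as the expected rows above suggest:

// Sketch only: filter in Spark rather than in the Gaffer View. Unless the
// relation pushes the predicate back down to the store (the job of
// FiltersToOperationConverter, shown further down this page), this pulls all
// edges of the group into the DataFrame first.
final Dataset<Row> allEdges = graph.execute(new GetDataFrameOfElements.Builder()
        .sqlContext(sqlContext)
        .view(new View.Builder().edge(EDGE_GROUP).build())
        .build(), new User());
final Dataset<Row> bigCounts = allEdges.filter("count > 800");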
Use of org.apache.spark.sql.SQLContext in project Gaffer by gchq: the class GetDataFrameOfElementsHandlerTest, method checkGetCorrectElementsInDataFrameWithProjection.
@Test
public void checkGetCorrectElementsInDataFrameWithProjection() throws OperationException {
    final Graph graph = getGraph("/schema-DataFrame/dataSchema.json", getElements());
    final SQLContext sqlContext = getSqlContext("checkGetCorrectElementsInDataFrameWithProjection");

    // Get all edges
    final GetDataFrameOfElements dfOperation = new GetDataFrameOfElements.Builder()
            .sqlContext(sqlContext)
            .view(new View.Builder()
                    .edge(EDGE_GROUP)
                    .build())
            .build();
    final Dataset<Row> dataFrame = graph.execute(dfOperation, new User());

    // Check that the correct rows are returned when the src, dst and property2 columns are requested
    Set<Row> results = new HashSet<>(dataFrame.select("src", "dst", "property2").collectAsList());
    final Set<Row> expectedRows = new HashSet<>();
    for (int i = 0; i < NUM_ELEMENTS; i++) {
        final MutableList<Object> fields1 = new MutableList<>();
        fields1.appendElem("" + i);
        fields1.appendElem("B");
        fields1.appendElem(3.0F);
        expectedRows.add(Row$.MODULE$.fromSeq(fields1));

        final MutableList<Object> fields2 = new MutableList<>();
        fields2.appendElem("" + i);
        fields2.appendElem("C");
        fields2.appendElem(8.0F);
        expectedRows.add(Row$.MODULE$.fromSeq(fields2));
    }
    assertEquals(expectedRows, results);

    // Check that the correct rows are returned when just the property2 column is requested
    results = new HashSet<>(dataFrame.select("property2").collectAsList());
    expectedRows.clear();
    for (int i = 0; i < NUM_ELEMENTS; i++) {
        final MutableList<Object> fields1 = new MutableList<>();
        fields1.appendElem(3.0F);
        expectedRows.add(Row$.MODULE$.fromSeq(fields1));

        final MutableList<Object> fields2 = new MutableList<>();
        fields2.appendElem(8.0F);
        expectedRows.add(Row$.MODULE$.fromSeq(fields2));
    }
    assertEquals(expectedRows, results);

    sqlContext.sparkContext().stop();
}
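The same projections can be written in SQL by first registering the DataFrame as a temporary view; a minimal sketch on Spark 2.x (the view name "edges" is arbitrary):

// Sketch only: SQL projection over the DataFrame built above.
dataFrame.createOrReplaceTempView("edges");
final Dataset<Row> projected = sqlContext.sql("SELECT src, dst, property2 FROM edges");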
Use of org.apache.spark.sql.SQLContext in project Gaffer by gchq: the class GetDataFrameOfElementsHandler, method doOperation.
public Dataset<Row> doOperation(final GetDataFrameOfElements operation,
                                final Context context,
                                final AccumuloStore store) throws OperationException {
    final SQLContext sqlContext = operation.getSqlContext();
    // Wrap the store in a Spark relation and let Spark build the DataFrame from it
    final AccumuloStoreRelation relation = new AccumuloStoreRelation(sqlContext,
            operation.getConverters(), operation.getView(), store, context.getUser());
    return sqlContext.baseRelationToDataFrame(relation);
}
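sqlContext.baseRelationToDataFrame accepts any Spark BaseRelation, which is how AccumuloStoreRelation (not shown on this page) plugs the Accumulo-backed store into Spark SQL. For orientation, a minimal Java sketch of a relation supporting a full table scan; the class and field names below are illustrative, not Gaffer's:

import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.sources.BaseRelation;
import org.apache.spark.sql.sources.TableScan;
import org.apache.spark.sql.types.StructType;

// Sketch only: the minimum a relation must provide for baseRelationToDataFrame,
// namely a schema and, via TableScan, an RDD of Rows to back the DataFrame.
public class MinimalRelation extends BaseRelation implements TableScan {
    private final SQLContext sqlContext;
    private final StructType schema;
    private final RDD<Row> rows;

    public MinimalRelation(final SQLContext sqlContext, final StructType schema, final RDD<Row> rows) {
        this.sqlContext = sqlContext;
        this.schema = schema;
        this.rows = rows;
    }

    @Override
    public SQLContext sqlContext() {
        return sqlContext;
    }

    @Override
    public StructType schema() {
        return schema;
    }

    @Override
    public RDD<Row> buildScan() {
        return rows;
    }
}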
Use of org.apache.spark.sql.SQLContext in project Gaffer by gchq: the class FilterToOperationConverterTest, method testIncompatibleGroups.
@Test
public void testIncompatibleGroups() throws OperationException {
    final Schema schema = getSchema();
    final SQLContext sqlContext = getSqlContext("testIncompatibleGroups");

    // A row belongs to exactly one group, so requiring GROUP to equal both "A"
    // and "B" can never be satisfied and no operation can be built
    final Filter[] filters = new Filter[2];
    filters[0] = new EqualTo(SchemaToStructTypeConverter.GROUP, "A");
    filters[1] = new EqualTo(SchemaToStructTypeConverter.GROUP, "B");
    final FiltersToOperationConverter converter = new FiltersToOperationConverter(sqlContext,
            getViewFromSchema(schema), schema, filters);

    final AbstractGetRDD<?> operation = converter.getOperation();
    assertNull(operation);

    sqlContext.sparkContext().stop();
}
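For contrast, a hypothetical companion case: with a single, satisfiable group filter the converter should be able to produce an operation (this expectation is an assumption about FiltersToOperationConverter, not a test taken from Gaffer):

// Sketch only: one consistent group filter is assumed to be convertible.
final Filter[] single = {new EqualTo(SchemaToStructTypeConverter.GROUP, "A")};
final FiltersToOperationConverter compatible = new FiltersToOperationConverter(sqlContext,
        getViewFromSchema(schema), schema, single);
assertNotNull(compatible.getOperation()); // assumed behaviour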