Use of org.apache.spark.sql.SQLContext in project Gaffer by gchq.
From the class GetDataFrameOfElementsExample, the method runExamples:
@Override
public void runExamples() {
    // Need to actively turn logging on and off as needed as Spark produces some logs
    // even when the log level is set to off.
    ROOT_LOGGER.setLevel(Level.OFF);
    final SparkConf sparkConf = new SparkConf()
            .setMaster("local")
            .setAppName("getDataFrameOfElementsWithEntityGroup")
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .set("spark.kryo.registrator", "uk.gov.gchq.gaffer.spark.serialisation.kryo.Registrator")
            .set("spark.driver.allowMultipleContexts", "true");
    final SparkContext sc = new SparkContext(sparkConf);
    sc.setLogLevel("OFF");
    final SQLContext sqlc = new SQLContext(sc);
    final Graph graph = getGraph();
    try {
        getDataFrameOfElementsWithEntityGroup(sqlc, graph);
        getDataFrameOfElementsWithEdgeGroup(sqlc, graph);
    } catch (final OperationException e) {
        throw new RuntimeException(e);
    }
    sc.stop();
    ROOT_LOGGER.setLevel(Level.INFO);
}
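The helper methods called above are not reproduced here. A minimal sketch of getDataFrameOfElementsWithEntityGroup, assuming an entity group named "entity" exists in the graph schema, could reuse the GetDataFrameOfElements builder shown in the handler test further down:

public void getDataFrameOfElementsWithEntityGroup(final SQLContext sqlc, final Graph graph)
        throws OperationException {
    // Build the operation against the supplied SQLContext, restricting the view to one entity group.
    // The group name "entity" is an assumption for illustration.
    final GetDataFrameOfElements operation = new GetDataFrameOfElements.Builder()
            .sqlContext(sqlc)
            .view(new View.Builder()
                    .entity("entity")
                    .build())
            .build();
    // Execute the operation to obtain a Spark DataFrame and print its first rows.
    final Dataset<Row> dataFrame = graph.execute(operation, new User());
    dataFrame.show();
}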
Use of org.apache.spark.sql.SQLContext in project Gaffer by gchq.
From the class AccumuloStoreRelationTest, the method testBuildScanSpecifyColumnsAndFiltersWithView:
private void testBuildScanSpecifyColumnsAndFiltersWithView(final String name, final View view,
        final String[] requiredColumns, final Filter[] filters,
        final Predicate<Element> returnElement) throws OperationException, StoreException {
    // Given
    final SQLContext sqlContext = getSqlContext(name);
    final Schema schema = getSchema();
    final AccumuloProperties properties = AccumuloProperties
            .loadStoreProperties(getClass().getResourceAsStream("/store.properties"));
    final SingleUseMockAccumuloStore store = new SingleUseMockAccumuloStore();
    store.initialise(schema, properties);
    addElements(store);

    // When
    final AccumuloStoreRelation relation = new AccumuloStoreRelation(sqlContext,
            Collections.emptyList(), view, store, new User());
    final RDD<Row> rdd = relation.buildScan(requiredColumns, filters);
    final Row[] returnedElements = (Row[]) rdd.collect();

    // Then
    // - Actual results are:
    final Set<Row> results = new HashSet<>();
    for (int i = 0; i < returnedElements.length; i++) {
        results.add(returnedElements[i]);
    }
    // - Expected results are:
    final SchemaToStructTypeConverter schemaConverter =
            new SchemaToStructTypeConverter(schema, view, new ArrayList<>());
    final ConvertElementToRow elementConverter = new ConvertElementToRow(
            new LinkedHashSet<>(Arrays.asList(requiredColumns)),
            schemaConverter.getPropertyNeedsConversion(),
            schemaConverter.getConverterByProperty());
    final Set<Row> expectedRows = new HashSet<>();
    StreamSupport.stream(getElements().spliterator(), false)
            .filter(returnElement)
            .map(elementConverter::apply)
            .forEach(expectedRows::add);
    assertEquals(expectedRows, results);

    sqlContext.sparkContext().stop();
}
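The getSqlContext(name) helper is not shown in these test snippets. One plausible shape, mirroring the SparkConf setup from the first example above, is:

private SQLContext getSqlContext(final String appName) {
    // Local Spark master with Gaffer's Kryo registrator, as in the runExamples() snippet above.
    final SparkConf sparkConf = new SparkConf()
            .setMaster("local")
            .setAppName(appName)
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .set("spark.kryo.registrator", "uk.gov.gchq.gaffer.spark.serialisation.kryo.Registrator")
            .set("spark.driver.allowMultipleContexts", "true");
    return new SQLContext(new SparkContext(sparkConf));
}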
Use of org.apache.spark.sql.SQLContext in project Gaffer by gchq.
From the class AccumuloStoreRelationTest, the method testBuildScanSpecifyColumnsWithView:
private void testBuildScanSpecifyColumnsWithView(final String name, final View view,
        final String[] requiredColumns, final Predicate<Element> returnElement)
        throws OperationException, StoreException {
    // Given
    final SQLContext sqlContext = getSqlContext(name);
    final Schema schema = getSchema();
    final AccumuloProperties properties = AccumuloProperties
            .loadStoreProperties(getClass().getResourceAsStream("/store.properties"));
    final SingleUseMockAccumuloStore store = new SingleUseMockAccumuloStore();
    store.initialise(schema, properties);
    addElements(store);

    // When
    final AccumuloStoreRelation relation = new AccumuloStoreRelation(sqlContext,
            Collections.emptyList(), view, store, new User());
    final RDD<Row> rdd = relation.buildScan(requiredColumns);
    final Row[] returnedElements = (Row[]) rdd.collect();

    // Then
    // - Actual results are:
    final Set<Row> results = new HashSet<>();
    for (int i = 0; i < returnedElements.length; i++) {
        results.add(returnedElements[i]);
    }
    // - Expected results are:
    final SchemaToStructTypeConverter schemaConverter =
            new SchemaToStructTypeConverter(schema, view, new ArrayList<>());
    final ConvertElementToRow elementConverter = new ConvertElementToRow(
            new LinkedHashSet<>(Arrays.asList(requiredColumns)),
            schemaConverter.getPropertyNeedsConversion(),
            schemaConverter.getConverterByProperty());
    final Set<Row> expectedRows = new HashSet<>();
    StreamSupport.stream(getElements().spliterator(), false)
            .filter(returnElement)
            .map(elementConverter::apply)
            .forEach(expectedRows::add);
    assertEquals(expectedRows, results);

    sqlContext.sparkContext().stop();
}
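A hypothetical call to this helper, assuming the test schema defines the EDGE_GROUP edge group with a "count" property column, might look like the following; the predicate keeps only edges so that the expected rows match the edge-only view:

testBuildScanSpecifyColumnsWithView(
        "buildScanSpecifyColumnsWithEdgeView",
        new View.Builder().edge(EDGE_GROUP).build(),
        new String[]{SchemaToStructTypeConverter.SRC_COL_NAME, "count"},
        element -> element instanceof Edge);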
Use of org.apache.spark.sql.SQLContext in project Gaffer by gchq.
From the class FilterToOperationConverterTest, the method testSpecifySource:
@Test
public void testSpecifySource() throws OperationException {
    final Schema schema = getSchema();
    final SQLContext sqlContext = getSqlContext("testSpecifySource");
    final Filter[] filters = new Filter[1];
    filters[0] = new EqualTo(SchemaToStructTypeConverter.SRC_COL_NAME, "0");

    FiltersToOperationConverter converter = new FiltersToOperationConverter(sqlContext,
            getViewFromSchema(schema), schema, filters);
    AbstractGetRDD<?> operation = converter.getOperation();

    assertTrue(operation instanceof GetRDDOfElements);
    assertEquals(0, operation.getView().getEntityGroups().size());
    assertEquals(EDGE_GROUPS, operation.getView().getEdgeGroups());
    final Set<EntitySeed> seeds = new HashSet<>();
    for (final Object seed : ((GetRDDOfElements) operation).getSeeds()) {
        seeds.add((EntitySeed) seed);
    }
    assertEquals(Collections.singleton(new EntitySeed("0")), seeds);

    sqlContext.sparkContext().stop();
}
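The getViewFromSchema(schema) helper used above is not included in this snippet. A sketch, under the assumption that it simply exposes every group defined in the Schema, would be:

private View getViewFromSchema(final Schema schema) {
    // Include all entity and edge groups from the schema so the view itself filters nothing out.
    return new View.Builder()
            .entities(schema.getEntityGroups())
            .edges(schema.getEdgeGroups())
            .build();
}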
Use of org.apache.spark.sql.SQLContext in project Gaffer by gchq.
From the class GetDataFrameOfElementsHandlerTest, the method checkCanDealWithNonStandardProperties:
@Test
public void checkCanDealWithNonStandardProperties() throws OperationException {
    final Graph graph = getGraph("/schema-DataFrame/dataSchemaNonstandardTypes.json",
            getElementsWithNonStandardProperties());
    final SQLContext sqlContext = getSqlContext("checkCanDealWithNonStandardProperties");

    // Edges group - check get correct edges
    GetDataFrameOfElements dfOperation = new GetDataFrameOfElements.Builder()
            .sqlContext(sqlContext)
            .view(new View.Builder()
                    .edge(EDGE_GROUP)
                    .build())
            .build();
    Dataset<Row> dataFrame = graph.execute(dfOperation, new User());
    Set<Row> results = new HashSet<>(dataFrame.collectAsList());
    final Set<Row> expectedRows = new HashSet<>();
    final MutableList<Object> fields1 = new MutableList<>();
    Map<String, Long> freqMap = Map$.MODULE$.empty();
    freqMap.put("Y", 1000L);
    freqMap.put("Z", 10000L);
    fields1.appendElem(EDGE_GROUP);
    fields1.appendElem("B");
    fields1.appendElem("C");
    fields1.appendElem(freqMap);
    final HyperLogLogPlus hllpp = new HyperLogLogPlus(5, 5);
    hllpp.offer("AAA");
    hllpp.offer("BBB");
    fields1.appendElem(hllpp.cardinality());
    expectedRows.add(Row$.MODULE$.fromSeq(fields1));
    assertEquals(expectedRows, results);

    // Entities group - check get correct entities
    dfOperation = new GetDataFrameOfElements.Builder()
            .sqlContext(sqlContext)
            .view(new View.Builder()
                    .entity(ENTITY_GROUP)
                    .build())
            .build();
    dataFrame = graph.execute(dfOperation, new User());
    results.clear();
    results.addAll(dataFrame.collectAsList());
    expectedRows.clear();
    fields1.clear();
    freqMap.clear();
    freqMap.put("W", 10L);
    freqMap.put("X", 100L);
    fields1.appendElem(ENTITY_GROUP);
    fields1.appendElem("A");
    fields1.appendElem(freqMap);
    final HyperLogLogPlus hllpp2 = new HyperLogLogPlus(5, 5);
    hllpp2.offer("AAA");
    fields1.appendElem(hllpp2.cardinality());
    expectedRows.add(Row$.MODULE$.fromSeq(fields1));
    assertEquals(expectedRows, results);

    sqlContext.sparkContext().stop();
}
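As a possible follow-on (not part of the test, and it would have to run before the SparkContext is stopped), the returned DataFrame can also be queried with Spark SQL through the same SQLContext; the "vertex" column name here is an assumption about how the entity vertex is exposed in the schema:

// Register the DataFrame as a temporary view and filter it with SQL.
dataFrame.createOrReplaceTempView("elements");
final Dataset<Row> filtered = sqlContext.sql("SELECT * FROM elements WHERE vertex = 'A'");
filtered.show();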