Use of uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfAllElements in project Gaffer by gchq.
From the class ImportKeyValuePairRDDToAccumuloHandlerTest, method checkImportRDDOfElements.
@Test
public void checkImportRDDOfElements() throws OperationException, IOException {
    final Graph graph1 = new Graph.Builder()
            .addSchema(getClass().getResourceAsStream("/schema/dataSchema.json"))
            .addSchema(getClass().getResourceAsStream("/schema/dataTypes.json"))
            .addSchema(getClass().getResourceAsStream("/schema/storeTypes.json"))
            .addSchema(getClass().getResourceAsStream("/schema/storeSchema.json"))
            .storeProperties(getClass().getResourceAsStream("/store.properties"))
            .build();
    final ArrayBuffer<Element> elements = new ArrayBuffer<>();
    for (int i = 0; i < 10; i++) {
        final Entity entity = new Entity(TestGroups.ENTITY);
        entity.setVertex("" + i);
        final Edge edge1 = new Edge(TestGroups.EDGE);
        edge1.setSource("" + i);
        edge1.setDestination("B");
        edge1.setDirected(false);
        edge1.putProperty(TestPropertyNames.COUNT, 2);
        final Edge edge2 = new Edge(TestGroups.EDGE);
        edge2.setSource("" + i);
        edge2.setDestination("C");
        edge2.setDirected(false);
        edge2.putProperty(TestPropertyNames.COUNT, 4);
        elements.$plus$eq(edge1);
        elements.$plus$eq(edge2);
        elements.$plus$eq(entity);
    }
    final User user = new User();
    final SparkConf sparkConf = new SparkConf()
            .setMaster("local")
            .setAppName("tests")
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .set("spark.kryo.registrator", "uk.gov.gchq.gaffer.spark.serialisation.kryo.Registrator")
            .set("spark.driver.allowMultipleContexts", "true");
    final SparkContext sparkContext = new SparkContext(sparkConf);
    // Create Hadoop configuration and serialise to a string
    final Configuration configuration = new Configuration();
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    configuration.write(new DataOutputStream(baos));
    final String configurationString = new String(baos.toByteArray(), CommonConstants.UTF_8);
    final String outputPath = this.getClass().getResource("/").getPath().toString() + "load";
    final String failurePath = this.getClass().getResource("/").getPath().toString() + "failure";
    final File file = new File(outputPath);
    if (file.exists()) {
        FileUtils.forceDelete(file);
    }
    final ElementConverterFunction func = new ElementConverterFunction(
            sparkContext.broadcast(new ByteEntityAccumuloElementConverter(graph1.getSchema()),
                    ACCUMULO_ELEMENT_CONVERTER_CLASS_TAG));
    final RDD<Tuple2<Key, Value>> elementRDD = sparkContext
            .parallelize(elements, 1, ELEMENT_CLASS_TAG)
            .flatMap(func, TUPLE2_CLASS_TAG);
    final ImportKeyValuePairRDDToAccumulo addRdd = new ImportKeyValuePairRDDToAccumulo.Builder()
            .input(elementRDD)
            .outputPath(outputPath)
            .failurePath(failurePath)
            .build();
    graph1.execute(addRdd, user);
    FileUtils.forceDelete(file);
    // Check all elements were added
    final GetRDDOfAllElements rddQuery = new GetRDDOfAllElements.Builder()
            .sparkContext(sparkContext)
            .option(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString)
            .build();
    final RDD<Element> rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    final Set<Element> results = new HashSet<>();
    final Element[] returnedElements = (Element[]) rdd.collect();
    Collections.addAll(results, returnedElements);
    assertEquals(elements.size(), results.size());
    sparkContext.stop();
}
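The most reusable part of this test is handing a serialised Hadoop Configuration to the query through an operation option. A minimal sketch of just that step, assuming an already-running SparkContext plus existing graph and user objects (and using StandardCharsets in place of CommonConstants.UTF_8), might look like this:

// Sketch only: serialise a Hadoop Configuration to a String and pass it to
// GetRDDOfAllElements as an operation option, as the test above does.
final Configuration configuration = new Configuration();
final ByteArrayOutputStream baos = new ByteArrayOutputStream();
configuration.write(new DataOutputStream(baos));
final String configurationString = new String(baos.toByteArray(), StandardCharsets.UTF_8);

final GetRDDOfAllElements rddQuery = new GetRDDOfAllElements.Builder()
        .sparkContext(sparkContext) // assumed: an existing SparkContext
        .option(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString)
        .build();
final RDD<Element> rdd = graph.execute(rddQuery, user); // assumed: existing Graph and User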
Use of uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfAllElements in project Gaffer by gchq.
From the class ImportRDDOfElementsHandlerTest, method checkImportRDDOfElements.
@Test
public void checkImportRDDOfElements() throws OperationException, IOException {
    final Graph graph1 = new Graph.Builder()
            .addSchema(getClass().getResourceAsStream("/schema/dataSchema.json"))
            .addSchema(getClass().getResourceAsStream("/schema/dataTypes.json"))
            .addSchema(getClass().getResourceAsStream("/schema/storeTypes.json"))
            .storeProperties(getClass().getResourceAsStream("/store.properties"))
            .build();
    final ArrayBuffer<Element> elements = new ArrayBuffer<>();
    for (int i = 0; i < 10; i++) {
        final Entity entity = new Entity(TestGroups.ENTITY);
        entity.setVertex("" + i);
        final Edge edge1 = new Edge(TestGroups.EDGE);
        edge1.setSource("" + i);
        edge1.setDestination("B");
        edge1.setDirected(false);
        edge1.putProperty(TestPropertyNames.COUNT, 2);
        final Edge edge2 = new Edge(TestGroups.EDGE);
        edge2.setSource("" + i);
        edge2.setDestination("C");
        edge2.setDirected(false);
        edge2.putProperty(TestPropertyNames.COUNT, 4);
        elements.$plus$eq(edge1);
        elements.$plus$eq(edge2);
        elements.$plus$eq(entity);
    }
    final User user = new User();
    final SparkConf sparkConf = new SparkConf()
            .setMaster("local")
            .setAppName("tests")
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .set("spark.kryo.registrator", "uk.gov.gchq.gaffer.spark.serialisation.kryo.Registrator")
            .set("spark.driver.allowMultipleContexts", "true");
    final SparkContext sparkContext = new SparkContext(sparkConf);
    // Create Hadoop configuration and serialise to a string
    final Configuration configuration = new Configuration();
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    configuration.write(new DataOutputStream(baos));
    final String configurationString = new String(baos.toByteArray(), CommonConstants.UTF_8);
    final String outputPath = this.getClass().getResource("/").getPath().toString() + "load";
    final String failurePath = this.getClass().getResource("/").getPath().toString() + "failure";
    final File file = new File(outputPath);
    if (file.exists()) {
        FileUtils.forceDelete(file);
    }
    final RDD<Element> elementRDD = sparkContext.parallelize(elements, 8, ELEMENT_CLASS_TAG);
    final ImportRDDOfElements addRdd = new ImportRDDOfElements.Builder()
            .sparkContext(sparkContext)
            .input(elementRDD)
            .option("outputPath", outputPath)
            .option("failurePath", failurePath)
            .build();
    graph1.execute(addRdd, user);
    FileUtils.forceDelete(file);
    // Check all elements were added
    final GetRDDOfAllElements rddQuery = new GetRDDOfAllElements.Builder()
            .sparkContext(sparkContext)
            .option(AbstractGetRDDHandler.HADOOP_CONFIGURATION_KEY, configurationString)
            .build();
    final RDD<Element> rdd = graph1.execute(rddQuery, user);
    if (rdd == null) {
        fail("No RDD returned");
    }
    final Set<Element> results = new HashSet<>();
    final Element[] returnedElements = (Element[]) rdd.collect();
    Collections.addAll(results, returnedElements);
    assertEquals(elements.size(), results.size());
    sparkContext.stop();
}
Use of uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfAllElements in project Gaffer by gchq.
From the class AccumuloStoreRelation, method buildScan.
/**
 * Creates a <code>DataFrame</code> of all {@link Element}s from the specified groups, filtering out any
 * columns that are not required.
 * <p>
 * Currently the projection is not pushed down to the store (i.e. it should be implemented in an iterator,
 * not in the transform); issue 320 tracks this.
*
* @param requiredColumns The columns to return.
* @return An {@link RDD} of {@link Row}s containing the requested columns.
*/
@Override
public RDD<Row> buildScan(final String[] requiredColumns) {
    try {
        LOGGER.info("Building scan with required columns: {}", StringUtils.join(requiredColumns, ','));
        LOGGER.info("Building GetRDDOfAllElements with view set to groups {}", StringUtils.join(groups, ','));
        final GetRDDOfAllElements operation = new GetRDDOfAllElements(sqlContext.sparkContext());
        operation.setView(view);
        final RDD<Element> rdd = store.execute(operation, user);
        return rdd.map(new ConvertElementToRow(new LinkedHashSet<>(Arrays.asList(requiredColumns)),
                propertyNeedsConversion, converterByProperty), ClassTagConstants.ROW_CLASS_TAG);
    } catch (final OperationException e) {
        LOGGER.error("OperationException while executing operation", e);
        return null;
    }
}
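For context, buildScan is not usually called directly: Spark SQL invokes it with the pruned column set when a query selects only some columns. A hedged sketch of the caller's side, assuming a DataFrame df already backed by this relation and using hypothetical column names, might be:

// Illustrative sketch only: selecting a subset of columns lets Spark prune the projection,
// so buildScan(requiredColumns) receives just {"group", "vertex", "property1"}.
public Row[] collectPrunedColumns(final DataFrame df) {
    return df.select("group", "vertex", "property1").collect();
}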
Use of uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfAllElements in project Gaffer by gchq.
From the class FilterToOperationConverterTest, method testSingleGroup.
@Test
public void testSingleGroup() throws OperationException {
    final Schema schema = getSchema();
    final SQLContext sqlContext = getSqlContext("testSingleGroup");
    final Filter[] filters = new Filter[1];
    filters[0] = new EqualTo(SchemaToStructTypeConverter.GROUP, ENTITY_GROUP);
    final FiltersToOperationConverter converter = new FiltersToOperationConverter(sqlContext,
            getViewFromSchema(schema), schema, filters);
    final AbstractGetRDD<?> operation = converter.getOperation();
    assertTrue(operation instanceof GetRDDOfAllElements);
    assertEquals(Collections.singleton(ENTITY_GROUP), operation.getView().getEntityGroups());
    assertEquals(0, operation.getView().getEdgeGroups().size());
    sqlContext.sparkContext().stop();
}
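The EqualTo filter on the group column built by hand above is the kind of filter Spark SQL itself produces for a simple group predicate. A hedged sketch of the equivalent DataFrame-side query, assuming a DataFrame df over the Gaffer relation, could look like this:

// Illustrative sketch only: filtering on the group column from the DataFrame side.
// Spark can translate this predicate into a sources.EqualTo filter, which the converter
// then narrows to a view containing just ENTITY_GROUP.
public DataFrame filterToEntityGroup(final DataFrame df) {
    return df.filter(df.col(SchemaToStructTypeConverter.GROUP).equalTo(ENTITY_GROUP));
}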
Use of uk.gov.gchq.gaffer.spark.operation.scalardd.GetRDDOfAllElements in project Gaffer by gchq.
From the class FilterToOperationConverterTest, method testSpecifyPropertyFilters.
@Test
public void testSpecifyPropertyFilters() throws OperationException {
    final Schema schema = getSchema();
    final SQLContext sqlContext = getSqlContext("testSpecifyPropertyFilters");
    final Filter[] filters = new Filter[1];
    // GreaterThan
    filters[0] = new GreaterThan("property1", 5);
    FiltersToOperationConverter converter = new FiltersToOperationConverter(sqlContext,
            getViewFromSchema(schema), schema, filters);
    AbstractGetRDD<?> operation = converter.getOperation();
    assertTrue(operation instanceof GetRDDOfAllElements);
    View opView = operation.getView();
    List<ConsumerFunctionContext<String, FilterFunction>> entityPostAggFilters =
            opView.getEntity(ENTITY_GROUP).getPostAggregationFilterFunctions();
    assertEquals(1, entityPostAggFilters.size());
    assertEquals(new ArrayList<>(Collections.singleton("property1")), entityPostAggFilters.get(0).getSelection());
    assertEquals(new IsMoreThan(5, false), entityPostAggFilters.get(0).getFunction());
    for (final String edgeGroup : EDGE_GROUPS) {
        final List<ConsumerFunctionContext<String, FilterFunction>> edgePostAggFilters =
                opView.getEdge(edgeGroup).getPostAggregationFilterFunctions();
        assertEquals(1, edgePostAggFilters.size());
        assertEquals(new ArrayList<>(Collections.singleton("property1")), edgePostAggFilters.get(0).getSelection());
        assertEquals(new IsMoreThan(5, false), edgePostAggFilters.get(0).getFunction());
    }
    // LessThan
    filters[0] = new LessThan("property4", 8L);
    converter = new FiltersToOperationConverter(sqlContext, getViewFromSchema(schema), schema, filters);
    operation = converter.getOperation();
    assertTrue(operation instanceof GetRDDOfAllElements);
    // Only groups ENTITY_GROUP and EDGE_GROUP should be in the view as only they have property4
    opView = operation.getView();
    entityPostAggFilters = opView.getEntity(ENTITY_GROUP).getPostAggregationFilterFunctions();
    assertEquals(1, entityPostAggFilters.size());
    assertEquals(new ArrayList<>(Collections.singleton("property4")), entityPostAggFilters.get(0).getSelection());
    assertEquals(new IsLessThan(8L, false), entityPostAggFilters.get(0).getFunction());
    List<ConsumerFunctionContext<String, FilterFunction>> edgePostAggFilters =
            opView.getEdge(EDGE_GROUP).getPostAggregationFilterFunctions();
    assertEquals(1, edgePostAggFilters.size());
    assertEquals(new ArrayList<>(Collections.singleton("property4")), edgePostAggFilters.get(0).getSelection());
    assertEquals(new IsLessThan(8L, false), edgePostAggFilters.get(0).getFunction());
    // And
    final Filter left = new GreaterThan("property1", 5);
    final Filter right = new GreaterThan("property4", 8L);
    filters[0] = new And(left, right);
    converter = new FiltersToOperationConverter(sqlContext, getViewFromSchema(schema), schema, filters);
    operation = converter.getOperation();
    assertTrue(operation instanceof GetRDDOfAllElements);
    // Only groups ENTITY_GROUP and EDGE_GROUP should be in the view as only they have property1 and property4
    opView = operation.getView();
    entityPostAggFilters = opView.getEntity(ENTITY_GROUP).getPostAggregationFilterFunctions();
    assertEquals(2, entityPostAggFilters.size());
    final ArrayList<String> expectedProperties = new ArrayList<>();
    expectedProperties.add("property1");
    expectedProperties.add("property4");
    assertEquals(1, entityPostAggFilters.get(0).getSelection().size());
    assertEquals(expectedProperties.get(0), entityPostAggFilters.get(0).getSelection().get(0));
    assertEquals(1, entityPostAggFilters.get(1).getSelection().size());
    assertEquals(expectedProperties.get(1), entityPostAggFilters.get(1).getSelection().get(0));
    final ArrayList<FilterFunction> expectedFunctions = new ArrayList<>();
    expectedFunctions.add(new IsMoreThan(5, false));
    expectedFunctions.add(new IsMoreThan(8L, false));
    assertEquals(expectedFunctions.get(0), entityPostAggFilters.get(0).getFunction());
    assertEquals(expectedFunctions.get(1), entityPostAggFilters.get(1).getFunction());
    edgePostAggFilters = opView.getEdge(EDGE_GROUP).getPostAggregationFilterFunctions();
    assertEquals(2, edgePostAggFilters.size());
    assertEquals(1, edgePostAggFilters.get(0).getSelection().size());
    assertEquals(expectedProperties.get(0), edgePostAggFilters.get(0).getSelection().get(0));
    assertEquals(1, edgePostAggFilters.get(1).getSelection().size());
    assertEquals(expectedProperties.get(1), edgePostAggFilters.get(1).getSelection().get(0));
    sqlContext.sparkContext().stop();
}
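The GreaterThan, LessThan and And filters constructed manually in this test are what Spark SQL would hand to the converter for an equivalent DataFrame query. A hedged sketch of that query-side view, assuming a DataFrame df over the Gaffer relation and the same hypothetical property names, might be:

// Illustrative sketch only: these Column expressions correspond to the Filter[] arrays
// built by hand in the test. Spark can translate them into sources.GreaterThan and
// sources.And filters before passing them to the data source.
public DataFrame applyPropertyFilters(final DataFrame df) {
    return df.filter(df.col("property1").gt(5).and(df.col("property4").gt(8L)));
}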