Use of org.apache.flink.graph.utils.Tuple3ToEdgeMap in project flink by apache.
Class GSACompilerTest, method testGSACompiler.
/**
 * Verifies the optimized plan that is produced for a gather-sum-apply iteration.
 *
 * <p>The test program maps a single {@code (1L, 2L, NullValue)} tuple to an edge, builds a graph
 * from it, runs the iteration with {@code GatherNeighborIds}, {@code SelectMinId} and
 * {@code UpdateComponentId}, and discards the resulting vertices. The compiled plan is then
 * checked for the expected parallelism, ship strategies and ship-strategy keys of the sink, the
 * workset iteration, the solution set delta join and the edge join.
 *
 * @throws Exception if building or compiling the plan fails; the exception is propagated so the
 *     test framework reports it with its original type, cause and stack trace instead of a
 *     message-only failure
 */
@Test
public void testGSACompiler() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    // compose test program
    {
        DataSet<Edge<Long, NullValue>> edges =
                env.fromElements(new Tuple3<>(1L, 2L, NullValue.getInstance()))
                        .map(new Tuple3ToEdgeMap<Long, NullValue>());

        Graph<Long, Long, NullValue> graph = Graph.fromDataSet(edges, new InitVertices(), env);

        DataSet<Vertex<Long, Long>> result =
                graph.runGatherSumApplyIteration(
                                new GatherNeighborIds(),
                                new SelectMinId(),
                                new UpdateComponentId(),
                                100)
                        .getVertices();

        result.output(new DiscardingOutputFormat<Vertex<Long, Long>>());
    }

    Plan p = env.createProgramPlan("GSA Connected Components");
    OptimizedPlan op = compileNoStats(p);

    // check the sink
    SinkPlanNode sink = op.getDataSinks().iterator().next();
    assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
    assertEquals(DEFAULT_PARALLELISM, sink.getParallelism());
    assertEquals(
            PartitioningProperty.HASH_PARTITIONED,
            sink.getGlobalProperties().getPartitioning());

    // check the iteration
    WorksetIterationPlanNode iteration =
            (WorksetIterationPlanNode) sink.getInput().getSource();
    assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism());

    // check the solution set join and the delta
    PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode();
    // this is only true if the update function preserves the partitioning
    assertTrue(ssDelta instanceof DualInputPlanNode);

    DualInputPlanNode ssJoin = (DualInputPlanNode) ssDelta;
    assertEquals(DEFAULT_PARALLELISM, ssJoin.getParallelism());
    assertEquals(ShipStrategyType.PARTITION_HASH, ssJoin.getInput1().getShipStrategy());
    assertEquals(new FieldList(0), ssJoin.getInput1().getShipStrategyKeys());

    // check the workset set join: walk back from the solution set join's first input
    // through the sum reducer and the gather mapper to the join with the edges
    SingleInputPlanNode sumReducer = (SingleInputPlanNode) ssJoin.getInput1().getSource();
    SingleInputPlanNode gatherMapper =
            (SingleInputPlanNode) sumReducer.getInput().getSource();
    DualInputPlanNode edgeJoin = (DualInputPlanNode) gatherMapper.getInput().getSource();
    assertEquals(DEFAULT_PARALLELISM, edgeJoin.getParallelism());

    // input1 is the workset
    assertEquals(ShipStrategyType.FORWARD, edgeJoin.getInput1().getShipStrategy());

    // input2 is the edges
    assertEquals(ShipStrategyType.PARTITION_HASH, edgeJoin.getInput2().getShipStrategy());
    assertTrue(edgeJoin.getInput2().getTempMode().isCached());
    assertEquals(new FieldList(0), edgeJoin.getInput2().getShipStrategyKeys());
}
Use of org.apache.flink.graph.utils.Tuple3ToEdgeMap in project flink by apache.
Class GSATranslationTest, method testTranslation.
/**
 * Verifies that the settings of a {@code GSAConfiguration} are reflected in the delta iteration
 * created by {@code runGatherSumApplyIteration}.
 *
 * <p>The test registers an aggregator, a name, a parallelism and one broadcast set each for the
 * gather, sum and apply functions, runs the iteration on a graph built from a single
 * {@code (1L, 2L, NullValue)} tuple, and then asserts on the resulting
 * {@code DeltaIterationResultSet}: maximum iterations, key positions, parallelism, name,
 * aggregator name, forwarded fields of the solution set join, and the broadcast sets attached
 * to the gather, sum and apply operators.
 *
 * @throws Exception if constructing the program fails; the exception is propagated so the test
 *     framework reports it with its original type, cause and stack trace instead of a
 *     message-only failure
 */
@Test
public void testTranslation() throws Exception {
    final String ITERATION_NAME = "Test Name";
    final String AGGREGATOR_NAME = "AggregatorName";
    final String BC_SET_GATHER_NAME = "gather messages";
    final String BC_SET_SUM_NAME = "sum updates";
    final String BC_SET_APPLY_NAME = "apply updates";
    final int NUM_ITERATIONS = 13;
    final int ITERATION_PARALLELISM = 77;

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Long> bcGather = env.fromElements(1L);
    DataSet<Long> bcSum = env.fromElements(1L);
    DataSet<Long> bcApply = env.fromElements(1L);

    DataSet<Vertex<Long, Long>> result;

    // ------------ construct the test program ------------------
    {
        DataSet<Edge<Long, NullValue>> edges =
                env.fromElements(new Tuple3<>(1L, 2L, NullValue.getInstance()))
                        .map(new Tuple3ToEdgeMap<Long, NullValue>());

        Graph<Long, Long, NullValue> graph = Graph.fromDataSet(edges, new InitVertices(), env);

        GSAConfiguration parameters = new GSAConfiguration();
        parameters.registerAggregator(AGGREGATOR_NAME, new LongSumAggregator());
        parameters.setName(ITERATION_NAME);
        parameters.setParallelism(ITERATION_PARALLELISM);
        parameters.addBroadcastSetForGatherFunction(BC_SET_GATHER_NAME, bcGather);
        parameters.addBroadcastSetForSumFunction(BC_SET_SUM_NAME, bcSum);
        parameters.addBroadcastSetForApplyFunction(BC_SET_APPLY_NAME, bcApply);

        result =
                graph.runGatherSumApplyIteration(
                                new GatherNeighborIds(),
                                new SelectMinId(),
                                new UpdateComponentId(),
                                NUM_ITERATIONS,
                                parameters)
                        .getVertices();

        result.output(new DiscardingOutputFormat<Vertex<Long, Long>>());
    }

    // ------------- validate the java program ----------------
    assertTrue(result instanceof DeltaIterationResultSet);

    DeltaIterationResultSet<?, ?> resultSet = (DeltaIterationResultSet<?, ?>) result;
    DeltaIteration<?, ?> iteration = resultSet.getIterationHead();

    // check the basic iteration properties
    assertEquals(NUM_ITERATIONS, resultSet.getMaxIterations());
    assertArrayEquals(new int[] {0}, resultSet.getKeyPositions());
    assertEquals(ITERATION_PARALLELISM, iteration.getParallelism());
    assertEquals(ITERATION_NAME, iteration.getName());
    assertEquals(
            AGGREGATOR_NAME,
            iteration.getAggregators().getAllRegisteredAggregators().iterator().next().getName());

    // validate that the semantic properties are set as they should
    TwoInputUdfOperator<?, ?, ?, ?> solutionSetJoin =
            (TwoInputUdfOperator<?, ?, ?, ?>) resultSet.getNextWorkset();
    assertTrue(
            solutionSetJoin.getSemanticProperties().getForwardingTargetFields(0, 0).contains(0));
    assertTrue(
            solutionSetJoin.getSemanticProperties().getForwardingTargetFields(1, 0).contains(0));

    // walk back from the solution set join to the sum and gather operators
    SingleInputUdfOperator<?, ?, ?> sumReduce =
            (SingleInputUdfOperator<?, ?, ?>) solutionSetJoin.getInput1();
    SingleInputUdfOperator<?, ?, ?> gatherMap =
            (SingleInputUdfOperator<?, ?, ?>) sumReduce.getInput();

    // validate that the broadcast sets are forwarded
    assertEquals(bcGather, gatherMap.getBroadcastSets().get(BC_SET_GATHER_NAME));
    assertEquals(bcSum, sumReduce.getBroadcastSets().get(BC_SET_SUM_NAME));
    assertEquals(bcApply, solutionSetJoin.getBroadcastSets().get(BC_SET_APPLY_NAME));
}
Aggregations