Usage of org.apache.flink.optimizer.plan.OptimizedPlan in the Apache Flink project.
Class SpargelCompilerTest, method testSpargelCompilerWithBroadcastVariable.
/**
 * Compiles a scatter-gather connected-components program whose scatter and gather functions
 * both have a broadcast set registered, and verifies the shape of the optimized plan.
 *
 * <p>The assertions cover: the sink's ship strategy and parallelism, the hash partitioning on
 * field 0 of the first input of the solution set delta join, the ship strategies and caching
 * of the edge join inputs, and that both iteration inputs are hash-partitioned on field 0
 * before entering the loop.
 */
@SuppressWarnings("serial")
@Test
public void testSpargelCompilerWithBroadcastVariable() {
    try {
        final String BC_VAR_NAME = "borat variable";
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(DEFAULT_PARALLELISM);
        // compose test program
        {
            DataSet<Long> bcVar = env.fromElements(1L);
            DataSet<Vertex<Long, Long>> initialVertices = env
                    .fromElements(new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L))
                    .map(new Tuple2ToVertexMap<Long, Long>());
            DataSet<Edge<Long, NullValue>> edges = env
                    .fromElements(new Tuple2<>(1L, 2L))
                    .map(new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {
                        @Override
                        public Edge<Long, NullValue> map(Tuple2<Long, Long> edge) {
                            return new Edge<>(edge.f0, edge.f1, NullValue.getInstance());
                        }
                    });
            Graph<Long, Long, NullValue> graph = Graph.fromDataSet(initialVertices, edges, env);

            // register the same broadcast set for the scatter and the gather function
            ScatterGatherConfiguration parameters = new ScatterGatherConfiguration();
            parameters.addBroadcastSetForScatterFunction(BC_VAR_NAME, bcVar);
            parameters.addBroadcastSetForGatherFunction(BC_VAR_NAME, bcVar);

            // The configuration must be handed to the iteration; otherwise the broadcast sets
            // registered above are never attached and the test compiles a plan without them.
            DataSet<Vertex<Long, Long>> result = graph
                    .runScatterGatherIteration(
                            new ConnectedComponents.CCMessenger<Long, Long>(BasicTypeInfo.LONG_TYPE_INFO),
                            new ConnectedComponents.CCUpdater<Long, Long>(),
                            100,
                            parameters)
                    .getVertices();
            result.output(new DiscardingOutputFormat<Vertex<Long, Long>>());
        }
        Plan p = env.createProgramPlan("Spargel Connected Components");
        OptimizedPlan op = compileNoStats(p);

        // check the sink
        SinkPlanNode sink = op.getDataSinks().iterator().next();
        assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
        assertEquals(DEFAULT_PARALLELISM, sink.getParallelism());

        // check the iteration
        WorksetIterationPlanNode iteration = (WorksetIterationPlanNode) sink.getInput().getSource();
        assertEquals(DEFAULT_PARALLELISM, iteration.getParallelism());

        // check the solution set join and the delta
        PlanNode ssDelta = iteration.getSolutionSetDeltaPlanNode();
        // this is only true if the update function preserves the partitioning
        assertTrue(ssDelta instanceof DualInputPlanNode);
        DualInputPlanNode ssJoin = (DualInputPlanNode) ssDelta;
        assertEquals(DEFAULT_PARALLELISM, ssJoin.getParallelism());
        assertEquals(ShipStrategyType.PARTITION_HASH, ssJoin.getInput1().getShipStrategy());
        assertEquals(new FieldList(0), ssJoin.getInput1().getShipStrategyKeys());

        // check the workset set join
        DualInputPlanNode edgeJoin = (DualInputPlanNode) ssJoin.getInput1().getSource();
        assertEquals(DEFAULT_PARALLELISM, edgeJoin.getParallelism());
        assertEquals(ShipStrategyType.PARTITION_HASH, edgeJoin.getInput1().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, edgeJoin.getInput2().getShipStrategy());
        assertTrue(edgeJoin.getInput1().getTempMode().isCached());
        assertEquals(new FieldList(0), edgeJoin.getInput1().getShipStrategyKeys());

        // check that the initial partitioning is pushed out of the loop
        assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput1().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_HASH, iteration.getInput2().getShipStrategy());
        assertEquals(new FieldList(0), iteration.getInput1().getShipStrategyKeys());
        assertEquals(new FieldList(0), iteration.getInput2().getShipStrategyKeys());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Usage of org.apache.flink.optimizer.plan.OptimizedPlan in the Apache Flink project.
Class GSACompilerTest, method testGSACompiler.
/**
 * Builds a gather-sum-apply connected-components program, compiles it without statistics
 * and checks the resulting plan: sink properties, iteration parallelism, the solution set
 * delta join, and the ship strategies of the join between the workset and the edges.
 */
@Test
public void testGSACompiler() {
    try {
        ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
        environment.setParallelism(DEFAULT_PARALLELISM);

        // assemble the program under test
        DataSet<Edge<Long, NullValue>> edgeSet = environment
                .fromElements(new Tuple3<>(1L, 2L, NullValue.getInstance()))
                .map(new Tuple3ToEdgeMap<Long, NullValue>());
        Graph<Long, Long, NullValue> inputGraph =
                Graph.fromDataSet(edgeSet, new InitVertices(), environment);
        Graph<Long, Long, NullValue> iterated = inputGraph.runGatherSumApplyIteration(
                new GatherNeighborIds(), new SelectMinId(), new UpdateComponentId(), 100);
        DataSet<Vertex<Long, Long>> components = iterated.getVertices();
        components.output(new DiscardingOutputFormat<Vertex<Long, Long>>());

        Plan programPlan = environment.createProgramPlan("GSA Connected Components");
        OptimizedPlan optimized = compileNoStats(programPlan);

        // the sink: forwarded input, default parallelism, hash-partitioned globally
        SinkPlanNode dataSink = optimized.getDataSinks().iterator().next();
        assertEquals(ShipStrategyType.FORWARD, dataSink.getInput().getShipStrategy());
        assertEquals(DEFAULT_PARALLELISM, dataSink.getParallelism());
        assertEquals(
                PartitioningProperty.HASH_PARTITIONED,
                dataSink.getGlobalProperties().getPartitioning());

        // the iteration feeding the sink
        WorksetIterationPlanNode worksetIteration =
                (WorksetIterationPlanNode) dataSink.getInput().getSource();
        assertEquals(DEFAULT_PARALLELISM, worksetIteration.getParallelism());

        // the solution set delta; it is a two-input node only when the
        // update function preserves the partitioning
        PlanNode deltaNode = worksetIteration.getSolutionSetDeltaPlanNode();
        assertTrue(deltaNode instanceof DualInputPlanNode);
        DualInputPlanNode solutionSetJoin = (DualInputPlanNode) deltaNode;
        assertEquals(DEFAULT_PARALLELISM, solutionSetJoin.getParallelism());
        assertEquals(
                ShipStrategyType.PARTITION_HASH,
                solutionSetJoin.getInput1().getShipStrategy());
        assertEquals(new FieldList(0), solutionSetJoin.getInput1().getShipStrategyKeys());

        // walk back from the delta join: sum reducer <- gather mapper <- edge join
        SingleInputPlanNode sumNode =
                (SingleInputPlanNode) solutionSetJoin.getInput1().getSource();
        SingleInputPlanNode gatherNode = (SingleInputPlanNode) sumNode.getInput().getSource();
        DualInputPlanNode neighborJoin = (DualInputPlanNode) gatherNode.getInput().getSource();
        assertEquals(DEFAULT_PARALLELISM, neighborJoin.getParallelism());

        // first input: the workset
        assertEquals(ShipStrategyType.FORWARD, neighborJoin.getInput1().getShipStrategy());

        // second input: the edges
        assertEquals(ShipStrategyType.PARTITION_HASH, neighborJoin.getInput2().getShipStrategy());
        assertTrue(neighborJoin.getInput2().getTempMode().isCached());
        assertEquals(new FieldList(0), neighborJoin.getInput2().getShipStrategyKeys());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Usage of org.apache.flink.optimizer.plan.OptimizedPlan in the Apache Flink project.
Class DistinctCompilationTest, method testDistinctWithFieldPositionKeyCombinable.
/**
 * Compiles a {@code distinct(1)} on a CSV source and verifies that the optimizer inserts a
 * combiner in front of the reducer: wiring, driver strategies, keys and parallelism of the
 * source, combiner, reducer and sink are all checked.
 */
@Test
public void testDistinctWithFieldPositionKeyCombinable() {
    try {
        ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
        environment.setParallelism(8);

        // source runs with parallelism 6, everything else with the environment default of 8
        DataSet<Tuple2<String, Double>> input = environment
                .readCsvFile("file:///will/never/be/read")
                .types(String.class, Double.class)
                .name("source")
                .setParallelism(6);
        DistinctOperator<Tuple2<String, Double>> distinct = input.distinct(1).name("reducer");
        distinct.output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");

        Plan programPlan = environment.createProgramPlan();
        OptimizedPlan optimized = compileNoStats(programPlan);
        OptimizerPlanNodeResolver nodes = getOptimizerPlanNodeResolver(optimized);

        // look up the nodes that correspond to the named operators
        SourcePlanNode source = nodes.getNode("source");
        SingleInputPlanNode reducer = nodes.getNode("reducer");
        SinkPlanNode sink = nodes.getNode("sink");

        // the combiner is the node that feeds the reducer
        SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource();

        // wiring: source -> combiner, reducer -> sink
        assertEquals(source, combiner.getInput().getSource());
        assertEquals(reducer, sink.getInput().getSource());

        // driver strategies of reducer and combiner
        assertEquals(DriverStrategy.SORTED_REDUCE, reducer.getDriverStrategy());
        assertEquals(DriverStrategy.SORTED_PARTIAL_REDUCE, combiner.getDriverStrategy());

        // key fields
        assertEquals(new FieldList(1), reducer.getKeys(0));
        assertEquals(new FieldList(1), combiner.getKeys(0));
        assertEquals(new FieldList(1), reducer.getInput().getLocalStrategyKeys());

        // parallelism
        assertEquals(6, source.getParallelism());
        assertEquals(6, combiner.getParallelism());
        assertEquals(8, reducer.getParallelism());
        assertEquals(8, sink.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
Usage of org.apache.flink.optimizer.plan.OptimizedPlan in the Apache Flink project.
Class GroupOrderTest, method testReduceWithGroupOrder.
/**
 * Compiles a group-reduce that groups on field 1 and sorts each group descending on field 3,
 * then checks the ship strategy, local strategy, key fields and sort order on the reducer's
 * input channel.
 */
@Test
public void testReduceWithGroupOrder() {
    // build the program
    ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
    environment.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Tuple4<Long, Long, Long, Long>> input = environment
            .readCsvFile("/tmp/fake.csv")
            .types(Long.class, Long.class, Long.class, Long.class);
    input.groupBy(1)
            .sortGroup(3, Order.DESCENDING)
            .reduceGroup(new IdentityGroupReducer<Tuple4<Long, Long, Long, Long>>())
            .name("Reduce")
            .output(new DiscardingOutputFormat<Tuple4<Long, Long, Long, Long>>())
            .name("Sink");
    Plan programPlan = environment.createProgramPlan();

    OptimizedPlan optimized;
    try {
        optimized = compileNoStats(programPlan);
    } catch (CompilerException ce) {
        ce.printStackTrace();
        fail("The pact compiler is unable to compile this plan correctly.");
        // unreachable at runtime; needed so 'optimized' is definitely assigned below
        return;
    }

    OptimizerPlanNodeResolver nodes = getOptimizerPlanNodeResolver(optimized);
    SinkPlanNode sink = nodes.getNode("Sink");
    SingleInputPlanNode reduceNode = nodes.getNode("Reduce");

    // ship strategies of sink and reducer
    Assert.assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, reduceNode.getInput().getShipStrategy());

    // the reducer input must be sorted locally
    Channel reduceInput = reduceNode.getInput();
    Assert.assertEquals(LocalStrategy.SORT, reduceInput.getLocalStrategy());

    // partitioned on the grouping key, sorted on grouping key plus group-order field
    FieldList expectedShipKeys = new FieldList(1);
    FieldList expectedLocalKeys = new FieldList(1, 3);
    Assert.assertEquals(expectedShipKeys, reduceInput.getShipStrategyKeys());
    Assert.assertEquals(expectedLocalKeys, reduceInput.getLocalStrategyKeys());
    Assert.assertTrue(
            reduceInput.getLocalStrategySortOrder()[0] == reduceNode.getSortOrders(0)[0]);

    // the second sort field must be descending
    Assert.assertEquals(false, reduceInput.getLocalStrategySortOrder()[1]);
}
Usage of org.apache.flink.optimizer.plan.OptimizedPlan in the Apache Flink project.
Class IterationsCompilerTest, method testBulkIterationWithPartialSolutionProperties.
/**
 * Verifies that interesting properties can be pushed out of a bulk iteration.
 *
 * <p>For this to work, a NoOp node has to be appended to the step function which
 * re-establishes the properties of the initial input. Without it Flink would not find a plan:
 * due to pruning, the optimizer does not consider plans that partition after the partial
 * solution node in this situation.
 *
 * <p>The test passes when both the optimizer and the job graph generator complete without
 * throwing.
 *
 * @throws Exception if plan compilation or job graph generation fails
 */
@Test
public void testBulkIterationWithPartialSolutionProperties() throws Exception {
    ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

    // two independent sequences 1..10, each wrapped into Tuple1
    DataSet<Tuple1<Long>> initialInput = environment
            .generateSequence(1, 10)
            .map(new MapFunction<Long, Tuple1<Long>>() {
                @Override
                public Tuple1<Long> map(Long value) throws Exception {
                    return new Tuple1<>(value);
                }
            });
    DataSet<Tuple1<Long>> staticInput = environment
            .generateSequence(1, 10)
            .map(new MapFunction<Long, Tuple1<Long>>() {
                @Override
                public Tuple1<Long> map(Long value) throws Exception {
                    return new Tuple1<>(value);
                }
            });

    // the iteration starts from the de-duplicated first input
    DataSet<Tuple1<Long>> deduplicated = initialInput.distinct();
    IterativeDataSet<Tuple1<Long>> loop = deduplicated.iterate(10);

    // step function: co-group the partial solution with the second input on field 0
    // and forward the first element of the partial-solution side, if there is one
    DataSet<Tuple1<Long>> step = loop
            .coGroup(staticInput)
            .where(0)
            .equalTo(0)
            .with(new CoGroupFunction<Tuple1<Long>, Tuple1<Long>, Tuple1<Long>>() {
                @Override
                public void coGroup(
                        Iterable<Tuple1<Long>> first,
                        Iterable<Tuple1<Long>> second,
                        Collector<Tuple1<Long>> out) throws Exception {
                    Iterator<Tuple1<Long>> firstSide = first.iterator();
                    if (firstSide.hasNext()) {
                        out.collect(firstSide.next());
                    }
                }
            });

    DataSet<Tuple1<Long>> loopResult = loop.closeWith(step);
    loopResult.output(new DiscardingOutputFormat<Tuple1<Long>>());

    Plan programPlan = environment.createProgramPlan();
    OptimizedPlan optimized = compileNoStats(programPlan);
    new JobGraphGenerator().compileJobGraph(optimized);
}
Aggregations