use of org.apache.flink.optimizer.plan.SingleInputPlanNode in project flink by apache.
the class ParallelismChangeTest method checkPropertyHandlingWithIncreasingLocalParallelism.
/**
 * Plan under test: Map1 -> Reduce1 -> Map2 -> Reduce2 -> Sink. Every function forwards all
 * fields ("*"), so all properties are preserved.
 *
 * <p>Map1 and Reduce1 run with parallelism {@code p}; Map2, Reduce2 and the sink run with
 * {@code p * 2}, i.e. the parallelism increases between the 1st reduce and the 2nd map.
 * The grouping required by Reduce2 therefore has to be re-established by a hash partitioning,
 * either on the input of Map2 or on the input of Reduce2.
 */
@Test
public void checkPropertyHandlingWithIncreasingLocalParallelism() {
    final int p = DEFAULT_PARALLELISM * 2;
    final int increased = p * 2;

    // --- build the program, one named stage at a time ---
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(p);

    DataSet<Long> set1 = env.generateSequence(0, 1).setParallelism(p);

    DataSet<Long> map1 = set1.map(new IdentityMapper<Long>())
            .withForwardedFields("*")
            .setParallelism(p)
            .name("Map1");

    DataSet<Long> reduce1 = map1.groupBy("*")
            .reduceGroup(new IdentityGroupReducer<Long>())
            .withForwardedFields("*")
            .setParallelism(p)
            .name("Reduce1");

    DataSet<Long> map2 = reduce1.map(new IdentityMapper<Long>())
            .withForwardedFields("*")
            .setParallelism(increased)
            .name("Map2");

    DataSet<Long> reduce2 = map2.groupBy("*")
            .reduceGroup(new IdentityGroupReducer<Long>())
            .withForwardedFields("*")
            .setParallelism(increased)
            .name("Reduce2");

    reduce2.output(new DiscardingOutputFormat<Long>())
            .setParallelism(increased)
            .name("Sink");

    // --- hand the program to the optimizer ---
    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = compileNoStats(plan);

    // --- inspect the optimized plan, walking backwards from the sink ---
    SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
    SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
    SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();

    ShipStrategyType mapIn = map2Node.getInput().getShipStrategy();
    ShipStrategyType reduceIn = red2Node.getInput().getShipStrategy();

    // Map2 has twice as many instances as Reduce1, so records with equal keys must be brought
    // back together before Reduce2. Two shipping combinations are accepted:
    //   (a) random partitioning into Map2, then hash partitioning into Reduce2
    //   (b) hash partitioning into Map2, then forwarding into Reduce2
    boolean randomThenHash =
            ShipStrategyType.PARTITION_RANDOM == mapIn && ShipStrategyType.PARTITION_HASH == reduceIn;
    boolean hashThenForward =
            ShipStrategyType.PARTITION_HASH == mapIn && ShipStrategyType.FORWARD == reduceIn;

    Assert.assertTrue("Invalid ship strategy for an operator.", randomThenHash || hashThenForward);
}
use of org.apache.flink.optimizer.plan.SingleInputPlanNode in project flink by apache.
the class ParallelismChangeTest method checkPropertyHandlingWithDecreasingParallelism.
/**
 * Plan under test: Source -> Map1 -> Reduce1 -> Map2 -> Reduce2 -> Sink, all forwarding all
 * fields ("*").
 *
 * <p>The source, Map1 and Reduce1 run with parallelism {@code p * 2}; Map2, Reduce2 and the
 * sink run with {@code p}, i.e. the parallelism decreases between the 1st reduce and the
 * 2nd map. The test checks that a sort and a hash partitioning show up on the input of
 * Map2 or of Reduce2.
 */
@Test
public void checkPropertyHandlingWithDecreasingParallelism() {
    final int p = DEFAULT_PARALLELISM;
    final int doubled = p * 2;

    // --- build the program ---
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(p);

    env.generateSequence(0, 1).setParallelism(doubled)
            .map(new IdentityMapper<Long>())
                .withForwardedFields("*").setParallelism(doubled).name("Map1")
            .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
                .withForwardedFields("*").setParallelism(doubled).name("Reduce1")
            .map(new IdentityMapper<Long>())
                .withForwardedFields("*").setParallelism(p).name("Map2")
            .groupBy("*").reduceGroup(new IdentityGroupReducer<Long>())
                .withForwardedFields("*").setParallelism(p).name("Reduce2")
            .output(new DiscardingOutputFormat<Long>())
                .setParallelism(p).name("Sink");

    // --- hand the program to the optimizer ---
    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = compileNoStats(plan);

    // --- inspect the optimized plan, walking backwards from the sink ---
    // Map2 runs with half as many instances as Reduce1, so the data has to be redistributed
    // and re-sorted somewhere in front of Reduce2: either already on the input of Map2 or
    // directly on the input of Reduce2.
    SinkPlanNode sink = oPlan.getDataSinks().iterator().next();
    SingleInputPlanNode reduce2 = (SingleInputPlanNode) sink.getPredecessor();
    SingleInputPlanNode map2 = (SingleInputPlanNode) reduce2.getPredecessor();

    boolean sortedSomewhere =
            LocalStrategy.SORT == reduce2.getInput().getLocalStrategy()
                    || LocalStrategy.SORT == map2.getInput().getLocalStrategy();
    Assert.assertTrue("The no sorting local strategy.", sortedSomewhere);

    boolean hashPartitionedSomewhere =
            ShipStrategyType.PARTITION_HASH == reduce2.getInput().getShipStrategy()
                    || ShipStrategyType.PARTITION_HASH == map2.getInput().getShipStrategy();
    Assert.assertTrue("The no partitioning ship strategy.", hashPartitionedSomewhere);
}
use of org.apache.flink.optimizer.plan.SingleInputPlanNode in project flink by apache.
the class PartitionPushdownTest method testPartitioningReused.
/**
 * Two chained aggregations: the first groups on field 0, the second on fields (0, 1).
 *
 * <p>Checks that the first aggregation is hash-partitioned on field 0 and that the second
 * aggregation receives its input via FORWARD, i.e. without another partitioning step.
 */
@Test
public void testPartitioningReused() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        @SuppressWarnings("unchecked")
        DataSet<Tuple3<Long, Long, Long>> input =
                env.fromElements(new Tuple3<Long, Long, Long>(0L, 0L, 0L));

        input
                .groupBy(0).sum(1)
                .groupBy(0, 1).sum(2)
                .output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());

        Plan program = env.createProgramPlan();
        OptimizedPlan optimized = compileNoStats(program);

        // walk backwards from the sink: sink <- 2nd aggregation <- 1st aggregation
        SinkPlanNode sinkNode = optimized.getDataSinks().iterator().next();
        SingleInputPlanNode secondAgg = (SingleInputPlanNode) sinkNode.getInput().getSource();
        SingleInputPlanNode firstAgg = (SingleInputPlanNode) secondAgg.getInput().getSource();

        // the 2nd aggregation is fed by a plain forward connection
        assertEquals(ShipStrategyType.FORWARD, secondAgg.getInput().getShipStrategy());

        // the 1st aggregation is hash-partitioned on field 0
        assertEquals(ShipStrategyType.PARTITION_HASH, firstAgg.getInput().getShipStrategy());
        assertEquals(new FieldList(0), firstAgg.getInput().getShipStrategyKeys());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
use of org.apache.flink.optimizer.plan.SingleInputPlanNode in project flink by apache.
the class PipelineBreakerTest method testPipelineBreakerWithBroadcastVariable.
/**
 * A mapped source feeds a chain of two mappers and is, at the same time, attached to the
 * last mapper as broadcast set "bc".
 *
 * <p>With PIPELINED execution mode requested, the test checks that neither the regular nor
 * the broadcast input of the last mapper has a temp mode set, and that both the input of
 * the preceding mapper and the broadcast input use the BATCH data exchange mode.
 */
@Test
public void testPipelineBreakerWithBroadcastVariable() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.getConfig().setExecutionMode(ExecutionMode.PIPELINED);
        env.setParallelism(64);

        DataSet<Long> source = env.generateSequence(1, 10).map(new IdentityMapper<Long>());

        // source -> map -> map, with 'source' additionally broadcast into the last map
        DataSet<Long> firstMap = source.map(new IdentityMapper<Long>());
        DataSet<Long> result = firstMap
                .map(new IdentityMapper<Long>())
                .withBroadcastSet(source, "bc");

        result.output(new DiscardingOutputFormat<Long>());

        Plan program = env.createProgramPlan();
        OptimizedPlan optimized = compileNoStats(program);

        // walk backwards from the sink: sink <- last mapper <- preceding mapper
        SinkPlanNode sinkNode = optimized.getDataSinks().iterator().next();
        SingleInputPlanNode mapper = (SingleInputPlanNode) sinkNode.getInput().getSource();
        SingleInputPlanNode mapperInput = (SingleInputPlanNode) mapper.getInput().getSource();

        // no temp mode on the regular input or on the broadcast input of the last mapper
        assertEquals(TempMode.NONE, mapper.getInput().getTempMode());
        assertEquals(TempMode.NONE, mapper.getBroadcastInputs().get(0).getTempMode());

        // both the preceding mapper's input and the broadcast input are exchanged in BATCH mode
        assertEquals(DataExchangeMode.BATCH, mapperInput.getInput().getDataExchangeMode());
        assertEquals(DataExchangeMode.BATCH, mapper.getBroadcastInputs().get(0).getDataExchangeMode());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
use of org.apache.flink.optimizer.plan.SingleInputPlanNode in project flink by apache.
the class PipelineBreakerTest method testPipelineBreakerBroadcastedAllReduce.
/**
 * A mapped source feeds a mapper directly and also, via another mapper followed by a
 * non-grouped reduce, provides broadcast set "bc1" of that same mapper. A second,
 * independent sequence is attached as broadcast set "bc2".
 *
 * <p>With PIPELINED execution mode requested, the test checks that the mapper's regular
 * input has no temp mode set and uses the BATCH data exchange mode.
 */
@Test
public void testPipelineBreakerBroadcastedAllReduce() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.getConfig().setExecutionMode(ExecutionMode.PIPELINED);
        env.setParallelism(64);

        DataSet<Long> sourceWithMapper = env.generateSequence(1, 10).map(new IdentityMapper<Long>());

        // first broadcast input: derived from the shared source through map + reduce
        DataSet<Long> bcInput1 = sourceWithMapper
                .map(new IdentityMapper<Long>())
                .reduce(new SelectOneReducer<Long>());

        // second broadcast input: a separate sequence source
        DataSet<Long> bcInput2 = env.generateSequence(1, 10);

        DataSet<Long> result = sourceWithMapper
                .map(new IdentityMapper<Long>())
                .withBroadcastSet(bcInput1, "bc1")
                .withBroadcastSet(bcInput2, "bc2");

        result.output(new DiscardingOutputFormat<Long>());

        Plan program = env.createProgramPlan();
        OptimizedPlan optimized = compileNoStats(program);

        // the node directly in front of the sink is the mapper with the broadcast sets
        SinkPlanNode sinkNode = optimized.getDataSinks().iterator().next();
        SingleInputPlanNode mapper = (SingleInputPlanNode) sinkNode.getInput().getSource();

        assertEquals(TempMode.NONE, mapper.getInput().getTempMode());
        assertEquals(DataExchangeMode.BATCH, mapper.getInput().getDataExchangeMode());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Aggregations