use of org.apache.flink.optimizer.testfunctions.IdentityGroupReducer in project flink by apache.
the class HardPlansCompilationTest method testTicket158.
/**
* Source -> Map -> Reduce -> Cross -> Reduce -> Cross -> Reduce ->
* |--------------------------/ /
* |--------------------------------------------/
*
* First cross has SameKeyFirst output contract
*/
@Test
public void testTicket158() {
// construct the plan
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(DEFAULT_PARALLELISM);
DataSet<Long> set1 = env.generateSequence(0, 1);
set1.map(new IdentityMapper<Long>()).name("Map1").groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce1").cross(set1).with(new IdentityCrosser<Long>()).withForwardedFieldsFirst("*").name("Cross1").groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce2").cross(set1).with(new IdentityCrosser<Long>()).name("Cross2").groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce3").output(new DiscardingOutputFormat<Long>()).name("Sink");
Plan plan = env.createProgramPlan();
OptimizedPlan oPlan = compileNoStats(plan);
JobGraphGenerator jobGen = new JobGraphGenerator();
jobGen.compileJobGraph(oPlan);
}
use of org.apache.flink.optimizer.testfunctions.IdentityGroupReducer in project flink by apache.
the class UnionPropertyPropagationTest method testUnion1.
@Test
public void testUnion1() {
// construct the plan
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(DEFAULT_PARALLELISM);
DataSet<Long> sourceA = env.generateSequence(0, 1);
DataSet<Long> sourceB = env.generateSequence(0, 1);
DataSet<Long> redA = sourceA.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>());
DataSet<Long> redB = sourceB.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>());
redA.union(redB).groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).output(new DiscardingOutputFormat<Long>());
Plan plan = env.createProgramPlan();
OptimizedPlan oPlan = compileNoStats(plan);
JobGraphGenerator jobGen = new JobGraphGenerator();
// Compile plan to verify that no error is thrown
jobGen.compileJobGraph(oPlan);
oPlan.accept(new Visitor<PlanNode>() {
@Override
public boolean preVisit(PlanNode visitable) {
if (visitable instanceof SingleInputPlanNode && visitable.getProgramOperator() instanceof GroupReduceOperatorBase) {
for (Channel inConn : visitable.getInputs()) {
Assert.assertTrue("Reduce should just forward the input if it is already partitioned", inConn.getShipStrategy() == ShipStrategyType.FORWARD);
}
//just check latest ReduceNode
return false;
}
return true;
}
@Override
public void postVisit(PlanNode visitable) {
// DO NOTHING
}
});
}
use of org.apache.flink.optimizer.testfunctions.IdentityGroupReducer in project flink by apache.
the class ReduceAllTest method testReduce.
@Test
public void testReduce() {
// construct the plan
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(DEFAULT_PARALLELISM);
DataSet<Long> set1 = env.generateSequence(0, 1);
set1.reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce1").output(new DiscardingOutputFormat<Long>()).name("Sink");
Plan plan = env.createProgramPlan();
try {
OptimizedPlan oPlan = compileNoStats(plan);
JobGraphGenerator jobGen = new JobGraphGenerator();
jobGen.compileJobGraph(oPlan);
} catch (CompilerException ce) {
ce.printStackTrace();
fail("The pact compiler is unable to compile this plan correctly");
}
}
use of org.apache.flink.optimizer.testfunctions.IdentityGroupReducer in project flink by apache.
the class ParallelismChangeTest method checkPropertyHandlingWithIncreasingGlobalParallelism1.
/**
* Simple Job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties).
*
* Increases parallelism between 1st reduce and 2nd map, so the hash partitioning from 1st reduce is not reusable.
* Expected to re-establish partitioning between reduce and map, via hash, because random is a full network
* transit as well.
*/
@Test
public void checkPropertyHandlingWithIncreasingGlobalParallelism1() {
final int p = DEFAULT_PARALLELISM;
// construct the plan
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(p);
DataSet<Long> set1 = env.generateSequence(0, 1).setParallelism(p);
set1.map(new IdentityMapper<Long>()).withForwardedFields("*").setParallelism(p).name("Map1").groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).withForwardedFields("*").setParallelism(p).name("Reduce1").map(new IdentityMapper<Long>()).withForwardedFields("*").setParallelism(p * 2).name("Map2").groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).withForwardedFields("*").setParallelism(p * 2).name("Reduce2").output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink");
Plan plan = env.createProgramPlan();
// submit the plan to the compiler
OptimizedPlan oPlan = compileNoStats(plan);
// check the optimized Plan
// when reducer 1 distributes its data across the instances of map2, it needs to employ a local hash method,
// because map2 has twice as many instances and key/value pairs with the same key need to be processed by the same
// mapper respectively reducer
SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();
ShipStrategyType mapIn = map2Node.getInput().getShipStrategy();
ShipStrategyType redIn = red2Node.getInput().getShipStrategy();
Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.PARTITION_HASH, mapIn);
Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, redIn);
}
use of org.apache.flink.optimizer.testfunctions.IdentityGroupReducer in project flink by apache.
the class ParallelismChangeTest method checkPropertyHandlingWithIncreasingLocalParallelism.
/**
* Simple Job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all properties).
*
* Increases parallelism between 1st reduce and 2nd map, such that more tasks are on one instance.
* Expected to re-establish partitioning between map and reduce via a local hash.
*/
@Test
public void checkPropertyHandlingWithIncreasingLocalParallelism() {
final int p = DEFAULT_PARALLELISM * 2;
// construct the plan
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(p);
DataSet<Long> set1 = env.generateSequence(0, 1).setParallelism(p);
set1.map(new IdentityMapper<Long>()).withForwardedFields("*").setParallelism(p).name("Map1").groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).withForwardedFields("*").setParallelism(p).name("Reduce1").map(new IdentityMapper<Long>()).withForwardedFields("*").setParallelism(p * 2).name("Map2").groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).withForwardedFields("*").setParallelism(p * 2).name("Reduce2").output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink");
Plan plan = env.createProgramPlan();
// submit the plan to the compiler
OptimizedPlan oPlan = compileNoStats(plan);
// check the optimized Plan
// when reducer 1 distributes its data across the instances of map2, it needs to employ a local hash method,
// because map2 has twice as many instances and key/value pairs with the same key need to be processed by the same
// mapper respectively reducer
SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();
ShipStrategyType mapIn = map2Node.getInput().getShipStrategy();
ShipStrategyType reduceIn = red2Node.getInput().getShipStrategy();
Assert.assertTrue("Invalid ship strategy for an operator.", (ShipStrategyType.PARTITION_RANDOM == mapIn && ShipStrategyType.PARTITION_HASH == reduceIn) || (ShipStrategyType.PARTITION_HASH == mapIn && ShipStrategyType.FORWARD == reduceIn));
}
Aggregations