Use of org.apache.flink.optimizer.testfunctions.IdentityGroupReducer in the Apache Flink project.
From the class BranchingPlansCompilerTest, method testBranchingUnion.
/**
 * Compiles a plan in which the output of "Join 1" branches into several consumers ("Map 1",
 * "Reduce 2" and "Map 2"), whose results are unioned and then joined with "Map 2" again.
 *
 * <p>The test passes when optimizing the plan and generating the job graph complete without
 * throwing. Exceptions are deliberately not caught: they propagate to the test runner, which
 * reports them together with their full stack trace and cause.
 *
 * @throws Exception if plan construction, optimization or job graph generation fails
 */
@Test
public void testBranchingUnion() throws Exception {
    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSet<Long> source1 = env.generateSequence(0, 1);
    DataSet<Long> source2 = env.generateSequence(0, 1);

    DataSet<Long> join1 = source1.join(source2).where("*").equalTo("*").with(new IdentityJoiner<Long>()).name("Join 1");

    // three consumers branch off the result of "Join 1"
    DataSet<Long> map1 = join1.map(new IdentityMapper<Long>()).name("Map 1");
    DataSet<Long> reduce1 = map1.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce 1");
    DataSet<Long> reduce2 = join1.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce 2");
    DataSet<Long> map2 = join1.map(new IdentityMapper<Long>()).name("Map 2");
    DataSet<Long> map3 = map2.map(new IdentityMapper<Long>()).name("Map 3");

    // "Map 2" is consumed both inside the union and as the second join input
    DataSet<Long> join2 = reduce1.union(reduce2).union(map2).union(map3).join(map2, JoinHint.REPARTITION_SORT_MERGE).where("*").equalTo("*").with(new IdentityJoiner<Long>()).name("Join 2");
    join2.output(new DiscardingOutputFormat<Long>());

    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = compileNoStats(plan);

    // Compile plan to verify that no error is thrown
    JobGraphGenerator jobGen = new JobGraphGenerator();
    jobGen.compileJobGraph(oPlan);
}
Use of org.apache.flink.optimizer.testfunctions.IdentityGroupReducer in the Apache Flink project.
From the class BranchingPlansCompilerTest, method testMultipleIterations.
/**
 * Compiles a plan that starts three separate iterations (100, 20 and 17 iterations) from the
 * same input. Each iteration's step function receives the same reduced data set as a broadcast
 * set, registered as "bc1", "bc2" and "bc3" respectively.
 *
 * <p>The test passes when the optimizer compiles the plan without throwing. Exceptions are
 * deliberately not caught: they propagate to the test runner, which reports them together with
 * their full stack trace and cause.
 *
 * @throws Exception if plan construction or optimization fails
 */
@Test
public void testMultipleIterations() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(100);

    DataSet<String> input = env.readTextFile(IN_FILE).name("source1");

    // shared by all three iterations as a broadcast set
    DataSet<String> reduced = input.map(new IdentityMapper<String>()).reduceGroup(new Top1GroupReducer<String>());

    IterativeDataSet<String> iteration1 = input.iterate(100);
    IterativeDataSet<String> iteration2 = input.iterate(20);
    IterativeDataSet<String> iteration3 = input.iterate(17);

    iteration1.closeWith(iteration1.map(new IdentityMapper<String>()).withBroadcastSet(reduced, "bc1")).output(new DiscardingOutputFormat<String>());
    iteration2.closeWith(iteration2.reduceGroup(new Top1GroupReducer<String>()).withBroadcastSet(reduced, "bc2")).output(new DiscardingOutputFormat<String>());
    iteration3.closeWith(iteration3.reduceGroup(new IdentityGroupReducer<String>()).withBroadcastSet(reduced, "bc3")).output(new DiscardingOutputFormat<String>());

    Plan plan = env.createProgramPlan();

    // compiling is the actual check: it must complete without an exception
    compileNoStats(plan);
}
Use of org.apache.flink.optimizer.testfunctions.IdentityGroupReducer in the Apache Flink project.
From the class GroupOrderTest, method testReduceWithGroupOrder.
/**
 * Checks the strategies chosen for a group reduce that groups on field 1 and sorts each group
 * on field 3 in descending order.
 *
 * <p>Expected: the sink input is FORWARD; the reducer input is hash partitioned on field 1 and
 * locally sorted on fields (1, 3), with the second sort key descending.
 */
@Test
public void testReduceWithGroupOrder() {
    // construct the plan
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);
    DataSet<Tuple4<Long, Long, Long, Long>> set1 = env.readCsvFile("/tmp/fake.csv").types(Long.class, Long.class, Long.class, Long.class);
    set1.groupBy(1).sortGroup(3, Order.DESCENDING).reduceGroup(new IdentityGroupReducer<Tuple4<Long, Long, Long, Long>>()).name("Reduce").output(new DiscardingOutputFormat<Tuple4<Long, Long, Long, Long>>()).name("Sink");
    Plan plan = env.createProgramPlan();

    OptimizedPlan oPlan;
    try {
        oPlan = compileNoStats(plan);
    } catch (CompilerException ce) {
        // Fail with the original exception attached as the cause, so the test report shows
        // why compilation failed instead of only a generic message.
        throw new AssertionError("The pact compiler is unable to compile this plan correctly.", ce);
    }

    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan);
    SinkPlanNode sinkNode = resolver.getNode("Sink");
    SingleInputPlanNode reducer = resolver.getNode("Reduce");

    // verify the strategies
    Assert.assertEquals(ShipStrategyType.FORWARD, sinkNode.getInput().getShipStrategy());
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, reducer.getInput().getShipStrategy());

    Channel c = reducer.getInput();
    Assert.assertEquals(LocalStrategy.SORT, c.getLocalStrategy());

    FieldList ship = new FieldList(1);
    FieldList local = new FieldList(1, 3);
    Assert.assertEquals(ship, c.getShipStrategyKeys());
    Assert.assertEquals(local, c.getLocalStrategyKeys());

    // the first local sort direction must match the reducer's own sort order
    Assert.assertEquals(reducer.getSortOrders(0)[0], c.getLocalStrategySortOrder()[0]);

    // check that we indeed sort descending
    Assert.assertFalse(c.getLocalStrategySortOrder()[1]);
}
Use of org.apache.flink.optimizer.testfunctions.IdentityGroupReducer in the Apache Flink project.
From the class ParallelismChangeTest, method checkPropertyHandlingWithIncreasingGlobalParallelism2.
/**
 * Simple job: Map -> Reduce -> Map -> Reduce, where every function forwards all fields and
 * therefore preserves all properties.
 *
 * <p>The parallelism is doubled between the second map and the second reduce, so the hash
 * partitioning established for the first reduce cannot be reused. The optimizer is expected to
 * re-establish hash partitioning between the second map and the second reduce.
 */
@Test
public void checkPropertyHandlingWithIncreasingGlobalParallelism2() {
    final int baseParallelism = DEFAULT_PARALLELISM;

    // build the plan, one operator per statement
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(baseParallelism);

    DataSet<Long> source = env.generateSequence(0, 1).setParallelism(baseParallelism);
    DataSet<Long> firstMap = source.map(new IdentityMapper<Long>()).withForwardedFields("*").setParallelism(baseParallelism).name("Map1");
    DataSet<Long> firstReduce = firstMap.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).withForwardedFields("*").setParallelism(baseParallelism).name("Reduce1");
    DataSet<Long> secondMap = firstReduce.map(new IdentityMapper<Long>()).withForwardedFields("*").setParallelism(baseParallelism).name("Map2");

    // from here on the parallelism is doubled
    DataSet<Long> secondReduce = secondMap.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).withForwardedFields("*").setParallelism(baseParallelism * 2).name("Reduce2");
    secondReduce.output(new DiscardingOutputFormat<Long>()).setParallelism(baseParallelism * 2).name("Sink");

    Plan plan = env.createProgramPlan();

    // hand the plan to the optimizer
    OptimizedPlan optimized = compileNoStats(plan);

    // Walk backwards from the sink: Sink <- Reduce2 <- Map2.
    SinkPlanNode sink = optimized.getDataSinks().iterator().next();
    SingleInputPlanNode secondReduceNode = (SingleInputPlanNode) sink.getPredecessor();
    SingleInputPlanNode secondMapNode = (SingleInputPlanNode) secondReduceNode.getPredecessor();

    ShipStrategyType secondMapInput = secondMapNode.getInput().getShipStrategy();
    ShipStrategyType secondReduceInput = secondReduceNode.getInput().getShipStrategy();

    // Map2 runs with the same parallelism as Reduce1, so its input is expected to be forwarded;
    // Reduce2 runs with twice the parallelism, so its input is expected to be hash partitioned.
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, secondMapInput);
    Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.PARTITION_HASH, secondReduceInput);
}
Use of org.apache.flink.optimizer.testfunctions.IdentityGroupReducer in the Apache Flink project.
From the class UnionReplacementTest, method testUnionForwardOutput.
/**
 * Tests that the outgoing connection of a Union node is FORWARD. See FLINK-9031 for a bug
 * report.
 *
 * <p>The issue is quite hard to reproduce, as the plan choice seems to depend on the
 * enumeration order due to the lack of plan costs. This test is a smaller variant of the job
 * that was reported to fail.
 *
 * <pre>
 *       /-\           /- PreFilter1 -\-/- Union - PostFilter1 - Reducer1 -\
 * Src -<   >- Union -<                X                                    >- Union - Out
 *       \-/           \- PreFilter2 -/-\- Union - PostFilter2 - Reducer2 -/
 * </pre>
 */
@Test
public void testUnionForwardOutput() throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    // a single source, unioned with itself and passed through an identity map
    DataSet<Tuple2<Long, Long>> source = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> selfUnion = source.union(source).map(new IdentityMapper<>());

    // two pass-through filters branching off the mapped union
    DataSet<Tuple2<Long, Long>> preFilter1 = selfUnion.filter(value -> true).name("preFilter1");
    DataSet<Tuple2<Long, Long>> preFilter2 = selfUnion.filter(value -> true).name("preFilter2");

    // each branch unions both pre-filters, filters again and group-reduces on a different field
    DataSet<Tuple2<Long, Long>> postFilter1 = preFilter1.union(preFilter2).filter(value -> true).name("postFilter1");
    DataSet<Tuple2<Long, Long>> reduced1 = postFilter1.groupBy(0).reduceGroup(new IdentityGroupReducer<>()).name("reducer1");

    DataSet<Tuple2<Long, Long>> postFilter2 = preFilter1.union(preFilter2).filter(value -> true).name("postFilter2");
    DataSet<Tuple2<Long, Long>> reduced2 = postFilter2.groupBy(1).reduceGroup(new IdentityGroupReducer<>()).name("reducer2");

    reduced1.union(reduced2).output(new DiscardingOutputFormat<>());

    // -----------------------------------------------------------------------------------------
    // Verify optimized plan
    // -----------------------------------------------------------------------------------------
    OptimizedPlan optimizedPlan = compileNoStats(env.createProgramPlan());
    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(optimizedPlan);

    // the post-filters are the direct consumers of the two inner unions
    SingleInputPlanNode firstUnionConsumer = resolver.getNode("postFilter1");
    SingleInputPlanNode secondUnionConsumer = resolver.getNode("postFilter2");

    assertEquals(ShipStrategyType.FORWARD, firstUnionConsumer.getInput().getShipStrategy());
    assertEquals(ShipStrategyType.FORWARD, secondUnionConsumer.getInput().getShipStrategy());
}
Aggregations