use of org.apache.flink.optimizer.testfunctions.IdentityMapper in project flink by apache.
the class JoinCustomPartitioningTest method testIncompatibleHashAndCustomPartitioning.
@Test
public void testIncompatibleHashAndCustomPartitioning() {
try {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple3<Long, Long, Long>> input = env.fromElements(new Tuple3<Long, Long, Long>(0L, 0L, 0L));
DataSet<Tuple3<Long, Long, Long>> partitioned = input.partitionCustom(new Partitioner<Long>() {
@Override
public int partition(Long key, int numPartitions) {
return 0;
}
}, 0).map(new IdentityMapper<Tuple3<Long, Long, Long>>()).withForwardedFields("0", "1", "2");
DataSet<Tuple3<Long, Long, Long>> grouped = partitioned.distinct(0, 1).groupBy(1).sortGroup(0, Order.ASCENDING).reduceGroup(new IdentityGroupReducerCombinable<Tuple3<Long, Long, Long>>()).withForwardedFields("0", "1");
grouped.join(partitioned, JoinHint.REPARTITION_HASH_FIRST).where(0).equalTo(0).with(new DummyFlatJoinFunction<Tuple3<Long, Long, Long>>()).output(new DiscardingOutputFormat<Tuple3<Long, Long, Long>>());
Plan p = env.createProgramPlan();
OptimizedPlan op = compileNoStats(p);
SinkPlanNode sink = op.getDataSinks().iterator().next();
DualInputPlanNode coGroup = (DualInputPlanNode) sink.getInput().getSource();
assertEquals(ShipStrategyType.PARTITION_HASH, coGroup.getInput1().getShipStrategy());
assertTrue(coGroup.getInput2().getShipStrategy() == ShipStrategyType.PARTITION_HASH || coGroup.getInput2().getShipStrategy() == ShipStrategyType.FORWARD);
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
use of org.apache.flink.optimizer.testfunctions.IdentityMapper in project flink by apache.
the class UnionReplacementTest method testUnionForwardOutput.
/**
* Tests that a the outgoing connection of a Union node is FORWARD. See FLINK-9031 for a bug
* report.
*
* <p>The issue is quite hard to reproduce as the plan choice seems to depend on the enumeration
* order due to lack of plan costs. This test is a smaller variant of the job that was reported
* to fail.
*
* <p>/-\ /- PreFilter1 -\-/- Union - PostFilter1 - Reducer1 -\ Src -< >- Union -< X >- Union -
* Out \-/ \- PreFilter2 -/-\- Union - PostFilter2 - Reducer2 -/
*/
@Test
public void testUnionForwardOutput() throws Exception {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(DEFAULT_PARALLELISM);
DataSet<Tuple2<Long, Long>> src1 = env.fromElements(new Tuple2<>(0L, 0L));
DataSet<Tuple2<Long, Long>> u1 = src1.union(src1).map(new IdentityMapper<>());
DataSet<Tuple2<Long, Long>> s1 = u1.filter(x -> true).name("preFilter1");
DataSet<Tuple2<Long, Long>> s2 = u1.filter(x -> true).name("preFilter2");
DataSet<Tuple2<Long, Long>> reduced1 = s1.union(s2).filter(x -> true).name("postFilter1").groupBy(0).reduceGroup(new IdentityGroupReducer<>()).name("reducer1");
DataSet<Tuple2<Long, Long>> reduced2 = s1.union(s2).filter(x -> true).name("postFilter2").groupBy(1).reduceGroup(new IdentityGroupReducer<>()).name("reducer2");
reduced1.union(reduced2).output(new DiscardingOutputFormat<>());
// -----------------------------------------------------------------------------------------
// Verify optimized plan
// -----------------------------------------------------------------------------------------
OptimizedPlan optimizedPlan = compileNoStats(env.createProgramPlan());
OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(optimizedPlan);
SingleInputPlanNode unionOut1 = resolver.getNode("postFilter1");
SingleInputPlanNode unionOut2 = resolver.getNode("postFilter2");
assertEquals(ShipStrategyType.FORWARD, unionOut1.getInput().getShipStrategy());
assertEquals(ShipStrategyType.FORWARD, unionOut2.getInput().getShipStrategy());
}
use of org.apache.flink.optimizer.testfunctions.IdentityMapper in project flink by apache.
the class ParallelismChangeTest method checkPropertyHandlingWithIncreasingGlobalParallelism1.
/**
* Simple Job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all
* properties).
*
* <p>Increases parallelism between 1st reduce and 2nd map, so the hash partitioning from 1st
* reduce is not reusable. Expected to re-establish partitioning between reduce and map, via
* hash, because random is a full network transit as well.
*/
@Test
public void checkPropertyHandlingWithIncreasingGlobalParallelism1() {
final int p = DEFAULT_PARALLELISM;
// construct the plan
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(p);
DataSet<Long> set1 = env.generateSequence(0, 1).setParallelism(p);
set1.map(new IdentityMapper<Long>()).withForwardedFields("*").setParallelism(p).name("Map1").groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).withForwardedFields("*").setParallelism(p).name("Reduce1").map(new IdentityMapper<Long>()).withForwardedFields("*").setParallelism(p * 2).name("Map2").groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).withForwardedFields("*").setParallelism(p * 2).name("Reduce2").output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink");
Plan plan = env.createProgramPlan();
// submit the plan to the compiler
OptimizedPlan oPlan = compileNoStats(plan);
// check the optimized Plan
// when reducer 1 distributes its data across the instances of map2, it needs to employ a
// local hash method,
// because map2 has twice as many instances and key/value pairs with the same key need to be
// processed by the same
// mapper respectively reducer
SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();
ShipStrategyType mapIn = map2Node.getInput().getShipStrategy();
ShipStrategyType redIn = red2Node.getInput().getShipStrategy();
Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.PARTITION_HASH, mapIn);
Assert.assertEquals("Invalid ship strategy for an operator.", ShipStrategyType.FORWARD, redIn);
}
use of org.apache.flink.optimizer.testfunctions.IdentityMapper in project flink by apache.
the class ParallelismChangeTest method checkPropertyHandlingWithIncreasingLocalParallelism.
/**
* Simple Job: Map -> Reduce -> Map -> Reduce. All functions preserve all fields (hence all
* properties).
*
* <p>Increases parallelism between 1st reduce and 2nd map, such that more tasks are on one
* instance. Expected to re-establish partitioning between map and reduce via a local hash.
*/
@Test
public void checkPropertyHandlingWithIncreasingLocalParallelism() {
final int p = DEFAULT_PARALLELISM * 2;
// construct the plan
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(p);
DataSet<Long> set1 = env.generateSequence(0, 1).setParallelism(p);
set1.map(new IdentityMapper<Long>()).withForwardedFields("*").setParallelism(p).name("Map1").groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).withForwardedFields("*").setParallelism(p).name("Reduce1").map(new IdentityMapper<Long>()).withForwardedFields("*").setParallelism(p * 2).name("Map2").groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).withForwardedFields("*").setParallelism(p * 2).name("Reduce2").output(new DiscardingOutputFormat<Long>()).setParallelism(p * 2).name("Sink");
Plan plan = env.createProgramPlan();
// submit the plan to the compiler
OptimizedPlan oPlan = compileNoStats(plan);
// check the optimized Plan
// when reducer 1 distributes its data across the instances of map2, it needs to employ a
// local hash method,
// because map2 has twice as many instances and key/value pairs with the same key need to be
// processed by the same
// mapper respectively reducer
SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();
ShipStrategyType mapIn = map2Node.getInput().getShipStrategy();
ShipStrategyType reduceIn = red2Node.getInput().getShipStrategy();
Assert.assertTrue("Invalid ship strategy for an operator.", (ShipStrategyType.PARTITION_RANDOM == mapIn && ShipStrategyType.PARTITION_HASH == reduceIn) || (ShipStrategyType.PARTITION_HASH == mapIn && ShipStrategyType.FORWARD == reduceIn));
}
use of org.apache.flink.optimizer.testfunctions.IdentityMapper in project flink by apache.
the class ParallelismChangeTest method checkPropertyHandlingWithDecreasingParallelism.
@Test
public void checkPropertyHandlingWithDecreasingParallelism() {
final int p = DEFAULT_PARALLELISM;
// construct the plan
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(p);
env.generateSequence(0, 1).setParallelism(p * 2).map(new IdentityMapper<Long>()).withForwardedFields("*").setParallelism(p * 2).name("Map1").groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).withForwardedFields("*").setParallelism(p * 2).name("Reduce1").map(new IdentityMapper<Long>()).withForwardedFields("*").setParallelism(p).name("Map2").groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).withForwardedFields("*").setParallelism(p).name("Reduce2").output(new DiscardingOutputFormat<Long>()).setParallelism(p).name("Sink");
Plan plan = env.createProgramPlan();
// submit the plan to the compiler
OptimizedPlan oPlan = compileNoStats(plan);
// check the optimized Plan
// when reducer 1 distributes its data across the instances of map2, it needs to employ a
// local hash method,
// because map2 has twice as many instances and key/value pairs with the same key need to be
// processed by the same
// mapper respectively reducer
SinkPlanNode sinkNode = oPlan.getDataSinks().iterator().next();
SingleInputPlanNode red2Node = (SingleInputPlanNode) sinkNode.getPredecessor();
SingleInputPlanNode map2Node = (SingleInputPlanNode) red2Node.getPredecessor();
Assert.assertTrue("The no sorting local strategy.", LocalStrategy.SORT == red2Node.getInput().getLocalStrategy() || LocalStrategy.SORT == map2Node.getInput().getLocalStrategy());
Assert.assertTrue("The no partitioning ship strategy.", ShipStrategyType.PARTITION_HASH == red2Node.getInput().getShipStrategy() || ShipStrategyType.PARTITION_HASH == map2Node.getInput().getShipStrategy());
}
Aggregations