Use of org.apache.flink.optimizer.plan.OptimizedPlan in project flink by apache.
From the class DistinctCompilationTest, the method testDistinctWithCombineHint:
@Test
public void testDistinctWithCombineHint() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);

        DataSet<Tuple2<String, Double>> data = env
            .readCsvFile("file:///will/never/be/read")
            .types(String.class, Double.class)
            .name("source")
            .setParallelism(6);

        data.distinct().setCombineHint(CombineHint.HASH)
            .name("reducer")
            .output(new DiscardingOutputFormat<Tuple2<String, Double>>())
            .name("sink");

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);

        // get the original nodes
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");

        // get the combiner
        SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();

        // check wiring
        assertEquals(sourceNode, combineNode.getInput().getSource());
        assertEquals(reduceNode, sinkNode.getInput().getSource());

        // check the driver strategies: the final reduce is sort-based, while
        // the combiner uses the hash strategy requested via the combine hint
        assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());
        assertEquals(DriverStrategy.HASHED_PARTIAL_REDUCE, combineNode.getDriverStrategy());

        // check the keys
        assertEquals(new FieldList(0, 1), reduceNode.getKeys(0));
        assertEquals(new FieldList(0, 1), combineNode.getKeys(0));
        assertEquals(new FieldList(0, 1), reduceNode.getInput().getLocalStrategyKeys());

        // check parallelism
        assertEquals(6, sourceNode.getParallelism());
        assertEquals(6, combineNode.getParallelism());
        assertEquals(8, reduceNode.getParallelism());
        assertEquals(8, sinkNode.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
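Every snippet on this page obtains its OptimizedPlan through the compileNoStats(...) helper, which the tests inherit from CompilerTestBase. A minimal sketch of what that amounts to, assuming the public org.apache.flink.optimizer.Optimizer API (the helper's exact wiring may differ):

import org.apache.flink.configuration.Configuration;
import org.apache.flink.optimizer.Optimizer;
import org.apache.flink.optimizer.costs.DefaultCostEstimator;
import org.apache.flink.optimizer.plan.OptimizedPlan;

// Sketch: compile a Plan without data statistics (hence "no stats"),
// mirroring how CompilerTestBase constructs its no-statistics compiler.
// 'p' is the Plan produced by env.createProgramPlan() in the test above.
Optimizer noStatsCompiler = new Optimizer(null, new DefaultCostEstimator(), new Configuration());
OptimizedPlan optimizedPlan = noStatsCompiler.compile(p);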
Use of org.apache.flink.optimizer.plan.OptimizedPlan in project flink by apache.
From the class BranchingPlansCompilerTest, the method testBranchingWithMultipleDataSinks:
/**
 *
 * <pre>
 *                (SINK A)
 *                    |    (SINK B)    (SINK C)
 *                  CROSS    /          /
 *                /     \   |  +------+
 *               /       \  | /
 *            REDUCE      MATCH2
 *               |    +---/    \
 *                \  /          |
 *                 MAP          |
 *                  |           |
 *               COGROUP      MATCH1
 *               /     \     /     \
 *        (SRC A)    (SRC B)    (SRC C)
 * </pre>
 */
@Test
public void testBranchingWithMultipleDataSinks() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(DEFAULT_PARALLELISM);

        DataSet<Tuple2<Long, Long>> sourceA = env.generateSequence(1, 10000000).map(new Duplicator<Long>());
        DataSet<Tuple2<Long, Long>> sourceB = env.generateSequence(1, 10000000).map(new Duplicator<Long>());
        DataSet<Tuple2<Long, Long>> sourceC = env.generateSequence(1, 10000000).map(new Duplicator<Long>());

        DataSet<Tuple2<Long, Long>> mapped = sourceA.coGroup(sourceB)
            .where(0).equalTo(1)
            .with(new CoGroupFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {
                @Override
                public void coGroup(Iterable<Tuple2<Long, Long>> first,
                        Iterable<Tuple2<Long, Long>> second,
                        Collector<Tuple2<Long, Long>> out) {
                    // intentionally empty: the test only exercises plan compilation
                }
            })
            .map(new IdentityMapper<Tuple2<Long, Long>>());

        DataSet<Tuple2<Long, Long>> joined = sourceB.join(sourceC)
            .where(0).equalTo(1)
            .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

        DataSet<Tuple2<Long, Long>> joined2 = mapped.join(joined)
            .where(1).equalTo(1)
            .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

        DataSet<Tuple2<Long, Long>> reduced = mapped
            .groupBy(1)
            .reduceGroup(new Top1GroupReducer<Tuple2<Long, Long>>());

        reduced.cross(joined2).output(new DiscardingOutputFormat<Tuple2<Tuple2<Long, Long>, Tuple2<Long, Long>>>());
        joined2.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
        joined2.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan plan = env.createProgramPlan();
        OptimizedPlan oPlan = compileNoStats(plan);
        new JobGraphGenerator().compileJobGraph(oPlan);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
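The final call above hands the OptimizedPlan to the plan translator. As a usage note, JobGraphGenerator.compileJobGraph(...) returns the JobGraph that would be submitted to the runtime; the test simply discards it. Capturing it would look like this (a small sketch against the org.apache.flink.optimizer.plantranslate API):

import org.apache.flink.optimizer.plantranslate.JobGraphGenerator;
import org.apache.flink.runtime.jobgraph.JobGraph;

// Sketch: translate the optimized plan into a submittable JobGraph.
// 'oPlan' is the OptimizedPlan produced by compileNoStats(plan) above.
JobGraphGenerator jobGen = new JobGraphGenerator();
JobGraph jobGraph = jobGen.compileJobGraph(oPlan);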
Use of org.apache.flink.optimizer.plan.OptimizedPlan in project flink by apache.
From the class BranchingPlansCompilerTest, the method testBranchingWithMultipleDataSinks2:
/**
 *
 * <pre>
 *                 (SRC A)
 *                    |
 *                 (MAP A)
 *                /       \
 *           (MAP B)    (MAP C)
 *            /          /    \
 *       (SINK A)  (SINK B)  (SINK C)
 * </pre>
 */
@SuppressWarnings("unchecked")
@Test
public void testBranchingWithMultipleDataSinks2() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(DEFAULT_PARALLELISM);

        DataSet<Long> source = env.generateSequence(1, 10000);

        DataSet<Long> mappedA = source.map(new IdentityMapper<Long>());
        DataSet<Long> mappedB = mappedA.map(new IdentityMapper<Long>());
        DataSet<Long> mappedC = mappedA.map(new IdentityMapper<Long>());

        mappedB.output(new DiscardingOutputFormat<Long>());
        mappedC.output(new DiscardingOutputFormat<Long>());
        mappedC.output(new DiscardingOutputFormat<Long>());

        Plan plan = env.createProgramPlan();
        Set<Operator<?>> sinks = new HashSet<Operator<?>>(plan.getDataSinks());

        OptimizedPlan oPlan = compileNoStats(plan);

        // ---------- check the optimizer plan ----------

        // number of sinks
        assertEquals("Wrong number of data sinks.", 3, oPlan.getDataSinks().size());

        // remove matching sinks to check relation
        for (SinkPlanNode sink : oPlan.getDataSinks()) {
            assertTrue(sinks.remove(sink.getProgramOperator()));
        }
        assertTrue(sinks.isEmpty());

        new JobGraphGenerator().compileJobGraph(oPlan);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Use of org.apache.flink.optimizer.plan.OptimizedPlan in project flink by apache.
From the class BranchingPlansCompilerTest, the method testCostComputationWithMultipleDataSinks:
@Test
public void testCostComputationWithMultipleDataSinks() {
    final int SINKS = 5;

    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(DEFAULT_PARALLELISM);

        DataSet<Long> source = env.generateSequence(1, 10000);

        DataSet<Long> mappedA = source.map(new IdentityMapper<Long>());
        DataSet<Long> mappedC = source.map(new IdentityMapper<Long>());

        for (int sink = 0; sink < SINKS; sink++) {
            mappedA.output(new DiscardingOutputFormat<Long>());
            mappedC.output(new DiscardingOutputFormat<Long>());
        }

        Plan plan = env.createProgramPlan("Plans With Multiple Data Sinks");
        OptimizedPlan oPlan = compileNoStats(plan);
        new JobGraphGenerator().compileJobGraph(oPlan);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Use of org.apache.flink.optimizer.plan.OptimizedPlan in project flink by apache.
From the class BranchingPlansCompilerTest, the method testBranchingUnion:
@Test
public void testBranchingUnion() {
    try {
        // construct the plan
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(DEFAULT_PARALLELISM);

        DataSet<Long> source1 = env.generateSequence(0, 1);
        DataSet<Long> source2 = env.generateSequence(0, 1);

        DataSet<Long> join1 = source1.join(source2).where("*").equalTo("*")
            .with(new IdentityJoiner<Long>())
            .name("Join 1");

        DataSet<Long> map1 = join1.map(new IdentityMapper<Long>()).name("Map 1");
        DataSet<Long> reduce1 = map1.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce 1");
        DataSet<Long> reduce2 = join1.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce 2");
        DataSet<Long> map2 = join1.map(new IdentityMapper<Long>()).name("Map 2");
        DataSet<Long> map3 = map2.map(new IdentityMapper<Long>()).name("Map 3");

        DataSet<Long> join2 = reduce1.union(reduce2).union(map2).union(map3)
            .join(map2, JoinHint.REPARTITION_SORT_MERGE).where("*").equalTo("*")
            .with(new IdentityJoiner<Long>())
            .name("Join 2");

        join2.output(new DiscardingOutputFormat<Long>());

        Plan plan = env.createProgramPlan();
        OptimizedPlan oPlan = compileNoStats(plan);
        JobGraphGenerator jobGen = new JobGraphGenerator();

        // compile the plan to verify that no error is thrown
        jobGen.compileJobGraph(oPlan);
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}