use of org.apache.flink.optimizer.plantranslate.JobGraphGenerator in project flink by apache.
the class CachedMatchStrategyCompilerTest method testCorrectChoosing.
/**
* This test simulates a join of a big left side with a small right side inside of an iteration, where the small side is on a static path.
* Currently the best execution plan is a HYBRIDHASH_BUILD_SECOND_CACHED, where the small side is hashed and cached.
* This test also makes sure that all relevant plans are correctly enumerated by the optimizer.
*/
@Test
public void testCorrectChoosing() {
try {
Plan plan = getTestPlanRightStatic("");
SourceCollectorVisitor sourceCollector = new SourceCollectorVisitor();
plan.accept(sourceCollector);
for (GenericDataSourceBase<?, ?> s : sourceCollector.getSources()) {
if (s.getName().equals("bigFile")) {
this.setSourceStatistics(s, 10000000, 1000);
} else if (s.getName().equals("smallFile")) {
this.setSourceStatistics(s, 100, 100);
}
}
OptimizedPlan oPlan = compileNoStats(plan);
OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(oPlan);
DualInputPlanNode innerJoin = resolver.getNode("DummyJoiner");
// verify correct join strategy
assertEquals(DriverStrategy.HYBRIDHASH_BUILD_SECOND_CACHED, innerJoin.getDriverStrategy());
assertEquals(TempMode.NONE, innerJoin.getInput1().getTempMode());
assertEquals(TempMode.NONE, innerJoin.getInput2().getTempMode());
new JobGraphGenerator().compileJobGraph(oPlan);
} catch (Exception e) {
System.err.println(e.getMessage());
e.printStackTrace();
fail("Test errored: " + e.getMessage());
}
}
use of org.apache.flink.optimizer.plantranslate.JobGraphGenerator in project flink by apache.
the class BranchingPlansCompilerTest method testBranchingWithMultipleDataSinks.
/**
*
* <pre>
* (SINK A)
* | (SINK B) (SINK C)
* CROSS / /
* / \ | +------+
* / \ | /
* REDUCE MATCH2
* | +---/ \
* \ / |
* MAP |
* | |
* COGROUP MATCH1
* / \ / \
* (SRC A) (SRC B) (SRC C)
* </pre>
*/
@Test
public void testBranchingWithMultipleDataSinks() {
try {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(DEFAULT_PARALLELISM);
DataSet<Tuple2<Long, Long>> sourceA = env.generateSequence(1, 10000000).map(new Duplicator<Long>());
DataSet<Tuple2<Long, Long>> sourceB = env.generateSequence(1, 10000000).map(new Duplicator<Long>());
DataSet<Tuple2<Long, Long>> sourceC = env.generateSequence(1, 10000000).map(new Duplicator<Long>());
DataSet<Tuple2<Long, Long>> mapped = sourceA.coGroup(sourceB).where(0).equalTo(1).with(new CoGroupFunction<Tuple2<Long, Long>, Tuple2<Long, Long>, Tuple2<Long, Long>>() {
@Override
public void coGroup(Iterable<Tuple2<Long, Long>> first, Iterable<Tuple2<Long, Long>> second, Collector<Tuple2<Long, Long>> out) {
}
}).map(new IdentityMapper<Tuple2<Long, Long>>());
DataSet<Tuple2<Long, Long>> joined = sourceB.join(sourceC).where(0).equalTo(1).with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());
DataSet<Tuple2<Long, Long>> joined2 = mapped.join(joined).where(1).equalTo(1).with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());
DataSet<Tuple2<Long, Long>> reduced = mapped.groupBy(1).reduceGroup(new Top1GroupReducer<Tuple2<Long, Long>>());
reduced.cross(joined2).output(new DiscardingOutputFormat<Tuple2<Tuple2<Long, Long>, Tuple2<Long, Long>>>());
joined2.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
joined2.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
Plan plan = env.createProgramPlan();
OptimizedPlan oPlan = compileNoStats(plan);
new JobGraphGenerator().compileJobGraph(oPlan);
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
use of org.apache.flink.optimizer.plantranslate.JobGraphGenerator in project flink by apache.
the class BranchingPlansCompilerTest method testBranchingWithMultipleDataSinks2.
/**
*
* <pre>
* (SRC A)
* |
* (MAP A)
* / \
* (MAP B) (MAP C)
* / / \
* (SINK A) (SINK B) (SINK C)
* </pre>
*/
@SuppressWarnings("unchecked")
@Test
public void testBranchingWithMultipleDataSinks2() {
try {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(DEFAULT_PARALLELISM);
DataSet<Long> source = env.generateSequence(1, 10000);
DataSet<Long> mappedA = source.map(new IdentityMapper<Long>());
DataSet<Long> mappedB = mappedA.map(new IdentityMapper<Long>());
DataSet<Long> mappedC = mappedA.map(new IdentityMapper<Long>());
mappedB.output(new DiscardingOutputFormat<Long>());
mappedC.output(new DiscardingOutputFormat<Long>());
mappedC.output(new DiscardingOutputFormat<Long>());
Plan plan = env.createProgramPlan();
Set<Operator<?>> sinks = new HashSet<Operator<?>>(plan.getDataSinks());
OptimizedPlan oPlan = compileNoStats(plan);
// ---------- check the optimizer plan ----------
// number of sinks
assertEquals("Wrong number of data sinks.", 3, oPlan.getDataSinks().size());
// remove matching sinks to check relation
for (SinkPlanNode sink : oPlan.getDataSinks()) {
assertTrue(sinks.remove(sink.getProgramOperator()));
}
assertTrue(sinks.isEmpty());
new JobGraphGenerator().compileJobGraph(oPlan);
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
use of org.apache.flink.optimizer.plantranslate.JobGraphGenerator in project flink by apache.
the class BranchingPlansCompilerTest method testCostComputationWithMultipleDataSinks.
@Test
public void testCostComputationWithMultipleDataSinks() {
final int SINKS = 5;
try {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(DEFAULT_PARALLELISM);
DataSet<Long> source = env.generateSequence(1, 10000);
DataSet<Long> mappedA = source.map(new IdentityMapper<Long>());
DataSet<Long> mappedC = source.map(new IdentityMapper<Long>());
for (int sink = 0; sink < SINKS; sink++) {
mappedA.output(new DiscardingOutputFormat<Long>());
mappedC.output(new DiscardingOutputFormat<Long>());
}
Plan plan = env.createProgramPlan("Plans With Multiple Data Sinks");
OptimizedPlan oPlan = compileNoStats(plan);
new JobGraphGenerator().compileJobGraph(oPlan);
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
use of org.apache.flink.optimizer.plantranslate.JobGraphGenerator in project flink by apache.
the class BranchingPlansCompilerTest method testBranchingUnion.
@Test
public void testBranchingUnion() {
try {
// construct the plan
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(DEFAULT_PARALLELISM);
DataSet<Long> source1 = env.generateSequence(0, 1);
DataSet<Long> source2 = env.generateSequence(0, 1);
DataSet<Long> join1 = source1.join(source2).where("*").equalTo("*").with(new IdentityJoiner<Long>()).name("Join 1");
DataSet<Long> map1 = join1.map(new IdentityMapper<Long>()).name("Map 1");
DataSet<Long> reduce1 = map1.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce 1");
DataSet<Long> reduce2 = join1.groupBy("*").reduceGroup(new IdentityGroupReducer<Long>()).name("Reduce 2");
DataSet<Long> map2 = join1.map(new IdentityMapper<Long>()).name("Map 2");
DataSet<Long> map3 = map2.map(new IdentityMapper<Long>()).name("Map 3");
DataSet<Long> join2 = reduce1.union(reduce2).union(map2).union(map3).join(map2, JoinHint.REPARTITION_SORT_MERGE).where("*").equalTo("*").with(new IdentityJoiner<Long>()).name("Join 2");
join2.output(new DiscardingOutputFormat<Long>());
Plan plan = env.createProgramPlan();
OptimizedPlan oPlan = compileNoStats(plan);
JobGraphGenerator jobGen = new JobGraphGenerator();
//Compile plan to verify that no error is thrown
jobGen.compileJobGraph(oPlan);
} catch (Exception e) {
e.printStackTrace();
Assert.fail(e.getMessage());
}
}
Aggregations