Use of org.apache.flink.optimizer.plan.SingleInputPlanNode in the Apache Flink project.
From the class DataExchangeModeClosedBranchingTest, method verifyBranchingJoiningPlan.
/**
 * Compiles a branching-and-joining test program under the given execution mode and verifies
 * the data exchange mode of every connection in the resulting optimized plan.
 *
 * <p>Program shape: two elements are mapped to pairs; the pairs are consumed both by a grouped
 * reduce (which also feeds the sink named "reduceSink") and by a filter. The reduced and the
 * filtered branch are joined; the join result goes to a flat map (feeding "flatMapSink") and to
 * a co-group whose second input is a group-reduce over the filtered branch (feeding "cgSink").
 *
 * @param execMode execution mode set on the environment's config before compiling
 * @param toMap expected exchange mode on the input of the map node
 * @param toReduceCombiner expected exchange mode on the input of the node preceding the reduce node
 * @param toReduce expected exchange mode on the input of the reduce node
 * @param toFilter expected exchange mode on the input of the filter node
 * @param toReduceSink expected exchange mode on the input of "reduceSink"
 * @param toJoin1 expected exchange mode on the first join input
 * @param toJoin2 expected exchange mode on the second join input
 * @param toOtherReduceCombiner expected exchange mode on the input of the node preceding the
 *     second (group-)reduce node
 * @param toOtherReduce expected exchange mode on the input of the second (group-)reduce node
 * @param toFlatMap expected exchange mode on the input of the flat map node
 * @param toFlatMapSink expected exchange mode on the input of "flatMapSink"
 * @param toCoGroup1 expected exchange mode on the first co-group input
 * @param toCoGroup2 expected exchange mode on the second co-group input
 * @param toCoGroupSink expected exchange mode on the input of "cgSink"
 */
private void verifyBranchingJoiningPlan(
        ExecutionMode execMode,
        DataExchangeMode toMap,
        DataExchangeMode toReduceCombiner,
        DataExchangeMode toReduce,
        DataExchangeMode toFilter,
        DataExchangeMode toReduceSink,
        DataExchangeMode toJoin1,
        DataExchangeMode toJoin2,
        DataExchangeMode toOtherReduceCombiner,
        DataExchangeMode toOtherReduce,
        DataExchangeMode toFlatMap,
        DataExchangeMode toFlatMapSink,
        DataExchangeMode toCoGroup1,
        DataExchangeMode toCoGroup2,
        DataExchangeMode toCoGroupSink) {
    try {
        // ---- assemble the program ----
        final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
        environment.getConfig().setExecutionMode(execMode);

        // turns each element into a pair carrying the element twice
        final MapFunction<Long, Tuple2<Long, Long>> pairUp = new MapFunction<Long, Tuple2<Long, Long>>() {
            @Override
            public Tuple2<Long, Long> map(Long value) {
                return new Tuple2<Long, Long>(value, value);
            }
        };
        final DataSet<Tuple2<Long, Long>> pairs = environment.fromElements(33L, 44L).map(pairUp);

        // first branch: grouped reduce, which also has its own sink
        final DataSet<Tuple2<Long, Long>> reducedPairs =
                pairs.groupBy(0).reduce(new SelectOneReducer<Tuple2<Long, Long>>());
        reducedPairs.output(new DiscardingOutputFormat<Tuple2<Long, Long>>()).name("reduceSink");

        // second branch: a filter that rejects every record
        final FilterFunction<Tuple2<Long, Long>> rejectAll = new FilterFunction<Tuple2<Long, Long>>() {
            @Override
            public boolean filter(Tuple2<Long, Long> value) throws Exception {
                return false;
            }
        };
        final DataSet<Tuple2<Long, Long>> filteredPairs = pairs.filter(rejectAll);

        // the two branches meet again in the join
        final DataSet<Tuple2<Long, Long>> joinedPairs = reducedPairs
                .join(filteredPairs)
                .where(1)
                .equalTo(1)
                .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

        joinedPairs
                .flatMap(new IdentityFlatMapper<Tuple2<Long, Long>>())
                .output(new DiscardingOutputFormat<Tuple2<Long, Long>>())
                .name("flatMapSink");

        // the join result is co-grouped with a group-reduce over the filtered branch
        final DataSet<Tuple2<Long, Long>> groupReducedFiltered =
                filteredPairs.groupBy(1).reduceGroup(new Top1GroupReducer<Tuple2<Long, Long>>());
        joinedPairs
                .coGroup(groupReducedFiltered)
                .where(0)
                .equalTo(0)
                .with(new DummyCoGroupFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>())
                .output(new DiscardingOutputFormat<Tuple2<Tuple2<Long, Long>, Tuple2<Long, Long>>>())
                .name("cgSink");

        // ---- compile and locate the plan nodes ----
        final OptimizedPlan plan = compileNoStats(environment.createProgramPlan());

        final SinkPlanNode sinkOfReduce = findSink(plan.getDataSinks(), "reduceSink");
        final SinkPlanNode sinkOfFlatMap = findSink(plan.getDataSinks(), "flatMapSink");
        final SinkPlanNode sinkOfCoGroup = findSink(plan.getDataSinks(), "cgSink");

        final DualInputPlanNode coGroup = (DualInputPlanNode) sinkOfCoGroup.getPredecessor();
        final DualInputPlanNode join = (DualInputPlanNode) coGroup.getInput1().getSource();

        final SingleInputPlanNode otherReduce = (SingleInputPlanNode) coGroup.getInput2().getSource();
        final SingleInputPlanNode otherReduceCombiner = (SingleInputPlanNode) otherReduce.getPredecessor();

        final SingleInputPlanNode reduce = (SingleInputPlanNode) join.getInput1().getSource();
        final SingleInputPlanNode reduceCombiner = (SingleInputPlanNode) reduce.getPredecessor();
        // the reduce node must be shared between the join and the reduce sink
        assertEquals(reduce, sinkOfReduce.getPredecessor());

        final SingleInputPlanNode filter = (SingleInputPlanNode) join.getInput2().getSource();
        // the filter node must be shared between the join and the second reduce branch
        assertEquals(filter, otherReduceCombiner.getPredecessor());

        final SingleInputPlanNode map = (SingleInputPlanNode) filter.getPredecessor();
        // the map node must be shared between the filter and the first reduce branch
        assertEquals(map, reduceCombiner.getPredecessor());

        final SingleInputPlanNode flatMap = (SingleInputPlanNode) sinkOfFlatMap.getPredecessor();
        assertEquals(join, flatMap.getPredecessor());

        // ---- verify the data exchange modes, walking from the sinks towards the source ----
        assertEquals(toReduceSink, sinkOfReduce.getInput().getDataExchangeMode());
        assertEquals(toFlatMapSink, sinkOfFlatMap.getInput().getDataExchangeMode());
        assertEquals(toCoGroupSink, sinkOfCoGroup.getInput().getDataExchangeMode());

        assertEquals(toCoGroup1, coGroup.getInput1().getDataExchangeMode());
        assertEquals(toCoGroup2, coGroup.getInput2().getDataExchangeMode());

        assertEquals(toJoin1, join.getInput1().getDataExchangeMode());
        assertEquals(toJoin2, join.getInput2().getDataExchangeMode());

        assertEquals(toOtherReduce, otherReduce.getInput().getDataExchangeMode());
        assertEquals(toOtherReduceCombiner, otherReduceCombiner.getInput().getDataExchangeMode());

        assertEquals(toFlatMap, flatMap.getInput().getDataExchangeMode());

        assertEquals(toFilter, filter.getInput().getDataExchangeMode());
        assertEquals(toReduce, reduce.getInput().getDataExchangeMode());
        assertEquals(toReduceCombiner, reduceCombiner.getInput().getDataExchangeMode());

        assertEquals(toMap, map.getInput().getDataExchangeMode());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Use of org.apache.flink.optimizer.plan.SingleInputPlanNode in the Apache Flink project.
From the class DataExchangeModeForwardTest, method verifySimpleForwardPlan.
/**
 * Compiles a linear map / filter / grouped group-reduce program under the given execution mode
 * and verifies the data exchange mode on the input of each node in the optimized plan.
 *
 * <p>The plan is walked backwards from its single sink through five single-input nodes, which
 * are treated, in that order, as reduce, combine, key extractor, filter and map.
 *
 * @param execMode execution mode set on the environment's config before compiling
 * @param toMap expected exchange mode on the input of the map node
 * @param toFilter expected exchange mode on the input of the filter node
 * @param toKeyExtractor expected exchange mode on the input of the key extractor node
 * @param toCombiner expected exchange mode on the input of the combine node
 * @param toReduce expected exchange mode on the input of the reduce node
 * @param toSink expected exchange mode on the input of the sink
 */
private void verifySimpleForwardPlan(
        ExecutionMode execMode,
        DataExchangeMode toMap,
        DataExchangeMode toFilter,
        DataExchangeMode toKeyExtractor,
        DataExchangeMode toCombiner,
        DataExchangeMode toReduce,
        DataExchangeMode toSink) {
    try {
        // ---- assemble the program ----
        final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
        environment.getConfig().setExecutionMode(execMode);

        final DataSet<String> lines = environment.readTextFile("/never/accessed");

        // maps every line to the constant 0
        final MapFunction<String, Integer> toZero = new MapFunction<String, Integer>() {
            @Override
            public Integer map(String value) {
                return 0;
            }
        };
        // rejects every record
        final FilterFunction<Integer> rejectAll = new FilterFunction<Integer>() {
            @Override
            public boolean filter(Integer value) {
                return false;
            }
        };

        final DataSet<Integer> mapped = lines.map(toZero);
        final DataSet<Integer> filtered = mapped.filter(rejectAll);
        filtered
                .groupBy(new IdentityKeyExtractor<Integer>())
                .reduceGroup(new Top1GroupReducer<Integer>())
                .output(new DiscardingOutputFormat<Integer>());

        // ---- compile and walk the plan from the sink back to the map ----
        final OptimizedPlan plan = compileNoStats(environment.createProgramPlan());

        final SinkPlanNode sink = plan.getDataSinks().iterator().next();
        final SingleInputPlanNode reduce = (SingleInputPlanNode) sink.getPredecessor();
        final SingleInputPlanNode combine = (SingleInputPlanNode) reduce.getPredecessor();
        final SingleInputPlanNode keyExtractor = (SingleInputPlanNode) combine.getPredecessor();
        final SingleInputPlanNode filter = (SingleInputPlanNode) keyExtractor.getPredecessor();
        final SingleInputPlanNode map = (SingleInputPlanNode) filter.getPredecessor();

        // ---- verify the data exchange modes, in data-flow order ----
        assertEquals(toMap, map.getInput().getDataExchangeMode());
        assertEquals(toFilter, filter.getInput().getDataExchangeMode());
        assertEquals(toKeyExtractor, keyExtractor.getInput().getDataExchangeMode());
        assertEquals(toCombiner, combine.getInput().getDataExchangeMode());
        assertEquals(toReduce, reduce.getInput().getDataExchangeMode());
        assertEquals(toSink, sink.getInput().getDataExchangeMode());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Use of org.apache.flink.optimizer.plan.SingleInputPlanNode in the Apache Flink project.
From the class DistinctAndGroupingOptimizerTest, method testDistinctDestroysPartitioningOfNonDistinctFields.
/**
 * Verifies the ship strategies chosen when a distinct on field 1 is followed by a grouping on
 * field 0: the grouped reducer is expected to hash-partition its input again, because it
 * groups on a different field than the distinct.
 */
@Test
public void testDistinctDestroysPartitioningOfNonDistinctFields() {
    try {
        final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
        environment.setParallelism(4);

        // the generic varargs call below is what produces the unchecked warning
        @SuppressWarnings("unchecked")
        final DataSet<Tuple2<Long, Long>> elements = environment.fromElements(
                new Tuple2<Long, Long>(0L, 0L),
                new Tuple2<Long, Long>(1L, 1L));
        final DataSet<Tuple2<Long, Long>> input =
                elements.map(new IdentityMapper<Tuple2<Long, Long>>()).setParallelism(4);

        input
                .distinct(1)
                .groupBy(0)
                .sum(1)
                .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        final Plan program = environment.createProgramPlan();
        final OptimizedPlan optimized = compileNoStats(program);

        // walk backwards: sink <- reducer <- combiner <- distinct reducer
        final SinkPlanNode sinkNode = optimized.getDataSinks().iterator().next();
        final SingleInputPlanNode reduceNode = (SingleInputPlanNode) sinkNode.getInput().getSource();
        final SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();
        final SingleInputPlanNode distinctNode = (SingleInputPlanNode) combineNode.getInput().getSource();

        assertEquals(ShipStrategyType.FORWARD, sinkNode.getInput().getShipStrategy());

        // the reducer groups on another field than the distinct, so it has to repartition
        assertEquals(ShipStrategyType.PARTITION_HASH, reduceNode.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, combineNode.getInput().getShipStrategy());

        // the distinct reducer receives hash-partitioned input
        assertEquals(ShipStrategyType.PARTITION_HASH, distinctNode.getInput().getShipStrategy());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Use of org.apache.flink.optimizer.plan.SingleInputPlanNode in the Apache Flink project.
From the class GroupReduceCompilationTest, method testGroupedReduceWithFieldPositionKeyNonCombinable.
/**
 * Verifies the optimized plan of a group-reduce keyed by field position 1 whose function is a
 * plain {@code RichGroupReduceFunction}: the reducer must read directly from the source (no
 * intermediate combiner node), use the sorted group-reduce driver, be keyed on field 1, and
 * the nodes must carry the expected parallelism (6 for the source, 8 for reducer and sink).
 */
@Test
public void testGroupedReduceWithFieldPositionKeyNonCombinable() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);

        DataSet<Tuple2<String, Double>> data = env.readCsvFile("file:///will/never/be/read")
                .types(String.class, Double.class)
                .name("source")
                .setParallelism(6);

        data.groupBy(1).reduceGroup(new RichGroupReduceFunction<Tuple2<String, Double>, Tuple2<String, Double>>() {
            @Override
            public void reduce(Iterable<Tuple2<String, Double>> values, Collector<Tuple2<String, Double>> out) {
                // no-op: this test only inspects the compiled plan
            }
        }).name("reducer").output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);
        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);

        // get the original nodes
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");

        // check wiring: the reducer is fed directly by the source, i.e. no combiner in between
        assertEquals(sourceNode, reduceNode.getInput().getSource());
        assertEquals(reduceNode, sinkNode.getInput().getSource());

        // check the reducer's driver strategy (there is no combiner node in this plan)
        assertEquals(DriverStrategy.SORTED_GROUP_REDUCE, reduceNode.getDriverStrategy());

        // check the keys
        assertEquals(new FieldList(1), reduceNode.getKeys(0));
        assertEquals(new FieldList(1), reduceNode.getInput().getLocalStrategyKeys());

        // check parallelism
        assertEquals(6, sourceNode.getParallelism());
        assertEquals(8, reduceNode.getParallelism());
        assertEquals(8, sinkNode.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
Use of org.apache.flink.optimizer.plan.SingleInputPlanNode in the Apache Flink project.
From the class GroupReduceCompilationTest, method testAllReduceWithCombiner.
/**
 * Verifies the optimized plan of a non-grouped (all-)group-reduce that is marked combinable:
 * a combine node must sit between source and reducer, reducer and combiner must use the
 * all-group-reduce driver strategies, and source and combiner must run with parallelism 8
 * while reducer and sink run with parallelism 1.
 */
@Test
public void testAllReduceWithCombiner() {
    try {
        final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
        environment.setParallelism(8);

        final DataSet<Long> sequence = environment.generateSequence(1, 8000000).name("source");

        final GroupReduceOperator<Long, Long> reduceOp =
                sequence.reduceGroup(new CombineReducer2()).name("reducer");
        reduceOp.setCombinable(true);
        reduceOp.output(new DiscardingOutputFormat<Long>()).name("sink");

        final Plan program = environment.createProgramPlan();
        final OptimizedPlan optimized = compileNoStats(program);
        final OptimizerPlanNodeResolver nodes = getOptimizerPlanNodeResolver(optimized);

        // look up the named nodes of the original program
        final SourcePlanNode source = nodes.getNode("source");
        final SingleInputPlanNode reducer = nodes.getNode("reducer");
        final SinkPlanNode sink = nodes.getNode("sink");

        // the combiner has no name of its own; it is whatever feeds the reducer
        final SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource();

        // wiring: source -> combiner -> reducer -> sink
        assertEquals(source, combiner.getInput().getSource());
        assertEquals(reducer, sink.getInput().getSource());

        // driver strategies of the reducer and of its combiner
        assertEquals(DriverStrategy.ALL_GROUP_REDUCE, reducer.getDriverStrategy());
        assertEquals(DriverStrategy.ALL_GROUP_REDUCE_COMBINE, combiner.getDriverStrategy());

        // parallelism of each node
        assertEquals(8, source.getParallelism());
        assertEquals(8, combiner.getParallelism());
        assertEquals(1, reducer.getParallelism());
        assertEquals(1, sink.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
Aggregations