use of org.apache.flink.optimizer.testfunctions.Top1GroupReducer in project flink by apache.
the class BranchingPlansCompilerTest method testBranchingBroadcastVariable.
/**
 * Builds a program in which the same source is used as a broadcast set by several operators
 * (two group-reduces and a join), and in which the join additionally receives another source
 * and one of its own inputs as broadcast sets. The test passes if the plan compiles without
 * throwing.
 */
@Test
public void testBranchingBroadcastVariable() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(100);

    DataSet<String> source1 = env.readTextFile(IN_FILE).name("source1");
    DataSet<String> source2 = env.readTextFile(IN_FILE).name("source2");
    DataSet<String> source3 = env.readTextFile(IN_FILE).name("source3");

    // first branch: map -> group-reduce, with source3 broadcast as "bc"
    DataSet<String> mapped1 = source1.map(new IdentityMapper<String>());
    DataSet<String> branch1 = mapped1
            .reduceGroup(new Top1GroupReducer<String>())
            .withBroadcastSet(source3, "bc");

    // second branch: same shape, same broadcast set
    DataSet<String> mapped2 = source2.map(new IdentityMapper<String>());
    DataSet<String> branch2 = mapped2
            .reduceGroup(new Top1GroupReducer<String>())
            .withBroadcastSet(source3, "bc");

    // the join result is irrelevant; the program is only compiled, never executed here
    RichJoinFunction<String, String, String> nullJoiner = new RichJoinFunction<String, String, String>() {
        @Override
        public String join(String first, String second) {
            return null;
        }
    };

    // join both branches and attach three more broadcast sets, one of them a join input
    branch1.join(branch2)
            .where(new IdentityKeyExtractor<String>())
            .equalTo(new IdentityKeyExtractor<String>())
            .with(nullJoiner)
            .withBroadcastSet(source3, "bc1")
            .withBroadcastSet(source1, "bc2")
            .withBroadcastSet(branch1, "bc3")
            .output(new DiscardingOutputFormat<String>());

    Plan program = env.createProgramPlan();

    try {
        compileNoStats(program);
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
use of org.apache.flink.optimizer.testfunctions.Top1GroupReducer in project flink by apache.
the class BranchingPlansCompilerTest method testMultipleIterations.
/**
 * Builds three independent bulk iterations over the same source, each of whose step functions
 * receives the same group-reduced data set as a broadcast set. The test passes if the plan
 * compiles without throwing.
 */
@Test
public void testMultipleIterations() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(100);

    DataSet<String> source = env.readTextFile(IN_FILE).name("source1");

    // data set that is broadcast into every iteration
    DataSet<String> mapped = source.map(new IdentityMapper<String>());
    DataSet<String> broadcastData = mapped.reduceGroup(new Top1GroupReducer<String>());

    IterativeDataSet<String> loopA = source.iterate(100);
    IterativeDataSet<String> loopB = source.iterate(20);
    IterativeDataSet<String> loopC = source.iterate(17);

    // iteration 1: map step
    DataSet<String> stepA = loopA
            .map(new IdentityMapper<String>())
            .withBroadcastSet(broadcastData, "bc1");
    loopA.closeWith(stepA).output(new DiscardingOutputFormat<String>());

    // iteration 2: group-reduce step (Top1GroupReducer)
    DataSet<String> stepB = loopB
            .reduceGroup(new Top1GroupReducer<String>())
            .withBroadcastSet(broadcastData, "bc2");
    loopB.closeWith(stepB).output(new DiscardingOutputFormat<String>());

    // iteration 3: group-reduce step (IdentityGroupReducer)
    DataSet<String> stepC = loopC
            .reduceGroup(new IdentityGroupReducer<String>())
            .withBroadcastSet(broadcastData, "bc3");
    loopC.closeWith(stepC).output(new DiscardingOutputFormat<String>());

    Plan program = env.createProgramPlan();

    try {
        compileNoStats(program);
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
use of org.apache.flink.optimizer.testfunctions.Top1GroupReducer in project flink by apache.
the class PipelineBreakingTest method testSimpleForwardPlan.
/**
 * Verifies that a purely linear pipeline contains no pipeline-breaking connections.
 *
 * <pre>
 *     (source) -> (map) -> (filter) -> (groupBy / reduce) -> (sink)
 * </pre>
 */
@Test
public void testSimpleForwardPlan() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<String> lines = env.readTextFile("/never/accessed");

        // the functions' results are irrelevant; only the plan shape is inspected
        MapFunction<String, Integer> toZero = new MapFunction<String, Integer>() {
            @Override
            public Integer map(String value) {
                return 0;
            }
        };
        FilterFunction<Integer> rejectAll = new FilterFunction<Integer>() {
            @Override
            public boolean filter(Integer value) {
                return false;
            }
        };

        lines.map(toZero)
                .filter(rejectAll)
                .groupBy(new IdentityKeyExtractor<Integer>())
                .reduceGroup(new Top1GroupReducer<Integer>())
                .output(new DiscardingOutputFormat<Integer>());

        List<DataSinkNode> sinks = convertPlan(env.createProgramPlan());

        // walk backwards from the only sink towards the source
        DataSinkNode sink = sinks.get(0);
        SingleInputNode reduce = (SingleInputNode) sink.getPredecessorNode();
        SingleInputNode keyExtractor = (SingleInputNode) reduce.getPredecessorNode();
        SingleInputNode filter = (SingleInputNode) keyExtractor.getPredecessorNode();
        SingleInputNode map = (SingleInputNode) filter.getPredecessorNode();

        // none of the connections may break the pipeline
        assertFalse(sink.getInputConnection().isBreakingPipeline());
        assertFalse(reduce.getIncomingConnection().isBreakingPipeline());
        assertFalse(keyExtractor.getIncomingConnection().isBreakingPipeline());
        assertFalse(filter.getIncomingConnection().isBreakingPipeline());
        assertFalse(map.getIncomingConnection().isBreakingPipeline());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
use of org.apache.flink.optimizer.testfunctions.Top1GroupReducer in project flink by apache.
the class DataExchangeModeForwardTest method verifySimpleForwardPlan.
/**
 * Compiles a linear source -> map -> filter -> groupBy -> reduceGroup -> sink program under
 * the given execution mode and checks the data exchange mode on the input of each plan node.
 *
 * @param execMode the execution mode set on the environment's config before compiling
 * @param toMap expected exchange mode on the map node's input
 * @param toFilter expected exchange mode on the filter node's input
 * @param toKeyExtractor expected exchange mode on the key extractor node's input
 * @param toCombiner expected exchange mode on the combiner node's input
 * @param toReduce expected exchange mode on the reduce node's input
 * @param toSink expected exchange mode on the sink node's input
 */
private void verifySimpleForwardPlan(ExecutionMode execMode, DataExchangeMode toMap, DataExchangeMode toFilter, DataExchangeMode toKeyExtractor, DataExchangeMode toCombiner, DataExchangeMode toReduce, DataExchangeMode toSink) {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.getConfig().setExecutionMode(execMode);

        DataSet<String> lines = env.readTextFile("/never/accessed");

        // the functions' results are irrelevant; only the compiled plan is inspected
        MapFunction<String, Integer> toZero = new MapFunction<String, Integer>() {
            @Override
            public Integer map(String value) {
                return 0;
            }
        };
        FilterFunction<Integer> rejectAll = new FilterFunction<Integer>() {
            @Override
            public boolean filter(Integer value) {
                return false;
            }
        };

        lines.map(toZero)
                .filter(rejectAll)
                .groupBy(new IdentityKeyExtractor<Integer>())
                .reduceGroup(new Top1GroupReducer<Integer>())
                .output(new DiscardingOutputFormat<Integer>());

        OptimizedPlan compiled = compileNoStats(env.createProgramPlan());

        // walk backwards from the sink; the optimized plan has a combiner before the reducer
        SinkPlanNode sink = compiled.getDataSinks().iterator().next();
        SingleInputPlanNode reduce = (SingleInputPlanNode) sink.getPredecessor();
        SingleInputPlanNode combine = (SingleInputPlanNode) reduce.getPredecessor();
        SingleInputPlanNode keyExtractor = (SingleInputPlanNode) combine.getPredecessor();
        SingleInputPlanNode filter = (SingleInputPlanNode) keyExtractor.getPredecessor();
        SingleInputPlanNode map = (SingleInputPlanNode) filter.getPredecessor();

        // compare expected and actual exchange modes, from source side to sink side
        assertEquals(toMap, map.getInput().getDataExchangeMode());
        assertEquals(toFilter, filter.getInput().getDataExchangeMode());
        assertEquals(toKeyExtractor, keyExtractor.getInput().getDataExchangeMode());
        assertEquals(toCombiner, combine.getInput().getDataExchangeMode());
        assertEquals(toReduce, reduce.getInput().getDataExchangeMode());
        assertEquals(toSink, sink.getInput().getDataExchangeMode());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
use of org.apache.flink.optimizer.testfunctions.Top1GroupReducer in project flink by apache.
the class PipelineBreakingTest method testReJoinedBranches.
/**
 * Tests that pipeline breakers are placed on branches that are re-joined later.
 *
 * <pre>
 *                                         /-> (sink)
 *                                        /
 *                         /-> (reduce) -+          /-> (flatmap) -> (sink)
 *                        /               \        /
 *     (source) -> (map) -                (join) -+-----\
 *                        \               /              \
 *                         \-> (filter) -+                \
 *                                       \                (co group) -> (sink)
 *                                        \                /
 *                                         \-> (reduce) - /
 * </pre>
 */
@Test
public void testReJoinedBranches() {
    try {
        // ---- assemble the program ----
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        MapFunction<Long, Tuple2<Long, Long>> duplicate = new MapFunction<Long, Tuple2<Long, Long>>() {
            @Override
            public Tuple2<Long, Long> map(Long value) {
                return new Tuple2<Long, Long>(value, value);
            }
        };
        DataSet<Tuple2<Long, Long>> pairs = env.fromElements(33L, 44L).map(duplicate);

        // upper branch: reduce, with its own sink (first sink of the plan)
        DataSet<Tuple2<Long, Long>> reducedPairs = pairs.groupBy(0).reduce(new SelectOneReducer<Tuple2<Long, Long>>());
        reducedPairs.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        // lower branch: filter
        FilterFunction<Tuple2<Long, Long>> rejectAll = new FilterFunction<Tuple2<Long, Long>>() {
            @Override
            public boolean filter(Tuple2<Long, Long> value) throws Exception {
                return false;
            }
        };
        DataSet<Tuple2<Long, Long>> filteredPairs = pairs.filter(rejectAll);

        // both branches meet again in the join
        DataSet<Tuple2<Long, Long>> joinedPairs = reducedPairs
                .join(filteredPairs)
                .where(1)
                .equalTo(1)
                .with(new DummyFlatJoinFunction<Tuple2<Long, Long>>());

        // join -> flatmap -> sink (second sink of the plan)
        joinedPairs
                .flatMap(new IdentityFlatMapper<Tuple2<Long, Long>>())
                .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        // join + (filter -> reduce) -> co-group -> sink (third sink of the plan)
        DataSet<Tuple2<Long, Long>> filteredTop = filteredPairs
                .groupBy(1)
                .reduceGroup(new Top1GroupReducer<Tuple2<Long, Long>>());
        joinedPairs
                .coGroup(filteredTop)
                .where(0)
                .equalTo(0)
                .with(new DummyCoGroupFunction<Tuple2<Long, Long>, Tuple2<Long, Long>>())
                .output(new DiscardingOutputFormat<Tuple2<Tuple2<Long, Long>, Tuple2<Long, Long>>>());

        List<DataSinkNode> sinks = convertPlan(env.createProgramPlan());

        // ---- collect the optimizer DAG nodes ----
        DataSinkNode reduceSink = sinks.get(0);
        DataSinkNode flatMapSink = sinks.get(1);
        DataSinkNode coGroupSink = sinks.get(2);

        SingleInputNode reduce = (SingleInputNode) reduceSink.getPredecessorNode();
        SingleInputNode map = (SingleInputNode) reduce.getPredecessorNode();

        SingleInputNode flatMap = (SingleInputNode) flatMapSink.getPredecessorNode();
        TwoInputNode join = (TwoInputNode) flatMap.getPredecessorNode();
        SingleInputNode filter = (SingleInputNode) join.getSecondPredecessorNode();

        TwoInputNode coGroup = (TwoInputNode) coGroupSink.getPredecessorNode();
        SingleInputNode secondReduce = (SingleInputNode) coGroup.getSecondPredecessorNode();

        // ---- sanity checks: the DAG has the shape drawn above ----
        assertEquals(reduce, join.getFirstPredecessorNode());
        assertEquals(map, filter.getPredecessorNode());
        assertEquals(join, coGroup.getFirstPredecessorNode());
        assertEquals(filter, secondReduce.getPredecessorNode());

        // ---- connections that must stay pipelined ----
        assertFalse(reduceSink.getInputConnection().isBreakingPipeline());
        assertFalse(flatMapSink.getInputConnection().isBreakingPipeline());
        assertFalse(coGroupSink.getInputConnection().isBreakingPipeline());
        assertFalse(map.getIncomingConnection().isBreakingPipeline());
        assertFalse(flatMap.getIncomingConnection().isBreakingPipeline());
        assertFalse(join.getFirstIncomingConnection().isBreakingPipeline());
        assertFalse(coGroup.getFirstIncomingConnection().isBreakingPipeline());
        assertFalse(coGroup.getSecondIncomingConnection().isBreakingPipeline());

        // ---- connections that must be pipeline breakers ----
        assertTrue(reduce.getIncomingConnection().isBreakingPipeline());
        assertTrue(filter.getIncomingConnection().isBreakingPipeline());
        assertTrue(secondReduce.getIncomingConnection().isBreakingPipeline());
        assertTrue(join.getSecondIncomingConnection().isBreakingPipeline());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Aggregations