Use of org.apache.flink.optimizer.testfunctions.IdentityKeyExtractor in project flink by apache.
From the class PipelineBreakingTest, method testBranchingPlanNotReJoined:
/**
 * Tests that branching plans, where the branches are not re-joined,
 * do not place pipeline breakers.
 *
 * <pre>
 *                      /---> (filter) -> (sink)
 *                     /
 *                    /
 * (source) -> (map) -----------------\
 *                    \               (join) -> (sink)
 *                     \   (source) --/
 *                      \
 *                       \
 *                        \-> (sink)
 * </pre>
 */
@Test
public void testBranchingPlanNotReJoined() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Integer> data = env.readTextFile("/never/accessed")
            .map(new MapFunction<String, Integer>() {
                @Override
                public Integer map(String value) {
                    return 0;
                }
            });

        // output 1
        data.filter(new FilterFunction<Integer>() {
            @Override
            public boolean filter(Integer value) {
                return false;
            }
        }).output(new DiscardingOutputFormat<Integer>());

        // output 2 does a join before the sink
        data.join(env.fromElements(1, 2, 3, 4))
            .where(new IdentityKeyExtractor<Integer>())
            .equalTo(new IdentityKeyExtractor<Integer>())
            .output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>());

        // output 3 is direct
        data.output(new DiscardingOutputFormat<Integer>());

        List<DataSinkNode> sinks = convertPlan(env.createProgramPlan());

        // gather the optimizer DAG nodes
        DataSinkNode sinkAfterFilter = sinks.get(0);
        DataSinkNode sinkAfterJoin = sinks.get(1);
        DataSinkNode sinkDirect = sinks.get(2);

        SingleInputNode filterNode = (SingleInputNode) sinkAfterFilter.getPredecessorNode();
        SingleInputNode mapNode = (SingleInputNode) filterNode.getPredecessorNode();

        TwoInputNode joinNode = (TwoInputNode) sinkAfterJoin.getPredecessorNode();
        SingleInputNode joinInput = (SingleInputNode) joinNode.getSecondPredecessorNode();

        // verify the non-pipeline-breaking status of every connection
        assertFalse(sinkAfterFilter.getInputConnection().isBreakingPipeline());
        assertFalse(sinkAfterJoin.getInputConnection().isBreakingPipeline());
        assertFalse(sinkDirect.getInputConnection().isBreakingPipeline());
        assertFalse(filterNode.getIncomingConnection().isBreakingPipeline());
        assertFalse(mapNode.getIncomingConnection().isBreakingPipeline());
        assertFalse(joinNode.getFirstIncomingConnection().isBreakingPipeline());
        assertFalse(joinNode.getSecondIncomingConnection().isBreakingPipeline());
        assertFalse(joinInput.getIncomingConnection().isBreakingPipeline());

        // some other sanity checks on the plan construction (cannot hurt)
        assertEquals(mapNode, ((SingleInputNode) joinNode.getFirstPredecessorNode()).getPredecessorNode());
        assertEquals(mapNode, sinkDirect.getPredecessorNode());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
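Every example on this page keys DataSets by the element itself via IdentityKeyExtractor. For orientation, here is a minimal sketch of what such a KeySelector looks like, assuming it simply returns its input as the key (the actual test function in flink-optimizer may differ in detail):

public class IdentityKeyExtractor<T> implements KeySelector<T, T> {

    private static final long serialVersionUID = 1L;

    @Override
    public T getKey(T value) {
        // identity extraction: every element is its own key
        return value;
    }
}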
Use of org.apache.flink.optimizer.testfunctions.IdentityKeyExtractor in project flink by apache.
From the class IterationsCompilerTest, method testResetPartialSolution:
@Test
public void testResetPartialSolution() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Long> width = env.generateSequence(1, 10);
        DataSet<Long> update = env.generateSequence(1, 10);
        DataSet<Long> lastGradient = env.generateSequence(1, 10);

        DataSet<Long> init = width.union(update).union(lastGradient);

        IterativeDataSet<Long> iteration = init.iterate(10);

        width = iteration.filter(new IdFilter<Long>());
        update = iteration.filter(new IdFilter<Long>());
        lastGradient = iteration.filter(new IdFilter<Long>());

        DataSet<Long> gradient = width.map(new IdentityMapper<Long>());

        DataSet<Long> term = gradient.join(lastGradient)
            .where(new IdentityKeyExtractor<Long>())
            .equalTo(new IdentityKeyExtractor<Long>())
            .with(new JoinFunction<Long, Long, Long>() {
                public Long join(Long first, Long second) {
                    return null;
                }
            });

        update = update.map(new RichMapFunction<Long, Long>() {
            public Long map(Long value) {
                return null;
            }
        }).withBroadcastSet(term, "some-name");

        DataSet<Long> result = iteration.closeWith(width.union(update).union(lastGradient));
        result.output(new DiscardingOutputFormat<Long>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
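Note that the RichMapFunction above registers term as a broadcast set but never reads it; the test only checks that the plan compiles. In a real job, the broadcast variable would be fetched by name from the runtime context. A sketch of how that read would look (the arithmetic in map is illustrative only, not part of the test):

update = update.map(new RichMapFunction<Long, Long>() {

    private List<Long> terms;

    @Override
    public void open(Configuration parameters) {
        // fetch the broadcast set registered under "some-name"
        this.terms = getRuntimeContext().getBroadcastVariable("some-name");
    }

    @Override
    public Long map(Long value) {
        // illustrative use of the broadcast data
        return terms.isEmpty() ? value : value + terms.get(0);
    }
}).withBroadcastSet(term, "some-name");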
Use of org.apache.flink.optimizer.testfunctions.IdentityKeyExtractor in project flink by apache.
From the class NestedIterationsTest, method testBulkIterationInClosure:
@Test
public void testBulkIterationInClosure() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Long> data1 = env.generateSequence(1, 100);
        DataSet<Long> data2 = env.generateSequence(1, 100);

        IterativeDataSet<Long> firstIteration = data1.iterate(100);
        DataSet<Long> firstResult = firstIteration.closeWith(firstIteration.map(new IdentityMapper<Long>()));

        IterativeDataSet<Long> mainIteration = data2.map(new IdentityMapper<Long>()).iterate(100);

        DataSet<Long> joined = mainIteration.join(firstResult)
            .where(new IdentityKeyExtractor<Long>())
            .equalTo(new IdentityKeyExtractor<Long>())
            .with(new DummyFlatJoinFunction<Long>());

        DataSet<Long> mainResult = mainIteration.closeWith(joined);
        mainResult.output(new DiscardingOutputFormat<Long>());

        Plan p = env.createProgramPlan();

        // optimizer should be able to translate this
        OptimizedPlan op = compileNoStats(p);

        // job graph generator should be able to translate this
        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
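This test and the next both join through DummyFlatJoinFunction. A minimal sketch of what such a placeholder join plausibly looks like, assuming it forwards the left element of each matched pair (the real test helper may differ):

public class DummyFlatJoinFunction<T> implements FlatJoinFunction<T, T, T> {

    private static final long serialVersionUID = 1L;

    @Override
    public void join(T first, T second, Collector<T> out) {
        // the join result is irrelevant to these plan tests; just emit one side
        out.collect(first);
    }
}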
Use of org.apache.flink.optimizer.testfunctions.IdentityKeyExtractor in project flink by apache.
From the class BranchingPlansCompilerTest, method testIterationWithStaticInput:
/**
 * <pre>
 *            +---------Iteration-------+
 *            |                         |
 *   /--map--< >----\                   |
 *  /         |      \        /--------< >---sink
 * src-map    |       join---/          |
 *  \         |      /                  |
 *   \        +-----/-------------------+
 *    \            /
 *     \--reduce--/
 * </pre>
 */
@Test
public void testIterationWithStaticInput() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(100);

        DataSet<Long> source = env.generateSequence(1, 1000000);

        DataSet<Long> mapped = source.map(new IdentityMapper<Long>());

        DataSet<Long> reduced = source.groupBy(new IdentityKeyExtractor<Long>()).reduce(new SelectOneReducer<Long>());

        IterativeDataSet<Long> iteration = mapped.iterate(10);
        iteration.closeWith(
                iteration.join(reduced)
                    .where(new IdentityKeyExtractor<Long>())
                    .equalTo(new IdentityKeyExtractor<Long>())
                    .with(new DummyFlatJoinFunction<Long>()))
            .output(new DiscardingOutputFormat<Long>());

        compileNoStats(env.createProgramPlan());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
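The static join input is produced by SelectOneReducer, which collapses each group to a single representative. A plausible minimal sketch (an assumption about the helper, not its verbatim source):

public class SelectOneReducer<T> implements ReduceFunction<T> {

    private static final long serialVersionUID = 1L;

    @Override
    public T reduce(T value1, T value2) {
        // keep an arbitrary element per group; which one does not matter here
        return value1;
    }
}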
Use of org.apache.flink.optimizer.testfunctions.IdentityKeyExtractor in project flink by apache.
From the class BranchingPlansCompilerTest, method testBranchingBroadcastVariable:
@Test
public void testBranchingBroadcastVariable() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(100);

    DataSet<String> input1 = env.readTextFile(IN_FILE).name("source1");
    DataSet<String> input2 = env.readTextFile(IN_FILE).name("source2");
    DataSet<String> input3 = env.readTextFile(IN_FILE).name("source3");

    DataSet<String> result1 = input1
        .map(new IdentityMapper<String>())
        .reduceGroup(new Top1GroupReducer<String>())
        .withBroadcastSet(input3, "bc");

    DataSet<String> result2 = input2
        .map(new IdentityMapper<String>())
        .reduceGroup(new Top1GroupReducer<String>())
        .withBroadcastSet(input3, "bc");

    result1.join(result2)
        .where(new IdentityKeyExtractor<String>())
        .equalTo(new IdentityKeyExtractor<String>())
        .with(new RichJoinFunction<String, String, String>() {
            @Override
            public String join(String first, String second) {
                return null;
            }
        })
        .withBroadcastSet(input3, "bc1")
        .withBroadcastSet(input1, "bc2")
        .withBroadcastSet(result1, "bc3")
        .output(new DiscardingOutputFormat<String>());

    Plan plan = env.createProgramPlan();

    try {
        compileNoStats(plan);
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
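All of these tests hand their plans to compileNoStats, a helper inherited from Flink's CompilerTestBase. Conceptually it runs the optimizer without dataset statistics, so size estimates fall back to defaults. A rough sketch of the idea (the constructor arguments and body are assumptions, not the verbatim helper):

private OptimizedPlan compileNoStats(Plan p) {
    // assumed: optimize with empty statistics and the default cost model
    Optimizer compiler = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), new Configuration());
    return compiler.compile(p);
}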