Use of org.apache.flink.optimizer.testfunctions.IdentityMapper in project flink by apache.
From the class DistinctAndGroupingOptimizerTest, the method testDistinctPreservesPartitioningOfDistinctFields:
@Test
public void testDistinctPreservesPartitioningOfDistinctFields() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(4);

        @SuppressWarnings("unchecked")
        DataSet<Tuple2<Long, Long>> data = env
                .fromElements(new Tuple2<Long, Long>(0L, 0L), new Tuple2<Long, Long>(1L, 1L))
                .map(new IdentityMapper<Tuple2<Long, Long>>())
                .setParallelism(4);

        data.distinct(0).groupBy(0).sum(1)
                .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode distinctReducer = (SingleInputPlanNode) reducer.getInput().getSource();

        assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());

        // the reducer can be forward, because it reuses the partitioning from distinct
        assertEquals(ShipStrategyType.FORWARD, reducer.getInput().getShipStrategy());

        // the distinct reducer itself is hash-partitioned
        assertEquals(ShipStrategyType.PARTITION_HASH, distinctReducer.getInput().getShipStrategy());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
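All of these snippets rely on the IdentityMapper test function, which does nothing but forward its input record. A minimal sketch of such a mapper, assuming a plain MapFunction (the actual class in flink-optimizer may extend RichMapFunction instead, but the behavior is the same):

import org.apache.flink.api.common.functions.MapFunction;

// Sketch of an identity map function: every record is forwarded unchanged.
// The real org.apache.flink.optimizer.testfunctions.IdentityMapper may be
// declared as a RichMapFunction instead of a plain MapFunction.
public class IdentityMapper<T> implements MapFunction<T, T> {

    @Override
    public T map(T value) {
        return value;
    }
}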
Use of org.apache.flink.optimizer.testfunctions.IdentityMapper in project flink by apache.
From the class BranchingPlansCompilerTest, the method testBCVariableClosure:
@Test
public void testBCVariableClosure() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<String> input = env.readTextFile(IN_FILE).name("source1");

    DataSet<String> reduced = input
            .map(new IdentityMapper<String>())
            .reduceGroup(new Top1GroupReducer<String>());

    DataSet<String> initialSolution = input
            .map(new IdentityMapper<String>())
            .withBroadcastSet(reduced, "bc");

    IterativeDataSet<String> iteration = initialSolution.iterate(100);

    iteration.closeWith(iteration.map(new IdentityMapper<String>()).withBroadcastSet(reduced, "red"))
            .output(new DiscardingOutputFormat<String>());

    Plan plan = env.createProgramPlan();

    try {
        compileNoStats(plan);
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
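The test only verifies that a plan with broadcast sets compiles; none of the identity functions actually reads the variable. For context, a rich function would access the set registered under the name "bc" roughly as follows. This is a sketch: getBroadcastVariable is part of Flink's documented RuntimeContext API, but the class and field names here are illustrative:

import java.util.List;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.configuration.Configuration;

// Sketch: a rich function that reads the broadcast set registered as "bc".
public class BroadcastReadingMapper extends RichMapFunction<String, String> {

    private List<String> bcData;   // illustrative field name

    @Override
    public void open(Configuration parameters) throws Exception {
        // materializes the broadcast set on each parallel task instance
        this.bcData = getRuntimeContext().getBroadcastVariable("bc");
    }

    @Override
    public String map(String value) {
        // use the broadcast data here; this sketch just forwards the record
        return value;
    }
}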
Use of org.apache.flink.optimizer.testfunctions.IdentityMapper in project flink by apache.
From the class BranchingPlansCompilerTest, the method testBranchingUnion:
@Test
public void testBranchingUnion() {
    try {
        // construct the plan
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(DEFAULT_PARALLELISM);

        DataSet<Long> source1 = env.generateSequence(0, 1);
        DataSet<Long> source2 = env.generateSequence(0, 1);

        DataSet<Long> join1 = source1.join(source2).where("*").equalTo("*")
                .with(new IdentityJoiner<Long>())
                .name("Join 1");

        DataSet<Long> map1 = join1.map(new IdentityMapper<Long>()).name("Map 1");

        DataSet<Long> reduce1 = map1.groupBy("*")
                .reduceGroup(new IdentityGroupReducer<Long>())
                .name("Reduce 1");

        DataSet<Long> reduce2 = join1.groupBy("*")
                .reduceGroup(new IdentityGroupReducer<Long>())
                .name("Reduce 2");

        DataSet<Long> map2 = join1.map(new IdentityMapper<Long>()).name("Map 2");
        DataSet<Long> map3 = map2.map(new IdentityMapper<Long>()).name("Map 3");

        DataSet<Long> join2 = reduce1.union(reduce2).union(map2).union(map3)
                .join(map2, JoinHint.REPARTITION_SORT_MERGE).where("*").equalTo("*")
                .with(new IdentityJoiner<Long>())
                .name("Join 2");

        join2.output(new DiscardingOutputFormat<Long>());

        Plan plan = env.createProgramPlan();
        OptimizedPlan oPlan = compileNoStats(plan);

        // compile the plan to a job graph to verify that no error is thrown
        JobGraphGenerator jobGen = new JobGraphGenerator();
        jobGen.compileJobGraph(oPlan);
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
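Besides IdentityMapper, this plan uses IdentityJoiner and IdentityGroupReducer from the same test-function package. Minimal sketches of what these presumably look like (the real classes may differ in details such as forwarded-field annotations):

import org.apache.flink.api.common.functions.GroupReduceFunction;
import org.apache.flink.api.common.functions.JoinFunction;
import org.apache.flink.util.Collector;

// Sketch: a join function that simply forwards the left-hand record.
public class IdentityJoiner<T> implements JoinFunction<T, T, T> {
    @Override
    public T join(T first, T second) {
        return first;
    }
}

// Sketch: a group reducer that emits every record of the group unchanged.
public class IdentityGroupReducer<T> implements GroupReduceFunction<T, T> {
    @Override
    public void reduce(Iterable<T> values, Collector<T> out) {
        for (T value : values) {
            out.collect(value);
        }
    }
}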
Use of org.apache.flink.optimizer.testfunctions.IdentityMapper in project flink by apache.
From the class BranchingPlansCompilerTest, the method testMultipleIterations:
@Test
public void testMultipleIterations() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(100);

    DataSet<String> input = env.readTextFile(IN_FILE).name("source1");

    DataSet<String> reduced = input
            .map(new IdentityMapper<String>())
            .reduceGroup(new Top1GroupReducer<String>());

    IterativeDataSet<String> iteration1 = input.iterate(100);
    IterativeDataSet<String> iteration2 = input.iterate(20);
    IterativeDataSet<String> iteration3 = input.iterate(17);

    iteration1.closeWith(iteration1.map(new IdentityMapper<String>()).withBroadcastSet(reduced, "bc1"))
            .output(new DiscardingOutputFormat<String>());
    iteration2.closeWith(iteration2.reduceGroup(new Top1GroupReducer<String>()).withBroadcastSet(reduced, "bc2"))
            .output(new DiscardingOutputFormat<String>());
    iteration3.closeWith(iteration3.reduceGroup(new IdentityGroupReducer<String>()).withBroadcastSet(reduced, "bc3"))
            .output(new DiscardingOutputFormat<String>());

    Plan plan = env.createProgramPlan();

    try {
        compileNoStats(plan);
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail(e.getMessage());
    }
}
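Top1GroupReducer is another small test function used here and in testBCVariableClosure above. A sketch under the assumption that it emits a single element per group; the real class may select a maximum under some ordering:

import org.apache.flink.api.common.functions.GroupReduceFunction;
import org.apache.flink.util.Collector;

// Sketch of a "top 1" group reducer: one output record per group.
// Assumption: the element emitted is simply the first of the group.
public class Top1GroupReducer<T> implements GroupReduceFunction<T, T> {
    @Override
    public void reduce(Iterable<T> values, Collector<T> out) {
        out.collect(values.iterator().next());
    }
}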
Use of org.apache.flink.optimizer.testfunctions.IdentityMapper in project flink by apache.
From the class NestedIterationsTest, the method testRejectNestedBulkIterations:
@Test
public void testRejectNestedBulkIterations() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        DataSet<Long> data = env.generateSequence(1, 100);

        IterativeDataSet<Long> outerIteration = data.iterate(100);
        IterativeDataSet<Long> innerIteration = outerIteration.map(new IdentityMapper<Long>()).iterate(100);

        DataSet<Long> innerResult = innerIteration.closeWith(innerIteration.map(new IdentityMapper<Long>()));
        DataSet<Long> outerResult = outerIteration.closeWith(innerResult.map(new IdentityMapper<Long>()));

        outerResult.output(new DiscardingOutputFormat<Long>());

        Plan p = env.createProgramPlan();
        try {
            compileNoStats(p);
        } catch (CompilerException e) {
            assertTrue(e.getMessage().toLowerCase().indexOf("nested iterations") != -1);
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
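The optimizer rejects the nested construction above with a CompilerException. Sequential iterations, by contrast, are fine: the first iteration runs to completion and its result feeds the second. A minimal sketch of that valid variant, reusing the same setup:

ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Long> data = env.generateSequence(1, 100);

// first iteration runs to completion ...
IterativeDataSet<Long> first = data.iterate(100);
DataSet<Long> firstResult = first.closeWith(first.map(new IdentityMapper<Long>()));

// ... and only then feeds the second, independent iteration
IterativeDataSet<Long> second = firstResult.iterate(100);
DataSet<Long> secondResult = second.closeWith(second.map(new IdentityMapper<Long>()));

secondResult.output(new DiscardingOutputFormat<Long>());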