use of org.apache.flink.optimizer.plan.SinkPlanNode in project flink by apache.
In the class DistinctAndGroupingOptimizerTest, the method testDistinctDestroysPartitioningOfNonDistinctFields:
@Test
public void testDistinctDestroysPartitioningOfNonDistinctFields() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(4);

        @SuppressWarnings("unchecked")
        DataSet<Tuple2<Long, Long>> data = env
                .fromElements(new Tuple2<Long, Long>(0L, 0L), new Tuple2<Long, Long>(1L, 1L))
                .map(new IdentityMapper<Tuple2<Long, Long>>())
                .setParallelism(4);

        data.distinct(1).groupBy(0).sum(1)
                .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getInput().getSource();
        SingleInputPlanNode distinctReducer = (SingleInputPlanNode) combiner.getInput().getSource();

        assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());

        // reducer must repartition, because it works on a different field
        assertEquals(ShipStrategyType.PARTITION_HASH, reducer.getInput().getShipStrategy());

        assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());

        // distinct reducer is partitioned
        assertEquals(ShipStrategyType.PARTITION_HASH, distinctReducer.getInput().getShipStrategy());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
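For contrast, a hypothetical variant (not part of the test above, and reusing the data set built there): if the distinct key matched the grouping key, the hash partitioning created for the distinct reducer could be reused downstream, so the grouped reducer's input would be expected to stay FORWARD instead of requiring a second PARTITION_HASH.

// Hypothetical variant: distinct and grouping both on field 0.
// The partitioning produced for the distinct reducer can then be reused,
// so no additional repartitioning should appear in front of the grouped reducer.
data.distinct(0).groupBy(0).sum(1)
        .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());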
use of org.apache.flink.optimizer.plan.SinkPlanNode in project flink by apache.
In the class IterationCompilerTest, the method testWorksetIterationWithUnionRoot:
@Test
public void testWorksetIterationWithUnionRoot() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(43);

        DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 20)
                .map(new MapFunction<Long, Tuple2<Long, Long>>() {
                    @Override
                    public Tuple2<Long, Long> map(Long value) {
                        return null;
                    }
                });

        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iter = input.iterateDelta(input, 100, 0);
        iter.closeWith(
                iter.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>())
                        .union(iter.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>())),
                iter.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>())
                        .union(iter.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>())))
            .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        WorksetIterationPlanNode iterNode = (WorksetIterationPlanNode) sink.getInput().getSource();

        // make sure that the roots are part of the dynamic path:
        // the "NoOp"s that come after the unions
        SingleInputPlanNode nextWorksetNoop = (SingleInputPlanNode) iterNode.getNextWorkSetPlanNode();
        SingleInputPlanNode solutionDeltaNoop = (SingleInputPlanNode) iterNode.getSolutionSetDeltaPlanNode();

        NAryUnionPlanNode nextWorksetUnion = (NAryUnionPlanNode) nextWorksetNoop.getInput().getSource();
        NAryUnionPlanNode solutionDeltaUnion = (NAryUnionPlanNode) solutionDeltaNoop.getInput().getSource();

        assertTrue(nextWorksetNoop.isOnDynamicPath());
        assertTrue(nextWorksetNoop.getCostWeight() >= 1);

        assertTrue(solutionDeltaNoop.isOnDynamicPath());
        assertTrue(solutionDeltaNoop.getCostWeight() >= 1);

        assertTrue(nextWorksetUnion.isOnDynamicPath());
        assertTrue(nextWorksetUnion.getCostWeight() >= 1);

        assertTrue(solutionDeltaUnion.isOnDynamicPath());
        assertTrue(solutionDeltaUnion.getCostWeight() >= 1);

        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
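As a hedged alternative sketch (not taken from the original test): since an OptimizedPlan accepts a visitor, the same dynamic-path property can be checked for every node in a single traversal instead of casting individual nodes. This assumes org.apache.flink.util.Visitor and the accept method that JobGraphGenerator itself uses when compiling the plan.

// Sketch: traverse the whole optimized plan and check that every node on the
// dynamic path carries a cost weight of at least 1.
op.accept(new Visitor<PlanNode>() {
    @Override
    public boolean preVisit(PlanNode node) {
        if (node.isOnDynamicPath()) {
            assertTrue(node.getCostWeight() >= 1);
        }
        return true; // keep descending into the node's inputs
    }

    @Override
    public void postVisit(PlanNode node) {
    }
});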
use of org.apache.flink.optimizer.plan.SinkPlanNode in project flink by apache.
In the class IterationCompilerTest, the method testIterationWithUnionRoot:
@Test
public void testIterationWithUnionRoot() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(43);

        IterativeDataSet<Long> iteration = env.generateSequence(-4, 1000).iterate(100);
        iteration.closeWith(
                iteration.map(new IdentityMapper<Long>())
                        .union(iteration.map(new IdentityMapper<Long>())))
            .output(new DiscardingOutputFormat<Long>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        BulkIterationPlanNode iterNode = (BulkIterationPlanNode) sink.getInput().getSource();

        // make sure that the root is part of the dynamic path:
        // the "NoOp" that comes after the union
        SingleInputPlanNode noop = (SingleInputPlanNode) iterNode.getRootOfStepFunction();
        NAryUnionPlanNode union = (NAryUnionPlanNode) noop.getInput().getSource();

        assertTrue(noop.isOnDynamicPath());
        assertTrue(noop.getCostWeight() >= 1);

        assertTrue(union.isOnDynamicPath());
        assertTrue(union.getCostWeight() >= 1);

        // see that the job graph generator can translate this
        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
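The IdentityMapper used throughout these examples comes from Flink's optimizer test utilities; a functionally equivalent sketch (the real helper may extend RichMapFunction rather than implementing MapFunction directly) looks like this:

// Minimal identity mapper: forwards each record unchanged, giving the optimizer
// a map operator to place in the plan without altering the data.
public class IdentityMapper<T> implements MapFunction<T, T> {

    @Override
    public T map(T value) {
        return value;
    }
}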
use of org.apache.flink.optimizer.plan.SinkPlanNode in project flink by apache.
In the class ReduceCompilationTest, the method testGroupedReduceWithFieldPositionKey:
@Test
public void testGroupedReduceWithFieldPositionKey() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);

        DataSet<Tuple2<String, Double>> data = env
                .readCsvFile("file:///will/never/be/read")
                .types(String.class, Double.class)
                .name("source")
                .setParallelism(6);

        data.groupBy(1)
            .reduce(new RichReduceFunction<Tuple2<String, Double>>() {
                @Override
                public Tuple2<String, Double> reduce(Tuple2<String, Double> value1, Tuple2<String, Double> value2) {
                    return null;
                }
            }).name("reducer")
            .output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);

        // get the original nodes
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");

        // get the combiner
        SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();

        // check wiring
        assertEquals(sourceNode, combineNode.getInput().getSource());
        assertEquals(reduceNode, sinkNode.getInput().getSource());

        // check the strategies
        assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());
        assertEquals(DriverStrategy.SORTED_PARTIAL_REDUCE, combineNode.getDriverStrategy());

        // check the keys
        assertEquals(new FieldList(1), reduceNode.getKeys(0));
        assertEquals(new FieldList(1), combineNode.getKeys(0));
        assertEquals(new FieldList(1), reduceNode.getInput().getLocalStrategyKeys());

        // check parallelism
        assertEquals(6, sourceNode.getParallelism());
        assertEquals(6, combineNode.getParallelism());
        assertEquals(8, reduceNode.getParallelism());
        assertEquals(8, sinkNode.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
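Hypothetical follow-up checks (not part of the test above, reusing its combineNode and reduceNode variables), consistent with the ship strategies asserted in the first example: the combiner runs before the shuffle at the source parallelism of 6, so its input should be FORWARD, while the grouped reducer sits behind a hash repartition up to parallelism 8.

// Sketch: the combiner is wired locally to the source, the reducer behind a hash partition.
assertEquals(ShipStrategyType.FORWARD, combineNode.getInput().getShipStrategy());
assertEquals(ShipStrategyType.PARTITION_HASH, reduceNode.getInput().getShipStrategy());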
use of org.apache.flink.optimizer.plan.SinkPlanNode in project flink by apache.
In the class ReduceCompilationTest, the method testGroupedReduceWithHint:
@Test
public void testGroupedReduceWithHint() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(8);

        DataSet<Tuple2<String, Double>> data = env
                .readCsvFile("file:///will/never/be/read")
                .types(String.class, Double.class)
                .name("source")
                .setParallelism(6);

        data.groupBy(new KeySelector<Tuple2<String, Double>, String>() {
                public String getKey(Tuple2<String, Double> value) {
                    return value.f0;
                }
            })
            .reduce(new RichReduceFunction<Tuple2<String, Double>>() {
                @Override
                public Tuple2<String, Double> reduce(Tuple2<String, Double> value1, Tuple2<String, Double> value2) {
                    return null;
                }
            }).setCombineHint(CombineHint.HASH).name("reducer")
            .output(new DiscardingOutputFormat<Tuple2<String, Double>>()).name("sink");

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(op);

        // get the original nodes
        SourcePlanNode sourceNode = resolver.getNode("source");
        SingleInputPlanNode reduceNode = resolver.getNode("reducer");
        SinkPlanNode sinkNode = resolver.getNode("sink");

        // get the combiner
        SingleInputPlanNode combineNode = (SingleInputPlanNode) reduceNode.getInput().getSource();

        // get the key extractors and projectors
        SingleInputPlanNode keyExtractor = (SingleInputPlanNode) combineNode.getInput().getSource();
        SingleInputPlanNode keyProjector = (SingleInputPlanNode) sinkNode.getInput().getSource();

        // check wiring
        assertEquals(sourceNode, keyExtractor.getInput().getSource());
        assertEquals(keyProjector, sinkNode.getInput().getSource());

        // check the strategies
        assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());
        assertEquals(DriverStrategy.HASHED_PARTIAL_REDUCE, combineNode.getDriverStrategy());

        // check the keys
        assertEquals(new FieldList(0), reduceNode.getKeys(0));
        assertEquals(new FieldList(0), combineNode.getKeys(0));
        assertEquals(new FieldList(0), reduceNode.getInput().getLocalStrategyKeys());

        // check parallelism
        assertEquals(6, sourceNode.getParallelism());
        assertEquals(6, keyExtractor.getParallelism());
        assertEquals(6, combineNode.getParallelism());
        assertEquals(8, reduceNode.getParallelism());
        assertEquals(8, keyProjector.getParallelism());
        assertEquals(8, sinkNode.getParallelism());
    } catch (Exception e) {
        System.err.println(e.getMessage());
        e.printStackTrace();
        fail(e.getClass().getSimpleName() + " in test: " + e.getMessage());
    }
}
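For comparison, a hedged variant (not part of the original test): with CombineHint.SORT, or with no hint at all as in the field-position example above, the combiner would be expected to use the sort-based partial reduce instead of the hash-based one.

// Sketch: same pipeline built with .setCombineHint(CombineHint.SORT) (or without a hint);
// the strategy asserted on the resulting combineNode would then be sort-based.
assertEquals(DriverStrategy.SORTED_PARTIAL_REDUCE, combineNode.getDriverStrategy());
assertEquals(DriverStrategy.SORTED_REDUCE, reduceNode.getDriverStrategy());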