Use of org.apache.flink.optimizer.plan.NAryUnionPlanNode in project flink by apache.
From the class UnionReplacementTest, method testConsecutiveUnionsWithBroadcast:
/**
 * Checks that a plan with consecutive UNIONs followed by a broadcast-fwd JOIN is correctly translated.
 *
 * The program can be illustrated as follows:
 *
 * Src1 -\
 *        >-> Union12--<
 * Src2 -/              \
 *                       >-> Union123 --> bc-fwd-Join -> Output
 * Src3 ----------------/                     /
 *                                           /
 * Src4 -------------------------------------/
 *
 * In the resulting plan, the broadcasting must be
 * pushed to the inputs of the unions (Src1, Src2, Src3).
 */
@Test
public void testConsecutiveUnionsWithBroadcast() throws Exception {
    // -----------------------------------------------------------------------------------------
    // Build test program
    // -----------------------------------------------------------------------------------------
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSet<Tuple2<Long, Long>> src1 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src2 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src3 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src4 = env.fromElements(new Tuple2<>(0L, 0L));

    DataSet<Tuple2<Long, Long>> union12 = src1.union(src2);
    DataSet<Tuple2<Long, Long>> union123 = union12.union(src3);

    union123
        .join(src4, JoinOperatorBase.JoinHint.BROADCAST_HASH_FIRST)
        .where(0).equalTo(0).name("join")
        .output(new DiscardingOutputFormat<Tuple2<Tuple2<Long, Long>, Tuple2<Long, Long>>>())
        .name("out");

    // -----------------------------------------------------------------------------------------
    // Verify optimized plan
    // -----------------------------------------------------------------------------------------
    OptimizedPlan optimizedPlan = compileNoStats(env.createProgramPlan());
    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(optimizedPlan);

    DualInputPlanNode join = resolver.getNode("join");

    // check that the first input of the join is broadcast
    assertEquals("First join input should be fully replicated.",
        PartitioningProperty.FULL_REPLICATION,
        join.getInput1().getGlobalProperties().getPartitioning());

    NAryUnionPlanNode union = (NAryUnionPlanNode) join.getInput1().getSource();

    // check that all union inputs are broadcast
    for (Channel c : union.getInputs()) {
        assertEquals("Union input should be fully replicated",
            PartitioningProperty.FULL_REPLICATION,
            c.getGlobalProperties().getPartitioning());
        assertEquals("Union input channel should be broadcasting",
            ShipStrategyType.BROADCAST, c.getShipStrategy());
    }
}
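The compileNoStats and getOptimizerPlanNodeResolver helpers used above are not defined in the test itself; in the Flink code base they come from the shared CompilerTestBase that the optimizer tests extend. As a rough sketch of what the compile step amounts to, assuming the flink-optimizer API (the helper name compileWithoutStats is illustrative):

import org.apache.flink.api.common.Plan;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.optimizer.DataStatistics;
import org.apache.flink.optimizer.Optimizer;
import org.apache.flink.optimizer.costs.DefaultCostEstimator;
import org.apache.flink.optimizer.plan.OptimizedPlan;

// Compiles a program plan without statistics, roughly what CompilerTestBase#compileNoStats does.
static OptimizedPlan compileWithoutStats(Plan program) {
    Optimizer compiler =
            new Optimizer(new DataStatistics(), new DefaultCostEstimator(), new Configuration());
    return compiler.compile(program);
}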
Use of org.apache.flink.optimizer.plan.NAryUnionPlanNode in project flink by apache.
From the class UnionReplacementTest, method testConsecutiveUnionsWithHashPartitioning:
/**
 * Checks that a plan with consecutive UNIONs followed by PartitionByHash is correctly translated.
 *
 * The program can be illustrated as follows:
 *
 * Src1 -\
 *        >-> Union12--<
 * Src2 -/              \
 *                       >-> Union123 -> PartitionByHash -> Output
 * Src3 ----------------/
 *
 * In the resulting plan, the hash partitioning (ShipStrategyType.PARTITION_HASH) must be
 * pushed to the inputs of the unions (Src1, Src2, Src3).
 */
@Test
public void testConsecutiveUnionsWithHashPartitioning() throws Exception {
    // -----------------------------------------------------------------------------------------
    // Build test program
    // -----------------------------------------------------------------------------------------
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSet<Tuple2<Long, Long>> src1 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src2 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src3 = env.fromElements(new Tuple2<>(0L, 0L));

    DataSet<Tuple2<Long, Long>> union12 = src1.union(src2);
    DataSet<Tuple2<Long, Long>> union123 = union12.union(src3);

    union123.partitionByHash(1)
        .output(new DiscardingOutputFormat<Tuple2<Long, Long>>())
        .name("out");

    // -----------------------------------------------------------------------------------------
    // Verify optimized plan
    // -----------------------------------------------------------------------------------------
    OptimizedPlan optimizedPlan = compileNoStats(env.createProgramPlan());
    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(optimizedPlan);

    SingleInputPlanNode sink = resolver.getNode("out");

    // check that the sink input is hash partitioned on field 1
    assertEquals("Sink input should be hash partitioned.",
        PartitioningProperty.HASH_PARTITIONED,
        sink.getInput().getGlobalProperties().getPartitioning());
    assertEquals("Sink input should be hash partitioned on 1.",
        new FieldList(1),
        sink.getInput().getGlobalProperties().getPartitioningFields());

    SingleInputPlanNode partitioner = (SingleInputPlanNode) sink.getInput().getSource();
    assertEquals(DriverStrategy.UNARY_NO_OP, partitioner.getDriverStrategy());
    assertEquals("Partitioner input should be hash partitioned.",
        PartitioningProperty.HASH_PARTITIONED,
        partitioner.getInput().getGlobalProperties().getPartitioning());
    assertEquals("Partitioner input should be hash partitioned on 1.",
        new FieldList(1),
        partitioner.getInput().getGlobalProperties().getPartitioningFields());
    assertEquals("Partitioner input channel should be forwarding",
        ShipStrategyType.FORWARD,
        partitioner.getInput().getShipStrategy());

    NAryUnionPlanNode union = (NAryUnionPlanNode) partitioner.getInput().getSource();

    // all union inputs should be hash partitioned
    for (Channel c : union.getInputs()) {
        assertEquals("Union input should be hash partitioned",
            PartitioningProperty.HASH_PARTITIONED,
            c.getGlobalProperties().getPartitioning());
        assertEquals("Union input channel should use hash partitioning",
            ShipStrategyType.PARTITION_HASH, c.getShipStrategy());
        assertTrue("Union input should be a data source",
            c.getSource() instanceof SourcePlanNode);
    }
}
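Both union tests traverse the union's input channels with the same assertion pattern. A hypothetical helper that factors it out (assertUnionInputs is not part of CompilerTestBase; the name is illustrative):

import static org.junit.Assert.assertEquals;

import org.apache.flink.optimizer.dataproperties.PartitioningProperty;
import org.apache.flink.optimizer.plan.Channel;
import org.apache.flink.optimizer.plan.NAryUnionPlanNode;
import org.apache.flink.runtime.operators.shipping.ShipStrategyType;

// Asserts that every input channel of a union carries the expected global
// partitioning property and uses the expected ship strategy.
static void assertUnionInputs(
        NAryUnionPlanNode union, PartitioningProperty partitioning, ShipStrategyType strategy) {
    for (Channel c : union.getInputs()) {
        assertEquals("Unexpected partitioning on union input",
                partitioning, c.getGlobalProperties().getPartitioning());
        assertEquals("Unexpected ship strategy on union input channel",
                strategy, c.getShipStrategy());
    }
}

With this helper, the loop in the broadcast test reduces to assertUnionInputs(union, PartitioningProperty.FULL_REPLICATION, ShipStrategyType.BROADCAST).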
Use of org.apache.flink.optimizer.plan.NAryUnionPlanNode in project flink by apache.
From the class IterationCompilerTest, method testWorksetIterationWithUnionRoot:
@Test
public void testWorksetIterationWithUnionRoot() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(43);

        DataSet<Tuple2<Long, Long>> input = env.generateSequence(1, 20)
            .map(new MapFunction<Long, Tuple2<Long, Long>>() {

                @Override
                public Tuple2<Long, Long> map(Long value) {
                    // the plan is only compiled, never executed, so the mapper body is irrelevant
                    return null;
                }
            });

        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iter = input.iterateDelta(input, 100, 0);

        iter.closeWith(
                iter.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>())
                    .union(iter.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>())),
                iter.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>())
                    .union(iter.getWorkset().map(new IdentityMapper<Tuple2<Long, Long>>())))
            .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        WorksetIterationPlanNode iterNode = (WorksetIterationPlanNode) sink.getInput().getSource();

        // make sure that the roots are part of the dynamic path:
        // the "NoOp"s that come after the unions
        SingleInputPlanNode nextWorksetNoop = (SingleInputPlanNode) iterNode.getNextWorkSetPlanNode();
        SingleInputPlanNode solutionDeltaNoop = (SingleInputPlanNode) iterNode.getSolutionSetDeltaPlanNode();

        NAryUnionPlanNode nextWorksetUnion = (NAryUnionPlanNode) nextWorksetNoop.getInput().getSource();
        NAryUnionPlanNode solutionDeltaUnion = (NAryUnionPlanNode) solutionDeltaNoop.getInput().getSource();

        assertTrue(nextWorksetNoop.isOnDynamicPath());
        assertTrue(nextWorksetNoop.getCostWeight() >= 1);

        assertTrue(solutionDeltaNoop.isOnDynamicPath());
        assertTrue(solutionDeltaNoop.getCostWeight() >= 1);

        assertTrue(nextWorksetUnion.isOnDynamicPath());
        assertTrue(nextWorksetUnion.getCostWeight() >= 1);

        assertTrue(solutionDeltaUnion.isOnDynamicPath());
        assertTrue(solutionDeltaUnion.getCostWeight() >= 1);

        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
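For context: iterateDelta(input, 100, 0) opens a delta iteration with at most 100 supersteps, keying the solution set on tuple field 0, and closeWith(solutionSetDelta, nextWorkset) closes it with two feedback data sets, which is why the test unwraps two union/no-op pairs. A minimal runnable skeleton of the same API (a sketch; the class name and data are illustrative, and unlike the test above this one actually executes):

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.DeltaIteration;
import org.apache.flink.api.java.tuple.Tuple2;

public class DeltaIterationSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<Long, Long>> initial =
                env.fromElements(new Tuple2<>(1L, 1L), new Tuple2<>(2L, 2L));

        // solution set and workset both start as 'initial';
        // at most 10 supersteps; solution set keyed on tuple field 0
        DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
                initial.iterateDelta(initial, 10, 0);

        // trivial step function: the unchanged workset serves as both the
        // solution-set delta and the next workset
        DataSet<Tuple2<Long, Long>> step =
                iteration.getWorkset().map(t -> t).returns(initial.getType());

        iteration.closeWith(step, step).print();
    }
}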
Use of org.apache.flink.optimizer.plan.NAryUnionPlanNode in project flink by apache.
From the class IterationCompilerTest, method testIterationWithUnionRoot:
@Test
public void testIterationWithUnionRoot() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(43);

        IterativeDataSet<Long> iteration = env.generateSequence(-4, 1000).iterate(100);

        iteration.closeWith(
                iteration.map(new IdentityMapper<Long>())
                    .union(iteration.map(new IdentityMapper<Long>())))
            .output(new DiscardingOutputFormat<Long>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        BulkIterationPlanNode iterNode = (BulkIterationPlanNode) sink.getInput().getSource();

        // make sure that the root is part of the dynamic path:
        // the "NoOp" that comes after the union
        SingleInputPlanNode noop = (SingleInputPlanNode) iterNode.getRootOfStepFunction();
        NAryUnionPlanNode union = (NAryUnionPlanNode) noop.getInput().getSource();

        assertTrue(noop.isOnDynamicPath());
        assertTrue(noop.getCostWeight() >= 1);

        assertTrue(union.isOnDynamicPath());
        assertTrue(union.getCostWeight() >= 1);

        // see that the job graph generator can translate this
        new JobGraphGenerator().compileJobGraph(op);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
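Roughly speaking, the "NoOp" exists because a union is not executed as an operator of its own in Flink's batch runtime (the inputs are merged on the consuming side), so the optimizer places a no-op node after the union to act as the physical root of the step function. A minimal runnable program with the same shape as the test (a sketch; the class name and values are illustrative):

import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.operators.IterativeDataSet;

public class BulkIterationUnionSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

        // bulk iteration over a number sequence, at most 5 supersteps
        IterativeDataSet<Long> iteration = env.generateSequence(1, 10).iterate(5);

        // the root of the step function is a union of two map results, as in the test
        DataSet<Long> nextPartial =
                iteration.map(x -> x + 1).returns(Types.LONG)
                        .union(iteration.map(x -> x * 2).returns(Types.LONG));

        iteration.closeWith(nextPartial).print();
    }
}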