Use of org.apache.flink.optimizer.plan.Channel in project flink by apache.
From the class UnionReplacementTest, method testConsecutiveUnionsWithBroadcast:
/**
 * Checks that a plan with consecutive UNIONs followed by a broadcast-forward JOIN is correctly translated.
 *
 * The program can be illustrated as follows:
 *
 * Src1 -\
 *        >-> Union12 -\
 * Src2 -/              \
 *                       >-> Union123 --> bc-fwd-Join --> Output
 * Src3 ----------------/                /
 *                                      /
 * Src4 --------------------------------/
 *
 * In the resulting plan, the broadcasting must be pushed to the inputs of the unions (Src1, Src2, Src3).
 */
@Test
public void testConsecutiveUnionsWithBroadcast() throws Exception {

    // -----------------------------------------------------------------------------------------
    // Build test program
    // -----------------------------------------------------------------------------------------

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSet<Tuple2<Long, Long>> src1 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src2 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src3 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src4 = env.fromElements(new Tuple2<>(0L, 0L));

    DataSet<Tuple2<Long, Long>> union12 = src1.union(src2);
    DataSet<Tuple2<Long, Long>> union123 = union12.union(src3);

    union123.join(src4, JoinOperatorBase.JoinHint.BROADCAST_HASH_FIRST)
        .where(0).equalTo(0).name("join")
        .output(new DiscardingOutputFormat<Tuple2<Tuple2<Long, Long>, Tuple2<Long, Long>>>())
        .name("out");

    // -----------------------------------------------------------------------------------------
    // Verify optimized plan
    // -----------------------------------------------------------------------------------------

    OptimizedPlan optimizedPlan = compileNoStats(env.createProgramPlan());
    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(optimizedPlan);

    DualInputPlanNode join = resolver.getNode("join");

    // check that the first input of the join is broadcast
    assertEquals("First join input should be fully replicated.",
        PartitioningProperty.FULL_REPLICATION,
        join.getInput1().getGlobalProperties().getPartitioning());

    NAryUnionPlanNode union = (NAryUnionPlanNode) join.getInput1().getSource();

    // check that all union inputs are broadcast
    for (Channel c : union.getInputs()) {
        assertEquals("Union input should be fully replicated",
            PartitioningProperty.FULL_REPLICATION, c.getGlobalProperties().getPartitioning());
        assertEquals("Union input channel should be broadcasting",
            ShipStrategyType.BROADCAST, c.getShipStrategy());
    }
}
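The per-channel assertions in the loop above recur throughout these tests. As a minimal sketch, they could be factored into a reusable helper (the name assertUnionInputsShipAs is hypothetical, not part of Flink; it relies only on NAryUnionPlanNode.getInputs() and Channel.getShipStrategy(), which the test above already uses):

    // Hypothetical helper (not part of Flink): asserts that every input channel
    // of a union plan node uses the expected ship strategy.
    private static void assertUnionInputsShipAs(NAryUnionPlanNode union, ShipStrategyType expected) {
        for (Channel c : union.getInputs()) {
            assertEquals("Unexpected ship strategy on union input.", expected, c.getShipStrategy());
        }
    }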
Use of org.apache.flink.optimizer.plan.Channel in project flink by apache.
From the class UnionReplacementTest, method testConsecutiveUnionsWithHashPartitioning:
/**
 * Checks that a plan with consecutive UNIONs followed by PartitionByHash is correctly translated.
 *
 * The program can be illustrated as follows:
 *
 * Src1 -\
 *        >-> Union12 -\
 * Src2 -/              \
 *                       >-> Union123 -> PartitionByHash -> Output
 * Src3 ----------------/
 *
 * In the resulting plan, the hash partitioning (ShipStrategyType.PARTITION_HASH) must be
 * pushed to the inputs of the unions (Src1, Src2, Src3).
 */
@Test
public void testConsecutiveUnionsWithHashPartitioning() throws Exception {

    // -----------------------------------------------------------------------------------------
    // Build test program
    // -----------------------------------------------------------------------------------------

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    DataSet<Tuple2<Long, Long>> src1 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src2 = env.fromElements(new Tuple2<>(0L, 0L));
    DataSet<Tuple2<Long, Long>> src3 = env.fromElements(new Tuple2<>(0L, 0L));

    DataSet<Tuple2<Long, Long>> union12 = src1.union(src2);
    DataSet<Tuple2<Long, Long>> union123 = union12.union(src3);

    union123.partitionByHash(1)
        .output(new DiscardingOutputFormat<Tuple2<Long, Long>>())
        .name("out");

    // -----------------------------------------------------------------------------------------
    // Verify optimized plan
    // -----------------------------------------------------------------------------------------

    OptimizedPlan optimizedPlan = compileNoStats(env.createProgramPlan());
    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(optimizedPlan);

    SingleInputPlanNode sink = resolver.getNode("out");

    // check partitioning is correct
    assertEquals("Sink input should be hash partitioned.",
        PartitioningProperty.HASH_PARTITIONED,
        sink.getInput().getGlobalProperties().getPartitioning());
    assertEquals("Sink input should be hash partitioned on 1.",
        new FieldList(1),
        sink.getInput().getGlobalProperties().getPartitioningFields());

    SingleInputPlanNode partitioner = (SingleInputPlanNode) sink.getInput().getSource();
    assertEquals(DriverStrategy.UNARY_NO_OP, partitioner.getDriverStrategy());
    assertEquals("Partitioner input should be hash partitioned.",
        PartitioningProperty.HASH_PARTITIONED,
        partitioner.getInput().getGlobalProperties().getPartitioning());
    assertEquals("Partitioner input should be hash partitioned on 1.",
        new FieldList(1),
        partitioner.getInput().getGlobalProperties().getPartitioningFields());
    assertEquals("Partitioner input channel should be forwarding",
        ShipStrategyType.FORWARD, partitioner.getInput().getShipStrategy());

    NAryUnionPlanNode union = (NAryUnionPlanNode) partitioner.getInput().getSource();

    // all union inputs should be hash partitioned
    for (Channel c : union.getInputs()) {
        assertEquals("Union input should be hash partitioned",
            PartitioningProperty.HASH_PARTITIONED, c.getGlobalProperties().getPartitioning());
        assertEquals("Union input channel should be hash partitioning",
            ShipStrategyType.PARTITION_HASH, c.getShipStrategy());
        assertTrue("Union input should be data source",
            c.getSource() instanceof SourcePlanNode);
    }
}
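When a chain of plan assertions like this fails, it helps to see the whole optimized plan at once. A sketch, assuming the flink-optimizer classes are on the classpath (PlanJSONDumpGenerator lives in org.apache.flink.optimizer.plandump; verify the API against your Flink version):

    // Dump the optimized plan as JSON to inspect its nodes, channels, and ship strategies.
    String planAsJson = new PlanJSONDumpGenerator().getOptimizerPlanAsJSON(optimizedPlan);
    System.out.println(planAsJson);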
Use of org.apache.flink.optimizer.plan.Channel in project flink by apache.
From the class SemanticPropertiesAPIToPlanTest, method forwardFieldsTestJoin:
@Test
public void forwardFieldsTestJoin() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

    DataSet<Tuple3<Integer, Integer, Integer>> in1 =
        env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);
    DataSet<Tuple3<Integer, Integer, Integer>> in2 =
        env.readCsvFile(IN_FILE).types(Integer.class, Integer.class, Integer.class);

    in1 = in1.map(new MockMapper()).withForwardedFields("*")
        .groupBy(0).reduce(new MockReducer()).withForwardedFields("f0->f1");
    in2 = in2.map(new MockMapper()).withForwardedFields("*")
        .groupBy(1).reduce(new MockReducer()).withForwardedFields("f1->f2");

    DataSet<Tuple3<Integer, Integer, Integer>> out =
        in1.join(in2).where(1).equalTo(2).with(new MockJoin());
    out.output(new DiscardingOutputFormat<Tuple3<Integer, Integer, Integer>>());

    Plan plan = env.createProgramPlan();
    OptimizedPlan oPlan = compileWithStats(plan);

    oPlan.accept(new Visitor<PlanNode>() {

        @Override
        public boolean preVisit(PlanNode visitable) {
            if (visitable instanceof DualInputPlanNode
                    && visitable.getProgramOperator() instanceof InnerJoinOperatorBase) {
                DualInputPlanNode node = (DualInputPlanNode) visitable;

                final Channel inConn1 = node.getInput1();
                final Channel inConn2 = node.getInput2();

                Assert.assertTrue("Join should just forward the input if it is already partitioned",
                    inConn1.getShipStrategy() == ShipStrategyType.FORWARD);
                Assert.assertTrue("Join should just forward the input if it is already partitioned",
                    inConn2.getShipStrategy() == ShipStrategyType.FORWARD);
                return false;
            }
            return true;
        }

        @Override
        public void postVisit(PlanNode visitable) {
        }
    });
}
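The forwarded-field declarations are what make the FORWARD ship strategies possible here: "f0->f1" tells the optimizer that input field 0 is copied unchanged into output field 1, so the hash partitioning established by groupBy(0) is known to survive the reduce on the renamed field, and the join on where(1) needs no re-partitioning. The same information can be attached with an annotation instead of withForwardedFields(...); a sketch (the reducer body is a placeholder, not the MockReducer from the test):

    import org.apache.flink.api.common.functions.ReduceFunction;
    import org.apache.flink.api.java.functions.FunctionAnnotation.ForwardedFields;
    import org.apache.flink.api.java.tuple.Tuple3;

    // Declares that field 0 of the input is forwarded to field 1 of the output.
    @ForwardedFields("f0->f1")
    public static class RenamingReducer implements ReduceFunction<Tuple3<Integer, Integer, Integer>> {
        @Override
        public Tuple3<Integer, Integer, Integer> reduce(
                Tuple3<Integer, Integer, Integer> a, Tuple3<Integer, Integer, Integer> b) {
            // a.f0 == b.f0 within a group, so forwarding a.f0 to position 1 is safe
            return new Tuple3<>(0, a.f0, a.f2 + b.f2);
        }
    }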
Use of org.apache.flink.optimizer.plan.Channel in project flink by apache.
From the class PartitionOperatorTest, method testRangePartitionOperatorPreservesFields:
@Test
public void testRangePartitionOperatorPreservesFields() {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<Tuple2<Long, Long>> data =
            env.fromCollection(Collections.singleton(new Tuple2<>(0L, 0L)));

        data.partitionByRange(1)
            .groupBy(1)
            .reduceGroup(new IdentityGroupReducerCombinable<Tuple2<Long, Long>>())
            .output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

        Plan p = env.createProgramPlan();
        OptimizedPlan op = compileNoStats(p);

        SinkPlanNode sink = op.getDataSinks().iterator().next();
        SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
        SingleInputPlanNode partitionNode = (SingleInputPlanNode) reducer.getInput().getSource();
        SingleInputPlanNode partitionIDRemover = (SingleInputPlanNode) partitionNode.getInput().getSource();

        assertEquals(ShipStrategyType.FORWARD, reducer.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, partitionNode.getInput().getShipStrategy());
        assertEquals(ShipStrategyType.PARTITION_CUSTOM, partitionIDRemover.getInput().getShipStrategy());

        SourcePlanNode sourcePlanNode = op.getDataSources().iterator().next();
        List<Channel> sourceOutgoingChannels = sourcePlanNode.getOutgoingChannels();
        assertEquals(2, sourceOutgoingChannels.size());
        assertEquals(ShipStrategyType.FORWARD, sourceOutgoingChannels.get(0).getShipStrategy());
        assertEquals(ShipStrategyType.FORWARD, sourceOutgoingChannels.get(1).getShipStrategy());
        assertEquals(DataExchangeMode.PIPELINED, sourceOutgoingChannels.get(0).getDataExchangeMode());
        assertEquals(DataExchangeMode.BATCH, sourceOutgoingChannels.get(1).getDataExchangeMode());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
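The source ends up with two outgoing channels because range partitioning first samples the data to compute range boundaries and then ships the actual records; the record-shipping channel presumably uses BATCH exchange so that it only starts once the boundaries exist, avoiding a pipeline deadlock. A sketch of a hypothetical helper for such checks, using only PlanNode.getOutgoingChannels() and Channel.getDataExchangeMode() as seen above:

    // Hypothetical helper (not part of Flink): collects the data exchange modes
    // of a plan node's outgoing channels, in order.
    private static List<DataExchangeMode> outgoingExchangeModes(PlanNode node) {
        List<DataExchangeMode> modes = new ArrayList<>();
        for (Channel c : node.getOutgoingChannels()) {
            modes.add(c.getDataExchangeMode());
        }
        return modes;
    }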
Use of org.apache.flink.optimizer.plan.Channel in project flink by apache.
From the class WordCountCompilerTest, method checkWordCount:
private void checkWordCount(boolean estimates) {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(DEFAULT_PARALLELISM);

    // get input data
    DataSet<String> lines = env.readTextFile(IN_FILE).name("Input Lines");

    lines
        .map(new MapFunction<String, Tuple2<String, Integer>>() {
            private static final long serialVersionUID = -3952739820618875030L;

            @Override
            public Tuple2<String, Integer> map(String v) throws Exception {
                return new Tuple2<>(v, 1);
            }
        })
        .name("Tokenize Lines")
        .groupBy(0).sum(1).name("Count Words")
        .output(new DiscardingOutputFormat<Tuple2<String, Integer>>()).name("Word Counts");

    // get the plan and compile it
    Plan p = env.createProgramPlan();
    p.setExecutionConfig(new ExecutionConfig());

    OptimizedPlan plan;
    if (estimates) {
        GenericDataSourceBase<?, ?> source = getContractResolver(p).getNode("Input Lines");
        setSourceStatistics(source, 1024 * 1024 * 1024 * 1024L, 24f);
        plan = compileWithStats(p);
    } else {
        plan = compileNoStats(p);
    }

    // get the optimizer plan nodes
    OptimizerPlanNodeResolver resolver = getOptimizerPlanNodeResolver(plan);
    SinkPlanNode sink = resolver.getNode("Word Counts");
    SingleInputPlanNode reducer = resolver.getNode("Count Words");
    SingleInputPlanNode mapper = resolver.getNode("Tokenize Lines");

    // verify the strategies
    Assert.assertEquals(ShipStrategyType.FORWARD, mapper.getInput().getShipStrategy());
    Assert.assertEquals(ShipStrategyType.PARTITION_HASH, reducer.getInput().getShipStrategy());
    Assert.assertEquals(ShipStrategyType.FORWARD, sink.getInput().getShipStrategy());

    Channel c = reducer.getInput();
    Assert.assertEquals(LocalStrategy.COMBININGSORT, c.getLocalStrategy());

    FieldList l = new FieldList(0);
    Assert.assertEquals(l, c.getShipStrategyKeys());
    Assert.assertEquals(l, c.getLocalStrategyKeys());
    Assert.assertTrue(Arrays.equals(c.getLocalStrategySortOrder(), reducer.getSortOrders(0)));

    // check the combiner
    SingleInputPlanNode combiner = (SingleInputPlanNode) reducer.getPredecessor();
    Assert.assertEquals(DriverStrategy.SORTED_GROUP_COMBINE, combiner.getDriverStrategy());
    Assert.assertEquals(l, combiner.getKeys(0));
    Assert.assertEquals(ShipStrategyType.FORWARD, combiner.getInput().getShipStrategy());
}
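The helper takes a flag so the same plan can be verified both with and without size estimates; the public tests that drive it are not shown in this excerpt. A sketch of what such callers could look like (the test names are assumed, not taken from the Flink source):

    @Test
    public void testWordCountWithStatistics() {
        checkWordCount(true);
    }

    @Test
    public void testWordCountWithoutStatistics() {
        checkWordCount(false);
    }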