Use of org.apache.flink.optimizer.plan.DualInputPlanNode in the Apache Flink project.
Class CoGroupSolutionSetFirstTest, method testCoGroupSolutionSet.
/**
 * Compiles a delta iteration whose solution set is the first input of a CoGroup and
 * inspects the two input channels of that CoGroup in the optimized plan.
 *
 * <p>Expected: the first channel carries no local ordering and is forwarded, while the
 * second channel is hash-partitioned and has a local ordering that involves field 0.
 */
@Test
public void testCoGroupSolutionSet() {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSet<Tuple1<Integer>> input = env.readCsvFile(IN_FILE).types(Integer.class);

    // The same data set is used on both sides of iterateDelta(...).
    DeltaIteration<Tuple1<Integer>, Tuple1<Integer>> deltaIteration = input.iterateDelta(input, 1000, 0);

    DataSet<Tuple1<Integer>> mappedWorkset = deltaIteration.getWorkset().map(new SimpleMap());
    DataSet<Tuple1<Integer>> solutionSetDelta = deltaIteration.getSolutionSet()
            .coGroup(mappedWorkset)
            .where(0)
            .equalTo(0)
            .with(new SimpleCGroup());
    DataSet<Tuple1<Integer>> nextWorkset = deltaIteration.getWorkset().map(new SimpleMap());

    deltaIteration.closeWith(solutionSetDelta, nextWorkset)
            .output(new DiscardingOutputFormat<Tuple1<Integer>>());

    Plan programPlan = env.createProgramPlan();
    OptimizedPlan optimized = null;
    try {
        optimized = compileNoStats(programPlan);
    } catch (CompilerException e) {
        Assert.fail(e.getMessage());
    }

    optimized.accept(new Visitor<PlanNode>() {

        @Override
        public boolean preVisit(PlanNode node) {
            // Keep descending until the workset iteration is reached.
            if (!(node instanceof WorksetIterationPlanNode)) {
                return true;
            }

            WorksetIterationPlanNode iterationNode = (WorksetIterationPlanNode) node;
            PlanNode deltaNode = iterationNode.getSolutionSetDeltaPlanNode();

            // The CoGroup is the source of the delta node's first input channel.
            Channel deltaInput = deltaNode.getInputs().iterator().next();
            DualInputPlanNode coGroup = (DualInputPlanNode) deltaInput.getSource();

            Channel firstInput = coGroup.getInput1();
            Channel secondInput = coGroup.getInput2();

            Assert.assertTrue(firstInput.getLocalProperties().getOrdering() == null);
            Assert.assertTrue(secondInput.getLocalProperties().getOrdering() != null);
            Assert.assertTrue(secondInput.getLocalProperties().getOrdering().getInvolvedIndexes().contains(0));
            Assert.assertTrue(firstInput.getShipStrategy() == ShipStrategyType.FORWARD);
            Assert.assertTrue(secondInput.getShipStrategy() == ShipStrategyType.PARTITION_HASH);

            // Nothing below the iteration needs to be inspected.
            return false;
        }

        @Override
        public void postVisit(PlanNode node) {
            // no work on the way back up
        }
    });
}
Use of org.apache.flink.optimizer.plan.DualInputPlanNode in the Apache Flink project.
Class HashJoinBuildFirstProperties, method instantiate.
/**
 * Creates the join plan node for the given inputs, choosing between the plain and the
 * cached variant of the hybrid-hash build-first driver strategy.
 *
 * <p>The cached variant is chosen only when {@code in1} is not on the dynamic path while
 * {@code in2} is. In that case {@code in1} must carry a cached temp mode, which is then
 * switched to its non-cached counterpart on the channel.
 *
 * @param in1 channel of the first input; its temp mode may be modified
 * @param in2 channel of the second input
 * @param node optimizer node the plan node is created for
 * @return the new plan node, named {@code "Join(<operator name>)"}
 * @throws CompilerException if the cached variant applies but {@code in1} is not cached
 */
@Override
public DualInputPlanNode instantiate(Channel in1, Channel in2, TwoInputNode node) {
    final DriverStrategy strategy;

    if (in1.isOnDynamicPath() || !in2.isOnDynamicPath()) {
        strategy = DriverStrategy.HYBRIDHASH_BUILD_FIRST;
    } else {
        // Static first input meets dynamic second input: the first input has to be
        // cached at this point, and that cache marker is removed from the channel.
        if (!in1.getTempMode().isCached()) {
            throw new CompilerException("No cache at point where static and dynamic parts meet.");
        }
        in1.setTempMode(in1.getTempMode().makeNonCached());
        strategy = DriverStrategy.HYBRIDHASH_BUILD_FIRST_CACHED;
    }

    final String nodeName = "Join(" + node.getOperator().getName() + ")";
    return new DualInputPlanNode(node, nodeName, in1, in2, strategy, this.keys1, this.keys2);
}
Use of org.apache.flink.optimizer.plan.DualInputPlanNode in the Apache Flink project.
Class HashJoinBuildSecondProperties, method instantiate.
/**
 * Creates the join plan node for the given inputs, choosing between the plain and the
 * cached variant of the hybrid-hash build-second driver strategy.
 *
 * <p>The cached variant is chosen only when {@code in2} is not on the dynamic path while
 * {@code in1} is. In that case {@code in2} must carry a cached temp mode, which is then
 * switched to its non-cached counterpart on the channel.
 *
 * @param in1 channel of the first input
 * @param in2 channel of the second input; its temp mode may be modified
 * @param node optimizer node the plan node is created for
 * @return the new plan node, named {@code "Join (<operator name>)"}
 * @throws CompilerException if the cached variant applies but {@code in2} is not cached
 */
@Override
public DualInputPlanNode instantiate(Channel in1, Channel in2, TwoInputNode node) {
    final DriverStrategy strategy;

    if (in2.isOnDynamicPath() || !in1.isOnDynamicPath()) {
        strategy = DriverStrategy.HYBRIDHASH_BUILD_SECOND;
    } else {
        // Static second input meets dynamic first input: the second input has to be
        // cached at this point, and that cache marker is removed from the channel.
        if (!in2.getTempMode().isCached()) {
            throw new CompilerException("No cache at point where static and dynamic parts meet.");
        }
        in2.setTempMode(in2.getTempMode().makeNonCached());
        strategy = DriverStrategy.HYBRIDHASH_BUILD_SECOND_CACHED;
    }

    final String nodeName = "Join (" + node.getOperator().getName() + ")";
    return new DualInputPlanNode(node, nodeName, in1, in2, strategy, this.keys1, this.keys2);
}
Use of org.apache.flink.optimizer.plan.DualInputPlanNode in the Apache Flink project.
Class DataExchangeModeOpenBranchingTest, method verifyBranchigPlan.
/**
 * Builds a program in which one mapped data set feeds three sinks (through a filter,
 * through a join, and directly), compiles it under the given execution mode, and checks
 * the data exchange mode of each channel in the optimized plan.
 *
 * @param execMode execution mode set on the environment before compiling
 * @param toMap expected exchange mode of the map node's input
 * @param toFilter expected exchange mode of the filter node's input
 * @param toFilterSink expected exchange mode of the input of sink "sink1"
 * @param toJoin1 expected exchange mode of the join node's first input
 * @param toJoin2 expected exchange mode of the join node's second input
 * @param toJoinSink expected exchange mode of the input of sink "sink2"
 * @param toDirectSink expected exchange mode of the input of sink "sink3"
 */
private void verifyBranchigPlan(ExecutionMode execMode, DataExchangeMode toMap, DataExchangeMode toFilter, DataExchangeMode toFilterSink, DataExchangeMode toJoin1, DataExchangeMode toJoin2, DataExchangeMode toJoinSink, DataExchangeMode toDirectSink) {
    try {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.getConfig().setExecutionMode(execMode);

        DataSet<Long> sequence = env.generateSequence(1, 100000);
        MapFunction<Long, Tuple2<Long, Long>> duplicate = new MapFunction<Long, Tuple2<Long, Long>>() {
            @Override
            public Tuple2<Long, Long> map(Long value) {
                return new Tuple2<Long, Long>(value, value);
            }
        };
        DataSet<Tuple2<Long, Long>> data = sequence.map(duplicate);

        // output 1: behind a filter that rejects every record
        FilterFunction<Tuple2<Long, Long>> rejectAll = new FilterFunction<Tuple2<Long, Long>>() {
            @Override
            public boolean filter(Tuple2<Long, Long> value) {
                return false;
            }
        };
        data.filter(rejectAll)
                .output(new DiscardingOutputFormat<Tuple2<Long, Long>>())
                .name("sink1");

        // output 2: goes through a join before reaching its sink
        DataSet<Tuple2<Long, Long>> singleElement = env.fromElements(new Tuple2<Long, Long>(1L, 2L));
        DataSet<Tuple2<Tuple2<Long, Long>, Tuple2<Long, Long>>> joined =
                data.join(singleElement).where(1).equalTo(0);
        joined.output(new DiscardingOutputFormat<Tuple2<Tuple2<Long, Long>, Tuple2<Long, Long>>>())
                .name("sink2");

        // output 3: written directly
        data.output(new DiscardingOutputFormat<Tuple2<Long, Long>>()).name("sink3");

        OptimizedPlan optPlan = compileNoStats(env.createProgramPlan());

        // locate the three sinks by name
        SinkPlanNode sinkAfterFilter = findSink(optPlan.getDataSinks(), "sink1");
        SinkPlanNode sinkAfterJoin = findSink(optPlan.getDataSinks(), "sink2");
        SinkPlanNode sinkDirect = findSink(optPlan.getDataSinks(), "sink3");

        // walk backwards from the sinks to the operators
        SingleInputPlanNode filter = (SingleInputPlanNode) sinkAfterFilter.getPredecessor();
        SingleInputPlanNode mapper = (SingleInputPlanNode) filter.getPredecessor();
        DualInputPlanNode join = (DualInputPlanNode) sinkAfterJoin.getPredecessor();

        // the map node must be shared by all three branches
        assertEquals(mapper, join.getInput1().getSource());
        assertEquals(mapper, sinkDirect.getPredecessor());

        // channels into the sinks
        assertEquals(toFilterSink, sinkAfterFilter.getInput().getDataExchangeMode());
        assertEquals(toJoinSink, sinkAfterJoin.getInput().getDataExchangeMode());
        assertEquals(toDirectSink, sinkDirect.getInput().getDataExchangeMode());

        // channels into the operators
        assertEquals(toMap, mapper.getInput().getDataExchangeMode());
        assertEquals(toFilter, filter.getInput().getDataExchangeMode());
        assertEquals(toJoin1, join.getInput1().getDataExchangeMode());
        assertEquals(toJoin2, join.getInput2().getDataExchangeMode());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Use of org.apache.flink.optimizer.plan.DualInputPlanNode in the Apache Flink project.
Class UnionClosedBranchingTest, method testUnionClosedBranchingTest.
/**
 * Joins the union of two sources with itself, then verifies the optimized plan and the
 * generated job graph against the expectations held in this test's fields
 * ({@code executionMode}, {@code unionToJoin}, {@code unionToJoinStrategy},
 * {@code sourceToUnion}, {@code sourceToUnionStrategy}).
 *
 * @throws Exception if building or compiling the program fails
 */
@Test
public void testUnionClosedBranchingTest() throws Exception {
    // ---- Build test program ----------------------------------------------------------------
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().setExecutionMode(executionMode);
    env.setParallelism(4);

    DataSet<Tuple1<Integer>> firstSource = env.fromElements(new Tuple1<>(0), new Tuple1<>(1));
    DataSet<Tuple1<Integer>> secondSource = env.fromElements(new Tuple1<>(0), new Tuple1<>(1));
    DataSet<Tuple1<Integer>> unioned = firstSource.union(secondSource);

    DataSet<Tuple2<Integer, Integer>> selfJoin = unioned.join(unioned)
            .where(0)
            .equalTo(0)
            .projectFirst(0)
            .projectSecond(0);
    selfJoin.output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>());

    // ---- Verify optimized plan -------------------------------------------------------------
    OptimizedPlan optimizedPlan = compileNoStats(env.createProgramPlan());
    SinkPlanNode sink = optimizedPlan.getDataSinks().iterator().next();
    DualInputPlanNode joinNode = (DualInputPlanNode) sink.getPredecessor();

    // Both join inputs must use the expected exchange mode and ship strategy.
    for (Channel joinInput : joinNode.getInputs()) {
        assertEquals("Unexpected data exchange mode between union and join node.", unionToJoin, joinInput.getDataExchangeMode());
        assertEquals("Unexpected ship strategy between union and join node.", unionToJoinStrategy, joinInput.getShipStrategy());
    }

    // Same check for every channel leaving a source.
    for (SourcePlanNode sourceNode : optimizedPlan.getDataSources()) {
        for (Channel outgoing : sourceNode.getOutgoingChannels()) {
            assertEquals("Unexpected data exchange mode between source and union node.", sourceToUnion, outgoing.getDataExchangeMode());
            assertEquals("Unexpected ship strategy between source and union node.", sourceToUnionStrategy, outgoing.getShipStrategy());
        }
    }

    // ---- Verify generated JobGraph ---------------------------------------------------------
    JobGraph jobGraph = new JobGraphGenerator().compileJobGraph(optimizedPlan);
    List<JobVertex> sortedVertices = jobGraph.getVerticesSortedTopologicallyFromSources();

    // Sanity check for the test setup
    assertEquals("Unexpected number of vertices created.", 4, sortedVertices.size());

    // The first two vertices in topological order are treated as the sources.
    JobVertex[] sourceVertices = { sortedVertices.get(0), sortedVertices.get(1) };
    for (JobVertex sourceVertex : sourceVertices) {
        // Sanity check
        assertTrue("Unexpected vertex type. Test setup is broken.", sourceVertex.isInputVertex());

        // The union is not translated to an extra union task, but the join uses a union
        // input gate to read multiple inputs. Each source creates a single result per consumer.
        assertEquals("Unexpected number of created results.", 2, sourceVertex.getNumberOfProducedIntermediateDataSets());

        for (IntermediateDataSet produced : sourceVertex.getProducedDataSets()) {
            ResultPartitionType resultType = produced.getResultType();

            // Only PIPELINED_FORCED is expected to yield a non-blocking result.
            if (executionMode.equals(ExecutionMode.PIPELINED_FORCED)) {
                assertFalse("Expected non-batch exchange, but result type is " + resultType + ".", resultType.isBlocking());
            } else {
                assertTrue("Expected batch exchange, but result type is " + resultType + ".", resultType.isBlocking());
            }
        }
    }
}
Aggregations