Use of org.apache.flink.optimizer.plan.Channel in project flink by apache.
From the class UnionClosedBranchingTest, method testUnionClosedBranchingTest:
@Test
public void testUnionClosedBranchingTest() throws Exception {
// -----------------------------------------------------------------------------------------
// Build test program
// -----------------------------------------------------------------------------------------
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.getConfig().setExecutionMode(executionMode);
env.setParallelism(4);
DataSet<Tuple1<Integer>> src1 = env.fromElements(new Tuple1<>(0), new Tuple1<>(1));
DataSet<Tuple1<Integer>> src2 = env.fromElements(new Tuple1<>(0), new Tuple1<>(1));
DataSet<Tuple1<Integer>> union = src1.union(src2);
DataSet<Tuple2<Integer, Integer>> join = union.join(union).where(0).equalTo(0).projectFirst(0).projectSecond(0);
join.output(new DiscardingOutputFormat<Tuple2<Integer, Integer>>());
// -----------------------------------------------------------------------------------------
// Verify optimized plan
// -----------------------------------------------------------------------------------------
OptimizedPlan optimizedPlan = compileNoStats(env.createProgramPlan());
SinkPlanNode sinkNode = optimizedPlan.getDataSinks().iterator().next();
DualInputPlanNode joinNode = (DualInputPlanNode) sinkNode.getPredecessor();
// Verify that the compiler correctly sets the expected data exchange modes.
for (Channel channel : joinNode.getInputs()) {
assertEquals("Unexpected data exchange mode between union and join node.", unionToJoin, channel.getDataExchangeMode());
assertEquals("Unexpected ship strategy between union and join node.", unionToJoinStrategy, channel.getShipStrategy());
}
for (SourcePlanNode src : optimizedPlan.getDataSources()) {
for (Channel channel : src.getOutgoingChannels()) {
assertEquals("Unexpected data exchange mode between source and union node.", sourceToUnion, channel.getDataExchangeMode());
assertEquals("Unexpected ship strategy between source and union node.", sourceToUnionStrategy, channel.getShipStrategy());
}
}
// -----------------------------------------------------------------------------------------
// Verify generated JobGraph
// -----------------------------------------------------------------------------------------
JobGraphGenerator jgg = new JobGraphGenerator();
JobGraph jobGraph = jgg.compileJobGraph(optimizedPlan);
List<JobVertex> vertices = jobGraph.getVerticesSortedTopologicallyFromSources();
// Sanity check for the test setup
assertEquals("Unexpected number of vertices created.", 4, vertices.size());
// Verify all sources
JobVertex[] sources = new JobVertex[] { vertices.get(0), vertices.get(1) };
for (JobVertex src : sources) {
// Sanity check
assertTrue("Unexpected vertex type. Test setup is broken.", src.isInputVertex());
// The union is not translated to an extra union task, but the join uses a union
// input gate to read multiple inputs. Each source creates a single result per consumer.
assertEquals("Unexpected number of created results.", 2, src.getNumberOfProducedIntermediateDataSets());
for (IntermediateDataSet dataSet : src.getProducedDataSets()) {
ResultPartitionType dsType = dataSet.getResultType();
// Ensure a batch exchange unless PIPELINED_FORCED is enabled.
if (!executionMode.equals(ExecutionMode.PIPELINED_FORCED)) {
assertTrue("Expected batch exchange, but result type is " + dsType + ".", dsType.isBlocking());
} else {
assertFalse("Expected non-batch exchange, but result type is " + dsType + ".", dsType.isBlocking());
}
}
}
}
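For orientation, here is a minimal, hedged sketch (the class name ExchangeModeSketch and the wrapping main method are made up for illustration) of the selection rule behind the exchange modes asserted above: the optimizer derives a Channel's DataExchangeMode from the program's ExecutionMode, the chosen ship strategy, and whether the connection breaks a pipeline, via DataExchangeMode.select.

import org.apache.flink.api.common.ExecutionMode;
import org.apache.flink.runtime.io.network.DataExchangeMode;
import org.apache.flink.runtime.operators.shipping.ShipStrategyType;

// Hypothetical illustration class; it only wraps the selection call used by the optimizer.
public final class ExchangeModeSketch {
    public static void main(String[] args) {
        // Under BATCH execution, a hash-partitioning exchange is typically blocking (BATCH).
        System.out.println(DataExchangeMode.select(ExecutionMode.BATCH, ShipStrategyType.PARTITION_HASH, false));
        // Under PIPELINED_FORCED, even shuffling exchanges stay pipelined.
        System.out.println(DataExchangeMode.select(ExecutionMode.PIPELINED_FORCED, ShipStrategyType.PARTITION_HASH, false));
    }
}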
Use of org.apache.flink.optimizer.plan.Channel in project flink by apache.
From the class PartitionOperatorTest, method testRangePartitionOperatorPreservesFields2:
@Test
public void testRangePartitionOperatorPreservesFields2() {
try {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
DataSet<Tuple2<Long, Long>> data = env.fromCollection(Collections.singleton(new Tuple2<>(0L, 0L)));
PartitionOperator<Tuple2<Long, Long>> rangePartitioned = data.partitionByRange(1);
rangePartitioned.groupBy(1).reduceGroup(new IdentityGroupReducerCombinable<Tuple2<Long, Long>>()).output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
data.groupBy(0).aggregate(Aggregations.SUM, 1).map(new MapFunction<Tuple2<Long, Long>, Long>() {
@Override
public Long map(Tuple2<Long, Long> value) throws Exception {
return value.f1;
}
}).output(new DiscardingOutputFormat<Long>());
rangePartitioned.filter(new FilterFunction<Tuple2<Long, Long>>() {
@Override
public boolean filter(Tuple2<Long, Long> value) throws Exception {
return value.f0 % 2 == 0;
}
}).output(new DiscardingOutputFormat<Tuple2<Long, Long>>());
Plan p = env.createProgramPlan();
OptimizedPlan op = compileNoStats(p);
SinkPlanNode sink = op.getDataSinks().iterator().next();
SingleInputPlanNode reducer = (SingleInputPlanNode) sink.getInput().getSource();
SingleInputPlanNode partitionNode = (SingleInputPlanNode) reducer.getInput().getSource();
SingleInputPlanNode partitionIDRemover = (SingleInputPlanNode) partitionNode.getInput().getSource();
assertEquals(ShipStrategyType.FORWARD, reducer.getInput().getShipStrategy());
assertEquals(ShipStrategyType.FORWARD, partitionNode.getInput().getShipStrategy());
assertEquals(ShipStrategyType.PARTITION_CUSTOM, partitionIDRemover.getInput().getShipStrategy());
SourcePlanNode sourcePlanNode = op.getDataSources().iterator().next();
List<Channel> sourceOutgoingChannels = sourcePlanNode.getOutgoingChannels();
assertEquals(3, sourceOutgoingChannels.size());
assertEquals(ShipStrategyType.FORWARD, sourceOutgoingChannels.get(0).getShipStrategy());
assertEquals(ShipStrategyType.FORWARD, sourceOutgoingChannels.get(1).getShipStrategy());
assertEquals(ShipStrategyType.FORWARD, sourceOutgoingChannels.get(2).getShipStrategy());
assertEquals(DataExchangeMode.PIPELINED, sourceOutgoingChannels.get(0).getDataExchangeMode());
assertEquals(DataExchangeMode.PIPELINED, sourceOutgoingChannels.get(1).getDataExchangeMode());
assertEquals(DataExchangeMode.BATCH, sourceOutgoingChannels.get(2).getDataExchangeMode());
List<Channel> partitionOutputChannels = partitionNode.getOutgoingChannels();
assertEquals(2, partitionOutputChannels.size());
assertEquals(ShipStrategyType.FORWARD, partitionOutputChannels.get(0).getShipStrategy());
assertEquals(ShipStrategyType.FORWARD, partitionOutputChannels.get(1).getShipStrategy());
assertEquals(DataExchangeMode.PIPELINED, partitionOutputChannels.get(0).getDataExchangeMode());
assertEquals(DataExchangeMode.PIPELINED, partitionOutputChannels.get(1).getDataExchangeMode());
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
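A small, hedged helper (the names PlanWalkSketch and walkToSource are made up) showing the traversal pattern this test relies on: starting from a SinkPlanNode, each Channel exposes its source plan node, its ship strategy, and its data exchange mode.

import org.apache.flink.optimizer.plan.Channel;
import org.apache.flink.optimizer.plan.PlanNode;
import org.apache.flink.optimizer.plan.SingleInputPlanNode;
import org.apache.flink.optimizer.plan.SinkPlanNode;

final class PlanWalkSketch {
    // Walks from the sink towards the source along single-input operators,
    // printing each Channel's ship strategy and data exchange mode.
    static void walkToSource(SinkPlanNode sink) {
        Channel channel = sink.getInput();
        while (channel != null) {
            System.out.println(channel.getShipStrategy() + " / " + channel.getDataExchangeMode());
            PlanNode source = channel.getSource();
            channel = (source instanceof SingleInputPlanNode) ? ((SingleInputPlanNode) source).getInput() : null;
        }
    }
}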
Use of org.apache.flink.optimizer.plan.Channel in project flink by apache.
From the class SingleInputNode, method getAlternativePlans:
@Override
public List<PlanNode> getAlternativePlans(CostEstimator estimator) {
// check if we have a cached version
if (this.cachedPlans != null) {
return this.cachedPlans;
}
boolean childrenSkippedDueToReplicatedInput = false;
// calculate alternative sub-plans for predecessor
final List<? extends PlanNode> subPlans = getPredecessorNode().getAlternativePlans(estimator);
final Set<RequestedGlobalProperties> intGlobal = this.inConn.getInterestingProperties().getGlobalProperties();
// calculate alternative sub-plans for broadcast inputs
final List<Set<? extends NamedChannel>> broadcastPlanChannels = new ArrayList<Set<? extends NamedChannel>>();
List<DagConnection> broadcastConnections = getBroadcastConnections();
List<String> broadcastConnectionNames = getBroadcastConnectionNames();
for (int i = 0; i < broadcastConnections.size(); i++) {
DagConnection broadcastConnection = broadcastConnections.get(i);
String broadcastConnectionName = broadcastConnectionNames.get(i);
List<PlanNode> broadcastPlanCandidates = broadcastConnection.getSource().getAlternativePlans(estimator);
// wrap the plan candidates in named channels
HashSet<NamedChannel> broadcastChannels = new HashSet<NamedChannel>(broadcastPlanCandidates.size());
for (PlanNode plan : broadcastPlanCandidates) {
NamedChannel c = new NamedChannel(broadcastConnectionName, plan);
DataExchangeMode exMode = DataExchangeMode.select(broadcastConnection.getDataExchangeMode(), ShipStrategyType.BROADCAST, broadcastConnection.isBreakingPipeline());
c.setShipStrategy(ShipStrategyType.BROADCAST, exMode);
broadcastChannels.add(c);
}
broadcastPlanChannels.add(broadcastChannels);
}
final RequestedGlobalProperties[] allValidGlobals;
{
Set<RequestedGlobalProperties> pairs = new HashSet<RequestedGlobalProperties>();
for (OperatorDescriptorSingle ods : getPossibleProperties()) {
pairs.addAll(ods.getPossibleGlobalProperties());
}
allValidGlobals = pairs.toArray(new RequestedGlobalProperties[pairs.size()]);
}
final ArrayList<PlanNode> outputPlans = new ArrayList<PlanNode>();
final ExecutionMode executionMode = this.inConn.getDataExchangeMode();
final int parallelism = getParallelism();
final int inParallelism = getPredecessorNode().getParallelism();
final boolean parallelismChange = inParallelism != parallelism;
final boolean breaksPipeline = this.inConn.isBreakingPipeline();
// create all candidates
for (PlanNode child : subPlans) {
if (child.getGlobalProperties().isFullyReplicated()) {
// fully replicated input is always locally forwarded if the parallelism is not changed
if (parallelismChange) {
// can not continue with this child
childrenSkippedDueToReplicatedInput = true;
continue;
} else {
this.inConn.setShipStrategy(ShipStrategyType.FORWARD);
}
}
if (this.inConn.getShipStrategy() == null) {
// pick the strategy ourselves
for (RequestedGlobalProperties igps : intGlobal) {
final Channel c = new Channel(child, this.inConn.getMaterializationMode());
igps.parameterizeChannel(c, parallelismChange, executionMode, breaksPipeline);
// if the parallelism changed, cancel out the global properties, unless the ship strategy preserves/establishes them even under changing parallelisms
if (parallelismChange && !c.getShipStrategy().isNetworkStrategy()) {
c.getGlobalProperties().reset();
}
// check whether we meet any of the requested global properties
for (RequestedGlobalProperties rgps : allValidGlobals) {
if (rgps.isMetBy(c.getGlobalProperties())) {
c.setRequiredGlobalProps(rgps);
addLocalCandidates(c, broadcastPlanChannels, igps, outputPlans, estimator);
break;
}
}
}
} else {
// hint fixed the strategy
final Channel c = new Channel(child, this.inConn.getMaterializationMode());
final ShipStrategyType shipStrategy = this.inConn.getShipStrategy();
final DataExchangeMode exMode = DataExchangeMode.select(executionMode, shipStrategy, breaksPipeline);
if (this.keys != null) {
c.setShipStrategy(shipStrategy, this.keys.toFieldList(), exMode);
} else {
c.setShipStrategy(shipStrategy, exMode);
}
if (parallelismChange) {
c.adjustGlobalPropertiesForFullParallelismChange();
}
// check whether we meet any of the accepted properties
for (RequestedGlobalProperties rgps : allValidGlobals) {
if (rgps.isMetBy(c.getGlobalProperties())) {
addLocalCandidates(c, broadcastPlanChannels, rgps, outputPlans, estimator);
break;
}
}
}
}
if (outputPlans.isEmpty()) {
if (childrenSkippedDueToReplicatedInput) {
throw new CompilerException("No plan meeting the requirements could be created @ " + this + ". Most likely reason: Invalid use of replicated input.");
} else {
throw new CompilerException("No plan meeting the requirements could be created @ " + this + ". Most likely reason: Too restrictive plan hints.");
}
}
// cost and prune the plans
for (PlanNode node : outputPlans) {
estimator.costOperator(node);
}
prunePlanAlternatives(outputPlans);
outputPlans.trimToSize();
this.cachedPlans = outputPlans;
return outputPlans;
}
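As a condensed, hedged companion to the enumeration above (the class ChannelSetupSketch and its method names are hypothetical), a Channel is configured in one of two ways: either the requested global properties parameterize it, or a hinted ship strategy is set explicitly together with the exchange mode chosen by DataExchangeMode.select.

import org.apache.flink.api.common.ExecutionMode;
import org.apache.flink.optimizer.dataproperties.RequestedGlobalProperties;
import org.apache.flink.optimizer.plan.Channel;
import org.apache.flink.optimizer.plan.PlanNode;
import org.apache.flink.runtime.io.network.DataExchangeMode;
import org.apache.flink.runtime.operators.shipping.ShipStrategyType;

final class ChannelSetupSketch {
    // Case 1: let the requested global properties decide ship strategy and exchange mode.
    static Channel fromRequestedProperties(PlanNode child, RequestedGlobalProperties requested,
            boolean parallelismChange, ExecutionMode mode, boolean breaksPipeline) {
        Channel c = new Channel(child);
        requested.parameterizeChannel(c, parallelismChange, mode, breaksPipeline);
        return c;
    }

    // Case 2: a hint fixed the ship strategy; only the exchange mode is still selected.
    static Channel fromHint(PlanNode child, ShipStrategyType hinted, ExecutionMode mode, boolean breaksPipeline) {
        Channel c = new Channel(child);
        c.setShipStrategy(hinted, DataExchangeMode.select(mode, hinted, breaksPipeline));
        return c;
    }
}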
Use of org.apache.flink.optimizer.plan.Channel in project flink by apache.
From the class BulkIterationNode, method instantiateCandidate:
@SuppressWarnings("unchecked")
@Override
protected void instantiateCandidate(OperatorDescriptorSingle dps, Channel in, List<Set<? extends NamedChannel>> broadcastPlanChannels, List<PlanNode> target, CostEstimator estimator, RequestedGlobalProperties globPropsReq, RequestedLocalProperties locPropsReq) {
// NOTES ON THE ENUMERATION OF THE STEP FUNCTION PLANS:
// Whenever we instantiate the iteration, we enumerate new candidates for the step function.
// That way, we make sure that, for each candidate for the initial partial solution,
// we have a fitting candidate for the step function (often, work is pushed out of the step function).
// Among the candidates of the step function, we keep only those that meet the requested properties of the
// current candidate initial partial solution. That makes sure these properties exist at the beginning of
// the successive iteration.
// 1) Because we enumerate multiple times, we may need to clean the cached plans
// before starting another enumeration
this.nextPartialSolution.accept(PlanCacheCleaner.INSTANCE);
if (this.terminationCriterion != null) {
this.terminationCriterion.accept(PlanCacheCleaner.INSTANCE);
}
// 2) Give the partial solution the properties of the current candidate for the initial partial solution
this.partialSolution.setCandidateProperties(in.getGlobalProperties(), in.getLocalProperties(), in);
final BulkPartialSolutionPlanNode pspn = this.partialSolution.getCurrentPartialSolutionPlanNode();
// 3) Get the alternative plans
List<PlanNode> candidates = this.nextPartialSolution.getAlternativePlans(estimator);
// 4) Make sure that the beginning of the step function does not assume properties that
// are not also produced by the end of the step function.
{
List<PlanNode> newCandidates = new ArrayList<PlanNode>();
for (Iterator<PlanNode> planDeleter = candidates.iterator(); planDeleter.hasNext(); ) {
PlanNode candidate = planDeleter.next();
GlobalProperties atEndGlobal = candidate.getGlobalProperties();
LocalProperties atEndLocal = candidate.getLocalProperties();
FeedbackPropertiesMeetRequirementsReport report = candidate.checkPartialSolutionPropertiesMet(pspn, atEndGlobal, atEndLocal);
if (report == FeedbackPropertiesMeetRequirementsReport.NO_PARTIAL_SOLUTION) {
// the candidate depends on the partial solution only through a broadcast variable, so it can be kept as is
} else if (report == FeedbackPropertiesMeetRequirementsReport.NOT_MET) {
// attach a no-op node through which we create the properties of the original input
Channel toNoOp = new Channel(candidate);
globPropsReq.parameterizeChannel(toNoOp, false, rootConnection.getDataExchangeMode(), false);
locPropsReq.parameterizeChannel(toNoOp);
NoOpUnaryUdfOp noOpUnaryUdfOp = new NoOpUnaryUdfOp<>();
noOpUnaryUdfOp.setInput(candidate.getProgramOperator());
UnaryOperatorNode rebuildPropertiesNode = new UnaryOperatorNode("Rebuild Partial Solution Properties", noOpUnaryUdfOp, true);
rebuildPropertiesNode.setParallelism(candidate.getParallelism());
SingleInputPlanNode rebuildPropertiesPlanNode = new SingleInputPlanNode(rebuildPropertiesNode, "Rebuild Partial Solution Properties", toNoOp, DriverStrategy.UNARY_NO_OP);
rebuildPropertiesPlanNode.initProperties(toNoOp.getGlobalProperties(), toNoOp.getLocalProperties());
estimator.costOperator(rebuildPropertiesPlanNode);
GlobalProperties atEndGlobalModified = rebuildPropertiesPlanNode.getGlobalProperties();
LocalProperties atEndLocalModified = rebuildPropertiesPlanNode.getLocalProperties();
if (!(atEndGlobalModified.equals(atEndGlobal) && atEndLocalModified.equals(atEndLocal))) {
FeedbackPropertiesMeetRequirementsReport report2 = candidate.checkPartialSolutionPropertiesMet(pspn, atEndGlobalModified, atEndLocalModified);
if (report2 != FeedbackPropertiesMeetRequirementsReport.NOT_MET) {
newCandidates.add(rebuildPropertiesPlanNode);
}
}
planDeleter.remove();
}
}
candidates.addAll(newCandidates);
}
if (candidates.isEmpty()) {
return;
}
// 5) Create a candidate for the Iteration Node for every remaining plan of the step function.
if (terminationCriterion == null) {
for (PlanNode candidate : candidates) {
BulkIterationPlanNode node = new BulkIterationPlanNode(this, this.getOperator().getName(), in, pspn, candidate);
GlobalProperties gProps = candidate.getGlobalProperties().clone();
LocalProperties lProps = candidate.getLocalProperties().clone();
node.initProperties(gProps, lProps);
target.add(node);
}
} else if (candidates.size() > 0) {
List<PlanNode> terminationCriterionCandidates = this.terminationCriterion.getAlternativePlans(estimator);
SingleRootJoiner singleRoot = (SingleRootJoiner) this.singleRoot;
for (PlanNode candidate : candidates) {
for (PlanNode terminationCandidate : terminationCriterionCandidates) {
if (singleRoot.areBranchCompatible(candidate, terminationCandidate)) {
BulkIterationPlanNode node = new BulkIterationPlanNode(this, "BulkIteration (" + this.getOperator().getName() + ")", in, pspn, candidate, terminationCandidate);
GlobalProperties gProps = candidate.getGlobalProperties().clone();
LocalProperties lProps = candidate.getLocalProperties().clone();
node.initProperties(gProps, lProps);
target.add(node);
}
}
}
}
}
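For reference, the property-rebuilding step (4) above boils down to the following hedged sketch (the class RebuildPropertiesSketch, the helper name, and its template parameter are illustrative only): a new Channel is parameterized with the requested properties and fed into a UNARY_NO_OP plan node that then carries those properties.

import org.apache.flink.api.common.ExecutionMode;
import org.apache.flink.optimizer.dag.SingleInputNode;
import org.apache.flink.optimizer.dataproperties.RequestedGlobalProperties;
import org.apache.flink.optimizer.dataproperties.RequestedLocalProperties;
import org.apache.flink.optimizer.plan.Channel;
import org.apache.flink.optimizer.plan.PlanNode;
import org.apache.flink.optimizer.plan.SingleInputPlanNode;
import org.apache.flink.runtime.operators.DriverStrategy;

final class RebuildPropertiesSketch {
    // Wraps a candidate in a no-op plan node whose input Channel re-establishes
    // the requested global and local properties.
    static SingleInputPlanNode rebuild(SingleInputNode template, PlanNode candidate,
            RequestedGlobalProperties globReq, RequestedLocalProperties locReq, ExecutionMode mode) {
        Channel toNoOp = new Channel(candidate);
        globReq.parameterizeChannel(toNoOp, false, mode, false);
        locReq.parameterizeChannel(toNoOp);
        SingleInputPlanNode noOp = new SingleInputPlanNode(template, "Rebuild Properties", toNoOp, DriverStrategy.UNARY_NO_OP);
        noOp.initProperties(toNoOp.getGlobalProperties(), toNoOp.getLocalProperties());
        return noOp;
    }
}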
Use of org.apache.flink.optimizer.plan.Channel in project flink by apache.
From the class FeedbackPropertiesMatchTest, method testSingleInputOperatorsChainOfThree:
@Test
public void testSingleInputOperatorsChainOfThree() {
try {
SourcePlanNode target = new SourcePlanNode(getSourceNode(), "Source");
Channel toMap1 = new Channel(target);
SingleInputPlanNode map1 = new SingleInputPlanNode(getMapNode(), "Mapper 1", toMap1, DriverStrategy.MAP);
Channel toMap2 = new Channel(map1);
SingleInputPlanNode map2 = new SingleInputPlanNode(getMapNode(), "Mapper 2", toMap2, DriverStrategy.MAP);
Channel toMap3 = new Channel(map2);
SingleInputPlanNode map3 = new SingleInputPlanNode(getMapNode(), "Mapper 3", toMap3, DriverStrategy.MAP);
// set a local strategy in the first channel, so that later non-matching local properties do not matter
{
GlobalProperties gp = new GlobalProperties();
LocalProperties lp = LocalProperties.forOrdering(new Ordering(3, null, Order.ASCENDING).appendOrdering(1, null, Order.DESCENDING));
RequestedLocalProperties reqLp = new RequestedLocalProperties();
reqLp.setGroupedFields(new FieldList(4, 1));
toMap1.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
toMap1.setLocalStrategy(LocalStrategy.SORT, new FieldList(5, 7), new boolean[] { false, false });
toMap2.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
toMap2.setLocalStrategy(LocalStrategy.NONE);
toMap3.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
toMap3.setLocalStrategy(LocalStrategy.NONE);
toMap1.setRequiredGlobalProps(null);
toMap1.setRequiredLocalProps(null);
toMap2.setRequiredGlobalProps(null);
toMap2.setRequiredLocalProps(null);
toMap3.setRequiredGlobalProps(null);
toMap3.setRequiredLocalProps(reqLp);
FeedbackPropertiesMeetRequirementsReport report = map3.checkPartialSolutionPropertiesMet(target, gp, lp);
assertTrue(report != null && report != NO_PARTIAL_SOLUTION && report != NOT_MET);
}
// set a global strategy in the first channel, so that later non-matching global properties do not matter
{
GlobalProperties gp = new GlobalProperties();
gp.setHashPartitioned(new FieldList(5, 3));
LocalProperties lp = LocalProperties.EMPTY;
RequestedGlobalProperties reqGp = new RequestedGlobalProperties();
reqGp.setAnyPartitioning(new FieldSet(2, 3));
toMap1.setShipStrategy(ShipStrategyType.PARTITION_HASH, new FieldList(1, 2), DataExchangeMode.PIPELINED);
toMap1.setLocalStrategy(LocalStrategy.NONE);
toMap2.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
toMap2.setLocalStrategy(LocalStrategy.NONE);
toMap3.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
toMap3.setLocalStrategy(LocalStrategy.NONE);
toMap1.setRequiredGlobalProps(null);
toMap1.setRequiredLocalProps(null);
toMap2.setRequiredGlobalProps(null);
toMap2.setRequiredLocalProps(null);
toMap3.setRequiredGlobalProps(reqGp);
toMap3.setRequiredLocalProps(null);
FeedbackPropertiesMeetRequirementsReport report = map3.checkPartialSolutionPropertiesMet(target, gp, lp);
assertTrue(report != null && report != NO_PARTIAL_SOLUTION && report != NOT_MET);
}
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
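Finally, a hedged, self-contained sketch (PropertyMatchSketch is a made-up name) of the property matching that the report above is built on: requested properties attached to a Channel are compared via isMetBy with the properties actually produced at that point in the plan.

import org.apache.flink.api.common.operators.util.FieldList;
import org.apache.flink.api.common.operators.util.FieldSet;
import org.apache.flink.optimizer.dataproperties.GlobalProperties;
import org.apache.flink.optimizer.dataproperties.LocalProperties;
import org.apache.flink.optimizer.dataproperties.RequestedGlobalProperties;
import org.apache.flink.optimizer.dataproperties.RequestedLocalProperties;

public final class PropertyMatchSketch {
    public static void main(String[] args) {
        GlobalProperties produced = new GlobalProperties();
        produced.setHashPartitioned(new FieldList(5, 3));
        RequestedGlobalProperties requestedGlobal = new RequestedGlobalProperties();
        requestedGlobal.setAnyPartitioning(new FieldSet(5, 3));
        // Expected to be true: hash partitioning on (5, 3) satisfies "any partitioning" on those fields.
        System.out.println(requestedGlobal.isMetBy(produced));

        LocalProperties local = LocalProperties.forGrouping(new FieldList(4, 1));
        RequestedLocalProperties requestedLocal = new RequestedLocalProperties();
        requestedLocal.setGroupedFields(new FieldList(4, 1));
        // Expected to be true: grouping on (4, 1) satisfies the requested grouping.
        System.out.println(requestedLocal.isMetBy(local));
    }
}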