Use of org.apache.flink.optimizer.plan.NamedChannel in the Apache Flink project.
Class TwoInputNode, method instantiate.
/**
 * Creates one {@link DualInputPlanNode} candidate for every admissible combination of
 * broadcast channels and appends it to {@code target}.
 *
 * <p>The combinations are the cartesian product of {@code broadcastPlanChannels}. A
 * combination is skipped unless it passes the branch-compatibility check. For each surviving
 * combination the node is instantiated from the operator descriptor, the broadcast inputs are
 * attached, and the node's global and local properties are derived from the (semantically
 * filtered) properties of the two input channels.
 *
 * @param operator descriptor used to instantiate the node and to combine the input properties
 * @param in1 channel of the first input
 * @param in2 channel of the second input
 * @param broadcastPlanChannels per broadcast input, the set of alternative channels
 * @param target list that receives the created candidates
 * @param estimator not used by this method
 * @param globPropsReq1 not used by this method
 * @param globPropsReq2 not used by this method
 * @param locPropsReq1 not used by this method
 * @param locPropsReq2 not used by this method
 */
protected void instantiate(OperatorDescriptorDual operator, Channel in1, Channel in2, List<Set<? extends NamedChannel>> broadcastPlanChannels, List<PlanNode> target, CostEstimator estimator, RequestedGlobalProperties globPropsReq1, RequestedGlobalProperties globPropsReq2, RequestedLocalProperties locPropsReq1, RequestedLocalProperties locPropsReq2) {
    final PlanNode firstSource = in1.getSource();
    final PlanNode secondSource = in2.getSource();

    for (List<NamedChannel> combination : Sets.cartesianProduct(broadcastPlanChannels)) {
        if (!isBranchCompatibleCombination(combination, firstSource, secondSource)) {
            continue;
        }

        placePipelineBreakersIfNecessary(operator.getStrategy(), in1, in2);

        final DualInputPlanNode candidate = operator.instantiate(in1, in2, this);
        candidate.setBroadcastInputs(combination);

        // global properties: filter each input by the semantic properties, then combine
        final SemanticProperties globalFilter = getSemanticPropertiesForGlobalPropertyFiltering();
        final GlobalProperties mergedGlobals =
                operator.computeGlobalProperties(
                        in1.getGlobalProperties().clone().filterBySemanticProperties(globalFilter, 0),
                        in2.getGlobalProperties().clone().filterBySemanticProperties(globalFilter, 1));

        // local properties: same procedure with the local-property filter
        final SemanticProperties localFilter = getSemanticPropertiesForLocalPropertyFiltering();
        final LocalProperties mergedLocals =
                operator.computeLocalProperties(
                        in1.getLocalProperties().clone().filterBySemanticProperties(localFilter, 0),
                        in2.getLocalProperties().clone().filterBySemanticProperties(localFilter, 1));

        candidate.initProperties(mergedGlobals, mergedLocals);
        candidate.updatePropertiesWithUniqueSets(getUniqueFields());
        target.add(candidate);
    }
}

/**
 * Checks whether the broadcast inputs of a combination use the same plan candidate at the
 * branching point.
 *
 * <p>Every broadcast source must be branch compatible with at least one of the two regular
 * input sources, and with every broadcast source that precedes it in the combination. A
 * mismatch against the regular inputs ends the check immediately; a mismatch between two
 * broadcast sources only ends the comparison for the current element, while the remaining
 * elements are still examined (the result stays {@code false}).
 */
private boolean isBranchCompatibleCombination(List<NamedChannel> combination, PlanNode firstSource, PlanNode secondSource) {
    boolean compatible = true;
    int position = 0;

    for (NamedChannel current : combination) {
        final PlanNode currentSource = current.getSource();

        final boolean matchesAnInput =
                areBranchCompatible(currentSource, firstSource)
                        || areBranchCompatible(currentSource, secondSource);
        if (!matchesAnInput) {
            return false;
        }

        // compare against all broadcast variables that come earlier in the combination
        for (int earlier = 0; earlier < position; earlier++) {
            final PlanNode earlierSource = combination.get(earlier).getSource();
            if (!areBranchCompatible(currentSource, earlierSource)) {
                compatible = false;
                break;
            }
        }
        position++;
    }
    return compatible;
}
Use of org.apache.flink.optimizer.plan.NamedChannel in the Apache Flink project.
Class TwoInputNode, method addLocalCandidates.
/**
 * Enumerates local-property alternatives for both inputs and instantiates plan candidates for
 * the operator descriptors whose local requirements are satisfied.
 *
 * <p>For every pair of interesting local properties (one per input) the channel templates are
 * cloned and parameterized. Then, for each operator descriptor, the first local-property pair
 * that is met by both channels and is co-fulfilled is used to instantiate candidates; further
 * pairs of that descriptor are not tried.
 *
 * @param template1 channel template for the first input; cloned, never modified
 * @param template2 channel template for the second input; cloned, never modified
 * @param broadcastPlanChannels per broadcast input, the set of alternative channels
 * @param rgps1 requested global properties of the first input, passed on to instantiation
 * @param rgps2 requested global properties of the second input, passed on to instantiation
 * @param target list that receives the created candidates
 * @param validLocalCombinations not used by this method
 * @param estimator cost estimator, passed on to instantiation
 */
protected void addLocalCandidates(Channel template1, Channel template2, List<Set<? extends NamedChannel>> broadcastPlanChannels, RequestedGlobalProperties rgps1, RequestedGlobalProperties rgps2, List<PlanNode> target, LocalPropertiesPair[] validLocalCombinations, CostEstimator estimator) {
    for (RequestedLocalProperties interesting1 : this.input1.getInterestingProperties().getLocalProperties()) {
        final Channel first = template1.clone();
        interesting1.parameterizeChannel(first);

        for (RequestedLocalProperties interesting2 : this.input2.getInterestingProperties().getLocalProperties()) {
            final Channel second = template2.clone();
            interesting2.parameterizeChannel(second);

            for (OperatorDescriptorDual descriptor : getProperties()) {
                for (LocalPropertiesPair pair : descriptor.getPossibleLocalProperties()) {
                    final RequestedLocalProperties required1 = pair.getProperties1();
                    if (!required1.isMetBy(first.getLocalProperties())) {
                        continue;
                    }
                    final RequestedLocalProperties required2 = pair.getProperties2();
                    if (!required2.isMetBy(second.getLocalProperties())) {
                        continue;
                    }
                    // both sides are met individually; they must also fit together
                    // (e.g. sort order), otherwise try the next pair
                    if (!descriptor.areCoFulfilled(required1, required2, first.getLocalProperties(), second.getLocalProperties())) {
                        continue;
                    }

                    // work on copies: setting the required properties and instantiating
                    // may alter the channels and must not leak into other candidates
                    final Channel firstCopy = first.clone();
                    firstCopy.setRequiredLocalProps(required1);
                    final Channel secondCopy = second.clone();
                    secondCopy.setRequiredLocalProps(required2);

                    instantiate(descriptor, firstCopy, secondCopy, broadcastPlanChannels, target, estimator, rgps1, rgps2, interesting1, interesting2);
                    // one matching pair per descriptor is enough
                    break;
                }
            }
        }
    }
}
Use of org.apache.flink.optimizer.plan.NamedChannel in the Apache Flink project.
Class SingleInputNode, method addLocalCandidates.
/**
 * Instantiates plan candidates for each interesting local property of the input.
 *
 * <p>For every interesting local property the channel template is cloned and parameterized.
 * The operator descriptors are then scanned in order; the first descriptor that has a
 * requested local property met by the channel is instantiated, and the scan stops for this
 * interesting property (at most one descriptor is instantiated per interesting property).
 *
 * @param template channel template for the input; cloned, never modified
 * @param broadcastPlanChannels per broadcast input, the set of alternative channels
 * @param rgps requested global properties, passed on to candidate instantiation
 * @param target list that receives the created candidates
 * @param estimator cost estimator, passed on to candidate instantiation
 */
protected void addLocalCandidates(Channel template, List<Set<? extends NamedChannel>> broadcastPlanChannels, RequestedGlobalProperties rgps, List<PlanNode> target, CostEstimator estimator) {
    for (RequestedLocalProperties interesting : this.inConn.getInterestingProperties().getLocalProperties()) {
        final Channel channel = template.clone();
        interesting.parameterizeChannel(channel);

        boolean instantiated = false;
        for (OperatorDescriptorSingle descriptor : getPossibleProperties()) {
            for (RequestedLocalProperties required : descriptor.getPossibleLocalProperties()) {
                if (!required.isMetBy(channel.getLocalProperties())) {
                    continue;
                }
                channel.setRequiredLocalProps(required);
                instantiateCandidate(descriptor, channel, broadcastPlanChannels, target, estimator, rgps, interesting);
                instantiated = true;
                break;
            }
            // the first match ends the search across all descriptors
            if (instantiated) {
                break;
            }
        }
    }
}
Use of org.apache.flink.optimizer.plan.NamedChannel in the Apache Flink project.
Class JobGraphGenerator, method postVisit.
/**
* This method implements the post-visit during the depth-first traversal. When the post visit
* happens, all of the descendants have been processed, so this method connects all of the
* current node's predecessors to the current node.
*
* <p>Outline of the handling, in order:
* <ul>
* <li>source, n-ary union and solution set nodes are skipped;
* <li>nodes for which {@code checkAndConfigurePersistentIntermediateResult} returns true are
* skipped;
* <li>iteration nodes trigger the translation of their step function (nested iterations are
* rejected), and workset iterations additionally get their initial solution set connected;
* <li>nodes without an own vertex are either chained tasks (validated and attached to their
* containing vertex) or merged iteration heads (skipped);
* <li>all remaining nodes get their regular inputs and broadcast inputs translated into
* connections of their job vertex.
* </ul>
*
* @param node The node currently processed during the post-visit.
* @throws CompilerException if the plan is inconsistent or if any exception occurs during
* the translation (every exception raised in the body is wrapped into one).
* @see org.apache.flink.util.Visitor#postVisit(org.apache.flink.util.Visitable)
*/
@Override
public void postVisit(PlanNode node) {
try {
// special cases that are not translated here: source nodes, n-ary union nodes and
// solution set nodes are skipped. the initial solution set input is connected when the
// iteration node is in its postVisit (see the workset iteration handling below)
if (node instanceof SourcePlanNode || node instanceof NAryUnionPlanNode || node instanceof SolutionSetPlanNode) {
return;
}
// if the helper reports that it handled this node, there is nothing more to do.
// NOTE(review): the original comment was truncated ("... predecessor and return");
// presumably the helper registers an intermediate result with the predecessor - confirm
if (checkAndConfigurePersistentIntermediateResult(node)) {
return;
}
// check if we have an iteration. in that case, translate the step function now
if (node instanceof IterationPlanNode) {
// prevent nested iterations
if (node.isOnDynamicPath()) {
throw new CompilerException("Nested Iterations are not possible at the moment!");
}
// if another iteration is currently being translated, we push the current one onto
// the stack so that it can be restored after this iteration's step function
if (this.currentIteration != null) {
this.iterationStack.add(this.currentIteration);
}
this.currentIteration = (IterationPlanNode) node;
this.currentIteration.acceptForStepFunction(this);
// pop the current iteration from the stack (or reset to null if none was pushed)
if (this.iterationStack.isEmpty()) {
this.currentIteration = null;
} else {
this.currentIteration = this.iterationStack.remove(this.iterationStack.size() - 1);
}
// connect the initial solution set now.
if (node instanceof WorksetIterationPlanNode) {
// connect the initial solution set; it is placed at the index equal to the number
// of inputs of the head's driver strategy
WorksetIterationPlanNode wsNode = (WorksetIterationPlanNode) node;
JobVertex headVertex = this.iterations.get(wsNode).getHeadTask();
TaskConfig headConfig = new TaskConfig(headVertex.getConfiguration());
int inputIndex = headConfig.getDriverStrategy().getNumInputs();
headConfig.setIterationHeadSolutionSetInputIndex(inputIndex);
translateChannel(wsNode.getInitialSolutionSetInput(), inputIndex, headVertex, headConfig, false);
}
return;
}
final JobVertex targetVertex = this.vertices.get(node);
// check whether this node has its own task, or is merged with another one
if (targetVertex == null) {
// node's task is merged with another task. it is either chained, or a merged head
// vertex from an iteration
final TaskInChain chainedTask;
if ((chainedTask = this.chainedTasks.get(node)) != null) {
// Chained Task. Sanity check first: exactly one input, no local strategy, and a
// FORWARD (or unset) ship strategy
final Iterator<Channel> inConns = node.getInputs().iterator();
if (!inConns.hasNext()) {
throw new CompilerException("Bug: Found chained task with no input.");
}
final Channel inConn = inConns.next();
if (inConns.hasNext()) {
throw new CompilerException("Bug: Found a chained task with more than one input!");
}
if (inConn.getLocalStrategy() != null && inConn.getLocalStrategy() != LocalStrategy.NONE) {
throw new CompilerException("Bug: Found a chained task with an input local strategy.");
}
if (inConn.getShipStrategy() != null && inConn.getShipStrategy() != ShipStrategyType.FORWARD) {
throw new CompilerException("Bug: Found a chained task with an input ship strategy other than FORWARD.");
}
// determine the vertex that contains this chained task, if not yet assigned
JobVertex container = chainedTask.getContainingVertex();
if (container == null) {
final PlanNode sourceNode = inConn.getSource();
container = this.vertices.get(sourceNode);
if (container == null) {
// predecessor is itself chained; reuse the vertex that contains it
container = this.chainedTasks.get(sourceNode).getContainingVertex();
if (container == null) {
throw new IllegalStateException("Bug: Chained task predecessor has not been assigned its containing vertex.");
}
} else {
// predecessor is a proper task job vertex and this is the first chained
// task. add a forward connection entry.
new TaskConfig(container.getConfiguration()).addOutputShipStrategy(ShipStrategyType.FORWARD);
}
chainedTask.setContainingVertex(container);
}
// add info about the input serializer type
chainedTask.getTaskConfig().setInputSerializer(inConn.getSerializer(), 0);
// update name of container task: "CHAIN <container> -> <chained task>", appending to
// an already existing chain name
String containerTaskName = container.getName();
if (containerTaskName.startsWith("CHAIN ")) {
container.setName(containerTaskName + " -> " + chainedTask.getTaskName());
} else {
container.setName("CHAIN " + containerTaskName + " -> " + chainedTask.getTaskName());
}
// update resource of container task by merging in the chained node's resources
container.setResources(container.getMinResources().merge(node.getMinResources()), container.getPreferredResources().merge(node.getPreferredResources()));
this.chainedTasksInSequence.add(chainedTask);
return;
} else if (node instanceof BulkPartialSolutionPlanNode || node instanceof WorksetPlanNode) {
// merged iteration head task: nothing to do here.
// NOTE(review): the original comment was truncated ("... care of it"); presumably the
// task the head is merged with takes care of it - confirm
return;
} else {
throw new CompilerException("Bug: Unrecognized merged task vertex.");
}
}
// from here on, the node has its own job vertex
if (this.currentIteration != null) {
JobVertex head = this.iterations.get(this.currentIteration).getHeadTask();
// only nodes on the dynamic path are strictly co-located with the iteration head.
// NOTE(review): the original comment was truncated; the surviving part reads "their
// execution determines the deployment slots of the co-location group"
if (node.isOnDynamicPath()) {
targetVertex.setStrictlyCoLocatedWith(head);
}
}
// create the config that will contain all the description of the inputs
final TaskConfig targetVertexConfig = new TaskConfig(targetVertex.getConfiguration());
// get the inputs. if this node is the head of an iteration, we obtain the inputs from the
// enclosing iteration node, because the inputs are the initial inputs to the iteration.
final Iterator<Channel> inConns;
if (node instanceof BulkPartialSolutionPlanNode) {
inConns = ((BulkPartialSolutionPlanNode) node).getContainingIterationNode().getInputs().iterator();
// because the partial solution has its own vertex, it has only one (logical) input.
// note this in the task configuration
targetVertexConfig.setIterationHeadPartialSolutionOrWorksetInputIndex(0);
} else if (node instanceof WorksetPlanNode) {
WorksetPlanNode wspn = (WorksetPlanNode) node;
// the only input translated here is input 2 of the containing iteration node, i.e. the
// input that is the initial workset
inConns = Collections.singleton(wspn.getContainingIterationNode().getInput2()).iterator();
// because we have a stand-alone (non-merged) workset iteration head, the initial
// workset will be input 0 and the solution set will be input 1
targetVertexConfig.setIterationHeadPartialSolutionOrWorksetInputIndex(0);
targetVertexConfig.setIterationHeadSolutionSetInputIndex(1);
} else {
inConns = node.getInputs().iterator();
}
if (!inConns.hasNext()) {
throw new CompilerException("Bug: Found a non-source task with no input.");
}
// translate the regular inputs; translateChannel returns by how much the input index
// advances for the translated channel
int inputIndex = 0;
while (inConns.hasNext()) {
Channel input = inConns.next();
inputIndex += translateChannel(input, inputIndex, targetVertex, targetVertexConfig, false);
}
// broadcast variables: translated like regular inputs (with the broadcast flag set), and
// additionally registered in the config with their name and serializer
int broadcastInputIndex = 0;
for (NamedChannel broadcastInput : node.getBroadcastInputs()) {
int broadcastInputIndexDelta = translateChannel(broadcastInput, broadcastInputIndex, targetVertex, targetVertexConfig, true);
targetVertexConfig.setBroadcastInputName(broadcastInput.getName(), broadcastInputIndex);
targetVertexConfig.setBroadcastInputSerializer(broadcastInput.getSerializer(), broadcastInputIndex);
broadcastInputIndex += broadcastInputIndexDelta;
}
} catch (Exception e) {
// wrap everything (including CompilerExceptions thrown above) with a translation error,
// keeping the original exception as the cause
throw new CompilerException("An error occurred while translating the optimized plan to a JobGraph: " + e.getMessage(), e);
}
}
Use of org.apache.flink.optimizer.plan.NamedChannel in the Apache Flink project.
Class RangePartitionRewriter, method rewriteRangePartitionChannel.
/**
* Replaces the given channel by a sub-plan that samples the data of the channel's source,
* derives range boundaries from the sample and partitions the records accordingly.
*
* <p>The following plan nodes are created, each with zero costs and each added to the plan's
* node collection:
* <ol>
* <li>a map-partition node sampling each partition of the source (source parallelism);
* <li>a group-reduce node combining the samples (parallelism 1);
* <li>a map-partition node building the range boundaries (parallelism 1);
* <li>a map-partition node reading the source again (BATCH data exchange) and receiving the
* boundaries as broadcast input named "RangeBoundaries" (source parallelism);
* <li>a map node removing the range index again, fed through a PARTITION_CUSTOM channel on
* field 0 (target parallelism).
* </ol>
* Finally the given {@code channel} is re-pointed: its source becomes the last created node
* and its ship strategy becomes FORWARD / PIPELINED.
*
* <p>{@code SAMPLES_PER_PARTITION}, {@code SEED}, the {@code *_NAME} constants and
* {@code idPartitioner} are defined outside this method.
*
* @param channel the channel to rewrite; it is modified in place
* @return the two newly created channels that have the original source node as their source
* (to the per-partition sampler and to the range-index assigner). This method does not
* register them as outgoing channels of the source node - presumably the caller does;
* verify against the caller.
*/
private List<Channel> rewriteRangePartitionChannel(Channel channel) {
final List<Channel> sourceNewOutputChannels = new ArrayList<>();
final PlanNode sourceNode = channel.getSource();
final PlanNode targetNode = channel.getTarget();
final int sourceParallelism = sourceNode.getParallelism();
final int targetParallelism = targetNode.getParallelism();
final Costs defaultZeroCosts = new Costs(0, 0, 0);
final TypeComparatorFactory<?> comparator = Utils.getShipComparator(channel, this.plan.getOriginalPlan().getExecutionConfig());
// 1. Fixed size sample in each partition. The sample size scales with the parallelism of
// the target node.
final int sampleSize = SAMPLES_PER_PARTITION * targetParallelism;
final SampleInPartition sampleInPartition = new SampleInPartition(false, sampleSize, SEED);
final TypeInformation<?> sourceOutputType = sourceNode.getOptimizerNode().getOperator().getOperatorInfo().getOutputType();
final TypeInformation<IntermediateSampleData> isdTypeInformation = TypeExtractor.getForClass(IntermediateSampleData.class);
final UnaryOperatorInformation sipOperatorInformation = new UnaryOperatorInformation(sourceOutputType, isdTypeInformation);
final MapPartitionOperatorBase sipOperatorBase = new MapPartitionOperatorBase(sampleInPartition, sipOperatorInformation, SIP_NAME);
final MapPartitionNode sipNode = new MapPartitionNode(sipOperatorBase);
final Channel sipChannel = new Channel(sourceNode, TempMode.NONE);
sipChannel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
final SingleInputPlanNode sipPlanNode = new SingleInputPlanNode(sipNode, SIP_NAME, sipChannel, DriverStrategy.MAP_PARTITION);
sipNode.setParallelism(sourceParallelism);
sipPlanNode.setParallelism(sourceParallelism);
sipPlanNode.initProperties(new GlobalProperties(), new LocalProperties());
sipPlanNode.setCosts(defaultZeroCosts);
sipChannel.setTarget(sipPlanNode);
this.plan.getAllNodes().add(sipPlanNode);
// the channel starts at the original source node, so it is reported back to the caller
sourceNewOutputChannels.add(sipChannel);
// 2. Fixed size sample in a single coordinator (parallelism 1).
final SampleInCoordinator sampleInCoordinator = new SampleInCoordinator(false, sampleSize, SEED);
final UnaryOperatorInformation sicOperatorInformation = new UnaryOperatorInformation(isdTypeInformation, sourceOutputType);
final GroupReduceOperatorBase sicOperatorBase = new GroupReduceOperatorBase(sampleInCoordinator, sicOperatorInformation, SIC_NAME);
final GroupReduceNode sicNode = new GroupReduceNode(sicOperatorBase);
final Channel sicChannel = new Channel(sipPlanNode, TempMode.NONE);
sicChannel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
final SingleInputPlanNode sicPlanNode = new SingleInputPlanNode(sicNode, SIC_NAME, sicChannel, DriverStrategy.ALL_GROUP_REDUCE);
sicNode.setParallelism(1);
sicPlanNode.setParallelism(1);
sicPlanNode.initProperties(new GlobalProperties(), new LocalProperties());
sicPlanNode.setCosts(defaultZeroCosts);
sicChannel.setTarget(sicPlanNode);
sipPlanNode.addOutgoingChannel(sicChannel);
this.plan.getAllNodes().add(sicPlanNode);
// 3. Use sampled data to build range boundaries (parallelism 1).
final RangeBoundaryBuilder rangeBoundaryBuilder = new RangeBoundaryBuilder(comparator, targetParallelism);
final TypeInformation<CommonRangeBoundaries> rbTypeInformation = TypeExtractor.getForClass(CommonRangeBoundaries.class);
final UnaryOperatorInformation rbOperatorInformation = new UnaryOperatorInformation(sourceOutputType, rbTypeInformation);
final MapPartitionOperatorBase rbOperatorBase = new MapPartitionOperatorBase(rangeBoundaryBuilder, rbOperatorInformation, RB_NAME);
final MapPartitionNode rbNode = new MapPartitionNode(rbOperatorBase);
final Channel rbChannel = new Channel(sicPlanNode, TempMode.NONE);
rbChannel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
final SingleInputPlanNode rbPlanNode = new SingleInputPlanNode(rbNode, RB_NAME, rbChannel, DriverStrategy.MAP_PARTITION);
rbNode.setParallelism(1);
rbPlanNode.setParallelism(1);
rbPlanNode.initProperties(new GlobalProperties(), new LocalProperties());
rbPlanNode.setCosts(defaultZeroCosts);
rbChannel.setTarget(rbPlanNode);
sicPlanNode.addOutgoingChannel(rbChannel);
this.plan.getAllNodes().add(rbPlanNode);
// 4. Take range boundaries as broadcast input and take the tuple of partition id and record
// as output.
final AssignRangeIndex assignRangeIndex = new AssignRangeIndex(comparator);
final TypeInformation<Tuple2> ariOutputTypeInformation = new TupleTypeInfo<>(BasicTypeInfo.INT_TYPE_INFO, sourceOutputType);
final UnaryOperatorInformation ariOperatorInformation = new UnaryOperatorInformation(sourceOutputType, ariOutputTypeInformation);
final MapPartitionOperatorBase ariOperatorBase = new MapPartitionOperatorBase(assignRangeIndex, ariOperatorInformation, ARI_NAME);
final MapPartitionNode ariNode = new MapPartitionNode(ariOperatorBase);
final Channel ariChannel = new Channel(sourceNode, TempMode.NONE);
// To avoid deadlock, set the DataExchangeMode of channel between source node and this to
// Batch.
ariChannel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.BATCH);
final SingleInputPlanNode ariPlanNode = new SingleInputPlanNode(ariNode, ARI_NAME, ariChannel, DriverStrategy.MAP_PARTITION);
ariNode.setParallelism(sourceParallelism);
ariPlanNode.setParallelism(sourceParallelism);
ariPlanNode.initProperties(new GlobalProperties(), new LocalProperties());
ariPlanNode.setCosts(defaultZeroCosts);
ariChannel.setTarget(ariPlanNode);
this.plan.getAllNodes().add(ariPlanNode);
// second new channel that starts at the original source node
sourceNewOutputChannels.add(ariChannel);
// the range boundaries reach the index assigner as its single broadcast input
final NamedChannel broadcastChannel = new NamedChannel("RangeBoundaries", rbPlanNode);
broadcastChannel.setShipStrategy(ShipStrategyType.BROADCAST, DataExchangeMode.PIPELINED);
broadcastChannel.setTarget(ariPlanNode);
List<NamedChannel> broadcastChannels = new ArrayList<>(1);
broadcastChannels.add(broadcastChannel);
ariPlanNode.setBroadcastInputs(broadcastChannels);
// 5. Remove the partition id. The records are shipped with a custom partitioning on
// field 0 of the tuple produced in step 4.
final Channel partChannel = new Channel(ariPlanNode, TempMode.NONE);
final FieldList keys = new FieldList(0);
partChannel.setShipStrategy(ShipStrategyType.PARTITION_CUSTOM, keys, idPartitioner, DataExchangeMode.PIPELINED);
ariPlanNode.addOutgoingChannel(partChannel);
final RemoveRangeIndex partitionIDRemoveWrapper = new RemoveRangeIndex();
final UnaryOperatorInformation prOperatorInformation = new UnaryOperatorInformation(ariOutputTypeInformation, sourceOutputType);
final MapOperatorBase prOperatorBase = new MapOperatorBase(partitionIDRemoveWrapper, prOperatorInformation, PR_NAME);
final MapNode prRemoverNode = new MapNode(prOperatorBase);
final SingleInputPlanNode prPlanNode = new SingleInputPlanNode(prRemoverNode, PR_NAME, partChannel, DriverStrategy.MAP);
partChannel.setTarget(prPlanNode);
prRemoverNode.setParallelism(targetParallelism);
prPlanNode.setParallelism(targetParallelism);
// the output of this node is declared as range partitioned
GlobalProperties globalProperties = new GlobalProperties();
globalProperties.setRangePartitioned(new Ordering(0, null, Order.ASCENDING));
prPlanNode.initProperties(globalProperties, new LocalProperties());
prPlanNode.setCosts(defaultZeroCosts);
this.plan.getAllNodes().add(prPlanNode);
// 6. Connect to target node: the original channel now starts at the index remover and
// simply forwards the already partitioned data.
channel.setSource(prPlanNode);
channel.setShipStrategy(ShipStrategyType.FORWARD, DataExchangeMode.PIPELINED);
prPlanNode.addOutgoingChannel(channel);
return sourceNewOutputChannels;
}
Aggregations