use of org.apache.flink.runtime.jobgraph.JobVertex in project flink by apache.
the class JobGraphGenerator method finalizeBulkIteration.
private void finalizeBulkIteration(IterationDescriptor descr) {
final BulkIterationPlanNode bulkNode = (BulkIterationPlanNode) descr.getIterationNode();
final JobVertex headVertex = descr.getHeadTask();
final TaskConfig headConfig = new TaskConfig(headVertex.getConfiguration());
final TaskConfig headFinalOutputConfig = descr.getHeadFinalResultConfig();
// ------------ finalize the head config with the final outputs and the sync gate ------------
final int numStepFunctionOuts = headConfig.getNumOutputs();
final int numFinalOuts = headFinalOutputConfig.getNumOutputs();
if (numStepFunctionOuts == 0) {
throw new CompilerException("The iteration has no operation inside the step function.");
}
headConfig.setIterationHeadFinalOutputConfig(headFinalOutputConfig);
headConfig.setIterationHeadIndexOfSyncOutput(numStepFunctionOuts + numFinalOuts);
final double relativeMemForBackChannel = bulkNode.getRelativeMemoryPerSubTask();
if (relativeMemForBackChannel <= 0) {
throw new CompilerException("Bug: No memory has been assigned to the iteration back channel.");
}
headConfig.setRelativeBackChannelMemory(relativeMemForBackChannel);
// --------------------------- create the sync task ---------------------------
final JobVertex sync = new JobVertex("Sync (" + bulkNode.getNodeName() + ")");
sync.setResources(bulkNode.getMinResources(), bulkNode.getPreferredResources());
sync.setInvokableClass(IterationSynchronizationSinkTask.class);
sync.setParallelism(1);
sync.setMaxParallelism(1);
this.auxVertices.add(sync);
final TaskConfig syncConfig = new TaskConfig(sync.getConfiguration());
syncConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(0, headVertex.getParallelism());
// set the number of iterations and the convergence criterion for the sync
final int maxNumIterations = bulkNode.getIterationNode().getIterationContract().getMaximumNumberOfIterations();
if (maxNumIterations < 1) {
throw new CompilerException("Cannot create bulk iteration with unspecified maximum number of iterations.");
}
syncConfig.setNumberOfIterations(maxNumIterations);
// connect the sync task
sync.connectNewDataSetAsInput(headVertex, DistributionPattern.POINTWISE, ResultPartitionType.PIPELINED);
// ----------------------------- create the iteration tail ------------------------------
final PlanNode rootOfTerminationCriterion = bulkNode.getRootOfTerminationCriterion();
final PlanNode rootOfStepFunction = bulkNode.getRootOfStepFunction();
final TaskConfig tailConfig;
JobVertex rootOfStepFunctionVertex = this.vertices.get(rootOfStepFunction);
if (rootOfStepFunctionVertex == null) {
// last op is chained
final TaskInChain taskInChain = this.chainedTasks.get(rootOfStepFunction);
if (taskInChain == null) {
throw new CompilerException("Bug: Tail of step function not found as vertex or chained task.");
}
rootOfStepFunctionVertex = taskInChain.getContainingVertex();
// the fake channel is statically typed to pact record. no data is sent over this
// channel anyways.
tailConfig = taskInChain.getTaskConfig();
} else {
tailConfig = new TaskConfig(rootOfStepFunctionVertex.getConfiguration());
}
tailConfig.setIsWorksetUpdate();
// No following termination criterion
if (rootOfStepFunction.getOutgoingChannels().isEmpty()) {
rootOfStepFunctionVertex.setInvokableClass(IterationTailTask.class);
tailConfig.setOutputSerializer(bulkNode.getSerializerForIterationChannel());
}
// create the fake output task for termination criterion, if needed
final TaskConfig tailConfigOfTerminationCriterion;
// If we have a termination criterion and it is not an intermediate node
if (rootOfTerminationCriterion != null && rootOfTerminationCriterion.getOutgoingChannels().isEmpty()) {
JobVertex rootOfTerminationCriterionVertex = this.vertices.get(rootOfTerminationCriterion);
if (rootOfTerminationCriterionVertex == null) {
// last op is chained
final TaskInChain taskInChain = this.chainedTasks.get(rootOfTerminationCriterion);
if (taskInChain == null) {
throw new CompilerException("Bug: Tail of termination criterion not found as vertex or chained task.");
}
rootOfTerminationCriterionVertex = taskInChain.getContainingVertex();
// the fake channel is statically typed to pact record. no data is sent over this
// channel anyways.
tailConfigOfTerminationCriterion = taskInChain.getTaskConfig();
} else {
tailConfigOfTerminationCriterion = new TaskConfig(rootOfTerminationCriterionVertex.getConfiguration());
}
rootOfTerminationCriterionVertex.setInvokableClass(IterationTailTask.class);
// Hack
tailConfigOfTerminationCriterion.setIsSolutionSetUpdate();
tailConfigOfTerminationCriterion.setOutputSerializer(bulkNode.getSerializerForIterationChannel());
// tell the head that it needs to wait for the solution set updates
headConfig.setWaitForSolutionSetUpdate();
}
// ------------------- register the aggregators -------------------
AggregatorRegistry aggs = bulkNode.getIterationNode().getIterationContract().getAggregators();
Collection<AggregatorWithName<?>> allAggregators = aggs.getAllRegisteredAggregators();
headConfig.addIterationAggregators(allAggregators);
syncConfig.addIterationAggregators(allAggregators);
String convAggName = aggs.getConvergenceCriterionAggregatorName();
ConvergenceCriterion<?> convCriterion = aggs.getConvergenceCriterion();
if (convCriterion != null || convAggName != null) {
if (convCriterion == null) {
throw new CompilerException("Error: Convergence criterion aggregator set, but criterion is null.");
}
if (convAggName == null) {
throw new CompilerException("Error: Aggregator convergence criterion set, but aggregator is null.");
}
syncConfig.setConvergenceCriterion(convAggName, convCriterion);
}
}
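All of the head, tail, and sync settings above travel through the vertices' Configuration objects: a TaskConfig is only a typed view over JobVertex#getConfiguration(), so several TaskConfig instances wrapping the same vertex see each other's writes. A minimal illustrative sketch of that pattern (the vertex name, parallelism, and memory fraction are made up, not taken from the method above):
JobVertex head = new JobVertex("Iteration Head (example)");
head.setInvokableClass(IterationHeadTask.class);
head.setParallelism(4);
// writes go straight into the vertex's Configuration ...
TaskConfig headConfig = new TaskConfig(head.getConfiguration());
headConfig.setRelativeBackChannelMemory(0.1);
// ... so another TaskConfig over the same Configuration reads the same values back
TaskConfig sameView = new TaskConfig(head.getConfiguration());
double backChannelMemory = sameView.getRelativeBackChannelMemory();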
use of org.apache.flink.runtime.jobgraph.JobVertex in project flink by apache.
the class JobGraphGenerator method translateChannel.
private int translateChannel(Channel input, int inputIndex, JobVertex targetVertex, TaskConfig targetVertexConfig, boolean isBroadcast) throws Exception {
final PlanNode inputPlanNode = input.getSource();
final Iterator<Channel> allInChannels;
if (inputPlanNode instanceof NAryUnionPlanNode) {
allInChannels = ((NAryUnionPlanNode) inputPlanNode).getListOfInputs().iterator();
// if the union has a batch data exchange, its input channels must use batch exchange as
// well, because the union itself does not become a vertex in the JobGraph; otherwise this
// can result in deadlocks when closing a branching flow at runtime.
for (Channel in : inputPlanNode.getInputs()) {
if (input.getDataExchangeMode().equals(DataExchangeMode.BATCH)) {
in.setDataExchangeMode(DataExchangeMode.BATCH);
}
if (isBroadcast) {
in.setShipStrategy(ShipStrategyType.BROADCAST, in.getDataExchangeMode());
}
}
// The outgoing connection of an NAryUnion must be a forward connection.
if (input.getShipStrategy() != ShipStrategyType.FORWARD && !isBroadcast) {
throw new CompilerException("Optimized plan contains Union with non-forward outgoing ship strategy.");
}
} else if (inputPlanNode instanceof BulkPartialSolutionPlanNode) {
if (this.vertices.get(inputPlanNode) == null) {
// merged iteration head
final BulkPartialSolutionPlanNode pspn = (BulkPartialSolutionPlanNode) inputPlanNode;
final BulkIterationPlanNode iterationNode = pspn.getContainingIterationNode();
// check if the iteration's input is a union
if (iterationNode.getInput().getSource() instanceof NAryUnionPlanNode) {
allInChannels = (iterationNode.getInput().getSource()).getInputs().iterator();
} else {
allInChannels = Collections.singletonList(iterationNode.getInput()).iterator();
}
// also, set the index of the gate with the partial solution
targetVertexConfig.setIterationHeadPartialSolutionOrWorksetInputIndex(inputIndex);
} else {
// standalone iteration head
allInChannels = Collections.singletonList(input).iterator();
}
} else if (inputPlanNode instanceof WorksetPlanNode) {
if (this.vertices.get(inputPlanNode) == null) {
// merged iteration head
final WorksetPlanNode wspn = (WorksetPlanNode) inputPlanNode;
final WorksetIterationPlanNode iterationNode = wspn.getContainingIterationNode();
// check if the iteration's input is a union
if (iterationNode.getInput2().getSource() instanceof NAryUnionPlanNode) {
allInChannels = (iterationNode.getInput2().getSource()).getInputs().iterator();
} else {
allInChannels = Collections.singletonList(iterationNode.getInput2()).iterator();
}
// also, set the index of the gate with the partial solution
targetVertexConfig.setIterationHeadPartialSolutionOrWorksetInputIndex(inputIndex);
} else {
// standalone iteration head
allInChannels = Collections.singletonList(input).iterator();
}
} else if (inputPlanNode instanceof SolutionSetPlanNode) {
// skip this connection: the solution set is accessed through a local index at runtime
// rather than a vertex connection
return 0;
} else {
allInChannels = Collections.singletonList(input).iterator();
}
// check that the type serializer is consistent
TypeSerializerFactory<?> typeSerFact = null;
// accounting for channels on the dynamic path
int numChannelsTotal = 0;
int numChannelsDynamicPath = 0;
int numDynamicSenderTasksTotal = 0;
// expand the channel to all the union channels, in case there is a union operator at its source
while (allInChannels.hasNext()) {
final Channel inConn = allInChannels.next();
// sanity check the common serializer
if (typeSerFact == null) {
typeSerFact = inConn.getSerializer();
} else if (!typeSerFact.equals(inConn.getSerializer())) {
throw new CompilerException("Conflicting types in union operator.");
}
final PlanNode sourceNode = inConn.getSource();
JobVertex sourceVertex = this.vertices.get(sourceNode);
TaskConfig sourceVertexConfig;
if (sourceVertex == null) {
// this predecessor is chained to another task or an iteration
final TaskInChain chainedTask;
final IterationDescriptor iteration;
if ((chainedTask = this.chainedTasks.get(sourceNode)) != null) {
// push chained task
if (chainedTask.getContainingVertex() == null) {
throw new IllegalStateException("Bug: Chained task has not been assigned its containing vertex when connecting.");
}
sourceVertex = chainedTask.getContainingVertex();
sourceVertexConfig = chainedTask.getTaskConfig();
} else if ((iteration = this.iterations.get(sourceNode)) != null) {
// predecessor is an iteration
sourceVertex = iteration.getHeadTask();
sourceVertexConfig = iteration.getHeadFinalResultConfig();
} else {
throw new CompilerException("Bug: Could not resolve source node for a channel.");
}
} else {
// predecessor is its own vertex
sourceVertexConfig = new TaskConfig(sourceVertex.getConfiguration());
}
DistributionPattern pattern = connectJobVertices(inConn, inputIndex, sourceVertex, sourceVertexConfig, targetVertex, targetVertexConfig, isBroadcast);
// accounting on channels and senders
numChannelsTotal++;
if (inConn.isOnDynamicPath()) {
numChannelsDynamicPath++;
numDynamicSenderTasksTotal += getNumberOfSendersPerReceiver(pattern, sourceVertex.getParallelism(), targetVertex.getParallelism());
}
}
// for iterations, either all or none of the channels of a logical input may be on the
// dynamic path; a mix would mean the input
// is a union between nodes on the static and nodes on the dynamic path
if (numChannelsDynamicPath > 0 && numChannelsTotal != numChannelsDynamicPath) {
throw new CompilerException("Error: It is currently not supported to union between dynamic and static path in an iteration.");
}
if (numDynamicSenderTasksTotal > 0) {
if (isBroadcast) {
targetVertexConfig.setBroadcastGateIterativeWithNumberOfEventsUntilInterrupt(inputIndex, numDynamicSenderTasksTotal);
} else {
targetVertexConfig.setGateIterativeWithNumberOfEventsUntilInterrupt(inputIndex, numDynamicSenderTasksTotal);
}
}
// the local strategy is added only once. in non-union case that is the actual edge,
// in the union case, it is the edge between union and the target node
addLocalInfoFromChannelToConfig(input, targetVertexConfig, inputIndex, isBroadcast);
return 1;
}
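The vertex-to-vertex wiring that connectJobVertices performs for every channel ultimately goes through JobVertex#connectNewDataSetAsInput, with the DistributionPattern derived from the channel's ship strategy (POINTWISE for forward-style connections, ALL_TO_ALL for repartitioning or broadcast). A hedged sketch with made-up vertex names:
JobVertex mapper = new JobVertex("Mapper (example)");
JobVertex reducer = new JobVertex("Reducer (example)");
// forward-style connection: each reducer subtask reads from its corresponding mapper subtask
reducer.connectNewDataSetAsInput(mapper, DistributionPattern.POINTWISE, ResultPartitionType.PIPELINED);
// a repartitioning or broadcast connection would use DistributionPattern.ALL_TO_ALL instead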
use of org.apache.flink.runtime.jobgraph.JobVertex in project flink by apache.
the class JobGraphGenerator method compileJobGraph.
public JobGraph compileJobGraph(OptimizedPlan program, JobID jobId) {
if (program == null) {
throw new NullPointerException("Program is null, did you called " + "ExecutionEnvironment.execute()");
}
if (jobId == null) {
jobId = JobID.generate();
}
this.vertices = new HashMap<PlanNode, JobVertex>();
this.chainedTasks = new HashMap<PlanNode, TaskInChain>();
this.chainedTasksInSequence = new ArrayList<TaskInChain>();
this.auxVertices = new ArrayList<JobVertex>();
this.iterations = new HashMap<IterationPlanNode, IterationDescriptor>();
this.iterationStack = new ArrayList<IterationPlanNode>();
this.sharingGroup = new SlotSharingGroup();
// this starts the traversal that generates the job graph
program.accept(this);
// sanity check that we are not somehow in an iteration at the end
if (this.currentIteration != null) {
throw new CompilerException("The graph translation ended prematurely, leaving an unclosed iteration.");
}
// finalize the iterations
for (IterationDescriptor iteration : this.iterations.values()) {
if (iteration.getIterationNode() instanceof BulkIterationPlanNode) {
finalizeBulkIteration(iteration);
} else if (iteration.getIterationNode() instanceof WorksetIterationPlanNode) {
finalizeWorksetIteration(iteration);
} else {
throw new CompilerException("Unrecognized iteration node: " + iteration.getIterationNode());
}
}
// now that the traversal is done, have the chained tasks write their configurations into their
// parents' configurations
for (TaskInChain tic : this.chainedTasksInSequence) {
TaskConfig t = new TaskConfig(tic.getContainingVertex().getConfiguration());
t.addChainedTask(tic.getChainedTask(), tic.getTaskConfig(), tic.getTaskName());
}
// ----- attach the additional info to the job vertices, for display in the runtime monitor
attachOperatorNamesAndDescriptions();
for (JobVertex vertex : this.auxVertices) {
vertex.setSlotSharingGroup(sharingGroup);
}
final Map<String, DistributedCache.DistributedCacheEntry> userArtifacts = JobGraphUtils.prepareUserArtifactEntries(program.getOriginalPlan().getCachedFiles().stream().collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)), jobId);
// create the job graph object
final JobGraph graph;
try {
graph = JobGraphBuilder.newBatchJobGraphBuilder()
.setJobId(jobId)
.setJobName(program.getJobName())
.setExecutionConfig(program.getOriginalPlan().getExecutionConfig())
.addJobVertices(vertices.values())
.addJobVertices(auxVertices)
.addUserArtifacts(userArtifacts)
.build();
} catch (IOException e) {
throw new CompilerException("Could not serialize the ExecutionConfig." + "This indicates that non-serializable types (like custom serializers) were registered");
}
// release all references again
this.vertices = null;
this.chainedTasks = null;
this.chainedTasksInSequence = null;
this.auxVertices = null;
this.iterations = null;
this.iterationStack = null;
// return job graph
return graph;
}
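A typical way to reach this method is to build a batch Plan, run it through the Optimizer, and pass the resulting OptimizedPlan to a JobGraphGenerator. The following is a rough sketch under the assumption of a simple local batch program (imports omitted, as in the snippets above):
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
env.fromElements(1, 2, 3).map(i -> i * 2).output(new DiscardingOutputFormat<>());
Plan plan = env.createProgramPlan();
// optimize the plan with default statistics and cost estimator
OptimizedPlan optimizedPlan = new Optimizer(new DataStatistics(), new DefaultCostEstimator(), new Configuration()).compile(plan);
// the single-argument overload generates a fresh JobID; the overload shown above accepts an explicit one
JobGraph jobGraph = new JobGraphGenerator().compileJobGraph(optimizedPlan);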
use of org.apache.flink.runtime.jobgraph.JobVertex in project flink by apache.
the class JobGraphGenerator method createDualInputVertex.
private JobVertex createDualInputVertex(DualInputPlanNode node) throws CompilerException {
final String taskName = node.getNodeName();
final DriverStrategy ds = node.getDriverStrategy();
final JobVertex vertex = new JobVertex(taskName);
final TaskConfig config = new TaskConfig(vertex.getConfiguration());
vertex.setResources(node.getMinResources(), node.getPreferredResources());
vertex.setInvokableClass((this.currentIteration != null && node.isOnDynamicPath()) ? IterationIntermediateTask.class : BatchTask.class);
// set user code
config.setStubWrapper(node.getProgramOperator().getUserCodeWrapper());
config.setStubParameters(node.getProgramOperator().getParameters());
// set the driver strategy
config.setDriver(ds.getDriverClass());
config.setDriverStrategy(ds);
if (node.getComparator1() != null) {
config.setDriverComparator(node.getComparator1(), 0);
}
if (node.getComparator2() != null) {
config.setDriverComparator(node.getComparator2(), 1);
}
if (node.getPairComparator() != null) {
config.setDriverPairComparator(node.getPairComparator());
}
// assign memory, file-handles, etc.
assignDriverResources(node, config);
return vertex;
}
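The vertex setup above follows a generic pattern: choose the invokable class depending on whether the node sits on an iteration's dynamic path, then record the driver strategy in the vertex's TaskConfig. An illustrative sketch with a hand-picked strategy (the flag and the strategy are made-up examples; the optimizer normally derives them):
JobVertex vertex = new JobVertex("Join (example)");
// hypothetical flag; the generator checks this.currentIteration != null && node.isOnDynamicPath()
boolean onDynamicPath = false;
vertex.setInvokableClass(onDynamicPath ? IterationIntermediateTask.class : BatchTask.class);
TaskConfig config = new TaskConfig(vertex.getConfiguration());
config.setDriver(DriverStrategy.HYBRIDHASH_BUILD_FIRST.getDriverClass());
config.setDriverStrategy(DriverStrategy.HYBRIDHASH_BUILD_FIRST);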
use of org.apache.flink.runtime.jobgraph.JobVertex in project flink by apache.
the class JobGraphGenerator method checkAndConfigurePersistentIntermediateResult.
private boolean checkAndConfigurePersistentIntermediateResult(PlanNode node) {
if (!(node instanceof SinkPlanNode)) {
return false;
}
final Object userCodeObject = node.getProgramOperator().getUserCodeWrapper().getUserCodeObject();
if (!(userCodeObject instanceof BlockingShuffleOutputFormat)) {
return false;
}
final Iterator<Channel> inputIterator = node.getInputs().iterator();
checkState(inputIterator.hasNext(), "SinkPlanNode must have an input.");
final PlanNode predecessorNode = inputIterator.next().getSource();
final JobVertex predecessorVertex = (vertices.containsKey(predecessorNode)) ? vertices.get(predecessorNode) : chainedTasks.get(predecessorNode).getContainingVertex();
checkState(predecessorVertex != null, "Bug: Chained task has not been assigned its containing vertex when connecting.");
// use the specified IntermediateDataSetID
predecessorVertex.createAndAddResultDataSet(new IntermediateDataSetID(((BlockingShuffleOutputFormat) userCodeObject).getIntermediateDataSetId()), ResultPartitionType.BLOCKING_PERSISTENT);
// remove this node so that the OutputFormatVertex is not shown in the final JobGraph.
vertices.remove(node);
return true;
}
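On the user side, such a sink usually comes from writing a DataSet to BlockingShuffleOutputFormat with a caller-supplied id, which is what makes the predecessor's result addressable as a persistent blocking partition. A hedged sketch (dataSet stands for some previously built DataSet; the id is freshly generated here for illustration):
AbstractID intermediateDataSetId = new AbstractID();
// the format produces no output of its own; it only tags the sink so that the predecessor's
// result partition is created as BLOCKING_PERSISTENT with the given id
dataSet.output(BlockingShuffleOutputFormat.createOutputFormat(intermediateDataSetId));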