Use of org.apache.flink.runtime.jobgraph.JobVertex in project flink by apache.
The class JobGraphGenerator, method createBulkIterationHead.
private JobVertex createBulkIterationHead(BulkPartialSolutionPlanNode pspn) {
// get the bulk iteration that corresponds to this partial solution node
final BulkIterationPlanNode iteration = pspn.getContainingIterationNode();
// check whether we need an individual vertex for the partial solution, or whether we
// attach ourselves to the vertex of the parent node. We can combine the head with a node of
// the step function if
// 1) there is exactly one parent that the partial solution connects to via a forward pattern
//    and no local strategy,
// 2) the parallelism and the number of subtasks per instance do not change,
// 3) that successor is not a union,
// 4) that successor is not itself the last node of the step function, and
// 5) there is no local strategy on the edge for the initial partial solution, as this would
//    translate to a local strategy that is executed only in the first iteration.
final boolean merge;
if (mergeIterationAuxTasks && pspn.getOutgoingChannels().size() == 1) {
final Channel c = pspn.getOutgoingChannels().get(0);
final PlanNode successor = c.getTarget();
merge = c.getShipStrategy() == ShipStrategyType.FORWARD
        && c.getLocalStrategy() == LocalStrategy.NONE
        && c.getTempMode() == TempMode.NONE
        && successor.getParallelism() == pspn.getParallelism()
        && !(successor instanceof NAryUnionPlanNode)
        && successor != iteration.getRootOfStepFunction()
        && iteration.getInput().getLocalStrategy() == LocalStrategy.NONE;
} else {
merge = false;
}
// create or adopt the head vertex
final JobVertex toReturn;
final JobVertex headVertex;
final TaskConfig headConfig;
if (merge) {
final PlanNode successor = pspn.getOutgoingChannels().get(0).getTarget();
headVertex = this.vertices.get(successor);
if (headVertex == null) {
throw new CompilerException("Bug: Trying to merge solution set with its successor, but successor has not been created.");
}
// reset the vertex type to iteration head
headVertex.setInvokableClass(IterationHeadTask.class);
headConfig = new TaskConfig(headVertex.getConfiguration());
toReturn = null;
} else {
// instantiate the head vertex and give it a no-op driver as the driver strategy.
// everything else happens in the post visit, after the input (the initial partial
// solution) is connected.
headVertex = new JobVertex("PartialSolution (" + iteration.getNodeName() + ")");
headVertex.setResources(iteration.getMinResources(), iteration.getPreferredResources());
headVertex.setInvokableClass(IterationHeadTask.class);
headConfig = new TaskConfig(headVertex.getConfiguration());
headConfig.setDriver(NoOpDriver.class);
toReturn = headVertex;
}
// look up the iteration descriptor and register the head task with it
IterationDescriptor descr = this.iterations.get(iteration);
if (descr == null) {
throw new CompilerException("Bug: Iteration descriptor was not created when translating the iteration node.");
}
descr.setHeadTask(headVertex, headConfig);
return toReturn;
}
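
For reference, the five merge conditions above can be restated as a single predicate. The following is a minimal, self-contained sketch: Node and Edge are simplified stand-ins invented here for readability, not Flink's PlanNode and Channel, and only the shape of the check mirrors the method above.

import java.util.List;

final class MergeDecisionSketch {

    enum Ship { FORWARD, PARTITION_HASH }

    enum Local { NONE, SORT }

    // hypothetical stand-in for PlanNode, for illustration only
    record Node(int parallelism, boolean isUnion, boolean isStepFunctionRoot) {}

    // hypothetical stand-in for Channel
    record Edge(Node target, Ship ship, Local local) {}

    static boolean canMergeHead(Node head, List<Edge> outgoing, Local initialInputLocal) {
        if (outgoing.size() != 1) {
            // condition 1 (first half): exactly one consumer
            return false;
        }
        Edge e = outgoing.get(0);
        return e.ship() == Ship.FORWARD                             // condition 1: forward pattern
                && e.local() == Local.NONE                          // condition 1: no local strategy
                && e.target().parallelism() == head.parallelism()   // condition 2: same parallelism
                && !e.target().isUnion()                            // condition 3: not a union
                && !e.target().isStepFunctionRoot()                 // condition 4: not the step-function tail
                && initialInputLocal == Local.NONE;                 // condition 5: plain initial input
    }
}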
Use of org.apache.flink.runtime.jobgraph.JobVertex in project flink by apache.
The class JobGraphGenerator, method preVisit.
/**
* This method implements the pre-visiting during a depth-first traversal. It creates the job
* vertex and sets the local strategy.
*
* @param node The node that is currently processed.
* @return True, if the visitor should descend to the node's children, false if not.
* @see org.apache.flink.util.Visitor#preVisit(org.apache.flink.util.Visitable)
*/
@Override
public boolean preVisit(PlanNode node) {
// check if we have visited this node before. in non-tree graphs, this happens
if (this.vertices.containsKey(node) || this.chainedTasks.containsKey(node) || this.iterations.containsKey(node)) {
// return false to prevent further descent
return false;
}
// the vertex to be created for the current node
final JobVertex vertex;
try {
if (node instanceof SinkPlanNode) {
vertex = createDataSinkVertex((SinkPlanNode) node);
} else if (node instanceof SourcePlanNode) {
vertex = createDataSourceVertex((SourcePlanNode) node);
} else if (node instanceof BulkIterationPlanNode) {
BulkIterationPlanNode iterationNode = (BulkIterationPlanNode) node;
// for the bulk iteration, we skip creating anything for now. we create the graph
// for the step function in the post visit.
// check that the root of the step function has the same parallelism as the iteration,
// because the tail must have the same parallelism as the head: we can only merge the
// last operator with the tail if they have the same parallelism. not merging is
// currently not implemented.
PlanNode root = iterationNode.getRootOfStepFunction();
if (root.getParallelism() != node.getParallelism()) {
throw new CompilerException("Error: The final operator of the step " + "function has a different parallelism than the iteration operator itself.");
}
IterationDescriptor descr = new IterationDescriptor(iterationNode, this.iterationIdEnumerator++);
this.iterations.put(iterationNode, descr);
vertex = null;
} else if (node instanceof WorksetIterationPlanNode) {
WorksetIterationPlanNode iterationNode = (WorksetIterationPlanNode) node;
// we have the same constraints as for the bulk iteration
PlanNode nextWorkSet = iterationNode.getNextWorkSetPlanNode();
PlanNode solutionSetDelta = iterationNode.getSolutionSetDeltaPlanNode();
if (nextWorkSet.getParallelism() != node.getParallelism()) {
throw new CompilerException("It is currently not supported that the final operator of the step " + "function has a different parallelism than the iteration operator itself.");
}
if (solutionSetDelta.getParallelism() != node.getParallelism()) {
throw new CompilerException("It is currently not supported that the final operator of the step " + "function has a different parallelism than the iteration operator itself.");
}
IterationDescriptor descr = new IterationDescriptor(iterationNode, this.iterationIdEnumerator++);
this.iterations.put(iterationNode, descr);
vertex = null;
} else if (node instanceof SingleInputPlanNode) {
vertex = createSingleInputVertex((SingleInputPlanNode) node);
} else if (node instanceof DualInputPlanNode) {
vertex = createDualInputVertex((DualInputPlanNode) node);
} else if (node instanceof NAryUnionPlanNode) {
// skip the union for now
vertex = null;
} else if (node instanceof BulkPartialSolutionPlanNode) {
// create a head node (or not, if it is merged into its successor)
vertex = createBulkIterationHead((BulkPartialSolutionPlanNode) node);
} else if (node instanceof SolutionSetPlanNode) {
// we adjust the joins / cogroups that go into the solution set here
for (Channel c : node.getOutgoingChannels()) {
DualInputPlanNode target = (DualInputPlanNode) c.getTarget();
JobVertex accessingVertex = this.vertices.get(target);
TaskConfig conf = new TaskConfig(accessingVertex.getConfiguration());
int inputNum = c == target.getInput1() ? 0 : c == target.getInput2() ? 1 : -1;
// sanity checks
if (inputNum == -1) {
throw new CompilerException();
}
// adjust the driver
if (conf.getDriver().equals(JoinDriver.class)) {
conf.setDriver(inputNum == 0 ? JoinWithSolutionSetFirstDriver.class : JoinWithSolutionSetSecondDriver.class);
} else if (conf.getDriver().equals(CoGroupDriver.class)) {
conf.setDriver(inputNum == 0 ? CoGroupWithSolutionSetFirstDriver.class : CoGroupWithSolutionSetSecondDriver.class);
} else {
throw new CompilerException("Found join with solution set using incompatible operator (only Join/CoGroup are valid).");
}
}
// make sure we do not visit this node again. for that, we add an 'already seen'
// entry into one of the sets
this.chainedTasks.put(node, ALREADY_VISITED_PLACEHOLDER);
vertex = null;
} else if (node instanceof WorksetPlanNode) {
// create the iteration head here
vertex = createWorksetIterationHead((WorksetPlanNode) node);
} else {
throw new CompilerException("Unrecognized node type: " + node.getClass().getName());
}
} catch (Exception e) {
throw new CompilerException("Error translating node '" + node + "': " + e.getMessage(), e);
}
// check if a vertex was created, or if it was chained or skipped
if (vertex != null) {
// set parallelism
int pd = node.getParallelism();
vertex.setParallelism(pd);
vertex.setMaxParallelism(pd);
vertex.setSlotSharingGroup(sharingGroup);
// check whether this vertex is part of an iteration step function
if (this.currentIteration != null) {
// check that the task has the same parallelism as the iteration as such
PlanNode iterationNode = (PlanNode) this.currentIteration;
if (iterationNode.getParallelism() < pd) {
throw new CompilerException("Error: All functions that are part of an iteration must have the same, or a lower, parallelism than the iteration operator.");
}
// store the id of the iterations the step functions participate in
IterationDescriptor descr = this.iterations.get(this.currentIteration);
new TaskConfig(vertex.getConfiguration()).setIterationId(descr.getId());
}
// store in the map
this.vertices.put(node, vertex);
}
// returning true causes deeper descent
return true;
}
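
The traversal contract behind this method is the two-phase visitor from org.apache.flink.util.Visitor / Visitable: preVisit runs before a node's children are descended into (returning false prunes the subtree, which is how already-visited nodes in non-tree graphs are skipped), and postVisit runs afterwards, where the generator connects the created vertices to their inputs. A minimal sketch of that shape, with TreeNode as a toy stand-in for PlanNode:

import java.util.List;

// Illustrative stand-ins for org.apache.flink.util.Visitor/Visitable and
// PlanNode; only the traversal shape is meant to match.
interface PlanVisitor<T> {
    boolean preVisit(T node); // false = do not descend (e.g. node was seen before)
    void postVisit(T node);   // runs after all children have been visited
}

final class TreeNode {
    final String name;
    final List<TreeNode> children;

    TreeNode(String name, List<TreeNode> children) {
        this.name = name;
        this.children = children;
    }

    void accept(PlanVisitor<TreeNode> visitor) {
        if (visitor.preVisit(this)) { // the generator creates the vertex here
            for (TreeNode child : children) {
                child.accept(visitor);
            }
            visitor.postVisit(this); // ... and connects it to its inputs here
        }
    }
}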
Use of org.apache.flink.runtime.jobgraph.JobVertex in project flink by apache.
The class Dispatcher, method isPartialResourceConfigured.
private boolean isPartialResourceConfigured(JobGraph jobGraph) {
boolean hasVerticesWithUnknownResource = false;
boolean hasVerticesWithConfiguredResource = false;
for (JobVertex jobVertex : jobGraph.getVertices()) {
if (jobVertex.getMinResources() == ResourceSpec.UNKNOWN) {
hasVerticesWithUnknownResource = true;
} else {
hasVerticesWithConfiguredResource = true;
}
if (hasVerticesWithUnknownResource && hasVerticesWithConfiguredResource) {
return true;
}
}
return false;
}
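
The check answers a single question: does the graph mix vertices carrying ResourceSpec.UNKNOWN with vertices carrying a configured resource? Once both flags are set the answer cannot change, so the method returns early instead of scanning the rest. A self-contained sketch of the same idea over plain strings, where the UNKNOWN sentinel stands in for ResourceSpec.UNKNOWN:

import java.util.List;

final class PartialConfigSketch {

    static final String UNKNOWN = "UNKNOWN"; // stand-in for ResourceSpec.UNKNOWN

    static boolean isPartiallyConfigured(List<String> resources) {
        boolean sawUnknown = false;
        boolean sawConfigured = false;
        for (String r : resources) {
            if (UNKNOWN.equals(r)) {
                sawUnknown = true;
            } else {
                sawConfigured = true;
            }
            if (sawUnknown && sawConfigured) {
                return true; // mixed configuration: stop early
            }
        }
        return false; // uniform (all unknown or all configured)
    }

    public static void main(String[] args) {
        System.out.println(isPartiallyConfigured(List.of("UNKNOWN", "2 cores"))); // true
        System.out.println(isPartiallyConfigured(List.of("UNKNOWN", "UNKNOWN"))); // false
    }
}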
Use of org.apache.flink.runtime.jobgraph.JobVertex in project flink by apache.
The class DefaultExecutionGraphDeploymentTest, method testNoResourceAvailableFailure.
/**
* Tests that a blocking batch job fails if there are not enough resources left to schedule the
* succeeding tasks. This test case is related to [FLINK-4296] where finished producing tasks
* swallow the fail exception when scheduling a consumer task.
*/
@Test
public void testNoResourceAvailableFailure() throws Exception {
JobVertex v1 = new JobVertex("source");
JobVertex v2 = new JobVertex("sink");
int dop1 = 2;
int dop2 = 2;
v1.setParallelism(dop1);
v2.setParallelism(dop2);
v1.setInvokableClass(BatchTask.class);
v2.setInvokableClass(BatchTask.class);
v2.connectNewDataSetAsInput(v1, DistributionPattern.POINTWISE, ResultPartitionType.BLOCKING);
final JobGraph graph = JobGraphTestUtils.batchJobGraph(v1, v2);
DirectScheduledExecutorService directExecutor = new DirectScheduledExecutorService();
// execution graph that executes actions synchronously
final SchedulerBase scheduler =
        SchedulerTestingUtils.newSchedulerBuilder(graph, ComponentMainThreadExecutorServiceAdapter.forMainThread())
                .setExecutionSlotAllocatorFactory(
                        SchedulerTestingUtils.newSlotSharingExecutionSlotAllocatorFactory(
                                TestingPhysicalSlotProvider.createWithLimitedAmountOfPhysicalSlots(1)))
                .setFutureExecutor(directExecutor)
                .setBlobWriter(blobWriter)
                .build();
final ExecutionGraph eg = scheduler.getExecutionGraph();
checkJobOffloaded((DefaultExecutionGraph) eg);
// schedule, this triggers mock deployment
scheduler.startScheduling();
ExecutionAttemptID attemptID =
        eg.getJobVertex(v1.getID())
                .getTaskVertices()[0]
                .getCurrentExecutionAttempt()
                .getAttemptId();
scheduler.updateTaskExecutionState(new TaskExecutionState(attemptID, ExecutionState.RUNNING));
scheduler.updateTaskExecutionState(new TaskExecutionState(attemptID, ExecutionState.FINISHED, null));
assertEquals(JobStatus.FAILED, eg.getState());
}
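
For reference, here is the graph construction from the top of this test in isolation, using only the calls that already appear above (the import paths below are the usual Flink runtime locations; treat them as assumptions). The BLOCKING result partition type is the crux: the sink can only be scheduled after the source has finished, which is precisely the moment FLINK-4296 was about.

import org.apache.flink.runtime.io.network.partition.ResultPartitionType;
import org.apache.flink.runtime.jobgraph.DistributionPattern;
import org.apache.flink.runtime.jobgraph.JobVertex;
import org.apache.flink.runtime.operators.BatchTask;

final class TwoVertexGraphSketch {
    static void wire() {
        JobVertex source = new JobVertex("source");
        JobVertex sink = new JobVertex("sink");
        source.setParallelism(2);
        sink.setParallelism(2);
        source.setInvokableClass(BatchTask.class);
        sink.setInvokableClass(BatchTask.class);
        // POINTWISE: each sink subtask reads from exactly one source subtask;
        // BLOCKING: sink deployment waits until the source side has finished
        sink.connectNewDataSetAsInput(source, DistributionPattern.POINTWISE, ResultPartitionType.BLOCKING);
    }
}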
Use of org.apache.flink.runtime.jobgraph.JobVertex in project flink by apache.
The class DefaultExecutionGraphDeploymentTest, method testBuildDeploymentDescriptor.
@Test
public void testBuildDeploymentDescriptor() throws Exception {
final JobVertexID jid1 = new JobVertexID();
final JobVertexID jid2 = new JobVertexID();
final JobVertexID jid3 = new JobVertexID();
final JobVertexID jid4 = new JobVertexID();
JobVertex v1 = new JobVertex("v1", jid1);
JobVertex v2 = new JobVertex("v2", jid2);
JobVertex v3 = new JobVertex("v3", jid3);
JobVertex v4 = new JobVertex("v4", jid4);
v1.setParallelism(10);
v2.setParallelism(10);
v3.setParallelism(10);
v4.setParallelism(10);
v1.setInvokableClass(BatchTask.class);
v2.setInvokableClass(BatchTask.class);
v3.setInvokableClass(BatchTask.class);
v4.setInvokableClass(BatchTask.class);
v2.connectNewDataSetAsInput(v1, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
v3.connectNewDataSetAsInput(v2, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
v4.connectNewDataSetAsInput(v2, DistributionPattern.ALL_TO_ALL, ResultPartitionType.PIPELINED);
final JobGraph jobGraph = JobGraphTestUtils.batchJobGraph(v1, v2, v3, v4);
final JobID jobId = jobGraph.getJobID();
DirectScheduledExecutorService executor = new DirectScheduledExecutorService();
DefaultExecutionGraph eg =
        TestingDefaultExecutionGraphBuilder.newBuilder()
                .setJobGraph(jobGraph)
                .setFutureExecutor(executor)
                .setIoExecutor(executor)
                .setBlobWriter(blobWriter)
                .build();
eg.start(ComponentMainThreadExecutorServiceAdapter.forMainThread());
checkJobOffloaded(eg);
ExecutionJobVertex ejv = eg.getAllVertices().get(jid2);
ExecutionVertex vertex = ejv.getTaskVertices()[3];
final SimpleAckingTaskManagerGateway taskManagerGateway = new SimpleAckingTaskManagerGateway();
final CompletableFuture<TaskDeploymentDescriptor> tdd = new CompletableFuture<>();
taskManagerGateway.setSubmitConsumer(FunctionUtils.uncheckedConsumer(taskDeploymentDescriptor -> {
taskDeploymentDescriptor.loadBigData(blobCache);
tdd.complete(taskDeploymentDescriptor);
}));
final LogicalSlot slot = new TestingLogicalSlotBuilder().setTaskManagerGateway(taskManagerGateway).createTestingLogicalSlot();
assertEquals(ExecutionState.CREATED, vertex.getExecutionState());
vertex.getCurrentExecutionAttempt().transitionState(ExecutionState.SCHEDULED);
vertex.getCurrentExecutionAttempt().registerProducedPartitions(slot.getTaskManagerLocation(), true).get();
vertex.deployToSlot(slot);
assertEquals(ExecutionState.DEPLOYING, vertex.getExecutionState());
checkTaskOffloaded(eg, vertex.getJobvertexId());
TaskDeploymentDescriptor descr = tdd.get();
assertNotNull(descr);
JobInformation jobInformation = descr.getSerializedJobInformation().deserializeValue(getClass().getClassLoader());
TaskInformation taskInformation = descr.getSerializedTaskInformation().deserializeValue(getClass().getClassLoader());
assertEquals(jobId, descr.getJobId());
assertEquals(jobId, jobInformation.getJobId());
assertEquals(jid2, taskInformation.getJobVertexId());
assertEquals(3, descr.getSubtaskIndex());
assertEquals(10, taskInformation.getNumberOfSubtasks());
assertEquals(BatchTask.class.getName(), taskInformation.getInvokableClassName());
assertEquals("v2", taskInformation.getTaskName());
Collection<ResultPartitionDeploymentDescriptor> producedPartitions = descr.getProducedPartitions();
Collection<InputGateDeploymentDescriptor> consumedPartitions = descr.getInputGates();
assertEquals(2, producedPartitions.size());
assertEquals(1, consumedPartitions.size());
Iterator<ResultPartitionDeploymentDescriptor> iteratorProducedPartitions = producedPartitions.iterator();
Iterator<InputGateDeploymentDescriptor> iteratorConsumedPartitions = consumedPartitions.iterator();
assertEquals(10, iteratorProducedPartitions.next().getNumberOfSubpartitions());
assertEquals(10, iteratorProducedPartitions.next().getNumberOfSubpartitions());
ShuffleDescriptor[] shuffleDescriptors = iteratorConsumedPartitions.next().getShuffleDescriptors();
assertEquals(10, shuffleDescriptors.length);
Iterator<ConsumedPartitionGroup> iteratorConsumedPartitionGroup = vertex.getAllConsumedPartitionGroups().iterator();
int idx = 0;
for (IntermediateResultPartitionID partitionId : iteratorConsumedPartitionGroup.next()) {
assertEquals(partitionId, shuffleDescriptors[idx++].getResultPartitionID().getPartitionId());
}
}
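
The asserted counts follow from the shape of the graph around v2, with parallelism 10 everywhere: v2 is read by two downstream vertices (v3 and v4), hence 2 produced partitions; it reads only v1, hence 1 input gate; and with ALL_TO_ALL distribution, the subpartition count equals the consumer parallelism while the shuffle-descriptor count equals the producer parallelism. A back-of-the-envelope sketch (the helper names are illustrative, not Flink API):

// Back-of-the-envelope check of the deployment-descriptor counts above.
// These helpers are illustrative only; they are not Flink API.
final class AllToAllCountsSketch {

    // each producer subtask writes one subpartition per consumer subtask
    static int subpartitionsPerProducedPartition(int consumerParallelism) {
        return consumerParallelism;
    }

    // each consumer subtask reads one channel per producer subtask
    static int shuffleDescriptorsPerInputGate(int producerParallelism) {
        return producerParallelism;
    }

    public static void main(String[] args) {
        // v2 (parallelism 10) is read by v3 and v4 -> 2 produced partitions,
        // and reads only v1 -> 1 input gate.
        System.out.println(subpartitionsPerProducedPartition(10)); // 10
        System.out.println(shuffleDescriptorsPerInputGate(10));    // 10
    }
}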