use of org.apache.flink.runtime.scheduler.strategy.SchedulingExecutionVertex in project flink by apache.
the class RestartPipelinedRegionFailoverStrategy method getTasksNeedingRestart.
// ------------------------------------------------------------------------
// task failure handling
// ------------------------------------------------------------------------
/**
 * Computes the IDs of all vertices that have to be restarted to recover from a task failure.
 *
 * <p>Every vertex belonging to an 'involved' region is a restart candidate. A region counts as
 * involved when one of the following holds:
 *
 * <ol>
 *   <li>it contains the failed task;
 *   <li>it produces a result partition that an involved region consumes and that partition is
 *       not available (missing or corrupted);
 *   <li>it consumes from a region that is involved.
 * </ol>
 *
 * <p>Vertices that are still in {@code CREATED} state are left out of the returned set.
 *
 * @param executionVertexId ID of the failed task
 * @param cause cause of the failure
 * @return set of IDs of vertices to restart
 * @throws IllegalStateException if no pipelined region is known for the failed task
 */
@Override
public Set<ExecutionVertexID> getTasksNeedingRestart(ExecutionVertexID executionVertexId, Throwable cause) {
    LOG.info("Calculating tasks to restart to recover the failed task {}.", executionVertexId);

    final SchedulingPipelinedRegion failedRegion = topology.getPipelinedRegionOfVertex(executionVertexId);
    if (failedRegion == null) {
        // TODO: show the task name in the log
        throw new IllegalStateException("Can not find the failover region for task " + executionVertexId, cause);
    }

    // A PartitionException anywhere in the cause chain means the task failed while consuming
    // data. Flag that partition as failed before computing the regions, so that the failover
    // process will try to recover it.
    final Optional<PartitionException> partitionFailure = ExceptionUtils.findThrowable(cause, PartitionException.class);
    partitionFailure.ifPresent(
            failure -> resultPartitionAvailabilityChecker.markResultPartitionFailed(
                    failure.getPartitionId().getPartitionId()));

    // Flatten the regions to restart into the vertices they contain.
    final Set<ExecutionVertexID> tasksToRestart = new HashSet<>();
    for (SchedulingPipelinedRegion region : getRegionsToRestart(failedRegion)) {
        for (SchedulingExecutionVertex vertex : region.getVertices()) {
            // tasks which are still in the initial state do not need a restart
            if (vertex.getState() == ExecutionState.CREATED) {
                continue;
            }
            tasksToRestart.add(vertex.getId());
        }
    }

    // The partition flagged above will be recovered by this restart, so clear its failed
    // state from the checker again.
    partitionFailure.ifPresent(
            failure -> resultPartitionAvailabilityChecker.removeResultPartitionFromFailedState(
                    failure.getPartitionId().getPartitionId()));

    LOG.info("{} tasks should be restarted to recover the failed task {}. ", tasksToRestart.size(), executionVertexId);
    return tasksToRestart;
}
use of org.apache.flink.runtime.scheduler.strategy.SchedulingExecutionVertex in project flink by apache.
the class RestartPipelinedRegionFailoverStrategy method getConsumerVerticesToVisit.
/**
 * Collects the consumer vertices reachable from the partitions produced by the given region,
 * skipping every consumer vertex group that has been visited before.
 *
 * <p>Groups encountered here for the first time are added to {@code
 * visitedConsumerVertexGroups} as a side effect.
 *
 * @param regionToRestart region whose produced partitions are inspected
 * @param visitedConsumerVertexGroups groups already handled; updated by this call
 * @return the vertex IDs of all newly encountered consumer vertex groups, flattened
 */
private Iterable<ExecutionVertexID> getConsumerVerticesToVisit(SchedulingPipelinedRegion regionToRestart, Set<ConsumerVertexGroup> visitedConsumerVertexGroups) {
    final List<ConsumerVertexGroup> newlySeenGroups = new ArrayList<>();

    for (SchedulingExecutionVertex producer : regionToRestart.getVertices()) {
        for (SchedulingResultPartition partition : producer.getProducedResults()) {
            partition.getConsumerVertexGroup().ifPresent(group -> {
                // Set#add reports whether the group was absent, i.e. seen for the first time
                if (visitedConsumerVertexGroups.add(group)) {
                    newlySeenGroups.add(group);
                }
            });
        }
    }

    return IterableUtils.flatMap(newlySeenGroups, Function.identity());
}
use of org.apache.flink.runtime.scheduler.strategy.SchedulingExecutionVertex in project flink by apache.
the class RestartPipelinedRegionFailoverStrategy method getConsumedPartitionsToVisit.
/**
 * Collects the partitions consumed by the vertices of the given region, skipping every
 * consumed partition group that has been visited before.
 *
 * <p>Groups encountered here for the first time are added to {@code
 * visitedConsumedResultGroups} as a side effect.
 *
 * @param regionToRestart region whose consumed partition groups are inspected
 * @param visitedConsumedResultGroups groups already handled; updated by this call
 * @return the partition IDs of all newly encountered consumed partition groups, flattened
 */
private Iterable<IntermediateResultPartitionID> getConsumedPartitionsToVisit(SchedulingPipelinedRegion regionToRestart, Set<ConsumedPartitionGroup> visitedConsumedResultGroups) {
    final List<ConsumedPartitionGroup> newlySeenGroups = new ArrayList<>();

    for (SchedulingExecutionVertex consumer : regionToRestart.getVertices()) {
        for (ConsumedPartitionGroup group : consumer.getConsumedPartitionGroups()) {
            // Set#add reports whether the group was absent, i.e. seen for the first time
            if (visitedConsumedResultGroups.add(group)) {
                newlySeenGroups.add(group);
            }
        }
    }

    return IterableUtils.flatMap(newlySeenGroups, Function.identity());
}
use of org.apache.flink.runtime.scheduler.strategy.SchedulingExecutionVertex in project flink by apache.
the class SchedulingPipelinedRegionComputeUtil method buildOutEdgesDesc.
/**
 * Builds, for every region in {@code regionList}, the list of indices of the regions it has
 * outgoing edges to.
 *
 * <p>Only produced results whose type is reconnectable and that have a consumer vertex group
 * contribute edges. Consumers located in the producing region itself are ignored. An index is
 * appended once per matching consumer vertex, so the per-region lists may contain duplicates.
 *
 * @param vertexToRegion mapping from a vertex to the region that contains it
 * @param regionList all regions; the position in this list is the region's index
 * @param executionVertexRetriever resolves a vertex ID to its vertex
 * @return a list parallel to {@code regionList} holding the target region indices per region
 */
private static List<List<Integer>> buildOutEdgesDesc(final Map<SchedulingExecutionVertex, Set<SchedulingExecutionVertex>> vertexToRegion, final List<Set<SchedulingExecutionVertex>> regionList, final Function<ExecutionVertexID, ? extends SchedulingExecutionVertex> executionVertexRetriever) {
    // Regions are looked up by object identity, not by set equality.
    final Map<Set<SchedulingExecutionVertex>, Integer> indexOfRegion = new IdentityHashMap<>();
    int nextIndex = 0;
    for (Set<SchedulingExecutionVertex> region : regionList) {
        indexOfRegion.put(region, nextIndex++);
    }

    final List<List<Integer>> outEdges = new ArrayList<>(regionList.size());
    for (Set<SchedulingExecutionVertex> sourceRegion : regionList) {
        final List<Integer> targetIndices = new ArrayList<>();

        for (SchedulingExecutionVertex producer : sourceRegion) {
            for (SchedulingResultPartition result : producer.getProducedResults()) {
                if (result.getResultType().isReconnectable()) {
                    final Optional<ConsumerVertexGroup> consumers = result.getConsumerVertexGroup();
                    if (consumers.isPresent()) {
                        for (ExecutionVertexID consumerId : consumers.get()) {
                            final SchedulingExecutionVertex consumer = executionVertexRetriever.apply(consumerId);
                            // Stop scanning this consumer group as soon as one of its vertices
                            // is not part of any known region. NOTE(review): presumably the
                            // whole group then lies outside the current regions and cannot be
                            // merged - confirm against the caller.
                            if (!vertexToRegion.containsKey(consumer)) {
                                break;
                            }
                            // Only edges that leave the producing region are recorded.
                            if (!sourceRegion.contains(consumer)) {
                                targetIndices.add(indexOfRegion.get(vertexToRegion.get(consumer)));
                            }
                        }
                    }
                }
            }
        }

        outEdges.add(targetIndices);
    }
    return outEdges;
}
use of org.apache.flink.runtime.scheduler.strategy.SchedulingExecutionVertex in project flink by apache.
the class SchedulingPipelinedRegionComputeUtilTest method testPipelinedApproximateDifferentRegions.
/**
 * Builds a diamond-shaped topology (source -> upper/lower -> sink) whose edges are all {@code
 * PIPELINED_APPROXIMATE} and checks that each of the four vertices ends up in its own region.
 */
@Test
public void testPipelinedApproximateDifferentRegions() {
    final TestingSchedulingTopology topology = new TestingSchedulingTopology();

    final TestingSchedulingExecutionVertex source = topology.newExecutionVertex();
    final TestingSchedulingExecutionVertex upper = topology.newExecutionVertex();
    final TestingSchedulingExecutionVertex lower = topology.newExecutionVertex();
    final TestingSchedulingExecutionVertex sink = topology.newExecutionVertex();

    topology.connect(source, upper, ResultPartitionType.PIPELINED_APPROXIMATE)
            .connect(source, lower, ResultPartitionType.PIPELINED_APPROXIMATE)
            .connect(upper, sink, ResultPartitionType.PIPELINED_APPROXIMATE)
            .connect(lower, sink, ResultPartitionType.PIPELINED_APPROXIMATE);

    final Map<ExecutionVertexID, Set<SchedulingExecutionVertex>> regionOf = computePipelinedRegionByVertex(topology);

    assertDistinctRegions(
            regionOf.get(source.getId()),
            regionOf.get(upper.getId()),
            regionOf.get(lower.getId()),
            regionOf.get(sink.getId()));
}
Aggregations