use of org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID in project flink by apache.
the class EdgeManagerBuildUtil method connectAllToAll.
private static void connectAllToAll(ExecutionVertex[] taskVertices, IntermediateResult intermediateResult) {
List<IntermediateResultPartitionID> consumedPartitions = Arrays.stream(intermediateResult.getPartitions()).map(IntermediateResultPartition::getPartitionId).collect(Collectors.toList());
ConsumedPartitionGroup consumedPartitionGroup = createAndRegisterConsumedPartitionGroupToEdgeManager(consumedPartitions, intermediateResult);
for (ExecutionVertex ev : taskVertices) {
ev.addConsumedPartitionGroup(consumedPartitionGroup);
}
List<ExecutionVertexID> consumerVertices = Arrays.stream(taskVertices).map(ExecutionVertex::getID).collect(Collectors.toList());
ConsumerVertexGroup consumerVertexGroup = ConsumerVertexGroup.fromMultipleVertices(consumerVertices);
for (IntermediateResultPartition partition : intermediateResult.getPartitions()) {
partition.addConsumers(consumerVertexGroup);
}
}
use of org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID in project flink by apache.
the class Execution method updatePartitionConsumers.
private void updatePartitionConsumers(final IntermediateResultPartition partition) {
final Optional<ConsumerVertexGroup> consumerVertexGroup = partition.getConsumerVertexGroupOptional();
if (!consumerVertexGroup.isPresent()) {
return;
}
for (ExecutionVertexID consumerVertexId : consumerVertexGroup.get()) {
final ExecutionVertex consumerVertex = vertex.getExecutionGraphAccessor().getExecutionVertexOrThrow(consumerVertexId);
final Execution consumer = consumerVertex.getCurrentExecutionAttempt();
final ExecutionState consumerState = consumer.getState();
// ----------------------------------------------------------------
if (consumerState == DEPLOYING || consumerState == RUNNING || consumerState == INITIALIZING) {
final PartitionInfo partitionInfo = createPartitionInfo(partition);
if (consumerState == DEPLOYING) {
consumerVertex.cachePartitionInfo(partitionInfo);
} else {
consumer.sendUpdatePartitionInfoRpcCall(Collections.singleton(partitionInfo));
}
}
}
}
use of org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID in project flink by apache.
the class DefaultExecutionGraph method maybeReleasePartitionGroupsFor.
private void maybeReleasePartitionGroupsFor(final Execution attempt) {
final ExecutionVertexID finishedExecutionVertex = attempt.getVertex().getID();
if (attempt.getState() == ExecutionState.FINISHED) {
final List<ConsumedPartitionGroup> releasablePartitionGroups = partitionGroupReleaseStrategy.vertexFinished(finishedExecutionVertex);
releasePartitionGroups(releasablePartitionGroups);
} else {
partitionGroupReleaseStrategy.vertexUnfinished(finishedExecutionVertex);
}
}
use of org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID in project flink by apache.
the class RestartPipelinedRegionFailoverStrategy method getTasksNeedingRestart.
// ------------------------------------------------------------------------
// task failure handling
// ------------------------------------------------------------------------
/**
* Returns a set of IDs corresponding to the set of vertices that should be restarted. In this
* strategy, all task vertices in 'involved' regions are proposed to be restarted. The
* 'involved' regions are calculated with rules below: 1. The region containing the failed task
* is always involved 2. If an input result partition of an involved region is not available,
* i.e. Missing or Corrupted, the region containing the partition producer task is involved 3.
* If a region is involved, all of its consumer regions are involved
*
* @param executionVertexId ID of the failed task
* @param cause cause of the failure
* @return set of IDs of vertices to restart
*/
@Override
public Set<ExecutionVertexID> getTasksNeedingRestart(ExecutionVertexID executionVertexId, Throwable cause) {
LOG.info("Calculating tasks to restart to recover the failed task {}.", executionVertexId);
final SchedulingPipelinedRegion failedRegion = topology.getPipelinedRegionOfVertex(executionVertexId);
if (failedRegion == null) {
// TODO: show the task name in the log
throw new IllegalStateException("Can not find the failover region for task " + executionVertexId, cause);
}
// if the failure cause is data consumption error, mark the corresponding data partition to
// be failed,
// so that the failover process will try to recover it
Optional<PartitionException> dataConsumptionException = ExceptionUtils.findThrowable(cause, PartitionException.class);
if (dataConsumptionException.isPresent()) {
resultPartitionAvailabilityChecker.markResultPartitionFailed(dataConsumptionException.get().getPartitionId().getPartitionId());
}
// calculate the tasks to restart based on the result of regions to restart
Set<ExecutionVertexID> tasksToRestart = new HashSet<>();
for (SchedulingPipelinedRegion region : getRegionsToRestart(failedRegion)) {
for (SchedulingExecutionVertex vertex : region.getVertices()) {
// we do not need to restart tasks which are already in the initial state
if (vertex.getState() != ExecutionState.CREATED) {
tasksToRestart.add(vertex.getId());
}
}
}
// the previous failed partition will be recovered. remove its failed state from the checker
if (dataConsumptionException.isPresent()) {
resultPartitionAvailabilityChecker.removeResultPartitionFromFailedState(dataConsumptionException.get().getPartitionId().getPartitionId());
}
LOG.info("{} tasks should be restarted to recover the failed task {}. ", tasksToRestart.size(), executionVertexId);
return tasksToRestart;
}
use of org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID in project flink by apache.
the class RestartPipelinedRegionFailoverStrategy method getRegionsToRestart.
/**
* All 'involved' regions are proposed to be restarted. The 'involved' regions are calculated
* with rules below: 1. The region containing the failed task is always involved 2. If an input
* result partition of an involved region is not available, i.e. Missing or Corrupted, the
* region containing the partition producer task is involved 3. If a region is involved, all of
* its consumer regions are involved
*/
private Set<SchedulingPipelinedRegion> getRegionsToRestart(SchedulingPipelinedRegion failedRegion) {
Set<SchedulingPipelinedRegion> regionsToRestart = Collections.newSetFromMap(new IdentityHashMap<>());
Set<SchedulingPipelinedRegion> visitedRegions = Collections.newSetFromMap(new IdentityHashMap<>());
Set<ConsumedPartitionGroup> visitedConsumedResultGroups = Collections.newSetFromMap(new IdentityHashMap<>());
Set<ConsumerVertexGroup> visitedConsumerVertexGroups = Collections.newSetFromMap(new IdentityHashMap<>());
// start from the failed region to visit all involved regions
Queue<SchedulingPipelinedRegion> regionsToVisit = new ArrayDeque<>();
visitedRegions.add(failedRegion);
regionsToVisit.add(failedRegion);
while (!regionsToVisit.isEmpty()) {
SchedulingPipelinedRegion regionToRestart = regionsToVisit.poll();
// an involved region should be restarted
regionsToRestart.add(regionToRestart);
// if a needed input result partition is not available, its producer region is involved
for (IntermediateResultPartitionID consumedPartitionId : getConsumedPartitionsToVisit(regionToRestart, visitedConsumedResultGroups)) {
if (!resultPartitionAvailabilityChecker.isAvailable(consumedPartitionId)) {
SchedulingResultPartition consumedPartition = topology.getResultPartition(consumedPartitionId);
SchedulingPipelinedRegion producerRegion = topology.getPipelinedRegionOfVertex(consumedPartition.getProducer().getId());
if (!visitedRegions.contains(producerRegion)) {
visitedRegions.add(producerRegion);
regionsToVisit.add(producerRegion);
}
}
}
// all consumer regions of an involved region should be involved
for (ExecutionVertexID consumerVertexId : getConsumerVerticesToVisit(regionToRestart, visitedConsumerVertexGroups)) {
SchedulingPipelinedRegion consumerRegion = topology.getPipelinedRegionOfVertex(consumerVertexId);
if (!visitedRegions.contains(consumerRegion)) {
visitedRegions.add(consumerRegion);
regionsToVisit.add(consumerRegion);
}
}
}
return regionsToRestart;
}
Aggregations