use of org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID in project flink-mirror by flink-ci.
the class SchedulingPipelinedRegionComputeUtil method buildOutEdgesDesc.
/**
 * Builds, for every region in {@code regionList}, the list of indices of the regions it has
 * outgoing edges to. The index of a region is its position in {@code regionList}.
 *
 * <p>Only produced results whose type is reconnectable and that have a consumer vertex group
 * contribute edges. An edge is recorded for each consumer vertex that is not contained in the
 * producing region; the same target index may therefore appear more than once.
 *
 * @param vertexToRegion mapping from a vertex to the region that contains it
 * @param regionList all regions; the position in this list is the region index
 * @param executionVertexRetriever resolves a consumer vertex ID to its vertex
 * @return one list of target region indices per region, in the order of {@code regionList}
 */
private static List<List<Integer>> buildOutEdgesDesc(
        final Map<SchedulingExecutionVertex, Set<SchedulingExecutionVertex>> vertexToRegion,
        final List<Set<SchedulingExecutionVertex>> regionList,
        final Function<ExecutionVertexID, ? extends SchedulingExecutionVertex> executionVertexRetriever) {

    // Regions are looked up by reference (identity), not by set equality.
    final Map<Set<SchedulingExecutionVertex>, Integer> indexOfRegion = new IdentityHashMap<>();
    int nextIndex = 0;
    for (Set<SchedulingExecutionVertex> region : regionList) {
        indexOfRegion.put(region, nextIndex++);
    }

    final List<List<Integer>> result = new ArrayList<>(regionList.size());
    for (Set<SchedulingExecutionVertex> sourceRegion : regionList) {
        final List<Integer> targets = new ArrayList<>();
        for (SchedulingExecutionVertex producer : sourceRegion) {
            for (SchedulingResultPartition partition : producer.getProducedResults()) {
                if (partition.getResultType().isReconnectable()) {
                    final Optional<ConsumerVertexGroup> maybeGroup = partition.getConsumerVertexGroup();
                    if (maybeGroup.isPresent()) {
                        for (ExecutionVertexID consumerId : maybeGroup.get()) {
                            final SchedulingExecutionVertex consumer = executionVertexRetriever.apply(consumerId);
                            // A consumer without a known region ends the scan of this whole
                            // consumer group: its vertices are outside the current regions
                            // and cannot be merged.
                            if (!vertexToRegion.containsKey(consumer)) {
                                break;
                            }
                            // Consumers inside the producing region do not form an out edge.
                            if (sourceRegion.contains(consumer)) {
                                continue;
                            }
                            targets.add(indexOfRegion.get(vertexToRegion.get(consumer)));
                        }
                    }
                }
            }
        }
        result.add(targets);
    }
    return result;
}
use of org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID in project flink-mirror by flink-ci.
the class Execution method updatePartitionConsumers.
/**
 * Propagates the partition info of the given partition to the current execution attempt of
 * each vertex in the partition's consumer vertex group. Does nothing if the partition has no
 * consumer vertex group.
 *
 * @param partition the partition whose consumers should be updated
 */
private void updatePartitionConsumers(final IntermediateResultPartition partition) {
    final Optional<ConsumerVertexGroup> maybeConsumers = partition.getConsumerVertexGroupOptional();
    if (!maybeConsumers.isPresent()) {
        return;
    }

    for (ExecutionVertexID consumerId : maybeConsumers.get()) {
        final ExecutionVertex consumerVertex =
                vertex.getExecutionGraphAccessor().getExecutionVertexOrThrow(consumerId);
        final Execution consumerAttempt = consumerVertex.getCurrentExecutionAttempt();
        final ExecutionState state = consumerAttempt.getState();

        if (state == DEPLOYING) {
            // Consumer is still deploying: cache the partition info on the vertex.
            consumerVertex.cachePartitionInfo(createPartitionInfo(partition));
        } else if (state == RUNNING || state == INITIALIZING) {
            // Consumer is already up: send the partition info via RPC.
            consumerAttempt.sendUpdatePartitionInfoRpcCall(
                    Collections.singleton(createPartitionInfo(partition)));
        }
        // Consumers in any other state are left untouched.
    }
}
use of org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID in project flink-mirror by flink-ci.
the class RestartPipelinedRegionFailoverStrategy method getTasksNeedingRestart.
// ------------------------------------------------------------------------
// task failure handling
// ------------------------------------------------------------------------
/**
 * Computes the IDs of the vertices to restart in order to recover from a task failure.
 *
 * <p>All task vertices of the 'involved' regions are proposed for restart, except those that
 * are still in the {@code CREATED} state. The 'involved' regions are determined as follows:
 *
 * <ol>
 *   <li>The region containing the failed task is always involved.
 *   <li>If an input result partition of an involved region is not available, i.e. missing or
 *       corrupted, the region containing the partition producer task is involved.
 *   <li>If a region is involved, all of its consumer regions are involved.
 * </ol>
 *
 * @param executionVertexId ID of the failed task
 * @param cause cause of the failure
 * @return set of IDs of vertices to restart
 * @throws IllegalStateException if no pipelined region is found for the failed task
 */
@Override
public Set<ExecutionVertexID> getTasksNeedingRestart(ExecutionVertexID executionVertexId, Throwable cause) {
    LOG.info("Calculating tasks to restart to recover the failed task {}.", executionVertexId);

    final SchedulingPipelinedRegion failedRegion = topology.getPipelinedRegionOfVertex(executionVertexId);
    if (failedRegion == null) {
        // TODO: show the task name in the log
        throw new IllegalStateException("Can not find the failover region for task " + executionVertexId, cause);
    }

    // A PartitionException in the cause chain signals a data consumption error. The affected
    // partition is marked as failed so that the region computation below takes it into account.
    final Optional<PartitionException> partitionFailure =
            ExceptionUtils.findThrowable(cause, PartitionException.class);
    partitionFailure.ifPresent(
            failure ->
                    resultPartitionAvailabilityChecker.markResultPartitionFailed(
                            failure.getPartitionId().getPartitionId()));

    // Collect the vertices of every region to restart, skipping those still in the initial
    // state since they do not need a restart.
    final Set<ExecutionVertexID> tasksToRestart = new HashSet<>();
    for (SchedulingPipelinedRegion regionToRestart : getRegionsToRestart(failedRegion)) {
        for (SchedulingExecutionVertex member : regionToRestart.getVertices()) {
            if (member.getState() == ExecutionState.CREATED) {
                continue;
            }
            tasksToRestart.add(member.getId());
        }
    }

    // The previously failed partition will be recovered, so drop its failed state again.
    partitionFailure.ifPresent(
            failure ->
                    resultPartitionAvailabilityChecker.removeResultPartitionFromFailedState(
                            failure.getPartitionId().getPartitionId()));

    LOG.info("{} tasks should be restarted to recover the failed task {}. ", tasksToRestart.size(), executionVertexId);
    return tasksToRestart;
}
use of org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID in project flink-mirror by flink-ci.
the class DefaultPreferredLocationsRetriever method getInputLocationFutures.
/**
 * Gathers the location futures of the given producers, leaving out the producers that are to
 * be ignored and those for which no location future is available.
 *
 * @param producersToIgnore producers whose locations must not be requested
 * @param producers the producers to collect location futures for
 * @return the collected location futures, or an empty list as soon as more than
 *     {@code MAX_DISTINCT_LOCATIONS_TO_CONSIDER} futures have been collected
 */
private Collection<CompletableFuture<TaskManagerLocation>> getInputLocationFutures(
        final Set<ExecutionVertexID> producersToIgnore,
        final Collection<ExecutionVertexID> producers) {

    final Collection<CompletableFuture<TaskManagerLocation>> collected = new ArrayList<>();

    for (ExecutionVertexID producerId : producers) {
        if (!producersToIgnore.contains(producerId)) {
            inputsLocationsRetriever.getTaskManagerLocation(producerId).ifPresent(collected::add);
        }

        // Too many location futures: give up on these inputs, as it may be a long time to
        // wait for all the location futures to complete.
        if (collected.size() > MAX_DISTINCT_LOCATIONS_TO_CONSIDER) {
            return Collections.emptyList();
        }
    }

    return collected;
}
use of org.apache.flink.runtime.scheduler.strategy.ExecutionVertexID in project flink-mirror by flink-ci.
the class DefaultScheduler method assignResource.
/**
 * Creates the handler that assigns an allocated slot to the execution vertex of the given
 * deployment.
 *
 * <p>The returned function yields {@code null} if the required vertex version has been
 * modified in the meantime (releasing the slot if the allocation itself succeeded), rethrows
 * an allocation failure wrapped in a {@link CompletionException}, and otherwise assigns the
 * slot to the vertex and returns it.
 *
 * @param deploymentHandle handle describing the deployment the slot is meant for
 * @return function to be applied to the outcome of the slot allocation
 */
private BiFunction<LogicalSlot, Throwable, LogicalSlot> assignResource(final DeploymentHandle deploymentHandle) {
    final ExecutionVertexVersion requiredVertexVersion = deploymentHandle.getRequiredVertexVersion();
    final ExecutionVertexID executionVertexId = deploymentHandle.getExecutionVertexId();

    return (slot, failure) -> {
        final boolean superseded = executionVertexVersioner.isModified(requiredVertexVersion);

        if (superseded && failure == null) {
            log.debug("Refusing to assign slot to execution vertex {} because this deployment was " + "superseded by another deployment", executionVertexId);
            releaseSlotIfPresent(slot);
        }
        if (superseded) {
            return null;
        }

        // The failure is propagated only after the version check above, i.e. only when this
        // deployment has not been superseded.
        if (failure != null) {
            throw new CompletionException(maybeWrapWithNoResourceAvailableException(failure));
        }

        getExecutionVertex(executionVertexId).tryAssignResource(slot);
        startReserveAllocation(executionVertexId, slot.getAllocationId());
        return slot;
    };
}
Aggregations