
Example 6 with Execution

use of org.apache.flink.runtime.executiongraph.Execution in project flink by apache.

the class CheckpointCoordinatorTest method mockExecutionVertex.

private static ExecutionVertex mockExecutionVertex(ExecutionAttemptID attemptID, JobVertexID jobVertexID, int parallelism, int maxParallelism, ExecutionState state, ExecutionState... successiveStates) {
    ExecutionVertex vertex = mock(ExecutionVertex.class);
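    // wrap a real Execution in a Mockito spy so its getters can be stubbed with the given values below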
    final Execution exec = spy(new Execution(mock(Executor.class), vertex, 1, 1L, Time.milliseconds(500L)));
    when(exec.getAttemptId()).thenReturn(attemptID);
    when(exec.getState()).thenReturn(state, successiveStates);
    when(vertex.getJobvertexId()).thenReturn(jobVertexID);
    when(vertex.getCurrentExecutionAttempt()).thenReturn(exec);
    when(vertex.getTotalNumberOfParallelSubtasks()).thenReturn(parallelism);
    when(vertex.getMaxParallelism()).thenReturn(maxParallelism);
    return vertex;
}
Also used : Execution(org.apache.flink.runtime.executiongraph.Execution) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex)
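
For orientation, a hypothetical call site for this helper could look like the sketch below; the IDs, the parallelism values, and the final assertion are illustrative, not taken from the Flink test itself.

ExecutionAttemptID attemptID = new ExecutionAttemptID();
JobVertexID jobVertexID = new JobVertexID();
// a vertex with parallelism 4, max parallelism 128, reported as RUNNING and then FINISHED
ExecutionVertex vertex = mockExecutionVertex(
        attemptID, jobVertexID, 4, 128,
        ExecutionState.RUNNING, ExecutionState.FINISHED);
assertEquals(attemptID, vertex.getCurrentExecutionAttempt().getAttemptId());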

Example 7 with Execution

use of org.apache.flink.runtime.executiongraph.Execution in project flink by apache.

the class CheckpointCoordinatorTest method testPeriodicTriggering.

@Test
public void testPeriodicTriggering() {
    try {
        final JobID jid = new JobID();
        final long start = System.currentTimeMillis();
        // create some mock execution vertices and trigger some checkpoint
        final ExecutionAttemptID triggerAttemptID = new ExecutionAttemptID();
        final ExecutionAttemptID ackAttemptID = new ExecutionAttemptID();
        final ExecutionAttemptID commitAttemptID = new ExecutionAttemptID();
        ExecutionVertex triggerVertex = mockExecutionVertex(triggerAttemptID);
        ExecutionVertex ackVertex = mockExecutionVertex(ackAttemptID);
        ExecutionVertex commitVertex = mockExecutionVertex(commitAttemptID);
        final AtomicInteger numCalls = new AtomicInteger();
        final Execution execution = triggerVertex.getCurrentExecutionAttempt();
        doAnswer(new Answer<Void>() {
            private long lastId = -1;
            private long lastTs = -1;

            @Override
            public Void answer(InvocationOnMock invocation) throws Throwable {
                long id = (Long) invocation.getArguments()[0];
                long ts = (Long) invocation.getArguments()[1];
                assertTrue(id > lastId);
                assertTrue(ts >= lastTs);
                assertTrue(ts >= start);
                lastId = id;
                lastTs = ts;
                numCalls.incrementAndGet();
                return null;
            }
        }).when(execution).triggerCheckpoint(anyLong(), anyLong(), any(CheckpointOptions.class));
        CheckpointCoordinator coord = new CheckpointCoordinator(
                jid,
                10,     // periodic interval is 10 ms
                200000, // timeout is very long (200 s)
                0,
                Integer.MAX_VALUE,
                ExternalizedCheckpointSettings.none(),
                new ExecutionVertex[] { triggerVertex },
                new ExecutionVertex[] { ackVertex },
                new ExecutionVertex[] { commitVertex },
                new StandaloneCheckpointIDCounter(),
                new StandaloneCompletedCheckpointStore(2),
                null,
                Executors.directExecutor());
        coord.startCheckpointScheduler();
        long timeout = System.currentTimeMillis() + 60000;
        do {
            Thread.sleep(20);
        } while (timeout > System.currentTimeMillis() && numCalls.get() < 5);
        assertTrue(numCalls.get() >= 5);
        coord.stopCheckpointScheduler();
        // for 400 ms, no further calls may come.
        // there may be the case that one trigger was fired and about to
        // acquire the lock, such that after cancelling it will still do
        // the remainder of its work
        int numCallsSoFar = numCalls.get();
        Thread.sleep(400);
        assertTrue(numCallsSoFar == numCalls.get() || numCallsSoFar + 1 == numCalls.get());
        // start another sequence of periodic scheduling
        numCalls.set(0);
        coord.startCheckpointScheduler();
        timeout = System.currentTimeMillis() + 60000;
        do {
            Thread.sleep(20);
        } while (timeout > System.currentTimeMillis() && numCalls.get() < 5);
        assertTrue(numCalls.get() >= 5);
        coord.stopCheckpointScheduler();
        // for 400 ms, no further calls may come
        // there may be the case that one trigger was fired and about to
        // acquire the lock, such that after cancelling it will still do
        // the remainder of its work
        numCallsSoFar = numCalls.get();
        Thread.sleep(400);
        assertTrue(numCallsSoFar == numCalls.get() || numCallsSoFar + 1 == numCalls.get());
        coord.shutdown(JobStatus.FINISHED);
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) DeclineCheckpoint(org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint) IOException(java.io.IOException) Execution(org.apache.flink.runtime.executiongraph.Execution) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) InvocationOnMock(org.mockito.invocation.InvocationOnMock) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)
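
The test waits for the periodic triggers with a sleep-and-poll loop that it repeats verbatim for the second scheduling sequence. As a side note, the pattern factors into a small helper; this is a sketch using only JDK types, not an existing Flink test utility.

// illustrative helper, not part of the Flink code base: polls 'condition'
// every 20 ms until it holds or until 'timeoutMillis' elapses
private static void waitUntil(java.util.function.BooleanSupplier condition, long timeoutMillis) throws InterruptedException {
    final long deadline = System.currentTimeMillis() + timeoutMillis;
    while (System.currentTimeMillis() < deadline && !condition.getAsBoolean()) {
        Thread.sleep(20);
    }
}

With it, each wait in the test reduces to waitUntil(() -> numCalls.get() >= 5, 60000).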

Example 8 with Execution

use of org.apache.flink.runtime.executiongraph.Execution in project flink by apache.

the class StateAssignmentOperation method assignTaskStatesToOperatorInstances.

private static void assignTaskStatesToOperatorInstances(TaskState taskState, ExecutionJobVertex executionJobVertex) {
    final int oldParallelism = taskState.getParallelism();
    final int newParallelism = executionJobVertex.getParallelism();
    List<KeyGroupRange> keyGroupPartitions = createKeyGroupPartitions(executionJobVertex.getMaxParallelism(), newParallelism);
    final int chainLength = taskState.getChainLength();
    // operator chain idx -> list of the stored op states from all parallel instances for this chain idx
    @SuppressWarnings("unchecked") List<OperatorStateHandle>[] parallelOpStatesBackend = new List[chainLength];
    @SuppressWarnings("unchecked") List<OperatorStateHandle>[] parallelOpStatesStream = new List[chainLength];
    List<KeyGroupsStateHandle> parallelKeyedStatesBackend = new ArrayList<>(oldParallelism);
    List<KeyGroupsStateHandle> parallelKeyedStateStream = new ArrayList<>(oldParallelism);
    for (int p = 0; p < oldParallelism; ++p) {
        SubtaskState subtaskState = taskState.getState(p);
        if (null != subtaskState) {
            collectParallelStatesByChainOperator(parallelOpStatesBackend, subtaskState.getManagedOperatorState());
            collectParallelStatesByChainOperator(parallelOpStatesStream, subtaskState.getRawOperatorState());
            KeyGroupsStateHandle keyedStateBackend = subtaskState.getManagedKeyedState();
            if (null != keyedStateBackend) {
                parallelKeyedStatesBackend.add(keyedStateBackend);
            }
            KeyGroupsStateHandle keyedStateStream = subtaskState.getRawKeyedState();
            if (null != keyedStateStream) {
                parallelKeyedStateStream.add(keyedStateStream);
            }
        }
    }
    // operator chain index -> lists with collected states (one collection for each parallel subtask)
    @SuppressWarnings("unchecked") List<Collection<OperatorStateHandle>>[] partitionedParallelStatesBackend = new List[chainLength];
    @SuppressWarnings("unchecked") List<Collection<OperatorStateHandle>>[] partitionedParallelStatesStream = new List[chainLength];
    //TODO here we can employ different redistribution strategies for state, e.g. union state.
    // For now we only offer round robin as the default.
    OperatorStateRepartitioner opStateRepartitioner = RoundRobinOperatorStateRepartitioner.INSTANCE;
    for (int chainIdx = 0; chainIdx < chainLength; ++chainIdx) {
        List<OperatorStateHandle> chainOpParallelStatesBackend = parallelOpStatesBackend[chainIdx];
        List<OperatorStateHandle> chainOpParallelStatesStream = parallelOpStatesStream[chainIdx];
        partitionedParallelStatesBackend[chainIdx] = applyRepartitioner(opStateRepartitioner, chainOpParallelStatesBackend, oldParallelism, newParallelism);
        partitionedParallelStatesStream[chainIdx] = applyRepartitioner(opStateRepartitioner, chainOpParallelStatesStream, oldParallelism, newParallelism);
    }
    for (int subTaskIdx = 0; subTaskIdx < newParallelism; ++subTaskIdx) {
        // non-partitioned state
        ChainedStateHandle<StreamStateHandle> nonPartitionableState = null;
        if (oldParallelism == newParallelism) {
            if (taskState.getState(subTaskIdx) != null) {
                nonPartitionableState = taskState.getState(subTaskIdx).getLegacyOperatorState();
            }
        }
        // partitionable state
        @SuppressWarnings("unchecked") Collection<OperatorStateHandle>[] iab = new Collection[chainLength];
        @SuppressWarnings("unchecked") Collection<OperatorStateHandle>[] ias = new Collection[chainLength];
        List<Collection<OperatorStateHandle>> operatorStateFromBackend = Arrays.asList(iab);
        List<Collection<OperatorStateHandle>> operatorStateFromStream = Arrays.asList(ias);
        for (int chainIdx = 0; chainIdx < partitionedParallelStatesBackend.length; ++chainIdx) {
            List<Collection<OperatorStateHandle>> redistributedOpStateBackend = partitionedParallelStatesBackend[chainIdx];
            List<Collection<OperatorStateHandle>> redistributedOpStateStream = partitionedParallelStatesStream[chainIdx];
            if (redistributedOpStateBackend != null) {
                operatorStateFromBackend.set(chainIdx, redistributedOpStateBackend.get(subTaskIdx));
            }
            if (redistributedOpStateStream != null) {
                operatorStateFromStream.set(chainIdx, redistributedOpStateStream.get(subTaskIdx));
            }
        }
        Execution currentExecutionAttempt = executionJobVertex.getTaskVertices()[subTaskIdx].getCurrentExecutionAttempt();
        List<KeyGroupsStateHandle> newKeyedStatesBackend;
        List<KeyGroupsStateHandle> newKeyedStateStream;
        if (oldParallelism == newParallelism) {
            SubtaskState subtaskState = taskState.getState(subTaskIdx);
            if (subtaskState != null) {
                KeyGroupsStateHandle oldKeyedStatesBackend = subtaskState.getManagedKeyedState();
                KeyGroupsStateHandle oldKeyedStatesStream = subtaskState.getRawKeyedState();
                newKeyedStatesBackend = oldKeyedStatesBackend != null ? Collections.singletonList(oldKeyedStatesBackend) : null;
                newKeyedStateStream = oldKeyedStatesStream != null ? Collections.singletonList(oldKeyedStatesStream) : null;
            } else {
                newKeyedStatesBackend = null;
                newKeyedStateStream = null;
            }
        } else {
            KeyGroupRange subtaskKeyGroupIds = keyGroupPartitions.get(subTaskIdx);
            newKeyedStatesBackend = getKeyGroupsStateHandles(parallelKeyedStatesBackend, subtaskKeyGroupIds);
            newKeyedStateStream = getKeyGroupsStateHandles(parallelKeyedStateStream, subtaskKeyGroupIds);
        }
        TaskStateHandles taskStateHandles = new TaskStateHandles(nonPartitionableState, operatorStateFromBackend, operatorStateFromStream, newKeyedStatesBackend, newKeyedStateStream);
        currentExecutionAttempt.setInitialState(taskStateHandles);
    }
}
Also used : KeyGroupRange(org.apache.flink.runtime.state.KeyGroupRange) ArrayList(java.util.ArrayList) KeyGroupsStateHandle(org.apache.flink.runtime.state.KeyGroupsStateHandle) TaskStateHandles(org.apache.flink.runtime.state.TaskStateHandles) StreamStateHandle(org.apache.flink.runtime.state.StreamStateHandle) Execution(org.apache.flink.runtime.executiongraph.Execution) Collection(java.util.Collection) ArrayList(java.util.ArrayList) List(java.util.List) OperatorStateHandle(org.apache.flink.runtime.state.OperatorStateHandle)
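
The method relies on createKeyGroupPartitions (not shown in this excerpt) to split the key-group space [0, maxParallelism) into one contiguous range per post-rescaling subtask. A minimal standalone sketch of that idea follows; the method name and the exact boundary arithmetic are assumptions for illustration, not Flink's implementation.

// illustrative only: splits key groups 0..maxParallelism-1 into 'parallelism'
// contiguous, non-overlapping [start, end] ranges of near-equal size
static List<int[]> keyGroupPartitions(int maxParallelism, int parallelism) {
    List<int[]> ranges = new ArrayList<>(parallelism);
    for (int i = 0; i < parallelism; i++) {
        int start = (i * maxParallelism) / parallelism;
        int end = ((i + 1) * maxParallelism) / parallelism - 1;
        ranges.add(new int[] { start, end });
    }
    return ranges;
}

For example, maxParallelism = 128 and parallelism = 4 yield [0, 31], [32, 63], [64, 95], [96, 127]; each new subtask then receives exactly the keyed state whose key groups fall into its range.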

Example 9 with Execution

use of org.apache.flink.runtime.executiongraph.Execution in project flink by apache.

the class CheckpointCoordinator method triggerCheckpoint.

@VisibleForTesting
CheckpointTriggerResult triggerCheckpoint(long timestamp, CheckpointProperties props, String targetDirectory, boolean isPeriodic) {
    // Sanity check
    if (props.externalizeCheckpoint() && targetDirectory == null) {
        throw new IllegalStateException("No target directory specified to persist checkpoint to.");
    }
    // make some eager pre-checks
    synchronized (lock) {
        // abort if the coordinator has been shutdown in the meantime
        if (shutdown) {
            return new CheckpointTriggerResult(CheckpointDeclineReason.COORDINATOR_SHUTDOWN);
        }
        // Don't allow periodic checkpoint if scheduling has been disabled
        if (isPeriodic && !periodicScheduling) {
            return new CheckpointTriggerResult(CheckpointDeclineReason.PERIODIC_SCHEDULER_SHUTDOWN);
        }
        // these checks are not relevant for savepoints
        if (!props.forceCheckpoint()) {
            // sanity check: there should never be more than one trigger request queued
            if (triggerRequestQueued) {
                LOG.warn("Trying to trigger another checkpoint while one was queued already");
                return new CheckpointTriggerResult(CheckpointDeclineReason.ALREADY_QUEUED);
            }
            // if too many checkpoints are currently in progress, we need to mark that a request is queued
            if (pendingCheckpoints.size() >= maxConcurrentCheckpointAttempts) {
                triggerRequestQueued = true;
                if (currentPeriodicTrigger != null) {
                    currentPeriodicTrigger.cancel(false);
                    currentPeriodicTrigger = null;
                }
                return new CheckpointTriggerResult(CheckpointDeclineReason.TOO_MANY_CONCURRENT_CHECKPOINTS);
            }
            // make sure the minimum interval between checkpoints has passed
            final long earliestNext = lastCheckpointCompletionNanos + minPauseBetweenCheckpointsNanos;
            final long durationTillNextMillis = (earliestNext - System.nanoTime()) / 1_000_000;
            if (durationTillNextMillis > 0) {
                if (currentPeriodicTrigger != null) {
                    currentPeriodicTrigger.cancel(false);
                    currentPeriodicTrigger = null;
                }
                // Reassign the new trigger to the currentPeriodicTrigger
                currentPeriodicTrigger = timer.scheduleAtFixedRate(new ScheduledTrigger(), durationTillNextMillis, baseInterval, TimeUnit.MILLISECONDS);
                return new CheckpointTriggerResult(CheckpointDeclineReason.MINIMUM_TIME_BETWEEN_CHECKPOINTS);
            }
        }
    }
    // check if all tasks that we need to trigger are running.
    // if not, abort the checkpoint
    Execution[] executions = new Execution[tasksToTrigger.length];
    for (int i = 0; i < tasksToTrigger.length; i++) {
        Execution ee = tasksToTrigger[i].getCurrentExecutionAttempt();
        if (ee != null && ee.getState() == ExecutionState.RUNNING) {
            executions[i] = ee;
        } else {
            LOG.info("Checkpoint triggering task {} is not being executed at the moment. Aborting checkpoint.", tasksToTrigger[i].getSimpleName());
            return new CheckpointTriggerResult(CheckpointDeclineReason.NOT_ALL_REQUIRED_TASKS_RUNNING);
        }
    }
    // next, check if all tasks that need to acknowledge the checkpoint are running.
    // if not, abort the checkpoint
    Map<ExecutionAttemptID, ExecutionVertex> ackTasks = new HashMap<>(tasksToWaitFor.length);
    for (ExecutionVertex ev : tasksToWaitFor) {
        Execution ee = ev.getCurrentExecutionAttempt();
        if (ee != null) {
            ackTasks.put(ee.getAttemptId(), ev);
        } else {
            LOG.info("Checkpoint acknowledging task {} is not being executed at the moment. Aborting checkpoint.", ev.getSimpleName());
            return new CheckpointTriggerResult(CheckpointDeclineReason.NOT_ALL_REQUIRED_TASKS_RUNNING);
        }
    }
    // trigger the checkpoint under a separate lock: assigning the checkpoint ID may block,
    // and by not holding the coordinator-wide lock here,
    // we avoid blocking the processing of 'acknowledge/decline' messages during that time.
    synchronized (triggerLock) {
        final long checkpointID;
        try {
            // this must happen outside the coordinator-wide lock, because it communicates
            // with external services (in HA mode) and may block for a while.
            checkpointID = checkpointIdCounter.getAndIncrement();
        } catch (Throwable t) {
            int numUnsuccessful = numUnsuccessfulCheckpointsTriggers.incrementAndGet();
            LOG.warn("Failed to trigger checkpoint (" + numUnsuccessful + " consecutive failed attempts so far)", t);
            return new CheckpointTriggerResult(CheckpointDeclineReason.EXCEPTION);
        }
        final PendingCheckpoint checkpoint = new PendingCheckpoint(job, checkpointID, timestamp, ackTasks, props, targetDirectory, executor);
        if (statsTracker != null) {
            PendingCheckpointStats callback = statsTracker.reportPendingCheckpoint(checkpointID, timestamp, props);
            checkpoint.setStatsCallback(callback);
        }
        // schedule the timer that will clean up the expired checkpoints
        final Runnable canceller = new Runnable() {
            @Override
            public void run() {
                synchronized (lock) {
                    // note that checkpoint completion discards the pending checkpoint object
                    if (!checkpoint.isDiscarded()) {
                        LOG.info("Checkpoint " + checkpointID + " expired before completing.");
                        checkpoint.abortExpired();
                        pendingCheckpoints.remove(checkpointID);
                        rememberRecentCheckpointId(checkpointID);
                        triggerQueuedRequests();
                    }
                }
            }
        };
        try {
            // re-acquire the coordinator-wide lock
            synchronized (lock) {
                // since the lock was released in the meantime, re-check that the conditions still hold.
                if (shutdown) {
                    return new CheckpointTriggerResult(CheckpointDeclineReason.COORDINATOR_SHUTDOWN);
                } else if (!props.forceCheckpoint()) {
                    if (triggerRequestQueued) {
                        LOG.warn("Trying to trigger another checkpoint while one was queued already");
                        return new CheckpointTriggerResult(CheckpointDeclineReason.ALREADY_QUEUED);
                    }
                    if (pendingCheckpoints.size() >= maxConcurrentCheckpointAttempts) {
                        triggerRequestQueued = true;
                        if (currentPeriodicTrigger != null) {
                            currentPeriodicTrigger.cancel(false);
                            currentPeriodicTrigger = null;
                        }
                        return new CheckpointTriggerResult(CheckpointDeclineReason.TOO_MANY_CONCURRENT_CHECKPOINTS);
                    }
                    // make sure the minimum interval between checkpoints has passed
                    final long earliestNext = lastCheckpointCompletionNanos + minPauseBetweenCheckpointsNanos;
                    final long durationTillNextMillis = (earliestNext - System.nanoTime()) / 1_000_000;
                    if (durationTillNextMillis > 0) {
                        if (currentPeriodicTrigger != null) {
                            currentPeriodicTrigger.cancel(false);
                            currentPeriodicTrigger = null;
                        }
                        // Reassign the new trigger to the currentPeriodicTrigger
                        currentPeriodicTrigger = timer.scheduleAtFixedRate(new ScheduledTrigger(), durationTillNextMillis, baseInterval, TimeUnit.MILLISECONDS);
                        return new CheckpointTriggerResult(CheckpointDeclineReason.MINIMUM_TIME_BETWEEN_CHECKPOINTS);
                    }
                }
                LOG.info("Triggering checkpoint " + checkpointID + " @ " + timestamp);
                pendingCheckpoints.put(checkpointID, checkpoint);
                ScheduledFuture<?> cancellerHandle = timer.schedule(canceller, checkpointTimeout, TimeUnit.MILLISECONDS);
                if (!checkpoint.setCancellerHandle(cancellerHandle)) {
                    // checkpoint is already disposed!
                    cancellerHandle.cancel(false);
                }
            }
            // end of lock scope
            CheckpointOptions checkpointOptions;
            if (!props.isSavepoint()) {
                checkpointOptions = CheckpointOptions.forFullCheckpoint();
            } else {
                checkpointOptions = CheckpointOptions.forSavepoint(targetDirectory);
            }
            // send the messages to the tasks that trigger their checkpoint
            for (Execution execution : executions) {
                execution.triggerCheckpoint(checkpointID, timestamp, checkpointOptions);
            }
            numUnsuccessfulCheckpointsTriggers.set(0);
            return new CheckpointTriggerResult(checkpoint);
        } catch (Throwable t) {
            // guard the map against concurrent modifications
            synchronized (lock) {
                pendingCheckpoints.remove(checkpointID);
            }
            int numUnsuccessful = numUnsuccessfulCheckpointsTriggers.incrementAndGet();
            LOG.warn("Failed to trigger checkpoint (" + numUnsuccessful + " consecutive failed attempts so far)", t);
            if (!checkpoint.isDiscarded()) {
                checkpoint.abortError(new Exception("Failed to trigger checkpoint"));
            }
            return new CheckpointTriggerResult(CheckpointDeclineReason.EXCEPTION);
        }
    }
// end trigger lock
}
Also used : ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) AcknowledgeCheckpoint(org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint) DeclineCheckpoint(org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex) Execution(org.apache.flink.runtime.executiongraph.Execution) VisibleForTesting(org.apache.flink.annotation.VisibleForTesting)
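
The minimum-pause check appears twice above: once in the eager pre-checks and again after the coordinator-wide lock is re-acquired. Isolated into a standalone sketch with illustrative names, the arithmetic is:

// illustrative only: how many ms remain before the next checkpoint may be triggered,
// or 0 if the minimum pause since the last completed checkpoint has already elapsed
static long millisUntilNextAllowed(long lastCompletionNanos, long minPauseNanos) {
    final long earliestNextNanos = lastCompletionNanos + minPauseNanos;
    return Math.max(0L, (earliestNextNanos - System.nanoTime()) / 1_000_000);
}

A positive result makes triggerCheckpoint decline with MINIMUM_TIME_BETWEEN_CHECKPOINTS and reschedule the periodic trigger to fire after exactly that delay.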

Example 10 with Execution

use of org.apache.flink.runtime.executiongraph.Execution in project flink by apache.

the class DefaultCheckpointPlanCalculatorTest method runSingleTest.

private void runSingleTest(List<VertexDeclaration> vertexDeclarations, List<EdgeDeclaration> edgeDeclarations, List<TaskDeclaration> expectedToTriggerTaskDeclarations, List<TaskDeclaration> expectedFinishedTaskDeclarations) throws Exception {
    ExecutionGraph graph = createExecutionGraph(vertexDeclarations, edgeDeclarations);
    DefaultCheckpointPlanCalculator planCalculator = createCheckpointPlanCalculator(graph);
    List<TaskDeclaration> expectedRunningTaskDeclarations = new ArrayList<>();
    List<ExecutionJobVertex> expectedFullyFinishedJobVertices = new ArrayList<>();
    expectedFinishedTaskDeclarations.forEach(finishedDeclaration -> {
        ExecutionJobVertex jobVertex = chooseJobVertex(graph, finishedDeclaration.vertexIndex);
        expectedRunningTaskDeclarations.add(new TaskDeclaration(finishedDeclaration.vertexIndex, minus(range(0, jobVertex.getParallelism()), finishedDeclaration.subtaskIndices)));
        if (finishedDeclaration.subtaskIndices.size() == jobVertex.getParallelism()) {
            expectedFullyFinishedJobVertices.add(jobVertex);
        }
    });
    List<ExecutionVertex> expectedRunningTasks = chooseTasks(graph, expectedRunningTaskDeclarations.toArray(new TaskDeclaration[0]));
    List<Execution> expectedFinishedTasks = chooseTasks(graph, expectedFinishedTaskDeclarations.toArray(new TaskDeclaration[0])).stream().map(ExecutionVertex::getCurrentExecutionAttempt).collect(Collectors.toList());
    List<ExecutionVertex> expectedToTriggerTasks = chooseTasks(graph, expectedToTriggerTaskDeclarations.toArray(new TaskDeclaration[0]));
    // Tests computing the checkpoint plan (the isUnalignedCheckpoint flag doesn't influence
    // the result because all tasks are in the RUNNING state here).
    CheckpointPlan checkpointPlan = planCalculator.calculateCheckpointPlan().get();
    checkCheckpointPlan(expectedToTriggerTasks, expectedRunningTasks, expectedFinishedTasks, expectedFullyFinishedJobVertices, checkpointPlan);
}
Also used : Execution(org.apache.flink.runtime.executiongraph.Execution) ExecutionJobVertex(org.apache.flink.runtime.executiongraph.ExecutionJobVertex) ExecutionGraph(org.apache.flink.runtime.executiongraph.ExecutionGraph) ArrayList(java.util.ArrayList) ExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionVertex)
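
The helpers range and minus are not shown in this excerpt; from their use above they presumably produce the integer interval [0, parallelism) and a set difference, respectively. A sketch under that assumption, with JDK types only:

// illustrative implementations matching how the helpers are used above
static Set<Integer> range(int startInclusive, int endExclusive) {
    return IntStream.range(startInclusive, endExclusive).boxed().collect(Collectors.toSet());
}

static Set<Integer> minus(Set<Integer> base, Set<Integer> toRemove) {
    Set<Integer> result = new HashSet<>(base);
    result.removeAll(toRemove);
    return result;
}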

Aggregations

Execution (org.apache.flink.runtime.executiongraph.Execution): 45
ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex): 26
ExecutionJobVertex (org.apache.flink.runtime.executiongraph.ExecutionJobVertex): 11
ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID): 8
JobID (org.apache.flink.api.common.JobID): 7
JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID): 7
Test (org.junit.Test): 7
AcknowledgeCheckpoint (org.apache.flink.runtime.messages.checkpoint.AcknowledgeCheckpoint): 6
ArrayList (java.util.ArrayList): 5
IOException (java.io.IOException): 4
ExecutionGraph (org.apache.flink.runtime.executiongraph.ExecutionGraph): 4
DeclineCheckpoint (org.apache.flink.runtime.messages.checkpoint.DeclineCheckpoint): 4
HashMap (java.util.HashMap): 3
CompletableFuture (java.util.concurrent.CompletableFuture): 3
TimeoutException (java.util.concurrent.TimeoutException): 3
Time (org.apache.flink.api.common.time.Time): 3
PartitionProducerDisposedException (org.apache.flink.runtime.jobmanager.PartitionProducerDisposedException): 3
LogicalSlot (org.apache.flink.runtime.jobmaster.LogicalSlot): 3
StackTraceSampleResponse (org.apache.flink.runtime.messages.StackTraceSampleResponse): 3
Collection (java.util.Collection): 2