Search in sources :

Example 6 with ExecutionState

use of org.apache.flink.runtime.execution.ExecutionState in project flink by apache.

the class Execution method processFail.

/**
 * Attempts to move this execution into the FAILED state and performs the associated cleanup.
 *
 * <p>The current state is re-read and the transition retried in a loop until either the
 * transition succeeds (returns {@code true}) or the execution is observed in FAILED, CANCELED,
 * FINISHED or CANCELING, in which case no transition to FAILED is made (returns {@code false}).
 *
 * @param t the failure cause; stored in {@code failureCause} and passed on to
 *     {@code vertex.executionFailed}
 * @param isCallback when {@code false} and the execution was RUNNING or DEPLOYING before the
 *     transition, a cancel RPC is sent afterwards. NOTE(review): presumably {@code true} means
 *     the failure was reported by the task itself - confirm against callers.
 * @param userAccumulators accumulators forwarded to {@code cancelingComplete} or
 *     {@code updateAccumulatorsAndMetrics}
 * @param metrics I/O metrics forwarded together with the accumulators
 * @return {@code true} if this call performed the transition to FAILED, {@code false} otherwise
 */
private boolean processFail(Throwable t, boolean isCallback, Map<String, Accumulator<?, ?>> userAccumulators, IOMetrics metrics) {
    // atomically switch to failed
    while (true) {
        // snapshot of the state; also used as the expected "from" state of the transition below
        ExecutionState current = this.state;
        if (current == FAILED) {
            // already failed. It is enough to remember once that we failed (its sad enough)
            return false;
        }
        if (current == CANCELED || current == FINISHED) {
            // we are already aborting or are already aborted or we are already finished
            if (LOG.isDebugEnabled()) {
                LOG.debug("Ignoring transition of vertex {} to {} while being {}.", getVertexWithAttempt(), FAILED, current);
            }
            return false;
        }
        if (current == CANCELING) {
            // a failure that arrives while canceling is handed to cancelingComplete instead of
            // being recorded as FAILED; the caller is told that no failure transition happened
            cancelingComplete(userAccumulators, metrics);
            return false;
        }
        if (transitionState(current, FAILED, t)) {
            // success (in a manner of speaking)
            this.failureCause = t;
            updateAccumulatorsAndMetrics(userAccumulators, metrics);
            try {
                // release the slot (if one was assigned) and remove this execution from the graph
                if (assignedResource != null) {
                    assignedResource.releaseSlot();
                }
                vertex.getExecutionGraph().deregisterExecution(this);
            } finally {
                // the vertex is notified even if the cleanup above throws
                vertex.executionFailed(t);
            }
            // only when this is not a callback and the execution was RUNNING or DEPLOYING
            // before the transition is a cancel request sent out
            if (!isCallback && (current == RUNNING || current == DEPLOYING)) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Sending out cancel request, to remove task execution from TaskManager.");
                }
                try {
                    if (assignedResource != null) {
                        sendCancelRpcCall();
                    }
                } catch (Throwable tt) {
                    // no reason this should ever happen, but log it to be safe
                    LOG.error("Error triggering cancel call while marking task as failed.", tt);
                }
            }
            // leave the loop
            return true;
        }
        // transitionState returned false (presumably the state changed concurrently);
        // loop around, re-read the state and try again
    }
}
Also used : ExecutionState(org.apache.flink.runtime.execution.ExecutionState)

Example 7 with ExecutionState

use of org.apache.flink.runtime.execution.ExecutionState in project flink by apache.

the class TaskAsyncCallTest method testMixedAsyncCallsInOrder.

/**
 * Issues {@code NUM_CALLS} interleaved checkpoint-barrier triggers and checkpoint-complete
 * notifications against a started task and checks that the task is neither canceled nor failed
 * afterwards, i.e. that it is still RUNNING or already FINISHED.
 *
 * <p>The task is canceled and its executing thread joined at the end of the test.
 */
@Test
public void testMixedAsyncCallsInOrder() {
    try {
        Task task = createTask();
        task.startTaskThread();
        // block until awaitLatch is released before issuing the calls
        awaitLatch.await();
        for (int i = 1; i <= NUM_CALLS; i++) {
            task.triggerCheckpointBarrier(i, 156865867234L, CheckpointOptions.forFullCheckpoint());
            task.notifyCheckpointComplete(i);
        }
        // block until triggerLatch is released before inspecting the task state
        triggerLatch.await();
        assertFalse(task.isCanceledOrFailed());
        ExecutionState currentState = task.getExecutionState();
        if (currentState != ExecutionState.RUNNING && currentState != ExecutionState.FINISHED) {
            fail("Task should be RUNNING or FINISHED, but is " + currentState);
        }
        task.cancelExecution();
        task.getExecutingThread().join();
    } catch (Exception e) {
        // Fail with the original exception attached as the cause so the test report keeps the
        // full stack trace. fail(e.getMessage()) would drop the cause and yields a null
        // message for exceptions created without one.
        throw new AssertionError("Unexpected exception: " + e, e);
    }
}
Also used : ExecutionState(org.apache.flink.runtime.execution.ExecutionState) StatefulTask(org.apache.flink.runtime.jobgraph.tasks.StatefulTask) Test(org.junit.Test)

Example 8 with ExecutionState

use of org.apache.flink.runtime.execution.ExecutionState in project flink by apache.

the class TaskTest method testOnPartitionStateUpdate.

/**
 * Feeds every {@link ExecutionState} as a producer state into
 * {@code Task#onPartitionStateUpdate} while the task is RUNNING and checks the resulting task
 * state against a lookup table.
 *
 * <p>Producer states without an explicit entry in the table are expected to fail the task. The
 * mocked input gate is expected to see exactly four partition re-trigger requests over the
 * whole run.
 */
@Test
public void testOnPartitionStateUpdate() throws Exception {
    IntermediateDataSetID consumedResultId = new IntermediateDataSetID();
    ResultPartitionID producerPartitionId = new ResultPartitionID();

    // Mocked input gate that reports the result ID created above
    SingleInputGate gateMock = mock(SingleInputGate.class);
    when(gateMock.getConsumedResultId()).thenReturn(consumedResultId);

    final Task taskUnderTest = createTask(InvokableBlockingInInvoke.class);
    setInputGate(taskUnderTest, gateMock);

    final ExecutionState[] producerStates = ExecutionState.values();

    // Lookup table: producer state -> expected task state. Default is FAILED.
    final Map<ExecutionState, ExecutionState> expectedTaskState = new HashMap<>(producerStates.length);
    for (ExecutionState producerState : producerStates) {
        expectedTaskState.put(producerState, ExecutionState.FAILED);
    }
    // Producer states after which the task is expected to keep running
    expectedTaskState.put(ExecutionState.RUNNING, ExecutionState.RUNNING);
    expectedTaskState.put(ExecutionState.SCHEDULED, ExecutionState.RUNNING);
    expectedTaskState.put(ExecutionState.DEPLOYING, ExecutionState.RUNNING);
    expectedTaskState.put(ExecutionState.FINISHED, ExecutionState.RUNNING);
    // Producer states after which the task is expected to be canceling
    expectedTaskState.put(ExecutionState.CANCELED, ExecutionState.CANCELING);
    expectedTaskState.put(ExecutionState.CANCELING, ExecutionState.CANCELING);
    expectedTaskState.put(ExecutionState.FAILED, ExecutionState.CANCELING);

    for (ExecutionState producerState : producerStates) {
        // Reset the task to RUNNING before every update
        setState(taskUnderTest, ExecutionState.RUNNING);
        taskUnderTest.onPartitionStateUpdate(consumedResultId, producerPartitionId, producerState);
        assertEquals(expectedTaskState.get(producerState), taskUnderTest.getExecutionState());
    }

    verify(gateMock, times(4)).retriggerPartitionRequest(eq(producerPartitionId.getPartitionId()));
}
Also used : ExecutionState(org.apache.flink.runtime.execution.ExecutionState) HashMap(java.util.HashMap) IntermediateDataSetID(org.apache.flink.runtime.jobgraph.IntermediateDataSetID) ResultPartitionID(org.apache.flink.runtime.io.network.partition.ResultPartitionID) SingleInputGate(org.apache.flink.runtime.io.network.partition.consumer.SingleInputGate) Test(org.junit.Test)

Example 9 with ExecutionState

use of org.apache.flink.runtime.execution.ExecutionState in project flink by apache.

the class TaskExecutionStateTest method testEqualsHashCode.

/**
 * Checks that two {@code TaskExecutionState} instances built from the same job ID, execution
 * attempt ID, state and error are equal and have equal hash codes.
 */
@Test
public void testEqualsHashCode() {
    try {
        final JobID jid = new JobID();
        final ExecutionAttemptID executionId = new ExecutionAttemptID();
        final ExecutionState state = ExecutionState.RUNNING;
        final Throwable error = new RuntimeException("some test error message");
        TaskExecutionState s1 = new TaskExecutionState(jid, executionId, state, error);
        TaskExecutionState s2 = new TaskExecutionState(jid, executionId, state, error);
        assertEquals(s1.hashCode(), s2.hashCode());
        assertEquals(s1, s2);
    } catch (Exception e) {
        // Fail with the original exception attached as the cause so the test report keeps the
        // full stack trace. fail(e.getMessage()) would drop the cause and yields a null
        // message for exceptions created without one.
        throw new AssertionError("Unexpected exception: " + e, e);
    }
}
Also used : ExecutionState(org.apache.flink.runtime.execution.ExecutionState) ExecutionAttemptID(org.apache.flink.runtime.executiongraph.ExecutionAttemptID) JobID(org.apache.flink.api.common.JobID) IOException(java.io.IOException) Test(org.junit.Test)

Example 10 with ExecutionState

use of org.apache.flink.runtime.execution.ExecutionState in project flink by apache.

the class ExecutionGraphRestartTest method testCancelWhileFailing.

/**
 * Cancels an execution graph while it is in the FAILING state and checks that the job status
 * goes FAILING -> CANCELLING -> CANCELED.
 *
 * <p>{@code jobVertexInFinalState()} is stubbed out on the graph for most of the test so that
 * the job status does not advance on its own; the real method is restored and invoked manually
 * at the end.
 */
@Test
public void testCancelWhileFailing() throws Exception {
    // Restart and delay are controlled manually via this strategy
    RestartStrategy manualRestart = new InfiniteDelayRestartStrategy();
    Tuple2<ExecutionGraph, Instance> graphAndInstance = createSpyExecutionGraph(manualRestart);
    ExecutionGraph graph = graphAndInstance.f0;
    Instance graphInstance = graphAndInstance.f1;

    // Suppress the final-state handling for now
    doNothing().when(graph).jobVertexInFinalState();

    // Kill the instance ...
    graphInstance.markDead();

    // ... then poll until every vertex reports FAILED or CANCELED, or the deadline expires.
    // The job status cannot be awaited instead because jobVertexInFinalState is a no-op here.
    Deadline pollDeadline = TestingUtils.TESTING_DURATION().fromNow();
    boolean allVerticesSettled = false;
    while (pollDeadline.hasTimeLeft() && !allVerticesSettled) {
        allVerticesSettled = true;
        for (ExecutionVertex executionVertex : graph.getAllExecutionVertices()) {
            ExecutionState vertexState = executionVertex.getExecutionState();
            boolean settled = vertexState == ExecutionState.FAILED || vertexState == ExecutionState.CANCELED;
            if (!settled) {
                // At least one vertex is not there yet: back off briefly and rescan
                allVerticesSettled = false;
                Thread.sleep(100);
                break;
            }
        }
    }

    // The job itself must still be failing
    assertEquals(JobStatus.FAILING, graph.getState());

    // Cancelling has to move the job to CANCELLING
    graph.cancel();
    assertEquals(JobStatus.CANCELLING, graph.getState());

    // Restore the real method and finalize the job state by hand
    doCallRealMethod().when(graph).jobVertexInFinalState();
    graph.jobVertexInFinalState();
    assertEquals(JobStatus.CANCELED, graph.getState());
}
Also used : ExecutionState(org.apache.flink.runtime.execution.ExecutionState) InfiniteDelayRestartStrategy(org.apache.flink.runtime.executiongraph.restart.InfiniteDelayRestartStrategy) Instance(org.apache.flink.runtime.instance.Instance) Deadline(scala.concurrent.duration.Deadline) FailureRateRestartStrategy(org.apache.flink.runtime.executiongraph.restart.FailureRateRestartStrategy) InfiniteDelayRestartStrategy(org.apache.flink.runtime.executiongraph.restart.InfiniteDelayRestartStrategy) NoRestartStrategy(org.apache.flink.runtime.executiongraph.restart.NoRestartStrategy) RestartStrategy(org.apache.flink.runtime.executiongraph.restart.RestartStrategy) FixedDelayRestartStrategy(org.apache.flink.runtime.executiongraph.restart.FixedDelayRestartStrategy) Test(org.junit.Test)

Aggregations

ExecutionState (org.apache.flink.runtime.execution.ExecutionState)26 Test (org.junit.Test)11 ResultPartitionID (org.apache.flink.runtime.io.network.partition.ResultPartitionID)6 TaskManagerLocation (org.apache.flink.runtime.taskmanager.TaskManagerLocation)6 JsonGenerator (com.fasterxml.jackson.core.JsonGenerator)5 IOException (java.io.IOException)5 StringWriter (java.io.StringWriter)5 TimeoutException (java.util.concurrent.TimeoutException)5 JobID (org.apache.flink.api.common.JobID)5 AccessExecutionVertex (org.apache.flink.runtime.executiongraph.AccessExecutionVertex)5 ExecutionAttemptID (org.apache.flink.runtime.executiongraph.ExecutionAttemptID)4 HashMap (java.util.HashMap)3 JobException (org.apache.flink.runtime.JobException)3 ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID)3 ExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionVertex)3 IntermediateResultPartition (org.apache.flink.runtime.executiongraph.IntermediateResultPartition)3 SimpleSlot (org.apache.flink.runtime.instance.SimpleSlot)3 ConnectionID (org.apache.flink.runtime.io.network.ConnectionID)3 IntermediateDataSetID (org.apache.flink.runtime.jobgraph.IntermediateDataSetID)3 MutableIOMetrics (org.apache.flink.runtime.webmonitor.utils.MutableIOMetrics)3