Search in sources :

Example 6 with SimpleActorGateway

use of org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.SimpleActorGateway in project flink by apache.

the class ExecutionGraphRestartTest method testFailExecutionAfterCancel.

/**
 * Cancels a running graph, fails every current execution attempt right
 * afterwards, and verifies that the job is reported as
 * {@link JobStatus#CANCELED} rather than being restarted. This mimics a slot
 * being released concurrently with the cancellation.
 */
@Test
public void testFailExecutionAfterCancel() throws Exception {
    SimpleActorGateway actorGateway = new SimpleActorGateway(TestingUtils.directExecutionContext());
    ActorTaskManagerGateway taskManagerGateway = new ActorTaskManagerGateway(actorGateway);
    Instance taskManager = ExecutionGraphTestUtils.getInstance(taskManagerGateway, 2);

    Scheduler slotScheduler = new Scheduler(TestingUtils.defaultExecutionContext());
    slotScheduler.newInstanceAvailable(taskManager);

    JobVertex testVertex = newJobVertex("Test Vertex", 1, NoOpInvokable.class);

    // NOTE(review): this restart strategy is only attached to the JobGraph; the
    // ExecutionGraph below is built with InfiniteDelayRestartStrategy. Confirm
    // whether the config is still needed.
    ExecutionConfig config = new ExecutionConfig();
    config.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, Integer.MAX_VALUE));

    JobGraph job = new JobGraph("Test Job", testVertex);
    job.setExecutionConfig(config);

    ExecutionGraph graph = newExecutionGraph(new InfiniteDelayRestartStrategy(), slotScheduler);
    graph.attachJobGraph(job.getVerticesSortedTopologicallyFromSources());
    assertEquals(JobStatus.CREATED, graph.getState());

    graph.scheduleForExecution();
    assertEquals(JobStatus.RUNNING, graph.getState());

    // Cancel first, then fail each attempt immediately (as a concurrent slot
    // release would do).
    graph.cancel();
    for (ExecutionVertex executionVertex : graph.getAllExecutionVertices()) {
        Execution attempt = executionVertex.getCurrentExecutionAttempt();
        attempt.fail(new Exception("Test Exception"));
    }
    assertEquals(JobStatus.CANCELED, graph.getState());

    // Acknowledging the cancellation afterwards must not change the job state.
    ExecutionVertex firstVertex = graph.getAllExecutionVertices().iterator().next();
    Execution firstAttempt = firstVertex.getCurrentExecutionAttempt();
    firstAttempt.cancelingComplete();
    assertEquals(JobStatus.CANCELED, graph.getState());
}
Also used : JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) InfiniteDelayRestartStrategy(org.apache.flink.runtime.executiongraph.restart.InfiniteDelayRestartStrategy) Instance(org.apache.flink.runtime.instance.Instance) Scheduler(org.apache.flink.runtime.jobmanager.scheduler.Scheduler) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) SimpleActorGateway(org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.SimpleActorGateway) SuppressRestartsException(org.apache.flink.runtime.execution.SuppressRestartsException) IOException(java.io.IOException) ActorTaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway) Test(org.junit.Test)

Example 7 with SimpleActorGateway

use of org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.SimpleActorGateway in project flink by apache.

the class ExecutionGraphRestartTest method testFailWhileRestarting.

/**
 * Brings a job into {@link JobStatus#RESTARTING} by marking its instance dead
 * and then checks how {@code fail(...)} behaves in that state: an ordinary
 * exception leaves the job in RESTARTING, a {@link SuppressRestartsException}
 * moves it to FAILED, and a later {@code restart()} call does not leave FAILED.
 */
@Test
public void testFailWhileRestarting() throws Exception {
    Scheduler slotScheduler = new Scheduler(TestingUtils.defaultExecutionContext());

    SimpleActorGateway actorGateway = new SimpleActorGateway(TestingUtils.directExecutionContext());
    Instance taskManager = ExecutionGraphTestUtils.getInstance(new ActorTaskManagerGateway(actorGateway), NUM_TASKS);
    slotScheduler.newInstanceAvailable(taskManager);

    // Blocking program
    ExecutionGraph graph = new ExecutionGraph(
        TestingUtils.defaultExecutor(),
        TestingUtils.defaultExecutor(),
        new JobID(),
        "TestJob",
        new Configuration(),
        new SerializedValue<>(new ExecutionConfig()),
        AkkaUtils.getDefaultTimeout(),
        // We want to manually control the restart and delay
        new InfiniteDelayRestartStrategy(),
        slotScheduler);

    JobVertex noOpVertex = new JobVertex("NoOpInvokable");
    noOpVertex.setInvokableClass(NoOpInvokable.class);
    noOpVertex.setParallelism(NUM_TASKS);

    JobGraph job = new JobGraph("TestJob", noOpVertex);
    graph.attachJobGraph(job.getVerticesSortedTopologicallyFromSources());
    assertEquals(JobStatus.CREATED, graph.getState());

    graph.scheduleForExecution();
    assertEquals(JobStatus.RUNNING, graph.getState());

    // Kill the instance and poll until the job is restarting or the deadline expires
    taskManager.markDead();
    Deadline restartDeadline = TestingUtils.TESTING_DURATION().fromNow();
    while (restartDeadline.hasTimeLeft()) {
        if (graph.getState() == JobStatus.RESTARTING) {
            break;
        }
        Thread.sleep(100);
    }
    assertEquals(JobStatus.RESTARTING, graph.getState());

    // An ordinary exception must not abort the pending restart
    graph.fail(new Exception("Test exception"));
    assertEquals(JobStatus.RESTARTING, graph.getState());

    // A SuppressRestartsException, however, must fail the job
    graph.fail(new SuppressRestartsException(new Exception("Test exception")));
    assertEquals(JobStatus.FAILED, graph.getState());

    // The restart has been aborted, so triggering it has no effect on the state
    graph.restart();
    assertEquals(JobStatus.FAILED, graph.getState());
}
Also used : Configuration(org.apache.flink.configuration.Configuration) Instance(org.apache.flink.runtime.instance.Instance) Scheduler(org.apache.flink.runtime.jobmanager.scheduler.Scheduler) Deadline(scala.concurrent.duration.Deadline) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) SimpleActorGateway(org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.SimpleActorGateway) SuppressRestartsException(org.apache.flink.runtime.execution.SuppressRestartsException) IOException(java.io.IOException) ActorTaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) SuppressRestartsException(org.apache.flink.runtime.execution.SuppressRestartsException) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) InfiniteDelayRestartStrategy(org.apache.flink.runtime.executiongraph.restart.InfiniteDelayRestartStrategy) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 8 with SimpleActorGateway

use of org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.SimpleActorGateway in project flink by apache.

the class ExecutionGraphRestartTest method testSuspendWhileRestarting.

/**
 * Verifies that suspending a job while it is in {@link JobStatus#RESTARTING}
 * aborts the restart: the job is SUSPENDED right after the suspend call and is
 * still SUSPENDED once the unblocked restart has finished.
 *
 * @throws Exception if waiting on the restart strategy's futures fails or
 *         times out, or if any graph operation throws
 */
@Test
public void testSuspendWhileRestarting() throws Exception {
    // A single deadline bounds both waits further below.
    FiniteDuration maxWait = new FiniteDuration(1, TimeUnit.MINUTES);
    Deadline deadline = maxWait.fromNow();

    SimpleActorGateway actorGateway = new SimpleActorGateway(TestingUtils.directExecutionContext());
    Instance taskManager = ExecutionGraphTestUtils.getInstance(new ActorTaskManagerGateway(actorGateway), NUM_TASKS);

    Scheduler slotScheduler = new Scheduler(TestingUtils.defaultExecutionContext());
    slotScheduler.newInstanceAvailable(taskManager);

    JobVertex taskVertex = new JobVertex("Task");
    taskVertex.setInvokableClass(NoOpInvokable.class);
    taskVertex.setParallelism(NUM_TASKS);

    JobGraph job = new JobGraph("Pointwise job", taskVertex);

    ControllableRestartStrategy restartStrategy = new ControllableRestartStrategy(maxWait);

    ExecutionGraph graph = new ExecutionGraph(
        TestingUtils.defaultExecutor(),
        TestingUtils.defaultExecutor(),
        new JobID(),
        "Test job",
        new Configuration(),
        new SerializedValue<>(new ExecutionConfig()),
        AkkaUtils.getDefaultTimeout(),
        restartStrategy,
        slotScheduler);
    graph.attachJobGraph(job.getVerticesSortedTopologicallyFromSources());
    assertEquals(JobStatus.CREATED, graph.getState());

    graph.scheduleForExecution();
    assertEquals(JobStatus.RUNNING, graph.getState());

    // Kill the instance and wait until the restart strategy signals that it was reached
    taskManager.markDead();
    Await.ready(restartStrategy.getReachedCanRestart(), deadline.timeLeft());
    assertEquals(JobStatus.RESTARTING, graph.getState());

    graph.suspend(new Exception("Test exception"));
    assertEquals(JobStatus.SUSPENDED, graph.getState());

    // Let the blocked restart proceed; it must not move the job out of SUSPENDED
    restartStrategy.unlockRestart();
    Await.ready(restartStrategy.getRestartDone(), deadline.timeLeft());
    assertEquals(JobStatus.SUSPENDED, graph.getState());
}
Also used : Configuration(org.apache.flink.configuration.Configuration) Instance(org.apache.flink.runtime.instance.Instance) Scheduler(org.apache.flink.runtime.jobmanager.scheduler.Scheduler) Deadline(scala.concurrent.duration.Deadline) FiniteDuration(scala.concurrent.duration.FiniteDuration) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) SimpleActorGateway(org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.SimpleActorGateway) SuppressRestartsException(org.apache.flink.runtime.execution.SuppressRestartsException) IOException(java.io.IOException) ActorTaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 9 with SimpleActorGateway

use of org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.SimpleActorGateway in project flink by apache.

the class ExecutionVertexDeploymentTest method testFailExternallyDuringDeploy.

/**
 * Deploys a vertex to a slot, fails it externally while it is in
 * {@link ExecutionState#DEPLOYING}, and checks that it ends up
 * {@link ExecutionState#FAILED} with the supplied failure cause and with
 * timestamps recorded for the CREATED, DEPLOYING and FAILED states.
 */
@Test
public void testFailExternallyDuringDeploy() {
    try {
        final JobVertexID jid = new JobVertexID();
        final TestingUtils.QueuedActionExecutionContext ec = TestingUtils.queuedActionExecutionContext();
        final TestingUtils.ActionQueue queue = ec.actionQueue();
        final ExecutionJobVertex ejv = getExecutionVertex(jid, ec);
        final ExecutionVertex vertex = new ExecutionVertex(ejv, 0, new IntermediateResult[0], AkkaUtils.getDefaultTimeout());
        final Instance instance = getInstance(new ActorTaskManagerGateway(new SimpleActorGateway(TestingUtils.directExecutionContext())));
        final SimpleSlot slot = instance.allocateSimpleSlot(ejv.getJobId());
        assertEquals(ExecutionState.CREATED, vertex.getExecutionState());
        vertex.deployToSlot(slot);
        assertEquals(ExecutionState.DEPLOYING, vertex.getExecutionState());
        Exception testError = new Exception("test error");
        vertex.fail(testError);
        assertEquals(ExecutionState.FAILED, vertex.getExecutionState());
        assertEquals(testError, vertex.getFailureCause());
        // Run the two actions queued on the execution context
        // (presumably the deferred deploy/cancel calls; confirm against the queue's producers).
        queue.triggerNextAction();
        queue.triggerNextAction();
        assertTrue(vertex.getStateTimestamp(ExecutionState.CREATED) > 0);
        assertTrue(vertex.getStateTimestamp(ExecutionState.DEPLOYING) > 0);
        assertTrue(vertex.getStateTimestamp(ExecutionState.FAILED) > 0);
    } catch (Exception e) {
        // Fail the test but keep the original exception as the cause, so the full
        // stack trace appears in the test report instead of only on stderr, and a
        // null exception message does not produce an empty failure.
        throw new AssertionError("Unexpected exception: " + e, e);
    }
}
Also used : TestingUtils(org.apache.flink.runtime.testingUtils.TestingUtils) Instance(org.apache.flink.runtime.instance.Instance) ExecutionGraphTestUtils.getInstance(org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.getInstance) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) SimpleActorGateway(org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.SimpleActorGateway) SimpleSlot(org.apache.flink.runtime.instance.SimpleSlot) ExecutionGraphTestUtils.getExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.getExecutionVertex) ActorTaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway) Test(org.junit.Test)

Example 10 with SimpleActorGateway

use of org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.SimpleActorGateway in project flink by apache.

the class ExecutionVertexDeploymentTest method testDeployCall.

/**
 * Deploys a vertex to a slot and checks that it moves from
 * {@link ExecutionState#CREATED} to {@link ExecutionState#DEPLOYING}, that a
 * second deployment attempt is rejected with an {@link IllegalStateException},
 * and that no failure cause is recorded.
 */
@Test
public void testDeployCall() {
    try {
        final JobVertexID jid = new JobVertexID();
        final ExecutionJobVertex ejv = getExecutionVertex(jid);
        // mock taskmanager to simply accept the call
        Instance instance = getInstance(new ActorTaskManagerGateway(new SimpleActorGateway(TestingUtils.directExecutionContext())));
        final SimpleSlot slot = instance.allocateSimpleSlot(ejv.getJobId());
        final ExecutionVertex vertex = new ExecutionVertex(ejv, 0, new IntermediateResult[0], AkkaUtils.getDefaultTimeout());
        assertEquals(ExecutionState.CREATED, vertex.getExecutionState());
        vertex.deployToSlot(slot);
        assertEquals(ExecutionState.DEPLOYING, vertex.getExecutionState());
        // no repeated scheduling
        try {
            vertex.deployToSlot(slot);
            fail("Scheduled from wrong state");
        } catch (IllegalStateException expected) {
            // as expected: deploying from a non-CREATED state must be rejected
        }
        assertNull(vertex.getFailureCause());
        assertTrue(vertex.getStateTimestamp(ExecutionState.CREATED) > 0);
        assertTrue(vertex.getStateTimestamp(ExecutionState.DEPLOYING) > 0);
    } catch (Exception e) {
        // Fail the test but keep the original exception as the cause, so the full
        // stack trace appears in the test report instead of only on stderr, and a
        // null exception message does not produce an empty failure.
        throw new AssertionError("Unexpected exception: " + e, e);
    }
}
Also used : Instance(org.apache.flink.runtime.instance.Instance) ExecutionGraphTestUtils.getInstance(org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.getInstance) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) SimpleActorGateway(org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.SimpleActorGateway) SimpleSlot(org.apache.flink.runtime.instance.SimpleSlot) ExecutionGraphTestUtils.getExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.getExecutionVertex) ActorTaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway) Test(org.junit.Test)

Aggregations

SimpleActorGateway (org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.SimpleActorGateway)11 Instance (org.apache.flink.runtime.instance.Instance)11 ActorTaskManagerGateway (org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway)11 Test (org.junit.Test)10 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)7 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)7 Scheduler (org.apache.flink.runtime.jobmanager.scheduler.Scheduler)7 IOException (java.io.IOException)5 SuppressRestartsException (org.apache.flink.runtime.execution.SuppressRestartsException)5 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)4 ExecutionGraphTestUtils.getExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.getExecutionVertex)4 ExecutionGraphTestUtils.getInstance (org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.getInstance)4 SimpleSlot (org.apache.flink.runtime.instance.SimpleSlot)4 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)4 InfiniteDelayRestartStrategy (org.apache.flink.runtime.executiongraph.restart.InfiniteDelayRestartStrategy)3 FiniteDuration (scala.concurrent.duration.FiniteDuration)3 JobID (org.apache.flink.api.common.JobID)2 Configuration (org.apache.flink.configuration.Configuration)2 FixedDelayRestartStrategy (org.apache.flink.runtime.executiongraph.restart.FixedDelayRestartStrategy)2 Deadline (scala.concurrent.duration.Deadline)2