Search in sources :

Example 1 with ActorTaskManagerGateway

use of org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway in project flink by apache.

the class ExecutionGraphRestartTest method createExecutionGraph.

/**
 * Builds and schedules an {@link ExecutionGraph} for a single-vertex job.
 *
 * <p>An {@link Instance} with {@code NUM_TASKS} slots is registered at a fresh
 * {@link Scheduler}, a job consisting of one {@code NoOpInvokable} vertex with parallelism
 * {@code NUM_TASKS} is attached, and the graph is scheduled. The method asserts that the graph
 * is in state {@code CREATED} before scheduling and {@code RUNNING} afterwards.
 *
 * @param restartStrategy restart strategy handed to {@code newExecutionGraph}
 * @param isSpy if {@code true}, the graph is wrapped with {@code spy(...)} before the job is attached
 * @return the (possibly spied) execution graph together with the instance it runs on
 * @throws Exception if setting up or scheduling the graph fails
 */
private static Tuple2<ExecutionGraph, Instance> createExecutionGraph(RestartStrategy restartStrategy, boolean isSpy) throws Exception {
    // Task manager side: one instance offering NUM_TASKS slots.
    final ActorTaskManagerGateway taskManagerGateway =
        new ActorTaskManagerGateway(new SimpleActorGateway(TestingUtils.directExecutionContext()));
    final Instance taskManager = ExecutionGraphTestUtils.getInstance(taskManagerGateway, NUM_TASKS);

    final Scheduler slotScheduler = new Scheduler(TestingUtils.defaultExecutionContext());
    slotScheduler.newInstanceAvailable(taskManager);

    // Job side: a single vertex running with full parallelism.
    final JobVertex onlyVertex = newJobVertex("Task", NUM_TASKS, NoOpInvokable.class);
    final JobGraph job = new JobGraph("Pointwise job", onlyVertex);

    final ExecutionGraph plainGraph = newExecutionGraph(restartStrategy, slotScheduler);
    final ExecutionGraph graph = isSpy ? spy(plainGraph) : plainGraph;

    graph.attachJobGraph(job.getVerticesSortedTopologicallyFromSources());
    assertEquals(JobStatus.CREATED, graph.getState());

    graph.scheduleForExecution();
    assertEquals(JobStatus.RUNNING, graph.getState());

    return new Tuple2<>(graph, taskManager);
}
Also used : JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) Instance(org.apache.flink.runtime.instance.Instance) Scheduler(org.apache.flink.runtime.jobmanager.scheduler.Scheduler) Tuple2(org.apache.flink.api.java.tuple.Tuple2) SimpleActorGateway(org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.SimpleActorGateway) ActorTaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway)

Example 2 with ActorTaskManagerGateway

use of org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway in project flink by apache.

the class ExecutionGraphRestartTest method testConstraintsAfterRestart.

/**
 * Checks that {@code validateConstraints} holds for two co-located vertices sharing a slot
 * sharing group, both right after scheduling and again after the job was restarted following
 * a failure.
 */
@Test
public void testConstraintsAfterRestart() throws Exception {
    // --- set up a task manager instance and register it at the scheduler ---
    final ActorTaskManagerGateway taskManagerGateway =
        new ActorTaskManagerGateway(new SimpleActorGateway(TestingUtils.directExecutionContext()));
    final Instance taskManager = ExecutionGraphTestUtils.getInstance(taskManagerGateway, NUM_TASKS);
    final Scheduler slotScheduler = new Scheduler(TestingUtils.defaultExecutionContext());
    slotScheduler.newInstanceAvailable(taskManager);

    // --- two vertices in one slot sharing group, strictly co-located ---
    final JobVertex firstVertex = newJobVertex("Task1", NUM_TASKS, NoOpInvokable.class);
    final JobVertex secondVertex = newJobVertex("Task2", NUM_TASKS, NoOpInvokable.class);
    final SlotSharingGroup sharedSlots = new SlotSharingGroup();
    firstVertex.setSlotSharingGroup(sharedSlots);
    secondVertex.setSlotSharingGroup(sharedSlots);
    firstVertex.setStrictlyCoLocatedWith(secondVertex);

    // --- create the graph and bring the job to RUNNING ---
    final JobGraph job = new JobGraph("Pointwise job", firstVertex, secondVertex);
    final FixedDelayRestartStrategy restartStrategy = new FixedDelayRestartStrategy(1, 0L);
    final ExecutionGraph graph = newExecutionGraph(restartStrategy, slotScheduler);
    graph.attachJobGraph(job.getVerticesSortedTopologicallyFromSources());
    assertEquals(JobStatus.CREATED, graph.getState());

    graph.scheduleForExecution();
    assertEquals(JobStatus.RUNNING, graph.getState());

    // constraints must hold for the initial schedule ...
    validateConstraints(graph);

    // ... and still hold once the job has been restarted after a failure
    final FiniteDuration restartTimeout = new FiniteDuration(2, TimeUnit.MINUTES);
    restartAfterFailure(graph, restartTimeout, false);
    validateConstraints(graph);

    haltExecution(graph);
}
Also used : JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) Instance(org.apache.flink.runtime.instance.Instance) FixedDelayRestartStrategy(org.apache.flink.runtime.executiongraph.restart.FixedDelayRestartStrategy) Scheduler(org.apache.flink.runtime.jobmanager.scheduler.Scheduler) FiniteDuration(scala.concurrent.duration.FiniteDuration) SimpleActorGateway(org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.SimpleActorGateway) SlotSharingGroup(org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup) ActorTaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway) Test(org.junit.Test)

Example 3 with ActorTaskManagerGateway

use of org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway in project flink by apache.

the class ExecutionGraphRestartTest method testFailExecutionGraphAfterCancel.

/**
 * Verifies that {@link ExecutionGraph#fail(Throwable)} still takes effect when it is called
 * on a graph that is already being cancelled: the graph is expected to move from
 * {@code CANCELLING} to {@code FAILING} and, once the execution has finished canceling,
 * to {@code RESTARTING}.
 */
@Test
public void testFailExecutionGraphAfterCancel() throws Exception {
    final ActorTaskManagerGateway taskManagerGateway =
        new ActorTaskManagerGateway(new SimpleActorGateway(TestingUtils.directExecutionContext()));
    final Instance taskManager = ExecutionGraphTestUtils.getInstance(taskManagerGateway, 2);

    final Scheduler slotScheduler = new Scheduler(TestingUtils.defaultExecutionContext());
    slotScheduler.newInstanceAvailable(taskManager);

    final JobVertex testVertex = newJobVertex("Test Vertex", 1, NoOpInvokable.class);

    final ExecutionConfig config = new ExecutionConfig();
    config.setRestartStrategy(RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, Integer.MAX_VALUE));

    final JobGraph job = new JobGraph("Test Job", testVertex);
    job.setExecutionConfig(config);

    // The graph itself is created with an infinite-delay restart strategy.
    final ExecutionGraph graph = newExecutionGraph(new InfiniteDelayRestartStrategy(), slotScheduler);
    graph.attachJobGraph(job.getVerticesSortedTopologicallyFromSources());
    assertEquals(JobStatus.CREATED, graph.getState());

    graph.scheduleForExecution();
    assertEquals(JobStatus.RUNNING, graph.getState());

    // Cancel first, then fail immediately afterwards (e.g. a concurrent slot release).
    graph.cancel();
    assertEquals(JobStatus.CANCELLING, graph.getState());

    graph.fail(new Exception("Test Exception"));
    assertEquals(JobStatus.FAILING, graph.getState());

    // Completing the cancellation of the first vertex' current attempt finishes the failing phase.
    final ExecutionVertex firstVertex = graph.getAllExecutionVertices().iterator().next();
    final Execution currentAttempt = firstVertex.getCurrentExecutionAttempt();
    currentAttempt.cancelingComplete();

    assertEquals(JobStatus.RESTARTING, graph.getState());
}
Also used : JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) InfiniteDelayRestartStrategy(org.apache.flink.runtime.executiongraph.restart.InfiniteDelayRestartStrategy) Instance(org.apache.flink.runtime.instance.Instance) Scheduler(org.apache.flink.runtime.jobmanager.scheduler.Scheduler) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) SimpleActorGateway(org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.SimpleActorGateway) SuppressRestartsException(org.apache.flink.runtime.execution.SuppressRestartsException) IOException(java.io.IOException) ActorTaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway) Test(org.junit.Test)

Example 4 with ActorTaskManagerGateway

use of org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway in project flink by apache.

the class ExecutionGraphRestartTest method testFailingExecutionAfterRestart.

/**
 * Verifies that failing an execution attempt belonging to a previous run has no effect on a
 * job that has since been restarted. This matters when a callback handler fails an execution
 * after that execution already reached a final state and the job was restarted.
 */
@Test
public void testFailingExecutionAfterRestart() throws Exception {
    final ActorTaskManagerGateway taskManagerGateway =
        new ActorTaskManagerGateway(new SimpleActorGateway(TestingUtils.directExecutionContext()));
    final Instance taskManager = ExecutionGraphTestUtils.getInstance(taskManagerGateway, 2);

    final Scheduler slotScheduler = new Scheduler(TestingUtils.defaultExecutionContext());
    slotScheduler.newInstanceAvailable(taskManager);

    final JobVertex sender = newJobVertex("Task1", 1, NoOpInvokable.class);
    final JobVertex receiver = newJobVertex("Task2", 1, NoOpInvokable.class);
    final JobGraph job = new JobGraph("Pointwise job", sender, receiver);

    final ExecutionGraph graph = newExecutionGraph(new FixedDelayRestartStrategy(1, 1000), slotScheduler);
    graph.attachJobGraph(job.getVerticesSortedTopologicallyFromSources());
    assertEquals(JobStatus.CREATED, graph.getState());

    graph.scheduleForExecution();
    assertEquals(JobStatus.RUNNING, graph.getState());

    // Grab the current attempts of both vertices from the first run.
    final Iterator<ExecutionVertex> vertexIterator = graph.getAllExecutionVertices().iterator();
    final Execution oldFinishedAttempt = vertexIterator.next().getCurrentExecutionAttempt();
    final Execution oldFailedAttempt = vertexIterator.next().getCurrentExecutionAttempt();

    // One attempt finishes, the other fails; then wait for the restart.
    oldFinishedAttempt.markFinished();
    oldFailedAttempt.fail(new Exception("Test Exception"));
    oldFailedAttempt.cancelingComplete();

    final FiniteDuration restartTimeout = new FiniteDuration(2, TimeUnit.MINUTES);
    waitForAsyncRestart(graph, restartTimeout);
    assertEquals(JobStatus.RUNNING, graph.getState());

    // The restart is asynchronous, so wait until every vertex has a resource again.
    waitForAllResourcesToBeAssignedAfterAsyncRestart(graph, restartTimeout.fromNow());

    // From here on all resources are assigned; move the new attempts to running.
    for (ExecutionVertex restarted : graph.getAllExecutionVertices()) {
        assertNotNull("No assigned resource (test instability).", restarted.getCurrentAssignedResource());
        restarted.getCurrentExecutionAttempt().switchToRunning();
    }

    // Failing the attempt that finished in the first run must not disturb the restarted job.
    oldFinishedAttempt.fail(new Exception("This should have no effect"));

    for (ExecutionVertex restarted : graph.getAllExecutionVertices()) {
        restarted.getCurrentExecutionAttempt().markFinished();
    }

    // FINISHED is terminal, so the old attempt keeps its state and the job finishes normally.
    assertEquals(ExecutionState.FINISHED, oldFinishedAttempt.getState());
    assertEquals(JobStatus.FINISHED, graph.getState());
}
Also used : Instance(org.apache.flink.runtime.instance.Instance) FixedDelayRestartStrategy(org.apache.flink.runtime.executiongraph.restart.FixedDelayRestartStrategy) Scheduler(org.apache.flink.runtime.jobmanager.scheduler.Scheduler) FiniteDuration(scala.concurrent.duration.FiniteDuration) SimpleActorGateway(org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.SimpleActorGateway) SuppressRestartsException(org.apache.flink.runtime.execution.SuppressRestartsException) IOException(java.io.IOException) ActorTaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) Test(org.junit.Test)

Example 5 with ActorTaskManagerGateway

use of org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway in project flink by apache.

the class ExecutionVertexSchedulingTest method testSlotReleasedWhenScheduledQueued.

/**
 * Checks that an {@link ExecutionVertex} scheduled in queued mode ends up {@code FAILED} when
 * the slot that eventually completes its allocation future has already been released.
 *
 * <p>The vertex is expected to be {@code CREATED} initially, {@code SCHEDULED} while the
 * slot future is still pending, and {@code FAILED} once the future completes with the
 * released slot.
 */
@Test
public void testSlotReleasedWhenScheduledQueued() {
    try {
        final ExecutionJobVertex ejv = getExecutionVertex(new JobVertexID());
        final ExecutionVertex vertex = new ExecutionVertex(ejv, 0, new IntermediateResult[0], AkkaUtils.getDefaultTimeout());
        // a slot that cannot be deployed to: it is released before it is handed out
        final Instance instance = getInstance(new ActorTaskManagerGateway(DummyActorGateway.INSTANCE));
        final SimpleSlot slot = instance.allocateSimpleSlot(ejv.getJobId());
        slot.releaseSlot();
        assertTrue(slot.isReleased());
        // the mocked scheduler answers every allocation request with this pending future
        final FlinkCompletableFuture<SimpleSlot> future = new FlinkCompletableFuture<>();
        Scheduler scheduler = mock(Scheduler.class);
        when(scheduler.allocateSlot(Matchers.any(ScheduledUnit.class), anyBoolean())).thenReturn(future);
        assertEquals(ExecutionState.CREATED, vertex.getExecutionState());
        // try to deploy to the slot
        vertex.scheduleForExecution(scheduler, true);
        // the future does not yet hold a slot
        assertEquals(ExecutionState.SCHEDULED, vertex.getExecutionState());
        future.complete(slot);
        // will have failed
        assertEquals(ExecutionState.FAILED, vertex.getExecutionState());
    } catch (Exception e) {
        // Fail the test while keeping the original exception (and its stack trace) as the cause,
        // instead of printing to stderr and reporting only a possibly-null message.
        throw new AssertionError("Unexpected exception: " + e, e);
    }
}
Also used : Instance(org.apache.flink.runtime.instance.Instance) ExecutionGraphTestUtils.getInstance(org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.getInstance) Scheduler(org.apache.flink.runtime.jobmanager.scheduler.Scheduler) JobVertexID(org.apache.flink.runtime.jobgraph.JobVertexID) ScheduledUnit(org.apache.flink.runtime.jobmanager.scheduler.ScheduledUnit) SimpleSlot(org.apache.flink.runtime.instance.SimpleSlot) FlinkCompletableFuture(org.apache.flink.runtime.concurrent.impl.FlinkCompletableFuture) ExecutionGraphTestUtils.getExecutionVertex(org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.getExecutionVertex) ActorTaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway) Test(org.junit.Test)

Aggregations

ActorTaskManagerGateway (org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway)40 Test (org.junit.Test)36 Instance (org.apache.flink.runtime.instance.Instance)29 SimpleSlot (org.apache.flink.runtime.instance.SimpleSlot)22 JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID)22 JobID (org.apache.flink.api.common.JobID)20 Scheduler (org.apache.flink.runtime.jobmanager.scheduler.Scheduler)16 IOException (java.io.IOException)14 ExecutionGraphTestUtils.getInstance (org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.getInstance)12 SimpleActorGateway (org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.SimpleActorGateway)11 ExecutionGraphTestUtils.getExecutionVertex (org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.getExecutionVertex)11 JobVertex (org.apache.flink.runtime.jobgraph.JobVertex)11 InetAddress (java.net.InetAddress)9 ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID)9 TaskManagerLocation (org.apache.flink.runtime.taskmanager.TaskManagerLocation)9 ExecutionConfig (org.apache.flink.api.common.ExecutionConfig)8 ActorGateway (org.apache.flink.runtime.instance.ActorGateway)8 BaseTestingActorGateway (org.apache.flink.runtime.instance.BaseTestingActorGateway)8 DirectScheduledExecutorService (org.apache.flink.runtime.testutils.DirectScheduledExecutorService)8 DummyActorGateway (org.apache.flink.runtime.instance.DummyActorGateway)7