Search in sources:

Example 66 with FiniteDuration

use of scala.concurrent.duration.FiniteDuration in project flink by apache.

the class JobManagerServices method fromConfiguration.

// ------------------------------------------------------------------------
//  Creating the components from a configuration 
// ------------------------------------------------------------------------
/**
 * Builds the {@code JobManagerServices} from the given configuration.
 *
 * <p>The Akka timeout is parsed before any other component is instantiated, so that a
 * malformed duration is reported without a {@code BlobServer} having been created first.
 *
 * @param config configuration from which the library cache cleanup interval, the Akka
 *               timeout and the restart strategy factory are read
 * @param haServices high-availability services handed to the {@code BlobServer}
 * @return the assembled services
 * @throws IllegalConfigurationException if the configured timeout cannot be parsed; the
 *                                       original {@code NumberFormatException} is attached as cause
 * @throws Exception if one of the components cannot be created
 */
public static JobManagerServices fromConfiguration(Configuration config, HighAvailabilityServices haServices) throws Exception {
    // Validate the timeout first: failing here must not leave already-created components behind.
    final FiniteDuration timeout;
    try {
        timeout = AkkaUtils.getTimeout(config);
    } catch (NumberFormatException e) {
        // Keep the parse failure as the cause so the offending value shows up in the stack trace.
        throw new IllegalConfigurationException(AkkaUtils.formatDurationParingErrorMessage(), e);
    }

    final BlobServer blobServer = new BlobServer(config, haServices);

    // The configured value is scaled by 1000 (presumably seconds -> milliseconds; confirm against ConfigConstants).
    final long cleanupInterval = config.getLong(ConfigConstants.LIBRARY_CACHE_MANAGER_CLEANUP_INTERVAL, ConfigConstants.DEFAULT_LIBRARY_CACHE_MANAGER_CLEANUP_INTERVAL) * 1000;
    final BlobLibraryCacheManager libraryCacheManager = new BlobLibraryCacheManager(blobServer, cleanupInterval);

    // One scheduler thread per CPU core reported by Hardware.
    final ScheduledExecutorService futureExecutor = Executors.newScheduledThreadPool(Hardware.getNumberCPUCores(), new ExecutorThreadFactory("jobmanager-future"));

    return new JobManagerServices(futureExecutor, libraryCacheManager, RestartStrategyFactory.createRestartStrategyFactory(config), Time.of(timeout.length(), timeout.unit()));
}
Also used : ExecutorThreadFactory(org.apache.flink.runtime.util.ExecutorThreadFactory) BlobLibraryCacheManager(org.apache.flink.runtime.execution.librarycache.BlobLibraryCacheManager) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) IllegalConfigurationException(org.apache.flink.configuration.IllegalConfigurationException) FiniteDuration(scala.concurrent.duration.FiniteDuration) BlobServer(org.apache.flink.runtime.blob.BlobServer)

Example 67 with FiniteDuration

use of scala.concurrent.duration.FiniteDuration in project flink by apache.

the class ExecutionGraphRestartTest method testRestartAutomatically.

/**
 * Runs the {@code restartAfterFailure} helper against an execution graph that was created
 * with a {@code FixedDelayRestartStrategy(1, 1000)}, using a two minute timeout.
 */
@Test
public void testRestartAutomatically() throws Exception {
    final RestartStrategy fixedDelay = new FixedDelayRestartStrategy(1, 1000);
    final ExecutionGraph graph = createExecutionGraph(fixedDelay).f0;

    final FiniteDuration timeout = new FiniteDuration(2, TimeUnit.MINUTES);
    restartAfterFailure(graph, timeout, true);
}
Also used : FixedDelayRestartStrategy(org.apache.flink.runtime.executiongraph.restart.FixedDelayRestartStrategy) Instance(org.apache.flink.runtime.instance.Instance) FailureRateRestartStrategy(org.apache.flink.runtime.executiongraph.restart.FailureRateRestartStrategy) InfiniteDelayRestartStrategy(org.apache.flink.runtime.executiongraph.restart.InfiniteDelayRestartStrategy) NoRestartStrategy(org.apache.flink.runtime.executiongraph.restart.NoRestartStrategy) RestartStrategy(org.apache.flink.runtime.executiongraph.restart.RestartStrategy) FixedDelayRestartStrategy(org.apache.flink.runtime.executiongraph.restart.FixedDelayRestartStrategy) FiniteDuration(scala.concurrent.duration.FiniteDuration) Test(org.junit.Test)

Example 68 with FiniteDuration

use of scala.concurrent.duration.FiniteDuration in project flink by apache.

the class ExecutionGraphRestartTest method testNoRestartOnSuppressException.

/**
 * Fails one execution vertex with a {@code SuppressRestartsException}, completes the
 * cancellation of every vertex and then checks that the job ends up FAILED without
 * {@code restart()} ever being invoked on the spied execution graph.
 */
@Test
public void testNoRestartOnSuppressException() throws Exception {
    final ExecutionGraph graph = createSpyExecutionGraph(new FixedDelayRestartStrategy(1, 1000)).f0;

    // Trigger the failure with an exception that wraps the real cause in SuppressRestartsException.
    final ExecutionVertex firstVertex = graph.getAllExecutionVertices().iterator().next();
    firstVertex.fail(new SuppressRestartsException(new Exception("Test Exception")));
    assertEquals(JobStatus.FAILING, graph.getState());

    // Acknowledge cancellation on every vertex.
    for (ExecutionVertex each : graph.getAllExecutionVertices()) {
        each.getCurrentExecutionAttempt().cancelingComplete();
    }

    // Poll every 100 ms, for at most two minutes, until the graph reports FAILED.
    final Deadline deadline = new FiniteDuration(2, TimeUnit.MINUTES).fromNow();
    while (true) {
        if (!deadline.hasTimeLeft() || graph.getState() == JobStatus.FAILED) {
            break;
        }
        Thread.sleep(100);
    }
    assertEquals(JobStatus.FAILED, graph.getState());

    // The graph must never have been restarted ...
    verify(graph, never()).restart();

    // ... and the strategy must not have counted a restart attempt.
    final RestartStrategy strategy = graph.getRestartStrategy();
    assertTrue(strategy instanceof FixedDelayRestartStrategy);
    final FixedDelayRestartStrategy fixedDelay = (FixedDelayRestartStrategy) strategy;
    assertEquals(0, fixedDelay.getCurrentRestartAttempt());
}
Also used : SuppressRestartsException(org.apache.flink.runtime.execution.SuppressRestartsException) FixedDelayRestartStrategy(org.apache.flink.runtime.executiongraph.restart.FixedDelayRestartStrategy) Instance(org.apache.flink.runtime.instance.Instance) Deadline(scala.concurrent.duration.Deadline) FiniteDuration(scala.concurrent.duration.FiniteDuration) FailureRateRestartStrategy(org.apache.flink.runtime.executiongraph.restart.FailureRateRestartStrategy) InfiniteDelayRestartStrategy(org.apache.flink.runtime.executiongraph.restart.InfiniteDelayRestartStrategy) NoRestartStrategy(org.apache.flink.runtime.executiongraph.restart.NoRestartStrategy) RestartStrategy(org.apache.flink.runtime.executiongraph.restart.RestartStrategy) FixedDelayRestartStrategy(org.apache.flink.runtime.executiongraph.restart.FixedDelayRestartStrategy) SuppressRestartsException(org.apache.flink.runtime.execution.SuppressRestartsException) IOException(java.io.IOException) Test(org.junit.Test)

Example 69 with FiniteDuration

use of scala.concurrent.duration.FiniteDuration in project flink by apache.

the class ExecutionGraphRestartTest method testSuspendWhileRestarting.

/**
 * Verifies that suspending a job while it is in state RESTARTING aborts the restart:
 * the job has to stay SUSPENDED even after the restart strategy has been unlocked and
 * has signalled that the restart is done.
 *
 * @throws Exception if setting up or driving the execution graph fails, or if waiting on
 *                   one of the restart strategy's futures does not complete in time
 */
@Test
public void testSuspendWhileRestarting() throws Exception {
    // One shared one-minute budget for both waits below.
    final FiniteDuration timeout = new FiniteDuration(1, TimeUnit.MINUTES);
    final Deadline deadline = timeout.fromNow();

    final ActorTaskManagerGateway taskManagerGateway = new ActorTaskManagerGateway(new SimpleActorGateway(TestingUtils.directExecutionContext()));
    final Instance instance = ExecutionGraphTestUtils.getInstance(taskManagerGateway, NUM_TASKS);

    final Scheduler scheduler = new Scheduler(TestingUtils.defaultExecutionContext());
    scheduler.newInstanceAvailable(instance);

    final JobVertex vertex = new JobVertex("Task");
    vertex.setInvokableClass(NoOpInvokable.class);
    vertex.setParallelism(NUM_TASKS);
    final JobGraph jobGraph = new JobGraph("Pointwise job", vertex);

    final ControllableRestartStrategy restartStrategy = new ControllableRestartStrategy(timeout);
    final ExecutionGraph graph = new ExecutionGraph(
        TestingUtils.defaultExecutor(),
        TestingUtils.defaultExecutor(),
        new JobID(),
        "Test job",
        new Configuration(),
        new SerializedValue<>(new ExecutionConfig()),
        AkkaUtils.getDefaultTimeout(),
        restartStrategy,
        scheduler);
    graph.attachJobGraph(jobGraph.getVerticesSortedTopologicallyFromSources());
    assertEquals(JobStatus.CREATED, graph.getState());

    graph.scheduleForExecution();
    assertEquals(JobStatus.RUNNING, graph.getState());

    // Marking the only instance dead is what pushes the job into RESTARTING.
    instance.markDead();
    Await.ready(restartStrategy.getReachedCanRestart(), deadline.timeLeft());
    assertEquals(JobStatus.RESTARTING, graph.getState());

    // Suspend while the restart is still held back by the controllable strategy.
    graph.suspend(new Exception("Test exception"));
    assertEquals(JobStatus.SUSPENDED, graph.getState());

    // Let the pending restart proceed; it must not move the job out of SUSPENDED.
    restartStrategy.unlockRestart();
    Await.ready(restartStrategy.getRestartDone(), deadline.timeLeft());
    assertEquals(JobStatus.SUSPENDED, graph.getState());
}
Also used : Configuration(org.apache.flink.configuration.Configuration) Instance(org.apache.flink.runtime.instance.Instance) Scheduler(org.apache.flink.runtime.jobmanager.scheduler.Scheduler) Deadline(scala.concurrent.duration.Deadline) FiniteDuration(scala.concurrent.duration.FiniteDuration) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) SimpleActorGateway(org.apache.flink.runtime.executiongraph.ExecutionGraphTestUtils.SimpleActorGateway) SuppressRestartsException(org.apache.flink.runtime.execution.SuppressRestartsException) IOException(java.io.IOException) ActorTaskManagerGateway(org.apache.flink.runtime.jobmanager.slots.ActorTaskManagerGateway) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 70 with FiniteDuration

use of scala.concurrent.duration.FiniteDuration in project flink by apache.

the class ExecutionGraphRestartTest method taskShouldNotFailWhenFailureRateLimitWasNotExceeded.

/**
 * Runs {@code restartAfterFailure} three times in a row against a graph that uses a
 * {@code FailureRateRestartStrategy} and asserts that the job is still RUNNING afterwards.
 */
@Test
public void taskShouldNotFailWhenFailureRateLimitWasNotExceeded() throws Exception {
    final FailureRateRestartStrategy failureRateStrategy = new FailureRateRestartStrategy(
        1,
        Time.of(1, TimeUnit.MILLISECONDS),
        Time.of(0, TimeUnit.SECONDS));
    final FiniteDuration timeout = new FiniteDuration(2, TimeUnit.SECONDS);
    final ExecutionGraph graph = createExecutionGraph(failureRateStrategy).f0;

    // Repeated failures are tolerated as long as the failure rate limit is not exceeded.
    for (int round = 0; round < 3; round++) {
        restartAfterFailure(graph, timeout, false);
    }

    assertEquals(JobStatus.RUNNING, graph.getState());
}
Also used : FailureRateRestartStrategy(org.apache.flink.runtime.executiongraph.restart.FailureRateRestartStrategy) Instance(org.apache.flink.runtime.instance.Instance) FiniteDuration(scala.concurrent.duration.FiniteDuration) Test(org.junit.Test)

Aggregations

FiniteDuration (scala.concurrent.duration.FiniteDuration): 78, Test (org.junit.Test): 61, Configuration (org.apache.flink.configuration.Configuration): 37, ActorGateway (org.apache.flink.runtime.instance.ActorGateway): 30, JobGraph (org.apache.flink.runtime.jobgraph.JobGraph): 27, ActorRef (akka.actor.ActorRef): 25, Deadline (scala.concurrent.duration.Deadline): 24, JobID (org.apache.flink.api.common.JobID): 19, JobVertex (org.apache.flink.runtime.jobgraph.JobVertex): 19, JobManagerMessages (org.apache.flink.runtime.messages.JobManagerMessages): 17, TestingJobManagerMessages (org.apache.flink.runtime.testingUtils.TestingJobManagerMessages): 13, ActorSystem (akka.actor.ActorSystem): 12, JavaTestKit (akka.testkit.JavaTestKit): 11, Timeout (akka.util.Timeout): 11, File (java.io.File): 11, TimeoutException (java.util.concurrent.TimeoutException): 11, AkkaActorGateway (org.apache.flink.runtime.instance.AkkaActorGateway): 11, JobVertexID (org.apache.flink.runtime.jobgraph.JobVertexID): 11, Props (akka.actor.Props): 10, IOException (java.io.IOException): 10