Use of org.apache.flink.runtime.util.TestingFatalErrorHandler in the Apache Flink project.
Class JobMasterTest, method testTriggerSavepointTimeout.
/**
 * Verifies that the timeout handed to {@link JobMasterGateway#triggerSavepoint(String, boolean,
 * SavepointFormatType, Time)} is honoured: a request issued with a one millisecond timeout must
 * fail with a {@link TimeoutException}, while an otherwise identical request issued with {@code
 * RpcUtils.INF_TIMEOUT} must still be pending afterwards.
 */
@Test
public void testTriggerSavepointTimeout() throws Exception {
    // The scheduler hands out a future that this test never completes, so a savepoint request
    // cannot finish through the scheduler.
    final TestingSchedulerNG neverCompletingScheduler =
            TestingSchedulerNG.newBuilder()
                    .setTriggerSavepointFunction(
                            (ignored1, ignored2, ignored3) -> new CompletableFuture<>())
                    .build();
    final TestingSchedulerNGFactory schedulerFactory =
            new TestingSchedulerNGFactory(neverCompletingScheduler);

    final JobMaster master =
            new JobMasterBuilder(jobGraph, rpcService)
                    .withFatalErrorHandler(testingFatalErrorHandler)
                    .withSlotPoolServiceSchedulerFactory(
                            DefaultSlotPoolServiceSchedulerFactory.create(
                                    TestingSlotPoolServiceBuilder.newBuilder(), schedulerFactory))
                    .createJobMaster();

    try {
        master.start();
        final JobMasterGateway gateway = master.getSelfGateway(JobMasterGateway.class);

        final CompletableFuture<String> shortTimeoutRequest =
                gateway.triggerSavepoint(
                        "/tmp", false, SavepointFormatType.CANONICAL, Time.milliseconds(1));
        final CompletableFuture<String> unboundedRequest =
                gateway.triggerSavepoint(
                        "/tmp", false, SavepointFormatType.CANONICAL, RpcUtils.INF_TIMEOUT);

        try {
            shortTimeoutRequest.get(testingTimeout.getSize(), testingTimeout.getUnit());
            // Reaching this line means the request completed normally, which must not happen.
            fail();
        } catch (final ExecutionException executionException) {
            assertThat(
                    ExceptionUtils.stripExecutionException(executionException),
                    instanceOf(TimeoutException.class));
        }

        assertThat(unboundedRequest.isDone(), is(equalTo(false)));
    } finally {
        RpcUtils.terminateRpcEndpoint(master, testingTimeout);
    }
}
Use of org.apache.flink.runtime.util.TestingFatalErrorHandler in the Apache Flink project.
Class JobMasterTest, method setup.
/**
 * Re-creates the per-test fixtures: a fresh configuration, new job master and resource
 * identifiers, a fatal error handler, and testing high-availability services wired with a
 * standalone checkpoint recovery factory and a settable resource manager leader retriever.
 *
 * @throws IOException if the temporary folder for the blob storage directory cannot be created
 */
@Before
public void setup() throws IOException {
    configuration = new Configuration();

    // Identifiers and error handling.
    jobMasterId = JobMasterId.generate();
    jmResourceId = ResourceID.generate();
    testingFatalErrorHandler = new TestingFatalErrorHandler();

    // High-availability services with the retriever the tests use for the resource manager.
    rmLeaderRetrievalService = new SettableLeaderRetrievalService(null, null);
    haServices = new TestingHighAvailabilityServices();
    haServices.setCheckpointRecoveryFactory(new StandaloneCheckpointRecoveryFactory());
    haServices.setResourceManagerLeaderRetriever(rmLeaderRetrievalService);

    // Point the blob storage directory at a freshly created temporary folder.
    final String blobStorageDirectory = temporaryFolder.newFolder().getAbsolutePath();
    configuration.setString(BlobServerOptions.STORAGE_DIRECTORY, blobStorageDirectory);
}
Use of org.apache.flink.runtime.util.TestingFatalErrorHandler in the Apache Flink project.
Class AdaptiveSchedulerTest, method testFatalErrorsForwardedToFatalErrorHandler.
/**
 * Checks that an exception thrown by an action executed through {@code runIfState} is handed to
 * the fatal error handler the scheduler was built with.
 */
@Test
public void testFatalErrorsForwardedToFatalErrorHandler() throws Exception {
    final RuntimeException expectedFailure = new RuntimeException();
    final TestingFatalErrorHandler errorHandler = new TestingFatalErrorHandler();

    final AdaptiveScheduler adaptiveScheduler =
            new AdaptiveSchedulerBuilder(createJobGraph(), mainThreadExecutor)
                    .setFatalErrorHandler(errorHandler)
                    .build();

    // Passing the scheduler's own current state as the expected state; the action always throws.
    adaptiveScheduler.runIfState(
            adaptiveScheduler.getState(),
            () -> {
                throw expectedFailure;
            });

    assertThat(errorHandler.getException()).isEqualTo(expectedFailure);
}
Use of org.apache.flink.runtime.util.TestingFatalErrorHandler in the Apache Flink project.
Class ProcessFailureCancelingITCase, method testCancelingOnProcessFailure.
/**
 * Starts a dispatcher/resource-manager component in this JVM and a TaskManager in a separate
 * process, runs a client program whose map function blocks forever, kills the TaskManager
 * process once a task has been deployed, and then expects the client program to end with a
 * {@link ProgramInvocationException} within {@code TIMEOUT}.
 *
 * <p>The test is skipped when no java executable can be located.
 *
 * <p>Cleanup is written with nested {@code try}/{@code finally} blocks so that the RPC service
 * and the high-availability services are always released, even when stopping the component or
 * re-throwing a recorded fatal error fails.
 *
 * @throws Throwable any failure of the test itself, combined with the exception recorded by the
 *     client program (if any) via {@code ExceptionUtils.firstOrSuppressed}
 */
@Test
public void testCancelingOnProcessFailure() throws Throwable {
    Assume.assumeTrue("---- Skipping Process Failure test : Could not find java executable ----", getJavaCommandPath() != null);

    TestProcess taskManagerProcess = null;
    final TestingFatalErrorHandler fatalErrorHandler = new TestingFatalErrorHandler();

    Configuration config = new Configuration();
    config.setString(JobManagerOptions.ADDRESS, "localhost");
    config.set(AkkaOptions.ASK_TIMEOUT_DURATION, Duration.ofSeconds(100));
    config.setString(HighAvailabilityOptions.HA_MODE, "zookeeper");
    config.setString(HighAvailabilityOptions.HA_ZOOKEEPER_QUORUM, zooKeeperResource.getConnectString());
    config.setString(HighAvailabilityOptions.HA_STORAGE_PATH, temporaryFolder.newFolder().getAbsolutePath());
    config.setInteger(TaskManagerOptions.NUM_TASK_SLOTS, 2);
    config.set(TaskManagerOptions.MANAGED_MEMORY_SIZE, MemorySize.parse("4m"));
    config.set(TaskManagerOptions.NETWORK_MEMORY_MIN, MemorySize.parse("3200k"));
    config.set(TaskManagerOptions.NETWORK_MEMORY_MAX, MemorySize.parse("3200k"));
    config.set(TaskManagerOptions.TASK_HEAP_MEMORY, MemorySize.parse("128m"));
    config.set(TaskManagerOptions.CPU_CORES, 1.0);
    config.setInteger(RestOptions.PORT, 0);

    // The RPC service is requested with port "0"; the port it actually reports is then written
    // back into the configuration that is handed to the TaskManager process.
    final RpcService rpcService = RpcSystem.load().remoteServiceBuilder(config, "localhost", "0").createAndStart();
    final int jobManagerPort = rpcService.getPort();
    config.setInteger(JobManagerOptions.PORT, jobManagerPort);

    final DispatcherResourceManagerComponentFactory resourceManagerComponentFactory = DefaultDispatcherResourceManagerComponentFactory.createSessionComponentFactory(StandaloneResourceManagerFactory.getInstance());
    DispatcherResourceManagerComponent dispatcherResourceManagerComponent = null;
    final ScheduledExecutorService ioExecutor = TestingUtils.defaultExecutor();
    final HighAvailabilityServices haServices = HighAvailabilityServicesUtils.createHighAvailabilityServices(config, ioExecutor, AddressResolution.NO_ADDRESS_RESOLUTION, RpcSystem.load(), NoOpFatalErrorHandler.INSTANCE);

    // Holds whatever the client program thread threw, so the main thread can inspect it.
    final AtomicReference<Throwable> programException = new AtomicReference<>();

    try {
        dispatcherResourceManagerComponent = resourceManagerComponentFactory.create(config, ResourceID.generate(), ioExecutor, rpcService, haServices, blobServerResource.getBlobServer(), new HeartbeatServices(100L, 10000L, 2), NoOpMetricRegistry.INSTANCE, new MemoryExecutionGraphInfoStore(), VoidMetricQueryServiceRetriever.INSTANCE, fatalErrorHandler);

        TestProcessBuilder taskManagerProcessBuilder = new TestProcessBuilder(TaskExecutorProcessEntryPoint.class.getName());
        taskManagerProcessBuilder.addConfigAsMainClassArgs(config);
        taskManagerProcess = taskManagerProcessBuilder.start();

        // start the test program, which infinitely blocks
        Runnable programRunner = new Runnable() {
            @Override
            public void run() {
                try {
                    ExecutionEnvironment env = ExecutionEnvironment.createRemoteEnvironment("localhost", 1337, config);
                    env.setParallelism(2);
                    env.setRestartStrategy(RestartStrategies.noRestart());
                    env.generateSequence(0, Long.MAX_VALUE).map(new MapFunction<Long, Long>() {
                        @Override
                        public Long map(Long value) throws Exception {
                            synchronized (this) {
                                // Print the marker first, then block in wait(); nothing in this
                                // test notifies the monitor.
                                System.out.println(TASK_DEPLOYED_MARKER);
                                wait();
                            }
                            return 0L;
                        }
                    }).output(new DiscardingOutputFormat<>());
                    env.execute();
                } catch (Throwable t) {
                    programException.set(t);
                }
            }
        };

        Thread programThread = new Thread(programRunner);
        programThread.start();

        waitUntilAtLeastOneTaskHasBeenDeployed(taskManagerProcess);

        // kill the TaskManager after the job started to run
        taskManagerProcess.destroy();
        // Cleared so that neither the catch block nor the finally block touches the
        // already-destroyed process again.
        taskManagerProcess = null;

        // the job should fail within a few seconds due to heartbeat timeouts
        // since the CI environment is often slow, we conservatively give it up to 2 minutes
        programThread.join(TIMEOUT.toMillis());

        assertFalse("The program did not cancel in time", programThread.isAlive());

        Throwable error = programException.get();
        assertNotNull("The program did not fail properly", error);
        assertTrue(error instanceof ProgramInvocationException);
        // all seems well :-)
    } catch (Exception | Error e) {
        // Dump the TaskManager output for diagnosis if the process is still tracked.
        if (taskManagerProcess != null) {
            printOutput("TaskManager OUT", taskManagerProcess.getProcessOutput().toString());
            printOutput("TaskManager ERR", taskManagerProcess.getErrorOutput().toString());
        }
        throw ExceptionUtils.firstOrSuppressed(e, programException.get());
    } finally {
        // The cleanup order is unchanged, but each later step now sits in a finally block so
        // that a failure in an earlier step (including a re-thrown fatal error) can no longer
        // leave the RPC service or the HA services open.
        try {
            if (taskManagerProcess != null) {
                taskManagerProcess.destroy();
            }
            if (dispatcherResourceManagerComponent != null) {
                dispatcherResourceManagerComponent.stopApplication(ApplicationStatus.SUCCEEDED, null);
            }
            fatalErrorHandler.rethrowError();
        } finally {
            try {
                RpcUtils.terminateRpcService(rpcService, Time.seconds(100L));
            } finally {
                haServices.closeAndCleanupAllData();
            }
        }
    }
}
Aggregations