use of org.apache.flink.test.util.TestProcessBuilder in project flink by apache.
the class TaskManagerRunnerITCase method testDeterministicWorkingDirIsNotDeletedInCaseOfProcessFailure.
/**
 * Starts a TaskManager process with an explicitly configured resource id and working directory
 * base, waits until the working directory derived from that configuration exists, destroys the
 * process, and asserts that the directory is still present afterwards.
 *
 * <p>If the test fails, the process log is printed, and a process that is still alive is
 * terminated forcibly so that it does not outlive the test.
 *
 * @throws Exception if creating the temporary folder, starting the process, or waiting fails
 */
@Test
public void testDeterministicWorkingDirIsNotDeletedInCaseOfProcessFailure() throws Exception {
    final File workingDirBase = TEMPORARY_FOLDER.newFolder();
    final ResourceID resourceId = ResourceID.generate();

    final Configuration configuration = new Configuration();
    configuration.set(ClusterOptions.PROCESS_WORKING_DIR_BASE, workingDirBase.getAbsolutePath());
    configuration.set(TaskManagerOptions.TASK_MANAGER_RESOURCE_ID, resourceId.toString());
    configuration.set(JobManagerOptions.ADDRESS, "localhost");
    // NOTE(review): no JobManager is started by this test; the zero lookup timeout presumably
    // keeps the TaskManager from blocking on the lookup for long — confirm.
    configuration.set(AkkaOptions.LOOKUP_TIMEOUT_DURATION, Duration.ZERO);

    // The directory the process is expected to create for the configured resource id.
    final File workingDirectory =
            ClusterEntrypointUtils.generateTaskManagerWorkingDirectoryFile(
                    configuration, resourceId);

    final TestProcessBuilder.TestProcess taskManagerProcess =
            new TestProcessBuilder(TaskExecutorProcessEntryPoint.class.getName())
                    .addConfigAsMainClassArgs(configuration)
                    .start();

    boolean success = false;
    try {
        CommonTestUtils.waitUntilCondition(
                workingDirectory::exists, Deadline.fromNow(Duration.ofMinutes(1L)));

        taskManagerProcess.getProcess().destroy();
        taskManagerProcess.getProcess().waitFor();

        assertTrue(workingDirectory.exists());
        success = true;
    } finally {
        if (!success) {
            AbstractTaskManagerProcessFailureRecoveryTest.printProcessLog(
                    "TaskManager", taskManagerProcess);
        }
        // On the success path the process has already terminated (waitFor returned). On any
        // failure path (e.g. the wait timed out) make sure the child process does not leak.
        if (taskManagerProcess.getProcess().isAlive()) {
            taskManagerProcess.getProcess().destroyForcibly();
        }
    }
}
use of org.apache.flink.test.util.TestProcessBuilder in project flink by apache.
the class ClusterEntrypointITCase method testNondeterministicWorkingDirectoryIsDeletedInCaseOfProcessFailure.
/**
 * Starts a dispatcher process with only the working directory base configured, waits until an
 * entry shows up below that base, destroys the process, and asserts that the entry has been
 * removed once the process has terminated.
 *
 * <p>If the test fails, the process log is printed, and a process that is still alive is
 * terminated forcibly so that it does not outlive the test.
 *
 * @throws Exception if creating the temporary folder, starting the process, listing the
 *     directory, or waiting fails
 */
@Test
public void testNondeterministicWorkingDirectoryIsDeletedInCaseOfProcessFailure() throws Exception {
    final File workingDirBase = TEMPORARY_FOLDER.newFolder();

    final Configuration configuration = new Configuration();
    configuration.set(ClusterOptions.PROCESS_WORKING_DIR_BASE, workingDirBase.getAbsolutePath());

    final TestProcessBuilder.TestProcess jobManagerProcess =
            new TestProcessBuilder(DispatcherProcess.DispatcherProcessEntryPoint.class.getName())
                    .addConfigAsMainClassArgs(configuration)
                    .start();

    boolean success = false;
    try {
        // Poll until the process has created something below the base directory.
        CommonTestUtils.waitUntilCondition(
                () -> {
                    try (Stream<Path> files = Files.list(workingDirBase.toPath())) {
                        return files.findAny().isPresent();
                    }
                },
                Deadline.fromNow(Duration.ofMinutes(1L)));

        // Files.list returns a stream backed by an open directory handle; close it explicitly
        // instead of leaking the handle for the remainder of the test JVM's lifetime.
        final File workingDirectory;
        try (Stream<Path> files = Files.list(workingDirBase.toPath())) {
            // Exactly one entry is expected; getOnlyElement fails otherwise.
            workingDirectory =
                    Iterables.getOnlyElement(files.collect(Collectors.toList())).toFile();
        }

        jobManagerProcess.getProcess().destroy();
        jobManagerProcess.getProcess().waitFor();

        assertFalse(workingDirectory.exists());
        success = true;
    } finally {
        if (!success) {
            AbstractTaskManagerProcessFailureRecoveryTest.printProcessLog(
                    "JobManager", jobManagerProcess);
        }
        // On the success path the process has already terminated (waitFor returned). On any
        // failure path (e.g. the wait timed out) make sure the child process does not leak.
        if (jobManagerProcess.getProcess().isAlive()) {
            jobManagerProcess.getProcess().destroyForcibly();
        }
    }
}
use of org.apache.flink.test.util.TestProcessBuilder in project flink by apache.
the class ProcessFailureCancelingITCase method testCancelingOnProcessFailure.
/**
 * Checks that a submitted program terminates with a {@code ProgramInvocationException} after
 * the TaskManager process running it has been destroyed.
 *
 * <p>Steps: create the dispatcher/resource-manager component in this JVM, start a TaskManager
 * through {@code TestProcessBuilder}, run a program whose map function blocks indefinitely on a
 * separate thread, wait until at least one task has been deployed, destroy the TaskManager
 * process, and finally assert that the program thread finishes within {@code TIMEOUT} with the
 * expected exception.
 *
 * <p>The test is skipped when no java executable can be found.
 *
 * @throws Throwable the first failure of the test, with the program's exception (if any)
 *     combined into it via {@code ExceptionUtils.firstOrSuppressed}; or a failure raised
 *     during cleanup
 */
@Test
public void testCancelingOnProcessFailure() throws Throwable {
    Assume.assumeTrue("---- Skipping Process Failure test : Could not find java executable ----", getJavaCommandPath() != null);

    TestProcess taskManagerProcess = null;
    final TestingFatalErrorHandler fatalErrorHandler = new TestingFatalErrorHandler();

    Configuration config = new Configuration();
    config.setString(JobManagerOptions.ADDRESS, "localhost");
    config.set(AkkaOptions.ASK_TIMEOUT_DURATION, Duration.ofSeconds(100));
    config.setString(HighAvailabilityOptions.HA_MODE, "zookeeper");
    config.setString(HighAvailabilityOptions.HA_ZOOKEEPER_QUORUM, zooKeeperResource.getConnectString());
    config.setString(HighAvailabilityOptions.HA_STORAGE_PATH, temporaryFolder.newFolder().getAbsolutePath());
    config.setInteger(TaskManagerOptions.NUM_TASK_SLOTS, 2);
    config.set(TaskManagerOptions.MANAGED_MEMORY_SIZE, MemorySize.parse("4m"));
    config.set(TaskManagerOptions.NETWORK_MEMORY_MIN, MemorySize.parse("3200k"));
    config.set(TaskManagerOptions.NETWORK_MEMORY_MAX, MemorySize.parse("3200k"));
    config.set(TaskManagerOptions.TASK_HEAP_MEMORY, MemorySize.parse("128m"));
    config.set(TaskManagerOptions.CPU_CORES, 1.0);
    config.setInteger(RestOptions.PORT, 0);

    // The RPC service is started with port "0"; the port it actually got is written back into
    // the configuration that is later handed to the TaskManager process.
    final RpcService rpcService = RpcSystem.load().remoteServiceBuilder(config, "localhost", "0").createAndStart();
    final int jobManagerPort = rpcService.getPort();
    config.setInteger(JobManagerOptions.PORT, jobManagerPort);

    final DispatcherResourceManagerComponentFactory resourceManagerComponentFactory = DefaultDispatcherResourceManagerComponentFactory.createSessionComponentFactory(StandaloneResourceManagerFactory.getInstance());
    DispatcherResourceManagerComponent dispatcherResourceManagerComponent = null;

    final ScheduledExecutorService ioExecutor = TestingUtils.defaultExecutor();
    final HighAvailabilityServices haServices = HighAvailabilityServicesUtils.createHighAvailabilityServices(config, ioExecutor, AddressResolution.NO_ADDRESS_RESOLUTION, RpcSystem.load(), NoOpFatalErrorHandler.INSTANCE);

    // Receives whatever the program thread fails with, so the main thread can inspect it.
    final AtomicReference<Throwable> programException = new AtomicReference<>();

    try {
        dispatcherResourceManagerComponent = resourceManagerComponentFactory.create(config, ResourceID.generate(), ioExecutor, rpcService, haServices, blobServerResource.getBlobServer(), new HeartbeatServices(100L, 10000L, 2), NoOpMetricRegistry.INSTANCE, new MemoryExecutionGraphInfoStore(), VoidMetricQueryServiceRetriever.INSTANCE, fatalErrorHandler);

        TestProcessBuilder taskManagerProcessBuilder = new TestProcessBuilder(TaskExecutorProcessEntryPoint.class.getName());
        taskManagerProcessBuilder.addConfigAsMainClassArgs(config);
        taskManagerProcess = taskManagerProcessBuilder.start();

        // start the test program, which infinitely blocks
        Runnable programRunner = new Runnable() {
            @Override
            public void run() {
                try {
                    ExecutionEnvironment env = ExecutionEnvironment.createRemoteEnvironment("localhost", 1337, config);
                    env.setParallelism(2);
                    env.setRestartStrategy(RestartStrategies.noRestart());
                    env.generateSequence(0, Long.MAX_VALUE).map(new MapFunction<Long, Long>() {
                        @Override
                        public Long map(Long value) throws Exception {
                            synchronized (this) {
                                // Signal deployment on stdout, then block; nothing in this
                                // method ever notifies the monitor.
                                System.out.println(TASK_DEPLOYED_MARKER);
                                wait();
                            }
                            return 0L;
                        }
                    }).output(new DiscardingOutputFormat<>());
                    env.execute();
                } catch (Throwable t) {
                    programException.set(t);
                }
            }
        };

        Thread programThread = new Thread(programRunner);
        programThread.start();

        waitUntilAtLeastOneTaskHasBeenDeployed(taskManagerProcess);

        // kill the TaskManager after the job started to run
        taskManagerProcess.destroy();
        // Cleared so that neither the catch block (log dump) nor the finally block (destroy)
        // touches the already destroyed process again.
        taskManagerProcess = null;

        // the job should fail within a few seconds due to heartbeat timeouts
        // since the CI environment is often slow, we conservatively give it up to 2 minutes
        programThread.join(TIMEOUT.toMillis());

        assertFalse("The program did not cancel in time", programThread.isAlive());

        Throwable error = programException.get();
        assertNotNull("The program did not fail properly", error);
        assertTrue(error instanceof ProgramInvocationException);
        // all seems well :-)
    } catch (Exception | Error e) {
        if (taskManagerProcess != null) {
            printOutput("TaskManager OUT", taskManagerProcess.getProcessOutput().toString());
            printOutput("TaskManager ERR", taskManagerProcess.getErrorOutput().toString());
        }
        throw ExceptionUtils.firstOrSuppressed(e, programException.get());
    } finally {
        // The cleanup steps run in the same order as before, but are chained through nested
        // finally blocks: a failure in one step (in particular an error rethrown by the fatal
        // error handler) must not prevent the RPC service and the HA services from being
        // released.
        try {
            if (taskManagerProcess != null) {
                taskManagerProcess.destroy();
            }
        } finally {
            try {
                if (dispatcherResourceManagerComponent != null) {
                    dispatcherResourceManagerComponent.stopApplication(ApplicationStatus.SUCCEEDED, null);
                }
                fatalErrorHandler.rethrowError();
            } finally {
                try {
                    RpcUtils.terminateRpcService(rpcService, Time.seconds(100L));
                } finally {
                    haServices.closeAndCleanupAllData();
                }
            }
        }
    }
}
use of org.apache.flink.test.util.TestProcessBuilder in project flink by apache.
the class TaskManagerDisconnectOnShutdownITCase method testTaskManagerProcessFailure.
/**
 * Starts a session cluster in this JVM together with a TaskManager process, waits until the
 * tracker has seen the TaskManager connect, destroys the TaskManager process, and waits until
 * the tracker has seen it disconnect again. Afterwards the tracker must report exactly one
 * connected TaskManager.
 *
 * <p>Any throwable raised along the way is turned into an {@link AssertionError} that keeps the
 * original throwable as its cause, after the TaskManager's process log has been printed. A
 * TaskManager process that is still alive at the end is logged and killed forcibly.
 */
@Test
public void testTaskManagerProcessFailure() {
    Configuration config = new Configuration();
    config.setString(JobManagerOptions.ADDRESS, "localhost");
    // disable heartbeats
    config.set(HeartbeatManagerOptions.HEARTBEAT_RPC_FAILURE_THRESHOLD, -1);
    config.setInteger(TaskManagerOptions.NUM_TASK_SLOTS, 2);
    config.set(TaskManagerOptions.MANAGED_MEMORY_SIZE, MemorySize.parse("4m"));
    config.set(TaskManagerOptions.NETWORK_MEMORY_MIN, MemorySize.parse("3200k"));
    config.set(TaskManagerOptions.NETWORK_MEMORY_MAX, MemorySize.parse("3200k"));
    config.set(TaskManagerOptions.TASK_HEAP_MEMORY, MemorySize.parse("128m"));
    config.set(TaskManagerOptions.CPU_CORES, 1.0);
    config.setString(JobManagerOptions.EXECUTION_FAILOVER_STRATEGY, "full");
    config.set(JobManagerOptions.RESOURCE_WAIT_TIMEOUT, Duration.ofSeconds(30L));

    // check that we run this test only if the java command
    // is available on this machine
    String javaCommand = getJavaCommandPath();
    if (javaCommand == null) {
        fail("cannot find java executable");
    }

    final TaskManagerConnectionTracker tracker = new TaskManagerConnectionTracker();
    TestProcessBuilder.TestProcess taskManagerProcess = null;

    // The entrypoint is subclassed only to plug in a resource manager factory that reports
    // TaskManager connects/disconnects to the tracker.
    try (final SessionClusterEntrypoint clusterEntrypoint = new SessionClusterEntrypoint(config) {
        @Override
        protected DefaultDispatcherResourceManagerComponentFactory createDispatcherResourceManagerComponentFactory(Configuration configuration) {
            return DefaultDispatcherResourceManagerComponentFactory.createSessionComponentFactory(new TestingStandaloneResourceManagerFactory(tracker));
        }
    }) {
        clusterEntrypoint.startCluster();

        TestProcessBuilder taskManagerProcessBuilder = new TestProcessBuilder(TaskExecutorProcessEntryPoint.class.getName());
        taskManagerProcessBuilder.addConfigAsMainClassArgs(config);

        // start the TaskManager processes
        taskManagerProcess = taskManagerProcessBuilder.start();
        tracker.waitForTaskManagerConnected();

        // shutdown TaskManager
        taskManagerProcess.destroy();
        tracker.waitForTaskManagerDisconnected();

        assertThat(tracker.getNumberOfConnectedTaskManager()).isEqualTo(1);
    } catch (Throwable t) {
        printProcessLog(taskManagerProcess);
        // Fail with the same message as before, but keep the original throwable as the cause:
        // reporting only the message would drop its type and stack trace (and the message
        // itself may be null).
        throw new AssertionError(t.getMessage(), t);
    } finally {
        if (taskManagerProcess != null && taskManagerProcess.getProcess().isAlive()) {
            LOG.error("TaskManager did not shutdown in time.");
            printProcessLog(taskManagerProcess);
            taskManagerProcess.getProcess().destroyForcibly();
        }
    }
}
Aggregations