Search in sources :

Example 6 with TestProcessBuilder

use of org.apache.flink.test.util.TestProcessBuilder in project flink by apache.

the class TaskManagerRunnerITCase method testDeterministicWorkingDirIsNotDeletedInCaseOfProcessFailure.

/**
 * Starts a TaskManager in a separate process with an explicitly configured resource id, waits
 * until the working directory computed for that resource id exists, terminates the process and
 * asserts that the working directory is still present afterwards.
 *
 * <p>If the test does not succeed, the output of the TaskManager process is printed. The child
 * process is always terminated before the test returns.
 *
 * @throws Exception if creating the temporary folder, starting the process or waiting fails
 */
@Test
public void testDeterministicWorkingDirIsNotDeletedInCaseOfProcessFailure() throws Exception {
    final File workingDirBase = TEMPORARY_FOLDER.newFolder();
    final ResourceID resourceId = ResourceID.generate();

    final Configuration configuration = new Configuration();
    configuration.set(ClusterOptions.PROCESS_WORKING_DIR_BASE, workingDirBase.getAbsolutePath());
    configuration.set(TaskManagerOptions.TASK_MANAGER_RESOURCE_ID, resourceId.toString());
    configuration.set(JobManagerOptions.ADDRESS, "localhost");
    // NOTE(review): presumably a zero lookup timeout is used because no JobManager is running
    // in this test - confirm.
    configuration.set(AkkaOptions.LOOKUP_TIMEOUT_DURATION, Duration.ZERO);

    // The directory the TaskManager is expected to use for the configured resource id.
    final File workingDirectory = ClusterEntrypointUtils.generateTaskManagerWorkingDirectoryFile(configuration, resourceId);

    final TestProcessBuilder.TestProcess taskManagerProcess = new TestProcessBuilder(TaskExecutorProcessEntryPoint.class.getName()).addConfigAsMainClassArgs(configuration).start();
    boolean success = false;
    try {
        CommonTestUtils.waitUntilCondition(workingDirectory::exists, Deadline.fromNow(Duration.ofMinutes(1L)));

        taskManagerProcess.getProcess().destroy();
        taskManagerProcess.getProcess().waitFor();

        assertTrue(workingDirectory.exists());
        success = true;
    } finally {
        if (!success) {
            AbstractTaskManagerProcessFailureRecoveryTest.printProcessLog("TaskManager", taskManagerProcess);
        }
        // If waiting for the working directory failed, the child JVM was never stopped above;
        // kill it here so that it does not outlive the test.
        if (taskManagerProcess.getProcess().isAlive()) {
            taskManagerProcess.getProcess().destroyForcibly();
        }
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) TaskExecutorProcessEntryPoint(org.apache.flink.test.recovery.utils.TaskExecutorProcessEntryPoint) File(java.io.File) TestProcessBuilder(org.apache.flink.test.util.TestProcessBuilder) Test(org.junit.Test)

Example 7 with TestProcessBuilder

use of org.apache.flink.test.util.TestProcessBuilder in project flink by apache.

the class ClusterEntrypointITCase method testNondeterministicWorkingDirectoryIsDeletedInCaseOfProcessFailure.

/**
 * Starts a dispatcher process without a fixed resource id, waits until an entry shows up below
 * the configured working directory base, terminates the process and asserts that this entry no
 * longer exists afterwards.
 *
 * <p>If the test does not succeed, the output of the process is printed. The child process is
 * always terminated before the test returns.
 *
 * @throws Exception if creating the temporary folder, starting the process, listing the
 *     directory or waiting fails
 */
@Test
public void testNondeterministicWorkingDirectoryIsDeletedInCaseOfProcessFailure() throws Exception {
    final File workingDirBase = TEMPORARY_FOLDER.newFolder();

    final Configuration configuration = new Configuration();
    configuration.set(ClusterOptions.PROCESS_WORKING_DIR_BASE, workingDirBase.getAbsolutePath());

    final TestProcessBuilder.TestProcess jobManagerProcess = new TestProcessBuilder(DispatcherProcess.DispatcherProcessEntryPoint.class.getName()).addConfigAsMainClassArgs(configuration).start();
    boolean success = false;
    try {
        // Wait until the process has created something below the working directory base.
        CommonTestUtils.waitUntilCondition(() -> {
            try (Stream<Path> files = Files.list(workingDirBase.toPath())) {
                return files.findAny().isPresent();
            }
        }, Deadline.fromNow(Duration.ofMinutes(1L)));

        // Files.list keeps the directory open until the stream is closed, hence the
        // try-with-resources. Exactly one entry is expected below the base directory.
        final File workingDirectory;
        try (Stream<Path> files = Files.list(workingDirBase.toPath())) {
            workingDirectory = Iterables.getOnlyElement(files.collect(Collectors.toList())).toFile();
        }

        jobManagerProcess.getProcess().destroy();
        jobManagerProcess.getProcess().waitFor();

        assertFalse(workingDirectory.exists());
        success = true;
    } finally {
        if (!success) {
            AbstractTaskManagerProcessFailureRecoveryTest.printProcessLog("JobManager", jobManagerProcess);
        }
        // If an earlier step failed, the child JVM was never stopped above; kill it here so
        // that it does not outlive the test.
        if (jobManagerProcess.getProcess().isAlive()) {
            jobManagerProcess.getProcess().destroyForcibly();
        }
    }
}
Also used : Path(java.nio.file.Path) Configuration(org.apache.flink.configuration.Configuration) File(java.io.File) TestProcessBuilder(org.apache.flink.test.util.TestProcessBuilder) Test(org.junit.Test)

Example 8 with TestProcessBuilder

use of org.apache.flink.test.util.TestProcessBuilder in project flink by apache.

the class ProcessFailureCancelingITCase method testCancelingOnProcessFailure.

/**
 * Starts a dispatcher/resource manager component in this JVM and a TaskManager in a separate
 * process, submits a program whose map function blocks, destroys the TaskManager process once a
 * task has been deployed, and asserts that the program terminates with a
 * {@link ProgramInvocationException}.
 *
 * <p>The test is skipped if no java executable can be found.
 *
 * @throws Throwable any failure of the test itself, combined with the exception recorded from
 *     the program thread via {@code ExceptionUtils.firstOrSuppressed}
 */
@Test
public void testCancelingOnProcessFailure() throws Throwable {
    Assume.assumeTrue("---- Skipping Process Failure test : Could not find java executable ----", getJavaCommandPath() != null);
    TestProcess taskManagerProcess = null;
    final TestingFatalErrorHandler fatalErrorHandler = new TestingFatalErrorHandler();
    // configuration shared by the in-JVM cluster component, the TaskManager process and the client
    Configuration config = new Configuration();
    config.setString(JobManagerOptions.ADDRESS, "localhost");
    config.set(AkkaOptions.ASK_TIMEOUT_DURATION, Duration.ofSeconds(100));
    config.setString(HighAvailabilityOptions.HA_MODE, "zookeeper");
    config.setString(HighAvailabilityOptions.HA_ZOOKEEPER_QUORUM, zooKeeperResource.getConnectString());
    config.setString(HighAvailabilityOptions.HA_STORAGE_PATH, temporaryFolder.newFolder().getAbsolutePath());
    config.setInteger(TaskManagerOptions.NUM_TASK_SLOTS, 2);
    config.set(TaskManagerOptions.MANAGED_MEMORY_SIZE, MemorySize.parse("4m"));
    config.set(TaskManagerOptions.NETWORK_MEMORY_MIN, MemorySize.parse("3200k"));
    config.set(TaskManagerOptions.NETWORK_MEMORY_MAX, MemorySize.parse("3200k"));
    config.set(TaskManagerOptions.TASK_HEAP_MEMORY, MemorySize.parse("128m"));
    config.set(TaskManagerOptions.CPU_CORES, 1.0);
    config.setInteger(RestOptions.PORT, 0);
    // the RPC service is requested with port "0"; the port it actually reports is then
    // written back into the configuration as the JobManager port
    final RpcService rpcService = RpcSystem.load().remoteServiceBuilder(config, "localhost", "0").createAndStart();
    final int jobManagerPort = rpcService.getPort();
    config.setInteger(JobManagerOptions.PORT, jobManagerPort);
    final DispatcherResourceManagerComponentFactory resourceManagerComponentFactory = DefaultDispatcherResourceManagerComponentFactory.createSessionComponentFactory(StandaloneResourceManagerFactory.getInstance());
    DispatcherResourceManagerComponent dispatcherResourceManagerComponent = null;
    final ScheduledExecutorService ioExecutor = TestingUtils.defaultExecutor();
    final HighAvailabilityServices haServices = HighAvailabilityServicesUtils.createHighAvailabilityServices(config, ioExecutor, AddressResolution.NO_ADDRESS_RESOLUTION, RpcSystem.load(), NoOpFatalErrorHandler.INSTANCE);
    // holds the Throwable (if any) with which the program thread terminated
    final AtomicReference<Throwable> programException = new AtomicReference<>();
    try {
        // NOTE(review): the HeartbeatServices arguments (100L, 10000L, 2) are presumably
        // interval, timeout and failure threshold - confirm against the constructor
        dispatcherResourceManagerComponent = resourceManagerComponentFactory.create(config, ResourceID.generate(), ioExecutor, rpcService, haServices, blobServerResource.getBlobServer(), new HeartbeatServices(100L, 10000L, 2), NoOpMetricRegistry.INSTANCE, new MemoryExecutionGraphInfoStore(), VoidMetricQueryServiceRetriever.INSTANCE, fatalErrorHandler);
        // start the TaskManager in its own process, configured with the same configuration
        TestProcessBuilder taskManagerProcessBuilder = new TestProcessBuilder(TaskExecutorProcessEntryPoint.class.getName());
        taskManagerProcessBuilder.addConfigAsMainClassArgs(config);
        taskManagerProcess = taskManagerProcessBuilder.start();
        // start the test program, which infinitely blocks
        Runnable programRunner = new Runnable() {

            @Override
            public void run() {
                try {
                    ExecutionEnvironment env = ExecutionEnvironment.createRemoteEnvironment("localhost", 1337, config);
                    env.setParallelism(2);
                    env.setRestartStrategy(RestartStrategies.noRestart());
                    env.generateSequence(0, Long.MAX_VALUE).map(new MapFunction<Long, Long>() {

                        @Override
                        public Long map(Long value) throws Exception {
                            // print the marker, then block on this function's own monitor;
                            // nothing in this method calls notify on it
                            synchronized (this) {
                                System.out.println(TASK_DEPLOYED_MARKER);
                                wait();
                            }
                            return 0L;
                        }
                    }).output(new DiscardingOutputFormat<>());
                    env.execute();
                } catch (Throwable t) {
                    // record the failure so that the test thread can inspect it
                    programException.set(t);
                }
            }
        };
        Thread programThread = new Thread(programRunner);
        programThread.start();
        waitUntilAtLeastOneTaskHasBeenDeployed(taskManagerProcess);
        // kill the TaskManager after the job started to run
        taskManagerProcess.destroy();
        // cleared so that the catch and finally blocks below skip the already destroyed process
        taskManagerProcess = null;
        // the job should fail within a few seconds due to heartbeat timeouts
        // since the CI environment is often slow, we conservatively give it up to 2 minutes
        programThread.join(TIMEOUT.toMillis());
        assertFalse("The program did not cancel in time", programThread.isAlive());
        Throwable error = programException.get();
        assertNotNull("The program did not fail properly", error);
        assertTrue(error instanceof ProgramInvocationException);
    // all seems well :-)
    } catch (Exception | Error e) {
        // dump the TaskManager output, but only if the process has not been destroyed yet
        if (taskManagerProcess != null) {
            printOutput("TaskManager OUT", taskManagerProcess.getProcessOutput().toString());
            printOutput("TaskManager ERR", taskManagerProcess.getErrorOutput().toString());
        }
        throw ExceptionUtils.firstOrSuppressed(e, programException.get());
    } finally {
        if (taskManagerProcess != null) {
            taskManagerProcess.destroy();
        }
        if (dispatcherResourceManagerComponent != null) {
            dispatcherResourceManagerComponent.stopApplication(ApplicationStatus.SUCCEEDED, null);
        }
        // NOTE(review): if rethrowError() throws, the two shutdown calls below are skipped and
        // the RPC service and HA services are left running - confirm this is acceptable
        fatalErrorHandler.rethrowError();
        RpcUtils.terminateRpcService(rpcService, Time.seconds(100L));
        haServices.closeAndCleanupAllData();
    }
}
Also used : ExecutionEnvironment(org.apache.flink.api.java.ExecutionEnvironment) Configuration(org.apache.flink.configuration.Configuration) MapFunction(org.apache.flink.api.common.functions.MapFunction) TestProcess(org.apache.flink.test.util.TestProcessBuilder.TestProcess) MemoryExecutionGraphInfoStore(org.apache.flink.runtime.dispatcher.MemoryExecutionGraphInfoStore) DispatcherResourceManagerComponent(org.apache.flink.runtime.entrypoint.component.DispatcherResourceManagerComponent) TaskExecutorProcessEntryPoint(org.apache.flink.test.recovery.utils.TaskExecutorProcessEntryPoint) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) AtomicReference(java.util.concurrent.atomic.AtomicReference) TaskExecutorProcessEntryPoint(org.apache.flink.test.recovery.utils.TaskExecutorProcessEntryPoint) TestProcessBuilder(org.apache.flink.test.util.TestProcessBuilder) ProgramInvocationException(org.apache.flink.client.program.ProgramInvocationException) TimeoutException(java.util.concurrent.TimeoutException) HighAvailabilityServices(org.apache.flink.runtime.highavailability.HighAvailabilityServices) RpcService(org.apache.flink.runtime.rpc.RpcService) ProgramInvocationException(org.apache.flink.client.program.ProgramInvocationException) DefaultDispatcherResourceManagerComponentFactory(org.apache.flink.runtime.entrypoint.component.DefaultDispatcherResourceManagerComponentFactory) DispatcherResourceManagerComponentFactory(org.apache.flink.runtime.entrypoint.component.DispatcherResourceManagerComponentFactory) Test(org.junit.Test)

Example 9 with TestProcessBuilder

use of org.apache.flink.test.util.TestProcessBuilder in project flink by apache.

the class TaskManagerDisconnectOnShutdownITCase method testTaskManagerProcessFailure.

/**
 * Starts a session cluster in this JVM and a TaskManager in a separate process, waits until the
 * tracker reports the TaskManager as connected, destroys the TaskManager process and waits until
 * the tracker reports it as disconnected.
 *
 * <p>Any failure is reported as an {@link AssertionError} that carries the original throwable as
 * its cause; the output of the TaskManager process is printed beforehand. If the TaskManager
 * process is still alive when the test finishes, it is forcibly destroyed.
 */
@Test
public void testTaskManagerProcessFailure() {
    Configuration config = new Configuration();
    config.setString(JobManagerOptions.ADDRESS, "localhost");
    // disable heartbeats
    config.set(HeartbeatManagerOptions.HEARTBEAT_RPC_FAILURE_THRESHOLD, -1);
    config.setInteger(TaskManagerOptions.NUM_TASK_SLOTS, 2);
    config.set(TaskManagerOptions.MANAGED_MEMORY_SIZE, MemorySize.parse("4m"));
    config.set(TaskManagerOptions.NETWORK_MEMORY_MIN, MemorySize.parse("3200k"));
    config.set(TaskManagerOptions.NETWORK_MEMORY_MAX, MemorySize.parse("3200k"));
    config.set(TaskManagerOptions.TASK_HEAP_MEMORY, MemorySize.parse("128m"));
    config.set(TaskManagerOptions.CPU_CORES, 1.0);
    config.setString(JobManagerOptions.EXECUTION_FAILOVER_STRATEGY, "full");
    config.set(JobManagerOptions.RESOURCE_WAIT_TIMEOUT, Duration.ofSeconds(30L));
    // check that we run this test only if the java command
    // is available on this machine
    String javaCommand = getJavaCommandPath();
    if (javaCommand == null) {
        fail("cannot find java executable");
    }
    final TaskManagerConnectionTracker tracker = new TaskManagerConnectionTracker();
    TestProcessBuilder.TestProcess taskManagerProcess = null;
    // the cluster entrypoint is overridden so that its resource manager factory is handed the
    // tracker queried below
    try (final SessionClusterEntrypoint clusterEntrypoint = new SessionClusterEntrypoint(config) {

        @Override
        protected DefaultDispatcherResourceManagerComponentFactory createDispatcherResourceManagerComponentFactory(Configuration configuration) {
            return DefaultDispatcherResourceManagerComponentFactory.createSessionComponentFactory(new TestingStandaloneResourceManagerFactory(tracker));
        }
    }) {
        clusterEntrypoint.startCluster();
        TestProcessBuilder taskManagerProcessBuilder = new TestProcessBuilder(TaskExecutorProcessEntryPoint.class.getName());
        taskManagerProcessBuilder.addConfigAsMainClassArgs(config);
        // start the TaskManager processes
        taskManagerProcess = taskManagerProcessBuilder.start();
        tracker.waitForTaskManagerConnected();
        // shutdown TaskManager
        taskManagerProcess.destroy();
        tracker.waitForTaskManagerDisconnected();
        assertThat(tracker.getNumberOfConnectedTaskManager()).isEqualTo(1);
    } catch (Throwable t) {
        printProcessLog(taskManagerProcess);
        // Fail the test, but keep the original throwable as the cause so that its type and
        // stack trace are not reduced to a (possibly null) message.
        throw new AssertionError(t.getMessage(), t);
    } finally {
        if (taskManagerProcess != null && taskManagerProcess.getProcess().isAlive()) {
            LOG.error("TaskManager did not shutdown in time.");
            printProcessLog(taskManagerProcess);
            taskManagerProcess.getProcess().destroyForcibly();
        }
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) ResourceManagerRuntimeServicesConfiguration(org.apache.flink.runtime.resourcemanager.ResourceManagerRuntimeServicesConfiguration) SessionClusterEntrypoint(org.apache.flink.runtime.entrypoint.SessionClusterEntrypoint) TaskExecutorProcessEntryPoint(org.apache.flink.test.recovery.utils.TaskExecutorProcessEntryPoint) TestProcessBuilder(org.apache.flink.test.util.TestProcessBuilder) Test(org.junit.jupiter.api.Test)

Aggregations

TestProcessBuilder (org.apache.flink.test.util.TestProcessBuilder)9 Configuration (org.apache.flink.configuration.Configuration)7 Test (org.junit.Test)6 File (java.io.File)5 TaskExecutorProcessEntryPoint (org.apache.flink.test.recovery.utils.TaskExecutorProcessEntryPoint)5 TestProcess (org.apache.flink.test.util.TestProcessBuilder.TestProcess)3 Path (java.nio.file.Path)2 AtomicReference (java.util.concurrent.atomic.AtomicReference)2 ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID)2 IOException (java.io.IOException)1 ScheduledExecutorService (java.util.concurrent.ScheduledExecutorService)1 TimeoutException (java.util.concurrent.TimeoutException)1 MapFunction (org.apache.flink.api.common.functions.MapFunction)1 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)1 ProgramInvocationException (org.apache.flink.client.program.ProgramInvocationException)1 MemoryExecutionGraphInfoStore (org.apache.flink.runtime.dispatcher.MemoryExecutionGraphInfoStore)1 SessionClusterEntrypoint (org.apache.flink.runtime.entrypoint.SessionClusterEntrypoint)1 StandaloneSessionClusterEntrypoint (org.apache.flink.runtime.entrypoint.StandaloneSessionClusterEntrypoint)1 DefaultDispatcherResourceManagerComponentFactory (org.apache.flink.runtime.entrypoint.component.DefaultDispatcherResourceManagerComponentFactory)1 DispatcherResourceManagerComponent (org.apache.flink.runtime.entrypoint.component.DispatcherResourceManagerComponent)1