Search in sources :

Example 1 with TestProcessBuilder

Use of org.apache.flink.test.util.TestProcessBuilder in project flink by apache.

Class AbstractTaskManagerProcessFailureRecoveryTest, method testTaskManagerProcessFailure.

/**
 * Runs a program against a cluster whose TaskManagers live in separate processes and checks that
 * the program still completes after one of those processes is destroyed.
 *
 * <p>Sequence: start a {@link StandaloneSessionClusterEntrypoint} in this JVM, start two
 * TaskManager processes, trigger the program on a separate thread, wait for the "ready" marker
 * files, start a third TaskManager, destroy the first one, create the "proceed" marker file and
 * finally wait for the program thread to end without a recorded error.
 *
 * <p>The test returns early (without failing) when no java executable can be found.
 *
 * @throws Exception declared by the test; exceptions raised inside the try block are caught,
 *     logged together with the TaskManager process logs, and turned into a test failure
 */
@Test
public void testTaskManagerProcessFailure() throws Exception {
    // Declared outside the try block so that the catch/finally clauses can log and destroy them.
    TestProcess taskManagerProcess1 = null;
    TestProcess taskManagerProcess2 = null;
    TestProcess taskManagerProcess3 = null;
    File coordinateTempDir = null;
    // Configuration shared by the in-process cluster entrypoint and the TaskManager processes.
    Configuration config = new Configuration();
    config.setString(JobManagerOptions.ADDRESS, "localhost");
    config.setString(RestOptions.BIND_PORT, "0");
    config.setLong(HeartbeatManagerOptions.HEARTBEAT_INTERVAL, 200L);
    config.setLong(HeartbeatManagerOptions.HEARTBEAT_TIMEOUT, 10000L);
    config.set(HeartbeatManagerOptions.HEARTBEAT_RPC_FAILURE_THRESHOLD, 1);
    config.setString(HighAvailabilityOptions.HA_MODE, "zookeeper");
    config.setString(HighAvailabilityOptions.HA_ZOOKEEPER_QUORUM, zooKeeperResource.getConnectString());
    config.setString(HighAvailabilityOptions.HA_STORAGE_PATH, temporaryFolder.newFolder().getAbsolutePath());
    config.setInteger(TaskManagerOptions.NUM_TASK_SLOTS, 2);
    config.set(TaskManagerOptions.MANAGED_MEMORY_SIZE, MemorySize.parse("4m"));
    config.set(TaskManagerOptions.NETWORK_MEMORY_MIN, MemorySize.parse("3200k"));
    config.set(TaskManagerOptions.NETWORK_MEMORY_MAX, MemorySize.parse("3200k"));
    config.set(NettyShuffleEnvironmentOptions.NETWORK_SORT_SHUFFLE_MIN_BUFFERS, 16);
    config.set(TaskManagerOptions.TASK_HEAP_MEMORY, MemorySize.parse("128m"));
    config.set(TaskManagerOptions.CPU_CORES, 1.0);
    config.setString(JobManagerOptions.EXECUTION_FAILOVER_STRATEGY, "full");
    config.set(JobManagerOptions.RESOURCE_WAIT_TIMEOUT, Duration.ofSeconds(30L));
    try (final StandaloneSessionClusterEntrypoint clusterEntrypoint = new StandaloneSessionClusterEntrypoint(config)) {
        // check that we run this test only if the java command
        // is available on this machine
        String javaCommand = getJavaCommandPath();
        if (javaCommand == null) {
            System.out.println("---- Skipping Process Failure test : Could not find java executable ----");
            return;
        }
        clusterEntrypoint.startCluster();
        // coordination between the processes goes through a directory
        coordinateTempDir = temporaryFolder.newFolder();
        // The same builder (and therefore the same configuration arguments) is reused for all
        // three TaskManager processes.
        TestProcessBuilder taskManagerProcessBuilder = new TestProcessBuilder(TaskExecutorProcessEntryPoint.class.getName());
        taskManagerProcessBuilder.addConfigAsMainClassArgs(config);
        // start the first two TaskManager processes
        taskManagerProcess1 = taskManagerProcessBuilder.start();
        taskManagerProcess2 = taskManagerProcessBuilder.start();
        // The program will set a marker file in each of its parallel tasks once they are ready,
        // so that this coordinating code is aware of this. The program will very slowly consume
        // elements until the marker file (later created by the test driver code) is present.
        // Final copy of the directory reference so the anonymous Thread subclass can capture it.
        final File coordinateDirClosure = coordinateTempDir;
        // Carries any Throwable raised on the program thread back to this (the test) thread.
        final AtomicReference<Throwable> errorRef = new AtomicReference<>();
        // we trigger program execution in a separate thread
        Thread programTrigger = new Thread("Program Trigger") {

            @Override
            public void run() {
                try {
                    testTaskManagerFailure(config, coordinateDirClosure);
                } catch (Throwable t) {
                    t.printStackTrace();
                    errorRef.set(t);
                }
            }
        };
        // start the test program
        programTrigger.start();
        // wait up to 120000 ms (2 minutes) for the ready marker files of all parallel tasks
        if (!waitForMarkerFiles(coordinateTempDir, READY_MARKER_FILE_PREFIX, PARALLELISM, 120000)) {
            // check if the program failed for some reason
            if (errorRef.get() != null) {
                Throwable error = errorRef.get();
                error.printStackTrace();
                fail("The program encountered a " + error.getClass().getSimpleName() + " : " + error.getMessage());
            } else {
                // no error occurred, simply a timeout
                fail("The tasks were not started within time (" + 120000 + "msecs)");
            }
        }
        // start the third TaskManager
        taskManagerProcess3 = taskManagerProcessBuilder.start();
        // kill one of the previous TaskManagers, triggering a failure and recovery
        taskManagerProcess1.destroy();
        waitForShutdown("TaskManager 1", taskManagerProcess1);
        // we create the marker file which signals the program functions tasks that they can
        // complete
        touchFile(new File(coordinateTempDir, PROCEED_MARKER_FILE));
        // wait for at most 5 minutes for the program to complete
        programTrigger.join(300000);
        // check that the program really finished
        assertFalse("The program did not finish in time", programTrigger.isAlive());
        // check whether the program encountered an error
        if (errorRef.get() != null) {
            Throwable error = errorRef.get();
            error.printStackTrace();
            fail("The program encountered a " + error.getClass().getSimpleName() + " : " + error.getMessage());
        }
    // all seems well :-)
    } catch (Exception e) {
        // Dump the captured output of every TaskManager process to help diagnose the failure.
        e.printStackTrace();
        printProcessLog("TaskManager 1", taskManagerProcess1);
        printProcessLog("TaskManager 2", taskManagerProcess2);
        printProcessLog("TaskManager 3", taskManagerProcess3);
        // NOTE(review): only the message is forwarded; the stack trace is available solely via
        // the printStackTrace() call above.
        fail(e.getMessage());
    } catch (Error e) {
        // Errors (which include the AssertionError raised by fail/assert*) get the same log dump
        // and are then rethrown unchanged.
        e.printStackTrace();
        printProcessLog("TaskManager 1", taskManagerProcess1);
        printProcessLog("TaskManager 2", taskManagerProcess2);
        printProcessLog("TaskManager 3", taskManagerProcess3);
        throw e;
    } finally {
        // Destroy every process that was started, whatever the outcome of the test.
        if (taskManagerProcess1 != null) {
            taskManagerProcess1.destroy();
        }
        if (taskManagerProcess2 != null) {
            taskManagerProcess2.destroy();
        }
        if (taskManagerProcess3 != null) {
            taskManagerProcess3.destroy();
        }
        // NOTE(review): these calls are not null-guarded, so waitForShutdown presumably
        // tolerates a null process (e.g. after the early return above) - confirm.
        waitForShutdown("TaskManager 1", taskManagerProcess1);
        waitForShutdown("TaskManager 2", taskManagerProcess2);
        waitForShutdown("TaskManager 3", taskManagerProcess3);
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) AtomicReference(java.util.concurrent.atomic.AtomicReference) TestProcess(org.apache.flink.test.util.TestProcessBuilder.TestProcess) TestProcessBuilder(org.apache.flink.test.util.TestProcessBuilder) IOException(java.io.IOException) StandaloneSessionClusterEntrypoint(org.apache.flink.runtime.entrypoint.StandaloneSessionClusterEntrypoint) TaskExecutorProcessEntryPoint(org.apache.flink.test.recovery.utils.TaskExecutorProcessEntryPoint) File(java.io.File) Test(org.junit.Test)

Example 2 with TestProcessBuilder

Use of org.apache.flink.test.util.TestProcessBuilder in project flink by apache.

Class ClusterEntrypointITCase, method testDeterministicWorkingDirectoryIsNotDeletedInCaseOfProcessFailure.

/**
 * Checks that the JobManager working directory generated for an explicitly configured resource
 * ID still exists after the JobManager process has been destroyed.
 *
 * <p>The JobManager runs in a child process. The test waits (up to one minute) for the working
 * directory to appear, destroys the process, waits for it to exit and then asserts that the
 * directory is still there. If anything goes wrong, the captured process log is printed.
 *
 * @throws Exception if starting the process, waiting for the directory or waiting for the
 *     process exit fails
 */
@Test
public void testDeterministicWorkingDirectoryIsNotDeletedInCaseOfProcessFailure() throws Exception {
    final File workingDirBase = TEMPORARY_FOLDER.newFolder();
    final ResourceID resourceId = ResourceID.generate();
    final Configuration configuration = new Configuration();
    configuration.set(ClusterOptions.PROCESS_WORKING_DIR_BASE, workingDirBase.getAbsolutePath());
    configuration.set(JobManagerOptions.JOB_MANAGER_RESOURCE_ID, resourceId.toString());
    final File workingDirectory = ClusterEntrypointUtils.generateJobManagerWorkingDirectoryFile(configuration, resourceId);
    final TestProcessBuilder.TestProcess jobManagerProcess = new TestProcessBuilder(DispatcherProcess.DispatcherProcessEntryPoint.class.getName()).addConfigAsMainClassArgs(configuration).start();
    boolean success = false;
    try {
        CommonTestUtils.waitUntilCondition(workingDirectory::exists, Deadline.fromNow(Duration.ofMinutes(1L)));
        jobManagerProcess.getProcess().destroy();
        jobManagerProcess.getProcess().waitFor();
        assertTrue(workingDirectory.exists());
        success = true;
    } finally {
        // Never leave the child JVM running: if the wait above timed out or threw, the process
        // has not been destroyed yet. Process.destroy() takes no action on a process that has
        // already exited, so this is harmless on the success path.
        jobManagerProcess.getProcess().destroy();
        if (!success) {
            AbstractTaskManagerProcessFailureRecoveryTest.printProcessLog("JobManager", jobManagerProcess);
        }
    }
}
Also used : Configuration(org.apache.flink.configuration.Configuration) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) File(java.io.File) TestProcessBuilder(org.apache.flink.test.util.TestProcessBuilder) Test(org.junit.Test)

Example 3 with TestProcessBuilder

Use of org.apache.flink.test.util.TestProcessBuilder in project flink by apache.

Class LocalRecoveryITCase, method startTaskManagerProcess.

/**
 * Starts a process from the builder returned by {@code createTaskManagerProcessBuilder()} after
 * adding the given configuration as main-class arguments.
 *
 * @param effectiveConfiguration configuration handed to the new process
 * @return handle to the started process
 * @throws IOException if the process cannot be started
 */
private static TestProcessBuilder.TestProcess startTaskManagerProcess(Configuration effectiveConfiguration) throws IOException {
    final TestProcessBuilder builder = createTaskManagerProcessBuilder();
    // The configuration reaches the child process through its main-class arguments.
    builder.addConfigAsMainClassArgs(effectiveConfiguration);
    return builder.start();
}
Also used : TestProcessBuilder(org.apache.flink.test.util.TestProcessBuilder)

Example 4 with TestProcessBuilder

Use of org.apache.flink.test.util.TestProcessBuilder in project flink by apache.

Class ExceptionUtilsITCase, method run.

/**
 * Runs the given main class in a separate JVM with a clean environment, waits for it to exit
 * and returns what it wrote to its error and standard output.
 *
 * @param className fully qualified name of the main class to run
 * @param args arguments passed to the main class, in iteration order
 * @param directMemorySize if positive, passed verbatim as {@code -XX:MaxDirectMemorySize}
 * @param metaspaceSize if positive, passed verbatim as {@code -XX:MaxMetaspaceSize} (together
 *     with {@code -XX:-UseCompressedOops})
 * @return the trimmed error output and the trimmed process output of the child JVM
 * @throws InterruptedException if interrupted while waiting for the child JVM to exit
 * @throws IOException if the child JVM cannot be started
 */
private static RunResult run(String className, Iterable<String> args, long directMemorySize, long metaspaceSize) throws InterruptedException, IOException {
    TestProcessBuilder processBuilder = new TestProcessBuilder(className);
    // JVM flags are built by plain concatenation rather than String.format("%d"): the latter
    // localises digits according to the default locale, which could yield a flag value the
    // child JVM cannot parse on hosts whose locale uses non-ASCII digits.
    if (directMemorySize > 0) {
        processBuilder.addJvmArg("-XX:MaxDirectMemorySize=" + directMemorySize);
    }
    if (metaspaceSize > 0) {
        processBuilder.addJvmArg("-XX:-UseCompressedOops");
        processBuilder.addJvmArg("-XX:MaxMetaspaceSize=" + metaspaceSize);
    }
    for (String arg : args) {
        processBuilder.addMainClassArg(arg);
    }
    // JAVA_TOOL_OPTIONS is configured on CI which would affect the process output
    processBuilder.withCleanEnvironment();
    TestProcess p = processBuilder.start();
    p.getProcess().waitFor();
    return new RunResult(p.getErrorOutput().toString().trim(), p.getProcessOutput().toString().trim());
}
Also used : TestProcess(org.apache.flink.test.util.TestProcessBuilder.TestProcess) TestProcessBuilder(org.apache.flink.test.util.TestProcessBuilder)

Example 5 with TestProcessBuilder

Use of org.apache.flink.test.util.TestProcessBuilder in project flink by apache.

Class TaskManagerRunnerITCase, method testNondeterministicWorkingDirIsDeletedInCaseOfProcessFailure.

/**
 * Checks that the working directory created by a TaskManager process under the configured base
 * directory no longer exists after the process has been destroyed.
 *
 * <p>The TaskManager runs in a child process. The test waits (up to one minute) until the base
 * directory contains an entry, takes the single entry as the working directory, destroys the
 * process, waits for it to exit and then asserts that the directory is gone. If anything goes
 * wrong, the captured process log is printed.
 *
 * @throws Exception if starting the process, listing the base directory or waiting for the
 *     process exit fails
 */
@Test
public void testNondeterministicWorkingDirIsDeletedInCaseOfProcessFailure() throws Exception {
    final File workingDirBase = TEMPORARY_FOLDER.newFolder();
    final Configuration configuration = new Configuration();
    configuration.set(ClusterOptions.PROCESS_WORKING_DIR_BASE, workingDirBase.getAbsolutePath());
    configuration.set(JobManagerOptions.ADDRESS, "localhost");
    configuration.set(AkkaOptions.LOOKUP_TIMEOUT_DURATION, Duration.ZERO);
    final TestProcessBuilder.TestProcess taskManagerProcess = new TestProcessBuilder(TaskExecutorProcessEntryPoint.class.getName()).addConfigAsMainClassArgs(configuration).start();
    boolean success = false;
    try {
        CommonTestUtils.waitUntilCondition(() -> {
            try (Stream<Path> files = Files.list(workingDirBase.toPath())) {
                return files.findAny().isPresent();
            }
        }, Deadline.fromNow(Duration.ofMinutes(1L)));
        // Files.list holds an open directory handle until the stream is closed, so it is
        // consumed inside try-with-resources just like in the polling lambda above.
        final File workingDirectory;
        try (Stream<Path> files = Files.list(workingDirBase.toPath())) {
            workingDirectory = Iterables.getOnlyElement(files.collect(Collectors.toList())).toFile();
        }
        taskManagerProcess.getProcess().destroy();
        taskManagerProcess.getProcess().waitFor();
        assertFalse(workingDirectory.exists());
        success = true;
    } finally {
        // Never leave the child JVM running: if the wait above timed out or threw, the process
        // has not been destroyed yet. Process.destroy() takes no action on a process that has
        // already exited, so this is harmless on the success path.
        taskManagerProcess.getProcess().destroy();
        if (!success) {
            AbstractTaskManagerProcessFailureRecoveryTest.printProcessLog("TaskManager", taskManagerProcess);
        }
    }
}
Also used : Path(java.nio.file.Path) Configuration(org.apache.flink.configuration.Configuration) TaskExecutorProcessEntryPoint(org.apache.flink.test.recovery.utils.TaskExecutorProcessEntryPoint) File(java.io.File) TestProcessBuilder(org.apache.flink.test.util.TestProcessBuilder) Test(org.junit.Test)

Aggregations

TestProcessBuilder (org.apache.flink.test.util.TestProcessBuilder)9 Configuration (org.apache.flink.configuration.Configuration)7 Test (org.junit.Test)6 File (java.io.File)5 TaskExecutorProcessEntryPoint (org.apache.flink.test.recovery.utils.TaskExecutorProcessEntryPoint)5 TestProcess (org.apache.flink.test.util.TestProcessBuilder.TestProcess)3 Path (java.nio.file.Path)2 AtomicReference (java.util.concurrent.atomic.AtomicReference)2 ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID)2 IOException (java.io.IOException)1 ScheduledExecutorService (java.util.concurrent.ScheduledExecutorService)1 TimeoutException (java.util.concurrent.TimeoutException)1 MapFunction (org.apache.flink.api.common.functions.MapFunction)1 ExecutionEnvironment (org.apache.flink.api.java.ExecutionEnvironment)1 ProgramInvocationException (org.apache.flink.client.program.ProgramInvocationException)1 MemoryExecutionGraphInfoStore (org.apache.flink.runtime.dispatcher.MemoryExecutionGraphInfoStore)1 SessionClusterEntrypoint (org.apache.flink.runtime.entrypoint.SessionClusterEntrypoint)1 StandaloneSessionClusterEntrypoint (org.apache.flink.runtime.entrypoint.StandaloneSessionClusterEntrypoint)1 DefaultDispatcherResourceManagerComponentFactory (org.apache.flink.runtime.entrypoint.component.DefaultDispatcherResourceManagerComponentFactory)1 DispatcherResourceManagerComponent (org.apache.flink.runtime.entrypoint.component.DispatcherResourceManagerComponent)1