Search in sources :

Example 6 with AtomicReference

use of java.util.concurrent.atomic.AtomicReference in project flink by apache.

the class AbstractTaskManagerProcessFailureRecoveryTest method testTaskManagerProcessFailure.

/**
 * End-to-end check that a running program survives the loss of a TaskManager process.
 *
 * <p>Sequence, as implemented below: start a JobManager in this JVM, launch two TaskManagers
 * as external java processes, run {@code testTaskManagerFailure(...)} in a background thread,
 * wait until the parallel tasks have written their "ready" marker files, add a third
 * TaskManager, destroy the first one, write the "proceed" marker file and finally wait for
 * the background thread to finish without having recorded an error.
 *
 * <p>The test returns early (without failing) when no java executable can be located.
 * On any exception or error the captured stderr of all three TaskManager processes is printed.
 */
@Test
public void testTaskManagerProcessFailure() {
    // buffers collecting the stderr output of the three external TaskManager processes
    final StringWriter processOutput1 = new StringWriter();
    final StringWriter processOutput2 = new StringWriter();
    final StringWriter processOutput3 = new StringWriter();
    // everything below is declared outside the try so the finally block can clean it up
    ActorSystem jmActorSystem = null;
    Process taskManagerProcess1 = null;
    Process taskManagerProcess2 = null;
    Process taskManagerProcess3 = null;
    File coordinateTempDir = null;
    try {
        // check that we run this test only if the java command
        // is available on this machine
        String javaCommand = getJavaCommandPath();
        if (javaCommand == null) {
            System.out.println("---- Skipping Process Failure test : Could not find java executable ----");
            return;
        }
        // create a logging file for the process
        File tempLogFile = File.createTempFile(getClass().getSimpleName() + "-", "-log4j.properties");
        tempLogFile.deleteOnExit();
        CommonTestUtils.printLog4jDebugConfig(tempLogFile);
        // coordination between the processes goes through a directory
        coordinateTempDir = CommonTestUtils.createTempDirectory();
        // find a free port to start the JobManager
        final int jobManagerPort = NetUtils.getAvailablePort();
        // start a JobManager
        Tuple2<String, Object> localAddress = new Tuple2<String, Object>("localhost", jobManagerPort);
        Configuration jmConfig = new Configuration();
        // heartbeat / failure-detection and restart settings used by the JobManager in this test
        jmConfig.setString(ConfigConstants.AKKA_WATCH_HEARTBEAT_INTERVAL, "1000 ms");
        jmConfig.setString(ConfigConstants.AKKA_WATCH_HEARTBEAT_PAUSE, "6 s");
        jmConfig.setInteger(ConfigConstants.AKKA_WATCH_THRESHOLD, 9);
        jmConfig.setString(ConfigConstants.RESTART_STRATEGY_FIXED_DELAY_DELAY, "10 s");
        jmConfig.setString(ConfigConstants.AKKA_ASK_TIMEOUT, "100 s");
        jmActorSystem = AkkaUtils.createActorSystem(jmConfig, new Some<>(localAddress));
        // only the first element of the returned tuple (the JobManager actor) is needed here
        ActorRef jmActor = JobManager.startJobManagerActors(jmConfig, jmActorSystem, TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), JobManager.class, MemoryArchivist.class)._1();
        // the TaskManager java command
        String[] command = new String[] { javaCommand, "-Dlog.level=DEBUG", "-Dlog4j.configuration=file:" + tempLogFile.getAbsolutePath(), "-Xms80m", "-Xmx80m", "-classpath", getCurrentClasspath(), TaskManagerProcessEntryPoint.class.getName(), String.valueOf(jobManagerPort) };
        // start the first two TaskManager processes
        taskManagerProcess1 = new ProcessBuilder(command).start();
        // NOTE(review): the PipeForwarder instance is discarded; presumably its constructor
        // starts copying the process' stderr into the writer - confirm in CommonTestUtils
        new CommonTestUtils.PipeForwarder(taskManagerProcess1.getErrorStream(), processOutput1);
        taskManagerProcess2 = new ProcessBuilder(command).start();
        new CommonTestUtils.PipeForwarder(taskManagerProcess2.getErrorStream(), processOutput2);
        // we wait for the JobManager to have the two TaskManagers available
        // since some of the CI environments are very hostile, we need to give this a lot of time (2 minutes)
        waitUntilNumTaskManagersAreRegistered(jmActor, 2, 120000);
        // the program will set a marker file in each of its parallel tasks once they are ready, so that
        // this coordinating code is aware of this.
        // the program will very slowly consume elements until the marker file (later created by the
        // test driver code) is present
        final File coordinateDirClosure = coordinateTempDir;
        // carries a failure of the background thread back to the test thread
        final AtomicReference<Throwable> errorRef = new AtomicReference<>();
        // we trigger program execution in a separate thread
        Thread programTrigger = new Thread("Program Trigger") {

            @Override
            public void run() {
                try {
                    testTaskManagerFailure(jobManagerPort, coordinateDirClosure);
                } catch (Throwable t) {
                    t.printStackTrace();
                    errorRef.set(t);
                }
            }
        };
        //start the test program
        programTrigger.start();
        // wait up to 2 minutes (120000 ms) for all parallel tasks to write their ready markers
        if (!waitForMarkerFiles(coordinateTempDir, READY_MARKER_FILE_PREFIX, PARALLELISM, 120000)) {
            // check if the program failed for some reason
            if (errorRef.get() != null) {
                Throwable error = errorRef.get();
                error.printStackTrace();
                fail("The program encountered a " + error.getClass().getSimpleName() + " : " + error.getMessage());
            } else {
                // no error occurred, simply a timeout
                fail("The tasks were not started within time (" + 120000 + "msecs)");
            }
        }
        // start the third TaskManager
        taskManagerProcess3 = new ProcessBuilder(command).start();
        new CommonTestUtils.PipeForwarder(taskManagerProcess3.getErrorStream(), processOutput3);
        // we wait for the third TaskManager to register
        // since some of the CI environments are very hostile, we need to give this a lot of time (2 minutes)
        waitUntilNumTaskManagersAreRegistered(jmActor, 3, 120000);
        // kill one of the previous TaskManagers, triggering a failure and recovery
        taskManagerProcess1.destroy();
        // cleared so the finally block does not destroy the same process a second time
        taskManagerProcess1 = null;
        // we create the marker file which signals the program's tasks that they can complete
        touchFile(new File(coordinateTempDir, PROCEED_MARKER_FILE));
        // wait for at most 5 minutes for the program to complete
        programTrigger.join(300000);
        // check that the program really finished
        assertFalse("The program did not finish in time", programTrigger.isAlive());
        // check whether the program encountered an error
        if (errorRef.get() != null) {
            Throwable error = errorRef.get();
            error.printStackTrace();
            fail("The program encountered a " + error.getClass().getSimpleName() + " : " + error.getMessage());
        }
    // all seems well :-)
    } catch (Exception e) {
        // dump the TaskManager logs for diagnosis, then fail the test with the exception message
        e.printStackTrace();
        printProcessLog("TaskManager 1", processOutput1.toString());
        printProcessLog("TaskManager 2", processOutput2.toString());
        printProcessLog("TaskManager 3", processOutput3.toString());
        fail(e.getMessage());
    } catch (Error e) {
        // errors (including assertion failures raised above) are rethrown after dumping the logs
        e.printStackTrace();
        printProcessLog("TaskManager 1", processOutput1.toString());
        printProcessLog("TaskManager 2", processOutput2.toString());
        printProcessLog("TaskManager 3", processOutput3.toString());
        throw e;
    } finally {
        // best-effort cleanup of every process, the actor system and the coordination directory
        if (taskManagerProcess1 != null) {
            taskManagerProcess1.destroy();
        }
        if (taskManagerProcess2 != null) {
            taskManagerProcess2.destroy();
        }
        if (taskManagerProcess3 != null) {
            taskManagerProcess3.destroy();
        }
        if (jmActorSystem != null) {
            jmActorSystem.shutdown();
        }
        if (coordinateTempDir != null) {
            try {
                FileUtils.deleteDirectory(coordinateTempDir);
            } catch (Throwable t) {
            // we can ignore this
            }
        }
    }
}
Also used : ActorSystem(akka.actor.ActorSystem) MemoryArchivist(org.apache.flink.runtime.jobmanager.MemoryArchivist) Configuration(org.apache.flink.configuration.Configuration) ActorRef(akka.actor.ActorRef) AtomicReference(java.util.concurrent.atomic.AtomicReference) JobManager(org.apache.flink.runtime.jobmanager.JobManager) TimeoutException(java.util.concurrent.TimeoutException) IOException(java.io.IOException) Some(scala.Some) StringWriter(java.io.StringWriter) Tuple2(scala.Tuple2) File(java.io.File) Test(org.junit.Test)

Example 7 with AtomicReference

use of java.util.concurrent.atomic.AtomicReference in project flink by apache.

the class SocketClientSinkTest method testSocketSink.

/**
 * Verifies that a {@code SocketClientSink} delivers one newline-terminated message to a
 * listening server socket.
 *
 * <p>The sink is created, opened, invoked and closed in a background thread while this thread
 * accepts the connection and reads a single line. Any throwable raised in the background
 * thread is captured and turned into a test failure.
 *
 * @throws Exception if the server socket cannot be set up, reading fails, or the wait for the
 *         background thread is interrupted
 */
@Test
public void testSocketSink() throws Exception {
    final AtomicReference<Throwable> error = new AtomicReference<Throwable>();
    String value;
    // try-with-resources releases the listening port even when the test fails midway
    try (ServerSocket server = new ServerSocket(0)) {
        final int port = server.getLocalPort();
        Thread sinkRunner = new Thread("Test sink runner") {

            @Override
            public void run() {
                try {
                    SocketClientSink<String> simpleSink = new SocketClientSink<>(host, port, simpleSchema, 0);
                    simpleSink.open(new Configuration());
                    simpleSink.invoke(TEST_MESSAGE + '\n');
                    simpleSink.close();
                } catch (Throwable t) {
                    // hand the failure over to the test thread
                    error.set(t);
                }
            }
        };
        sinkRunner.start();
        // the accepted connection and its reader are closed on every path as well
        try (Socket sk = server.accept();
                BufferedReader rdr = new BufferedReader(new InputStreamReader(sk.getInputStream()))) {
            value = rdr.readLine();
            // the sink closes its side inside the runner, so wait for it before tearing down
            sinkRunner.join();
        }
    }
    if (error.get() != null) {
        Throwable t = error.get();
        t.printStackTrace();
        fail("Error in spawned thread: " + t.getMessage());
    }
    assertEquals(TEST_MESSAGE, value);
}
Also used : Configuration(org.apache.flink.configuration.Configuration) InputStreamReader(java.io.InputStreamReader) ServerSocket(java.net.ServerSocket) AtomicReference(java.util.concurrent.atomic.AtomicReference) BufferedReader(java.io.BufferedReader) Socket(java.net.Socket) ServerSocket(java.net.ServerSocket) Test(org.junit.Test)

Example 8 with AtomicReference

use of java.util.concurrent.atomic.AtomicReference in project flink by apache.

the class SocketClientSinkTest method testSinkAutoFlush.

/**
 * Verifies that a {@code SocketClientSink} created with the auto-flush flag set to
 * {@code true} makes a message readable on the server side before the sink is closed.
 *
 * <p>The sink is opened on the test thread, a background thread invokes it with one
 * newline-terminated message, and this thread accepts the connection and reads one line.
 * The sink is closed only after the line has been read and the background thread finished.
 *
 * @throws Exception if the sockets cannot be set up, reading fails, closing the sink fails,
 *         or the wait for the background thread is interrupted
 */
@Test
public void testSinkAutoFlush() throws Exception {
    final AtomicReference<Throwable> error = new AtomicReference<Throwable>();
    String value;
    // try-with-resources releases the listening port even when the test fails midway
    try (ServerSocket server = new ServerSocket(0)) {
        final int port = server.getLocalPort();
        final SocketClientSink<String> simpleSink = new SocketClientSink<>(host, port, simpleSchema, 0, true);
        simpleSink.open(new Configuration());
        Thread sinkRunner = new Thread("Test sink runner") {

            @Override
            public void run() {
                try {
                    // need two messages here: send a fin to cancel the client state:FIN_WAIT_2 while the server is CLOSE_WAIT
                    simpleSink.invoke(TEST_MESSAGE + '\n');
                } catch (Throwable t) {
                    // hand the failure over to the test thread
                    error.set(t);
                }
            }
        };
        sinkRunner.start();
        // the accepted connection and its reader are closed on every path as well
        try (Socket sk = server.accept();
                BufferedReader rdr = new BufferedReader(new InputStreamReader(sk.getInputStream()))) {
            value = rdr.readLine();
            sinkRunner.join();
            // same order as before: the sink goes down first, then the server-side sockets
            simpleSink.close();
        }
    }
    if (error.get() != null) {
        Throwable t = error.get();
        t.printStackTrace();
        fail("Error in spawned thread: " + t.getMessage());
    }
    assertEquals(TEST_MESSAGE, value);
}
Also used : Configuration(org.apache.flink.configuration.Configuration) InputStreamReader(java.io.InputStreamReader) ServerSocket(java.net.ServerSocket) AtomicReference(java.util.concurrent.atomic.AtomicReference) BufferedReader(java.io.BufferedReader) Socket(java.net.Socket) ServerSocket(java.net.ServerSocket) Test(org.junit.Test)

Example 9 with AtomicReference

use of java.util.concurrent.atomic.AtomicReference in project flink by apache.

the class StreamingRuntimeContextTest method testFoldingStateInstantiation.

/**
 * Checks that the execution config is applied to a folding state descriptor requested
 * through the runtime context: the serializer of the intercepted descriptor must be a
 * Kryo serializer that has {@code Path} registered.
 */
@Test
public void testFoldingStateInstantiation() throws Exception {
    final ExecutionConfig executionConfig = new ExecutionConfig();
    executionConfig.registerKryoType(Path.class);

    // filled by the mock operator with the descriptor it is handed
    final AtomicReference<Object> capturedDescriptor = new AtomicReference<>();
    StreamingRuntimeContext runtimeContext = new StreamingRuntimeContext(
            createDescriptorCapturingMockOp(capturedDescriptor, executionConfig),
            createMockEnvironment(),
            Collections.<String, Accumulator<?, ?>>emptyMap());

    @SuppressWarnings("unchecked")
    FoldFunction<String, TaskInfo> foldFunction = (FoldFunction<String, TaskInfo>) mock(FoldFunction.class);
    FoldingStateDescriptor<String, TaskInfo> requested = new FoldingStateDescriptor<>("name", null, foldFunction, TaskInfo.class);
    runtimeContext.getFoldingState(requested);

    Object captured = capturedDescriptor.get();
    FoldingStateDescriptor<?, ?> intercepted = (FoldingStateDescriptor<?, ?>) captured;
    TypeSerializer<?> stateSerializer = intercepted.getSerializer();

    // check that the Path class is really registered, i.e., the execution config was applied
    assertTrue(stateSerializer instanceof KryoSerializer);
    KryoSerializer<?> kryoSerializer = (KryoSerializer<?>) stateSerializer;
    int pathRegistrationId = kryoSerializer.getKryo().getRegistration(Path.class).getId();
    assertTrue(pathRegistrationId > 0);
}
Also used : Path(org.apache.flink.core.fs.Path) FoldFunction(org.apache.flink.api.common.functions.FoldFunction) AtomicReference(java.util.concurrent.atomic.AtomicReference) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) FoldingStateDescriptor(org.apache.flink.api.common.state.FoldingStateDescriptor) KryoSerializer(org.apache.flink.api.java.typeutils.runtime.kryo.KryoSerializer) TaskInfo(org.apache.flink.api.common.TaskInfo) Test(org.junit.Test)

Example 10 with AtomicReference

use of java.util.concurrent.atomic.AtomicReference in project flink by apache.

the class StreamingRuntimeContextTest method testValueStateInstantiation.

/**
 * Checks that the execution config is applied to a value state descriptor requested
 * through the runtime context: the serializer of the intercepted descriptor must be a
 * Kryo serializer that has {@code Path} registered.
 */
@Test
public void testValueStateInstantiation() throws Exception {
    final ExecutionConfig executionConfig = new ExecutionConfig();
    executionConfig.registerKryoType(Path.class);

    // filled by the mock operator with the descriptor it is handed
    final AtomicReference<Object> capturedDescriptor = new AtomicReference<>();
    StreamingRuntimeContext runtimeContext = new StreamingRuntimeContext(
            createDescriptorCapturingMockOp(capturedDescriptor, executionConfig),
            createMockEnvironment(),
            Collections.<String, Accumulator<?, ?>>emptyMap());

    ValueStateDescriptor<TaskInfo> requested = new ValueStateDescriptor<>("name", TaskInfo.class);
    runtimeContext.getState(requested);

    Object captured = capturedDescriptor.get();
    StateDescriptor<?, ?> intercepted = (StateDescriptor<?, ?>) captured;
    TypeSerializer<?> stateSerializer = intercepted.getSerializer();

    // check that the Path class is really registered, i.e., the execution config was applied
    assertTrue(stateSerializer instanceof KryoSerializer);
    KryoSerializer<?> kryoSerializer = (KryoSerializer<?>) stateSerializer;
    int pathRegistrationId = kryoSerializer.getKryo().getRegistration(Path.class).getId();
    assertTrue(pathRegistrationId > 0);
}
Also used : Path(org.apache.flink.core.fs.Path) AtomicReference(java.util.concurrent.atomic.AtomicReference) ExecutionConfig(org.apache.flink.api.common.ExecutionConfig) KryoSerializer(org.apache.flink.api.java.typeutils.runtime.kryo.KryoSerializer) ValueStateDescriptor(org.apache.flink.api.common.state.ValueStateDescriptor) TaskInfo(org.apache.flink.api.common.TaskInfo) ReducingStateDescriptor(org.apache.flink.api.common.state.ReducingStateDescriptor) MapStateDescriptor(org.apache.flink.api.common.state.MapStateDescriptor) ListStateDescriptor(org.apache.flink.api.common.state.ListStateDescriptor) StateDescriptor(org.apache.flink.api.common.state.StateDescriptor) ValueStateDescriptor(org.apache.flink.api.common.state.ValueStateDescriptor) FoldingStateDescriptor(org.apache.flink.api.common.state.FoldingStateDescriptor) Test(org.junit.Test)

Aggregations

AtomicReference (java.util.concurrent.atomic.AtomicReference)1331 Test (org.junit.Test)668 CountDownLatch (java.util.concurrent.CountDownLatch)437 IOException (java.io.IOException)263 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)205 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)159 ArrayList (java.util.ArrayList)108 HashMap (java.util.HashMap)105 List (java.util.List)95 Map (java.util.Map)77 Test (org.testng.annotations.Test)76 File (java.io.File)64 ExecutionException (java.util.concurrent.ExecutionException)60 HashSet (java.util.HashSet)54 URI (java.net.URI)48 TimeoutException (java.util.concurrent.TimeoutException)48 HttpServletRequest (javax.servlet.http.HttpServletRequest)48 HttpServletResponse (javax.servlet.http.HttpServletResponse)46 MockResponse (okhttp3.mockwebserver.MockResponse)46 ByteBuffer (java.nio.ByteBuffer)44