use of java.util.concurrent.atomic.AtomicReference in project flink by apache.
the class AbstractTaskManagerProcessFailureRecoveryTest method testTaskManagerProcessFailure.
@Test
public void testTaskManagerProcessFailure() {
final StringWriter processOutput1 = new StringWriter();
final StringWriter processOutput2 = new StringWriter();
final StringWriter processOutput3 = new StringWriter();
ActorSystem jmActorSystem = null;
Process taskManagerProcess1 = null;
Process taskManagerProcess2 = null;
Process taskManagerProcess3 = null;
File coordinateTempDir = null;
try {
// check that we run this test only if the java command
// is available on this machine
String javaCommand = getJavaCommandPath();
if (javaCommand == null) {
System.out.println("---- Skipping Process Failure test : Could not find java executable ----");
return;
}
// create a logging file for the process
File tempLogFile = File.createTempFile(getClass().getSimpleName() + "-", "-log4j.properties");
tempLogFile.deleteOnExit();
CommonTestUtils.printLog4jDebugConfig(tempLogFile);
// coordination between the processes goes through a directory
coordinateTempDir = CommonTestUtils.createTempDirectory();
// find a free port to start the JobManager
final int jobManagerPort = NetUtils.getAvailablePort();
// start a JobManager
Tuple2<String, Object> localAddress = new Tuple2<String, Object>("localhost", jobManagerPort);
Configuration jmConfig = new Configuration();
jmConfig.setString(ConfigConstants.AKKA_WATCH_HEARTBEAT_INTERVAL, "1000 ms");
jmConfig.setString(ConfigConstants.AKKA_WATCH_HEARTBEAT_PAUSE, "6 s");
jmConfig.setInteger(ConfigConstants.AKKA_WATCH_THRESHOLD, 9);
jmConfig.setString(ConfigConstants.RESTART_STRATEGY_FIXED_DELAY_DELAY, "10 s");
jmConfig.setString(ConfigConstants.AKKA_ASK_TIMEOUT, "100 s");
jmActorSystem = AkkaUtils.createActorSystem(jmConfig, new Some<>(localAddress));
ActorRef jmActor = JobManager.startJobManagerActors(jmConfig, jmActorSystem, TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), JobManager.class, MemoryArchivist.class)._1();
// the TaskManager java command
String[] command = new String[] { javaCommand, "-Dlog.level=DEBUG", "-Dlog4j.configuration=file:" + tempLogFile.getAbsolutePath(), "-Xms80m", "-Xmx80m", "-classpath", getCurrentClasspath(), TaskManagerProcessEntryPoint.class.getName(), String.valueOf(jobManagerPort) };
// start the first two TaskManager processes
taskManagerProcess1 = new ProcessBuilder(command).start();
new CommonTestUtils.PipeForwarder(taskManagerProcess1.getErrorStream(), processOutput1);
taskManagerProcess2 = new ProcessBuilder(command).start();
new CommonTestUtils.PipeForwarder(taskManagerProcess2.getErrorStream(), processOutput2);
// we wait for the JobManager to have the two TaskManagers available
// since some of the CI environments are very hostile, we need to give this a lot of time (2 minutes)
waitUntilNumTaskManagersAreRegistered(jmActor, 2, 120000);
// the program will set a marker file in each of its parallel tasks once they are ready, so that
// this coordinating code is aware of this.
// the program will very slowly consume elements until the marker file (later created by the
// test driver code) is present
final File coordinateDirClosure = coordinateTempDir;
final AtomicReference<Throwable> errorRef = new AtomicReference<>();
// we trigger program execution in a separate thread
Thread programTrigger = new Thread("Program Trigger") {
@Override
public void run() {
try {
testTaskManagerFailure(jobManagerPort, coordinateDirClosure);
} catch (Throwable t) {
t.printStackTrace();
errorRef.set(t);
}
}
};
//start the test program
programTrigger.start();
// max 20 seconds
if (!waitForMarkerFiles(coordinateTempDir, READY_MARKER_FILE_PREFIX, PARALLELISM, 120000)) {
// check if the program failed for some reason
if (errorRef.get() != null) {
Throwable error = errorRef.get();
error.printStackTrace();
fail("The program encountered a " + error.getClass().getSimpleName() + " : " + error.getMessage());
} else {
// no error occurred, simply a timeout
fail("The tasks were not started within time (" + 120000 + "msecs)");
}
}
// start the third TaskManager
taskManagerProcess3 = new ProcessBuilder(command).start();
new CommonTestUtils.PipeForwarder(taskManagerProcess3.getErrorStream(), processOutput3);
// we wait for the third TaskManager to register
// since some of the CI environments are very hostile, we need to give this a lot of time (2 minutes)
waitUntilNumTaskManagersAreRegistered(jmActor, 3, 120000);
// kill one of the previous TaskManagers, triggering a failure and recovery
taskManagerProcess1.destroy();
taskManagerProcess1 = null;
// we create the marker file which signals the program functions tasks that they can complete
touchFile(new File(coordinateTempDir, PROCEED_MARKER_FILE));
// wait for at most 5 minutes for the program to complete
programTrigger.join(300000);
// check that the program really finished
assertFalse("The program did not finish in time", programTrigger.isAlive());
// check whether the program encountered an error
if (errorRef.get() != null) {
Throwable error = errorRef.get();
error.printStackTrace();
fail("The program encountered a " + error.getClass().getSimpleName() + " : " + error.getMessage());
}
// all seems well :-)
} catch (Exception e) {
e.printStackTrace();
printProcessLog("TaskManager 1", processOutput1.toString());
printProcessLog("TaskManager 2", processOutput2.toString());
printProcessLog("TaskManager 3", processOutput3.toString());
fail(e.getMessage());
} catch (Error e) {
e.printStackTrace();
printProcessLog("TaskManager 1", processOutput1.toString());
printProcessLog("TaskManager 2", processOutput2.toString());
printProcessLog("TaskManager 3", processOutput3.toString());
throw e;
} finally {
if (taskManagerProcess1 != null) {
taskManagerProcess1.destroy();
}
if (taskManagerProcess2 != null) {
taskManagerProcess2.destroy();
}
if (taskManagerProcess3 != null) {
taskManagerProcess3.destroy();
}
if (jmActorSystem != null) {
jmActorSystem.shutdown();
}
if (coordinateTempDir != null) {
try {
FileUtils.deleteDirectory(coordinateTempDir);
} catch (Throwable t) {
// we can ignore this
}
}
}
}
use of java.util.concurrent.atomic.AtomicReference in project flink by apache.
the class SocketClientSinkTest method testSocketSink.
@Test
public void testSocketSink() throws Exception {
final ServerSocket server = new ServerSocket(0);
final int port = server.getLocalPort();
final AtomicReference<Throwable> error = new AtomicReference<Throwable>();
Thread sinkRunner = new Thread("Test sink runner") {
@Override
public void run() {
try {
SocketClientSink<String> simpleSink = new SocketClientSink<>(host, port, simpleSchema, 0);
simpleSink.open(new Configuration());
simpleSink.invoke(TEST_MESSAGE + '\n');
simpleSink.close();
} catch (Throwable t) {
error.set(t);
}
}
};
sinkRunner.start();
Socket sk = server.accept();
BufferedReader rdr = new BufferedReader(new InputStreamReader(sk.getInputStream()));
String value = rdr.readLine();
sinkRunner.join();
server.close();
if (error.get() != null) {
Throwable t = error.get();
t.printStackTrace();
fail("Error in spawned thread: " + t.getMessage());
}
assertEquals(TEST_MESSAGE, value);
}
use of java.util.concurrent.atomic.AtomicReference in project flink by apache.
the class SocketClientSinkTest method testSinkAutoFlush.
@Test
public void testSinkAutoFlush() throws Exception {
final ServerSocket server = new ServerSocket(0);
final int port = server.getLocalPort();
final SocketClientSink<String> simpleSink = new SocketClientSink<>(host, port, simpleSchema, 0, true);
simpleSink.open(new Configuration());
final AtomicReference<Throwable> error = new AtomicReference<Throwable>();
Thread sinkRunner = new Thread("Test sink runner") {
@Override
public void run() {
try {
// need two messages here: send a fin to cancel the client state:FIN_WAIT_2 while the server is CLOSE_WAIT
simpleSink.invoke(TEST_MESSAGE + '\n');
} catch (Throwable t) {
error.set(t);
}
}
};
sinkRunner.start();
Socket sk = server.accept();
BufferedReader rdr = new BufferedReader(new InputStreamReader(sk.getInputStream()));
String value = rdr.readLine();
sinkRunner.join();
simpleSink.close();
server.close();
if (error.get() != null) {
Throwable t = error.get();
t.printStackTrace();
fail("Error in spawned thread: " + t.getMessage());
}
assertEquals(TEST_MESSAGE, value);
}
use of java.util.concurrent.atomic.AtomicReference in project flink by apache.
the class StreamingRuntimeContextTest method testFoldingStateInstantiation.
@Test
public void testFoldingStateInstantiation() throws Exception {
final ExecutionConfig config = new ExecutionConfig();
config.registerKryoType(Path.class);
final AtomicReference<Object> descriptorCapture = new AtomicReference<>();
StreamingRuntimeContext context = new StreamingRuntimeContext(createDescriptorCapturingMockOp(descriptorCapture, config), createMockEnvironment(), Collections.<String, Accumulator<?, ?>>emptyMap());
@SuppressWarnings("unchecked") FoldFunction<String, TaskInfo> folder = (FoldFunction<String, TaskInfo>) mock(FoldFunction.class);
FoldingStateDescriptor<String, TaskInfo> descr = new FoldingStateDescriptor<>("name", null, folder, TaskInfo.class);
context.getFoldingState(descr);
FoldingStateDescriptor<?, ?> descrIntercepted = (FoldingStateDescriptor<?, ?>) descriptorCapture.get();
TypeSerializer<?> serializer = descrIntercepted.getSerializer();
// check that the Path class is really registered, i.e., the execution config was applied
assertTrue(serializer instanceof KryoSerializer);
assertTrue(((KryoSerializer<?>) serializer).getKryo().getRegistration(Path.class).getId() > 0);
}
use of java.util.concurrent.atomic.AtomicReference in project flink by apache.
the class StreamingRuntimeContextTest method testValueStateInstantiation.
@Test
public void testValueStateInstantiation() throws Exception {
final ExecutionConfig config = new ExecutionConfig();
config.registerKryoType(Path.class);
final AtomicReference<Object> descriptorCapture = new AtomicReference<>();
StreamingRuntimeContext context = new StreamingRuntimeContext(createDescriptorCapturingMockOp(descriptorCapture, config), createMockEnvironment(), Collections.<String, Accumulator<?, ?>>emptyMap());
ValueStateDescriptor<TaskInfo> descr = new ValueStateDescriptor<>("name", TaskInfo.class);
context.getState(descr);
StateDescriptor<?, ?> descrIntercepted = (StateDescriptor<?, ?>) descriptorCapture.get();
TypeSerializer<?> serializer = descrIntercepted.getSerializer();
// check that the Path class is really registered, i.e., the execution config was applied
assertTrue(serializer instanceof KryoSerializer);
assertTrue(((KryoSerializer<?>) serializer).getKryo().getRegistration(Path.class).getId() > 0);
}
Aggregations