Use of scala.Tuple2 in project flink by apache.
The class AbstractTaskManagerProcessFailureRecoveryTest, method testTaskManagerProcessFailure.
@Test
public void testTaskManagerProcessFailure() {
final StringWriter processOutput1 = new StringWriter();
final StringWriter processOutput2 = new StringWriter();
final StringWriter processOutput3 = new StringWriter();
ActorSystem jmActorSystem = null;
Process taskManagerProcess1 = null;
Process taskManagerProcess2 = null;
Process taskManagerProcess3 = null;
File coordinateTempDir = null;
try {
// check that we run this test only if the java command
// is available on this machine
String javaCommand = getJavaCommandPath();
if (javaCommand == null) {
System.out.println("---- Skipping Process Failure test : Could not find java executable ----");
return;
}
// create a logging file for the process
File tempLogFile = File.createTempFile(getClass().getSimpleName() + "-", "-log4j.properties");
tempLogFile.deleteOnExit();
CommonTestUtils.printLog4jDebugConfig(tempLogFile);
// coordination between the processes goes through a directory
coordinateTempDir = CommonTestUtils.createTempDirectory();
// find a free port to start the JobManager
final int jobManagerPort = NetUtils.getAvailablePort();
// start a JobManager
Tuple2<String, Object> localAddress = new Tuple2<String, Object>("localhost", jobManagerPort);
Configuration jmConfig = new Configuration();
jmConfig.setString(ConfigConstants.AKKA_WATCH_HEARTBEAT_INTERVAL, "1000 ms");
jmConfig.setString(ConfigConstants.AKKA_WATCH_HEARTBEAT_PAUSE, "6 s");
jmConfig.setInteger(ConfigConstants.AKKA_WATCH_THRESHOLD, 9);
jmConfig.setString(ConfigConstants.RESTART_STRATEGY_FIXED_DELAY_DELAY, "10 s");
jmConfig.setString(ConfigConstants.AKKA_ASK_TIMEOUT, "100 s");
jmActorSystem = AkkaUtils.createActorSystem(jmConfig, new Some<>(localAddress));
ActorRef jmActor = JobManager.startJobManagerActors(jmConfig, jmActorSystem, TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), JobManager.class, MemoryArchivist.class)._1();
// the TaskManager java command
String[] command = new String[] { javaCommand, "-Dlog.level=DEBUG", "-Dlog4j.configuration=file:" + tempLogFile.getAbsolutePath(), "-Xms80m", "-Xmx80m", "-classpath", getCurrentClasspath(), TaskManagerProcessEntryPoint.class.getName(), String.valueOf(jobManagerPort) };
// start the first two TaskManager processes
taskManagerProcess1 = new ProcessBuilder(command).start();
new CommonTestUtils.PipeForwarder(taskManagerProcess1.getErrorStream(), processOutput1);
taskManagerProcess2 = new ProcessBuilder(command).start();
new CommonTestUtils.PipeForwarder(taskManagerProcess2.getErrorStream(), processOutput2);
// we wait for the JobManager to have the two TaskManagers available
// since some of the CI environments are very hostile, we need to give this a lot of time (2 minutes)
waitUntilNumTaskManagersAreRegistered(jmActor, 2, 120000);
// each of the program's parallel tasks sets a marker file once it is ready, so that
// this coordinating code knows they are all running.
// the program then consumes elements very slowly until the proceed marker file (created
// later by this test driver code) is present
final File coordinateDirClosure = coordinateTempDir;
final AtomicReference<Throwable> errorRef = new AtomicReference<>();
// we trigger program execution in a separate thread
Thread programTrigger = new Thread("Program Trigger") {
@Override
public void run() {
try {
testTaskManagerFailure(jobManagerPort, coordinateDirClosure);
} catch (Throwable t) {
t.printStackTrace();
errorRef.set(t);
}
}
};
//start the test program
programTrigger.start();
// wait up to 2 minutes for all tasks to create their ready marker files
if (!waitForMarkerFiles(coordinateTempDir, READY_MARKER_FILE_PREFIX, PARALLELISM, 120000)) {
// check if the program failed for some reason
if (errorRef.get() != null) {
Throwable error = errorRef.get();
error.printStackTrace();
fail("The program encountered a " + error.getClass().getSimpleName() + " : " + error.getMessage());
} else {
// no error occurred, simply a timeout
fail("The tasks were not started within time (" + 120000 + "msecs)");
}
}
// start the third TaskManager
taskManagerProcess3 = new ProcessBuilder(command).start();
new CommonTestUtils.PipeForwarder(taskManagerProcess3.getErrorStream(), processOutput3);
// we wait for the third TaskManager to register
// since some of the CI environments are very hostile, we need to give this a lot of time (2 minutes)
waitUntilNumTaskManagersAreRegistered(jmActor, 3, 120000);
// kill one of the previous TaskManagers, triggering a failure and recovery
taskManagerProcess1.destroy();
taskManagerProcess1 = null;
// we create the marker file which signals the program's tasks that they can complete
touchFile(new File(coordinateTempDir, PROCEED_MARKER_FILE));
// wait for at most 5 minutes for the program to complete
programTrigger.join(300000);
// check that the program really finished
assertFalse("The program did not finish in time", programTrigger.isAlive());
// check whether the program encountered an error
if (errorRef.get() != null) {
Throwable error = errorRef.get();
error.printStackTrace();
fail("The program encountered a " + error.getClass().getSimpleName() + " : " + error.getMessage());
}
// all seems well :-)
} catch (Exception e) {
e.printStackTrace();
printProcessLog("TaskManager 1", processOutput1.toString());
printProcessLog("TaskManager 2", processOutput2.toString());
printProcessLog("TaskManager 3", processOutput3.toString());
fail(e.getMessage());
} catch (Error e) {
e.printStackTrace();
printProcessLog("TaskManager 1", processOutput1.toString());
printProcessLog("TaskManager 2", processOutput2.toString());
printProcessLog("TaskManager 3", processOutput3.toString());
throw e;
} finally {
if (taskManagerProcess1 != null) {
taskManagerProcess1.destroy();
}
if (taskManagerProcess2 != null) {
taskManagerProcess2.destroy();
}
if (taskManagerProcess3 != null) {
taskManagerProcess3.destroy();
}
if (jmActorSystem != null) {
jmActorSystem.shutdown();
}
if (coordinateTempDir != null) {
try {
FileUtils.deleteDirectory(coordinateTempDir);
} catch (Throwable t) {
// we can ignore this
}
}
}
}
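For reference, the JobManager address above travels as a (hostname, port) pair typed Tuple2<String, Object>, since the underlying Scala API expects (String, Any). Below is a minimal, self-contained sketch of that idiom; the class name and port value are illustrative, not taken from the Flink test:

import scala.Tuple2;

public class Tuple2AddressSketch {
    public static void main(String[] args) {
        // the port is boxed to Object because the Scala side expects (String, Any)
        Tuple2<String, Object> localAddress = new Tuple2<String, Object>("localhost", 6123);
        String host = localAddress._1();          // accessor for the first element
        int port = (Integer) localAddress._2();   // unbox the second element
        System.out.println(host + ":" + port);
    }
}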
Use of scala.Tuple2 in project flink by apache.
The class TaskManagerProcessReapingTestBase, method testReapProcessOnFailure.
@Test
public void testReapProcessOnFailure() {
Process taskManagerProcess = null;
ActorSystem jmActorSystem = null;
final StringWriter processOutput = new StringWriter();
try {
// check that we run this test only if the java command
// is available on this machine
String javaCommand = getJavaCommandPath();
if (javaCommand == null) {
System.out.println("---- Skipping TaskManagerProcessReapingTest : Could not find java executable ----");
return;
}
// create a logging file for the process
File tempLogFile = File.createTempFile("testlogconfig", "properties");
tempLogFile.deleteOnExit();
CommonTestUtils.printLog4jDebugConfig(tempLogFile);
final int jobManagerPort = NetUtils.getAvailablePort();
// start a JobManager
Tuple2<String, Object> localAddress = new Tuple2<String, Object>("localhost", jobManagerPort);
jmActorSystem = AkkaUtils.createActorSystem(new Configuration(), new Some<Tuple2<String, Object>>(localAddress));
ActorRef jmActor = JobManager.startJobManagerActors(new Configuration(), jmActorSystem, TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), JobManager.class, MemoryArchivist.class)._1;
// start a ResourceManager
StandaloneLeaderRetrievalService standaloneLeaderRetrievalService = new StandaloneLeaderRetrievalService(AkkaUtils.getAkkaURL(jmActorSystem, jmActor));
FlinkResourceManager.startResourceManagerActors(new Configuration(), jmActorSystem, standaloneLeaderRetrievalService, StandaloneResourceManager.class);
final int taskManagerPort = NetUtils.getAvailablePort();
// start the task manager process
String[] command = new String[] { javaCommand, "-Dlog.level=DEBUG", "-Dlog4j.configuration=file:" + tempLogFile.getAbsolutePath(), "-Xms256m", "-Xmx256m", "-classpath", getCurrentClasspath(), TaskManagerTestEntryPoint.class.getName(), String.valueOf(jobManagerPort), String.valueOf(taskManagerPort) };
ProcessBuilder bld = new ProcessBuilder(command);
taskManagerProcess = bld.start();
new PipeForwarder(taskManagerProcess.getErrorStream(), processOutput);
// grab the reference to the TaskManager. try multiple times, until the process
// is started and the TaskManager is up
String taskManagerActorName = String.format("akka.tcp://flink@%s/user/%s", "localhost:" + taskManagerPort, TaskManager.TASK_MANAGER_NAME());
ActorRef taskManagerRef = null;
Throwable lastError = null;
for (int i = 0; i < 40; i++) {
try {
taskManagerRef = TaskManager.getTaskManagerRemoteReference(taskManagerActorName, jmActorSystem, new FiniteDuration(25, TimeUnit.SECONDS));
break;
} catch (Throwable t) {
// TaskManager probably not ready yet
lastError = t;
}
Thread.sleep(500);
}
assertTrue("TaskManager process died", isProcessAlive(taskManagerProcess));
if (taskManagerRef == null) {
if (lastError != null) {
lastError.printStackTrace();
}
fail("TaskManager process did not launch the TaskManager properly. Failed to look up " + taskManagerActorName);
}
// kill the TaskManager actor
onTaskManagerProcessRunning(taskManagerRef);
// wait at most 10 seconds for the process to terminate
{
long now = System.currentTimeMillis();
long deadline = now + 10000;
while (now < deadline && isProcessAlive(taskManagerProcess)) {
Thread.sleep(100);
now = System.currentTimeMillis();
}
}
assertFalse("TaskManager process did not terminate upon actor death", isProcessAlive(taskManagerProcess));
int returnCode = taskManagerProcess.exitValue();
assertEquals("TaskManager died, but not because of the process reaper", TaskManager.RUNTIME_FAILURE_RETURN_CODE(), returnCode);
onTaskManagerProcessTerminated(processOutput.toString());
} catch (Exception e) {
e.printStackTrace();
printProcessLog(processOutput.toString());
fail(e.getMessage());
} catch (Error e) {
e.printStackTrace();
printProcessLog(processOutput.toString());
throw e;
} finally {
if (taskManagerProcess != null) {
taskManagerProcess.destroy();
}
if (jmActorSystem != null) {
jmActorSystem.shutdown();
}
}
}
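Note the Some wrapper around the address tuple: Java callers pass a defined scala.Option by constructing scala.Some directly. A minimal sketch of that pattern, assuming Scala 2.10+ (where Option.empty() is exposed to Java); the class name is illustrative:

import scala.Option;
import scala.Some;
import scala.Tuple2;

public class OptionTuple2Sketch {
    public static void main(String[] args) {
        Tuple2<String, Object> address = new Tuple2<String, Object>("localhost", 6123);
        // Some(...) is Scala's "value present" case of Option
        Option<Tuple2<String, Object>> maybeAddress = new Some<Tuple2<String, Object>>(address);
        if (maybeAddress.isDefined()) {
            System.out.println("binding to " + maybeAddress.get()._1());
        }
        // Option.empty() is the Java-side spelling of Scala's None
        Option<Tuple2<String, Object>> none = Option.empty();
        System.out.println("defined: " + none.isDefined());
    }
}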
Use of scala.Tuple2 in project kafka by apache.
The class InternalTopicIntegrationTest, method getTopicConfigProperties.
private Properties getTopicConfigProperties(final String changelog) {
// Note: You must initialize the ZkClient with ZKStringSerializer. If you don't, then
// createTopics() will only seem to work (it will return without error). The topic will exist
// only in ZooKeeper and will be returned when listing topics, but Kafka itself does not create
// the topic.
final ZkClient zkClient = new ZkClient(CLUSTER.zKConnectString(), DEFAULT_ZK_SESSION_TIMEOUT_MS, DEFAULT_ZK_CONNECTION_TIMEOUT_MS, ZKStringSerializer$.MODULE$);
try {
final boolean isSecure = false;
final ZkUtils zkUtils = new ZkUtils(zkClient, new ZkConnection(CLUSTER.zKConnectString()), isSecure);
final Map<String, Properties> topicConfigs = AdminUtils.fetchAllTopicConfigs(zkUtils);
final Iterator<Tuple2<String, Properties>> it = topicConfigs.iterator();
while (it.hasNext()) {
final Tuple2<String, Properties> topicConfig = it.next();
final String topic = topicConfig._1;
final Properties prop = topicConfig._2;
if (topic.equals(changelog)) {
return prop;
}
}
return new Properties();
} finally {
zkClient.close();
}
}
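The loop above is the usual way to walk a scala.collection.Map from Java: its iterator yields Tuple2 entries rather than java.util.Map.Entry objects. Here is a self-contained sketch of the same lookup; ScalaMapLookup and findTopicConfig are hypothetical names, not part of the Kafka test:

import java.util.Properties;
import scala.Tuple2;
import scala.collection.Iterator;

public class ScalaMapLookup {
    // hypothetical helper isolating the lookup pattern used above
    static Properties findTopicConfig(scala.collection.Map<String, Properties> topicConfigs, String topic) {
        Iterator<Tuple2<String, Properties>> it = topicConfigs.iterator();
        while (it.hasNext()) {
            Tuple2<String, Properties> entry = it.next();
            if (entry._1().equals(topic)) {
                return entry._2();   // the per-topic Properties
            }
        }
        return new Properties();     // no match: empty config, as in the test
    }
}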
Use of scala.Tuple2 in project zeppelin by apache.
The class ZeppelinContext, method tuplesToParamOptions.
private ParamOption[] tuplesToParamOptions(scala.collection.Iterable<Tuple2<Object, String>> options) {
int n = options.size();
ParamOption[] paramOptions = new ParamOption[n];
Iterator<Tuple2<Object, String>> it = asJavaIterable(options).iterator();
int i = 0;
while (it.hasNext()) {
Tuple2<Object, String> valueAndDisplayValue = it.next();
paramOptions[i++] = new ParamOption(valueAndDisplayValue._1(), valueAndDisplayValue._2());
}
return paramOptions;
}
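Since the options parameter is a Scala Iterable (the asJavaIterable call bridges it back to Java inside the method), a Java caller first has to bridge a java.util collection the other way. A sketch of building such an argument, assuming the scala.collection.JavaConverters API of Scala 2.11/2.12; the class name and sample values are illustrative:

import java.util.Arrays;
import java.util.List;
import scala.Tuple2;
import scala.collection.JavaConverters;

public class ParamOptionsSketch {
    public static void main(String[] args) {
        List<Tuple2<Object, String>> options = Arrays.asList(
                new Tuple2<Object, String>(1, "one"),
                new Tuple2<Object, String>(2, "two"));
        // bridge java.util.List -> scala.collection.Iterable
        scala.collection.Iterable<Tuple2<Object, String>> scalaOptions =
                JavaConverters.iterableAsScalaIterableConverter(options).asScala();
        System.out.println(scalaOptions.size());   // 2
    }
}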
Use of scala.Tuple2 in project learning-spark by databricks.
The class LogAnalyzerTotal, method processAccessLogs.
public void processAccessLogs(String outDir, JavaDStream<ApacheAccessLog> accessLogsDStream) {
// Calculate statistics based on the content size, and update the static variables to track this.
accessLogsDStream.foreachRDD(new Function<JavaRDD<ApacheAccessLog>, Void>() {
public Void call(JavaRDD<ApacheAccessLog> accessLogs) {
Tuple4<Long, Long, Long, Long> stats = Functions.contentSizeStats(accessLogs);
if (stats != null) {
runningCount.getAndAdd(stats._1());
runningSum.getAndAdd(stats._2());
runningMin.set(Math.min(runningMin.get(), stats._3()));
runningMax.set(Math.max(runningMax.get(), stats._4()));
}
return null;
}
});
// A DStream of Response Code Counts.
JavaPairDStream<Integer, Long> responseCodeCountDStream = accessLogsDStream.transformToPair(new Function<JavaRDD<ApacheAccessLog>, JavaPairRDD<Integer, Long>>() {
public JavaPairRDD<Integer, Long> call(JavaRDD<ApacheAccessLog> rdd) {
return Functions.responseCodeCount(rdd);
}
}).updateStateByKey(new Functions.ComputeRunningSum());
responseCodeCountDStream.foreachRDD(new Function<JavaPairRDD<Integer, Long>, Void>() {
public Void call(JavaPairRDD<Integer, Long> rdd) {
currentResponseCodeCounts = rdd.take(100);
return null;
}
});
// A DStream of ipAddressCounts.
JavaPairDStream<String, Long> ipRawDStream = accessLogsDStream.transformToPair(new Function<JavaRDD<ApacheAccessLog>, JavaPairRDD<String, Long>>() {
public JavaPairRDD<String, Long> call(JavaRDD<ApacheAccessLog> rdd) {
return Functions.ipAddressCount(rdd);
}
});
JavaPairDStream<String, Long> ipCumDStream = ipRawDStream.updateStateByKey(new Functions.ComputeRunningSum());
// A DStream of ipAddressCounts without transform
JavaPairDStream<String, Long> ipDStream = accessLogsDStream.mapToPair(new Functions.IpTuple());
JavaPairDStream<String, Long> ipCountsDStream = ipDStream.reduceByKey(new Functions.LongSumReducer());
// a DStream of bytes transferred per IP address, joined below with the request counts
JavaPairDStream<String, Long> ipBytesDStream = accessLogsDStream.mapToPair(new Functions.IpContentTuple());
JavaPairDStream<String, Long> ipBytesSumDStream = ipBytesDStream.reduceByKey(new Functions.LongSumReducer());
JavaPairDStream<String, Tuple2<Long, Long>> ipBytesRequestCountDStream = ipBytesSumDStream.join(ipCountsDStream);
// Save our dstream of ip address request counts
JavaPairDStream<Text, LongWritable> writableDStream = ipDStream.mapToPair(new PairFunction<Tuple2<String, Long>, Text, LongWritable>() {
public Tuple2<Text, LongWritable> call(Tuple2<String, Long> e) {
return new Tuple2(new Text(e._1()), new LongWritable(e._2()));
}
});
class OutFormat extends SequenceFileOutputFormat<Text, LongWritable> {
}
writableDStream.saveAsHadoopFiles(outDir, "pandas", Text.class, LongWritable.class, OutFormat.class);
// all IP addresses seen more than 10 times
JavaDStream<String> ipAddressDStream = ipCumDStream.transform(new Function<JavaPairRDD<String, Long>, JavaRDD<String>>() {
public JavaRDD<String> call(JavaPairRDD<String, Long> rdd) {
return Functions.filterIPAddress(rdd);
}
});
ipAddressDStream.foreachRDD(new Function<JavaRDD<String>, Void>() {
public Void call(JavaRDD<String> rdd) {
List<String> currentIPAddresses = rdd.take(100);
return null;
}
});
// A DStream of endpoint to count.
JavaPairDStream<String, Long> endpointCountsDStream = accessLogsDStream.transformToPair(new Function<JavaRDD<ApacheAccessLog>, JavaPairRDD<String, Long>>() {
public JavaPairRDD<String, Long> call(JavaRDD<ApacheAccessLog> rdd) {
return Functions.endpointCount(rdd);
}
}).updateStateByKey(new Functions.ComputeRunningSum());
Object ordering = Ordering.natural();
final Comparator<Long> cmp = (Comparator<Long>) ordering;
endpointCountsDStream.foreachRDD(new Function<JavaPairRDD<String, Long>, Void>() {
public Void call(JavaPairRDD<String, Long> rdd) {
currentTopEndpoints = rdd.takeOrdered(10, new Functions.ValueComparator<String, Long>(cmp));
return null;
}
});
}
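Throughout this example, Tuple2 is the element type behind every JavaPairDStream and JavaPairRDD: mapToPair produces tuples, while reduceByKey and join consume them. A minimal batch-mode sketch of that pattern, written with Java 8 lambdas instead of the anonymous classes above; the class name and sample data are illustrative:

import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

public class PairRddSketch {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("tuple2-sketch").setMaster("local[2]");
        JavaSparkContext sc = new JavaSparkContext(conf);
        // mapToPair emits Tuple2 elements; reduceByKey sums per key,
        // mirroring the IpTuple / LongSumReducer pair in the code above
        JavaPairRDD<String, Long> counts = sc
                .parallelize(Arrays.asList("10.0.0.1", "10.0.0.2", "10.0.0.1"))
                .mapToPair(ip -> new Tuple2<>(ip, 1L))
                .reduceByKey((a, b) -> a + b);
        for (Tuple2<String, Long> t : counts.collect()) {
            System.out.println(t._1() + " -> " + t._2());
        }
        sc.stop();
    }
}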