Example 11 with Tuple2

use of scala.Tuple2 in project flink by apache.

the class AbstractTaskManagerProcessFailureRecoveryTest method testTaskManagerProcessFailure.

@Test
public void testTaskManagerProcessFailure() {
    final StringWriter processOutput1 = new StringWriter();
    final StringWriter processOutput2 = new StringWriter();
    final StringWriter processOutput3 = new StringWriter();
    ActorSystem jmActorSystem = null;
    Process taskManagerProcess1 = null;
    Process taskManagerProcess2 = null;
    Process taskManagerProcess3 = null;
    File coordinateTempDir = null;
    try {
        // check that we run this test only if the java command
        // is available on this machine
        String javaCommand = getJavaCommandPath();
        if (javaCommand == null) {
            System.out.println("---- Skipping Process Failure test : Could not find java executable ----");
            return;
        }
        // create a logging file for the process
        File tempLogFile = File.createTempFile(getClass().getSimpleName() + "-", "-log4j.properties");
        tempLogFile.deleteOnExit();
        CommonTestUtils.printLog4jDebugConfig(tempLogFile);
        // coordination between the processes goes through a directory
        coordinateTempDir = CommonTestUtils.createTempDirectory();
        // find a free port to start the JobManager
        final int jobManagerPort = NetUtils.getAvailablePort();
        // start a JobManager
        Tuple2<String, Object> localAddress = new Tuple2<String, Object>("localhost", jobManagerPort);
        Configuration jmConfig = new Configuration();
        jmConfig.setString(ConfigConstants.AKKA_WATCH_HEARTBEAT_INTERVAL, "1000 ms");
        jmConfig.setString(ConfigConstants.AKKA_WATCH_HEARTBEAT_PAUSE, "6 s");
        jmConfig.setInteger(ConfigConstants.AKKA_WATCH_THRESHOLD, 9);
        jmConfig.setString(ConfigConstants.RESTART_STRATEGY_FIXED_DELAY_DELAY, "10 s");
        jmConfig.setString(ConfigConstants.AKKA_ASK_TIMEOUT, "100 s");
        jmActorSystem = AkkaUtils.createActorSystem(jmConfig, new Some<>(localAddress));
        ActorRef jmActor = JobManager.startJobManagerActors(jmConfig, jmActorSystem, TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), JobManager.class, MemoryArchivist.class)._1();
        // the TaskManager java command
        String[] command = new String[] { javaCommand, "-Dlog.level=DEBUG", "-Dlog4j.configuration=file:" + tempLogFile.getAbsolutePath(), "-Xms80m", "-Xmx80m", "-classpath", getCurrentClasspath(), TaskManagerProcessEntryPoint.class.getName(), String.valueOf(jobManagerPort) };
        // start the first two TaskManager processes
        taskManagerProcess1 = new ProcessBuilder(command).start();
        new CommonTestUtils.PipeForwarder(taskManagerProcess1.getErrorStream(), processOutput1);
        taskManagerProcess2 = new ProcessBuilder(command).start();
        new CommonTestUtils.PipeForwarder(taskManagerProcess2.getErrorStream(), processOutput2);
        // we wait for the JobManager to have the two TaskManagers available
        // since some of the CI environments are very hostile, we need to give this a lot of time (2 minutes)
        waitUntilNumTaskManagersAreRegistered(jmActor, 2, 120000);
        // each parallel task of the program will set a marker file once it is ready, so that
        // this coordinating code knows when all tasks are running.
        // the program will very slowly consume elements until the marker file (later created by the
        // test driver code) is present
        final File coordinateDirClosure = coordinateTempDir;
        final AtomicReference<Throwable> errorRef = new AtomicReference<>();
        // we trigger program execution in a separate thread
        Thread programTrigger = new Thread("Program Trigger") {

            @Override
            public void run() {
                try {
                    testTaskManagerFailure(jobManagerPort, coordinateDirClosure);
                } catch (Throwable t) {
                    t.printStackTrace();
                    errorRef.set(t);
                }
            }
        };
        // start the test program
        programTrigger.start();
        // wait up to 2 minutes for all parallel tasks to create their ready marker files
        if (!waitForMarkerFiles(coordinateTempDir, READY_MARKER_FILE_PREFIX, PARALLELISM, 120000)) {
            // check if the program failed for some reason
            if (errorRef.get() != null) {
                Throwable error = errorRef.get();
                error.printStackTrace();
                fail("The program encountered a " + error.getClass().getSimpleName() + " : " + error.getMessage());
            } else {
                // no error occurred, simply a timeout
                fail("The tasks were not started within time (" + 120000 + "msecs)");
            }
        }
        // start the third TaskManager
        taskManagerProcess3 = new ProcessBuilder(command).start();
        new CommonTestUtils.PipeForwarder(taskManagerProcess3.getErrorStream(), processOutput3);
        // we wait for the third TaskManager to register
        // since some of the CI environments are very hostile, we need to give this a lot of time (2 minutes)
        waitUntilNumTaskManagersAreRegistered(jmActor, 3, 120000);
        // kill one of the previous TaskManagers, triggering a failure and recovery
        taskManagerProcess1.destroy();
        taskManagerProcess1 = null;
        // we create the marker file that signals the program's tasks that they can complete
        touchFile(new File(coordinateTempDir, PROCEED_MARKER_FILE));
        // wait for at most 5 minutes for the program to complete
        programTrigger.join(300000);
        // check that the program really finished
        assertFalse("The program did not finish in time", programTrigger.isAlive());
        // check whether the program encountered an error
        if (errorRef.get() != null) {
            Throwable error = errorRef.get();
            error.printStackTrace();
            fail("The program encountered a " + error.getClass().getSimpleName() + " : " + error.getMessage());
        }
    // all seems well :-)
    } catch (Exception e) {
        e.printStackTrace();
        printProcessLog("TaskManager 1", processOutput1.toString());
        printProcessLog("TaskManager 2", processOutput2.toString());
        printProcessLog("TaskManager 3", processOutput3.toString());
        fail(e.getMessage());
    } catch (Error e) {
        e.printStackTrace();
        printProcessLog("TaskManager 1", processOutput1.toString());
        printProcessLog("TaskManager 2", processOutput2.toString());
        printProcessLog("TaskManager 3", processOutput3.toString());
        throw e;
    } finally {
        if (taskManagerProcess1 != null) {
            taskManagerProcess1.destroy();
        }
        if (taskManagerProcess2 != null) {
            taskManagerProcess2.destroy();
        }
        if (taskManagerProcess3 != null) {
            taskManagerProcess3.destroy();
        }
        if (jmActorSystem != null) {
            jmActorSystem.shutdown();
        }
        if (coordinateTempDir != null) {
            try {
                FileUtils.deleteDirectory(coordinateTempDir);
            } catch (Throwable t) {
            // we can ignore this
            }
        }
    }
}
Also used : ActorSystem(akka.actor.ActorSystem) MemoryArchivist(org.apache.flink.runtime.jobmanager.MemoryArchivist) Configuration(org.apache.flink.configuration.Configuration) ActorRef(akka.actor.ActorRef) AtomicReference(java.util.concurrent.atomic.AtomicReference) JobManager(org.apache.flink.runtime.jobmanager.JobManager) TimeoutException(java.util.concurrent.TimeoutException) IOException(java.io.IOException) Some(scala.Some) StringWriter(java.io.StringWriter) Tuple2(scala.Tuple2) File(java.io.File) Test(org.junit.Test)
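
The Tuple2 above exists only to satisfy AkkaUtils.createActorSystem, which expects an optional (hostname, port) pair on the Scala side. A minimal sketch of building and reading such a pair from Java (the values are illustrative; scala.Tuple2 must be imported):

// build the (hostname, port) pair expected by the Scala-side API
Tuple2<String, Object> address = new Tuple2<String, Object>("localhost", 6123);
// read the components back through the generated accessor methods
String host = address._1();
// the second slot is typed Object, so the port needs an unboxing cast
int port = (Integer) address._2();
System.out.println(host + ":" + port);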

Example 12 with Tuple2

use of scala.Tuple2 in project flink by apache.

the class TaskManagerProcessReapingTestBase method testReapProcessOnFailure.

@Test
public void testReapProcessOnFailure() {
    Process taskManagerProcess = null;
    ActorSystem jmActorSystem = null;
    final StringWriter processOutput = new StringWriter();
    try {
        // check that we run this test only if the java command
        // is available on this machine
        String javaCommand = getJavaCommandPath();
        if (javaCommand == null) {
            System.out.println("---- Skipping TaskManagerProcessReapingTest : Could not find java executable ----");
            return;
        }
        // create a logging file for the process
        File tempLogFile = File.createTempFile("testlogconfig", "properties");
        tempLogFile.deleteOnExit();
        CommonTestUtils.printLog4jDebugConfig(tempLogFile);
        final int jobManagerPort = NetUtils.getAvailablePort();
        // start a JobManager
        Tuple2<String, Object> localAddress = new Tuple2<String, Object>("localhost", jobManagerPort);
        jmActorSystem = AkkaUtils.createActorSystem(new Configuration(), new Some<Tuple2<String, Object>>(localAddress));
        ActorRef jmActor = JobManager.startJobManagerActors(new Configuration(), jmActorSystem, TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), JobManager.class, MemoryArchivist.class)._1;
        // start a ResourceManager
        StandaloneLeaderRetrievalService standaloneLeaderRetrievalService = new StandaloneLeaderRetrievalService(AkkaUtils.getAkkaURL(jmActorSystem, jmActor));
        FlinkResourceManager.startResourceManagerActors(new Configuration(), jmActorSystem, standaloneLeaderRetrievalService, StandaloneResourceManager.class);
        final int taskManagerPort = NetUtils.getAvailablePort();
        // start the task manager process
        String[] command = new String[] { javaCommand, "-Dlog.level=DEBUG", "-Dlog4j.configuration=file:" + tempLogFile.getAbsolutePath(), "-Xms256m", "-Xmx256m", "-classpath", getCurrentClasspath(), TaskManagerTestEntryPoint.class.getName(), String.valueOf(jobManagerPort), String.valueOf(taskManagerPort) };
        ProcessBuilder bld = new ProcessBuilder(command);
        taskManagerProcess = bld.start();
        new PipeForwarder(taskManagerProcess.getErrorStream(), processOutput);
        // grab the reference to the TaskManager. try multiple times, until the process
        // is started and the TaskManager is up
        String taskManagerActorName = String.format("akka.tcp://flink@%s/user/%s", "localhost:" + taskManagerPort, TaskManager.TASK_MANAGER_NAME());
        ActorRef taskManagerRef = null;
        Throwable lastError = null;
        for (int i = 0; i < 40; i++) {
            try {
                taskManagerRef = TaskManager.getTaskManagerRemoteReference(taskManagerActorName, jmActorSystem, new FiniteDuration(25, TimeUnit.SECONDS));
                break;
            } catch (Throwable t) {
                // TaskManager probably not ready yet
                lastError = t;
            }
            Thread.sleep(500);
        }
        assertTrue("TaskManager process died", isProcessAlive(taskManagerProcess));
        if (taskManagerRef == null) {
            if (lastError != null) {
                lastError.printStackTrace();
            }
            fail("TaskManager process did not launch the TaskManager properly. Failed to look up " + taskManagerActorName);
        }
        // kill the TaskManager actor
        onTaskManagerProcessRunning(taskManagerRef);
        // wait for at most 10 seconds for the process to terminate
        {
            long now = System.currentTimeMillis();
            long deadline = now + 10000;
            while (now < deadline && isProcessAlive(taskManagerProcess)) {
                Thread.sleep(100);
                now = System.currentTimeMillis();
            }
        }
        assertFalse("TaskManager process did not terminate upon actor death", isProcessAlive(taskManagerProcess));
        int returnCode = taskManagerProcess.exitValue();
        assertEquals("TaskManager died, but not because of the process reaper", TaskManager.RUNTIME_FAILURE_RETURN_CODE(), returnCode);
        onTaskManagerProcessTerminated(processOutput.toString());
    } catch (Exception e) {
        e.printStackTrace();
        printProcessLog(processOutput.toString());
        fail(e.getMessage());
    } catch (Error e) {
        e.printStackTrace();
        printProcessLog(processOutput.toString());
        throw e;
    } finally {
        if (taskManagerProcess != null) {
            taskManagerProcess.destroy();
        }
        if (jmActorSystem != null) {
            jmActorSystem.shutdown();
        }
    }
}
Also used : ActorSystem(akka.actor.ActorSystem) MemoryArchivist(org.apache.flink.runtime.jobmanager.MemoryArchivist) Configuration(org.apache.flink.configuration.Configuration) ActorRef(akka.actor.ActorRef) FiniteDuration(scala.concurrent.duration.FiniteDuration) JobManager(org.apache.flink.runtime.jobmanager.JobManager) IOException(java.io.IOException) Some(scala.Some) StringWriter(java.io.StringWriter) Tuple2(scala.Tuple2) StandaloneLeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.StandaloneLeaderRetrievalService) File(java.io.File) Test(org.junit.Test)
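
Note how the test keeps only the first component of the Tuple2 returned by startJobManagerActors. When a Scala API returns a pair, a Java caller typically unpacks it immediately; a minimal sketch, where startActors() is a hypothetical stand-in for any Tuple2-returning factory:

// hypothetical factory returning a (JobManager, archivist) pair of actor references
Tuple2<ActorRef, ActorRef> actors = startActors();
ActorRef jobManager = actors._1(); // first component of the pair
ActorRef archivist = actors._2(); // second component of the pair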

Example 13 with Tuple2

use of scala.Tuple2 in project kafka by apache.

the class InternalTopicIntegrationTest method getTopicConfigProperties.

private Properties getTopicConfigProperties(final String changelog) {
    // Note: You must initialize the ZkClient with ZKStringSerializer.  If you don't, then
    // createTopics() will only seem to work (it will return without error).  The topic will exist
    // only in ZooKeeper and will be returned when listing topics, but Kafka itself does not create
    // the topic.
    final ZkClient zkClient = new ZkClient(CLUSTER.zKConnectString(), DEFAULT_ZK_SESSION_TIMEOUT_MS, DEFAULT_ZK_CONNECTION_TIMEOUT_MS, ZKStringSerializer$.MODULE$);
    try {
        final boolean isSecure = false;
        final ZkUtils zkUtils = new ZkUtils(zkClient, new ZkConnection(CLUSTER.zKConnectString()), isSecure);
        final Map<String, Properties> topicConfigs = AdminUtils.fetchAllTopicConfigs(zkUtils);
        final Iterator it = topicConfigs.iterator();
        while (it.hasNext()) {
            final Tuple2<String, Properties> topicConfig = (Tuple2<String, Properties>) it.next();
            final String topic = topicConfig._1;
            final Properties prop = topicConfig._2;
            if (topic.equals(changelog)) {
                return prop;
            }
        }
        return new Properties();
    } finally {
        zkClient.close();
    }
}
Also used : ZkClient(org.I0Itec.zkclient.ZkClient) Tuple2(scala.Tuple2) Iterator(scala.collection.Iterator) ZkUtils(kafka.utils.ZkUtils) Properties(java.util.Properties) ZkConnection(org.I0Itec.zkclient.ZkConnection)
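
The raw Iterator over the Scala map is what forces the unchecked Tuple2 cast inside the loop; parameterizing the iterator type removes it. A sketch of the same traversal in generic form, where fetchConfigs() is a hypothetical stand-in for any call returning a scala.collection.Map:

scala.collection.Map<String, Properties> configs = fetchConfigs(); // hypothetical helper
scala.collection.Iterator<Tuple2<String, Properties>> it = configs.iterator();
while (it.hasNext()) {
    // each entry of a Scala map surfaces in Java as a (key, value) Tuple2
    final Tuple2<String, Properties> entry = it.next();
    System.out.println(entry._1() + " -> " + entry._2());
}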

Example 14 with Tuple2

use of scala.Tuple2 in project zeppelin by apache.

the class ZeppelinContext method tuplesToParamOptions.

private ParamOption[] tuplesToParamOptions(scala.collection.Iterable<Tuple2<Object, String>> options) {
    int n = options.size();
    ParamOption[] paramOptions = new ParamOption[n];
    Iterator<Tuple2<Object, String>> it = asJavaIterable(options).iterator();
    int i = 0;
    while (it.hasNext()) {
        Tuple2<Object, String> valueAndDisplayValue = it.next();
        paramOptions[i++] = new ParamOption(valueAndDisplayValue._1(), valueAndDisplayValue._2());
    }
    return paramOptions;
}
Also used : Tuple2(scala.Tuple2) AngularObject(org.apache.zeppelin.display.AngularObject) ParamOption(org.apache.zeppelin.display.Input.ParamOption)
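
The statically imported asJavaIterable (from scala.collection.JavaConversions in Scala 2.x) is what bridges the Scala Iterable into Java. With that same import, the explicit iterator above can be replaced by an enhanced for loop; a sketch of the equivalent body:

int n = options.size();
ParamOption[] paramOptions = new ParamOption[n];
int i = 0;
for (Tuple2<Object, String> valueAndDisplayValue : asJavaIterable(options)) {
    // _1() is the option value, _2() its display label
    paramOptions[i++] = new ParamOption(valueAndDisplayValue._1(), valueAndDisplayValue._2());
}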

Example 15 with Tuple2

use of scala.Tuple2 in project learning-spark by databricks.

the class LogAnalyzerTotal method processAccessLogs.

public void processAccessLogs(String outDir, JavaDStream<ApacheAccessLog> accessLogsDStream) {
    // Calculate statistics based on the content size, and update the static variables to track this.
    accessLogsDStream.foreachRDD(new Function<JavaRDD<ApacheAccessLog>, Void>() {

        public Void call(JavaRDD<ApacheAccessLog> accessLogs) {
            Tuple4<Long, Long, Long, Long> stats = Functions.contentSizeStats(accessLogs);
            if (stats != null) {
                runningCount.getAndAdd(stats._1());
                runningSum.getAndAdd(stats._2());
                runningMin.set(Math.min(runningMin.get(), stats._3()));
                runningMax.set(Math.max(runningMax.get(), stats._4()));
            }
            return null;
        }
    });
    // A DStream of response code counts.
    JavaPairDStream<Integer, Long> responseCodeCountDStream = accessLogsDStream.transformToPair(new Function<JavaRDD<ApacheAccessLog>, JavaPairRDD<Integer, Long>>() {

        public JavaPairRDD<Integer, Long> call(JavaRDD<ApacheAccessLog> rdd) {
            return Functions.responseCodeCount(rdd);
        }
    }).updateStateByKey(new Functions.ComputeRunningSum());
    responseCodeCountDStream.foreachRDD(new Function<JavaPairRDD<Integer, Long>, Void>() {

        public Void call(JavaPairRDD<Integer, Long> rdd) {
            currentResponseCodeCounts = rdd.take(100);
            return null;
        }
    });
    // A DStream of ipAddressCounts.
    JavaPairDStream<String, Long> ipRawDStream = accessLogsDStream.transformToPair(new Function<JavaRDD<ApacheAccessLog>, JavaPairRDD<String, Long>>() {

        public JavaPairRDD<String, Long> call(JavaRDD<ApacheAccessLog> rdd) {
            return Functions.ipAddressCount(rdd);
        }
    });
    JavaPairDStream<String, Long> ipCumDStream = ipRawDStream.updateStateByKey(new Functions.ComputeRunningSum());
    // A DStream of ipAddressCounts without transform
    JavaPairDStream<String, Long> ipDStream = accessLogsDStream.mapToPair(new Functions.IpTuple());
    JavaPairDStream<String, Long> ipCountsDStream = ipDStream.reduceByKey(new Functions.LongSumReducer());
    // A DStream of bytes transferred per IP address, to be joined with the request counts
    JavaPairDStream<String, Long> ipBytesDStream = accessLogsDStream.mapToPair(new Functions.IpContentTuple());
    JavaPairDStream<String, Long> ipBytesSumDStream = ipBytesDStream.reduceByKey(new Functions.LongSumReducer());
    JavaPairDStream<String, Tuple2<Long, Long>> ipBytesRequestCountDStream = ipBytesSumDStream.join(ipCountsDStream);
    // Save our dstream of ip address request counts
    JavaPairDStream<Text, LongWritable> writableDStream = ipDStream.mapToPair(new PairFunction<Tuple2<String, Long>, Text, LongWritable>() {

        public Tuple2<Text, LongWritable> call(Tuple2<String, Long> e) {
            return new Tuple2<Text, LongWritable>(new Text(e._1()), new LongWritable(e._2()));
        }
    });
    class OutFormat extends SequenceFileOutputFormat<Text, LongWritable> {
    }
    writableDStream.saveAsHadoopFiles(outDir, "pandas", Text.class, LongWritable.class, OutFormat.class);
    // all IP addresses with more than 10 requests
    JavaDStream<String> ipAddressDStream = ipCumDStream.transform(new Function<JavaPairRDD<String, Long>, JavaRDD<String>>() {

        public JavaRDD<String> call(JavaPairRDD<String, Long> rdd) {
            return Functions.filterIPAddress(rdd);
        }
    });
    ipAddressDStream.foreachRDD(new Function<JavaRDD<String>, Void>() {

        public Void call(JavaRDD<String> rdd) {
            List<String> currentIPAddresses = rdd.take(100);
            return null;
        }
    });
    // A DStream of endpoint to count.
    JavaPairDStream<String, Long> endpointCountsDStream = accessLogsDStream.transformToPair(new Function<JavaRDD<ApacheAccessLog>, JavaPairRDD<String, Long>>() {

        public JavaPairRDD<String, Long> call(JavaRDD<ApacheAccessLog> rdd) {
            return Functions.endpointCount(rdd);
        }
    }).updateStateByKey(new Functions.ComputeRunningSum());
    // obtain a natural-order Comparator<Long>; the cast through Object sidesteps a generics mismatch
    Object ordering = Ordering.natural();
    final Comparator<Long> cmp = (Comparator<Long>) ordering;
    endpointCountsDStream.foreachRDD(new Function<JavaPairRDD<String, Long>, Void>() {

        public Void call(JavaPairRDD<String, Long> rdd) {
            currentTopEndpoints = rdd.takeOrdered(10, new Functions.ValueComparator<String, Long>(cmp));
            return null;
        }
    });
}
Also used : SequenceFileOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat) Comparator(java.util.Comparator) VoidFunction(org.apache.spark.api.java.function.VoidFunction) Function(org.apache.spark.api.java.function.Function) PairFunction(org.apache.spark.api.java.function.PairFunction) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) List(java.util.List) LongWritable(org.apache.hadoop.io.LongWritable) Text(org.apache.hadoop.io.Text) JavaRDD(org.apache.spark.api.java.JavaRDD) Tuple4(scala.Tuple4) Tuple2(scala.Tuple2) AtomicLong(java.util.concurrent.atomic.AtomicLong)
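
Every pair transformation above funnels through scala.Tuple2, because Spark's Java API uses it as its key/value record type. A minimal word-count-style sketch of the mapToPair/reduceByKey pattern, assuming lines is a JavaRDD<String> (Function2 comes from the same org.apache.spark.api.java.function package):

JavaPairRDD<String, Long> counts = lines.mapToPair(
    new PairFunction<String, String, Long>() {
        public Tuple2<String, Long> call(String line) {
            // key each record by its first token, with an initial count of one
            return new Tuple2<String, Long>(line.split(" ")[0], 1L);
        }
    }).reduceByKey(new Function2<Long, Long, Long>() {
        public Long call(Long a, Long b) {
            return a + b;
        }
    });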

Aggregations

Tuple2 (scala.Tuple2): 181
JavaSparkContext (org.apache.spark.api.java.JavaSparkContext): 57
ArrayList (java.util.ArrayList): 43
IOException (java.io.IOException): 32
Test (org.junit.Test): 32
INDArray (org.nd4j.linalg.api.ndarray.INDArray): 28
JavaPairRDD (org.apache.spark.api.java.JavaPairRDD): 23
List (java.util.List): 22
Function (org.apache.spark.api.java.function.Function): 19
File (java.io.File): 18
Collectors (java.util.stream.Collectors): 18
GATKException (org.broadinstitute.hellbender.exceptions.GATKException): 18
Configuration (org.apache.hadoop.conf.Configuration): 17
UserException (org.broadinstitute.hellbender.exceptions.UserException): 17
Broadcast (org.apache.spark.broadcast.Broadcast): 16
MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock): 16
MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes): 16
SparkConf (org.apache.spark.SparkConf): 15
JavaRDD (org.apache.spark.api.java.JavaRDD): 15
VisibleForTesting (com.google.common.annotations.VisibleForTesting): 14