Example 6 with Some

use of scala.Some in project enmasse-workshop by EnMasseProject.

the class TemperatureAnalyzer method createStreamingContext.

private static JavaStreamingContext createStreamingContext() {
    SparkConf conf = new SparkConf().setAppName(APP_NAME);
    // conf.setMaster("local[2]");
    conf.set("spark.streaming.receiver.writeAheadLog.enable", "true");
    JavaStreamingContext ssc = new JavaStreamingContext(conf, BATCH_DURATION);
    ssc.checkpoint(CHECKPOINT_DIR);
    JavaReceiverInputDStream<DeviceTemperature> receiveStream = AMQPUtils.createStream(ssc, host, port,
            Option.apply(username), Option.apply(password), temperatureAddress, message -> {
        Section section = message.getBody();
        if (section instanceof AmqpValue) {
            Object value = ((AmqpValue) section).getValue();
            DeviceTemperature deviceTemperature = DeviceTemperature.fromJson(value.toString());
            return new Some<>(deviceTemperature);
        } else if (section instanceof Data) {
            Binary data = ((Data) section).getValue();
            // StandardCharsets.UTF_8 avoids the checked UnsupportedEncodingException
            // that new String(bytes, "UTF-8") would raise inside the lambda
            DeviceTemperature deviceTemperature = DeviceTemperature.fromJson(new String(data.getArray(), StandardCharsets.UTF_8));
            return new Some<>(deviceTemperature);
        } else {
            // unsupported body section: emit no element rather than a null Option
            return Option.empty();
        }
    }, StorageLevel.MEMORY_ONLY());
    // from a stream of DeviceTemperature instances to a pair stream with key = device-id, value = temperature
    JavaPairDStream<String, Integer> temperaturesByDevice = receiveStream.mapToPair(deviceTemperature -> {
        return new Tuple2<>(deviceTemperature.deviceId(), deviceTemperature.temperature());
    });
    // reduce the pair stream by key (device-id) to get the max temperature value over the window
    JavaPairDStream<String, Integer> max = temperaturesByDevice.reduceByKeyAndWindow(
            (a, b) -> Math.max(a, b), new Duration(5000), new Duration(5000));
    // max.print();
    Broadcast<String> messagingHost = ssc.sparkContext().broadcast(host);
    Broadcast<Integer> messagingPort = ssc.sparkContext().broadcast(port);
    Broadcast<String> driverUsername = ssc.sparkContext().broadcast(username);
    Broadcast<String> driverPassword = ssc.sparkContext().broadcast(password);
    max.foreachRDD(rdd -> {
        rdd.foreach(record -> {
            // building a DeviceTemperature instance from the pair key = device-id, value = temperature
            DeviceTemperature deviceTemperature = new DeviceTemperature(record._1(), record._2());
            Vertx vertx = Vertx.vertx();
            ProtonClient client = ProtonClient.create(vertx);
            log.info("Connecting to messaging ...");
            client.connect(messagingHost.value(), messagingPort.value(), driverUsername.value(), driverPassword.value(), done -> {
                if (done.succeeded()) {
                    log.info("... connected to {}:{}", messagingHost.value(), messagingPort.getValue());
                    ProtonConnection connection = done.result();
                    connection.open();
                    ProtonSender maxSender = connection.createSender(maxAddress);
                    maxSender.open();
                    Message message = ProtonHelper.message();
                    message.setAddress(maxAddress);
                    message.setBody(new Data(new Binary(deviceTemperature.toJson().toString().getBytes(StandardCharsets.UTF_8))));
                    log.info("Sending {} to max address ...", deviceTemperature);
                    maxSender.send(message, maxDelivery -> {
                        log.info("... message sent");
                        maxSender.close();
                        connection.close();
                        vertx.close();
                    });
                } else {
                    log.error("Error on AMQP connection for sending", done.cause());
                    vertx.close();
                }
            });
        });
    });
    return ssc;
}
Also used : Message(org.apache.qpid.proton.message.Message) Data(org.apache.qpid.proton.amqp.messaging.Data) Duration(org.apache.spark.streaming.Duration) Vertx(io.vertx.core.Vertx) ProtonClient(io.vertx.proton.ProtonClient) Section(org.apache.qpid.proton.amqp.messaging.Section) AmqpValue(org.apache.qpid.proton.amqp.messaging.AmqpValue) JavaStreamingContext(org.apache.spark.streaming.api.java.JavaStreamingContext) ProtonConnection(io.vertx.proton.ProtonConnection) ProtonSender(io.vertx.proton.ProtonSender) Some(scala.Some) Tuple2(scala.Tuple2) Binary(org.apache.qpid.proton.amqp.Binary) SparkConf(org.apache.spark.SparkConf) Option(scala.Option) StandardCharsets(java.nio.charset.StandardCharsets)
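
A side note on the interop mix above: the snippet uses both new Some<>(value) and Option.apply(value). A minimal standalone sketch of the difference, assuming only scala-library on the classpath (class name and values are illustrative):

import scala.Option;
import scala.Some;

public class OptionInteropSketch {

    public static void main(String[] args) {
        // new Some<>(value) wraps the value directly; it should never receive null
        Option<String> some = new Some<>("device-1");
        // Option.apply(value) is null-safe: Some(value) for non-null input, None for null
        Option<String> maybeUser = Option.apply(System.getenv("AMQP_USER"));
        // Option.empty() is the Java-friendly way to obtain None
        Option<String> none = Option.empty();

        System.out.println(some.isDefined());                                   // true
        System.out.println(maybeUser.isDefined() ? maybeUser.get() : "no user");
        System.out.println(none.isEmpty());                                     // true
    }
}

This is why the AMQPUtils call can pass Option.apply(username): a null username becomes None instead of Some(null).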

Example 7 with Some

use of scala.Some in project flink by apache.

the class WebRuntimeMonitorITCase method testRedirectToLeader.

/**
	 * Tests that the monitor associated with the follower job manager redirects to the leader.
	 */
@Test
public void testRedirectToLeader() throws Exception {
    final Deadline deadline = TestTimeout.fromNow();
    ActorSystem[] jobManagerSystem = new ActorSystem[2];
    WebRuntimeMonitor[] webMonitor = new WebRuntimeMonitor[2];
    List<LeaderRetrievalService> leaderRetrievalServices = new ArrayList<>();
    try (TestingServer zooKeeper = new TestingServer()) {
        final Configuration config = ZooKeeperTestUtils.createZooKeeperHAConfig(zooKeeper.getConnectString(), temporaryFolder.getRoot().getPath());
        File logDir = temporaryFolder.newFolder();
        Path logFile = Files.createFile(new File(logDir, "jobmanager.log").toPath());
        Files.createFile(new File(logDir, "jobmanager.out").toPath());
        config.setInteger(ConfigConstants.JOB_MANAGER_WEB_PORT_KEY, 0);
        config.setString(ConfigConstants.JOB_MANAGER_WEB_LOG_PATH_KEY, logFile.toString());
        for (int i = 0; i < jobManagerSystem.length; i++) {
            jobManagerSystem[i] = AkkaUtils.createActorSystem(new Configuration(), new Some<>(new Tuple2<String, Object>("localhost", 0)));
        }
        for (int i = 0; i < webMonitor.length; i++) {
            LeaderRetrievalService lrs = ZooKeeperUtils.createLeaderRetrievalService(config);
            leaderRetrievalServices.add(lrs);
            webMonitor[i] = new WebRuntimeMonitor(config, lrs, jobManagerSystem[i]);
        }
        ActorRef[] jobManager = new ActorRef[2];
        String[] jobManagerAddress = new String[2];
        for (int i = 0; i < jobManager.length; i++) {
            Configuration jmConfig = config.clone();
            jmConfig.setInteger(ConfigConstants.JOB_MANAGER_WEB_PORT_KEY, webMonitor[i].getServerPort());
            jobManager[i] = JobManager.startJobManagerActors(jmConfig, jobManagerSystem[i], TestingUtils.defaultExecutor(), TestingUtils.defaultExecutor(), JobManager.class, MemoryArchivist.class)._1();
            jobManagerAddress[i] = AkkaUtils.getAkkaURL(jobManagerSystem[i], jobManager[i]);
            webMonitor[i].start(jobManagerAddress[i]);
        }
        LeaderRetrievalService lrs = ZooKeeperUtils.createLeaderRetrievalService(config);
        leaderRetrievalServices.add(lrs);
        TestingListener leaderListener = new TestingListener();
        lrs.start(leaderListener);
        leaderListener.waitForNewLeader(deadline.timeLeft().toMillis());
        String leaderAddress = leaderListener.getAddress();
        int leaderIndex = leaderAddress.equals(jobManagerAddress[0]) ? 0 : 1;
        int followerIndex = (leaderIndex + 1) % 2;
        ActorSystem leadingSystem = jobManagerSystem[leaderIndex];
        ActorSystem followerSystem = jobManagerSystem[followerIndex];
        WebMonitor leadingWebMonitor = webMonitor[leaderIndex];
        WebMonitor followerWebMonitor = webMonitor[followerIndex];
        // For test stability reason we have to wait until we are sure that both leader
        // listeners have been notified.
        JobManagerRetriever leadingRetriever = Whitebox.getInternalState(leadingWebMonitor, "retriever");
        JobManagerRetriever followerRetriever = Whitebox.getInternalState(followerWebMonitor, "retriever");
        // Wait for the initial notifications
        waitForLeaderNotification(leadingSystem, jobManager[leaderIndex], leadingRetriever, deadline);
        waitForLeaderNotification(leadingSystem, jobManager[leaderIndex], followerRetriever, deadline);
        try (HttpTestClient leaderClient = new HttpTestClient("localhost", leadingWebMonitor.getServerPort());
            HttpTestClient followingClient = new HttpTestClient("localhost", followerWebMonitor.getServerPort())) {
            String expected = new Scanner(new File(MAIN_RESOURCES_PATH + "/index.html")).useDelimiter("\\A").next();
            // Request the file from the leading web server
            leaderClient.sendGetRequest("index.html", deadline.timeLeft());
            HttpTestClient.SimpleHttpResponse response = leaderClient.getNextResponse(deadline.timeLeft());
            assertEquals(HttpResponseStatus.OK, response.getStatus());
            assertEquals(MimeTypes.getMimeTypeForExtension("html"), response.getType());
            assertEquals(expected, response.getContent());
            // Request the file from the following web server
            followingClient.sendGetRequest("index.html", deadline.timeLeft());
            response = followingClient.getNextResponse(deadline.timeLeft());
            assertEquals(HttpResponseStatus.TEMPORARY_REDIRECT, response.getStatus());
            assertTrue(response.getLocation().contains(String.valueOf(leadingWebMonitor.getServerPort())));
            // Kill the leader
            leadingSystem.shutdown();
            // Wait for the notification of the follower
            waitForLeaderNotification(followerSystem, jobManager[followerIndex], followerRetriever, deadline);
            // Same request to the new leader
            followingClient.sendGetRequest("index.html", deadline.timeLeft());
            response = followingClient.getNextResponse(deadline.timeLeft());
            assertEquals(HttpResponseStatus.OK, response.getStatus());
            assertEquals(MimeTypes.getMimeTypeForExtension("html"), response.getType());
            assertEquals(expected, response.getContent());
            // Simple overview request
            followingClient.sendGetRequest("/overview", deadline.timeLeft());
            response = followingClient.getNextResponse(deadline.timeLeft());
            assertEquals(HttpResponseStatus.OK, response.getStatus());
            assertEquals(MimeTypes.getMimeTypeForExtension("json"), response.getType());
            assertTrue(response.getContent().contains("\"taskmanagers\":1") || response.getContent().contains("\"taskmanagers\":0"));
        }
    } finally {
        for (ActorSystem system : jobManagerSystem) {
            if (system != null) {
                system.shutdown();
            }
        }
        for (WebMonitor monitor : webMonitor) {
            // guard against monitors that failed to construct
            if (monitor != null) {
                monitor.stop();
            }
        }
        for (LeaderRetrievalService lrs : leaderRetrievalServices) {
            lrs.stop();
        }
    }
}
Also used : ActorSystem(akka.actor.ActorSystem) Scanner(java.util.Scanner) Configuration(org.apache.flink.configuration.Configuration) ActorRef(akka.actor.ActorRef) ArrayList(java.util.ArrayList) TestingListener(org.apache.flink.runtime.leaderelection.TestingListener) HttpTestClient(org.apache.flink.runtime.webmonitor.testutils.HttpTestClient) TestingServer(org.apache.curator.test.TestingServer) Path(java.nio.file.Path) Deadline(scala.concurrent.duration.Deadline) Some(scala.Some) LeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService) File(java.io.File) Test(org.junit.Test)
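
The new Some<>(new Tuple2<String, Object>("localhost", 0)) idiom above hands AkkaUtils.createActorSystem an optional listen address. A minimal sketch of just that wrapping, assuming only scala-library (helper and class names are hypothetical):

import scala.Some;
import scala.Tuple2;

public class ListenAddressSketch {

    // port 0 asks the OS for an ephemeral port, matching the test above
    static Some<Tuple2<String, Object>> localEphemeral() {
        return new Some<>(new Tuple2<String, Object>("localhost", 0));
    }

    public static void main(String[] args) {
        Tuple2<String, Object> address = localEphemeral().get();
        System.out.println(address._1() + ":" + address._2());   // localhost:0
    }
}

The value component is typed Object rather than Integer because the Scala side declares the port as a Scala Int, which erases to Object in the Java view of Tuple2.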

Example 8 with Some

use of scala.Some in project flink by apache.

the class FlinkClient method getJobManager.

private ActorRef getJobManager() throws IOException {
    final Configuration configuration = GlobalConfiguration.loadConfiguration();
    ActorSystem actorSystem;
    try {
        final scala.Tuple2<String, Object> systemEndpoint = new scala.Tuple2<String, Object>("", 0);
        actorSystem = AkkaUtils.createActorSystem(configuration, new Some<scala.Tuple2<String, Object>>(systemEndpoint));
    } catch (final Exception e) {
        throw new RuntimeException("Could not start actor system to communicate with JobManager", e);
    }
    return JobManager.getJobManagerActorRef(AkkaUtils.getAkkaProtocol(configuration), NetUtils.unresolvedHostAndPortToNormalizedString(this.jobManagerHost, this.jobManagerPort), actorSystem, AkkaUtils.getLookupTimeout(configuration));
}
Also used : ActorSystem(akka.actor.ActorSystem) Some(scala.Some) Configuration(org.apache.flink.configuration.Configuration) GlobalConfiguration(org.apache.flink.configuration.GlobalConfiguration) ProgramInvocationException(org.apache.flink.client.program.ProgramInvocationException) AlreadyAliveException(org.apache.storm.generated.AlreadyAliveException) IOException(java.io.IOException) NotAliveException(org.apache.storm.generated.NotAliveException) InvalidTopologyException(org.apache.storm.generated.InvalidTopologyException)

Example 9 with Some

use of scala.Some in project flink by apache.

the class YarnApplicationMasterRunner method runApplicationMaster.

// ------------------------------------------------------------------------
//  Core work method
// ------------------------------------------------------------------------
/**
	 * The main work method, must run as a privileged action.
	 *
	 * @return The return code for the Java process.
	 */
protected int runApplicationMaster(Configuration config) {
    ActorSystem actorSystem = null;
    WebMonitor webMonitor = null;
    int numberProcessors = Hardware.getNumberCPUCores();
    final ScheduledExecutorService futureExecutor = Executors.newScheduledThreadPool(numberProcessors, new ExecutorThreadFactory("yarn-jobmanager-future"));
    final ExecutorService ioExecutor = Executors.newFixedThreadPool(numberProcessors, new ExecutorThreadFactory("yarn-jobmanager-io"));
    try {
        // ------- (1) load and parse / validate all configurations -------
        // loading all config values here has the advantage that the program fails fast
        // if any configuration problem occurs
        final String currDir = ENV.get(Environment.PWD.key());
        require(currDir != null, "Current working directory variable (%s) not set", Environment.PWD.key());
        // Note that we use the "appMasterHostname" given by YARN here, to make sure
        // we use the hostnames given by YARN consistently throughout akka.
        // for akka, "localhost" and "localhost.localdomain" are different actors.
        final String appMasterHostname = ENV.get(Environment.NM_HOST.key());
        require(appMasterHostname != null, "ApplicationMaster hostname variable %s not set", Environment.NM_HOST.key());
        LOG.info("YARN assigned hostname for application master: {}", appMasterHostname);
        // Update keytab and principal path to reflect YARN container path location
        final String remoteKeytabPath = ENV.get(YarnConfigKeys.KEYTAB_PATH);
        final String remoteKeytabPrincipal = ENV.get(YarnConfigKeys.KEYTAB_PRINCIPAL);
        String keytabPath = null;
        if (remoteKeytabPath != null) {
            File f = new File(currDir, Utils.KEYTAB_FILE_NAME);
            keytabPath = f.getAbsolutePath();
            LOG.info("keytabPath: {}", keytabPath);
        }
        if (keytabPath != null && remoteKeytabPrincipal != null) {
            config.setString(SecurityOptions.KERBEROS_LOGIN_KEYTAB, keytabPath);
            config.setString(SecurityOptions.KERBEROS_LOGIN_PRINCIPAL, remoteKeytabPrincipal);
        }
        // Hadoop/Yarn configuration (loads config data automatically from classpath files)
        final YarnConfiguration yarnConfig = new YarnConfiguration();
        final int taskManagerContainerMemory;
        final int numInitialTaskManagers;
        final int slotsPerTaskManager;
        try {
            taskManagerContainerMemory = Integer.parseInt(ENV.get(YarnConfigKeys.ENV_TM_MEMORY));
        } catch (NumberFormatException e) {
            throw new RuntimeException("Invalid value for " + YarnConfigKeys.ENV_TM_MEMORY + " : " + e.getMessage());
        }
        try {
            numInitialTaskManagers = Integer.parseInt(ENV.get(YarnConfigKeys.ENV_TM_COUNT));
        } catch (NumberFormatException e) {
            throw new RuntimeException("Invalid value for " + YarnConfigKeys.ENV_TM_COUNT + " : " + e.getMessage());
        }
        try {
            slotsPerTaskManager = Integer.parseInt(ENV.get(YarnConfigKeys.ENV_SLOTS));
        } catch (NumberFormatException e) {
            throw new RuntimeException("Invalid value for " + YarnConfigKeys.ENV_SLOTS + " : " + e.getMessage());
        }
        final ContaineredTaskManagerParameters taskManagerParameters = ContaineredTaskManagerParameters.create(config, taskManagerContainerMemory, slotsPerTaskManager);
        LOG.info("TaskManagers will be created with {} task slots", taskManagerParameters.numSlots());
        LOG.info("TaskManagers will be started with container size {} MB, JVM heap size {} MB, " + "JVM direct memory limit {} MB", taskManagerParameters.taskManagerTotalMemoryMB(), taskManagerParameters.taskManagerHeapSizeMB(), taskManagerParameters.taskManagerDirectMemoryLimitMB());
        // ----------------- (2) start the actor system -------------------
        // try to start the actor system, JobManager and JobManager actor system
        // using the port range definition from the config.
        final String amPortRange = config.getString(ConfigConstants.YARN_APPLICATION_MASTER_PORT, ConfigConstants.DEFAULT_YARN_JOB_MANAGER_PORT);
        actorSystem = BootstrapTools.startActorSystem(config, appMasterHostname, amPortRange, LOG);
        final String akkaHostname = AkkaUtils.getAddress(actorSystem).host().get();
        final int akkaPort = (Integer) AkkaUtils.getAddress(actorSystem).port().get();
        LOG.info("Actor system bound to hostname {}.", akkaHostname);
        // ---- (3) Generate the configuration for the TaskManagers
        final Configuration taskManagerConfig = BootstrapTools.generateTaskManagerConfiguration(config, akkaHostname, akkaPort, slotsPerTaskManager, TASKMANAGER_REGISTRATION_TIMEOUT);
        LOG.debug("TaskManager configuration: {}", taskManagerConfig);
        final ContainerLaunchContext taskManagerContext = Utils.createTaskExecutorContext(config, yarnConfig, ENV, taskManagerParameters, taskManagerConfig, currDir, getTaskManagerClass(), LOG);
        // ---- (4) start the actors and components in this order:
        // 1) JobManager & Archive (in non-HA case, the leader service takes this)
        // 2) Web Monitor (we need its port to register)
        // 3) Resource Master for YARN
        // 4) Process reapers for the JobManager and Resource Master
        // 1: the JobManager
        LOG.debug("Starting JobManager actor");
        // we start the JobManager with its standard name
        ActorRef jobManager = JobManager.startJobManagerActors(config, actorSystem, futureExecutor, ioExecutor, new Some<>(JobManager.JOB_MANAGER_NAME()), Option.<String>empty(), getJobManagerClass(), getArchivistClass())._1();
        // 2: the web monitor
        LOG.debug("Starting Web Frontend");
        webMonitor = BootstrapTools.startWebMonitorIfConfigured(config, actorSystem, jobManager, LOG);
        String protocol = "http://";
        if (config.getBoolean(ConfigConstants.JOB_MANAGER_WEB_SSL_ENABLED, ConfigConstants.DEFAULT_JOB_MANAGER_WEB_SSL_ENABLED) && SSLUtils.getSSLEnabled(config)) {
            protocol = "https://";
        }
        final String webMonitorURL = webMonitor == null ? null : protocol + appMasterHostname + ":" + webMonitor.getServerPort();
        // 3: Flink's Yarn ResourceManager
        LOG.debug("Starting YARN Flink Resource Manager");
        // we need the leader retrieval service here to be informed of new leaders and session IDs
        LeaderRetrievalService leaderRetriever = LeaderRetrievalUtils.createLeaderRetrievalService(config, jobManager);
        Props resourceMasterProps = YarnFlinkResourceManager.createActorProps(getResourceManagerClass(), config, yarnConfig, leaderRetriever, appMasterHostname, webMonitorURL, taskManagerParameters, taskManagerContext, numInitialTaskManagers, LOG);
        ActorRef resourceMaster = actorSystem.actorOf(resourceMasterProps);
        // 4: Process reapers
        // The process reapers ensure that upon unexpected actor death, the process exits
        // and does not stay lingering around unresponsive
        LOG.debug("Starting process reapers for JobManager and YARN Application Master");
        actorSystem.actorOf(Props.create(ProcessReaper.class, resourceMaster, LOG, ACTOR_DIED_EXIT_CODE), "YARN_Resource_Master_Process_Reaper");
        actorSystem.actorOf(Props.create(ProcessReaper.class, jobManager, LOG, ACTOR_DIED_EXIT_CODE), "JobManager_Process_Reaper");
    } catch (Throwable t) {
        // make sure that everything ends up in the log, whatever happens
        LOG.error("YARN Application Master initialization failed", t);
        if (webMonitor != null) {
            try {
                webMonitor.stop();
            } catch (Throwable ignored) {
                // log the stop failure itself, not the original error t
                LOG.warn("Failed to stop the web frontend", ignored);
            }
        }
        if (actorSystem != null) {
            try {
                actorSystem.shutdown();
            } catch (Throwable tt) {
                LOG.error("Error shutting down actor system", tt);
            }
        }
        futureExecutor.shutdownNow();
        ioExecutor.shutdownNow();
        return INIT_ERROR_EXIT_CODE;
    }
    // everything started, we can wait until all is done or the process is killed
    LOG.info("YARN Application Master started");
    // wait until everything is done
    actorSystem.awaitTermination();
    // if we get here, everything worked out all right, and we even exited smoothly
    if (webMonitor != null) {
        try {
            webMonitor.stop();
        } catch (Throwable t) {
            LOG.error("Failed to stop the web frontend", t);
        }
    }
    org.apache.flink.runtime.concurrent.Executors.gracefulShutdown(AkkaUtils.getTimeout(config).toMillis(), TimeUnit.MILLISECONDS, futureExecutor, ioExecutor);
    return 0;
}
Also used : ActorSystem(akka.actor.ActorSystem) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) Configuration(org.apache.flink.configuration.Configuration) GlobalConfiguration(org.apache.flink.configuration.GlobalConfiguration) ProcessReaper(org.apache.flink.runtime.process.ProcessReaper) ActorRef(akka.actor.ActorRef) ContaineredTaskManagerParameters(org.apache.flink.runtime.clusterframework.ContaineredTaskManagerParameters) ContainerLaunchContext(org.apache.hadoop.yarn.api.records.ContainerLaunchContext) Props(akka.actor.Props) ExecutorThreadFactory(org.apache.flink.runtime.util.ExecutorThreadFactory) Some(scala.Some) YarnConfiguration(org.apache.hadoop.yarn.conf.YarnConfiguration) LeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService) WebMonitor(org.apache.flink.runtime.webmonitor.WebMonitor) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) ExecutorService(java.util.concurrent.ExecutorService) File(java.io.File)
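
Note the two Option forms in step 1 of the actor startup above: new Some<>(JobManager.JOB_MANAGER_NAME()) versus Option.<String>empty(). The explicit type witness supplies the type parameter Scala would normally infer. A small isolated sketch of the pattern (names are illustrative):

import scala.Option;
import scala.Some;

public class OptionalActorNameSketch {

    static Option<String> actorName(boolean useStandardName) {
        // Option.<String>empty() pins the type parameter where Java cannot infer it
        return useStandardName ? new Some<>("jobmanager") : Option.<String>empty();
    }

    public static void main(String[] args) {
        System.out.println(actorName(true));    // prints Some(jobmanager)
        System.out.println(actorName(false));   // prints None
    }
}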

Example 10 with Some

use of scala.Some in project flink by apache.

the class JobManagerHACheckpointRecoveryITCase method testCheckpointRecoveryFailure.

/**
	 * Tests that the JobManager logs failures during recovery properly.
	 *
	 * @see <a href="https://issues.apache.org/jira/browse/FLINK-3185">FLINK-3185</a>
	 */
@Test
@RetryOnFailure(times = 1)
public void testCheckpointRecoveryFailure() throws Exception {
    final Deadline testDeadline = TestTimeOut.fromNow();
    final String zooKeeperQuorum = ZooKeeper.getConnectString();
    final String fileStateBackendPath = FileStateBackendBasePath.getAbsoluteFile().toString();
    Configuration config = ZooKeeperTestUtils.createZooKeeperHAConfig(zooKeeperQuorum, fileStateBackendPath);
    config.setInteger(ConfigConstants.LOCAL_NUMBER_JOB_MANAGER, 2);
    JobManagerProcess[] jobManagerProcess = new JobManagerProcess[2];
    LeaderRetrievalService leaderRetrievalService = null;
    ActorSystem taskManagerSystem = null;
    ActorSystem testActorSystem = null;
    try {
        // Test actor system
        testActorSystem = AkkaUtils.createActorSystem(new Configuration(), new Some<>(new Tuple2<String, Object>("localhost", 0)));
        // The job managers
        jobManagerProcess[0] = new JobManagerProcess(0, config);
        jobManagerProcess[1] = new JobManagerProcess(1, config);
        jobManagerProcess[0].startProcess();
        jobManagerProcess[1].startProcess();
        // Leader listener
        TestingListener leaderListener = new TestingListener();
        leaderRetrievalService = ZooKeeperUtils.createLeaderRetrievalService(config);
        leaderRetrievalService.start(leaderListener);
        // The task manager
        taskManagerSystem = AkkaUtils.createActorSystem(config, Option.apply(new Tuple2<String, Object>("localhost", 0)));
        TaskManager.startTaskManagerComponentsAndActor(config, ResourceID.generate(), taskManagerSystem, "localhost", Option.<String>empty(), Option.<LeaderRetrievalService>empty(), false, TaskManager.class);
        // Get the leader
        leaderListener.waitForNewLeader(testDeadline.timeLeft().toMillis());
        String leaderAddress = leaderListener.getAddress();
        UUID leaderId = leaderListener.getLeaderSessionID();
        // Get the leader ref
        ActorRef leaderRef = AkkaUtils.getActorRef(leaderAddress, testActorSystem, testDeadline.timeLeft());
        ActorGateway leader = new AkkaActorGateway(leaderRef, leaderId);
        // Who's the boss?
        JobManagerProcess leadingJobManagerProcess;
        JobManagerProcess nonLeadingJobManagerProcess;
        if (jobManagerProcess[0].getJobManagerAkkaURL(testDeadline.timeLeft()).equals(leaderListener.getAddress())) {
            leadingJobManagerProcess = jobManagerProcess[0];
            nonLeadingJobManagerProcess = jobManagerProcess[1];
        } else {
            leadingJobManagerProcess = jobManagerProcess[1];
            nonLeadingJobManagerProcess = jobManagerProcess[0];
        }
        // Blocking JobGraph
        JobVertex blockingVertex = new JobVertex("Blocking vertex");
        blockingVertex.setInvokableClass(BlockingNoOpInvokable.class);
        JobGraph jobGraph = new JobGraph(blockingVertex);
        // Submit the job in detached mode
        leader.tell(new SubmitJob(jobGraph, ListeningBehaviour.DETACHED));
        // Wait for the job to be running
        JobManagerActorTestUtils.waitForJobStatus(jobGraph.getJobID(), JobStatus.RUNNING, leader, testDeadline.timeLeft());
        // Remove all files
        FileUtils.deleteDirectory(FileStateBackendBasePath);
        // Kill the leader
        leadingJobManagerProcess.destroy();
        // Verify that the job manager logs the failed recovery. We cannot
        // do more at this point. :(
        boolean success = false;
        while (testDeadline.hasTimeLeft()) {
            String output = nonLeadingJobManagerProcess.getProcessOutput();
            if (output != null) {
                if (output.contains("Failed to recover job") && output.contains("java.io.FileNotFoundException")) {
                    success = true;
                    break;
                }
            } else {
                log.warn("No process output available.");
            }
            Thread.sleep(500);
        }
        assertTrue("Did not find expected output in logs.", success);
    } catch (Throwable t) {
        // Print early (in some situations the process logs get too big
        // for Travis and the root problem is not shown)
        t.printStackTrace();
        // In case of an error, print the job manager process logs.
        if (jobManagerProcess[0] != null) {
            jobManagerProcess[0].printProcessLog();
        }
        if (jobManagerProcess[1] != null) {
            jobManagerProcess[1].printProcessLog();
        }
        throw t;
    } finally {
        if (jobManagerProcess[0] != null) {
            jobManagerProcess[0].destroy();
        }
        if (jobManagerProcess[1] != null) {
            jobManagerProcess[1].destroy();
        }
        if (leaderRetrievalService != null) {
            leaderRetrievalService.stop();
        }
        if (taskManagerSystem != null) {
            taskManagerSystem.shutdown();
        }
        if (testActorSystem != null) {
            testActorSystem.shutdown();
        }
    }
}
Also used : ActorSystem(akka.actor.ActorSystem) AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) Configuration(org.apache.flink.configuration.Configuration) ActorRef(akka.actor.ActorRef) Deadline(scala.concurrent.duration.Deadline) TestingListener(org.apache.flink.runtime.leaderelection.TestingListener) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) Some(scala.Some) JobVertex(org.apache.flink.runtime.jobgraph.JobVertex) LeaderRetrievalService(org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService) ActorGateway(org.apache.flink.runtime.instance.ActorGateway) AkkaActorGateway(org.apache.flink.runtime.instance.AkkaActorGateway) JobManagerProcess(org.apache.flink.runtime.testutils.JobManagerProcess) UUID(java.util.UUID) SubmitJob(org.apache.flink.runtime.messages.JobManagerMessages.SubmitJob) Test(org.junit.Test) RetryOnFailure(org.apache.flink.testutils.junit.RetryOnFailure)
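
Across all of these examples the scala.Option ends up in Java code that may prefer java.util.Optional. A small bridge like the following works; the helper is an assumption for illustration (Scala 2.13+ ships scala.jdk.javaapi.OptionConverters for the same job):

import java.util.Optional;
import scala.Option;

public class OptionBridgeSketch {

    // hypothetical helper: converts a scala.Option into a java.util.Optional
    static <T> Optional<T> toJava(Option<T> option) {
        return option.isDefined() ? Optional.of(option.get()) : Optional.<T>empty();
    }

    public static void main(String[] args) {
        System.out.println(toJava(Option.apply("leader")));   // Optional[leader]
        System.out.println(toJava(Option.empty()));           // Optional.empty
    }
}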

Aggregations

Some (scala.Some): 13
ActorSystem (akka.actor.ActorSystem): 9
Configuration (org.apache.flink.configuration.Configuration): 8
ActorRef (akka.actor.ActorRef): 7
Test (org.junit.Test): 6
LeaderRetrievalService (org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService): 5
File (java.io.File): 4
TestingListener (org.apache.flink.runtime.leaderelection.TestingListener): 4
IOException (java.io.IOException): 3
UUID (java.util.UUID): 3
ActorGateway (org.apache.flink.runtime.instance.ActorGateway): 3
AkkaActorGateway (org.apache.flink.runtime.instance.AkkaActorGateway): 3
JobGraph (org.apache.flink.runtime.jobgraph.JobGraph): 3
SubmitJob (org.apache.flink.runtime.messages.JobManagerMessages.SubmitJob): 3
JobManagerProcess (org.apache.flink.runtime.testutils.JobManagerProcess): 3
Tuple2 (scala.Tuple2): 3
Deadline (scala.concurrent.duration.Deadline): 3
StringWriter (java.io.StringWriter): 2
GlobalConfiguration (org.apache.flink.configuration.GlobalConfiguration): 2
RetryOnFailure (org.apache.flink.testutils.junit.RetryOnFailure): 2