Search in sources:

Example 1 with Identify

Use of akka.actor.Identify in project flink by apache.

Class JobClient, method awaitJobResult.

/**
	 * Given a JobListeningContext, awaits the result of the job execution that this context is bound to
	 * @param listeningContext The listening context of the job execution
	 * @return The result of the execution
	 * @throws JobExecutionException if anything goes wrong while monitoring the job
	 */
public static JobExecutionResult awaitJobResult(JobListeningContext listeningContext) throws JobExecutionException {
    final JobID jobID = listeningContext.getJobID();
    final ActorRef jobClientActor = listeningContext.getJobClientActor();
    final Future<Object> jobSubmissionFuture = listeningContext.getJobResultFuture();
    final FiniteDuration askTimeout = listeningContext.getTimeout();
    // retrieves class loader if necessary
    final ClassLoader classLoader = listeningContext.getClassLoader();
    // ping the JobClientActor from time to time to check if it is still running
    while (!jobSubmissionFuture.isCompleted()) {
        try {
            Await.ready(jobSubmissionFuture, askTimeout);
        } catch (InterruptedException e) {
            throw new JobExecutionException(jobID, "Interrupted while waiting for job completion.");
        } catch (TimeoutException e) {
            try {
                // ping the JobClientActor to see if it is still alive
                Await.result(
                    Patterns.ask(jobClientActor, new Identify(true), Timeout.durationToTimeout(askTimeout)),
                    askTimeout);
                // we got a reply, continue waiting for the job result
            } catch (Exception eInner) {
                // no reply to the ping, i.e. the health check failed
                if (!jobSubmissionFuture.isCompleted()) {
                    throw new JobExecutionException(jobID, "JobClientActor seems to have died before the JobExecutionResult could be retrieved.", eInner);
                }
            }
        }
    }
    final Object answer;
    try {
        // we have already awaited the result, zero time to wait here
        answer = Await.result(jobSubmissionFuture, Duration.Zero());
    } catch (Throwable throwable) {
        throw new JobExecutionException(jobID, "Couldn't retrieve the JobExecutionResult from the JobManager.", throwable);
    } finally {
        // failsafe shutdown of the client actor
        jobClientActor.tell(PoisonPill.getInstance(), ActorRef.noSender());
    }
    // now interpret the response message from the JobManager
    if (answer instanceof JobManagerMessages.JobResultSuccess) {
        LOG.info("Job execution complete");
        SerializedJobExecutionResult result = ((JobManagerMessages.JobResultSuccess) answer).result();
        if (result != null) {
            try {
                return result.toJobExecutionResult(classLoader);
            } catch (Throwable t) {
                throw new JobExecutionException(jobID, "Job was successfully executed but JobExecutionResult could not be deserialized.");
            }
        } else {
            throw new JobExecutionException(jobID, "Job was successfully executed but result contained a null JobExecutionResult.");
        }
    } else if (answer instanceof JobManagerMessages.JobResultFailure) {
        LOG.info("Job execution failed");
        SerializedThrowable serThrowable = ((JobManagerMessages.JobResultFailure) answer).cause();
        if (serThrowable != null) {
            Throwable cause = serThrowable.deserializeError(classLoader);
            if (cause instanceof JobExecutionException) {
                throw (JobExecutionException) cause;
            } else {
                throw new JobExecutionException(jobID, "Job execution failed", cause);
            }
        } else {
            throw new JobExecutionException(jobID, "Job execution failed with null as failure cause.");
        }
    } else if (answer instanceof JobManagerMessages.JobNotFound) {
        throw new JobRetrievalException(((JobManagerMessages.JobNotFound) answer).jobID(), "Couldn't retrieve Job " + jobID + " because it was not running.");
    } else {
        throw new JobExecutionException(jobID, "Unknown answer from JobManager after submitting the job: " + answer);
    }
}
Also used: ActorRef(akka.actor.ActorRef) JobManagerMessages(org.apache.flink.runtime.messages.JobManagerMessages) FiniteDuration(scala.concurrent.duration.FiniteDuration) Identify(akka.actor.Identify) TimeoutException(java.util.concurrent.TimeoutException) IOException(java.io.IOException) FlinkUserCodeClassLoader(org.apache.flink.runtime.execution.librarycache.FlinkUserCodeClassLoader) SerializedThrowable(org.apache.flink.runtime.util.SerializedThrowable) JobID(org.apache.flink.api.common.JobID)
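
The pattern shared by these usages is treating Identify as a lightweight liveness probe: every Akka actor answers Identify automatically with an ActorIdentity message carrying its ActorRef, so no extra code is needed on the target side. Below is a minimal sketch of such a health check, assuming classic Akka with the Scala futures API used above; the helper name isActorAlive and the correlation id "health-check" are illustrative and not part of the Flink code.

import akka.actor.ActorIdentity;
import akka.actor.ActorRef;
import akka.actor.Identify;
import akka.pattern.Patterns;
import akka.util.Timeout;
import scala.concurrent.Await;
import scala.concurrent.Future;
import scala.concurrent.duration.FiniteDuration;

public final class ActorHealthCheck {

    /** Returns true if the actor answers an Identify probe within the given timeout. */
    public static boolean isActorAlive(ActorRef actor, FiniteDuration askTimeout) {
        try {
            // every actor replies to Identify with an ActorIdentity, no user code required
            Future<Object> probe = Patterns.ask(actor, new Identify("health-check"), Timeout.durationToTimeout(askTimeout));
            ActorIdentity identity = (ActorIdentity) Await.result(probe, askTimeout);
            // the reply carries no ActorRef when nothing could be identified
            return identity.getRef() != null;
        } catch (Exception e) {
            // no reply within the timeout (e.g. the actor is dead and the probe went to dead letters)
            return false;
        }
    }
}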

Example 2 with Identify

Use of akka.actor.Identify in project flink by apache.

Class AkkaRpcService, method connect.

// this method does not mutate state and is thus thread-safe
@Override
public <C extends RpcGateway> Future<C> connect(final String address, final Class<C> clazz) {
    checkState(!stopped, "RpcService is stopped");
    LOG.debug("Try to connect to remote RPC endpoint with address {}. Returning a {} gateway.", address, clazz.getName());
    final ActorSelection actorSel = actorSystem.actorSelection(address);
    final scala.concurrent.Future<Object> identify = Patterns.ask(actorSel, new Identify(42), timeout.toMilliseconds());
    final scala.concurrent.Future<C> resultFuture = identify.map(new Mapper<Object, C>() {

        @Override
        public C checkedApply(Object obj) throws Exception {
            ActorIdentity actorIdentity = (ActorIdentity) obj;
            if (actorIdentity.getRef() == null) {
                throw new RpcConnectionException("Could not connect to rpc endpoint under address " + address + '.');
            } else {
                ActorRef actorRef = actorIdentity.getRef();
                final String address = AkkaUtils.getAkkaURL(actorSystem, actorRef);
                final String hostname;
                Option<String> host = actorRef.path().address().host();
                if (host.isEmpty()) {
                    hostname = "localhost";
                } else {
                    hostname = host.get();
                }
                InvocationHandler akkaInvocationHandler = new AkkaInvocationHandler(address, hostname, actorRef, timeout, maximumFramesize, null);
                // Rather than using the System ClassLoader directly, we derive the ClassLoader
                // from this class. That works better in cases where Flink runs embedded and all Flink
                // code is loaded dynamically (for example from an OSGi bundle) through a custom ClassLoader.
                ClassLoader classLoader = AkkaRpcService.this.getClass().getClassLoader();
                @SuppressWarnings("unchecked") C proxy = (C) Proxy.newProxyInstance(classLoader, new Class<?>[] { clazz }, akkaInvocationHandler);
                return proxy;
            }
        }
    }, actorSystem.dispatcher());
    return new FlinkFuture<>(resultFuture);
}
Also used: ActorRef(akka.actor.ActorRef) RpcConnectionException(org.apache.flink.runtime.rpc.exceptions.RpcConnectionException) InvocationHandler(java.lang.reflect.InvocationHandler) Identify(akka.actor.Identify) FlinkFuture(org.apache.flink.runtime.concurrent.impl.FlinkFuture) ActorSelection(akka.actor.ActorSelection) Option(scala.Option) ActorIdentity(akka.actor.ActorIdentity)
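
Stripped of the dynamic-proxy machinery, connect boils down to resolving an actor path string to a live ActorRef via an Identify round trip against an ActorSelection. A compact sketch of just that step, assuming classic Akka; resolveActorRef is an illustrative name, not a Flink or Akka API.

import akka.actor.ActorIdentity;
import akka.actor.ActorRef;
import akka.actor.ActorSelection;
import akka.actor.ActorSystem;
import akka.actor.Identify;
import akka.pattern.Patterns;
import scala.concurrent.Await;
import scala.concurrent.Future;
import scala.concurrent.duration.FiniteDuration;

public final class ActorRefResolver {

    /** Resolves an actor path (possibly remote) to a live ActorRef, or throws if nothing answers. */
    public static ActorRef resolveActorRef(ActorSystem system, String address, FiniteDuration timeout) throws Exception {
        // an ActorSelection can be asked even though no ActorRef is known yet
        ActorSelection selection = system.actorSelection(address);
        Future<Object> identifyFuture = Patterns.ask(selection, new Identify(42), timeout.toMillis());
        ActorIdentity identity = (ActorIdentity) Await.result(identifyFuture, timeout);
        if (identity.getRef() == null) {
            throw new IllegalStateException("No actor reachable under " + address);
        }
        return identity.getRef();
    }
}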

Example 3 with Identify

Use of akka.actor.Identify in project flink by apache.

Class JobManagerHARecoveryTest, method testFailingJobRecovery.

/**
	 * Tests that a failing job recovery won't cause other job recoveries to fail.
	 */
@Test
public void testFailingJobRecovery() throws Exception {
    final FiniteDuration timeout = new FiniteDuration(10, TimeUnit.SECONDS);
    final FiniteDuration jobRecoveryTimeout = new FiniteDuration(0, TimeUnit.SECONDS);
    Deadline deadline = new FiniteDuration(1, TimeUnit.MINUTES).fromNow();
    final Configuration flinkConfiguration = new Configuration();
    UUID leaderSessionID = UUID.randomUUID();
    ActorRef jobManager = null;
    JobID jobId1 = new JobID();
    JobID jobId2 = new JobID();
    // set HA mode to zookeeper so that we try to recover jobs
    flinkConfiguration.setString(HighAvailabilityOptions.HA_MODE, "zookeeper");
    try {
        final SubmittedJobGraphStore submittedJobGraphStore = mock(SubmittedJobGraphStore.class);
        SubmittedJobGraph submittedJobGraph = mock(SubmittedJobGraph.class);
        when(submittedJobGraph.getJobId()).thenReturn(jobId2);
        when(submittedJobGraphStore.getJobIds()).thenReturn(Arrays.asList(jobId1, jobId2));
        // fail the first job recovery
        when(submittedJobGraphStore.recoverJobGraph(eq(jobId1))).thenThrow(new Exception("Test exception"));
        // succeed the second job recovery
        when(submittedJobGraphStore.recoverJobGraph(eq(jobId2))).thenReturn(submittedJobGraph);
        final TestingLeaderElectionService myLeaderElectionService = new TestingLeaderElectionService();
        final Collection<JobID> recoveredJobs = new ArrayList<>(2);
        Props jobManagerProps = Props.create(
            TestingFailingHAJobManager.class,
            flinkConfiguration,
            TestingUtils.defaultExecutor(),
            TestingUtils.defaultExecutor(),
            mock(InstanceManager.class),
            mock(Scheduler.class),
            new BlobLibraryCacheManager(mock(BlobService.class), 1 << 20),
            ActorRef.noSender(),
            new FixedDelayRestartStrategy.FixedDelayRestartStrategyFactory(Int.MaxValue(), 100),
            timeout,
            myLeaderElectionService,
            submittedJobGraphStore,
            mock(CheckpointRecoveryFactory.class),
            jobRecoveryTimeout,
            Option.<MetricRegistry>apply(null),
            recoveredJobs).withDispatcher(CallingThreadDispatcher.Id());
        jobManager = system.actorOf(jobManagerProps);
        Future<Object> started = Patterns.ask(jobManager, new Identify(42), deadline.timeLeft().toMillis());
        Await.ready(started, deadline.timeLeft());
        // make the job manager the leader --> this triggers the recovery of all jobs
        myLeaderElectionService.isLeader(leaderSessionID);
        // check that we have successfully recovered the second job
        assertThat(recoveredJobs, containsInAnyOrder(jobId2));
    } finally {
        TestingUtils.stopActor(jobManager);
    }
}
Also used: BlobLibraryCacheManager(org.apache.flink.runtime.execution.librarycache.BlobLibraryCacheManager) TestingLeaderElectionService(org.apache.flink.runtime.leaderelection.TestingLeaderElectionService) Configuration(org.apache.flink.configuration.Configuration) ActorRef(akka.actor.ActorRef) Deadline(scala.concurrent.duration.Deadline) MetricRegistry(org.apache.flink.runtime.metrics.MetricRegistry) ArrayList(java.util.ArrayList) FiniteDuration(scala.concurrent.duration.FiniteDuration) Props(akka.actor.Props) Identify(akka.actor.Identify) BlobService(org.apache.flink.runtime.blob.BlobService) UUID(java.util.UUID) JobID(org.apache.flink.api.common.JobID) Test(org.junit.Test)

Example 4 with Identify

Use of akka.actor.Identify in project controller by opendaylight.

Class TestActorFactory, method verifyActorReady.

@SuppressWarnings("checkstyle:IllegalCatch")
private void verifyActorReady(ActorRef actorRef) {
    // Sometimes we see messages go to dead letters soon after creation - it seems the actor isn't quite
    // in a state yet to receive messages or isn't actually created yet. This seems to happen with
    // actorSelection so, to alleviate it, we use an actorSelection and send an Identify message with
    // retries to ensure it's ready.
    Timeout timeout = new Timeout(100, TimeUnit.MILLISECONDS);
    Throwable lastError = null;
    Stopwatch sw = Stopwatch.createStarted();
    while (sw.elapsed(TimeUnit.SECONDS) <= 10) {
        try {
            ActorSelection actorSelection = system.actorSelection(actorRef.path().toString());
            Future<Object> future = Patterns.ask(actorSelection, new Identify(""), timeout);
            ActorIdentity reply = (ActorIdentity) Await.result(future, timeout.duration());
            Assert.assertNotNull("Identify returned null", reply.getRef());
            return;
        } catch (Exception | AssertionError e) {
            Uninterruptibles.sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
            lastError = e;
        }
    }
    throw new RuntimeException(lastError);
}
Also used: ActorSelection(akka.actor.ActorSelection) Timeout(akka.util.Timeout) Stopwatch(com.google.common.base.Stopwatch) Identify(akka.actor.Identify) InvalidActorNameException(akka.actor.InvalidActorNameException) ActorIdentity(akka.actor.ActorIdentity)
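
One detail worth noting across all four examples: the argument passed to Identify (true, 42, "" above) is an arbitrary correlation id that the target echoes back unchanged in the ActorIdentity reply (readable via correlationId()), which lets a caller tell replies apart when several probes are outstanding. A small sketch of that round trip, assuming classic Akka; the class and method names below are illustrative.

import akka.actor.ActorIdentity;
import akka.actor.ActorRef;
import akka.actor.Identify;
import akka.pattern.Patterns;
import akka.util.Timeout;
import scala.concurrent.Await;
import scala.concurrent.Future;
import scala.concurrent.duration.FiniteDuration;
import java.util.List;
import java.util.concurrent.TimeUnit;

public final class IdentifyProbe {

    /** Probes each actor and prints the echoed correlation id together with the resolved ActorRef. */
    public static void probeAll(List<ActorRef> actors) throws Exception {
        FiniteDuration wait = new FiniteDuration(1, TimeUnit.SECONDS);
        Timeout timeout = Timeout.durationToTimeout(wait);
        for (int i = 0; i < actors.size(); i++) {
            // the list index is used as the correlation id and comes back unchanged in the reply
            Future<Object> reply = Patterns.ask(actors.get(i), new Identify(i), timeout);
            ActorIdentity identity = (ActorIdentity) Await.result(reply, wait);
            System.out.println("probe " + identity.correlationId() + " -> " + identity.getRef());
        }
    }
}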

Aggregations

Identify (akka.actor.Identify) 4
ActorRef (akka.actor.ActorRef) 3
ActorIdentity (akka.actor.ActorIdentity) 2
ActorSelection (akka.actor.ActorSelection) 2
JobID (org.apache.flink.api.common.JobID) 2
FiniteDuration (scala.concurrent.duration.FiniteDuration) 2
InvalidActorNameException (akka.actor.InvalidActorNameException) 1
Props (akka.actor.Props) 1
Timeout (akka.util.Timeout) 1
Stopwatch (com.google.common.base.Stopwatch) 1
IOException (java.io.IOException) 1
InvocationHandler (java.lang.reflect.InvocationHandler) 1
ArrayList (java.util.ArrayList) 1
UUID (java.util.UUID) 1
TimeoutException (java.util.concurrent.TimeoutException) 1
Configuration (org.apache.flink.configuration.Configuration) 1
BlobService (org.apache.flink.runtime.blob.BlobService) 1
FlinkFuture (org.apache.flink.runtime.concurrent.impl.FlinkFuture) 1
BlobLibraryCacheManager (org.apache.flink.runtime.execution.librarycache.BlobLibraryCacheManager) 1
FlinkUserCodeClassLoader (org.apache.flink.runtime.execution.librarycache.FlinkUserCodeClassLoader) 1