Search in sources :

Example 1 with FlinkUserCodeClassLoader

use of org.apache.flink.runtime.execution.librarycache.FlinkUserCodeClassLoader in project flink by apache.

The class JobClient, method retrieveClassLoader.

/**
 * Rebuilds a job's class loader on the caller's side.
 *
 * <p>The JobManager is first asked for the job's class loading properties. Each jar blob it
 * reports is then resolved to a URL through a {@link BlobCache} that points at the JobManager
 * host and the reported blob manager port. Those URLs, followed by the reported classpath
 * URLs, back the returned class loader.
 *
 * @param jobID id of job
 * @param jobManager gateway to the JobManager
 * @param config the flink configuration
 * @return a class loader built from the job's jar file URLs and classpath URLs
 * @throws JobRetrievalException if the JobManager cannot be queried, answers that the job is
 *         not found, answers with an unexpected message, or if the blob cache cannot be set
 *         up or a jar URL cannot be obtained
 */
public static ClassLoader retrieveClassLoader(JobID jobID, ActorGateway jobManager, Configuration config) throws JobRetrievalException {
    final Object response;
    try {
        response = Await.result(
            jobManager.ask(new JobManagerMessages.RequestClassloadingProps(jobID), AkkaUtils.getDefaultTimeoutAsFiniteDuration()),
            AkkaUtils.getDefaultTimeoutAsFiniteDuration());
    } catch (Exception e) {
        throw new JobRetrievalException(jobID, "Couldn't retrieve class loading properties from JobManager.", e);
    }

    // Anything other than the class loading properties is a failure; sort those out first.
    if (!(response instanceof JobManagerMessages.ClassloadingProps)) {
        if (response instanceof JobManagerMessages.JobNotFound) {
            throw new JobRetrievalException(jobID, "Couldn't retrieve class loader. Job " + jobID + " not found");
        }
        throw new JobRetrievalException(jobID, "Unknown response from JobManager: " + response);
    }

    final JobManagerMessages.ClassloadingProps props = (JobManagerMessages.ClassloadingProps) response;

    // Fall back to "localhost" when the actor address carries no host.
    final Option<String> advertisedHost = jobManager.actor().path().address().host();
    final String blobHost;
    if (advertisedHost.isDefined()) {
        blobHost = advertisedHost.get();
    } else {
        blobHost = "localhost";
    }
    final InetSocketAddress blobServerAddress = new InetSocketAddress(blobHost, props.blobManagerPort());

    final BlobCache blobCache;
    try {
        blobCache = new BlobCache(blobServerAddress, config);
    } catch (IOException e) {
        throw new JobRetrievalException(jobID, "Failed to setup blob cache", e);
    }

    final Collection<BlobKey> jarKeys = props.requiredJarFiles();
    final Collection<URL> classpathUrls = props.requiredClasspaths();
    final URL[] loaderUrls = new URL[jarKeys.size() + classpathUrls.size()];

    // Jar URLs come first, in the order the JobManager reported them.
    int cursor = 0;
    for (BlobKey jarKey : props.requiredJarFiles()) {
        try {
            loaderUrls[cursor] = blobCache.getURL(jarKey);
            cursor++;
        } catch (Exception e) {
            // The cache is of no use once a jar is missing, so release it before failing.
            blobCache.shutdown();
            throw new JobRetrievalException(jobID, "Failed to download BlobKey " + jarKey, e);
        }
    }

    // Classpath URLs are appended unchanged after the jars.
    for (URL classpathUrl : classpathUrls) {
        loaderUrls[cursor] = classpathUrl;
        cursor++;
    }

    return new FlinkUserCodeClassLoader(loaderUrls, JobClient.class.getClassLoader());
}
Also used : InetSocketAddress(java.net.InetSocketAddress) FlinkUserCodeClassLoader(org.apache.flink.runtime.execution.librarycache.FlinkUserCodeClassLoader) JobManagerMessages(org.apache.flink.runtime.messages.JobManagerMessages) BlobCache(org.apache.flink.runtime.blob.BlobCache) IOException(java.io.IOException) TimeoutException(java.util.concurrent.TimeoutException) URL(java.net.URL) BlobKey(org.apache.flink.runtime.blob.BlobKey)

Example 2 with FlinkUserCodeClassLoader

use of org.apache.flink.runtime.execution.librarycache.FlinkUserCodeClassLoader in project flink by apache.

The class JobMasterTest, method testHeartbeatTimeoutWithTaskManager.

/**
 * Checks that the JobMaster disconnects from a registered TaskManager when the heartbeat
 * timeout for that TaskManager fires.
 *
 * <p>The scheduled executor handed to the heartbeat services is a mock, so neither the
 * heartbeat request nor the timeout runs on its own. The test captures both runnables from
 * the mock and runs them by hand, which makes the timeout deterministic.
 *
 * @throws Exception if the JobMaster set-up fails or the fatal error handler recorded an error
 */
@Test
public void testHeartbeatTimeoutWithTaskManager() throws Exception {
    final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
    final TestingLeaderRetrievalService rmLeaderRetrievalService = new TestingLeaderRetrievalService();
    haServices.setResourceManagerLeaderRetriever(rmLeaderRetrievalService);
    haServices.setCheckpointRecoveryFactory(mock(CheckpointRecoveryFactory.class));
    final TestingFatalErrorHandler testingFatalErrorHandler = new TestingFatalErrorHandler();
    final String jobManagerAddress = "jm";
    final UUID jmLeaderId = UUID.randomUUID();
    final ResourceID jmResourceId = new ResourceID(jobManagerAddress);
    final String taskManagerAddress = "tm";
    final ResourceID tmResourceId = new ResourceID(taskManagerAddress);
    final TaskManagerLocation taskManagerLocation = new TaskManagerLocation(tmResourceId, InetAddress.getLoopbackAddress(), 1234);
    final TaskExecutorGateway taskExecutorGateway = mock(TaskExecutorGateway.class);
    final TestingSerialRpcService rpc = new TestingSerialRpcService();
    rpc.registerGateway(taskManagerAddress, taskExecutorGateway);
    final long heartbeatInterval = 1L;
    final long heartbeatTimeout = 5L;
    // mocked so that the heartbeat and timeout runnables can be captured and run manually
    final ScheduledExecutor scheduledExecutor = mock(ScheduledExecutor.class);
    final HeartbeatServices heartbeatServices = new TestingHeartbeatServices(heartbeatInterval, heartbeatTimeout, scheduledExecutor);
    final JobGraph jobGraph = new JobGraph();
    // keep a handle on the pool given to the JobMaster so that it can be shut down afterwards;
    // fully qualified so that no additional import is required
    final java.util.concurrent.ScheduledExecutorService jobMasterExecutor = Executors.newScheduledThreadPool(1);
    try {
        final JobMaster jobMaster = new JobMaster(jmResourceId, jobGraph, new Configuration(), rpc, haServices, heartbeatServices, jobMasterExecutor, mock(BlobLibraryCacheManager.class), mock(RestartStrategyFactory.class), Time.of(10, TimeUnit.SECONDS), null, mock(OnCompletionActions.class), testingFatalErrorHandler, new FlinkUserCodeClassLoader(new URL[0]));
        // also start the heartbeat manager in job manager
        jobMaster.start(jmLeaderId);
        // register task manager will trigger monitoring heartbeat target, schedule heartbeat request in interval time
        jobMaster.registerTaskManager(taskManagerAddress, taskManagerLocation, jmLeaderId);
        ArgumentCaptor<Runnable> heartbeatRunnableCaptor = ArgumentCaptor.forClass(Runnable.class);
        verify(scheduledExecutor, times(1)).scheduleAtFixedRate(heartbeatRunnableCaptor.capture(), eq(0L), eq(heartbeatInterval), eq(TimeUnit.MILLISECONDS));
        Runnable heartbeatRunnable = heartbeatRunnableCaptor.getValue();
        ArgumentCaptor<Runnable> timeoutRunnableCaptor = ArgumentCaptor.forClass(Runnable.class);
        verify(scheduledExecutor).schedule(timeoutRunnableCaptor.capture(), eq(heartbeatTimeout), eq(TimeUnit.MILLISECONDS));
        Runnable timeoutRunnable = timeoutRunnableCaptor.getValue();
        // run the first heartbeat request
        heartbeatRunnable.run();
        verify(taskExecutorGateway, times(1)).heartbeatFromJobManager(eq(jmResourceId));
        // run the timeout runnable to simulate a heartbeat timeout
        timeoutRunnable.run();
        verify(taskExecutorGateway).disconnectJobManager(eq(jobGraph.getJobID()), any(TimeoutException.class));
        // check if a concurrent error occurred
        testingFatalErrorHandler.rethrowError();
    } finally {
        try {
            rpc.stopService();
        } finally {
            // release the pool's worker threads even if stopping the rpc service failed
            jobMasterExecutor.shutdownNow();
        }
    }
}
Also used : BlobLibraryCacheManager(org.apache.flink.runtime.execution.librarycache.BlobLibraryCacheManager) Configuration(org.apache.flink.configuration.Configuration) TestingLeaderRetrievalService(org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService) FlinkUserCodeClassLoader(org.apache.flink.runtime.execution.librarycache.FlinkUserCodeClassLoader) URL(java.net.URL) ScheduledExecutor(org.apache.flink.runtime.concurrent.ScheduledExecutor) TestingHighAvailabilityServices(org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices) ResourceID(org.apache.flink.runtime.clusterframework.types.ResourceID) TestingSerialRpcService(org.apache.flink.runtime.rpc.TestingSerialRpcService) UUID(java.util.UUID) TimeoutException(java.util.concurrent.TimeoutException) TestingFatalErrorHandler(org.apache.flink.runtime.util.TestingFatalErrorHandler) HeartbeatServices(org.apache.flink.runtime.heartbeat.HeartbeatServices) TaskManagerLocation(org.apache.flink.runtime.taskmanager.TaskManagerLocation) TaskExecutorGateway(org.apache.flink.runtime.taskexecutor.TaskExecutorGateway) CheckpointRecoveryFactory(org.apache.flink.runtime.checkpoint.CheckpointRecoveryFactory) OnCompletionActions(org.apache.flink.runtime.jobmanager.OnCompletionActions) JobGraph(org.apache.flink.runtime.jobgraph.JobGraph) RestartStrategyFactory(org.apache.flink.runtime.executiongraph.restart.RestartStrategyFactory) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)

Aggregations

URL (java.net.URL)2 TimeoutException (java.util.concurrent.TimeoutException)2 FlinkUserCodeClassLoader (org.apache.flink.runtime.execution.librarycache.FlinkUserCodeClassLoader)2 IOException (java.io.IOException)1 InetSocketAddress (java.net.InetSocketAddress)1 UUID (java.util.UUID)1 Configuration (org.apache.flink.configuration.Configuration)1 BlobCache (org.apache.flink.runtime.blob.BlobCache)1 BlobKey (org.apache.flink.runtime.blob.BlobKey)1 CheckpointRecoveryFactory (org.apache.flink.runtime.checkpoint.CheckpointRecoveryFactory)1 ResourceID (org.apache.flink.runtime.clusterframework.types.ResourceID)1 ScheduledExecutor (org.apache.flink.runtime.concurrent.ScheduledExecutor)1 BlobLibraryCacheManager (org.apache.flink.runtime.execution.librarycache.BlobLibraryCacheManager)1 RestartStrategyFactory (org.apache.flink.runtime.executiongraph.restart.RestartStrategyFactory)1 HeartbeatServices (org.apache.flink.runtime.heartbeat.HeartbeatServices)1 TestingHighAvailabilityServices (org.apache.flink.runtime.highavailability.TestingHighAvailabilityServices)1 JobGraph (org.apache.flink.runtime.jobgraph.JobGraph)1 OnCompletionActions (org.apache.flink.runtime.jobmanager.OnCompletionActions)1 TestingLeaderRetrievalService (org.apache.flink.runtime.leaderelection.TestingLeaderRetrievalService)1 JobManagerMessages (org.apache.flink.runtime.messages.JobManagerMessages)1