Use of org.apache.flink.runtime.execution.librarycache.FlinkUserCodeClassLoader in project flink by apache.
The class JobClient, method retrieveClassLoader.
/**
 * Reconstructs the class loader by first requesting information about it at the JobManager
 * and then downloading missing jar files.
 *
 * @param jobID id of job
 * @param jobManager gateway to the JobManager
 * @param config the flink configuration
 * @return A classloader that should behave like the original classloader
 * @throws JobRetrievalException if anything goes wrong
 */
public static ClassLoader retrieveClassLoader(JobID jobID, ActorGateway jobManager, Configuration config) throws JobRetrievalException {
    final Object jmAnswer;
    try {
        jmAnswer = Await.result(
            jobManager.ask(
                new JobManagerMessages.RequestClassloadingProps(jobID),
                AkkaUtils.getDefaultTimeoutAsFiniteDuration()),
            AkkaUtils.getDefaultTimeoutAsFiniteDuration());
    } catch (Exception e) {
        throw new JobRetrievalException(jobID, "Couldn't retrieve class loading properties from JobManager.", e);
    }

    if (jmAnswer instanceof JobManagerMessages.ClassloadingProps) {
        JobManagerMessages.ClassloadingProps props = (JobManagerMessages.ClassloadingProps) jmAnswer;

        Option<String> jmHost = jobManager.actor().path().address().host();
        String jmHostname = jmHost.isDefined() ? jmHost.get() : "localhost";
        InetSocketAddress serverAddress = new InetSocketAddress(jmHostname, props.blobManagerPort());

        final BlobCache blobClient;
        try {
            blobClient = new BlobCache(serverAddress, config);
        } catch (IOException e) {
            throw new JobRetrievalException(jobID, "Failed to setup blob cache", e);
        }

        final Collection<BlobKey> requiredJarFiles = props.requiredJarFiles();
        final Collection<URL> requiredClasspaths = props.requiredClasspaths();
        final URL[] allURLs = new URL[requiredJarFiles.size() + requiredClasspaths.size()];

        int pos = 0;
        for (BlobKey blobKey : requiredJarFiles) {
            try {
                allURLs[pos++] = blobClient.getURL(blobKey);
            } catch (Exception e) {
                blobClient.shutdown();
                throw new JobRetrievalException(jobID, "Failed to download BlobKey " + blobKey, e);
            }
        }

        for (URL url : requiredClasspaths) {
            allURLs[pos++] = url;
        }

        return new FlinkUserCodeClassLoader(allURLs, JobClient.class.getClassLoader());
    } else if (jmAnswer instanceof JobManagerMessages.JobNotFound) {
        throw new JobRetrievalException(jobID, "Couldn't retrieve class loader. Job " + jobID + " not found");
    } else {
        throw new JobRetrievalException(jobID, "Unknown response from JobManager: " + jmAnswer);
    }
}
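The class loader returned by retrieveClassLoader resolves classes from the job's downloaded jar files while delegating to the client's own class loader as parent. Below is a minimal usage sketch, not taken from Flink's client code; the class name com.example.MyJobClass is hypothetical and used only for illustration.

// Rebuild the user-code class loader for a running job and use it to resolve a user class.
// "com.example.MyJobClass" is a hypothetical class name, not part of the snippet above.
ClassLoader userCodeLoader = JobClient.retrieveClassLoader(jobID, jobManager, config);

ClassLoader previous = Thread.currentThread().getContextClassLoader();
try {
    // make user classes visible to code that relies on the context class loader
    Thread.currentThread().setContextClassLoader(userCodeLoader);
    Class<?> userClass = Class.forName("com.example.MyJobClass", true, userCodeLoader);
    // ... use userClass here ...
} finally {
    Thread.currentThread().setContextClassLoader(previous);
}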
Use of org.apache.flink.runtime.execution.librarycache.FlinkUserCodeClassLoader in project flink by apache.
The class JobMasterTest, method testHeartbeatTimeoutWithTaskManager.
@Test
public void testHeartbeatTimeoutWithTaskManager() throws Exception {
    final TestingHighAvailabilityServices haServices = new TestingHighAvailabilityServices();
    final TestingLeaderRetrievalService rmLeaderRetrievalService = new TestingLeaderRetrievalService();
    haServices.setResourceManagerLeaderRetriever(rmLeaderRetrievalService);
    haServices.setCheckpointRecoveryFactory(mock(CheckpointRecoveryFactory.class));

    final TestingFatalErrorHandler testingFatalErrorHandler = new TestingFatalErrorHandler();

    final String jobManagerAddress = "jm";
    final UUID jmLeaderId = UUID.randomUUID();
    final ResourceID jmResourceId = new ResourceID(jobManagerAddress);

    final String taskManagerAddress = "tm";
    final ResourceID tmResourceId = new ResourceID(taskManagerAddress);
    final TaskManagerLocation taskManagerLocation = new TaskManagerLocation(tmResourceId, InetAddress.getLoopbackAddress(), 1234);
    final TaskExecutorGateway taskExecutorGateway = mock(TaskExecutorGateway.class);

    final TestingSerialRpcService rpc = new TestingSerialRpcService();
    rpc.registerGateway(taskManagerAddress, taskExecutorGateway);

    final long heartbeatInterval = 1L;
    final long heartbeatTimeout = 5L;
    final ScheduledExecutor scheduledExecutor = mock(ScheduledExecutor.class);
    final HeartbeatServices heartbeatServices = new TestingHeartbeatServices(heartbeatInterval, heartbeatTimeout, scheduledExecutor);

    final JobGraph jobGraph = new JobGraph();

    try {
        final JobMaster jobMaster = new JobMaster(
            jmResourceId,
            jobGraph,
            new Configuration(),
            rpc,
            haServices,
            heartbeatServices,
            Executors.newScheduledThreadPool(1),
            mock(BlobLibraryCacheManager.class),
            mock(RestartStrategyFactory.class),
            Time.of(10, TimeUnit.SECONDS),
            null,
            mock(OnCompletionActions.class),
            testingFatalErrorHandler,
            new FlinkUserCodeClassLoader(new URL[0]));

        // also start the heartbeat manager in job manager
        jobMaster.start(jmLeaderId);

        // registering the task manager triggers monitoring of the heartbeat target,
        // which schedules the heartbeat request at the configured interval
        jobMaster.registerTaskManager(taskManagerAddress, taskManagerLocation, jmLeaderId);

        ArgumentCaptor<Runnable> heartbeatRunnableCaptor = ArgumentCaptor.forClass(Runnable.class);
        verify(scheduledExecutor, times(1)).scheduleAtFixedRate(heartbeatRunnableCaptor.capture(), eq(0L), eq(heartbeatInterval), eq(TimeUnit.MILLISECONDS));
        Runnable heartbeatRunnable = heartbeatRunnableCaptor.getValue();

        ArgumentCaptor<Runnable> timeoutRunnableCaptor = ArgumentCaptor.forClass(Runnable.class);
        verify(scheduledExecutor).schedule(timeoutRunnableCaptor.capture(), eq(heartbeatTimeout), eq(TimeUnit.MILLISECONDS));
        Runnable timeoutRunnable = timeoutRunnableCaptor.getValue();

        // run the first heartbeat request
        heartbeatRunnable.run();
        verify(taskExecutorGateway, times(1)).heartbeatFromJobManager(eq(jmResourceId));

        // run the timeout runnable to simulate a heartbeat timeout
        timeoutRunnable.run();
        verify(taskExecutorGateway).disconnectJobManager(eq(jobGraph.getJobID()), any(TimeoutException.class));

        // check if a concurrent error occurred
        testingFatalErrorHandler.rethrowError();
    } finally {
        rpc.stopService();
    }
}
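In this test, FlinkUserCodeClassLoader is constructed with an empty URL array, so it contributes no jar files of its own and effectively resolves everything through its parent. Based only on the two constructors used in these snippets, here is a minimal sketch of such a class as a thin URLClassLoader subclass; this is an assumption for illustration, and the actual Flink class may carry additional logic.

import java.net.URL;
import java.net.URLClassLoader;

// Sketch only: a user-code class loader exposing the two constructors seen above.
public class FlinkUserCodeClassLoader extends URLClassLoader {

    // Default to the class loader that loaded this class as the parent.
    public FlinkUserCodeClassLoader(URL[] urls) {
        this(urls, FlinkUserCodeClassLoader.class.getClassLoader());
    }

    public FlinkUserCodeClassLoader(URL[] urls, ClassLoader parent) {
        super(urls, parent);
    }
}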