use of org.apache.flink.runtime.blob.BlobKey in project flink by apache.
the class JobSubmitTest method testFailureWhenJarBlobsMissing.
/**
 * Submits a job whose graph references two blob keys, one of which has been deleted
 * from the blob server again, and checks that a resulting {@link JobExecutionException}
 * is caused by an {@link IOException}.
 *
 * <p>NOTE(review): if {@code Await.result} on the submission returns normally, no
 * assertion is executed and the test passes; confirm whether that is intended.
 */
@Test
public void testFailureWhenJarBlobsMissing() {
    try {
        // build a minimal job graph consisting of a single no-op vertex
        final JobVertex vertex = new JobVertex("Test Vertex");
        vertex.setInvokableClass(NoOpInvokable.class);
        final JobGraph jobGraph = new JobGraph("test job", vertex);

        // ask the job manager for the port of its blob manager
        final Future<Object> portFuture = jmGateway.ask(JobManagerMessages.getRequestBlobManagerPort(), timeout);
        final int blobPort = (Integer) Await.result(portFuture, timeout);

        // upload two 10-byte blobs, then remove the second one again so that
        // the job graph ends up referencing a blob that no longer exists
        BlobKey presentKey;
        BlobKey deletedKey;
        final InetSocketAddress blobServerAddress = new InetSocketAddress("localhost", blobPort);
        final BlobClient blobClient = new BlobClient(blobServerAddress, jmConfig);
        try {
            presentKey = blobClient.put(new byte[10]);
            deletedKey = blobClient.put(new byte[10]);
            blobClient.delete(deletedKey);
        } finally {
            blobClient.close();
        }

        // register both keys (including the deleted one) as job dependencies
        jobGraph.addBlob(presentKey);
        jobGraph.addBlob(deletedKey);

        // submit the job and inspect the failure
        final JobManagerMessages.SubmitJob submitMessage = new JobManagerMessages.SubmitJob(jobGraph, ListeningBehaviour.EXECUTION_RESULT);
        final Future<Object> submission = jmGateway.ask(submitMessage, timeout);
        try {
            Await.result(submission, timeout);
        } catch (JobExecutionException expected) {
            // the expected outcome: the failure must be rooted in an I/O problem
            assertTrue(expected.getCause() instanceof IOException);
        } catch (Exception unexpected) {
            fail("Wrong exception type");
        }
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
use of org.apache.flink.runtime.blob.BlobKey in project flink by apache.
the class JobClient method retrieveClassLoader.
/**
 * Reconstructs the class loader by first requesting information about it at the JobManager
 * and then downloading missing jar files.
 *
 * <p>The JobManager is asked for the class loading properties of the job. For a
 * {@code ClassloadingProps} answer, a {@link BlobCache} is opened against the JobManager's
 * blob server (falling back to {@code "localhost"} when the actor address carries no host),
 * every required jar blob is resolved to a URL, and the required classpath URLs are appended
 * after them. The resulting class loader uses {@code JobClient}'s own class loader as parent.
 *
 * <p>NOTE(review): the blob cache is only shut down when resolving a blob fails; on success
 * it stays open. Presumably the returned class loader depends on the files it holds -
 * confirm before changing its lifecycle.
 *
 * @param jobID id of job
 * @param jobManager gateway to the JobManager
 * @param config the flink configuration
 * @return A classloader that should behave like the original classloader
 * @throws JobRetrievalException if the JobManager cannot be queried, the job is unknown,
 *         the answer is of an unexpected type, the blob cache cannot be set up, or a
 *         required blob cannot be resolved
 */
public static ClassLoader retrieveClassLoader(JobID jobID, ActorGateway jobManager, Configuration config) throws JobRetrievalException {
    final Object jmAnswer;
    try {
        jmAnswer = Await.result(jobManager.ask(new JobManagerMessages.RequestClassloadingProps(jobID), AkkaUtils.getDefaultTimeoutAsFiniteDuration()), AkkaUtils.getDefaultTimeoutAsFiniteDuration());
    } catch (Exception e) {
        if (e instanceof InterruptedException) {
            // the broad catch would otherwise swallow the interruption; restore the
            // flag so that callers further up the stack can still observe it
            Thread.currentThread().interrupt();
        }
        throw new JobRetrievalException(jobID, "Couldn't retrieve class loading properties from JobManager.", e);
    }

    if (jmAnswer instanceof JobManagerMessages.ClassloadingProps) {
        JobManagerMessages.ClassloadingProps props = ((JobManagerMessages.ClassloadingProps) jmAnswer);

        // the blob server is addressed via the JobManager's host, if the actor path has one
        Option<String> jmHost = jobManager.actor().path().address().host();
        String jmHostname = jmHost.isDefined() ? jmHost.get() : "localhost";
        InetSocketAddress serverAddress = new InetSocketAddress(jmHostname, props.blobManagerPort());

        final BlobCache blobClient;
        try {
            blobClient = new BlobCache(serverAddress, config);
        } catch (IOException e) {
            throw new JobRetrievalException(jobID, "Failed to setup blob cache", e);
        }

        final Collection<BlobKey> requiredJarFiles = props.requiredJarFiles();
        final Collection<URL> requiredClasspaths = props.requiredClasspaths();
        final URL[] allURLs = new URL[requiredJarFiles.size() + requiredClasspaths.size()];

        int pos = 0;
        // iterate the very collection the array was sized from, so that the array
        // bounds and the number of iterations cannot disagree
        for (BlobKey blobKey : requiredJarFiles) {
            try {
                allURLs[pos++] = blobClient.getURL(blobKey);
            } catch (Exception e) {
                try {
                    blobClient.shutdown();
                } catch (Exception shutdownFailure) {
                    // a failing cleanup must not hide the reason why the download failed
                    e.addSuppressed(shutdownFailure);
                }
                throw new JobRetrievalException(jobID, "Failed to download BlobKey " + blobKey, e);
            }
        }
        // classpath URLs follow the jar URLs
        for (URL url : requiredClasspaths) {
            allURLs[pos++] = url;
        }

        return new FlinkUserCodeClassLoader(allURLs, JobClient.class.getClassLoader());
    } else if (jmAnswer instanceof JobManagerMessages.JobNotFound) {
        throw new JobRetrievalException(jobID, "Couldn't retrieve class loader. Job " + jobID + " not found");
    } else {
        throw new JobRetrievalException(jobID, "Unknown response from JobManager: " + jmAnswer);
    }
}
use of org.apache.flink.runtime.blob.BlobKey in project flink by apache.
the class BlobLibraryCacheManager method registerTask.
/**
 * Registers a task execution for the given job with the library cache.
 *
 * <p>If the job already has a cache entry, the task is simply registered with it.
 * Otherwise a reference is taken on every required jar blob, the resulting URLs are
 * combined with the required classpath URLs (jars first, classpaths after), and a new
 * cache entry is stored for the job. All of this happens while holding {@code lockObject}.
 *
 * @param jobId the job the task belongs to; must not be null
 * @param task the execution attempt to register; must not be null
 * @param requiredJarFiles blob keys of the required jars; {@code null} is treated as empty
 * @param requiredClasspaths additional classpath URLs; {@code null} is treated as empty
 * @throws IOException if taking a reference on one of the jar blobs fails
 */
@Override
public void registerTask(JobID jobId, ExecutionAttemptID task, Collection<BlobKey> requiredJarFiles, Collection<URL> requiredClasspaths) throws IOException {
    checkNotNull(jobId, "The JobId must not be null.");
    checkNotNull(task, "The task execution id must not be null.");

    // normalize absent collections to empty ones
    if (requiredJarFiles == null) {
        requiredJarFiles = Collections.emptySet();
    }
    if (requiredClasspaths == null) {
        requiredClasspaths = Collections.emptySet();
    }

    synchronized (lockObject) {
        final LibraryCacheEntry existing = cacheEntries.get(jobId);
        if (existing != null) {
            // the job is already known: only attach this task to its entry
            existing.register(task, requiredJarFiles);
            return;
        }

        // first registration for this job: build a fresh library cache entry
        final BlobKey[] jarKeys = requiredJarFiles.toArray(new BlobKey[requiredJarFiles.size()]);
        final URL[] allUrls = new URL[jarKeys.length + requiredClasspaths.size()];

        // number of blob keys whose reference has been taken successfully so far
        int registered = 0;
        try {
            while (registered < jarKeys.length) {
                allUrls[registered] = registerReferenceToBlobKeyAndGetURL(jarKeys[registered]);
                registered++;
            }
        } catch (Throwable t) {
            // roll back the references taken before the failure; the roll-back
            // stops at the first error, which is only logged
            try {
                for (int undo = 0; undo < registered; undo++) {
                    unregisterReferenceToBlobKey(jarKeys[undo]);
                }
            } catch (Throwable tt) {
                LOG.error("Error while updating library reference counters.", tt);
            }

            // rethrow or wrap
            ExceptionUtils.tryRethrowIOException(t);
            throw new IOException("Library cache could not register the user code libraries.", t);
        }

        // the classpath URLs go into the slots behind the jar URLs
        int slot = registered;
        for (URL classpath : requiredClasspaths) {
            allUrls[slot++] = classpath;
        }

        cacheEntries.put(jobId, new LibraryCacheEntry(requiredJarFiles, allUrls, task));
    }
}
use of org.apache.flink.runtime.blob.BlobKey in project flink by apache.
the class TaskManagerTest method testCancellingDependentAndStateUpdateFails.
/**
 * Deploys a "Sender" task and a "Receiver" task ({@code Tasks.BlockingReceiver}) to one
 * TaskManager, where the receiver's input gate reads the sender's pipelined result
 * partition. The receiver is then cancelled, the sender is cancelled as well if it is
 * still RUNNING, and finally the TaskManager must report zero running tasks.
 */
@Test
public void testCancellingDependentAndStateUpdateFails() {
// this test creates two tasks. the sender sends data, and fails to send the
// state update back to the job manager
// the second one blocks to be canceled
// NOTE(review): the failing job manager below is created with eid2, the receiver's
// attempt id; presumably that selects the execution whose state update fails -
// confirm against SimpleLookupFailingUpdateJobManagerCreator
new JavaTestKit(system) {
{
// both gateways start out null so that the finally block can hand them to stopActor
// even if setup fails half-way
ActorGateway jobManager = null;
ActorGateway taskManager = null;
final ActorGateway testActorGateway = new AkkaActorGateway(getTestActor(), leaderSessionID);
try {
final JobID jid = new JobID();
JobVertexID vid1 = new JobVertexID();
JobVertexID vid2 = new JobVertexID();
// eid1 identifies the sender's execution attempt, eid2 the receiver's
final ExecutionAttemptID eid1 = new ExecutionAttemptID();
final ExecutionAttemptID eid2 = new ExecutionAttemptID();
ActorRef jm = system.actorOf(Props.create(new SimpleLookupFailingUpdateJobManagerCreator(leaderSessionID, eid2)));
jobManager = new AkkaActorGateway(jm, leaderSessionID);
taskManager = TestingUtils.createTaskManager(system, jobManager, new Configuration(), true, true);
// final alias so that the anonymous Within class below can capture it
final ActorGateway tm = taskManager;
// one pipelined result partition, produced by the sender ...
IntermediateResultPartitionID partitionId = new IntermediateResultPartitionID();
List<ResultPartitionDeploymentDescriptor> irpdd = new ArrayList<ResultPartitionDeploymentDescriptor>();
irpdd.add(new ResultPartitionDeploymentDescriptor(new IntermediateDataSetID(), partitionId, ResultPartitionType.PIPELINED, 1, 1, true));
// ... and an input gate with a single local channel that reads that partition of attempt eid1
InputGateDeploymentDescriptor ircdd = new InputGateDeploymentDescriptor(new IntermediateDataSetID(), ResultPartitionType.PIPELINED, 0, new InputChannelDeploymentDescriptor[] { new InputChannelDeploymentDescriptor(new ResultPartitionID(partitionId, eid1), ResultPartitionLocation.createLocal()) });
// tdd1 describes the sender (produces irpdd), tdd2 the blocking receiver (consumes ircdd)
final TaskDeploymentDescriptor tdd1 = createTaskDeploymentDescriptor(jid, "TestJob", vid1, eid1, new SerializedValue<>(new ExecutionConfig()), "Sender", 1, 0, 1, 0, new Configuration(), new Configuration(), Tasks.Sender.class.getName(), irpdd, Collections.<InputGateDeploymentDescriptor>emptyList(), new ArrayList<BlobKey>(), Collections.<URL>emptyList(), 0);
final TaskDeploymentDescriptor tdd2 = createTaskDeploymentDescriptor(jid, "TestJob", vid2, eid2, new SerializedValue<>(new ExecutionConfig()), "Receiver", 7, 2, 7, 0, new Configuration(), new Configuration(), Tasks.BlockingReceiver.class.getName(), Collections.<ResultPartitionDeploymentDescriptor>emptyList(), Collections.singletonList(ircdd), new ArrayList<BlobKey>(), Collections.<URL>emptyList(), 0);
new Within(d) {
@Override
protected void run() {
try {
// request the "task is running" notifications before the tasks are submitted
Future<Object> t1Running = tm.ask(new TestingTaskManagerMessages.NotifyWhenTaskIsRunning(eid1), timeout);
Future<Object> t2Running = tm.ask(new TestingTaskManagerMessages.NotifyWhenTaskIsRunning(eid2), timeout);
// the receiver (tdd2) is submitted before the sender (tdd1)
tm.tell(new SubmitTask(tdd2), testActorGateway);
tm.tell(new SubmitTask(tdd1), testActorGateway);
// one acknowledgement is expected per submission
expectMsgEquals(Acknowledge.get());
expectMsgEquals(Acknowledge.get());
// wait for both running notifications to complete
Await.ready(t1Running, d);
Await.ready(t2Running, d);
// fetch the currently running tasks and look up both by their attempt ids
tm.tell(TestingTaskManagerMessages.getRequestRunningTasksMessage(), testActorGateway);
Map<ExecutionAttemptID, Task> tasks = expectMsgClass(TestingTaskManagerMessages.ResponseRunningTasks.class).asJava();
Task t1 = tasks.get(eid1);
Task t2 = tasks.get(eid2);
// cancel the receiver; the cancel request itself must be acknowledged
tm.tell(new CancelTask(eid2), testActorGateway);
expectMsgEquals(Acknowledge.get());
// only wait for the removal if the receiver was part of the running-tasks response
if (t2 != null) {
Future<Object> response = tm.ask(new TestingTaskManagerMessages.NotifyWhenTaskRemoved(eid2), timeout);
Await.ready(response, d);
}
if (t1 != null) {
// the sender is cancelled explicitly only while it is still RUNNING;
// in either case its removal is awaited
if (t1.getExecutionState() == ExecutionState.RUNNING) {
tm.tell(new CancelTask(eid1), testActorGateway);
expectMsgEquals(Acknowledge.get());
}
Future<Object> response = tm.ask(new TestingTaskManagerMessages.NotifyWhenTaskRemoved(eid1), timeout);
Await.ready(response, d);
}
// after both removals the task manager must not report any running task
tm.tell(TestingTaskManagerMessages.getRequestRunningTasksMessage(), testActorGateway);
tasks = expectMsgClass(TestingTaskManagerMessages.ResponseRunningTasks.class).asJava();
assertEquals(0, tasks.size());
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
}
};
} catch (Exception e) {
e.printStackTrace();
fail(e.getMessage());
} finally {
// shut down the actors
TestingUtils.stopActor(taskManager);
TestingUtils.stopActor(jobManager);
}
}
};
}
use of org.apache.flink.runtime.blob.BlobKey in project flink by apache.
the class TaskManagerTest method testSubmitAndExecuteTask.
/**
 * Submits a single task ({@code TestInvokableCorrect}) to a TaskManager whose JobManager
 * gateway forwards to the test actor, and verifies that the test actor observes, in this
 * order: the registration of the TaskManager, the acknowledgement of the submission, the
 * state update to RUNNING and the state update to FINISHED.
 *
 * <p>Each expected message must arrive before a 10 second deadline; a missing message or
 * a receive timeout fails the test explicitly instead of being skipped silently.
 *
 * @throws IOException declared by the test signature
 */
@Test
public void testSubmitAndExecuteTask() throws IOException {
    new JavaTestKit(system) {
        {
            ActorGateway taskManager = null;
            final ActorGateway jobManager = TestingUtils.createForwardingActor(system, getTestActor(), Option.<String>empty());
            try {
                taskManager = TestingUtils.createTaskManager(system, jobManager, new Configuration(), true, false);
                // final alias so that the anonymous classes below can capture it
                final ActorGateway tm = taskManager;

                // handle the registration
                new Within(d) {
                    @Override
                    protected void run() {
                        expectMsgClass(RegistrationMessages.RegisterTaskManager.class);
                        final InstanceID iid = new InstanceID();
                        assertEquals(tm.actor(), getLastSender());
                        tm.tell(new RegistrationMessages.AcknowledgeRegistration(iid, 12345), jobManager);
                    }
                };

                final JobID jid = new JobID();
                final JobVertexID vid = new JobVertexID();
                final ExecutionAttemptID eid = new ExecutionAttemptID();
                final SerializedValue<ExecutionConfig> executionConfig = new SerializedValue<>(new ExecutionConfig());
                final TaskDeploymentDescriptor tdd = createTaskDeploymentDescriptor(jid, "TestJob", vid, eid, executionConfig, "TestTask", 7, 2, 7, 0, new Configuration(), new Configuration(), TestInvokableCorrect.class.getName(), Collections.<ResultPartitionDeploymentDescriptor>emptyList(), Collections.<InputGateDeploymentDescriptor>emptyList(), new ArrayList<BlobKey>(), Collections.<URL>emptyList(), 0);

                new Within(d) {
                    @Override
                    protected void run() {
                        tm.tell(new SubmitTask(tdd), jobManager);

                        // TaskManager should acknowledge the submission
                        // heartbeats may be interleaved
                        awaitMessage(Acknowledge.get(), false);

                        // task should have switched to running
                        Object toRunning = new TaskMessages.UpdateTaskExecutionState(new TaskExecutionState(jid, eid, ExecutionState.RUNNING));
                        // task should have switched to finished
                        Object toFinished = new TaskMessages.UpdateTaskExecutionState(new TaskExecutionState(jid, eid, ExecutionState.FINISHED));

                        awaitMessage(toRunning, true);
                        awaitMessage(toFinished, true);
                    }

                    /**
                     * Receives messages until one equals {@code expected}, failing the test
                     * if it has not been seen before a 10 second deadline.
                     *
                     * @param expected the message to wait for
                     * @param onlyHeartbeatsInterleaved if true, any other message that is not a
                     *        heartbeat fails the test; if false, other messages are skipped
                     */
                    private void awaitMessage(Object expected, boolean onlyHeartbeatsInterleaved) {
                        final long deadline = System.currentTimeMillis() + 10000;
                        do {
                            Object message = receiveOne(d);
                            if (message == null) {
                                // nothing arrived within d; report that instead of running
                                // into a NullPointerException on the comparison below
                                fail("Timed out while waiting for message: " + expected);
                            } else if (message.equals(expected)) {
                                return;
                            } else if (onlyHeartbeatsInterleaved && !(message instanceof TaskManagerMessages.Heartbeat)) {
                                fail("Unexpected message: " + message);
                            }
                        } while (System.currentTimeMillis() < deadline);

                        // the deadline passed with only other traffic; without this the
                        // test would carry on as if the expected message had been seen
                        fail("Did not receive the expected message before the deadline: " + expected);
                    }
                };
            } finally {
                // shut down the actors
                TestingUtils.stopActor(taskManager);
                TestingUtils.stopActor(jobManager);
            }
        }
    };
}
Aggregations