Use of org.apache.ignite.internal.GridJobSiblingsRequest in the Apache Ignite project.
Class GridJobProcessor, method requestJobSiblings.
/**
 * Requests the job siblings of the given session from the node that originated the task and
 * blocks the calling thread until a response arrives, the originating node leaves or fails,
 * or the configured network timeout elapses.
 *
 * @param ses Session.
 * @return Siblings.
 * @throws IgniteCheckedException If the originating node is not in topology, an unexpected or
 *      un-unmarshallable response is received, the wait times out, or the calling thread is
 *      interrupted (in which case the thread's interrupt status is restored before throwing).
 */
public Collection<ComputeJobSibling> requestJobSiblings(final ComputeTaskSession ses) throws IgniteCheckedException {
    assert ses != null;

    final UUID taskNodeId = ses.getTaskNodeId();

    ClusterNode taskNode = ctx.discovery().node(taskNodeId);

    if (taskNode == null)
        throw new IgniteCheckedException("Node that originated task execution has left grid: " + taskNodeId);

    // Tuple: error message-response. Only the first outcome (response or node departure) is recorded.
    final IgniteBiTuple<String, GridJobSiblingsResponse> t = new IgniteBiTuple<>();

    final Lock lock = new ReentrantLock();
    final Condition cond = lock.newCondition();

    GridMessageListener msgLsnr = new GridMessageListener() {
        @Override
        public void onMessage(UUID nodeId, Object msg) {
            String err = null;
            GridJobSiblingsResponse res = null;

            if (!(msg instanceof GridJobSiblingsResponse))
                err = "Received unexpected message: " + msg;
            else if (!nodeId.equals(taskNodeId))
                err = "Received job siblings response from unexpected node [taskNodeId=" + taskNodeId + ", nodeId=" + nodeId + ']';
            else {
                // Sender and message type are fine.
                res = (GridJobSiblingsResponse) msg;

                // Siblings are not yet available on the response object, so unmarshal them here.
                if (res.jobSiblings() == null) {
                    try {
                        res.unmarshalSiblings(marsh);
                    } catch (IgniteCheckedException e) {
                        U.error(log, "Failed to unmarshal job siblings.", e);

                        err = e.getMessage();
                    }
                }
            }

            lock.lock();

            try {
                // First writer wins; later messages are ignored.
                if (t.isEmpty()) {
                    t.set(err, res);

                    cond.signalAll();
                }
            } finally {
                lock.unlock();
            }
        }
    };

    GridLocalEventListener discoLsnr = new GridLocalEventListener() {
        @Override
        public void onEvent(Event evt) {
            assert evt instanceof DiscoveryEvent && (evt.type() == EVT_NODE_FAILED || evt.type() == EVT_NODE_LEFT) : "Unexpected event: " + evt;

            DiscoveryEvent discoEvt = (DiscoveryEvent) evt;

            // Only the departure of the task-originating node ends the wait.
            if (taskNodeId.equals(discoEvt.eventNode().id())) {
                lock.lock();

                try {
                    if (t.isEmpty()) {
                        t.set("Node that originated task execution has left grid: " + taskNodeId, null);

                        cond.signalAll();
                    }
                } finally {
                    lock.unlock();
                }
            }
        }
    };

    boolean loc = ctx.localNodeId().equals(taskNodeId);

    // 1. Create unique topic name.
    Object topic = TOPIC_JOB_SIBLINGS.topic(ses.getId(), topicIdGen.getAndIncrement());

    try {
        // 2. Register listener.
        ctx.io().addMessageListener(topic, msgLsnr);

        // 3. Send message. The topic is passed as an object for a local request and in marshalled form otherwise.
        ctx.io().sendToGridTopic(taskNode, TOPIC_JOB_SIBLINGS, new GridJobSiblingsRequest(ses.getId(), loc ? topic : null, loc ? null : U.marshal(marsh, topic)), SYSTEM_POOL);

        // 4. Listen to discovery events.
        ctx.event().addLocalEventListener(discoLsnr, EVT_NODE_FAILED, EVT_NODE_LEFT);

        // 5. Check whether node has left before disco listener has been installed.
        taskNode = ctx.discovery().node(taskNodeId);

        if (taskNode == null)
            throw new IgniteCheckedException("Node that originated task execution has left grid: " + taskNodeId);

        // 6. Wait for result.
        lock.lock();

        try {
            long netTimeout = ctx.config().getNetworkTimeout();

            // Wait in a loop: Condition waits may wake up spuriously, so keep waiting
            // until a result is recorded or the full timeout has actually elapsed.
            long remainingNanos = MILLISECONDS.toNanos(netTimeout);

            while (t.isEmpty() && remainingNanos > 0)
                remainingNanos = cond.awaitNanos(remainingNanos);

            if (t.isEmpty())
                throw new IgniteCheckedException("Timed out waiting for job siblings (consider increasing " + "'networkTimeout' configuration property) [ses=" + ses + ", netTimeout=" + netTimeout + ']');

            // Error is set?
            if (t.get1() != null)
                throw new IgniteCheckedException(t.get1());
            else
                // Return result
                return t.get2().jobSiblings();
        } catch (InterruptedException e) {
            // Restore the interrupt status so callers up the stack can observe the interruption.
            Thread.currentThread().interrupt();

            throw new IgniteCheckedException("Interrupted while waiting for job siblings response: " + ses, e);
        } finally {
            lock.unlock();
        }
    } finally {
        // Always unregister both listeners, whatever the outcome.
        ctx.io().removeMessageListener(topic, msgLsnr);
        ctx.event().removeLocalEventListener(discoLsnr);
    }
}
Aggregations