use of org.apache.ignite.internal.GridJobResultImpl in project ignite by apache.
the class GridTaskWorker method processMappedJobs.
/**
 * Registers every mapped job with this task and dispatches it to its target node.
 * <p>
 * A job result holder and a sibling are created per mapping, the holders are stored in
 * {@code jobRes} under {@code mux} (each marked as occupied until its request has been sent),
 * and finally the requests are sent with jobs mapped to the local node placed last.
 *
 * @param jobs Map of jobs.
 * @throws IgniteCheckedException Thrown in case of any error.
 */
private void processMappedJobs(Map<? extends ComputeJob, ClusterNode> jobs) throws IgniteCheckedException {
    if (F.isEmpty(jobs))
        return;

    List<GridJobResultImpl> pending = new ArrayList<>(jobs.size());
    Collection<ComputeJobSibling> siblings = new ArrayList<>(jobs.size());

    // Build a result holder (and a sibling) for every job-to-node mapping.
    for (Map.Entry<? extends ComputeJob, ClusterNode> entry : jobs.entrySet()) {
        ComputeJob mappedJob = entry.getKey();
        ClusterNode target = entry.getValue();

        if (mappedJob == null)
            throw new IgniteCheckedException("Job can not be null [mappedJob=" + entry + ", ses=" + ses + ']');

        if (target == null)
            throw new IgniteCheckedException("Node can not be null [mappedJob=" + entry + ", ses=" + ses + ']');

        IgniteUuid id = IgniteUuid.fromUuid(ctx.localNodeId());

        GridJobSiblingImpl sibling = new GridJobSiblingImpl(ses.getId(), id, target.id(), ctx);

        pending.add(new GridJobResultImpl(mappedJob, id, target, sibling));

        // Siblings are tracked only while the result cache is enabled.
        if (resCache)
            siblings.add(sibling);

        recordJobEvent(EVT_JOB_MAPPED, id, target, null, "Job got mapped.");
    }

    synchronized (mux) {
        if (state != State.WAITING)
            throw new IgniteCheckedException("Task is not in waiting state [state=" + state + ", ses=" + ses + ']');

        // Siblings are tracked only while the result cache is enabled.
        if (resCache)
            ses.addJobSiblings(siblings);

        if (jobRes == null)
            jobRes = new HashMap<>();

        // Put all holders into the map before any request is sent, so that a result
        // arriving while requests are still being sent out finds its holder.
        for (GridJobResultImpl holder : pending) {
            IgniteUuid id = holder.getJobContext().getJobId();

            if (jobRes.put(id, holder) != null)
                throw new IgniteCheckedException("Duplicate job ID for remote job found: " + id);

            // Stays occupied until the request has been sent (released in the send loop below).
            holder.setOccupied(true);

            if (resCache) {
                int registered = jobRes.size();

                if (registered > ctx.discovery().size() && registered % SPLIT_WARN_THRESHOLD == 0)
                    LT.warn(log, "Number of jobs in task is too large for task: " + ses.getTaskName() + ". Consider reducing number of jobs or disabling job result cache with " + "@ComputeTaskNoResultCache annotation.");
            }
        }
    }

    ses.jobNodes(F.viewReadOnly(jobs.values(), F.node2id()));

    evtLsnr.onJobsMapped(this);

    // Set mapped flag.
    ses.onMapped();

    // Push local jobs to the tail of the list: they are invoked in the current
    // thread, which would otherwise hold back the remaining jobs.
    int tail = pending.size();

    if (tail > 1) {
        UUID locId = ctx.discovery().localNode().id();

        int idx = 0;

        while (idx < tail) {
            UUID ownerId = pending.get(idx).getNode().id();

            if (ownerId.equals(locId) && idx < tail - 1) {
                Collections.swap(pending, idx, tail - 1);

                // Shrink the unsorted range; the element swapped into idx is examined next.
                tail--;
            }
            else
                idx++;
        }
    }

    // Send out all mapped jobs.
    for (GridJobResultImpl holder : pending) {
        evtLsnr.onJobSend(this, holder.getSibling());

        try {
            sendRequest(holder);
        }
        finally {
            // Open job for processing results.
            synchronized (mux) {
                holder.setOccupied(false);
            }
        }
    }

    processDelayedResponses();
}
use of org.apache.ignite.internal.GridJobResultImpl in project ignite by apache.
the class GridTaskWorker method jobStatuses.
/**
 * Gathers job statistics on the local node. Only jobs that already have a
 * response, or that are mapped to the local node, contribute to the result.
 *
 * @return Job statistics for the task. Mapping: Job status -> count of jobs.
 */
Map<ComputeJobStatusEnum, Long> jobStatuses() {
    List<GridJobResultImpl> snapshot = null;

    // Copy the current results under the lock and work on the copy afterwards.
    synchronized (mux) {
        if (jobRes != null)
            snapshot = new ArrayList<>(jobRes.values());
    }

    // Jobs have not been mapped yet.
    if (F.isEmpty(snapshot))
        return emptyMap();

    UUID locId = ctx.localNodeId();

    Map<ComputeJobStatusEnum, Long> totals = new EnumMap<>(ComputeJobStatusEnum.class);

    boolean hasUnansweredLocJob = false;

    for (GridJobResultImpl jr : snapshot) {
        if (!jr.hasResponse()) {
            // Remember whether at least one job without a response is mapped to the local node.
            if (!hasUnansweredLocJob && locId.equals(jr.getNode().id()))
                hasUnansweredLocJob = true;

            continue;
        }

        ComputeJobStatusEnum status = jr.isCancelled() ? CANCELLED
            : jr.getException() != null ? FAILED
            : FINISHED;

        totals.merge(status, 1L, Long::sum);
    }

    if (hasUnansweredLocJob) {
        // Add the statuses reported by the job processor for this task session.
        Map<ComputeJobStatusEnum, Long> locStatuses = ctx.job().jobStatuses(getTaskSessionId());

        for (Map.Entry<ComputeJobStatusEnum, Long> entry : locStatuses.entrySet())
            totals.merge(entry.getKey(), entry.getValue(), Long::sum);
    }

    return totals;
}
use of org.apache.ignite.internal.GridJobResultImpl in project ignite by apache.
the class GridTaskWorker method onNodeLeft.
/**
 * Reacts to a node leaving: for every job mapped to that node which has no response yet,
 * a fake response carrying a {@link ClusterTopologyException} is created and then passed
 * to {@link #onResponse(GridJobExecuteResponse)}.
 *
 * @param nodeId Node ID.
 */
void onNodeLeft(UUID nodeId) {
    Collection<GridJobExecuteResponse> fakeResps = null;

    synchronized (mux) {
        // Nothing to do unless the task is still waiting for responses.
        if (state != State.WAITING)
            return;

        if (jobRes != null) {
            for (GridJobResultImpl jobResult : jobRes.values()) {
                // Skip jobs that already have a response or are mapped to another node.
                if (jobResult.hasResponse() || !jobResult.getNode().id().equals(nodeId))
                    continue;

                if (log.isDebugEnabled())
                    log.debug("Creating fake response because node left grid [job=" + jobResult.getJob() + ", nodeId=" + nodeId + ']');

                // Artificial response for a job that is waiting for a response
                // from a node that no longer exists.
                GridJobExecuteResponse fake = new GridJobExecuteResponse(
                    nodeId,
                    ses.getId(),
                    jobResult.getJobContext().getJobId(),
                    null,
                    null,
                    null,
                    null,
                    null,
                    null,
                    false,
                    null);

                fake.setFakeException(new ClusterTopologyException("Node has left grid: " + nodeId));

                if (fakeResps == null)
                    fakeResps = new ArrayList<>();

                fakeResps.add(fake);
            }
        }
    }

    if (fakeResps == null)
        return;

    // Simulate responses without holding synchronization.
    for (GridJobExecuteResponse fake : fakeResps) {
        if (log.isDebugEnabled())
            log.debug("Simulating fake response from left node [res=" + fake + ", nodeId=" + nodeId + ']');

        onResponse(fake);
    }
}
use of org.apache.ignite.internal.GridJobResultImpl in project ignite by apache.
the class GridTaskWorker method onResponse.
/**
* Handles a job execution response for this task and then keeps polling {@code delayedRess}
* for further responses until the queue returns {@code null} or the method returns early.
* <p>
* For each response the method looks up the matching {@link GridJobResultImpl} under {@code mux},
* stores the (possibly unmarshalled) result on it, obtains a {@link ComputeJobResultPolicy} from
* {@code result(..)} and then retries, fails over, keeps waiting or reduces accordingly.
* A response that arrives while its job is occupied, or while {@code lockRespProc} is set,
* is put into {@code delayedRess} instead of being processed.
*
* @param msg Job execution response.
*/
void onResponse(GridJobExecuteResponse msg) {
assert msg != null;
if (fut.isDone()) {
if (log.isDebugEnabled())
log.debug("Ignoring job response since task has finished: " + msg);
return;
}
GridJobExecuteResponse res = msg;
// Each iteration handles one response; the next one is taken from the delayed queue.
while (res != null) {
GridJobResultImpl jobRes = null;
// Flag indicating whether occupied flag for
// job response was changed in this method apply.
boolean selfOccupied = false;
// Future to wait for before the response is re-processed (see the listener at the end of the loop body).
IgniteInternalFuture<?> affFut = null;
boolean waitForAffTop = false;
// Final copy of the current response for the anonymous classes below ('res' is reassigned in this loop).
final GridJobExecuteResponse failoverRes = res;
try {
synchronized (mux) {
// If task is not waiting for responses,
// then there is no point to proceed.
if (state != State.WAITING) {
if (log.isDebugEnabled())
log.debug("Ignoring response since task is already reducing or finishing [res=" + res + ", job=" + ses + ", state=" + state + ']');
return;
}
jobRes = this.jobRes.get(res.getJobId());
if (jobRes == null) {
// NOTE(review): the warning is emitted only when debug logging is enabled - confirm this is intended.
if (log.isDebugEnabled())
U.warn(log, "Received response for unknown child job (was job presumed failed?): " + res);
res = delayedRess.poll();
// We can not return here because there can be more delayed messages in the queue.
continue;
}
// Only process the first response and ignore the following ones. This scenario
// is possible if node has left topology and fake failure response
// was created from discovery listener and when sending request failed.
if (jobRes.hasResponse()) {
if (log.isDebugEnabled())
log.debug("Received redundant response for a job (will ignore): " + res);
res = delayedRess.poll();
// We can not return here because there can be more delayed messages in the queue.
continue;
}
if (!jobRes.getNode().id().equals(res.getNodeId())) {
if (log.isDebugEnabled())
log.debug("Ignoring stale response as job was already resent to other node [res=" + res + ", jobRes=" + jobRes + ']');
// Prevent processing 2 responses for the same job simultaneously.
jobRes.setOccupied(true);
// Setting this flag makes the finally block release the job and poll the next delayed response.
// NOTE(review): the finally block also clears lockRespProc on this path, although
// this path never set it - confirm this is intended.
selfOccupied = true;
// We can not return here because there can be more delayed messages in the queue.
continue;
}
if (jobRes.isOccupied()) {
if (log.isDebugEnabled())
log.debug("Adding response to delayed queue (job is either being sent or processing " + "another response): " + res);
delayedRess.offer(res);
return;
}
// Another response is currently being processed: queue this one for later.
if (lockRespProc) {
delayedRess.offer(res);
return;
}
lockRespProc = true;
selfOccupied = true;
// Prevent processing 2 responses for the same job simultaneously.
jobRes.setOccupied(true);
// We don't keep reference to job if results are not cached.
if (!resCache)
this.jobRes.remove(res.getJobId());
}
// Fake responses (e.g. the ones created in onNodeLeft) carry only an exception.
if (res.getFakeException() != null)
jobRes.onResponse(null, res.getFakeException(), null, false);
else {
ClassLoader clsLdr = dep.classLoader();
try {
// Result objects are taken as is only for a response from the local node
// when marshalling of local jobs is not enabled; otherwise they are unmarshalled.
boolean loc = ctx.localNodeId().equals(res.getNodeId()) && !ctx.config().isMarshalLocalJobs();
Object res0 = loc ? res.getJobResult() : U.unmarshal(marsh, res.getJobResultBytes(), U.resolveClassLoader(clsLdr, ctx.config()));
IgniteException ex = loc ? res.getException() : U.<IgniteException>unmarshal(marsh, res.getExceptionBytes(), U.resolveClassLoader(clsLdr, ctx.config()));
Map<Object, Object> attrs = loc ? res.getJobAttributes() : U.<Map<Object, Object>>unmarshal(marsh, res.getJobAttributesBytes(), U.resolveClassLoader(clsLdr, ctx.config()));
jobRes.onResponse(res0, ex, attrs, res.isCancelled());
if (loc)
ctx.resource().invokeAnnotated(dep, jobRes.getJob(), ComputeJobAfterSend.class);
} catch (IgniteCheckedException e) {
// NOTE(review): the error is logged only when debug logging is enabled, and execution
// continues below after finishTask(..) instead of returning - presumably the state
// check in the next synchronized section stops further processing; confirm.
if (log.isDebugEnabled())
U.error(log, "Error deserializing job response: " + res, e);
finishTask(null, e);
}
}
List<ComputeJobResult> results;
if (!resCache)
results = emptyList();
else {
synchronized (mux) {
results = getRemoteResults();
}
}
ComputeJobResultPolicy plc = result(jobRes, results);
if (plc == null) {
String errMsg = "Failed to obtain remote job result policy for result from ComputeTask.result(..) " + "method that returned null (will fail the whole task): " + jobRes;
finishTask(null, new IgniteCheckedException(errMsg));
return;
}
boolean retry = false;
synchronized (mux) {
// If task is not waiting for responses,
// then there is no point to proceed.
if (state != State.WAITING) {
if (log.isDebugEnabled())
log.debug("Ignoring ComputeTask.result(..) value since task is already reducing or" + "finishing [res=" + res + ", job=" + ses + ", state=" + state + ']');
return;
}
if (res.retry()) {
// Retry is used only with affinity call / run.
assert affCacheIds != null;
retry = true;
mapTopVer = U.max(res.getRetryTopologyVersion(), ctx.cache().context().exchange().readyAffinityVersion());
affFut = ctx.cache().context().exchange().lastTopologyFuture();
// If the topology future is not done yet, drop the recorded response and
// re-process it once the future completes (see the listener below).
if (affFut != null && !affFut.isDone()) {
waitForAffTop = true;
jobRes.resetResponse();
}
} else {
switch(plc) {
// Start reducing all results received so far.
case REDUCE:
{
state = State.REDUCING;
break;
}
// Keep waiting if there are more responses to come,
// otherwise, reduce.
case WAIT:
{
assert results.size() <= this.jobRes.size();
// Reduce once there are no more results to wait for.
// If result cache is disabled, 'results' is an empty list and handled jobs
// are removed from this.jobRes above, so the sizes match only
// when both collections are empty.
if (results.size() == this.jobRes.size()) {
plc = ComputeJobResultPolicy.REDUCE;
// All results are received, proceed to reduce method.
state = State.REDUCING;
}
break;
}
case FAILOVER:
{
if (affCacheIds != null) {
mapTopVer = ctx.cache().context().exchange().readyAffinityVersion();
affFut = ctx.cache().context().exchange().lastTopologyFuture();
if (affFut == null || affFut.isDone()) {
affFut = null;
// Need to asynchronously fetch affinity if cache is not started on node.
if (affCacheName != null && ctx.cache().internalCache(affCacheName) == null) {
affFut = ctx.affinity().affinityCacheFuture(affCacheName, mapTopVer);
if (affFut.isDone())
affFut = null;
}
}
}
// Either wait for the pending future and re-process the response later,
// or fail over right away; a null policy means failover(..) returned false.
if (affFut != null) {
waitForAffTop = true;
jobRes.resetResponse();
} else if (!failover(res, jobRes, getTaskTopology()))
plc = null;
break;
}
}
}
}
// Outside of synchronization.
if (retry && !waitForAffTop) {
// Handle retry
retryAttemptCnt++;
// The delay grows linearly with the number of retry attempts.
final long wait = retryAttemptCnt * RETRY_DELAY_MS;
sendRetryRequest(wait, jobRes, res);
} else if (plc != null && !waitForAffTop && !retry) {
// Handle failover.
if (plc == FAILOVER)
sendFailoverRequest(jobRes);
else {
evtLsnr.onJobFinished(this, jobRes.getSibling());
if (plc == ComputeJobResultPolicy.REDUCE)
reduce(results);
}
}
} catch (IgniteCheckedException e) {
// NOTE(review): the error is logged only when debug logging is enabled - confirm this is intended.
if (log.isDebugEnabled())
U.error(log, "Failed to obtain topology [ses=" + ses + ", err=" + e + ']', e);
finishTask(null, e);
// The task is finished with an error, so do not schedule re-processing below.
waitForAffTop = false;
} finally {
// Open up job for processing responses.
// Only unset occupied flag, if it was
// set in this method.
if (selfOccupied) {
assert jobRes != null;
synchronized (mux) {
jobRes.setOccupied(false);
lockRespProc = false;
}
// Process delayed responses if there are any.
res = delayedRess.poll();
}
}
// Re-process the same response once the awaited future completes.
if (waitForAffTop && affFut != null) {
affFut.listen(new IgniteInClosure<IgniteInternalFuture<?>>() {
@Override
public void apply(IgniteInternalFuture<?> fut0) {
ctx.closure().runLocalSafe(new GridPlainRunnable() {
@Override
public void run() {
onResponse(failoverRes);
}
}, false);
}
});
}
}
}
Aggregations