Example 1 with GridJobExecuteResponse

Use of org.apache.ignite.internal.GridJobExecuteResponse in project ignite by apache.

Class GridJobProcessor, method handleException.

/**
 * Handles errors that happened prior to job creation.
 *
 * @param node Sender node.
 * @param req Job execution request.
 * @param ex Exception that happened.
 * @param endTime Job end time.
 */
private void handleException(ClusterNode node, GridJobExecuteRequest req, IgniteException ex, long endTime) {
    UUID locNodeId = ctx.localNodeId();
    ClusterNode sndNode = ctx.discovery().node(node.id());
    if (sndNode == null) {
        U.warn(log, "Failed to reply to sender node because it left grid [nodeId=" + node.id() + ", jobId=" + req.getJobId() + ']');
        if (ctx.event().isRecordable(EVT_JOB_FAILED)) {
            JobEvent evt = new JobEvent();
            evt.jobId(req.getJobId());
            evt.message("Job reply failed (original task node left grid): " + req.getJobId());
            evt.node(ctx.discovery().localNode());
            evt.taskName(req.getTaskName());
            evt.taskClassName(req.getTaskClassName());
            evt.taskSessionId(req.getSessionId());
            evt.type(EVT_JOB_FAILED);
            evt.taskNode(node);
            evt.taskSubjectId(securitySubjectId(ctx));
            // Record job reply failure.
            ctx.event().record(evt);
        }
        return;
    }
    try {
        boolean loc = ctx.localNodeId().equals(sndNode.id()) && !ctx.config().isMarshalLocalJobs();
        GridJobExecuteResponse jobRes = new GridJobExecuteResponse(locNodeId, req.getSessionId(), req.getJobId(),
            loc ? null : U.marshal(marsh, ex), ex,
            loc ? null : U.marshal(marsh, null), null,
            loc ? null : U.marshal(marsh, null), null,
            false, null);
        if (req.isSessionFullSupport()) {
            // Job response topic.
            Object topic = TOPIC_TASK.topic(req.getJobId(), locNodeId);
            long timeout = endTime - U.currentTimeMillis();
            if (timeout <= 0)
                // Ignore the actual timeout and send response anyway.
                timeout = 1;
            // Send response to designated job topic. Always go through
            // communication to preserve order, if attributes are enabled.
            ctx.io().sendOrderedMessage(sndNode, topic, jobRes, req.isInternal() ? MANAGEMENT_POOL : SYSTEM_POOL, timeout, false);
        } else if (ctx.localNodeId().equals(sndNode.id()))
            ctx.task().processJobExecuteResponse(ctx.localNodeId(), jobRes);
        else
            // Send response to common topic as unordered message.
            ctx.io().sendToGridTopic(sndNode, TOPIC_TASK, jobRes, req.isInternal() ? MANAGEMENT_POOL : SYSTEM_POOL);
    } catch (IgniteCheckedException e) {
        // The only option here is to log, as we must assume that resending will fail too.
        if ((e instanceof ClusterTopologyCheckedException) || isDeadNode(node.id()))
            // Avoid stack trace for left nodes.
            U.error(log, "Failed to reply to sender node because it left grid [nodeId=" + node.id() + ", jobId=" + req.getJobId() + ']');
        else {
            assert sndNode != null;
            U.error(log, "Error sending reply for job [nodeId=" + sndNode.id() + ", jobId=" + req.getJobId() + ']', e);
        }
        if (ctx.event().isRecordable(EVT_JOB_FAILED)) {
            JobEvent evt = new JobEvent();
            evt.jobId(req.getJobId());
            evt.message("Failed to send reply for job: " + req.getJobId());
            evt.node(ctx.discovery().localNode());
            evt.taskName(req.getTaskName());
            evt.taskClassName(req.getTaskClassName());
            evt.taskSessionId(req.getSessionId());
            evt.type(EVT_JOB_FAILED);
            evt.taskNode(node);
            evt.taskSubjectId(securitySubjectId(ctx));
            // Record job reply failure.
            ctx.event().record(evt);
        }
    }
}
Also used : ClusterNode(org.apache.ignite.cluster.ClusterNode) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) JobEvent(org.apache.ignite.events.JobEvent) GridJobExecuteResponse(org.apache.ignite.internal.GridJobExecuteResponse) UUID(java.util.UUID) ClusterTopologyCheckedException(org.apache.ignite.internal.cluster.ClusterTopologyCheckedException)
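
The reply routing above has three branches: an ordered message to a per-job topic when the session has full support (so attribute updates and the response keep their order), a direct local callback when the sender is the local node, and an unordered message to the common TOPIC_TASK otherwise. Note also the timeout clamp: if the job's end time has already passed, the timeout is forced to 1 ms so the response is still sent. The standalone sketch below restates only the routing decision; the Route enum and chooseRoute helper are illustrative stand-ins, not Ignite API.

import java.util.UUID;

// Hypothetical restatement of the reply-routing branch in handleException.
// None of these names are Ignite classes; they only mirror the three branches above.
public class ReplyRoutingSketch {
    enum Route { ORDERED_JOB_TOPIC, LOCAL_CALLBACK, UNORDERED_TASK_TOPIC }

    static Route chooseRoute(boolean sesFullSupport, UUID locNodeId, UUID sndNodeId) {
        if (sesFullSupport)
            return Route.ORDERED_JOB_TOPIC;      // preserve ordering when session attributes are enabled
        if (locNodeId.equals(sndNodeId))
            return Route.LOCAL_CALLBACK;         // sender is the local node: bypass communication
        return Route.UNORDERED_TASK_TOPIC;       // plain unordered message to the common task topic
    }

    public static void main(String[] args) {
        UUID loc = UUID.randomUUID();
        System.out.println(chooseRoute(true, loc, loc));                 // ORDERED_JOB_TOPIC
        System.out.println(chooseRoute(false, loc, loc));                // LOCAL_CALLBACK
        System.out.println(chooseRoute(false, loc, UUID.randomUUID()));  // UNORDERED_TASK_TOPIC
    }
}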

Example 2 with GridJobExecuteResponse

Use of org.apache.ignite.internal.GridJobExecuteResponse in project ignite by apache.

Class GridJobWorker, method finishJob.

/**
 * @param res Result.
 * @param ex Exception.
 * @param sndReply If {@code true}, reply will be sent.
 * @param retry If {@code true}, retry response will be sent.
 */
void finishJob(@Nullable Object res, @Nullable IgniteException ex, boolean sndReply, boolean retry) {
    // Avoid finishing a job more than once from different threads.
    if (!finishing.compareAndSet(false, true))
        return;
    // Do not send reply if job has been cancelled from system.
    if (sndReply)
        sndReply = !sysCancelled;
    // We should save message ID here since listener callback will reset sequence.
    ClusterNode sndNode = ctx.discovery().node(taskNode.id());
    finishTime = U.currentTimeMillis();
    Collection<IgniteBiTuple<Integer, String>> evts = null;
    try {
        if (ses.isFullSupport())
            evtLsnr.onBeforeJobResponseSent(this);
        // Send response back only if job has not timed out.
        if (!isTimedOut()) {
            if (sndReply) {
                if (sndNode == null) {
                    onMasterNodeLeft();
                    U.warn(log, "Failed to reply to sender node because it left grid [nodeId=" + taskNode.id() + ", ses=" + ses + ", jobId=" + ses.getJobId() + ", job=" + job + ']');
                    status = FAILED;
                    // Record job reply failure.
                    if (!internal && ctx.event().isRecordable(EVT_JOB_FAILED))
                        evts = addEvent(evts, EVT_JOB_FAILED, "Job reply failed (task node left grid): " + job);
                } else {
                    try {
                        byte[] resBytes = null;
                        byte[] exBytes = null;
                        byte[] attrBytes = null;
                        boolean loc = ctx.localNodeId().equals(sndNode.id()) && !ctx.config().isMarshalLocalJobs();
                        Map<Object, Object> attrs = jobCtx.getAttributes();
                        // Try to serialize the response; if marshalling fails, send the error back to the caller instead.
                        if (!loc) {
                            try {
                                resBytes = U.marshal(marsh, res);
                            } catch (IgniteCheckedException e) {
                                resBytes = U.marshal(marsh, null);
                                if (ex != null)
                                    ex.addSuppressed(e);
                                else
                                    ex = U.convertException(e);
                                logError("Failed to serialize job response [nodeId=" + taskNode.id() + ", ses=" + ses + ", jobId=" + ses.getJobId() + ", job=" + job + ", resCls=" + (res == null ? null : res.getClass()) + ']', e);
                            }
                            try {
                                attrBytes = U.marshal(marsh, attrs);
                            } catch (IgniteCheckedException e) {
                                attrBytes = U.marshal(marsh, Collections.emptyMap());
                                if (ex != null)
                                    ex.addSuppressed(e);
                                else
                                    ex = U.convertException(e);
                                logError("Failed to serialize job attributes [nodeId=" + taskNode.id() + ", ses=" + ses + ", jobId=" + ses.getJobId() + ", job=" + job + ", attrs=" + attrs + ']', e);
                            }
                            try {
                                exBytes = U.marshal(marsh, ex);
                            } catch (IgniteCheckedException e) {
                                String msg = "Failed to serialize job exception [nodeId=" + taskNode.id() + ", ses=" + ses + ", jobId=" + ses.getJobId() + ", job=" + job + ", msg=\"" + e.getMessage() + "\"]";
                                ex = new IgniteException(msg);
                                logError(msg, e);
                                exBytes = U.marshal(marsh, ex);
                            }
                        }
                        if (ex != null) {
                            status = FAILED;
                            if (isStarted) {
                                // Job failed.
                                if (!internal && ctx.event().isRecordable(EVT_JOB_FAILED))
                                    evts = addEvent(evts, EVT_JOB_FAILED, "Job failed due to exception [ex=" + ex + ", job=" + job + ']');
                            } else if (!internal && ctx.event().isRecordable(EVT_JOB_REJECTED))
                                evts = addEvent(evts, EVT_JOB_REJECTED, "Job has not been started " + "[ex=" + ex + ", job=" + job + ']');
                        } else {
                            status = FINISHED;
                            if (!internal && ctx.event().isRecordable(EVT_JOB_FINISHED))
                                evts = addEvent(evts, EVT_JOB_FINISHED, /* no message for success */ null);
                        }
                        GridJobExecuteResponse jobRes = new GridJobExecuteResponse(ctx.localNodeId(), ses.getId(), ses.getJobId(),
                            exBytes, loc ? ex : null,
                            resBytes, loc ? res : null,
                            attrBytes, loc ? attrs : null,
                            isCancelled(),
                            retry ? ctx.cache().context().exchange().readyAffinityVersion() : null);
                        long timeout = ses.getEndTime() - U.currentTimeMillis();
                        if (timeout <= 0)
                            // Ignore the actual timeout and send response anyway.
                            timeout = 1;
                        if (ses.isFullSupport()) {
                            // Send response to designated job topic.
                            // Always go through communication to preserve order,
                            // if attributes are enabled.
                            ctx.io().sendOrderedMessage(sndNode, taskTopic, jobRes, internal ? MANAGEMENT_POOL : SYSTEM_POOL, timeout, false);
                        } else if (ctx.localNodeId().equals(sndNode.id()))
                            ctx.task().processJobExecuteResponse(ctx.localNodeId(), jobRes);
                        else
                            // Send response to common topic as unordered message.
                            ctx.io().sendToGridTopic(sndNode, TOPIC_TASK, jobRes, internal ? MANAGEMENT_POOL : SYSTEM_POOL);
                    } catch (IgniteCheckedException e) {
                        // Log and invoke the master-leave callback.
                        if ((e instanceof ClusterTopologyCheckedException) || isDeadNode(taskNode.id())) {
                            onMasterNodeLeft();
                            // Avoid stack trace for left nodes.
                            U.warn(log, "Failed to reply to sender node because it left grid " + "[nodeId=" + taskNode.id() + ", jobId=" + ses.getJobId() + ", ses=" + ses + ", job=" + job + ']');
                        } else
                            logError("Error sending reply for job [nodeId=" + sndNode.id() + ", jobId=" + ses.getJobId() + ", ses=" + ses + ", job=" + job + ']', e);
                        if (!internal && ctx.event().isRecordable(EVT_JOB_FAILED))
                            evts = addEvent(evts, EVT_JOB_FAILED, "Failed to send reply for job [nodeId=" + taskNode.id() + ", job=" + job + ']');
                    }
                    // Catch any other unexpected exception thrown while sending the reply.
                    catch (Exception e) {
                        String msg = "Failed to send reply for job [nodeId=" + taskNode.id() + ", job=" + job + ']';
                        logError(msg, e);
                        if (!internal && ctx.event().isRecordable(EVT_JOB_FAILED))
                            evts = addEvent(evts, EVT_JOB_FAILED, msg);
                    }
                }
            } else {
                if (ex != null) {
                    status = FAILED;
                    if (isStarted) {
                        if (!internal && ctx.event().isRecordable(EVT_JOB_FAILED))
                            evts = addEvent(evts, EVT_JOB_FAILED, "Job failed due to exception [ex=" + ex + ", job=" + job + ']');
                    } else if (!internal && ctx.event().isRecordable(EVT_JOB_REJECTED))
                        evts = addEvent(evts, EVT_JOB_REJECTED, "Job has not been started [ex=" + ex + ", job=" + job + ']');
                } else {
                    status = FINISHED;
                    if (!internal && ctx.event().isRecordable(EVT_JOB_FINISHED))
                        evts = addEvent(evts, EVT_JOB_FINISHED, /* no message for success */ null);
                }
            }
        } else {
            // Job timed out.
            status = FAILED;
            if (!internal && ctx.event().isRecordable(EVT_JOB_FAILED))
                evts = addEvent(evts, EVT_JOB_FAILED, "Job failed due to timeout: " + job);
        }
    } finally {
        if (evts != null) {
            for (IgniteBiTuple<Integer, String> t : evts) recordEvent(t.get1(), t.get2());
        }
        // Listener callback.
        evtLsnr.onJobFinished(this);
    }
}
Also used : ClusterNode(org.apache.ignite.cluster.ClusterNode) IgniteBiTuple(org.apache.ignite.lang.IgniteBiTuple) ComputeExecutionRejectedException(org.apache.ignite.compute.ComputeExecutionRejectedException) GridServiceNotFoundException(org.apache.ignite.internal.processors.service.GridServiceNotFoundException) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) IgniteException(org.apache.ignite.IgniteException) NodeStoppingException(org.apache.ignite.internal.NodeStoppingException) IgniteInterruptedCheckedException(org.apache.ignite.internal.IgniteInterruptedCheckedException) ClusterTopologyCheckedException(org.apache.ignite.internal.cluster.ClusterTopologyCheckedException) ComputeUserUndeclaredException(org.apache.ignite.compute.ComputeUserUndeclaredException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) IgniteException(org.apache.ignite.IgniteException) GridJobExecuteResponse(org.apache.ignite.internal.GridJobExecuteResponse) GridTimeoutObject(org.apache.ignite.internal.processors.timeout.GridTimeoutObject) ClusterTopologyCheckedException(org.apache.ignite.internal.cluster.ClusterTopologyCheckedException)
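
A notable detail in finishJob is the marshal-or-fallback pattern: if the job result or its attributes cannot be serialized, the worker marshals null (or an empty map) instead, records the serialization error on the job exception, and still sends a response. The sketch below restates that pattern with plain Java serialization standing in for Ignite's marshaller; marshalOrFallback and the exception holder are hypothetical names used only for illustration.

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;

// Illustrative sketch of the marshal-or-fallback pattern, using JDK serialization
// instead of Ignite's marshaller. Not Ignite API.
public class MarshalFallbackSketch {
    static byte[] marshal(Object obj) throws IOException {
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        try (ObjectOutputStream oos = new ObjectOutputStream(bos)) {
            oos.writeObject(obj);
        }
        return bos.toByteArray();
    }

    /** Marshals {@code res}; on failure marshals {@code null} and records the error in {@code exHolder}. */
    static byte[] marshalOrFallback(Object res, Exception[] exHolder) {
        try {
            return marshal(res);
        }
        catch (IOException e) {
            if (exHolder[0] != null)
                exHolder[0].addSuppressed(e);   // keep the original job exception, attach the marshalling error
            else
                exHolder[0] = e;                // no prior exception: the marshalling error becomes the job error
            try {
                return marshal(null);           // same fallback as resBytes = U.marshal(marsh, null) above
            }
            catch (IOException ignored) {
                return new byte[0];
            }
        }
    }

    public static void main(String[] args) {
        Exception[] ex = new Exception[1];
        Object nonSerializable = new Object(); // plain Object is not Serializable, so marshalling fails
        byte[] bytes = marshalOrFallback(nonSerializable, ex);
        System.out.println("fallback bytes=" + bytes.length + ", recorded error=" + ex[0]);
    }
}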

Example 3 with GridJobExecuteResponse

Use of org.apache.ignite.internal.GridJobExecuteResponse in project ignite by apache.

Class GridTaskWorker, method sendRequest.

/**
 * @param res Job result.
 */
private void sendRequest(ComputeJobResult res) {
    assert res != null;
    GridJobExecuteRequest req = null;
    ClusterNode node = res.getNode();
    try {
        ClusterNode curNode = ctx.discovery().node(node.id());
        // Re-check that the node is still in topology before sending, to avoid an exception being
        // thrown in case of send failure.
        if (curNode == null) {
            if (log.isDebugEnabled()) {
                U.warn(log, "Failed to send job request because remote node left grid (if fail-over is enabled, " + "will attempt fail-over to another node) [node=" + node + ", taskName=" + ses.getTaskName() + ", taskSesId=" + ses.getId() + ", jobSesId=" + res.getJobContext().getJobId() + ']');
            }
            ctx.resource().invokeAnnotated(dep, res.getJob(), ComputeJobAfterSend.class);
            GridJobExecuteResponse fakeRes = new GridJobExecuteResponse(node.id(), ses.getId(), res.getJobContext().getJobId(), null, null, null, null, null, null, false, null);
            fakeRes.setFakeException(new ClusterTopologyException("Failed to send job due to node failure: " + node));
            onResponse(fakeRes);
        } else {
            long timeout = ses.getEndTime() == Long.MAX_VALUE ? Long.MAX_VALUE : ses.getEndTime() - U.currentTimeMillis();
            if (timeout > 0) {
                boolean loc = node.id().equals(ctx.discovery().localNode().id()) && !ctx.config().isMarshalLocalJobs();
                Map<Object, Object> sesAttrs = ses.isFullSupport() ? ses.getAttributes() : null;
                Map<? extends Serializable, ? extends Serializable> jobAttrs = (Map<? extends Serializable, ? extends Serializable>) res.getJobContext().getAttributes();
                boolean forceLocDep = internal || !ctx.deploy().enabled();
                try {
                    MarshallerUtils.jobReceiverVersion(node.version());
                    req = new GridJobExecuteRequest(ses.getId(), res.getJobContext().getJobId(), ses.getTaskName(),
                        ses.getUserVersion(), ses.getTaskClassName(),
                        loc ? null : U.marshal(marsh, res.getJob()), loc ? res.getJob() : null,
                        ses.getStartTime(), timeout, ses.getTopology(),
                        loc ? ses.getTopologyPredicate() : null, loc ? null : U.marshal(marsh, ses.getTopologyPredicate()),
                        loc ? null : U.marshal(marsh, ses.getJobSiblings()), loc ? ses.getJobSiblings() : null,
                        loc ? null : U.marshal(marsh, sesAttrs), loc ? sesAttrs : null,
                        loc ? null : U.marshal(marsh, jobAttrs), loc ? jobAttrs : null,
                        ses.getCheckpointSpi(), dep.classLoaderId(), dep.deployMode(), continuous,
                        dep.participants(), forceLocDep, ses.isFullSupport(), internal,
                        affCacheIds, affPartId, mapTopVer, ses.executorName());
                } finally {
                    MarshallerUtils.jobReceiverVersion(null);
                }
                if (loc)
                    ctx.job().processJobExecuteRequest(ctx.discovery().localNode(), req);
                else {
                    byte plc;
                    if (internal)
                        plc = MANAGEMENT_POOL;
                    else {
                        Byte ctxPlc = getThreadContext(TC_IO_POLICY);
                        if (ctxPlc != null)
                            plc = ctxPlc;
                        else
                            plc = PUBLIC_POOL;
                    }
                    // Send job execution request.
                    ctx.io().sendToGridTopic(node, TOPIC_JOB, req, plc);
                    if (log.isDebugEnabled())
                        log.debug("Sent job request [req=" + req + ", node=" + node + ']');
                }
                if (!loc)
                    ctx.resource().invokeAnnotated(dep, res.getJob(), ComputeJobAfterSend.class);
            } else
                U.warn(log, "Job timed out prior to sending job execution request: " + res.getJob());
        }
    } catch (IgniteCheckedException e) {
        IgniteException fakeErr = null;
        try {
            boolean deadNode = e instanceof ClusterTopologyCheckedException || isDeadNode(res.getNode().id());
            // Avoid stack trace if node has left grid.
            if (deadNode) {
                if (log.isDebugEnabled()) {
                    U.warn(log, "Failed to send job request because remote node left grid (if failover is enabled, " + "will attempt fail-over to another node) [node=" + node + ", taskName=" + ses.getTaskName() + ", taskSesId=" + ses.getId() + ", jobSesId=" + res.getJobContext().getJobId() + ']');
                }
                fakeErr = new ClusterTopologyException("Failed to send job due to node failure: " + node, e);
            } else if (log.isDebugEnabled())
                U.error(log, "Failed to send job request: " + req, e);
        } catch (IgniteClientDisconnectedCheckedException e0) {
            if (log.isDebugEnabled())
                log.debug("Failed to send job request, client disconnected [node=" + node + ", taskName=" + ses.getTaskName() + ", taskSesId=" + ses.getId() + ", jobSesId=" + res.getJobContext().getJobId() + ']');
            fakeErr = U.convertException(e0);
        }
        GridJobExecuteResponse fakeRes = new GridJobExecuteResponse(node.id(), ses.getId(), res.getJobContext().getJobId(), null, null, null, null, null, null, false, null);
        if (fakeErr == null)
            fakeErr = U.convertException(e);
        fakeRes.setFakeException(fakeErr);
        onResponse(fakeRes);
    }
}
Also used : ClusterNode(org.apache.ignite.cluster.ClusterNode) Serializable(java.io.Serializable) GridJobExecuteRequest(org.apache.ignite.internal.GridJobExecuteRequest) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) GridJobExecuteResponse(org.apache.ignite.internal.GridJobExecuteResponse) ComputeJobAfterSend(org.apache.ignite.compute.ComputeJobAfterSend) IgniteException(org.apache.ignite.IgniteException) IgniteClientDisconnectedCheckedException(org.apache.ignite.internal.IgniteClientDisconnectedCheckedException) ClusterTopologyException(org.apache.ignite.cluster.ClusterTopologyException) GridTimeoutObject(org.apache.ignite.internal.processors.timeout.GridTimeoutObject) Map(java.util.Map) EnumMap(java.util.EnumMap) HashMap(java.util.HashMap) Collections.emptyMap(java.util.Collections.emptyMap) ClusterTopologyCheckedException(org.apache.ignite.internal.cluster.ClusterTopologyCheckedException)
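
When the request cannot be delivered, sendRequest does not propagate the error to the caller: it builds an empty GridJobExecuteResponse, attaches a fake ClusterTopologyException (or the converted send error) and feeds it to onResponse, so the regular result/failover machinery handles the failure. A smaller detail is the I/O pool selection: internal tasks use the management pool, an explicit TC_IO_POLICY from the thread context wins otherwise, and the public pool is the default. The self-contained sketch below restates only the pool choice; the byte constants are placeholders, not the real GridIoPolicy values.

// Hypothetical, self-contained restatement of the pool-selection branch above.
// MANAGEMENT_POOL and PUBLIC_POOL mirror the Ignite names only; the values are made up.
public class PoolSelectionSketch {
    static final byte MANAGEMENT_POOL = 1; // placeholder value, for illustration only
    static final byte PUBLIC_POOL = 0;     // placeholder value, for illustration only

    static byte choosePool(boolean internal, Byte threadCtxPolicy) {
        if (internal)
            return MANAGEMENT_POOL;   // internal (management) tasks bypass the public pool
        if (threadCtxPolicy != null)
            return threadCtxPolicy;   // honor an explicitly set TC_IO_POLICY
        return PUBLIC_POOL;           // default for user compute jobs
    }

    public static void main(String[] args) {
        System.out.println(choosePool(true, null));       // 1 (management pool)
        System.out.println(choosePool(false, (byte) 7));  // 7 (thread-context policy)
        System.out.println(choosePool(false, null));      // 0 (public pool)
    }
}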

Example 4 with GridJobExecuteResponse

Use of org.apache.ignite.internal.GridJobExecuteResponse in project ignite by apache.

Class GridTaskWorker, method onNodeLeft.

/**
 * @param nodeId Node ID.
 */
void onNodeLeft(UUID nodeId) {
    Collection<GridJobExecuteResponse> resList = null;
    synchronized (mux) {
        // First check if job cares about future responses.
        if (state != State.WAITING)
            return;
        if (jobRes != null) {
            for (GridJobResultImpl jr : jobRes.values()) {
                if (!jr.hasResponse() && jr.getNode().id().equals(nodeId)) {
                    if (log.isDebugEnabled())
                        log.debug("Creating fake response because node left grid [job=" + jr.getJob() + ", nodeId=" + nodeId + ']');
                    // Artificial response for a job that is still waiting for a reply from a
                    // node that no longer exists.
                    GridJobExecuteResponse fakeRes = new GridJobExecuteResponse(nodeId, ses.getId(), jr.getJobContext().getJobId(), null, null, null, null, null, null, false, null);
                    fakeRes.setFakeException(new ClusterTopologyException("Node has left grid: " + nodeId));
                    if (resList == null)
                        resList = new ArrayList<>();
                    resList.add(fakeRes);
                }
            }
        }
    }
    if (resList == null)
        return;
    // Simulate responses without holding synchronization.
    for (GridJobExecuteResponse res : resList) {
        if (log.isDebugEnabled())
            log.debug("Simulating fake response from left node [res=" + res + ", nodeId=" + nodeId + ']');
        onResponse(res);
    }
}
Also used : GridJobExecuteResponse(org.apache.ignite.internal.GridJobExecuteResponse) GridJobResultImpl(org.apache.ignite.internal.GridJobResultImpl) ArrayList(java.util.ArrayList) ClusterTopologyException(org.apache.ignite.cluster.ClusterTopologyException)
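
The fake-response construction can be isolated as below. This is a minimal sketch assuming ignite-core is on the classpath; the session and job IDs are generated locally purely for illustration (in the worker they come from the task session and the job context), and the hand-off to onResponse is omitted because it needs a live GridTaskWorker.

import java.util.UUID;

import org.apache.ignite.cluster.ClusterTopologyException;
import org.apache.ignite.internal.GridJobExecuteResponse;
import org.apache.ignite.lang.IgniteUuid;

// Builds the same kind of "fake" response the worker creates for a node that left.
// IDs below are random placeholders; real code uses the task session and job context IDs.
public class FakeResponseSketch {
    public static void main(String[] args) {
        UUID leftNodeId = UUID.randomUUID();
        IgniteUuid sesId = IgniteUuid.randomUuid();
        IgniteUuid jobId = IgniteUuid.randomUuid();

        GridJobExecuteResponse fakeRes = new GridJobExecuteResponse(
            leftNodeId, sesId, jobId,
            null, null,   // no marshalled or local exception
            null, null,   // no result
            null, null,   // no job attributes
            false,        // not cancelled
            null);        // no retry topology version

        fakeRes.setFakeException(new ClusterTopologyException("Node has left grid: " + leftNodeId));

        System.out.println(fakeRes.getFakeException().getMessage());
    }
}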

Example 5 with GridJobExecuteResponse

Use of org.apache.ignite.internal.GridJobExecuteResponse in project ignite by apache.

Class GridTaskWorker, method onResponse.

/**
 * @param msg Job execution response.
 */
void onResponse(GridJobExecuteResponse msg) {
    assert msg != null;
    if (fut.isDone()) {
        if (log.isDebugEnabled())
            log.debug("Ignoring job response since task has finished: " + msg);
        return;
    }
    GridJobExecuteResponse res = msg;
    while (res != null) {
        GridJobResultImpl jobRes = null;
        // Flag indicating whether the occupied flag for the
        // job response was changed during this invocation.
        boolean selfOccupied = false;
        IgniteInternalFuture<?> affFut = null;
        boolean waitForAffTop = false;
        final GridJobExecuteResponse failoverRes = res;
        try {
            synchronized (mux) {
                // If the task is no longer waiting for responses, there is no point to proceed.
                if (state != State.WAITING) {
                    if (log.isDebugEnabled())
                        log.debug("Ignoring response since task is already reducing or finishing [res=" + res + ", job=" + ses + ", state=" + state + ']');
                    return;
                }
                jobRes = this.jobRes.get(res.getJobId());
                if (jobRes == null) {
                    if (log.isDebugEnabled())
                        U.warn(log, "Received response for unknown child job (was job presumed failed?): " + res);
                    res = delayedRess.poll();
                    // We can not return here because there can be more delayed messages in the queue.
                    continue;
                }
                // A response may already exist if a fake one was created by the discovery
                // listener or when sending the request failed.
                if (jobRes.hasResponse()) {
                    if (log.isDebugEnabled())
                        log.debug("Received redundant response for a job (will ignore): " + res);
                    res = delayedRess.poll();
                    // We can not return here because there can be more delayed messages in the queue.
                    continue;
                }
                if (!jobRes.getNode().id().equals(res.getNodeId())) {
                    if (log.isDebugEnabled())
                        log.debug("Ignoring stale response as job was already resent to other node [res=" + res + ", jobRes=" + jobRes + ']');
                    // Prevent processing 2 responses for the same job simultaneously.
                    jobRes.setOccupied(true);
                    selfOccupied = true;
                    // We can not return here because there can be more delayed messages in the queue.
                    continue;
                }
                if (jobRes.isOccupied()) {
                    if (log.isDebugEnabled())
                        log.debug("Adding response to delayed queue (job is either being sent or processing " + "another response): " + res);
                    delayedRess.offer(res);
                    return;
                }
                if (lockRespProc) {
                    delayedRess.offer(res);
                    return;
                }
                lockRespProc = true;
                selfOccupied = true;
                // Prevent processing 2 responses for the same job simultaneously.
                jobRes.setOccupied(true);
                // We don't keep reference to job if results are not cached.
                if (!resCache)
                    this.jobRes.remove(res.getJobId());
            }
            if (res.getFakeException() != null)
                jobRes.onResponse(null, res.getFakeException(), null, false);
            else {
                ClassLoader clsLdr = dep.classLoader();
                try {
                    boolean loc = ctx.localNodeId().equals(res.getNodeId()) && !ctx.config().isMarshalLocalJobs();
                    Object res0 = loc ? res.getJobResult() : U.unmarshal(marsh, res.getJobResultBytes(), U.resolveClassLoader(clsLdr, ctx.config()));
                    IgniteException ex = loc ? res.getException() : U.<IgniteException>unmarshal(marsh, res.getExceptionBytes(), U.resolveClassLoader(clsLdr, ctx.config()));
                    Map<Object, Object> attrs = loc ? res.getJobAttributes() : U.<Map<Object, Object>>unmarshal(marsh, res.getJobAttributesBytes(), U.resolveClassLoader(clsLdr, ctx.config()));
                    jobRes.onResponse(res0, ex, attrs, res.isCancelled());
                    if (loc)
                        ctx.resource().invokeAnnotated(dep, jobRes.getJob(), ComputeJobAfterSend.class);
                } catch (IgniteCheckedException e) {
                    if (log.isDebugEnabled())
                        U.error(log, "Error deserializing job response: " + res, e);
                    finishTask(null, e);
                }
            }
            List<ComputeJobResult> results;
            if (!resCache)
                results = emptyList();
            else {
                synchronized (mux) {
                    results = getRemoteResults();
                }
            }
            ComputeJobResultPolicy plc = result(jobRes, results);
            if (plc == null) {
                String errMsg = "Failed to obtain remote job result policy for result from ComputeTask.result(..) " + "method that returned null (will fail the whole task): " + jobRes;
                finishTask(null, new IgniteCheckedException(errMsg));
                return;
            }
            boolean retry = false;
            synchronized (mux) {
                // If the task is no longer waiting for responses, there is no point to proceed.
                if (state != State.WAITING) {
                    if (log.isDebugEnabled())
                        log.debug("Ignoring ComputeTask.result(..) value since task is already reducing or" + "finishing [res=" + res + ", job=" + ses + ", state=" + state + ']');
                    return;
                }
                if (res.retry()) {
                    // Retry is used only with affinity call / run.
                    assert affCacheIds != null;
                    retry = true;
                    mapTopVer = U.max(res.getRetryTopologyVersion(), ctx.cache().context().exchange().readyAffinityVersion());
                    affFut = ctx.cache().context().exchange().lastTopologyFuture();
                    if (affFut != null && !affFut.isDone()) {
                        waitForAffTop = true;
                        jobRes.resetResponse();
                    }
                } else {
                    switch(plc) {
                        // Start reducing all results received so far.
                        case REDUCE:
                            {
                                state = State.REDUCING;
                                break;
                            }
                        // Wait if some jobs are still outstanding; otherwise, reduce.
                        case WAIT:
                            {
                                assert results.size() <= this.jobRes.size();
                                // Reduce once all results are received; this also covers the case
                                // when both collections are empty.
                                if (results.size() == this.jobRes.size()) {
                                    plc = ComputeJobResultPolicy.REDUCE;
                                    // All results are received, proceed to reduce method.
                                    state = State.REDUCING;
                                }
                                break;
                            }
                        case FAILOVER:
                            {
                                if (affCacheIds != null) {
                                    mapTopVer = ctx.cache().context().exchange().readyAffinityVersion();
                                    affFut = ctx.cache().context().exchange().lastTopologyFuture();
                                    if (affFut == null || affFut.isDone()) {
                                        affFut = null;
                                        // Need to fetch affinity asynchronously if the cache is not started on this node.
                                        if (affCacheName != null && ctx.cache().internalCache(affCacheName) == null) {
                                            affFut = ctx.affinity().affinityCacheFuture(affCacheName, mapTopVer);
                                            if (affFut.isDone())
                                                affFut = null;
                                        }
                                    }
                                }
                                if (affFut != null) {
                                    waitForAffTop = true;
                                    jobRes.resetResponse();
                                } else if (!failover(res, jobRes, getTaskTopology()))
                                    plc = null;
                                break;
                            }
                    }
                }
            }
            // Outside of synchronization.
            if (retry && !waitForAffTop) {
                // Handle retry
                retryAttemptCnt++;
                final long wait = retryAttemptCnt * RETRY_DELAY_MS;
                sendRetryRequest(wait, jobRes, res);
            } else if (plc != null && !waitForAffTop && !retry) {
                // Handle failover.
                if (plc == FAILOVER)
                    sendFailoverRequest(jobRes);
                else {
                    evtLsnr.onJobFinished(this, jobRes.getSibling());
                    if (plc == ComputeJobResultPolicy.REDUCE)
                        reduce(results);
                }
            }
        } catch (IgniteCheckedException e) {
            if (log.isDebugEnabled())
                U.error(log, "Failed to obtain topology [ses=" + ses + ", err=" + e + ']', e);
            finishTask(null, e);
            waitForAffTop = false;
        } finally {
            // Release the occupied flag if it was set earlier in this invocation.
            if (selfOccupied) {
                assert jobRes != null;
                synchronized (mux) {
                    jobRes.setOccupied(false);
                    lockRespProc = false;
                }
                // Process delayed responses if there are any.
                res = delayedRess.poll();
            }
        }
        if (waitForAffTop && affFut != null) {
            affFut.listen(new IgniteInClosure<IgniteInternalFuture<?>>() {

                @Override
                public void apply(IgniteInternalFuture<?> fut0) {
                    ctx.closure().runLocalSafe(new GridPlainRunnable() {

                        @Override
                        public void run() {
                            onResponse(failoverRes);
                        }
                    }, false);
                }
            });
        }
    }
}
Also used : GridJobResultImpl(org.apache.ignite.internal.GridJobResultImpl) IgniteInternalFuture(org.apache.ignite.internal.IgniteInternalFuture) GridPlainRunnable(org.apache.ignite.internal.util.lang.GridPlainRunnable) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) ComputeJobResultPolicy(org.apache.ignite.compute.ComputeJobResultPolicy) GridJobExecuteResponse(org.apache.ignite.internal.GridJobExecuteResponse) IgniteException(org.apache.ignite.IgniteException) ComputeJobAfterSend(org.apache.ignite.compute.ComputeJobAfterSend) GridTimeoutObject(org.apache.ignite.internal.processors.timeout.GridTimeoutObject) ComputeJobResult(org.apache.ignite.compute.ComputeJobResult)
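
One branch worth calling out in onResponse is the WAIT policy: if ComputeTask.result(..) returns WAIT but every mapped job already has a result, the policy is promoted to REDUCE and the task switches to the REDUCING state. The standalone sketch below restates just that promotion; the local Policy enum is a stand-in for org.apache.ignite.compute.ComputeJobResultPolicy.

// Self-contained restatement of the WAIT -> REDUCE promotion above. The enum is a
// local stand-in, not the real ComputeJobResultPolicy.
public class WaitPromotionSketch {
    enum Policy { WAIT, REDUCE, FAILOVER }

    static Policy promote(Policy plc, int receivedResults, int mappedJobs) {
        if (plc == Policy.WAIT && receivedResults == mappedJobs)
            return Policy.REDUCE; // all results are in, proceed to the reduce step
        return plc;
    }

    public static void main(String[] args) {
        System.out.println(promote(Policy.WAIT, 2, 3));     // WAIT (one job still outstanding)
        System.out.println(promote(Policy.WAIT, 3, 3));     // REDUCE
        System.out.println(promote(Policy.FAILOVER, 3, 3)); // FAILOVER (unchanged)
    }
}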

Aggregations

GridJobExecuteResponse (org.apache.ignite.internal.GridJobExecuteResponse)5 IgniteCheckedException (org.apache.ignite.IgniteCheckedException)4 IgniteException (org.apache.ignite.IgniteException)3 ClusterNode (org.apache.ignite.cluster.ClusterNode)3 ClusterTopologyCheckedException (org.apache.ignite.internal.cluster.ClusterTopologyCheckedException)3 GridTimeoutObject (org.apache.ignite.internal.processors.timeout.GridTimeoutObject)3 ClusterTopologyException (org.apache.ignite.cluster.ClusterTopologyException)2 ComputeJobAfterSend (org.apache.ignite.compute.ComputeJobAfterSend)2 GridJobResultImpl (org.apache.ignite.internal.GridJobResultImpl)2 Serializable (java.io.Serializable)1 ArrayList (java.util.ArrayList)1 Collections.emptyMap (java.util.Collections.emptyMap)1 EnumMap (java.util.EnumMap)1 HashMap (java.util.HashMap)1 Map (java.util.Map)1 UUID (java.util.UUID)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 ComputeExecutionRejectedException (org.apache.ignite.compute.ComputeExecutionRejectedException)1 ComputeJobResult (org.apache.ignite.compute.ComputeJobResult)1 ComputeJobResultPolicy (org.apache.ignite.compute.ComputeJobResultPolicy)1