Search in sources :

Example 1 with GridJobResultImpl

use of org.apache.ignite.internal.GridJobResultImpl in project ignite by apache.

the class GridTaskWorker method processMappedJobs.

/**
 * Registers every mapped job with this task worker and then sends the jobs out for execution.
 * A result holder is created per job and stored in the job result map before any request is sent;
 * jobs mapped to the local node are sent last.
 *
 * @param jobs Map of jobs.
 * @throws IgniteCheckedException Thrown in case of any error.
 */
private void processMappedJobs(Map<? extends ComputeJob, ClusterNode> jobs) throws IgniteCheckedException {
    if (F.isEmpty(jobs))
        return;

    int mappedCnt = jobs.size();

    List<GridJobResultImpl> pending = new ArrayList<>(mappedCnt);
    Collection<ComputeJobSibling> siblings = new ArrayList<>(mappedCnt);

    // Create a result holder (and its sibling) for each job-to-node pair.
    for (Map.Entry<? extends ComputeJob, ClusterNode> entry : jobs.entrySet()) {
        ComputeJob mappedJob = entry.getKey();
        ClusterNode mappedNode = entry.getValue();

        if (mappedJob == null)
            throw new IgniteCheckedException("Job can not be null [mappedJob=" + entry + ", ses=" + ses + ']');

        if (mappedNode == null)
            throw new IgniteCheckedException("Node can not be null [mappedJob=" + entry + ", ses=" + ses + ']');

        IgniteUuid id = IgniteUuid.fromUuid(ctx.localNodeId());

        GridJobSiblingImpl sibling = new GridJobSiblingImpl(ses.getId(), id, mappedNode.id(), ctx);

        pending.add(new GridJobResultImpl(mappedJob, id, mappedNode, sibling));

        // Siblings are tracked only when the result cache is enabled.
        if (resCache)
            siblings.add(sibling);

        recordJobEvent(EVT_JOB_MAPPED, id, mappedNode, null, "Job got mapped.");
    }

    synchronized (mux) {
        if (state != State.WAITING)
            throw new IgniteCheckedException("Task is not in waiting state [state=" + state + ", ses=" + ses + ']');

        // Siblings are tracked only when the result cache is enabled.
        if (resCache)
            ses.addJobSiblings(siblings);

        if (jobRes == null)
            jobRes = new HashMap<>();

        // Register all results before any request is sent, so that a response arriving
        // while requests are still going out can find its job in the map.
        for (GridJobResultImpl holder : pending) {
            if (jobRes.put(holder.getJobContext().getJobId(), holder) != null)
                throw new IgniteCheckedException("Duplicate job ID for remote job found: " + holder.getJobContext().getJobId());

            // Keep the job marked as occupied until its request has been sent.
            holder.setOccupied(true);

            boolean warnTooManyJobs = resCache
                && jobRes.size() > ctx.discovery().size()
                && jobRes.size() % SPLIT_WARN_THRESHOLD == 0;

            if (warnTooManyJobs)
                LT.warn(log, "Number of jobs in task is too large for task: " + ses.getTaskName() + ". Consider reducing number of jobs or disabling job result cache with " + "@ComputeTaskNoResultCache annotation.");
        }
    }

    ses.jobNodes(F.viewReadOnly(jobs.values(), F.node2id()));

    evtLsnr.onJobsMapped(this);

    // Mark the session as mapped.
    ses.onMapped();

    // Push jobs mapped to the local node towards the tail of the list: they are invoked
    // in the current thread, which would otherwise hold back the remaining jobs.
    int tail = pending.size();

    if (tail > 1) {
        UUID locId = ctx.discovery().localNode().id();

        int idx = 0;

        while (idx < tail) {
            boolean locJob = pending.get(idx).getNode().id().equals(locId);

            if (locJob && idx < tail - 1) {
                // The swapped-in element now sits at idx and has to be examined too,
                // so idx is not advanced here.
                Collections.swap(pending, idx, tail - 1);

                tail--;
            }
            else
                idx++;
        }
    }

    // Send all jobs out.
    for (GridJobResultImpl holder : pending) {
        evtLsnr.onJobSend(this, holder.getSibling());

        try {
            sendRequest(holder);
        }
        finally {
            // Whatever happened to the request, allow responses for this job to be processed.
            synchronized (mux) {
                holder.setOccupied(false);
            }
        }
    }

    processDelayedResponses();
}
Also used : ClusterNode(org.apache.ignite.cluster.ClusterNode) HashMap(java.util.HashMap) GridJobResultImpl(org.apache.ignite.internal.GridJobResultImpl) ArrayList(java.util.ArrayList) ComputeJob(org.apache.ignite.compute.ComputeJob) GridJobSiblingImpl(org.apache.ignite.internal.GridJobSiblingImpl) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) IgniteUuid(org.apache.ignite.lang.IgniteUuid) ComputeJobSibling(org.apache.ignite.compute.ComputeJobSibling) UUID(java.util.UUID) Map(java.util.Map) EnumMap(java.util.EnumMap) HashMap(java.util.HashMap) Collections.emptyMap(java.util.Collections.emptyMap)

Example 2 with GridJobResultImpl

use of org.apache.ignite.internal.GridJobResultImpl in project ignite by apache.

the class GridTaskWorker method jobStatuses.

/**
 * Counts jobs of this task per status using locally available information only:
 * jobs that already have a response are classified directly, and if some job mapped
 * to the local node has no response yet, the statistics reported by the local job
 * processor for this task session are merged in.
 *
 * @return Job statistics for the task. Mapping: Job status -> count of jobs.
 */
Map<ComputeJobStatusEnum, Long> jobStatuses() {
    List<GridJobResultImpl> snapshot;

    // Copy the results under the lock so the iteration below runs without it.
    synchronized (mux) {
        snapshot = jobRes == null ? null : new ArrayList<>(jobRes.values());
    }

    // Jobs have not been mapped yet.
    if (F.isEmpty(snapshot))
        return emptyMap();

    UUID locId = ctx.localNodeId();

    boolean locJobPending = false;

    Map<ComputeJobStatusEnum, Long> counts = new EnumMap<>(ComputeJobStatusEnum.class);

    for (GridJobResultImpl jr : snapshot) {
        if (!jr.hasResponse()) {
            // Remember whether at least one unanswered job is mapped to the local node.
            if (!locJobPending)
                locJobPending = locId.equals(jr.getNode().id());

            continue;
        }

        ComputeJobStatusEnum status = jr.isCancelled() ? CANCELLED
            : jr.getException() != null ? FAILED
            : FINISHED;

        counts.merge(status, 1L, Long::sum);
    }

    if (locJobPending) {
        ctx.job().jobStatuses(getTaskSessionId())
            .forEach((status, cnt) -> counts.merge(status, cnt, Long::sum));
    }

    return counts;
}
Also used : GridJobResultImpl(org.apache.ignite.internal.GridJobResultImpl) ArrayList(java.util.ArrayList) UUID(java.util.UUID) EnumMap(java.util.EnumMap) Map(java.util.Map) EnumMap(java.util.EnumMap) HashMap(java.util.HashMap) Collections.emptyMap(java.util.Collections.emptyMap) ComputeJobStatusEnum(org.apache.ignite.internal.processors.job.ComputeJobStatusEnum)

Example 3 with GridJobResultImpl

use of org.apache.ignite.internal.GridJobResultImpl in project ignite by apache.

the class GridTaskWorker method onNodeLeft.

/**
 * Reacts to a node leaving: for every job of this task that is mapped to that node and
 * has no response yet, a fake response carrying a {@code ClusterTopologyException} is
 * created and passed to {@code onResponse}. Does nothing unless the task is in waiting state.
 *
 * @param nodeId Node ID.
 */
void onNodeLeft(UUID nodeId) {
    Collection<GridJobExecuteResponse> fakeResponses = new ArrayList<>();

    synchronized (mux) {
        // Responses matter only while the task is still waiting for them.
        if (state != State.WAITING)
            return;

        if (jobRes != null) {
            for (GridJobResultImpl pending : jobRes.values()) {
                // Skip jobs that already have a response or run on another node.
                if (pending.hasResponse() || !pending.getNode().id().equals(nodeId))
                    continue;

                if (log.isDebugEnabled())
                    log.debug("Creating fake response because node left grid [job=" + pending.getJob() + ", nodeId=" + nodeId + ']');

                // The job would otherwise wait for a response from a node that is gone,
                // so fabricate one.
                GridJobExecuteResponse fake = new GridJobExecuteResponse(
                    nodeId,
                    ses.getId(),
                    pending.getJobContext().getJobId(),
                    null, null, null, null, null, null,
                    false,
                    null);

                fake.setFakeException(new ClusterTopologyException("Node has left grid: " + nodeId));

                fakeResponses.add(fake);
            }
        }
    }

    // Feed the fake responses through the regular path, outside of the lock.
    for (GridJobExecuteResponse fake : fakeResponses) {
        if (log.isDebugEnabled())
            log.debug("Simulating fake response from left node [res=" + fake + ", nodeId=" + nodeId + ']');

        onResponse(fake);
    }
}
Also used : GridJobExecuteResponse(org.apache.ignite.internal.GridJobExecuteResponse) GridJobResultImpl(org.apache.ignite.internal.GridJobResultImpl) ArrayList(java.util.ArrayList) ClusterTopologyException(org.apache.ignite.cluster.ClusterTopologyException)

Example 4 with GridJobResultImpl

use of org.apache.ignite.internal.GridJobResultImpl in project ignite by apache.

the class GridTaskWorker method onResponse.

/**
 * Processes a job execution response and then any responses that were queued in the
 * delayed queue while this one was being handled.
 * <p>
 * For each response the method:
 * <ol>
 *     <li>under {@code mux}, drops it if the task is no longer waiting, the job is unknown,
 *     the job already has a response, or the response comes from a node the job is no longer
 *     mapped to; queues it as delayed if the job is occupied or another response is being
 *     processed;</li>
 *     <li>stores the result in the job result (unmarshalling it unless the job ran locally and
 *     local jobs are not marshalled);</li>
 *     <li>asks {@code result(..)} for the policy and acts on it: reduce, keep waiting,
 *     fail over, or retry;</li>
 *     <li>if an affinity topology future has to complete first, re-submits the same response
 *     to this method once that future is done.</li>
 * </ol>
 * The response is ignored entirely if the task future is already done.
 *
 * @param msg Job execution response.
 */
void onResponse(GridJobExecuteResponse msg) {
    assert msg != null;
    if (fut.isDone()) {
        if (log.isDebugEnabled())
            log.debug("Ignoring job response since task has finished: " + msg);
        return;
    }
    GridJobExecuteResponse res = msg;
    while (res != null) {
        GridJobResultImpl jobRes = null;
        // Flag indicating whether occupied flag for
        // job response was changed in this method apply.
        boolean selfOccupied = false;
        IgniteInternalFuture<?> affFut = null;
        boolean waitForAffTop = false;
        // Captured for the listener below, which re-submits this very response.
        final GridJobExecuteResponse failoverRes = res;
        try {
            synchronized (mux) {
                // If the task is not waiting for responses any more,
                // then there is no point to proceed.
                if (state != State.WAITING) {
                    if (log.isDebugEnabled())
                        log.debug("Ignoring response since task is already reducing or finishing [res=" + res + ", job=" + ses + ", state=" + state + ']');
                    return;
                }
                jobRes = this.jobRes.get(res.getJobId());
                if (jobRes == null) {
                    if (log.isDebugEnabled())
                        U.warn(log, "Received response for unknown child job (was job presumed failed?): " + res);
                    res = delayedRess.poll();
                    // We can not return here because there can be more delayed messages in the queue.
                    continue;
                }
                // Only the first response for a job is processed; later ones are ignored.
                // A second response can appear, for example, when a fake failure response
                // was created from discovery listener and when sending request failed.
                if (jobRes.hasResponse()) {
                    if (log.isDebugEnabled())
                        log.debug("Received redundant response for a job (will ignore): " + res);
                    res = delayedRess.poll();
                    // We can not return here because there can be more delayed messages in the queue.
                    continue;
                }
                if (!jobRes.getNode().id().equals(res.getNodeId())) {
                    if (log.isDebugEnabled())
                        log.debug("Ignoring stale response as job was already resent to other node [res=" + res + ", jobRes=" + jobRes + ']');
                    // Prevent processing 2 responses for the same job simultaneously.
                    jobRes.setOccupied(true);
                    selfOccupied = true;
                    // We can not return here because there can be more delayed messages in the queue.
                    // The finally block below releases the occupied flag and polls the next delayed response.
                    continue;
                }
                if (jobRes.isOccupied()) {
                    if (log.isDebugEnabled())
                        log.debug("Adding response to delayed queue (job is either being sent or processing " + "another response): " + res);
                    delayedRess.offer(res);
                    return;
                }
                // Another response is currently being processed: queue this one as delayed.
                if (lockRespProc) {
                    delayedRess.offer(res);
                    return;
                }
                lockRespProc = true;
                selfOccupied = true;
                // Prevent processing 2 responses for the same job simultaneously.
                jobRes.setOccupied(true);
                // We don't keep reference to job if results are not cached.
                if (!resCache)
                    this.jobRes.remove(res.getJobId());
            }
            if (res.getFakeException() != null)
                jobRes.onResponse(null, res.getFakeException(), null, false);
            else {
                ClassLoader clsLdr = dep.classLoader();
                try {
                    // Results of local jobs are taken as objects unless local jobs are marshalled too.
                    boolean loc = ctx.localNodeId().equals(res.getNodeId()) && !ctx.config().isMarshalLocalJobs();
                    Object res0 = loc ? res.getJobResult() : U.unmarshal(marsh, res.getJobResultBytes(), U.resolveClassLoader(clsLdr, ctx.config()));
                    IgniteException ex = loc ? res.getException() : U.<IgniteException>unmarshal(marsh, res.getExceptionBytes(), U.resolveClassLoader(clsLdr, ctx.config()));
                    Map<Object, Object> attrs = loc ? res.getJobAttributes() : U.<Map<Object, Object>>unmarshal(marsh, res.getJobAttributesBytes(), U.resolveClassLoader(clsLdr, ctx.config()));
                    jobRes.onResponse(res0, ex, attrs, res.isCancelled());
                    if (loc)
                        ctx.resource().invokeAnnotated(dep, jobRes.getJob(), ComputeJobAfterSend.class);
                } catch (IgniteCheckedException e) {
                    if (log.isDebugEnabled())
                        U.error(log, "Error deserializing job response: " + res, e);
                    finishTask(null, e);
                }
            }
            List<ComputeJobResult> results;
            if (!resCache)
                results = emptyList();
            else {
                synchronized (mux) {
                    results = getRemoteResults();
                }
            }
            ComputeJobResultPolicy plc = result(jobRes, results);
            if (plc == null) {
                String errMsg = "Failed to obtain remote job result policy for result from ComputeTask.result(..) " + "method that returned null (will fail the whole task): " + jobRes;
                finishTask(null, new IgniteCheckedException(errMsg));
                return;
            }
            boolean retry = false;
            synchronized (mux) {
                // If the task is not waiting for responses any more,
                // then there is no point to proceed.
                if (state != State.WAITING) {
                    if (log.isDebugEnabled())
                        log.debug("Ignoring ComputeTask.result(..) value since task is already reducing or " + "finishing [res=" + res + ", job=" + ses + ", state=" + state + ']');
                    return;
                }
                if (res.retry()) {
                    // Retry is used only with affinity call / run.
                    assert affCacheIds != null;
                    retry = true;
                    mapTopVer = U.max(res.getRetryTopologyVersion(), ctx.cache().context().exchange().readyAffinityVersion());
                    affFut = ctx.cache().context().exchange().lastTopologyFuture();
                    if (affFut != null && !affFut.isDone()) {
                        // Topology is still changing: wait for it and process this response again.
                        waitForAffTop = true;
                        jobRes.resetResponse();
                    }
                } else {
                    switch(plc) {
                        // Start reducing all results received so far.
                        case REDUCE:
                            {
                                state = State.REDUCING;
                                break;
                            }
                        // Keep waiting if there are more responses to come,
                        // otherwise, reduce.
                        case WAIT:
                            {
                                assert results.size() <= this.jobRes.size();
                                // Check whether there are more results to wait for.
                                // If result cache is disabled, then we reduce
                                // when both collections are empty.
                                if (results.size() == this.jobRes.size()) {
                                    plc = ComputeJobResultPolicy.REDUCE;
                                    // All results are received, proceed to reduce method.
                                    state = State.REDUCING;
                                }
                                break;
                            }
                        case FAILOVER:
                            {
                                if (affCacheIds != null) {
                                    mapTopVer = ctx.cache().context().exchange().readyAffinityVersion();
                                    affFut = ctx.cache().context().exchange().lastTopologyFuture();
                                    if (affFut == null || affFut.isDone()) {
                                        affFut = null;
                                        // Need to asynchronously fetch affinity if cache is not started on node.
                                        if (affCacheName != null && ctx.cache().internalCache(affCacheName) == null) {
                                            affFut = ctx.affinity().affinityCacheFuture(affCacheName, mapTopVer);
                                            if (affFut.isDone())
                                                affFut = null;
                                        }
                                    }
                                }
                                if (affFut != null) {
                                    waitForAffTop = true;
                                    jobRes.resetResponse();
                                } else if (!failover(res, jobRes, getTaskTopology()))
                                    // Failover was not performed: nothing more to do for this response.
                                    plc = null;
                                break;
                            }
                    }
                }
            }
            // Outside of synchronization.
            if (retry && !waitForAffTop) {
                // Handle retry: the delay grows with every attempt.
                retryAttemptCnt++;
                final long wait = retryAttemptCnt * RETRY_DELAY_MS;
                sendRetryRequest(wait, jobRes, res);
            } else if (plc != null && !waitForAffTop && !retry) {
                // Handle failover.
                if (plc == FAILOVER)
                    sendFailoverRequest(jobRes);
                else {
                    evtLsnr.onJobFinished(this, jobRes.getSibling());
                    if (plc == ComputeJobResultPolicy.REDUCE)
                        reduce(results);
                }
            }
        } catch (IgniteCheckedException e) {
            if (log.isDebugEnabled())
                U.error(log, "Failed to obtain topology [ses=" + ses + ", err=" + e + ']', e);
            finishTask(null, e);
            // The task has been finished with an error, so do not schedule re-processing.
            waitForAffTop = false;
        } finally {
            // Open up job for processing responses.
            // Only unset occupied flag, if it was
            // set in this method.
            if (selfOccupied) {
                assert jobRes != null;
                synchronized (mux) {
                    jobRes.setOccupied(false);
                    lockRespProc = false;
                }
                // Process delayed responses if there are any.
                res = delayedRess.poll();
            }
        }
        if (waitForAffTop && affFut != null) {
            // Once the affinity topology future completes, process the same response again.
            affFut.listen(new IgniteInClosure<IgniteInternalFuture<?>>() {

                @Override
                public void apply(IgniteInternalFuture<?> fut0) {
                    ctx.closure().runLocalSafe(new GridPlainRunnable() {

                        @Override
                        public void run() {
                            onResponse(failoverRes);
                        }
                    }, false);
                }
            });
        }
    }
}
Also used : GridJobResultImpl(org.apache.ignite.internal.GridJobResultImpl) IgniteInternalFuture(org.apache.ignite.internal.IgniteInternalFuture) GridPlainRunnable(org.apache.ignite.internal.util.lang.GridPlainRunnable) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) ComputeJobResultPolicy(org.apache.ignite.compute.ComputeJobResultPolicy) GridJobExecuteResponse(org.apache.ignite.internal.GridJobExecuteResponse) IgniteException(org.apache.ignite.IgniteException) ComputeJobAfterSend(org.apache.ignite.compute.ComputeJobAfterSend) GridTimeoutObject(org.apache.ignite.internal.processors.timeout.GridTimeoutObject) ComputeJobResult(org.apache.ignite.compute.ComputeJobResult)

Aggregations

GridJobResultImpl (org.apache.ignite.internal.GridJobResultImpl)4 ArrayList (java.util.ArrayList)3 Collections.emptyMap (java.util.Collections.emptyMap)2 EnumMap (java.util.EnumMap)2 HashMap (java.util.HashMap)2 Map (java.util.Map)2 UUID (java.util.UUID)2 IgniteCheckedException (org.apache.ignite.IgniteCheckedException)2 GridJobExecuteResponse (org.apache.ignite.internal.GridJobExecuteResponse)2 IgniteException (org.apache.ignite.IgniteException)1 ClusterNode (org.apache.ignite.cluster.ClusterNode)1 ClusterTopologyException (org.apache.ignite.cluster.ClusterTopologyException)1 ComputeJob (org.apache.ignite.compute.ComputeJob)1 ComputeJobAfterSend (org.apache.ignite.compute.ComputeJobAfterSend)1 ComputeJobResult (org.apache.ignite.compute.ComputeJobResult)1 ComputeJobResultPolicy (org.apache.ignite.compute.ComputeJobResultPolicy)1 ComputeJobSibling (org.apache.ignite.compute.ComputeJobSibling)1 GridJobSiblingImpl (org.apache.ignite.internal.GridJobSiblingImpl)1 IgniteInternalFuture (org.apache.ignite.internal.IgniteInternalFuture)1 ComputeJobStatusEnum (org.apache.ignite.internal.processors.job.ComputeJobStatusEnum)1