Search in sources :

Example 6 with ComputeJobResultPolicy

use of org.apache.ignite.compute.ComputeJobResultPolicy in project ignite by apache.

the class GridTaskWorker method onResponse.

/**
 * Handles a job execution response for this task.
 * <p>
 * The response is matched against the known child job, its payload is recorded on the job result,
 * the task's {@code result(..)} callback is asked for a policy, and the method then waits, reduces,
 * fails over or retries accordingly. Responses that cannot be processed right away are put into
 * the delayed queue; the loop in this method drains that queue after each processed response.
 *
 * @param msg Job execution response.
 */
void onResponse(GridJobExecuteResponse msg) {
    assert msg != null;
    if (fut.isDone()) {
        if (log.isDebugEnabled())
            log.debug("Ignoring job response since task has finished: " + msg);
        return;
    }
    GridJobExecuteResponse res = msg;
    // Keep looping while there is a response to process (the current one or one polled from the delayed queue).
    while (res != null) {
        GridJobResultImpl jobRes = null;
        // Flag indicating whether occupied flag for
        // job response was changed in this method apply.
        boolean selfOccupied = false;
        IgniteInternalFuture<?> affFut = null;
        boolean waitForAffTop = false;
        // Captured for the asynchronous re-submission at the end of the loop body; 'res' itself is reassigned.
        final GridJobExecuteResponse failoverRes = res;
        try {
            synchronized (mux) {
                // If the task is no longer waiting for responses,
                // then there is no point to proceed.
                if (state != State.WAITING) {
                    if (log.isDebugEnabled())
                        log.debug("Ignoring response since task is already reducing or finishing [res=" + res + ", job=" + ses + ", state=" + state + ']');
                    return;
                }
                jobRes = this.jobRes.get(res.getJobId());
                if (jobRes == null) {
                    if (log.isDebugEnabled())
                        U.warn(log, "Received response for unknown child job (was job presumed failed?): " + res);
                    res = delayedRess.poll();
                    // We can not return here because there can be more delayed messages in the queue.
                    continue;
                }
                // Only the first response for a job is processed; any later one is ignored as redundant.
                // NOTE(review): presumably this happens when a fake failure response
                // was created from discovery listener and when sending request failed.
                if (jobRes.hasResponse()) {
                    if (log.isDebugEnabled())
                        log.debug("Received redundant response for a job (will ignore): " + res);
                    res = delayedRess.poll();
                    // We can not return here because there can be more delayed messages in the queue.
                    continue;
                }
                // The response came from a node other than the one the job result is currently bound to.
                if (!jobRes.getNode().id().equals(res.getNodeId())) {
                    if (log.isDebugEnabled())
                        log.debug("Ignoring stale response as job was already resent to other node [res=" + res + ", jobRes=" + jobRes + ']');
                    // Prevent processing 2 responses for the same job simultaneously.
                    jobRes.setOccupied(true);
                    selfOccupied = true;
                    // We can not return here because there can be more delayed messages in the queue.
                    // The 'finally' block below clears the occupied flag and polls the next delayed response.
                    continue;
                }
                if (jobRes.isOccupied()) {
                    if (log.isDebugEnabled())
                        log.debug("Adding response to delayed queue (job is either being sent or processing " + "another response): " + res);
                    delayedRess.offer(res);
                    return;
                }
                // 'lockRespProc' is set just below and cleared in 'finally', so it is true only while
                // another response is inside this method's processing section; park this one meanwhile.
                if (lockRespProc) {
                    delayedRess.offer(res);
                    return;
                }
                lockRespProc = true;
                selfOccupied = true;
                // Prevent processing 2 responses for the same job simultaneously.
                jobRes.setOccupied(true);
                // We don't keep reference to job if results are not cached.
                if (!resCache)
                    this.jobRes.remove(res.getJobId());
            }
            // A fake exception is handed to the job result directly, without any unmarshalling.
            if (res.getFakeException() != null)
                jobRes.onResponse(null, res.getFakeException(), null, false);
            else {
                ClassLoader clsLdr = dep.classLoader();
                try {
                    // Results from the local node are used as-is unless local jobs are configured to be marshalled.
                    boolean loc = ctx.localNodeId().equals(res.getNodeId()) && !ctx.config().isMarshalLocalJobs();
                    Object res0 = loc ? res.getJobResult() : U.unmarshal(marsh, res.getJobResultBytes(), U.resolveClassLoader(clsLdr, ctx.config()));
                    IgniteException ex = loc ? res.getException() : U.<IgniteException>unmarshal(marsh, res.getExceptionBytes(), U.resolveClassLoader(clsLdr, ctx.config()));
                    Map<Object, Object> attrs = loc ? res.getJobAttributes() : U.<Map<Object, Object>>unmarshal(marsh, res.getJobAttributesBytes(), U.resolveClassLoader(clsLdr, ctx.config()));
                    jobRes.onResponse(res0, ex, attrs, res.isCancelled());
                    if (loc)
                        ctx.resource().invokeAnnotated(dep, jobRes.getJob(), ComputeJobAfterSend.class);
                } catch (IgniteCheckedException e) {
                    if (log.isDebugEnabled())
                        U.error(log, "Error deserializing job response: " + res, e);
                    // NOTE(review): there is no return after finishTask(..); execution falls
                    // through to result(..) below - confirm this is intended.
                    finishTask(null, e);
                }
            }
            // Results passed to the task callback: empty when result caching is off, otherwise the remote results.
            List<ComputeJobResult> results;
            if (!resCache)
                results = emptyList();
            else {
                synchronized (mux) {
                    results = getRemoteResults();
                }
            }
            // Ask the task which policy applies to this job result.
            ComputeJobResultPolicy plc = result(jobRes, results);
            if (plc == null) {
                String errMsg = "Failed to obtain remote job result policy for result from ComputeTask.result(..) " + "method that returned null (will fail the whole task): " + jobRes;
                finishTask(null, new IgniteCheckedException(errMsg));
                return;
            }
            boolean retry = false;
            synchronized (mux) {
                // If the task is no longer waiting for responses,
                // then there is no point to proceed.
                if (state != State.WAITING) {
                    if (log.isDebugEnabled())
                        log.debug("Ignoring ComputeTask.result(..) value since task is already reducing or" + "finishing [res=" + res + ", job=" + ses + ", state=" + state + ']');
                    return;
                }
                if (res.retry()) {
                    // Retry is used only with affinity call / run.
                    assert affCacheIds != null;
                    retry = true;
                    mapTopVer = U.max(res.getRetryTopologyVersion(), ctx.cache().context().exchange().readyAffinityVersion());
                    affFut = ctx.cache().context().exchange().lastTopologyFuture();
                    // If the topology future is still pending, drop the recorded response and wait for it.
                    if (affFut != null && !affFut.isDone()) {
                        waitForAffTop = true;
                        jobRes.resetResponse();
                    }
                } else {
                    switch(plc) {
                        // Start reducing all results received so far.
                        case REDUCE:
                            {
                                state = State.REDUCING;
                                break;
                            }
                        // Keep waiting if there are more responses to come,
                        // otherwise, reduce.
                        case WAIT:
                            {
                                assert results.size() <= this.jobRes.size();
                                // The sizes also match (and so we reduce)
                                // when both collections are empty.
                                if (results.size() == this.jobRes.size()) {
                                    plc = ComputeJobResultPolicy.REDUCE;
                                    // All results are received, proceed to reduce method.
                                    state = State.REDUCING;
                                }
                                break;
                            }
                        case FAILOVER:
                            {
                                if (affCacheIds != null) {
                                    mapTopVer = ctx.cache().context().exchange().readyAffinityVersion();
                                    affFut = ctx.cache().context().exchange().lastTopologyFuture();
                                    if (affFut == null || affFut.isDone()) {
                                        affFut = null;
                                        // Need to asynchronously fetch affinity if cache is not started on node.
                                        if (affCacheName != null && ctx.cache().internalCache(affCacheName) == null) {
                                            affFut = ctx.affinity().affinityCacheFuture(affCacheName, mapTopVer);
                                            if (affFut.isDone())
                                                affFut = null;
                                        }
                                    }
                                }
                                // A pending future means failover is postponed; otherwise fail over now, and
                                // a 'false' result from failover(..) clears the policy so nothing further is done below.
                                if (affFut != null) {
                                    waitForAffTop = true;
                                    jobRes.resetResponse();
                                } else if (!failover(res, jobRes, getTaskTopology()))
                                    plc = null;
                                break;
                            }
                    }
                }
            }
            // Outside of synchronization.
            if (retry && !waitForAffTop) {
                // Handle retry: the delay grows linearly with the number of attempts.
                retryAttemptCnt++;
                final long wait = retryAttemptCnt * RETRY_DELAY_MS;
                sendRetryRequest(wait, jobRes, res);
            } else if (plc != null && !waitForAffTop && !retry) {
                // Handle failover.
                if (plc == FAILOVER)
                    sendFailoverRequest(jobRes);
                else {
                    evtLsnr.onJobFinished(this, jobRes.getSibling());
                    if (plc == ComputeJobResultPolicy.REDUCE)
                        reduce(results);
                }
            }
        } catch (IgniteCheckedException e) {
            if (log.isDebugEnabled())
                U.error(log, "Failed to obtain topology [ses=" + ses + ", err=" + e + ']', e);
            finishTask(null, e);
            // The task is finished with an error, so do not schedule the asynchronous re-run below.
            waitForAffTop = false;
        } finally {
            // Open up job for processing responses. Only unset the occupied flag if it was
            // set in this method.
            if (selfOccupied) {
                assert jobRes != null;
                synchronized (mux) {
                    jobRes.setOccupied(false);
                    lockRespProc = false;
                }
                // Process delayed responses if there are any.
                res = delayedRess.poll();
            }
        }
        // Re-run the captured response through onResponse(..) once the pending future completes.
        if (waitForAffTop && affFut != null) {
            affFut.listen(new IgniteInClosure<IgniteInternalFuture<?>>() {

                @Override
                public void apply(IgniteInternalFuture<?> fut0) {
                    ctx.closure().runLocalSafe(new GridPlainRunnable() {

                        @Override
                        public void run() {
                            onResponse(failoverRes);
                        }
                    }, false);
                }
            });
        }
    }
}
Also used : GridJobResultImpl(org.apache.ignite.internal.GridJobResultImpl) IgniteInternalFuture(org.apache.ignite.internal.IgniteInternalFuture) GridPlainRunnable(org.apache.ignite.internal.util.lang.GridPlainRunnable) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) ComputeJobResultPolicy(org.apache.ignite.compute.ComputeJobResultPolicy) GridJobExecuteResponse(org.apache.ignite.internal.GridJobExecuteResponse) IgniteException(org.apache.ignite.IgniteException) ComputeJobAfterSend(org.apache.ignite.compute.ComputeJobAfterSend) GridTimeoutObject(org.apache.ignite.internal.processors.timeout.GridTimeoutObject) ComputeJobResult(org.apache.ignite.compute.ComputeJobResult)

Example 7 with ComputeJobResultPolicy

use of org.apache.ignite.compute.ComputeJobResultPolicy in project ignite by apache.

the class PlatformAbstractTask method result.

/**
 * {@inheritDoc}
 * <p>
 * Forwards the job result to the platform gateway and converts the ordinal it
 * returns into a {@link ComputeJobResultPolicy}. The whole exchange runs under the read lock.
 */
@Override
public ComputeJobResultPolicy result(ComputeJobResult res, List<ComputeJobResult> rcvd) {
    assert rcvd.isEmpty() : "Should not cache result in Java for interop task";
    lock.readLock().lock();
    try {
        assert !done;
        PlatformAbstractJob platformJob = res.getJob();
        assert platformJob.pointer() != 0;
        Object payload = res.getData();
        int plcOrdinal;
        if (payload != PlatformAbstractJob.LOC_JOB_RES) {
            // Remote result or exception: serialize everything into platform memory
            // and let the gateway read it from there.
            try (PlatformMemory mem = ctx.memory().allocate()) {
                PlatformOutputStream out = mem.output();
                BinaryRawWriterEx rawWriter = ctx.writer(out);
                rawWriter.writeLong(taskPtr);
                rawWriter.writeLong(platformJob.pointer());
                rawWriter.writeUuid(res.getNode().id());
                rawWriter.writeBoolean(res.isCancelled());
                PlatformUtils.writeInvocationResult(rawWriter, payload, res.getException());
                out.synchronize();
                plcOrdinal = ctx.gateway().computeTaskJobResult(mem.pointer());
            }
        } else {
            // Local execution result: only the task and job pointers are passed.
            plcOrdinal = ctx.gateway().computeTaskLocalJobResult(taskPtr, platformJob.pointer());
        }
        ComputeJobResultPolicy policy = ComputeJobResultPolicy.fromOrdinal((byte) plcOrdinal);
        assert policy != null : plcOrdinal;
        return policy;
    } finally {
        lock.readLock().unlock();
    }
}
Also used : ComputeJobResultPolicy(org.apache.ignite.compute.ComputeJobResultPolicy) IgniteException(org.apache.ignite.IgniteException) PlatformOutputStream(org.apache.ignite.internal.processors.platform.memory.PlatformOutputStream) BinaryRawWriterEx(org.apache.ignite.internal.binary.BinaryRawWriterEx) PlatformMemory(org.apache.ignite.internal.processors.platform.memory.PlatformMemory)

Example 8 with ComputeJobResultPolicy

use of org.apache.ignite.compute.ComputeJobResultPolicy in project ignite by apache.

the class SystemViewComputeJobTest method testCancelComputeTask.

/**
 * Starts a named compute task with a single long-sleeping job and checks how that job is
 * reported by the jobs system view: first as {@code PASSIVE}, later as {@code CANCELED},
 * and finally that the view becomes empty.
 *
 * @throws Exception If failed.
 */
@Test
public void testCancelComputeTask() throws Exception {
    barrier = new CyclicBarrier(2);
    SystemView<ComputeJobView> jobsView = server.context().systemView().view(JOBS_VIEW);
    // Task that maps exactly one sleeping job onto the first node of the subgrid.
    ComputeTask<Object, Object> sleepingTask = new ComputeTask<Object, Object>() {

        @Override
        @NotNull
        public Map<? extends ComputeJob, ClusterNode> map(List<ClusterNode> subgrid, @Nullable Object arg) throws IgniteException {
            ComputeJob sleeper = new ComputeJob() {

                @Override
                public void cancel() {
                // No-op.
                }

                @Override
                public Object execute() throws IgniteException {
                    try {
                        Thread.sleep(60_000);
                    } catch (InterruptedException e) {
                        throw new IgniteException(e);
                    }
                    return null;
                }
            };
            return Collections.singletonMap(sleeper, subgrid.get(0));
        }

        @Override
        public ComputeJobResultPolicy result(ComputeJobResult res, List<ComputeJobResult> rcvd) throws IgniteException {
            return null;
        }

        @Nullable
        @Override
        public Object reduce(List<ComputeJobResult> results) throws IgniteException {
            return 1;
        }
    };
    client.compute().withName("cancel-task").executeAsync(sleepingTask, 1);
    // First rendezvous: afterwards the view must expose the single job as PASSIVE.
    // NOTE(review): the other barrier party is not visible in this snippet - confirm who awaits it.
    barrier.await(TIMEOUT, MILLISECONDS);
    assertEquals(1, jobsView.size());
    checkJobView(jobsView.iterator().next(), "cancel-task", PASSIVE);
    barrier.await(TIMEOUT, MILLISECONDS);
    barrier.await(TIMEOUT, MILLISECONDS);
    // After two more rendezvous the same single job must be reported as CANCELED.
    assertEquals(1, jobsView.size());
    checkJobView(jobsView.iterator().next(), "cancel-task", CANCELED);
    barrier.await(TIMEOUT, MILLISECONDS);
    // Finally the view has to drain within the timeout.
    assertTrue(waitForCondition(() -> jobsView.size() == 0, TIMEOUT));
}
Also used : ClusterNode(org.apache.ignite.cluster.ClusterNode) NotNull(org.jetbrains.annotations.NotNull) CyclicBarrier(java.util.concurrent.CyclicBarrier) ComputeJobView(org.apache.ignite.spi.systemview.view.ComputeJobView) ComputeJob(org.apache.ignite.compute.ComputeJob) ComputeJobResultPolicy(org.apache.ignite.compute.ComputeJobResultPolicy) IgniteException(org.apache.ignite.IgniteException) Map(java.util.Map) Nullable(org.jetbrains.annotations.Nullable) ComputeJobResult(org.apache.ignite.compute.ComputeJobResult) GridCommonAbstractTest(org.apache.ignite.testframework.junits.common.GridCommonAbstractTest) Test(org.junit.Test)

Example 9 with ComputeJobResultPolicy

use of org.apache.ignite.compute.ComputeJobResultPolicy in project ignite by apache.

the class SystemViewComputeJobTest method testComputeTask.

/**
 * Starts a compute task whose single job rendezvous twice with the test thread on the shared
 * barrier, checks that the job is visible in the jobs system view while it is blocked, and
 * then checks that the view becomes empty once the job is released.
 *
 * @throws Exception If failed.
 */
@Test
public void testComputeTask() throws Exception {
    barrier = new CyclicBarrier(2);
    SystemView<ComputeJobView> jobsView = server.context().systemView().view(JOBS_VIEW);
    // Task that maps exactly one barrier-driven job onto the first node of the subgrid.
    ComputeTask<Object, Object> barrierTask = new ComputeTask<Object, Object>() {

        @Override
        @NotNull
        public Map<? extends ComputeJob, ClusterNode> map(List<ClusterNode> subgrid, @Nullable Object arg) throws IgniteException {
            ComputeJob blockedJob = new ComputeJob() {

                @Override
                public void cancel() {
                // No-op.
                }

                @Override
                public Object execute() throws IgniteException {
                    try {
                        // Two rendezvous with the test thread: one before and one after its view checks.
                        barrier.await(TIMEOUT, MILLISECONDS);
                        barrier.await(TIMEOUT, MILLISECONDS);
                    } catch (InterruptedException | BrokenBarrierException | TimeoutException e) {
                        throw new RuntimeException(e);
                    }
                    return 1;
                }
            };
            return Collections.singletonMap(blockedJob, subgrid.get(0));
        }

        @Override
        public ComputeJobResultPolicy result(ComputeJobResult res, List<ComputeJobResult> rcvd) throws IgniteException {
            return null;
        }

        @Nullable
        @Override
        public Object reduce(List<ComputeJobResult> results) throws IgniteException {
            return 1;
        }
    };
    client.compute().executeAsync(barrierTask, 1);
    barrier.await(TIMEOUT, MILLISECONDS);
    // The job is parked on its second await, so the view must show exactly one entry.
    assertEquals(1, jobsView.size());
    ComputeJobView jobView = jobsView.iterator().next();
    checkJobView(jobView);
    // Release the job and wait for the view to drain within the timeout.
    barrier.await(TIMEOUT, MILLISECONDS);
    assertTrue(waitForCondition(() -> jobsView.size() == 0, TIMEOUT));
}
Also used : ClusterNode(org.apache.ignite.cluster.ClusterNode) BrokenBarrierException(java.util.concurrent.BrokenBarrierException) NotNull(org.jetbrains.annotations.NotNull) CyclicBarrier(java.util.concurrent.CyclicBarrier) ComputeJobView(org.apache.ignite.spi.systemview.view.ComputeJobView) ComputeJob(org.apache.ignite.compute.ComputeJob) ComputeJobResultPolicy(org.apache.ignite.compute.ComputeJobResultPolicy) IgniteException(org.apache.ignite.IgniteException) Map(java.util.Map) Nullable(org.jetbrains.annotations.Nullable) TimeoutException(java.util.concurrent.TimeoutException) ComputeJobResult(org.apache.ignite.compute.ComputeJobResult) GridCommonAbstractTest(org.apache.ignite.testframework.junits.common.GridCommonAbstractTest) Test(org.junit.Test)

Aggregations

ComputeJobResultPolicy (org.apache.ignite.compute.ComputeJobResultPolicy)9 IgniteException (org.apache.ignite.IgniteException)6 ComputeJobResult (org.apache.ignite.compute.ComputeJobResult)4 Nullable (org.jetbrains.annotations.Nullable)4 Map (java.util.Map)3 CyclicBarrier (java.util.concurrent.CyclicBarrier)3 ClusterNode (org.apache.ignite.cluster.ClusterNode)3 ComputeJob (org.apache.ignite.compute.ComputeJob)3 GridCommonAbstractTest (org.apache.ignite.testframework.junits.common.GridCommonAbstractTest)3 NotNull (org.jetbrains.annotations.NotNull)3 Test (org.junit.Test)3 IgniteCheckedException (org.apache.ignite.IgniteCheckedException)2 ComputeJobView (org.apache.ignite.spi.systemview.view.ComputeJobView)2 BrokenBarrierException (java.util.concurrent.BrokenBarrierException)1 TimeoutException (java.util.concurrent.TimeoutException)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 ComputeJobAfterSend (org.apache.ignite.compute.ComputeJobAfterSend)1 ComputeUserUndeclaredException (org.apache.ignite.compute.ComputeUserUndeclaredException)1 GridJobExecuteResponse (org.apache.ignite.internal.GridJobExecuteResponse)1 GridJobResultImpl (org.apache.ignite.internal.GridJobResultImpl)1