use of org.apache.ignite.compute.ComputeJobResultPolicy in project ignite by apache.
the class GridTaskWorker method onResponse.
/**
 * Handles a job execution response received for this task.
 * <p>
 * The response (and then any responses previously parked in {@code delayedRess}) is processed in a loop:
 * the matching {@code GridJobResultImpl} is looked up, the payload is handed to it, the result policy is
 * obtained via {@code result(jobRes, results)} and, depending on that policy and on the retry flag of the
 * response, the task proceeds to reduce, keeps waiting, fails the job over or schedules a retry.
 * If an affinity/topology future has to complete first, the same response is re-submitted to this method
 * once that future is done.
 *
 * @param msg Job execution response.
 */
void onResponse(GridJobExecuteResponse msg) {
assert msg != null;
// Nothing to do once the task future has completed.
if (fut.isDone()) {
if (log.isDebugEnabled())
log.debug("Ignoring job response since task has finished: " + msg);
return;
}
GridJobExecuteResponse res = msg;
// Each iteration handles one response; the next one (if any) is polled from the delayed queue.
while (res != null) {
GridJobResultImpl jobRes = null;
// Flag indicating whether occupied flag for
// job response was changed in this method apply.
boolean selfOccupied = false;
// Future to wait for before the response can be re-processed (set on retry/failover paths only).
IgniteInternalFuture<?> affFut = null;
boolean waitForAffTop = false;
// Captured for the listener at the bottom of the loop; 'res' itself is reassigned in 'finally'.
final GridJobExecuteResponse failoverRes = res;
try {
synchronized (mux) {
// If the task is no longer waiting for job results,
// then there is no point to proceed.
if (state != State.WAITING) {
if (log.isDebugEnabled())
log.debug("Ignoring response since task is already reducing or finishing [res=" + res + ", job=" + ses + ", state=" + state + ']');
return;
}
jobRes = this.jobRes.get(res.getJobId());
if (jobRes == null) {
// NOTE(review): this warning is only emitted when debug logging is enabled - confirm that is intended.
if (log.isDebugEnabled())
U.warn(log, "Received response for unknown child job (was job presumed failed?): " + res);
// 'selfOccupied' is false here, so 'finally' will not poll; fetch the next delayed response explicitly.
res = delayedRess.poll();
// We can not return here because there can be more delayed messages in the queue.
continue;
}
// Only the first response for a job is processed; later ones are ignored.
// NOTE(review): the original comment is truncated here; the surviving part says such a response
// was created from discovery listener and when sending request failed.
if (jobRes.hasResponse()) {
if (log.isDebugEnabled())
log.debug("Received redundant response for a job (will ignore): " + res);
// As above: poll explicitly because 'finally' does nothing when 'selfOccupied' is false.
res = delayedRess.poll();
// We can not return here because there can be more delayed messages in the queue.
continue;
}
// The response came from a node other than the one the job result is currently bound to.
if (!jobRes.getNode().id().equals(res.getNodeId())) {
if (log.isDebugEnabled())
log.debug("Ignoring stale response as job was already resent to other node [res=" + res + ", jobRes=" + jobRes + ']');
// Prevent processing 2 responses for the same job simultaneously.
jobRes.setOccupied(true);
selfOccupied = true;
// 'continue' runs the 'finally' block, which (because 'selfOccupied' is true) clears the occupied
// flag and 'lockRespProc' and polls the next delayed response.
// NOTE(review): 'lockRespProc' was not set by this branch but is still reset in 'finally' - confirm.
// We can not return here because there can be more delayed messages in the queue.
continue;
}
// The job result is marked occupied by some other code path: park the response for later.
if (jobRes.isOccupied()) {
if (log.isDebugEnabled())
log.debug("Adding response to delayed queue (job is either being sent or processing " + "another response): " + res);
delayedRess.offer(res);
return;
}
// 'lockRespProc' is set just below while a response is being processed and cleared in 'finally';
// while it is set, further responses are parked in the delayed queue.
if (lockRespProc) {
delayedRess.offer(res);
return;
}
lockRespProc = true;
selfOccupied = true;
// Prevent processing 2 responses for the same job simultaneously.
jobRes.setOccupied(true);
// We don't keep reference to job if results are not cached.
if (!resCache)
this.jobRes.remove(res.getJobId());
}
// Outside of synchronization: hand the payload to the job result.
if (res.getFakeException() != null)
// A fake exception is passed on as the job's exception with no result and no attributes.
jobRes.onResponse(null, res.getFakeException(), null, false);
else {
ClassLoader clsLdr = dep.classLoader();
try {
// Objects are taken directly only for a response from the local node when local jobs
// are not marshalled; otherwise result, exception and attributes are unmarshalled from bytes.
boolean loc = ctx.localNodeId().equals(res.getNodeId()) && !ctx.config().isMarshalLocalJobs();
Object res0 = loc ? res.getJobResult() : U.unmarshal(marsh, res.getJobResultBytes(), U.resolveClassLoader(clsLdr, ctx.config()));
IgniteException ex = loc ? res.getException() : U.<IgniteException>unmarshal(marsh, res.getExceptionBytes(), U.resolveClassLoader(clsLdr, ctx.config()));
Map<Object, Object> attrs = loc ? res.getJobAttributes() : U.<Map<Object, Object>>unmarshal(marsh, res.getJobAttributesBytes(), U.resolveClassLoader(clsLdr, ctx.config()));
jobRes.onResponse(res0, ex, attrs, res.isCancelled());
if (loc)
ctx.resource().invokeAnnotated(dep, jobRes.getJob(), ComputeJobAfterSend.class);
} catch (IgniteCheckedException e) {
// NOTE(review): the error is logged only when debug logging is enabled; the exception itself is
// still passed to finishTask(..). Execution then falls through to result(..) below (no return here)
// - confirm both are intended.
if (log.isDebugEnabled())
U.error(log, "Error deserializing job response: " + res, e);
finishTask(null, e);
}
}
// Results passed to result(..): empty when result caching is off, otherwise read under the mutex.
List<ComputeJobResult> results;
if (!resCache)
results = emptyList();
else {
synchronized (mux) {
results = getRemoteResults();
}
}
ComputeJobResultPolicy plc = result(jobRes, results);
// A null policy fails the whole task.
if (plc == null) {
String errMsg = "Failed to obtain remote job result policy for result from ComputeTask.result(..) " + "method that returned null (will fail the whole task): " + jobRes;
finishTask(null, new IgniteCheckedException(errMsg));
return;
}
boolean retry = false;
synchronized (mux) {
// The state may have changed while the mutex was released. If the task is no longer waiting,
// then there is no point to proceed.
if (state != State.WAITING) {
if (log.isDebugEnabled())
log.debug("Ignoring ComputeTask.result(..) value since task is already reducing or" + "finishing [res=" + res + ", job=" + ses + ", state=" + state + ']');
return;
}
if (res.retry()) {
// Retry is used only with affinity call / run.
assert affCacheIds != null;
retry = true;
// Map on the later of the version requested by the response and the ready affinity version.
mapTopVer = U.max(res.getRetryTopologyVersion(), ctx.cache().context().exchange().readyAffinityVersion());
affFut = ctx.cache().context().exchange().lastTopologyFuture();
// If the last topology future is still pending, wait for it and drop the received response
// so that it can be processed again afterwards.
if (affFut != null && !affFut.isDone()) {
waitForAffTop = true;
jobRes.resetResponse();
}
} else {
switch(plc) {
// Start reducing all results received so far.
case REDUCE:
{
state = State.REDUCING;
break;
}
// Keep waiting if there are more responses to come,
// otherwise, reduce.
case WAIT:
{
assert results.size() <= this.jobRes.size();
// Sizes match when there is nothing more to wait for. With result caching disabled
// 'results' is the empty list and entries are removed from 'this.jobRes' as they are
// processed, so in that mode the task reduces
// when both collections are empty.
if (results.size() == this.jobRes.size()) {
plc = ComputeJobResultPolicy.REDUCE;
// All results are received, proceed to reduce method.
state = State.REDUCING;
}
break;
}
case FAILOVER:
{
if (affCacheIds != null) {
mapTopVer = ctx.cache().context().exchange().readyAffinityVersion();
affFut = ctx.cache().context().exchange().lastTopologyFuture();
if (affFut == null || affFut.isDone()) {
affFut = null;
// Need to asynchronously fetch affinity if cache is not started on node.
if (affCacheName != null && ctx.cache().internalCache(affCacheName) == null) {
affFut = ctx.affinity().affinityCacheFuture(affCacheName, mapTopVer);
if (affFut.isDone())
affFut = null;
}
}
}
// A pending future postpones the failover; otherwise fail over now. A 'false' return from
// failover(..) clears the policy so that nothing further is done for this response below.
if (affFut != null) {
waitForAffTop = true;
jobRes.resetResponse();
} else if (!failover(res, jobRes, getTaskTopology()))
plc = null;
break;
}
}
}
}
// Outside of synchronization.
if (retry && !waitForAffTop) {
// Handle retry: the delay grows linearly with the number of retry attempts.
retryAttemptCnt++;
final long wait = retryAttemptCnt * RETRY_DELAY_MS;
sendRetryRequest(wait, jobRes, res);
} else if (plc != null && !waitForAffTop && !retry) {
// Handle failover.
if (plc == FAILOVER)
sendFailoverRequest(jobRes);
else {
// WAIT or REDUCE: the job is finished from the listener's point of view.
evtLsnr.onJobFinished(this, jobRes.getSibling());
if (plc == ComputeJobResultPolicy.REDUCE)
reduce(results);
}
}
} catch (IgniteCheckedException e) {
// NOTE(review): the error is logged only when debug logging is enabled; the exception is still
// passed to finishTask(..) - confirm this gating is intended.
if (log.isDebugEnabled())
U.error(log, "Failed to obtain topology [ses=" + ses + ", err=" + e + ']', e);
finishTask(null, e);
// The task has been finished with an error, so do not register the topology listener below.
waitForAffTop = false;
} finally {
// Open up job for processing responses. Only unset the occupied flag if it was
// set in this method.
if (selfOccupied) {
assert jobRes != null;
synchronized (mux) {
jobRes.setOccupied(false);
lockRespProc = false;
}
// Process delayed responses if there are any.
res = delayedRess.poll();
}
}
// Re-submit the captured response to this method once the awaited future completes.
if (waitForAffTop && affFut != null) {
affFut.listen(new IgniteInClosure<IgniteInternalFuture<?>>() {
@Override
public void apply(IgniteInternalFuture<?> fut0) {
ctx.closure().runLocalSafe(new GridPlainRunnable() {
@Override
public void run() {
onResponse(failoverRes);
}
}, false);
}
});
}
}
}
use of org.apache.ignite.compute.ComputeJobResultPolicy in project ignite by apache.
the class PlatformAbstractTask method result.
/**
 * {@inheritDoc}
 * <p>
 * Passes the job result to the platform gateway while holding the read lock and converts the integer
 * returned by the gateway into a {@link ComputeJobResultPolicy}.
 */
@Override
public ComputeJobResultPolicy result(ComputeJobResult res, List<ComputeJobResult> rcvd) {
    assert rcvd.isEmpty() : "Should not cache result in Java for interop task";

    lock.readLock().lock();

    try {
        assert !done;

        PlatformAbstractJob platformJob = res.getJob();

        assert platformJob.pointer() != 0;

        Object data = res.getData();

        int plcOrd;

        if (data != PlatformAbstractJob.LOC_JOB_RES) {
            // Remote job result or exception: write it to platform memory and pass the memory pointer.
            try (PlatformMemory mem = ctx.memory().allocate()) {
                PlatformOutputStream stream = mem.output();

                BinaryRawWriterEx w = ctx.writer(stream);

                w.writeLong(taskPtr);
                w.writeLong(platformJob.pointer());
                w.writeUuid(res.getNode().id());
                w.writeBoolean(res.isCancelled());

                PlatformUtils.writeInvocationResult(w, data, res.getException());

                stream.synchronize();

                plcOrd = ctx.gateway().computeTaskJobResult(mem.pointer());
            }
        }
        else {
            // Local job execution result: only the task and job pointers are passed.
            plcOrd = ctx.gateway().computeTaskLocalJobResult(taskPtr, platformJob.pointer());
        }

        ComputeJobResultPolicy policy = ComputeJobResultPolicy.fromOrdinal((byte) plcOrd);

        assert policy != null : plcOrd;

        return policy;
    }
    finally {
        lock.readLock().unlock();
    }
}
use of org.apache.ignite.compute.ComputeJobResultPolicy in project ignite by apache.
the class SystemViewComputeJobTest method testCancelComputeTask.
/**
 * Starts a task named {@code "cancel-task"} whose single job sleeps, and checks that the jobs system view
 * shows exactly one entry for it, first in {@code PASSIVE} and later in {@code CANCELED} state, and that the
 * view becomes empty in the end. The test thread synchronizes on {@code barrier}; the other party of the
 * barrier is not part of this method.
 *
 * @throws Exception If failed.
 */
@Test
public void testCancelComputeTask() throws Exception {
    barrier = new CyclicBarrier(2);

    SystemView<ComputeJobView> jobs = server.context().systemView().view(JOBS_VIEW);

    ComputeTask<Object, Object> cancelTask = new ComputeTask<Object, Object>() {
        @Override
        @NotNull
        public Map<? extends ComputeJob, ClusterNode> map(List<ClusterNode> subgrid, @Nullable Object arg) throws IgniteException {
            ComputeJob sleepingJob = new ComputeJob() {
                @Override
                public void cancel() {
                    // No-op.
                }

                @Override
                public Object execute() throws IgniteException {
                    try {
                        Thread.sleep(60_000);

                        return null;
                    }
                    catch (InterruptedException e) {
                        throw new IgniteException(e);
                    }
                }
            };

            // The only job is mapped to the first node of the subgrid.
            return Collections.singletonMap(sleepingJob, subgrid.get(0));
        }

        @Override
        public ComputeJobResultPolicy result(ComputeJobResult res, List<ComputeJobResult> rcvd) throws IgniteException {
            return null;
        }

        @Nullable
        @Override
        public Object reduce(List<ComputeJobResult> results) throws IgniteException {
            return 1;
        }
    };

    client.compute().withName("cancel-task").executeAsync(cancelTask, 1);

    // First synchronization point: one job is expected in the view, not yet running.
    barrier.await(TIMEOUT, MILLISECONDS);

    assertEquals(1, jobs.size());
    checkJobView(jobs.iterator().next(), "cancel-task", PASSIVE);

    barrier.await(TIMEOUT, MILLISECONDS);
    barrier.await(TIMEOUT, MILLISECONDS);

    // After two more synchronization points the same single job is expected to be cancelled.
    assertEquals(1, jobs.size());
    checkJobView(jobs.iterator().next(), "cancel-task", CANCELED);

    barrier.await(TIMEOUT, MILLISECONDS);

    // Finally the view has to become empty.
    assertTrue(waitForCondition(() -> jobs.size() == 0, TIMEOUT));
}
use of org.apache.ignite.compute.ComputeJobResultPolicy in project ignite by apache.
the class SystemViewComputeJobTest method testComputeTask.
/**
 * Starts a task whose single job waits on {@code barrier} twice, and checks that the jobs system view
 * contains exactly one entry while the job is between the two waits and becomes empty after the job
 * has been released.
 *
 * @throws Exception If failed.
 */
@Test
public void testComputeTask() throws Exception {
    barrier = new CyclicBarrier(2);

    SystemView<ComputeJobView> jobs = server.context().systemView().view(JOBS_VIEW);

    ComputeTask<Object, Object> task = new ComputeTask<Object, Object>() {
        @Override
        @NotNull
        public Map<? extends ComputeJob, ClusterNode> map(List<ClusterNode> subgrid, @Nullable Object arg) throws IgniteException {
            ComputeJob blockedJob = new ComputeJob() {
                @Override
                public void cancel() {
                    // No-op.
                }

                @Override
                public Object execute() throws IgniteException {
                    try {
                        // Meets the test thread at both of its barrier waits.
                        barrier.await(TIMEOUT, MILLISECONDS);
                        barrier.await(TIMEOUT, MILLISECONDS);

                        return 1;
                    }
                    catch (InterruptedException | BrokenBarrierException | TimeoutException e) {
                        throw new RuntimeException(e);
                    }
                }
            };

            // The only job is mapped to the first node of the subgrid.
            return Collections.singletonMap(blockedJob, subgrid.get(0));
        }

        @Override
        public ComputeJobResultPolicy result(ComputeJobResult res, List<ComputeJobResult> rcvd) throws IgniteException {
            return null;
        }

        @Nullable
        @Override
        public Object reduce(List<ComputeJobResult> results) throws IgniteException {
            return 1;
        }
    };

    client.compute().executeAsync(task, 1);

    // The job has reached its first wait: it must be visible in the view.
    barrier.await(TIMEOUT, MILLISECONDS);

    assertEquals(1, jobs.size());
    checkJobView(jobs.iterator().next());

    // Release the job and wait for the view to become empty.
    barrier.await(TIMEOUT, MILLISECONDS);

    assertTrue(waitForCondition(() -> jobs.size() == 0, TIMEOUT));
}
Aggregations