Use of org.apache.ignite.cluster.ClusterTopologyException in project ignite by apache: class DataStreamerImpl, method load0.
/**
* @param entries Entries.
* @param resFut Result future.
* @param activeKeys Active keys.
* @param remaps Remaps count.
*/
private void load0(
    Collection<? extends DataStreamerEntry> entries,
    final GridFutureAdapter<Object> resFut,
    @Nullable final Collection<KeyCacheObjectWrapper> activeKeys,
    final int remaps
) {
    try {
        assert entries != null;

        final boolean remap = remaps > 0;

        if (!remap) {
            // Failed data should be processed prior to new data.
            acquireRemapSemaphore();
        }

        if (!isWarningPrinted) {
            synchronized (this) {
                if (!allowOverwrite() && !isWarningPrinted) {
                    U.warn(log, "Data streamer will not overwrite existing cache entries for better performance " +
                        "(to change, set allowOverwrite to true)");
                }

                isWarningPrinted = true;
            }
        }

        Map<ClusterNode, Collection<DataStreamerEntry>> mappings = new HashMap<>();

        boolean initPda = ctx.deploy().enabled() && jobPda == null;

        GridCacheAdapter cache = ctx.cache().internalCache(cacheName);

        if (cache == null)
            throw new IgniteCheckedException("Cache not created or already destroyed.");

        GridCacheContext cctx = cache.context();

        GridCacheGateway gate = null;

        if (!allowOverwrite() && !cctx.isLocal()) {
            // Cases where cctx is required.
            gate = cctx.gate();

            gate.enter();
        }

        try {
            AffinityTopologyVersion topVer = allowOverwrite() || cctx.isLocal() ?
                ctx.cache().context().exchange().readyAffinityVersion() :
                cctx.topology().topologyVersion();

            for (DataStreamerEntry entry : entries) {
                List<ClusterNode> nodes;

                try {
                    KeyCacheObject key = entry.getKey();

                    assert key != null;

                    if (initPda) {
                        if (cacheObjCtx.addDeploymentInfo())
                            jobPda = new DataStreamerPda(key.value(cacheObjCtx, false),
                                entry.getValue() != null ? entry.getValue().value(cacheObjCtx, false) : null,
                                rcvr);
                        else if (rcvr != null)
                            jobPda = new DataStreamerPda(rcvr);

                        initPda = false;
                    }

                    nodes = nodes(key, topVer, cctx);
                }
                catch (IgniteCheckedException e) {
                    resFut.onDone(e);

                    return;
                }

                if (F.isEmpty(nodes)) {
                    resFut.onDone(new ClusterTopologyException("Failed to map key to node " +
                        "(no nodes with cache found in topology) [infos=" + entries.size() +
                        ", cacheName=" + cacheName + ']'));

                    return;
                }

                for (ClusterNode node : nodes) {
                    Collection<DataStreamerEntry> col = mappings.get(node);

                    if (col == null)
                        mappings.put(node, col = new ArrayList<>());

                    col.add(entry);
                }
            }

            for (final Map.Entry<ClusterNode, Collection<DataStreamerEntry>> e : mappings.entrySet()) {
                final UUID nodeId = e.getKey().id();

                Buffer buf = bufMappings.get(nodeId);

                if (buf == null) {
                    Buffer old = bufMappings.putIfAbsent(nodeId, buf = new Buffer(e.getKey()));

                    if (old != null)
                        buf = old;
                }

                final Collection<DataStreamerEntry> entriesForNode = e.getValue();

                IgniteInClosure<IgniteInternalFuture<?>> lsnr = new IgniteInClosure<IgniteInternalFuture<?>>() {
                    @Override public void apply(IgniteInternalFuture<?> t) {
                        try {
                            t.get();

                            if (activeKeys != null) {
                                for (DataStreamerEntry e : entriesForNode)
                                    activeKeys.remove(new KeyCacheObjectWrapper(e.getKey()));

                                if (activeKeys.isEmpty())
                                    resFut.onDone();
                            }
                            else {
                                assert entriesForNode.size() == 1;

                                // That has been a single key, so complete result future right away.
                                resFut.onDone();
                            }
                        }
                        catch (IgniteClientDisconnectedCheckedException e1) {
                            if (log.isDebugEnabled())
                                log.debug("Future finished with disconnect error [nodeId=" + nodeId + ", err=" + e1 + ']');

                            resFut.onDone(e1);
                        }
                        catch (IgniteCheckedException e1) {
                            if (log.isDebugEnabled())
                                log.debug("Future finished with error [nodeId=" + nodeId + ", err=" + e1 + ']');

                            if (cancelled) {
                                resFut.onDone(new IgniteCheckedException("Data streamer has been cancelled: " +
                                    DataStreamerImpl.this, e1));
                            }
                            else if (remaps + 1 > maxRemapCnt) {
                                resFut.onDone(new IgniteCheckedException("Failed to finish operation " +
                                    "(too many remaps): " + remaps, e1));
                            }
                            else {
                                try {
                                    remapSem.acquire();

                                    final Runnable r = new Runnable() {
                                        @Override public void run() {
                                            try {
                                                if (cancelled)
                                                    throw new IllegalStateException("DataStreamer closed.");

                                                load0(entriesForNode, resFut, activeKeys, remaps + 1);
                                            }
                                            catch (Throwable ex) {
                                                resFut.onDone(new IgniteCheckedException("DataStreamer remapping failed.", ex));
                                            }
                                            finally {
                                                remapSem.release();
                                            }
                                        }
                                    };

                                    dataToRemap.add(r);

                                    if (!remapOwning.get() && remapOwning.compareAndSet(false, true)) {
                                        ctx.closure().callLocalSafe(new GPC<Boolean>() {
                                            @Override public Boolean call() {
                                                boolean locked = true;

                                                while (locked || !dataToRemap.isEmpty()) {
                                                    if (!locked && !remapOwning.compareAndSet(false, true))
                                                        return false;

                                                    try {
                                                        Runnable r = dataToRemap.poll();

                                                        if (r != null)
                                                            r.run();
                                                    }
                                                    finally {
                                                        if (!dataToRemap.isEmpty())
                                                            locked = true;
                                                        else {
                                                            remapOwning.set(false);

                                                            locked = false;
                                                        }
                                                    }
                                                }

                                                return true;
                                            }
                                        }, true);
                                    }
                                }
                                catch (InterruptedException e2) {
                                    resFut.onDone(e2);
                                }
                            }
                        }
                    }
                };

                final GridFutureAdapter<?> f;

                try {
                    f = buf.update(entriesForNode, topVer, lsnr, remap);
                }
                catch (IgniteInterruptedCheckedException e1) {
                    resFut.onDone(e1);

                    return;
                }

                if (ctx.discovery().node(nodeId) == null) {
                    if (bufMappings.remove(nodeId, buf)) {
                        final Buffer buf0 = buf;

                        waitAffinityAndRun(new Runnable() {
                            @Override public void run() {
                                buf0.onNodeLeft();

                                if (f != null)
                                    f.onDone(new ClusterTopologyCheckedException("Failed to wait for request completion " +
                                        "(node has left): " + nodeId));
                            }
                        }, ctx.discovery().topologyVersion(), false);
                    }
                }
            }
        }
        finally {
            if (gate != null)
                gate.leave();
        }
    }
    catch (Exception ex) {
        resFut.onDone(new IgniteCheckedException("DataStreamer data loading failed.", ex));
    }
}
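For orientation: the ClusterTopologyException created in load0 when a key maps to no nodes eventually completes the futures that the public IgniteDataStreamer API hands to user code. Below is a minimal caller-side sketch, not taken from the Ignite sources; the cache name "myCache" and the error handling are illustrative assumptions. ClusterTopologyException is unchecked (it extends IgniteException), so catching it explicitly is optional.

import org.apache.ignite.Ignite;
import org.apache.ignite.IgniteDataStreamer;
import org.apache.ignite.Ignition;
import org.apache.ignite.cluster.ClusterTopologyException;

public class StreamerTopologyExample {
    public static void main(String[] args) {
        try (Ignite ignite = Ignition.start()) {
            // Create the target cache so the streamer has somewhere to load (illustrative name).
            ignite.getOrCreateCache("myCache");

            try (IgniteDataStreamer<Integer, String> streamer = ignite.dataStreamer("myCache")) {
                for (int i = 0; i < 1000; i++)
                    streamer.addData(i, Integer.toString(i));
            }
            catch (ClusterTopologyException e) {
                // close() flushes the remaining buffers; a concurrent topology change that
                // leaves keys unmappable can surface here as ClusterTopologyException.
                System.err.println("Topology changed while streaming: " + e.getMessage());
            }
        }
    }
}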
Use of org.apache.ignite.cluster.ClusterTopologyException in project ignite by apache: class GridTaskWorker, method sendRequest.
/**
* @param res Job result.
*/
private void sendRequest(ComputeJobResult res) {
    assert res != null;

    GridJobExecuteRequest req = null;

    ClusterNode node = res.getNode();

    try {
        ClusterNode curNode = ctx.discovery().node(node.id());

        // Check that the node still exists before sending: we cannot count on an
        // exception being thrown in case of send failure.
        if (curNode == null) {
            U.warn(log, "Failed to send job request because remote node left grid (if fail-over is enabled, " +
                "will attempt fail-over to another node) [node=" + node + ", taskName=" + ses.getTaskName() +
                ", taskSesId=" + ses.getId() + ", jobSesId=" + res.getJobContext().getJobId() + ']');

            ctx.resource().invokeAnnotated(dep, res.getJob(), ComputeJobAfterSend.class);

            GridJobExecuteResponse fakeRes = new GridJobExecuteResponse(node.id(), ses.getId(),
                res.getJobContext().getJobId(), null, null, null, null, null, null, false, null);

            fakeRes.setFakeException(new ClusterTopologyException("Failed to send job due to node failure: " + node));

            onResponse(fakeRes);
        }
        else {
            long timeout = ses.getEndTime() == Long.MAX_VALUE ? Long.MAX_VALUE : ses.getEndTime() - U.currentTimeMillis();

            if (timeout > 0) {
                boolean loc = node.id().equals(ctx.discovery().localNode().id()) && !ctx.config().isMarshalLocalJobs();

                Map<Object, Object> sesAttrs = ses.isFullSupport() ? ses.getAttributes() : null;

                Map<? extends Serializable, ? extends Serializable> jobAttrs =
                    (Map<? extends Serializable, ? extends Serializable>)res.getJobContext().getAttributes();

                boolean forceLocDep = internal || !ctx.deploy().enabled();

                req = new GridJobExecuteRequest(
                    ses.getId(),
                    res.getJobContext().getJobId(),
                    ses.getTaskName(),
                    ses.getUserVersion(),
                    ses.getTaskClassName(),
                    loc ? null : U.marshal(marsh, res.getJob()),
                    loc ? res.getJob() : null,
                    ses.getStartTime(),
                    timeout,
                    ses.getTopology(),
                    loc ? null : U.marshal(marsh, ses.getJobSiblings()),
                    loc ? ses.getJobSiblings() : null,
                    loc ? null : U.marshal(marsh, sesAttrs),
                    loc ? sesAttrs : null,
                    loc ? null : U.marshal(marsh, jobAttrs),
                    loc ? jobAttrs : null,
                    ses.getCheckpointSpi(),
                    dep.classLoaderId(),
                    dep.deployMode(),
                    continuous,
                    dep.participants(),
                    forceLocDep,
                    ses.isFullSupport(),
                    internal,
                    subjId,
                    affCacheIds,
                    affPartId,
                    mapTopVer,
                    ses.executorName());

                if (loc)
                    ctx.job().processJobExecuteRequest(ctx.discovery().localNode(), req);
                else {
                    byte plc;

                    if (internal)
                        plc = MANAGEMENT_POOL;
                    else {
                        Byte ctxPlc = getThreadContext(TC_IO_POLICY);

                        if (ctxPlc != null)
                            plc = ctxPlc;
                        else
                            plc = PUBLIC_POOL;
                    }

                    // Send job execution request.
                    ctx.io().sendToGridTopic(node, TOPIC_JOB, req, plc);

                    if (log.isDebugEnabled())
                        log.debug("Sent job request [req=" + req + ", node=" + node + ']');
                }

                if (!loc)
                    ctx.resource().invokeAnnotated(dep, res.getJob(), ComputeJobAfterSend.class);
            }
            else
                U.warn(log, "Job timed out prior to sending job execution request: " + res.getJob());
        }
    }
    catch (IgniteCheckedException e) {
        IgniteException fakeErr = null;

        try {
            boolean deadNode = isDeadNode(res.getNode().id());

            // Avoid stack trace if node has left grid.
            if (deadNode) {
                U.warn(log, "Failed to send job request because remote node left grid (if failover is enabled, " +
                    "will attempt fail-over to another node) [node=" + node + ", taskName=" + ses.getTaskName() +
                    ", taskSesId=" + ses.getId() + ", jobSesId=" + res.getJobContext().getJobId() + ']');

                fakeErr = new ClusterTopologyException("Failed to send job due to node failure: " + node, e);
            }
            else
                U.error(log, "Failed to send job request: " + req, e);
        }
        catch (IgniteClientDisconnectedCheckedException e0) {
            if (log.isDebugEnabled())
                log.debug("Failed to send job request, client disconnected [node=" + node +
                    ", taskName=" + ses.getTaskName() + ", taskSesId=" + ses.getId() +
                    ", jobSesId=" + res.getJobContext().getJobId() + ']');

            fakeErr = U.convertException(e0);
        }

        GridJobExecuteResponse fakeRes = new GridJobExecuteResponse(node.id(), ses.getId(),
            res.getJobContext().getJobId(), null, null, null, null, null, null, false, null);

        if (fakeErr == null)
            fakeErr = U.convertException(e);

        fakeRes.setFakeException(fakeErr);

        onResponse(fakeRes);
    }
}
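On the caller side, the fake response installed above is what ultimately makes a task fail with ClusterTopologyException when the mapped node has left and failover cannot rescue the job. A hedged sketch of catching it follows; EchoTask is an illustrative placeholder task, not a class from the Ignite sources.

import java.util.Collection;
import java.util.Collections;
import java.util.List;
import org.apache.ignite.Ignite;
import org.apache.ignite.Ignition;
import org.apache.ignite.cluster.ClusterTopologyException;
import org.apache.ignite.compute.ComputeJob;
import org.apache.ignite.compute.ComputeJobAdapter;
import org.apache.ignite.compute.ComputeJobResult;
import org.apache.ignite.compute.ComputeTaskSplitAdapter;

public class ComputeTopologyExample {
    /** Trivial one-job task used only to illustrate the catch site. */
    private static class EchoTask extends ComputeTaskSplitAdapter<String, String> {
        @Override protected Collection<? extends ComputeJob> split(int gridSize, final String arg) {
            return Collections.singleton(new ComputeJobAdapter() {
                @Override public Object execute() {
                    return arg;
                }
            });
        }

        @Override public String reduce(List<ComputeJobResult> results) {
            return results.get(0).getData();
        }
    }

    public static void main(String[] args) {
        try (Ignite ignite = Ignition.start()) {
            try {
                System.out.println(ignite.compute().execute(new EchoTask(), "hello"));
            }
            catch (ClusterTopologyException e) {
                // Surfaces when a job's target node leaves before the request can be sent
                // and no failover target remains.
                System.err.println("Topology changed: " + e.getMessage());
            }
        }
    }
}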
Use of org.apache.ignite.cluster.ClusterTopologyException in project ignite by apache: class CacheContinuousQueryFailoverAbstractSelfTest, method checkBackupQueue.
/**
* @param backups Number of backups.
* @param updateFromClient If {@code true}, executes cache updates from the client node.
* @throws Exception If failed.
*/
private void checkBackupQueue(int backups, boolean updateFromClient) throws Exception {
    this.backups = atomicityMode() == CacheAtomicityMode.ATOMIC ? backups : (backups < 2 ? 2 : backups);

    final int SRV_NODES = 4;

    startGridsMultiThreaded(SRV_NODES);

    client = true;

    Ignite qryClient = startGrid(SRV_NODES);

    client = false;

    IgniteCache<Object, Object> qryClientCache = qryClient.cache(DEFAULT_CACHE_NAME);

    Affinity<Object> aff = qryClient.affinity(DEFAULT_CACHE_NAME);

    CacheEventListener1 lsnr = asyncCallback() ? new CacheEventAsyncListener1(false) : new CacheEventListener1(false);

    ContinuousQuery<Object, Object> qry = new ContinuousQuery<>();

    qry.setLocalListener(lsnr);

    QueryCursor<?> cur = qryClientCache.query(qry);

    int PARTS = 10;

    Map<Object, T2<Object, Object>> updates = new HashMap<>();

    List<T3<Object, Object, Object>> expEvts = new ArrayList<>();

    for (int i = 0; i < (atomicityMode() == CacheAtomicityMode.ATOMIC ? SRV_NODES - 1 : SRV_NODES - 2); i++) {
        log.info("Stop iteration: " + i);

        TestCommunicationSpi spi = (TestCommunicationSpi)ignite(i).configuration().getCommunicationSpi();

        Ignite ignite = ignite(i);

        IgniteCache<Object, Object> cache = ignite.cache(DEFAULT_CACHE_NAME);

        List<Integer> keys = testKeys(cache, PARTS);

        CountDownLatch latch = new CountDownLatch(keys.size());

        lsnr.latch = latch;

        boolean first = true;

        for (Integer key : keys) {
            log.info("Put [node=" + ignite.name() + ", key=" + key + ", part=" + aff.partition(key) + ']');

            T2<Object, Object> t = updates.get(key);

            if (updateFromClient) {
                if (atomicityMode() == CacheAtomicityMode.TRANSACTIONAL) {
                    try (Transaction tx = qryClient.transactions().txStart()) {
                        qryClientCache.put(key, key);

                        tx.commit();
                    }
                    catch (CacheException | ClusterTopologyException ignored) {
                        log.warning("Failed put. [Key=" + key + ", val=" + key + "]");

                        continue;
                    }
                }
                else
                    qryClientCache.put(key, key);
            }
            else {
                if (atomicityMode() == CacheAtomicityMode.TRANSACTIONAL) {
                    try (Transaction tx = ignite.transactions().txStart()) {
                        cache.put(key, key);

                        tx.commit();
                    }
                    catch (CacheException | ClusterTopologyException ignored) {
                        log.warning("Failed put. [Key=" + key + ", val=" + key + "]");

                        continue;
                    }
                }
                else
                    cache.put(key, key);
            }

            if (t == null) {
                updates.put(key, new T2<>((Object)key, null));
                expEvts.add(new T3<>((Object)key, (Object)key, null));
            }
            else {
                updates.put(key, new T2<>((Object)key, (Object)key));
                expEvts.add(new T3<>((Object)key, (Object)key, (Object)key));
            }

            if (first) {
                spi.skipMsg = true;

                first = false;
            }
        }

        stopGrid(i);

        if (!latch.await(5, SECONDS)) {
            Set<Integer> keys0 = new HashSet<>(keys);

            keys0.removeAll(lsnr.keys);

            log.info("Missed events for keys: " + keys0);

            fail("Failed to wait for notifications [exp=" + keys.size() + ", left=" + lsnr.latch.getCount() + ']');
        }

        checkEvents(expEvts, lsnr);
    }

    for (int i = 0; i < (atomicityMode() == CacheAtomicityMode.ATOMIC ? SRV_NODES - 1 : SRV_NODES - 2); i++) {
        log.info("Start iteration: " + i);

        Ignite ignite = startGrid(i);

        IgniteCache<Object, Object> cache = ignite.cache(DEFAULT_CACHE_NAME);

        List<Integer> keys = testKeys(cache, PARTS);

        CountDownLatch latch = new CountDownLatch(keys.size());

        lsnr.latch = latch;

        for (Integer key : keys) {
            log.info("Put [node=" + ignite.name() + ", key=" + key + ", part=" + aff.partition(key) + ']');

            T2<Object, Object> t = updates.get(key);

            if (t == null) {
                updates.put(key, new T2<>((Object)key, null));
                expEvts.add(new T3<>((Object)key, (Object)key, null));
            }
            else {
                updates.put(key, new T2<>((Object)key, (Object)key));
                expEvts.add(new T3<>((Object)key, (Object)key, (Object)key));
            }

            if (updateFromClient)
                qryClientCache.put(key, key);
            else
                cache.put(key, key);
        }

        if (!latch.await(10, SECONDS)) {
            Set<Integer> keys0 = new HashSet<>(keys);

            keys0.removeAll(lsnr.keys);

            log.info("Missed events for keys: " + keys0);

            fail("Failed to wait for notifications [exp=" + keys.size() + ", left=" + lsnr.latch.getCount() + ']');
        }

        checkEvents(expEvts, lsnr);
    }

    cur.close();

    assertFalse("Unexpected error during test, see log for details.", err);
}
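The catch blocks in the first loop above show the pattern this test relies on: a transactional put racing with a node stop may fail with CacheException or ClusterTopologyException and is simply skipped. Outside a test, the same pattern is usually wrapped in a retry loop. The following standalone sketch is illustrative only; the method name, retry count, and back-off delay are assumptions, not Ignite API requirements beyond the calls shown.

import javax.cache.CacheException;
import org.apache.ignite.Ignite;
import org.apache.ignite.IgniteCache;
import org.apache.ignite.cluster.ClusterTopologyException;
import org.apache.ignite.transactions.Transaction;

public class RetryingPut {
    /** Attempts a transactional put, retrying on topology-related failures. */
    static boolean putWithRetry(Ignite ignite, IgniteCache<Integer, Integer> cache,
        int key, int val, int maxRetries) {
        for (int attempt = 0; attempt <= maxRetries; attempt++) {
            try (Transaction tx = ignite.transactions().txStart()) {
                cache.put(key, val);

                tx.commit();

                return true;
            }
            catch (CacheException | ClusterTopologyException e) {
                // Topology changed mid-transaction; back off briefly and retry.
                try {
                    Thread.sleep(100);
                }
                catch (InterruptedException ie) {
                    Thread.currentThread().interrupt();

                    return false;
                }
            }
        }

        return false;
    }
}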
Use of org.apache.ignite.cluster.ClusterTopologyException in project ignite by apache: class GridFailoverTaskWithPredicateSelfTest, method testJobFailedOver.
/**
 * Tests that failover happens on a three-node grid when the task is applicable to the first node
 * and fails on it, but is also applicable to another node.
*
* @throws Exception If failed.
*/
public void testJobFailedOver() throws Exception {
    failed.set(false);
    routed.set(false);

    try {
        Ignite ignite1 = startGrid(NODE1);
        Ignite ignite2 = startGrid(NODE2);
        Ignite ignite3 = startGrid(NODE3);

        assert ignite1 != null;
        assert ignite2 != null;
        assert ignite3 != null;

        Integer res = (Integer)compute(ignite1.cluster().forPredicate(p)).withTimeout(10000)
            .execute(JobFailTask.class.getName(), "1");

        assert res == 1;
    }
    catch (ClusterTopologyException ignored) {
        failed.set(true);
    }
    finally {
        assertFalse(failed.get());
        assertTrue(routed.get());

        stopGrid(NODE1);
        stopGrid(NODE2);
        stopGrid(NODE3);
    }
}
Use of org.apache.ignite.cluster.ClusterTopologyException in project ignite by apache: class GridFailoverTaskWithPredicateSelfTest, method testJobNotFailedOver.
/**
 * Tests that failover does not happen on a two-node grid when the task is applicable only to the first node
 * and fails on it.
*
* @throws Exception If failed.
*/
public void testJobNotFailedOver() throws Exception {
    failed.set(false);
    routed.set(false);

    try {
        Ignite ignite1 = startGrid(NODE1);
        Ignite ignite2 = startGrid(NODE2);

        assert ignite1 != null;
        assert ignite2 != null;

        compute(ignite1.cluster().forPredicate(p)).withTimeout(10000).execute(JobFailTask.class.getName(), "1");
    }
    catch (ClusterTopologyException ignored) {
        failed.set(true);
    }
    finally {
        assertTrue(failed.get());
        assertFalse(routed.get());

        stopGrid(NODE1);
        stopGrid(NODE2);
    }
}
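Both tests above route the task through a cluster-group predicate p, whose definition is not part of the snippet; in the test class it restricts which nodes may run the job. The sketch below shows the general shape of such a projection with an illustrative matching rule; the consistent-ID rule and the broadcast payload are assumptions, not taken from the test class.

import org.apache.ignite.Ignite;
import org.apache.ignite.cluster.ClusterNode;
import org.apache.ignite.cluster.ClusterTopologyException;
import org.apache.ignite.lang.IgnitePredicate;
import org.apache.ignite.lang.IgniteRunnable;

public class PredicateProjectionExample {
    public static void run(Ignite ignite) {
        // Illustrative rule: only nodes whose consistent ID contains "worker".
        IgnitePredicate<ClusterNode> p = new IgnitePredicate<ClusterNode>() {
            @Override public boolean apply(ClusterNode node) {
                return String.valueOf(node.consistentId()).contains("worker");
            }
        };

        try {
            ignite.compute(ignite.cluster().forPredicate(p)).broadcast(new IgniteRunnable() {
                @Override public void run() {
                    System.out.println("Running on a matching node.");
                }
            });
        }
        catch (ClusterTopologyException e) {
            // May surface when the projection is empty or no failover target matches the predicate.
            System.err.println("No suitable nodes: " + e.getMessage());
        }
    }
}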