Use of org.apache.ignite.internal.cluster.ClusterTopologyCheckedException in project gridgain by gridgain: class GridDhtTxPrepareFuture, method sendPrepareRequests.
/**
 */
private void sendPrepareRequests() {
    assert !tx.txState().mvccEnabled() || !tx.onePhaseCommit() || tx.mvccSnapshot() != null;

    int miniId = 0;

    assert tx.transactionNodes() != null;

    final long timeout = timeoutObj != null ? timeoutObj.timeout : 0;

    // Do not need process active transactions on backups.
    MvccSnapshot mvccSnapshot = tx.mvccSnapshot();

    if (mvccSnapshot != null)
        mvccSnapshot = mvccSnapshot.withoutActiveTransactions();

    // Create mini futures.
    for (GridDistributedTxMapping dhtMapping : tx.dhtMap().values()) {
        assert !dhtMapping.empty() || dhtMapping.queryUpdate();

        ClusterNode n = dhtMapping.primary();

        assert !n.isLocal();

        GridDistributedTxMapping nearMapping = tx.nearMap().get(n.id());

        Collection<IgniteTxEntry> nearWrites = nearMapping == null ? null : nearMapping.writes();

        Collection<IgniteTxEntry> dhtWrites = dhtMapping.writes();

        if (!dhtMapping.queryUpdate() && F.isEmpty(dhtWrites) && F.isEmpty(nearWrites))
            continue;

        MiniFuture fut = new MiniFuture(n.id(), ++miniId, dhtMapping, nearMapping);

        // Append new future.
        add(fut);

        assert req.transactionNodes() != null;

        GridDhtTxPrepareRequest req = new GridDhtTxPrepareRequest(
            futId, fut.futureId(), tx.topologyVersion(), tx, timeout, dhtWrites, nearWrites,
            this.req.transactionNodes(), tx.nearXidVersion(), true, tx.onePhaseCommit(), tx.subjectId(),
            tx.taskNameHash(), tx.activeCachesDeploymentEnabled(), tx.storeWriteThrough(), retVal, mvccSnapshot,
            IgniteFeatures.nodeSupports(cctx.kernalContext(), n, IgniteFeatures.TX_TRACKING_UPDATE_COUNTER)
                ? cctx.tm().txHandler().filterUpdateCountersForBackupNode(tx, n) : null);

        req.queryUpdate(dhtMapping.queryUpdate());

        int idx = 0;

        for (IgniteTxEntry entry : dhtWrites) {
            try {
                GridDhtCacheEntry cached = (GridDhtCacheEntry)entry.cached();

                GridCacheContext<?, ?> cacheCtx = cached.context();

                // Do not invalidate near entry on originating transaction node.
                req.invalidateNearEntry(idx, !tx.nearNodeId().equals(n.id()) && cached.readerId(n.id()) != null);

                if (cached.isNewLocked()) {
                    List<ClusterNode> owners = cacheCtx.topology().owners(cached.partition(),
                        tx != null ? tx.topologyVersion() : cacheCtx.affinity().affinityTopologyVersion());

                    // Do not preload if local node is a partition owner.
                    if (!owners.contains(cctx.localNode()))
                        req.markKeyForPreload(idx);
                }

                break;
            }
            catch (GridCacheEntryRemovedException e) {
                log.error("Got removed exception on entry with dht local candidate. Transaction will be " +
                    "rolled back. Entry: " + entry + " tx: " + CU.txDump(tx), e);

                // Entry was unlocked by concurrent rollback.
                onError(tx.rollbackException());
            }

            idx++;
        }

        if (!F.isEmpty(nearWrites)) {
            for (IgniteTxEntry entry : nearWrites) {
                try {
                    if (entry.explicitVersion() == null) {
                        GridCacheMvccCandidate added = entry.cached().candidate(version());

                        assert added != null : "Missing candidate for cache entry:" + entry;
                        assert added.dhtLocal();

                        if (added.ownerVersion() != null)
                            req.owned(entry.txKey(), added.ownerVersion());
                    }

                    break;
                }
                catch (GridCacheEntryRemovedException e) {
                    log.error("Got removed exception on entry with dht local candidate. Transaction will be " +
                        "rolled back. Entry: " + entry + " tx: " + CU.txDump(tx), e);

                    // Entry was unlocked by concurrent rollback.
                    onError(tx.rollbackException());
                }
            }
        }

        assert req.transactionNodes() != null;

        try {
            cctx.io().send(n, req, tx.ioPolicy());

            if (msgLog.isDebugEnabled()) {
                msgLog.debug("DHT prepare fut, sent request dht [txId=" + tx.nearXidVersion() +
                    ", dhtTxId=" + tx.xidVersion() + ", node=" + n.id() + ']');
            }
        }
        catch (ClusterTopologyCheckedException ignored) {
            fut.onNodeLeft();
        }
        catch (IgniteCheckedException e) {
            if (msgLog.isDebugEnabled()) {
                msgLog.debug("DHT prepare fut, failed to send request dht [txId=" + tx.nearXidVersion() +
                    ", dhtTxId=" + tx.xidVersion() + ", node=" + n.id() + ']');
            }

            fut.onResult(e);
        }
    }

    for (GridDistributedTxMapping nearMapping : tx.nearMap().values()) {
        if (!tx.dhtMap().containsKey(nearMapping.primary().id())) {
            if (tx.remainingTime() == -1)
                return;

            MiniFuture fut = new MiniFuture(nearMapping.primary().id(), ++miniId, null, nearMapping);

            // Append new future.
            add(fut);

            GridDhtTxPrepareRequest req = new GridDhtTxPrepareRequest(
                futId, fut.futureId(), tx.topologyVersion(), tx, timeout, null, nearMapping.writes(),
                tx.transactionNodes(), tx.nearXidVersion(), true, tx.onePhaseCommit(), tx.subjectId(),
                tx.taskNameHash(), tx.activeCachesDeploymentEnabled(), tx.storeWriteThrough(), retVal,
                mvccSnapshot, null);

            for (IgniteTxEntry entry : nearMapping.entries()) {
                if (CU.writes().apply(entry)) {
                    try {
                        if (entry.explicitVersion() == null) {
                            GridCacheMvccCandidate added = entry.cached().candidate(version());

                            assert added != null : "Null candidate for non-group-lock entry " +
                                "[added=" + added + ", entry=" + entry + ']';
                            assert added.dhtLocal() : "Got non-dht-local candidate for prepare future" +
                                "[added=" + added + ", entry=" + entry + ']';

                            if (added != null && added.ownerVersion() != null)
                                req.owned(entry.txKey(), added.ownerVersion());
                        }

                        break;
                    }
                    catch (GridCacheEntryRemovedException e) {
                        log.error("Got removed exception on entry with dht local candidate. Transaction will be " +
                            "rolled back. Entry: " + entry + " tx: " + CU.txDump(tx), e);

                        // Entry was unlocked by concurrent rollback.
                        onError(tx.rollbackException());
                    }
                }
            }

            assert req.transactionNodes() != null;

            try {
                cctx.io().send(nearMapping.primary(), req, tx.ioPolicy());

                if (msgLog.isDebugEnabled()) {
                    msgLog.debug("DHT prepare fut, sent request near [txId=" + tx.nearXidVersion() +
                        ", dhtTxId=" + tx.xidVersion() + ", node=" + nearMapping.primary().id() + ']');
                }
            }
            catch (ClusterTopologyCheckedException ignored) {
                fut.onNodeLeft();
            }
            catch (IgniteCheckedException e) {
                if (!cctx.kernalContext().isStopping()) {
                    if (msgLog.isDebugEnabled()) {
                        msgLog.debug("DHT prepare fut, failed to send request near [txId=" + tx.nearXidVersion() +
                            ", dhtTxId=" + tx.xidVersion() + ", node=" + nearMapping.primary().id() + ']');
                    }

                    fut.onResult(e);
                }
                else {
                    if (msgLog.isDebugEnabled()) {
                        msgLog.debug("DHT prepare fut, failed to send request near, ignore [txId=" + tx.nearXidVersion() +
                            ", dhtTxId=" + tx.xidVersion() + ", node=" + nearMapping.primary().id() + ", err=" + e + ']');
                    }
                }
            }
        }
    }
}
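In both send blocks above, ClusterTopologyCheckedException is caught before the broader IgniteCheckedException (it is a subclass of it) and routed to MiniFuture.onNodeLeft() instead of failing the future, since a node that has already left is dealt with by topology-change processing rather than by the sender. A minimal sketch of that dispatch, written against hypothetical SendException/NodeLeftException, Messenger and MiniFuture stand-ins rather than the real Ignite internals, could look like this:

import java.util.UUID;

// Hypothetical stand-ins; the real code uses ClusterTopologyCheckedException,
// IgniteCheckedException, MiniFuture and cctx.io() from the Ignite internals.
class SendException extends Exception { }
class NodeLeftException extends SendException { }

interface Messenger {
    void send(UUID nodeId, Object msg) throws SendException;
}

interface MiniFuture {
    void onNodeLeft();          // Node is gone: let topology processing finish the future.
    void onResult(Exception e); // Genuine send failure: propagate to the future.
}

class PrepareSender {
    /** Mirrors the catch order above: the topology exception is handled before the generic one. */
    static void sendPrepare(Messenger io, UUID nodeId, Object req, MiniFuture fut) {
        try {
            io.send(nodeId, req);
        }
        catch (NodeLeftException ignored) {
            fut.onNodeLeft();
        }
        catch (SendException e) {
            fut.onResult(e);
        }
    }
}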
Use of org.apache.ignite.internal.cluster.ClusterTopologyCheckedException in project gridgain by gridgain: class GridDhtTxAbstractEnlistFuture, method sendBatch.
/**
 * Send batch request to remote data node.
 *
 * @param batch Batch.
 */
private void sendBatch(Batch batch) throws IgniteCheckedException {
    assert batch != null && !batch.node().isLocal();

    ClusterNode node = batch.node();

    updateMappings(node);

    GridDhtTxQueryEnlistRequest req;

    if (newRemoteTx(node))
        addNewRemoteTxNode(node);

    if (firstReqSent.add(node)) {
        // If this is a first request to this node, send full info.
        req = new GridDhtTxQueryFirstEnlistRequest(cctx.cacheId(), futId, cctx.localNodeId(),
            tx.topologyVersionSnapshot(), lockVer, mvccSnapshot.withoutActiveTransactions(), tx.remainingTime(),
            tx.taskNameHash(), nearNodeId, nearLockVer, it.operation(), FIRST_BATCH_ID, batch.keys(), batch.values());
    }
    else {
        // Send only keys, values, LockVersion and batchId if this is not a first request to this backup.
        req = new GridDhtTxQueryEnlistRequest(cctx.cacheId(), futId, lockVer, it.operation(), ++batchIdCntr,
            mvccSnapshot.operationCounter(), batch.keys(), batch.values());
    }

    ConcurrentMap<Integer, Batch> pending0 = null;

    if (pending == null)
        pending = new ConcurrentHashMap<>();
    else
        pending0 = pending.get(node.id());

    if (pending0 == null)
        pending.put(node.id(), pending0 = new ConcurrentHashMap<>());

    Batch prev = pending0.put(req.batchId(), batch);

    assert prev == null;

    try {
        cctx.io().send(node, req, cctx.ioPolicy());
    }
    catch (ClusterTopologyCheckedException e) {
        // backup node left the grid, will continue.
        onNodeLeft(node.id());
    }
}
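The pending-batch bookkeeping above lazily creates a per-node ConcurrentHashMap keyed by batch id and asserts that no batch is registered twice before the request goes out. A sketch of the same bookkeeping expressed with Map.computeIfAbsent, using a hypothetical Batch placeholder rather than the real class, might look like this:

import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

// Hypothetical placeholder; in the snippet above Batch carries keys/values for one backup node.
class Batch { }

class PendingBatches {
    /** Node id -> (batch id -> in-flight batch), mirroring the 'pending' field above. */
    private final ConcurrentMap<UUID, ConcurrentMap<Integer, Batch>> pending = new ConcurrentHashMap<>();

    /** Registers a batch before it is sent; a second batch with the same id would be a bug. */
    void register(UUID nodeId, int batchId, Batch batch) {
        ConcurrentMap<Integer, Batch> perNode =
            pending.computeIfAbsent(nodeId, id -> new ConcurrentHashMap<>());

        Batch prev = perNode.put(batchId, batch);

        assert prev == null : "Duplicate batch id " + batchId + " for node " + nodeId;
    }
}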
Use of org.apache.ignite.internal.cluster.ClusterTopologyCheckedException in project gridgain by gridgain: class GridDhtTxFinishFuture, method finish.
/**
 * @param commit Commit flag.
 * @param dhtMap DHT map.
 * @param nearMap Near map.
 * @return {@code True} in case there is at least one synchronous {@code MiniFuture} to wait for.
 */
private boolean finish(boolean commit,
    Map<UUID, GridDistributedTxMapping> dhtMap,
    Map<UUID, GridDistributedTxMapping> nearMap) {
    if (tx.onePhaseCommit())
        return false;

    assert !commit || !tx.txState().mvccEnabled() || tx.mvccSnapshot() != null || F.isEmpty(tx.writeEntries());

    boolean sync = tx.syncMode() == FULL_SYNC;

    if (tx.explicitLock() || tx.queryEnlisted())
        sync = true;

    boolean res = false;

    int miniId = 0;

    // Do not need process active transactions on backups.
    MvccSnapshot mvccSnapshot = tx.mvccSnapshot();

    if (mvccSnapshot != null)
        mvccSnapshot = mvccSnapshot.withoutActiveTransactions();

    // Create mini futures.
    for (GridDistributedTxMapping dhtMapping : dhtMap.values()) {
        ClusterNode n = dhtMapping.primary();

        assert !n.isLocal();

        GridDistributedTxMapping nearMapping = nearMap.get(n.id());

        if (!dhtMapping.queryUpdate() && dhtMapping.empty() && nearMapping != null && nearMapping.empty())
            // Nothing to send.
            continue;

        MiniFuture fut = new MiniFuture(++miniId, dhtMapping, nearMapping);

        // Append new future.
        add(fut);

        Collection<Long> updCntrs = null;

        if (!IgniteFeatures.nodeSupports(cctx.kernalContext(), n, IgniteFeatures.TX_TRACKING_UPDATE_COUNTER)) {
            updCntrs = new ArrayList<>(dhtMapping.entries().size());

            for (IgniteTxEntry e : dhtMapping.entries()) {
                assert e.op() != CREATE && e.op() != UPDATE && e.op() != DELETE || e.updateCounter() != 0 : e;

                updCntrs.add(e.updateCounter());
            }
        }

        GridDhtTxFinishRequest req = new GridDhtTxFinishRequest(
            tx.nearNodeId(), futId, fut.futureId(), tx.topologyVersion(), tx.xidVersion(), tx.commitVersion(),
            tx.threadId(), tx.isolation(), commit, tx.isInvalidate(), tx.system(), tx.ioPolicy(),
            tx.isSystemInvalidate(), sync || !commit ? FULL_SYNC : tx.syncMode(), tx.completedBase(),
            tx.committedVersions(), tx.rolledbackVersions(), tx.pendingVersions(), tx.size(), tx.subjectId(),
            tx.taskNameHash(), tx.activeCachesDeploymentEnabled(), updCntrs, false, false, mvccSnapshot,
            commit ? null : cctx.tm().txHandler().filterUpdateCountersForBackupNode(tx, n));

        req.writeVersion(tx.writeVersion() != null ? tx.writeVersion() : tx.xidVersion());

        try {
            if (isNull(cctx.discovery().getAlive(n.id()))) {
                log.error("Unable to send message (node left topology): " + n);

                fut.onNodeLeft(new ClusterTopologyCheckedException("Node left grid while sending message to: " + n.id()));
            }
            else {
                cctx.io().send(n, req, tx.ioPolicy());

                if (msgLog.isDebugEnabled()) {
                    msgLog.debug("DHT finish fut, sent request dht [txId=" + tx.nearXidVersion() +
                        ", dhtTxId=" + tx.xidVersion() + ", node=" + n.id() + ']');
                }

                if (sync || !commit)
                    // Force sync mode for rollback to prevent an issue with concurrent recovery.
                    res = true;
                else
                    fut.onDone();
            }
        }
        catch (IgniteCheckedException e) {
            // Fail the whole thing.
            if (e instanceof ClusterTopologyCheckedException)
                fut.onNodeLeft((ClusterTopologyCheckedException)e);
            else {
                if (msgLog.isDebugEnabled()) {
                    msgLog.debug("DHT finish fut, failed to send request dht [txId=" + tx.nearXidVersion() +
                        ", dhtTxId=" + tx.xidVersion() + ", node=" + n.id() + ", err=" + e + ']');
                }

                fut.onResult(e);
            }
        }
    }

    for (GridDistributedTxMapping nearMapping : nearMap.values()) {
        if (!dhtMap.containsKey(nearMapping.primary().id())) {
            if (nearMapping.empty())
                // Nothing to send.
                continue;

            MiniFuture fut = new MiniFuture(++miniId, null, nearMapping);

            // Append new future.
            add(fut);

            GridDhtTxFinishRequest req = new GridDhtTxFinishRequest(
                tx.nearNodeId(), futId, fut.futureId(), tx.topologyVersion(), tx.xidVersion(), tx.commitVersion(),
                tx.threadId(), tx.isolation(), commit, tx.isInvalidate(), tx.system(), tx.ioPolicy(),
                tx.isSystemInvalidate(), sync ? FULL_SYNC : tx.syncMode(), tx.completedBase(),
                tx.committedVersions(), tx.rolledbackVersions(), tx.pendingVersions(), tx.size(), tx.subjectId(),
                tx.taskNameHash(), tx.activeCachesDeploymentEnabled(), false, false, mvccSnapshot, null);

            req.writeVersion(tx.writeVersion());

            try {
                cctx.io().send(nearMapping.primary(), req, tx.ioPolicy());

                if (msgLog.isDebugEnabled()) {
                    msgLog.debug("DHT finish fut, sent request near [txId=" + tx.nearXidVersion() +
                        ", dhtTxId=" + tx.xidVersion() + ", node=" + nearMapping.primary().id() + ']');
                }

                if (sync)
                    res = true;
                else
                    fut.onDone();
            }
            catch (IgniteCheckedException e) {
                // Fail the whole thing.
                if (e instanceof ClusterTopologyCheckedException)
                    fut.onNodeLeft((ClusterTopologyCheckedException)e);
                else {
                    if (msgLog.isDebugEnabled()) {
                        msgLog.debug("DHT finish fut, failed to send request near [txId=" + tx.nearXidVersion() +
                            ", dhtTxId=" + tx.xidVersion() + ", node=" + nearMapping.primary().id() + ", err=" + e + ']');
                    }

                    fut.onResult(e);
                }
            }
        }
    }

    return res;
}
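Here the topology case is not a separate catch clause: because ClusterTopologyCheckedException extends IgniteCheckedException, one catch plus an instanceof check is enough. The method also short-circuits via discovery().getAlive() before sending, and it forces a synchronous wait for rollbacks. A condensed sketch of that decision flow, written against hypothetical Io and FinishMiniFuture stand-ins rather than the Ignite internals, could look like this:

import java.util.UUID;
import java.util.function.Predicate;

// Hypothetical stand-ins mirroring the dispatch in GridDhtTxFinishFuture.finish above.
class CheckedException extends Exception { }
class TopologyException extends CheckedException { }

interface FinishMiniFuture {
    void onNodeLeft(TopologyException e);
    void onResult(Exception e);
    void onDone();
}

interface Io {
    void send(UUID nodeId, Object req) throws CheckedException;
}

class FinishSender {
    /**
     * Returns true if the caller has to wait for the reply (sync mode or rollback),
     * mirroring the 'res' flag in the snippet above.
     */
    static boolean sendFinish(Io io, Predicate<UUID> alive, UUID nodeId, Object req,
        FinishMiniFuture fut, boolean sync, boolean commit) {
        try {
            if (!alive.test(nodeId)) {
                // Node is already gone: complete the mini future through the topology path.
                fut.onNodeLeft(new TopologyException());

                return false;
            }

            io.send(nodeId, req);

            if (sync || !commit)
                return true; // Rollback is always waited for, as in the original.

            fut.onDone();

            return false;
        }
        catch (CheckedException e) {
            if (e instanceof TopologyException)
                fut.onNodeLeft((TopologyException)e);
            else
                fut.onResult(e);

            return false;
        }
    }
}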
Use of org.apache.ignite.internal.cluster.ClusterTopologyCheckedException in project gridgain by gridgain: class ZookeeperDiscoveryImpl, method resolveCommunicationError.
/**
 * @param node0 Problem node ID
 * @param err Connect error.
 */
public void resolveCommunicationError(ClusterNode node0, Exception err) {
    if (node0.isClient())
        return;

    ZookeeperClusterNode node = node(node0.id());

    if (node == null)
        throw new IgniteSpiException(new ClusterTopologyCheckedException("Node failed: " + node0.id()));

    IgniteInternalFuture<Boolean> nodeStatusFut;

    for (;;) {
        checkState();

        ZkCommunicationErrorProcessFuture fut = commErrProcFut.get();

        if (fut == null || fut.isDone()) {
            ZkCommunicationErrorProcessFuture newFut =
                ZkCommunicationErrorProcessFuture.createOnCommunicationError(this, node.sessionTimeout() + 1000);

            stats.onCommunicationError();

            if (commErrProcFut.compareAndSet(fut, newFut)) {
                fut = newFut;

                if (log.isInfoEnabled()) {
                    log.info("Created new communication error process future [errNode=" + node0.id() + ", err=" + err + ']');
                }

                try {
                    checkState();
                }
                catch (Exception e) {
                    fut.onError(e);

                    throw e;
                }

                fut.scheduleCheckOnTimeout();
            }
            else {
                fut = commErrProcFut.get();

                if (fut == null)
                    continue;
            }
        }

        nodeStatusFut = fut.nodeStatusFuture(node);

        if (nodeStatusFut != null)
            break;
        else {
            try {
                fut.get();
            }
            catch (IgniteCheckedException e) {
                U.warn(log, "Previous communication error process future failed: " + e);
            }
        }
    }

    try {
        if (!nodeStatusFut.get())
            throw new IgniteSpiException(new ClusterTopologyCheckedException("Node failed: " + node0.id()));
    }
    catch (IgniteCheckedException e) {
        throw new IgniteSpiException(e);
    }
}
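The loop above is a compare-and-set retry: it reuses an in-flight ZkCommunicationErrorProcessFuture if one exists, otherwise it tries to install a new one and, if it loses the race, picks up whatever the winner installed. A stripped-down sketch of that pattern using java.util.concurrent types (CompletableFuture standing in for the Zk future; names are illustrative, not Ignite API) might look like this:

import java.util.concurrent.CompletableFuture;
import java.util.concurrent.atomic.AtomicReference;

class CommErrorFutureHolder {
    private final AtomicReference<CompletableFuture<Boolean>> ref = new AtomicReference<>();

    /** Returns the shared in-flight future, installing a new one if none is currently running. */
    CompletableFuture<Boolean> futureForError() {
        for (;;) {
            CompletableFuture<Boolean> cur = ref.get();

            if (cur != null && !cur.isDone())
                return cur; // Reuse the resolve process that is already in progress.

            CompletableFuture<Boolean> created = new CompletableFuture<>();

            if (ref.compareAndSet(cur, created))
                return created; // This thread won the race and owns the new process.

            // Lost the race: loop and pick up whatever the winner installed.
        }
    }
}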
Use of org.apache.ignite.internal.cluster.ClusterTopologyCheckedException in project gridgain by gridgain: class ZookeeperDiscoveryCommunicationFailureTest, method testCommunicationFailureResolve_KillCoordinator_5.
/**
 * @throws Exception If failed.
 */
@Test
public void testCommunicationFailureResolve_KillCoordinator_5() throws Exception {
    sesTimeout = 2000;
    testCommSpi = true;
    commFailureRslvr = KillCoordinatorCommunicationFailureResolver.FACTORY;

    startGrids(10);

    int crd = 0;
    int nodeIdx = 10;

    for (int i = 0; i < GridTestUtils.SF.applyLB(4, 2); i++) {
        info("Iteration: " + i);

        for (Ignite node : G.allGrids())
            ZkTestCommunicationSpi.testSpi(node).initCheckResult(10);

        UUID crdId = ignite(crd).cluster().localNode().id();

        ZookeeperDiscoverySpi spi = spi(ignite(crd + 1));

        try {
            spi.resolveCommunicationFailure(spi.getNode(crdId), new Exception("test"));

            fail("Exception is not thrown");
        }
        catch (IgniteSpiException e) {
            assertTrue("Unexpected exception: " + e, e.getCause() instanceof ClusterTopologyCheckedException);
        }

        waitForTopology(9);

        startGrid(nodeIdx++);

        waitForTopology(10);

        crd++;
    }
}
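The test relies on IgniteSpiException being unchecked and verifies the type of its cause with a try/fail/catch block. That pattern can be factored into a small reusable JUnit 4 helper; the class and method below are hypothetical and not part of the Ignite test framework:

import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

final class CauseAssertions {
    private CauseAssertions() { }

    /** Runs the action and asserts that it throws an unchecked exception whose cause has the expected type. */
    static void assertThrowsWithCause(Runnable action, Class<? extends Throwable> causeCls) {
        try {
            action.run();

            fail("Exception is not thrown");
        }
        catch (RuntimeException e) {
            assertTrue("Unexpected cause: " + e.getCause(), causeCls.isInstance(e.getCause()));
        }
    }
}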