Use of org.apache.ignite.internal.cluster.ClusterTopologyCheckedException in the Apache Ignite project.
Class IgniteTxManager, method start0.
/**
 * {@inheritDoc}
 * <p>
 * Initializes {@code txFinishSync}, {@code txHnd} and {@code deferredAckMsgSnd}, subscribes a local
 * listener for {@code EVT_NODE_FAILED} / {@code EVT_NODE_LEFT} events and registers the deadlock
 * detection message listener on {@code TOPIC_TX}.
 */
@Override
protected void start0() throws IgniteCheckedException {
    txFinishSync = new GridCacheTxFinishSync<>(cctx);
    txHnd = new IgniteTxHandler(cctx);

    deferredAckMsgSnd = new GridDeferredAckMessageSender<GridCacheVersion>(cctx.time(), cctx.kernalContext().closure()) {
        @Override
        public int getTimeout() {
            return DEFERRED_ONE_PHASE_COMMIT_ACK_REQUEST_TIMEOUT;
        }

        @Override
        public int getBufferSize() {
            return DEFERRED_ONE_PHASE_COMMIT_ACK_REQUEST_BUFFER_SIZE;
        }

        @Override
        public void finish(UUID nodeId, ConcurrentLinkedDeque8<GridCacheVersion> vers) {
            GridDhtTxOnePhaseCommitAckRequest ack = new GridDhtTxOnePhaseCommitAckRequest(vers);

            // The gateway read lock is held for the duration of the send and always released.
            cctx.kernalContext().gateway().readLock();

            try {
                cctx.io().send(nodeId, ack, GridIoPolicy.SYSTEM_POOL);
            }
            catch (ClusterTopologyCheckedException ignored) {
                // Target node is gone; only worth a debug message.
                if (log.isDebugEnabled()) {
                    log.debug("Failed to send one phase commit ack to backup node because it left grid: " + nodeId);
                }
            }
            catch (IgniteCheckedException err) {
                log.error("Failed to send one phase commit ack to backup node [backup=" + nodeId + ']', err);
            }
            finally {
                cctx.kernalContext().gateway().readUnlock();
            }
        }
    };

    cctx.gridEvents().addLocalEventListener(new GridLocalEventListener() {
        @Override
        public void onEvent(Event evt) {
            assert evt instanceof DiscoveryEvent;
            assert evt.type() == EVT_NODE_FAILED || evt.type() == EVT_NODE_LEFT;

            UUID leftNodeId = ((DiscoveryEvent) evt).eventNode().id();

            // Wait some time in case there are some unprocessed messages from failed node.
            cctx.time().addTimeoutObject(new NodeFailureTimeoutObject(leftNodeId));

            if (txFinishSync != null) {
                txFinishSync.onNodeLeft(leftNodeId);
            }

            for (TxDeadlockFuture deadlockFut : deadlockDetectFuts.values()) {
                deadlockFut.onNodeLeft(leftNodeId);
            }

            // Drop stored transaction returns that belong to the departed node.
            for (Map.Entry<GridCacheVersion, Object> completed : completedVersHashMap.entrySet()) {
                Object val = completed.getValue();

                if (!(val instanceof GridCacheReturnCompletableWrapper)) {
                    continue;
                }

                GridCacheReturnCompletableWrapper wrapper = (GridCacheReturnCompletableWrapper) val;

                if (leftNodeId.equals(wrapper.nodeId())) {
                    removeTxReturn(completed.getKey());
                }
            }
        }
    }, EVT_NODE_FAILED, EVT_NODE_LEFT);

    txDeadlockDetection = new TxDeadlockDetection(cctx);

    cctx.gridIO().addMessageListener(TOPIC_TX, new DeadlockDetectionListener());
}
Use of org.apache.ignite.internal.cluster.ClusterTopologyCheckedException in the Apache Ignite project.
Class IgniteUtils, method exceptionConverters.
/**
 * Builds the lookup table that maps each internal checked exception class to a closure producing the
 * matching public API unchecked exception.
 *
 * @return Mutable map from checked exception class to its converter.
 */
private static Map<Class<? extends IgniteCheckedException>, C1<IgniteCheckedException, IgniteException>> exceptionConverters() {
    Map<Class<? extends IgniteCheckedException>, C1<IgniteCheckedException, IgniteException>> converters = new HashMap<>();

    converters.put(IgniteInterruptedCheckedException.class, new C1<IgniteCheckedException, IgniteException>() {
        @Override
        public IgniteException apply(IgniteCheckedException err) {
            String msg = err.getMessage();
            InterruptedException cause = (InterruptedException) err.getCause();

            return new IgniteInterruptedException(msg, cause);
        }
    });

    converters.put(IgniteFutureCancelledCheckedException.class, new C1<IgniteCheckedException, IgniteException>() {
        @Override
        public IgniteException apply(IgniteCheckedException err) {
            return new IgniteFutureCancelledException(err.getMessage(), err);
        }
    });

    converters.put(IgniteFutureTimeoutCheckedException.class, new C1<IgniteCheckedException, IgniteException>() {
        @Override
        public IgniteException apply(IgniteCheckedException err) {
            return new IgniteFutureTimeoutException(err.getMessage(), err);
        }
    });

    converters.put(ClusterGroupEmptyCheckedException.class, new C1<IgniteCheckedException, IgniteException>() {
        @Override
        public IgniteException apply(IgniteCheckedException err) {
            return new ClusterGroupEmptyException(err.getMessage(), err);
        }
    });

    converters.put(ClusterTopologyCheckedException.class, new C1<IgniteCheckedException, IgniteException>() {
        @Override
        public IgniteException apply(IgniteCheckedException err) {
            ClusterTopologyException converted = new ClusterTopologyException(err.getMessage(), err);
            ClusterTopologyCheckedException topErr = (ClusterTopologyCheckedException) err;

            // Carry the retry-ready future over to the public exception when one is present.
            if (topErr.retryReadyFuture() != null) {
                converted.retryReadyFuture(new IgniteFutureImpl<>(topErr.retryReadyFuture()));
            }

            return converted;
        }
    });

    converters.put(IgniteDeploymentCheckedException.class, new C1<IgniteCheckedException, IgniteException>() {
        @Override
        public IgniteException apply(IgniteCheckedException err) {
            return new IgniteDeploymentException(err.getMessage(), err);
        }
    });

    converters.put(ComputeTaskTimeoutCheckedException.class, new C1<IgniteCheckedException, IgniteException>() {
        @Override
        public IgniteException apply(IgniteCheckedException err) {
            return new ComputeTaskTimeoutException(err.getMessage(), err);
        }
    });

    converters.put(ComputeTaskCancelledCheckedException.class, new C1<IgniteCheckedException, IgniteException>() {
        @Override
        public IgniteException apply(IgniteCheckedException err) {
            return new ComputeTaskCancelledException(err.getMessage(), err);
        }
    });

    converters.put(IgniteTxRollbackCheckedException.class, new C1<IgniteCheckedException, IgniteException>() {
        @Override
        public IgniteException apply(IgniteCheckedException err) {
            return new TransactionRollbackException(err.getMessage(), err);
        }
    });

    converters.put(IgniteTxHeuristicCheckedException.class, new C1<IgniteCheckedException, IgniteException>() {
        @Override
        public IgniteException apply(IgniteCheckedException err) {
            return new TransactionHeuristicException(err.getMessage(), err);
        }
    });

    converters.put(IgniteTxTimeoutCheckedException.class, new C1<IgniteCheckedException, IgniteException>() {
        @Override
        public IgniteException apply(IgniteCheckedException err) {
            // A deadlock cause is surfaced directly; otherwise the checked exception itself is the cause.
            if (!(err.getCause() instanceof TransactionDeadlockException)) {
                return new TransactionTimeoutException(err.getMessage(), err);
            }

            return new TransactionTimeoutException(err.getMessage(), err.getCause());
        }
    });

    converters.put(IgniteTxOptimisticCheckedException.class, new C1<IgniteCheckedException, IgniteException>() {
        @Override
        public IgniteException apply(IgniteCheckedException err) {
            return new TransactionOptimisticException(err.getMessage(), err);
        }
    });

    converters.put(IgniteClientDisconnectedCheckedException.class, new C1<IgniteCheckedException, IgniteException>() {
        @Override
        public IgniteException apply(IgniteCheckedException err) {
            IgniteClientDisconnectedCheckedException disconnected = (IgniteClientDisconnectedCheckedException) err;

            return new IgniteClientDisconnectedException(disconnected.reconnectFuture(), err.getMessage(), err);
        }
    });

    return converters;
}
Use of org.apache.ignite.internal.cluster.ClusterTopologyCheckedException in the Apache Ignite project.
Class TcpCommunicationSpi, method createTcpClient.
/**
 * Establish TCP connection to remote node and returns client.
 * <p>
 * Candidate addresses are the node's bound addresses (if any, together with its bound port) followed by
 * its mapped external addresses. Addresses reported by {@code U.filterReachable} are tried before the
 * others. Each address is attempted in turn until a client is created; per-address failures are
 * collected as suppressed exceptions on a single {@link IgniteCheckedException}.
 *
 * @param node Remote node.
 * @param connIdx Connection index.
 * @return Client, or {@code null} if the recovery descriptor could not be reserved or the handshake
 *      returned {@code -1}.
 * @throws IgniteCheckedException If the node exposes neither bound nor external addresses, or if no
 *      address could be connected.
 */
protected GridCommunicationClient createTcpClient(ClusterNode node, int connIdx) throws IgniteCheckedException {
// Connection-related attributes published by the remote node under this SPI's attribute names.
Collection<String> rmtAddrs0 = node.attribute(createSpiAttributeName(ATTR_ADDRS));
Collection<String> rmtHostNames0 = node.attribute(createSpiAttributeName(ATTR_HOST_NAMES));
Integer boundPort = node.attribute(createSpiAttributeName(ATTR_PORT));
Collection<InetSocketAddress> extAddrs = node.attribute(createSpiAttributeName(ATTR_EXT_ADDRS));
// Bound addresses are only usable when a port is known as well.
boolean isRmtAddrsExist = (!F.isEmpty(rmtAddrs0) && boundPort != null);
boolean isExtAddrsExist = !F.isEmpty(extAddrs);
if (!isRmtAddrsExist && !isExtAddrsExist)
throw new IgniteCheckedException("Failed to send message to the destination node. Node doesn't have any " + "TCP communication addresses or mapped external addresses. Check configuration and make sure " + "that you use the same communication SPI on all nodes. Remote node id: " + node.id());
// Insertion-ordered set: its iteration order is the order in which addresses are attempted below.
LinkedHashSet<InetSocketAddress> addrs;
// Try to connect first on bound addresses.
if (isRmtAddrsExist) {
List<InetSocketAddress> addrs0 = new ArrayList<>(U.toSocketAddresses(rmtAddrs0, rmtHostNames0, boundPort));
// NOTE(review): presumably true when both nodes run on the same host - confirm against U.sameMacs.
boolean sameHost = U.sameMacs(getSpiContext().localNode(), node);
Collections.sort(addrs0, U.inetAddressesComparator(sameHost));
addrs = new LinkedHashSet<>(addrs0);
} else
addrs = new LinkedHashSet<>();
// Then on mapped external addresses.
if (isExtAddrsExist)
addrs.addAll(extAddrs);
// Collect the resolved InetAddresses so reachability can be checked in a single call.
Set<InetAddress> allInetAddrs = U.newHashSet(addrs.size());
for (InetSocketAddress addr : addrs) {
// Skip unresolved as addr.getAddress() can return null.
if (!addr.isUnresolved())
allInetAddrs.add(addr.getAddress());
}
List<InetAddress> reachableInetAddrs = U.filterReachable(allInetAddrs);
// Some addresses were filtered out: rebuild the set with reachable addresses first and the rest
// appended afterwards, preserving relative order within each group.
if (reachableInetAddrs.size() < allInetAddrs.size()) {
LinkedHashSet<InetSocketAddress> addrs0 = U.newLinkedHashSet(addrs.size());
List<InetSocketAddress> unreachableInetAddr = new ArrayList<>(allInetAddrs.size() - reachableInetAddrs.size());
for (InetSocketAddress addr : addrs) {
// Unresolved addresses were never submitted to the reachability check, so they are expected
// to fall into the tail group as well.
if (reachableInetAddrs.contains(addr.getAddress()))
addrs0.add(addr);
else
unreachableInetAddr.add(addr);
}
addrs0.addAll(unreachableInetAddr);
addrs = addrs0;
}
if (log.isDebugEnabled())
log.debug("Addresses to connect for node [rmtNode=" + node.id() + ", addrs=" + addrs.toString() + ']');
// conn becomes true once a client has been created; it terminates both loops below.
boolean conn = false;
GridCommunicationClient client = null;
// Created lazily on first failure; every per-address failure is attached as a suppressed exception.
IgniteCheckedException errs = null;
// Declared outside the address loop, so the ConnectException retry below happens at most once per call.
int connectAttempts = 1;
for (InetSocketAddress addr : addrs) {
// Handshake timeout and attempt counter start afresh for every address.
long connTimeout0 = connTimeout;
int attempt = 1;
IgniteSpiOperationTimeoutHelper timeoutHelper = new IgniteSpiOperationTimeoutHelper(this, !node.isClient());
while (!conn) {
// Reconnection on handshake timeout.
try {
SocketChannel ch = SocketChannel.open();
ch.configureBlocking(true);
ch.socket().setTcpNoDelay(tcpNoDelay);
ch.socket().setKeepAlive(true);
if (sockRcvBuf > 0)
ch.socket().setReceiveBufferSize(sockRcvBuf);
if (sockSndBuf > 0)
ch.socket().setSendBufferSize(sockSndBuf);
// Node is no longer known to the SPI context. Note that this exception is thrown inside the
// enclosing try, so it is handled by the generic catch (Exception) below (recorded as a
// suppressed error) rather than propagating to the caller directly.
if (getSpiContext().node(node.id()) == null) {
U.closeQuiet(ch);
throw new ClusterTopologyCheckedException("Failed to send message " + "(node left topology): " + node);
}
ConnectionKey connKey = new ConnectionKey(node.id(), connIdx, -1);
GridNioRecoveryDescriptor recoveryDesc = outRecoveryDescriptor(node, connKey);
// Reservation failed: close the channel and return without a client and without an error.
// NOTE(review): presumably another connection attempt holds the descriptor - confirm against
// GridNioRecoveryDescriptor.reserve().
if (!recoveryDesc.reserve()) {
U.closeQuiet(ch);
return null;
}
// Stays -1 until the handshake yields another value; -1 makes the finally block below release
// the reserved descriptor.
long rcvCnt = -1;
Map<Integer, Object> meta = new HashMap<>();
GridSslMeta sslMeta = null;
try {
// The socket connect always uses connTimeout; only the handshake uses the growing connTimeout0.
ch.socket().connect(addr, (int) timeoutHelper.nextTimeoutChunk(connTimeout));
if (isSslEnabled()) {
meta.put(SSL_META.ordinal(), sslMeta = new GridSslMeta());
SSLEngine sslEngine = ignite.configuration().getSslContextFactory().create().createSSLEngine();
sslEngine.setUseClientMode(true);
sslMeta.sslEngine(sslEngine);
}
Integer handshakeConnIdx = connIdx;
rcvCnt = safeHandshake(ch, recoveryDesc, node.id(), timeoutHelper.nextTimeoutChunk(connTimeout0), sslMeta, handshakeConnIdx);
// NOTE(review): the meaning of a -1 handshake result is defined by safeHandshake, which is not
// visible here; in that case no client is created.
if (rcvCnt == -1)
return null;
} finally {
// Runs both when connect/handshake threw (rcvCnt still -1) and on the -1 return above.
if (recoveryDesc != null && rcvCnt == -1)
recoveryDesc.release();
}
try {
meta.put(CONN_IDX_META, connKey);
if (recoveryDesc != null) {
recoveryDesc.onHandshake(rcvCnt);
// NOTE(review): -1 is the session meta key used for the recovery descriptor; its definition
// is not visible here.
meta.put(-1, recoveryDesc);
}
GridNioSession ses = nioSrvr.createSession(ch, meta).get();
client = new GridTcpNioCommunicationClient(connIdx, ses, log);
conn = true;
} finally {
// Session or client creation failed after a successful handshake: release the descriptor.
if (!conn) {
if (recoveryDesc != null)
recoveryDesc.release();
}
}
} catch (HandshakeTimeoutException | IgniteSpiOperationTimeoutException e) {
if (client != null) {
client.forceClose();
client = null;
}
// With failure detection timeout enabled, a handshake timeout - or an operation timeout that the
// helper reports as having reached the failure timeout - ends the attempts on this address.
if (failureDetectionTimeoutEnabled() && (e instanceof HandshakeTimeoutException || timeoutHelper.checkFailureTimeoutReached(e))) {
String msg = "Handshake timed out (failure detection timeout is reached) " + "[failureDetectionTimeout=" + failureDetectionTimeout() + ", addr=" + addr + ']';
onException(msg, e);
if (log.isDebugEnabled())
log.debug(msg);
if (errs == null)
errs = new IgniteCheckedException("Failed to connect to node (is node still alive?). " + "Make sure that each ComputeTask and cache Transaction has a timeout set " + "in order to prevent parties from waiting forever in case of network issues " + "[nodeId=" + node.id() + ", addrs=" + addrs + ']');
errs.addSuppressed(new IgniteCheckedException("Failed to connect to address: " + addr, e));
// Leaves the retry loop; the outer loop then moves on to the next address.
break;
}
assert !failureDetectionTimeoutEnabled();
onException("Handshake timed out (will retry with increased timeout) [timeout=" + connTimeout0 + ", addr=" + addr + ']', e);
if (log.isDebugEnabled())
log.debug("Handshake timed out (will retry with increased timeout) [timeout=" + connTimeout0 + ", addr=" + addr + ", err=" + e + ']');
// Stop retrying this address once the attempt count equals reconCnt or the handshake timeout has
// grown beyond maxConnTimeout.
if (attempt == reconCnt || connTimeout0 > maxConnTimeout) {
if (log.isDebugEnabled())
log.debug("Handshake timedout (will stop attempts to perform the handshake) " + "[timeout=" + connTimeout0 + ", maxConnTimeout=" + maxConnTimeout + ", attempt=" + attempt + ", reconCnt=" + reconCnt + ", err=" + e.getMessage() + ", addr=" + addr + ']');
if (errs == null)
errs = new IgniteCheckedException("Failed to connect to node (is node still alive?). " + "Make sure that each ComputeTask and cache Transaction has a timeout set " + "in order to prevent parties from waiting forever in case of network issues " + "[nodeId=" + node.id() + ", addrs=" + addrs + ']');
errs.addSuppressed(new IgniteCheckedException("Failed to connect to address: " + addr, e));
break;
} else {
// Retry the same address with a doubled handshake timeout.
attempt++;
connTimeout0 *= 2;
// Continue loop.
}
} catch (Exception e) {
// Any other failure, including the ClusterTopologyCheckedException thrown above.
if (client != null) {
client.forceClose();
client = null;
}
onException("Client creation failed [addr=" + addr + ", err=" + e + ']', e);
if (log.isDebugEnabled())
log.debug("Client creation failed [addr=" + addr + ", err=" + e + ']');
boolean failureDetThrReached = timeoutHelper.checkFailureTimeoutReached(e);
if (failureDetThrReached)
LT.warn(log, "Connect timed out (consider increasing 'failureDetectionTimeout' " + "configuration property) [addr=" + addr + ", failureDetectionTimeout=" + failureDetectionTimeout() + ']');
else if (X.hasCause(e, SocketTimeoutException.class))
LT.warn(log, "Connect timed out (consider increasing 'connTimeout' " + "configuration property) [addr=" + addr + ", connTimeout=" + connTimeout + ']');
if (errs == null)
errs = new IgniteCheckedException("Failed to connect to node (is node still alive?). " + "Make sure that each ComputeTask and cache Transaction has a timeout set " + "in order to prevent parties from waiting forever in case of network issues " + "[nodeId=" + node.id() + ", addrs=" + addrs + ']');
errs.addSuppressed(new IgniteCheckedException("Failed to connect to address: " + addr, e));
// Reconnect for the second time, if connection is not established.
if (!failureDetThrReached && connectAttempts < 2 && (e instanceof ConnectException || X.hasCause(e, ConnectException.class))) {
connectAttempts++;
// Re-enters the while loop, i.e. retries the same address.
continue;
}
break;
}
}
// A client was created for this address: skip the remaining addresses.
if (conn)
break;
}
// No address produced a client: report the collected errors.
if (client == null) {
assert errs != null;
if (X.hasCause(errs, ConnectException.class))
LT.warn(log, "Failed to connect to a remote node " + "(make sure that destination node is alive and " + "operating system firewall is disabled on local and remote hosts) " + "[addrs=" + addrs + ']');
// Ask the SPI context to fail the remote node only when it is still known to the context, the
// failure has a connect/timeout cause, and either CU.clientNode(node) is true or
// CU.clientNode(getLocalNode()) is false.
if (getSpiContext().node(node.id()) != null && (CU.clientNode(node) || !CU.clientNode(getLocalNode())) && X.hasCause(errs, ConnectException.class, SocketTimeoutException.class, HandshakeTimeoutException.class, IgniteSpiOperationTimeoutException.class)) {
LT.warn(log, "TcpCommunicationSpi failed to establish connection to node, node will be dropped from " + "cluster [" + "rmtNode=" + node + ", err=" + errs + ", connectErrs=" + Arrays.toString(errs.getSuppressed()) + ']');
getSpiContext().failNode(node.id(), "TcpCommunicationSpi failed to establish connection to node [" + "rmtNode=" + node + ", errs=" + errs + ", connectErrs=" + Arrays.toString(errs.getSuppressed()) + ']');
}
throw errs;
}
return client;
}
Aggregations