Search in sources :

Example 1 with IgniteSpiOperationTimeoutHelper

use of org.apache.ignite.spi.IgniteSpiOperationTimeoutHelper in project ignite by apache.

the class ServerImpl method pingNode.

/**
     * Pings the node by its address to see if it's alive.
     * <p>
     * If {@code addr} is one of the local node addresses no socket is opened: the local node ID is returned and the
     * "client exists" flag is obtained by pinging the client's message worker (or is {@code false} when no client
     * node ID is given or no worker is registered for it).
     * <p>
     * Otherwise the ping is registered in {@code pingMap} under the address passed by the caller; a concurrent
     * call for the same address waits on the already registered future instead of opening another socket.
     *
     * @param addr Address of the node.
     * @param nodeId Node ID to ping. In case when client node ID is not null this node ID is an ID of the router node.
     * @param clientNodeId Client node ID.
     * @return ID of the remote node and "client exists" flag if node alive or {@code null} if the remote node has
     *         left a topology during the ping process.
     * @throws IgniteCheckedException If an error occurs.
     */
@Nullable
private IgniteBiTuple<UUID, Boolean> pingNode(InetSocketAddress addr, @Nullable UUID nodeId, @Nullable UUID clientNodeId) throws IgniteCheckedException {
    assert addr != null;
    UUID locNodeId = getLocalNodeId();
    IgniteSpiOperationTimeoutHelper timeoutHelper = new IgniteSpiOperationTimeoutHelper(spi, clientNodeId == null);
    if (F.contains(spi.locNodeAddrs, addr)) {
        // The address belongs to this node: only the client (if any) has to be checked.
        if (clientNodeId == null)
            return F.t(getLocalNodeId(), false);
        ClientMessageWorker clientWorker = clientMsgWorkers.get(clientNodeId);
        if (clientWorker == null)
            return F.t(getLocalNodeId(), false);
        boolean clientPingRes;
        try {
            clientPingRes = clientWorker.ping(timeoutHelper);
        } catch (InterruptedException e) {
            // Restore the interrupt flag before converting to the checked Ignite exception.
            Thread.currentThread().interrupt();
            throw new IgniteInterruptedCheckedException(e);
        }
        return F.t(getLocalNodeId(), clientPingRes);
    }
    // Key under which this ping is registered. 'addr' may be replaced by a resolved copy inside the loop below,
    // and a resolved InetSocketAddress never equals an unresolved one, so the registration has to be removed
    // with the very same key it was added with.
    final InetSocketAddress pingKey = addr;
    GridPingFutureAdapter<IgniteBiTuple<UUID, Boolean>> fut = new GridPingFutureAdapter<>();
    GridPingFutureAdapter<IgniteBiTuple<UUID, Boolean>> oldFut = pingMap.putIfAbsent(pingKey, fut);
    // Somebody else is already pinging this address: share the result.
    if (oldFut != null)
        return oldFut.get();
    Collection<Throwable> errs = null;
    try {
        Socket sock = null;
        int reconCnt = 0;
        boolean openedSock = false;
        while (true) {
            try {
                if (addr.isUnresolved())
                    addr = new InetSocketAddress(InetAddress.getByName(addr.getHostName()), addr.getPort());
                long tstamp = U.currentTimeMillis();
                sock = spi.createSocket();
                // Expose the socket through the future before connecting.
                fut.sock = sock;
                sock = spi.openSocket(sock, addr, timeoutHelper);
                openedSock = true;
                spi.writeToSocket(sock, new TcpDiscoveryPingRequest(locNodeId, clientNodeId), timeoutHelper.nextTimeoutChunk(spi.getSocketTimeout()));
                TcpDiscoveryPingResponse res = spi.readMessage(sock, null, timeoutHelper.nextTimeoutChunk(spi.getAckTimeout()));
                if (locNodeId.equals(res.creatorNodeId())) {
                    if (log.isDebugEnabled())
                        log.debug("Ping response from local node: " + res);
                    // The future is completed with a failure in the outer finally block.
                    break;
                }
                spi.stats.onClientSocketInitialized(U.currentTimeMillis() - tstamp);
                IgniteBiTuple<UUID, Boolean> t = F.t(res.creatorNodeId(), res.clientExists());
                fut.onDone(t);
                return t;
            } catch (IOException | IgniteCheckedException e) {
                if (nodeId != null && !nodeAlive(nodeId)) {
                    if (log.isDebugEnabled())
                        log.debug("Failed to ping the node (has left or leaving topology): [nodeId=" + nodeId + ']');
                    fut.onDone((IgniteBiTuple<UUID, Boolean>) null);
                    return null;
                }
                if (errs == null)
                    errs = new ArrayList<>();
                errs.add(e);
                reconCnt++;
                // Give up after the second failure if a socket has never been opened.
                if (!openedSock && reconCnt == 2)
                    break;
                if (timeoutHelper.checkFailureTimeoutReached(e))
                    break;
                else if (!spi.failureDetectionTimeoutEnabled() && reconCnt == spi.getReconnectCount())
                    break;
            } finally {
                U.closeQuiet(sock);
            }
        }
    } catch (Throwable t) {
        fut.onDone(t);
        if (t instanceof Error)
            throw t;
        throw U.cast(t);
    } finally {
        // Every exit path that did not complete the future is reported as a ping failure.
        if (!fut.isDone())
            fut.onDone(U.exceptionWithSuppressed("Failed to ping node by address: " + addr, errs));
        boolean b = pingMap.remove(pingKey, fut);
        assert b;
    }
    return fut.get();
}
Also used : IgniteBiTuple(org.apache.ignite.lang.IgniteBiTuple) InetSocketAddress(java.net.InetSocketAddress) TcpDiscoveryPingResponse(org.apache.ignite.spi.discovery.tcp.messages.TcpDiscoveryPingResponse) IOException(java.io.IOException) IgniteSpiOperationTimeoutHelper(org.apache.ignite.spi.IgniteSpiOperationTimeoutHelper) IgniteInterruptedCheckedException(org.apache.ignite.internal.IgniteInterruptedCheckedException) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) UUID(java.util.UUID) TcpDiscoveryPingRequest(org.apache.ignite.spi.discovery.tcp.messages.TcpDiscoveryPingRequest) ServerSocket(java.net.ServerSocket) Socket(java.net.Socket) Nullable(org.jetbrains.annotations.Nullable)

Example 2 with IgniteSpiOperationTimeoutHelper

use of org.apache.ignite.spi.IgniteSpiOperationTimeoutHelper in project ignite by apache.

the class TcpCommunicationSpi method createShmemClient.

/**
     * Creates a shared memory communication client and performs the handshake with the remote node over it.
     * <p>
     * Client creation is retried once when it fails with a {@link ConnectException} cause. A timed out handshake
     * is retried with a doubled timeout until the reconnect count or the maximum connect timeout is exceeded,
     * unless failure detection timeout is enabled, in which case the timeout is rethrown right away.
     *
     * @param node Node.
     * @param connIdx Connection index.
     * @param port Port.
     * @return Client.
     * @throws IgniteCheckedException If failed.
     */
@Nullable
private GridCommunicationClient createShmemClient(ClusterNode node, int connIdx, Integer port) throws IgniteCheckedException {
    int handshakeTry = 1;
    int connectTry = 1;
    long handshakeTimeout = connTimeout;
    IgniteSpiOperationTimeoutHelper timeouts = new IgniteSpiOperationTimeoutHelper(this, !node.isClient());
    for (;;) {
        GridCommunicationClient shmemClient;
        try {
            shmemClient = new GridShmemCommunicationClient(connIdx, metricsLsnr, port, timeouts.nextTimeoutChunk(connTimeout), log, getSpiContext().messageFormatter());
        } catch (IgniteCheckedException e) {
            // Reconnect for the second time, if connection is not established and the failure threshold
            // has not been reached yet.
            boolean retryConnect = !timeouts.checkFailureTimeoutReached(e) && connectTry < 2 && X.hasCause(e, ConnectException.class);
            if (!retryConnect)
                throw e;
            connectTry++;
            continue;
        }
        try {
            safeHandshake(shmemClient, null, node.id(), timeouts.nextTimeoutChunk(handshakeTimeout), null, null);
        } catch (HandshakeTimeoutException | IgniteSpiOperationTimeoutException e) {
            shmemClient.forceClose();
            if (failureDetectionTimeoutEnabled() && (e instanceof HandshakeTimeoutException || timeouts.checkFailureTimeoutReached(e))) {
                if (log.isDebugEnabled())
                    log.debug("Handshake timed out (failure threshold reached) [failureDetectionTimeout=" + failureDetectionTimeout() + ", err=" + e.getMessage() + ", client=" + shmemClient + ']');
                throw e;
            }
            assert !failureDetectionTimeoutEnabled();
            if (log.isDebugEnabled())
                log.debug("Handshake timed out (will retry with increased timeout) [timeout=" + handshakeTimeout + ", err=" + e.getMessage() + ", client=" + shmemClient + ']');
            boolean giveUp = handshakeTry == reconCnt || handshakeTimeout > maxConnTimeout;
            if (giveUp) {
                if (log.isDebugEnabled())
                    log.debug("Handshake timedout (will stop attempts to perform the handshake) " + "[timeout=" + handshakeTimeout + ", maxConnTimeout=" + maxConnTimeout + ", attempt=" + handshakeTry + ", reconCnt=" + reconCnt + ", err=" + e.getMessage() + ", client=" + shmemClient + ']');
                throw e;
            }
            // Next round: one more attempt with twice the handshake timeout.
            handshakeTry++;
            handshakeTimeout *= 2;
            continue;
        } catch (IgniteCheckedException | RuntimeException | Error e) {
            if (log.isDebugEnabled())
                log.debug("Caught exception (will close client) [err=" + e.getMessage() + ", client=" + shmemClient + ']');
            shmemClient.forceClose();
            throw e;
        }
        // Handshake succeeded.
        return shmemClient;
    }
}
Also used : IgniteSpiOperationTimeoutHelper(org.apache.ignite.spi.IgniteSpiOperationTimeoutHelper) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) GridShmemCommunicationClient(org.apache.ignite.internal.util.nio.GridShmemCommunicationClient) IgniteSpiOperationTimeoutException(org.apache.ignite.spi.IgniteSpiOperationTimeoutException) GridCommunicationClient(org.apache.ignite.internal.util.nio.GridCommunicationClient) IpcEndpoint(org.apache.ignite.internal.util.ipc.IpcEndpoint) IpcSharedMemoryServerEndpoint(org.apache.ignite.internal.util.ipc.shmem.IpcSharedMemoryServerEndpoint) Nullable(org.jetbrains.annotations.Nullable)

Example 3 with IgniteSpiOperationTimeoutHelper

use of org.apache.ignite.spi.IgniteSpiOperationTimeoutHelper in project ignite by apache.

the class TcpCommunicationSpi method createTcpClient.

/**
     * Establish TCP connection to remote node and returns client.
     * <p>
     * Candidate addresses are the node's bound addresses (sorted) followed by its mapped external addresses;
     * addresses reported as unreachable are moved to the end of the list. Each address is tried in turn until a
     * connection with a completed handshake is established.
     *
     * @param node Remote node.
     * @param connIdx Connection index.
     * @return Client, or {@code null} if the recovery descriptor could not be reserved or the handshake
     *      returned {@code -1}.
     * @throws IgniteCheckedException If failed.
     */
protected GridCommunicationClient createTcpClient(ClusterNode node, int connIdx) throws IgniteCheckedException {
    Collection<String> rmtAddrs0 = node.attribute(createSpiAttributeName(ATTR_ADDRS));
    Collection<String> rmtHostNames0 = node.attribute(createSpiAttributeName(ATTR_HOST_NAMES));
    Integer boundPort = node.attribute(createSpiAttributeName(ATTR_PORT));
    Collection<InetSocketAddress> extAddrs = node.attribute(createSpiAttributeName(ATTR_EXT_ADDRS));
    boolean isRmtAddrsExist = (!F.isEmpty(rmtAddrs0) && boundPort != null);
    boolean isExtAddrsExist = !F.isEmpty(extAddrs);
    if (!isRmtAddrsExist && !isExtAddrsExist)
        throw new IgniteCheckedException("Failed to send message to the destination node. Node doesn't have any " + "TCP communication addresses or mapped external addresses. Check configuration and make sure " + "that you use the same communication SPI on all nodes. Remote node id: " + node.id());
    LinkedHashSet<InetSocketAddress> addrs;
    // Try to connect first on bound addresses.
    if (isRmtAddrsExist) {
        List<InetSocketAddress> addrs0 = new ArrayList<>(U.toSocketAddresses(rmtAddrs0, rmtHostNames0, boundPort));
        boolean sameHost = U.sameMacs(getSpiContext().localNode(), node);
        Collections.sort(addrs0, U.inetAddressesComparator(sameHost));
        addrs = new LinkedHashSet<>(addrs0);
    } else
        addrs = new LinkedHashSet<>();
    // Then on mapped external addresses.
    if (isExtAddrsExist)
        addrs.addAll(extAddrs);
    Set<InetAddress> allInetAddrs = U.newHashSet(addrs.size());
    for (InetSocketAddress addr : addrs) {
        // Skip unresolved as addr.getAddress() can return null.
        if (!addr.isUnresolved())
            allInetAddrs.add(addr.getAddress());
    }
    List<InetAddress> reachableInetAddrs = U.filterReachable(allInetAddrs);
    if (reachableInetAddrs.size() < allInetAddrs.size()) {
        // Keep the relative order, but try reachable addresses before the unreachable ones.
        LinkedHashSet<InetSocketAddress> addrs0 = U.newLinkedHashSet(addrs.size());
        List<InetSocketAddress> unreachableInetAddr = new ArrayList<>(allInetAddrs.size() - reachableInetAddrs.size());
        for (InetSocketAddress addr : addrs) {
            if (reachableInetAddrs.contains(addr.getAddress()))
                addrs0.add(addr);
            else
                unreachableInetAddr.add(addr);
        }
        addrs0.addAll(unreachableInetAddr);
        addrs = addrs0;
    }
    if (log.isDebugEnabled())
        log.debug("Addresses to connect for node [rmtNode=" + node.id() + ", addrs=" + addrs.toString() + ']');
    boolean conn = false;
    GridCommunicationClient client = null;
    IgniteCheckedException errs = null;
    // Shared across all addresses: a failed connect is retried at most once per call.
    int connectAttempts = 1;
    for (InetSocketAddress addr : addrs) {
        long connTimeout0 = connTimeout;
        int attempt = 1;
        IgniteSpiOperationTimeoutHelper timeoutHelper = new IgniteSpiOperationTimeoutHelper(this, !node.isClient());
        while (!conn) {
            // Reconnection on handshake timeout.
            try {
                SocketChannel ch = SocketChannel.open();
                try {
                    ch.configureBlocking(true);
                    ch.socket().setTcpNoDelay(tcpNoDelay);
                    ch.socket().setKeepAlive(true);
                    if (sockRcvBuf > 0)
                        ch.socket().setReceiveBufferSize(sockRcvBuf);
                    if (sockSndBuf > 0)
                        ch.socket().setSendBufferSize(sockSndBuf);
                    // The channel is closed by the catch block below.
                    if (getSpiContext().node(node.id()) == null)
                        throw new ClusterTopologyCheckedException("Failed to send message " + "(node left topology): " + node);
                    ConnectionKey connKey = new ConnectionKey(node.id(), connIdx, -1);
                    GridNioRecoveryDescriptor recoveryDesc = outRecoveryDescriptor(node, connKey);
                    if (!recoveryDesc.reserve()) {
                        U.closeQuiet(ch);
                        return null;
                    }
                    long rcvCnt = -1;
                    Map<Integer, Object> meta = new HashMap<>();
                    GridSslMeta sslMeta = null;
                    try {
                        ch.socket().connect(addr, (int) timeoutHelper.nextTimeoutChunk(connTimeout));
                        if (isSslEnabled()) {
                            meta.put(SSL_META.ordinal(), sslMeta = new GridSslMeta());
                            SSLEngine sslEngine = ignite.configuration().getSslContextFactory().create().createSSLEngine();
                            sslEngine.setUseClientMode(true);
                            sslMeta.sslEngine(sslEngine);
                        }
                        Integer handshakeConnIdx = connIdx;
                        rcvCnt = safeHandshake(ch, recoveryDesc, node.id(), timeoutHelper.nextTimeoutChunk(connTimeout0), sslMeta, handshakeConnIdx);
                        // NOTE(review): the channel is intentionally left untouched on this path, exactly as
                        // before - confirm who owns its closure when the handshake returns -1.
                        if (rcvCnt == -1)
                            return null;
                    } finally {
                        // rcvCnt is still -1 when the connect/handshake failed or returned -1.
                        if (recoveryDesc != null && rcvCnt == -1)
                            recoveryDesc.release();
                    }
                    try {
                        meta.put(CONN_IDX_META, connKey);
                        if (recoveryDesc != null) {
                            recoveryDesc.onHandshake(rcvCnt);
                            meta.put(-1, recoveryDesc);
                        }
                        GridNioSession ses = nioSrvr.createSession(ch, meta).get();
                        client = new GridTcpNioCommunicationClient(connIdx, ses, log);
                        conn = true;
                    } finally {
                        if (!conn) {
                            if (recoveryDesc != null)
                                recoveryDesc.release();
                        }
                    }
                } catch (Throwable t) {
                    // Do not leak the channel when configuration, connect, handshake or session creation
                    // fails; closing an already closed channel has no effect.
                    U.closeQuiet(ch);
                    throw t;
                }
            } catch (HandshakeTimeoutException | IgniteSpiOperationTimeoutException e) {
                if (client != null) {
                    client.forceClose();
                    client = null;
                }
                if (failureDetectionTimeoutEnabled() && (e instanceof HandshakeTimeoutException || timeoutHelper.checkFailureTimeoutReached(e))) {
                    String msg = "Handshake timed out (failure detection timeout is reached) " + "[failureDetectionTimeout=" + failureDetectionTimeout() + ", addr=" + addr + ']';
                    onException(msg, e);
                    if (log.isDebugEnabled())
                        log.debug(msg);
                    if (errs == null)
                        errs = new IgniteCheckedException("Failed to connect to node (is node still alive?). " + "Make sure that each ComputeTask and cache Transaction has a timeout set " + "in order to prevent parties from waiting forever in case of network issues " + "[nodeId=" + node.id() + ", addrs=" + addrs + ']');
                    errs.addSuppressed(new IgniteCheckedException("Failed to connect to address: " + addr, e));
                    // Move on to the next address.
                    break;
                }
                assert !failureDetectionTimeoutEnabled();
                onException("Handshake timed out (will retry with increased timeout) [timeout=" + connTimeout0 + ", addr=" + addr + ']', e);
                if (log.isDebugEnabled())
                    log.debug("Handshake timed out (will retry with increased timeout) [timeout=" + connTimeout0 + ", addr=" + addr + ", err=" + e + ']');
                if (attempt == reconCnt || connTimeout0 > maxConnTimeout) {
                    if (log.isDebugEnabled())
                        log.debug("Handshake timedout (will stop attempts to perform the handshake) " + "[timeout=" + connTimeout0 + ", maxConnTimeout=" + maxConnTimeout + ", attempt=" + attempt + ", reconCnt=" + reconCnt + ", err=" + e.getMessage() + ", addr=" + addr + ']');
                    if (errs == null)
                        errs = new IgniteCheckedException("Failed to connect to node (is node still alive?). " + "Make sure that each ComputeTask and cache Transaction has a timeout set " + "in order to prevent parties from waiting forever in case of network issues " + "[nodeId=" + node.id() + ", addrs=" + addrs + ']');
                    errs.addSuppressed(new IgniteCheckedException("Failed to connect to address: " + addr, e));
                    break;
                } else {
                    attempt++;
                    connTimeout0 *= 2;
                // Continue loop.
                }
            } catch (Exception e) {
                if (client != null) {
                    client.forceClose();
                    client = null;
                }
                onException("Client creation failed [addr=" + addr + ", err=" + e + ']', e);
                if (log.isDebugEnabled())
                    log.debug("Client creation failed [addr=" + addr + ", err=" + e + ']');
                boolean failureDetThrReached = timeoutHelper.checkFailureTimeoutReached(e);
                if (failureDetThrReached)
                    LT.warn(log, "Connect timed out (consider increasing 'failureDetectionTimeout' " + "configuration property) [addr=" + addr + ", failureDetectionTimeout=" + failureDetectionTimeout() + ']');
                else if (X.hasCause(e, SocketTimeoutException.class))
                    LT.warn(log, "Connect timed out (consider increasing 'connTimeout' " + "configuration property) [addr=" + addr + ", connTimeout=" + connTimeout + ']');
                if (errs == null)
                    errs = new IgniteCheckedException("Failed to connect to node (is node still alive?). " + "Make sure that each ComputeTask and cache Transaction has a timeout set " + "in order to prevent parties from waiting forever in case of network issues " + "[nodeId=" + node.id() + ", addrs=" + addrs + ']');
                errs.addSuppressed(new IgniteCheckedException("Failed to connect to address: " + addr, e));
                // Reconnect for the second time, if connection is not established.
                if (!failureDetThrReached && connectAttempts < 2 && (e instanceof ConnectException || X.hasCause(e, ConnectException.class))) {
                    connectAttempts++;
                    continue;
                }
                break;
            }
        }
        if (conn)
            break;
    }
    if (client == null) {
        assert errs != null;
        if (X.hasCause(errs, ConnectException.class))
            LT.warn(log, "Failed to connect to a remote node " + "(make sure that destination node is alive and " + "operating system firewall is disabled on local and remote hosts) " + "[addrs=" + addrs + ']');
        // Ask the SPI context to fail the node only if it is still in topology and the failure is connection related.
        if (getSpiContext().node(node.id()) != null && (CU.clientNode(node) || !CU.clientNode(getLocalNode())) && X.hasCause(errs, ConnectException.class, SocketTimeoutException.class, HandshakeTimeoutException.class, IgniteSpiOperationTimeoutException.class)) {
            LT.warn(log, "TcpCommunicationSpi failed to establish connection to node, node will be dropped from " + "cluster [" + "rmtNode=" + node + ", err=" + errs + ", connectErrs=" + Arrays.toString(errs.getSuppressed()) + ']');
            getSpiContext().failNode(node.id(), "TcpCommunicationSpi failed to establish connection to node [" + "rmtNode=" + node + ", errs=" + errs + ", connectErrs=" + Arrays.toString(errs.getSuppressed()) + ']');
        }
        throw errs;
    }
    return client;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) SocketChannel(java.nio.channels.SocketChannel) GridNioSession(org.apache.ignite.internal.util.nio.GridNioSession) HashMap(java.util.HashMap) InetSocketAddress(java.net.InetSocketAddress) SSLEngine(javax.net.ssl.SSLEngine) ArrayList(java.util.ArrayList) GridSslMeta(org.apache.ignite.internal.util.nio.ssl.GridSslMeta) GridTcpNioCommunicationClient(org.apache.ignite.internal.util.nio.GridTcpNioCommunicationClient) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) IgniteSpiOperationTimeoutException(org.apache.ignite.spi.IgniteSpiOperationTimeoutException) ConnectException(java.net.ConnectException) GridCommunicationClient(org.apache.ignite.internal.util.nio.GridCommunicationClient) IpcEndpoint(org.apache.ignite.internal.util.ipc.IpcEndpoint) IpcSharedMemoryServerEndpoint(org.apache.ignite.internal.util.ipc.shmem.IpcSharedMemoryServerEndpoint) IgniteClientDisconnectedException(org.apache.ignite.IgniteClientDisconnectedException) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) SSLException(javax.net.ssl.SSLException) IgniteSpiOperationTimeoutException(org.apache.ignite.spi.IgniteSpiOperationTimeoutException) IgniteSpiException(org.apache.ignite.spi.IgniteSpiException) SocketTimeoutException(java.net.SocketTimeoutException) IgniteInterruptedCheckedException(org.apache.ignite.internal.IgniteInterruptedCheckedException) ConnectException(java.net.ConnectException) IpcOutOfSystemResourcesException(org.apache.ignite.internal.util.ipc.shmem.IpcOutOfSystemResourcesException) IOException(java.io.IOException) IgniteClientDisconnectedCheckedException(org.apache.ignite.internal.IgniteClientDisconnectedCheckedException) IgniteException(org.apache.ignite.IgniteException) ClusterTopologyCheckedException(org.apache.ignite.internal.cluster.ClusterTopologyCheckedException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) 
IgniteSpiOperationTimeoutHelper(org.apache.ignite.spi.IgniteSpiOperationTimeoutHelper) SocketTimeoutException(java.net.SocketTimeoutException) GridNioRecoveryDescriptor(org.apache.ignite.internal.util.nio.GridNioRecoveryDescriptor) IgniteSpiTimeoutObject(org.apache.ignite.spi.IgniteSpiTimeoutObject) InetAddress(java.net.InetAddress) ClusterTopologyCheckedException(org.apache.ignite.internal.cluster.ClusterTopologyCheckedException)

Example 4 with IgniteSpiOperationTimeoutHelper

use of org.apache.ignite.spi.IgniteSpiOperationTimeoutHelper in project ignite by apache.

the class ClientImpl method sendJoinRequest.

/**
     * Opens a socket to the given address, performs the discovery handshake as a client and sends either a join
     * request or, when reconnecting, a client reconnect message over it.
     *
     * @param recon {@code True} if reconnects.
     * @param addr Address.
     * @return Socket, connect response and client acknowledge support flag, or {@code null} if all attempts to
     *      join through this address failed.
     */
@Nullable
private T3<SocketStream, Integer, Boolean> sendJoinRequest(boolean recon, InetSocketAddress addr) {
    assert addr != null;
    if (log.isDebugEnabled())
        log.debug("Send join request [addr=" + addr + ", reconnect=" + recon + ", locNodeId=" + getLocalNodeId() + ']');
    Collection<Throwable> failures = null;
    long ackTimeout = spi.getAckTimeout();
    int reconnects = 0;
    int connectTries = 1;
    // Shared budget for SSLException and StreamCorruptedException failures.
    int protoRetriesLeft = 3;
    UUID locId = getLocalNodeId();
    IgniteSpiOperationTimeoutHelper timeouts = new IgniteSpiOperationTimeoutHelper(spi, true);
    for (;;) {
        boolean connected = false;
        Socket sock = null;
        try {
            long startTs = U.currentTimeMillis();
            sock = spi.openSocket(addr, timeouts);
            connected = true;
            TcpDiscoveryHandshakeRequest handshake = new TcpDiscoveryHandshakeRequest(locId);
            handshake.client(true);
            spi.writeToSocket(sock, handshake, timeouts.nextTimeoutChunk(spi.getSocketTimeout()));
            TcpDiscoveryHandshakeResponse res = spi.readMessage(sock, null, ackTimeout);
            UUID rmtNodeId = res.creatorNodeId();
            assert rmtNodeId != null;
            assert !getLocalNodeId().equals(rmtNodeId);
            spi.stats.onClientSocketInitialized(U.currentTimeMillis() - startTs);
            locNode.clientRouterNodeId(rmtNodeId);
            startTs = U.currentTimeMillis();
            TcpDiscoveryAbstractMessage msg;
            if (recon)
                msg = new TcpDiscoveryClientReconnectMessage(getLocalNodeId(), rmtNodeId, lastMsgId);
            else {
                TcpDiscoveryNode joining = locNode;
                if (locNode.order() > 0) {
                    // The local node already has an order assigned: join with a reconnect copy of it.
                    joining = locNode.clientReconnectNode(spi.spiCtx.nodeAttributes());
                    marshalCredentials(joining);
                }
                msg = new TcpDiscoveryJoinRequestMessage(joining, spi.collectExchangeData(new DiscoveryDataPacket(getLocalNodeId())));
            }
            msg.client(true);
            spi.writeToSocket(sock, msg, timeouts.nextTimeoutChunk(spi.getSocketTimeout()));
            spi.stats.onMessageSent(msg, U.currentTimeMillis() - startTs);
            if (log.isDebugEnabled())
                log.debug("Message has been sent to address [msg=" + msg + ", addr=" + addr + ", rmtNodeId=" + rmtNodeId + ']');
            return new T3<>(new SocketStream(sock), spi.readReceipt(sock, timeouts.nextTimeoutChunk(ackTimeout)), res.clientAck());
        } catch (IOException | IgniteCheckedException e) {
            U.closeQuiet(sock);
            if (log.isDebugEnabled())
                log.error("Exception on joining: " + e.getMessage(), e);
            onException("Exception on joining: " + e.getMessage(), e);
            if (failures == null)
                failures = new ArrayList<>();
            failures.add(e);
            if (X.hasCause(e, SSLException.class)) {
                if (--protoRetriesLeft == 0)
                    throw new IgniteSpiException("Unable to establish secure connection. " + "Was remote cluster configured with SSL? [rmtAddr=" + addr + ", errMsg=\"" + e.getMessage() + "\"]", e);
                continue;
            }
            if (X.hasCause(e, StreamCorruptedException.class)) {
                if (--protoRetriesLeft == 0)
                    throw new IgniteSpiException("Unable to establish plain connection. " + "Was remote cluster configured with SSL? [rmtAddr=" + addr + ", errMsg=\"" + e.getMessage() + "\"]", e);
                continue;
            }
            // Stop on failure detection timeout or, when it is disabled, on reaching the reconnect count.
            if (timeouts.checkFailureTimeoutReached(e) || (!spi.failureDetectionTimeoutEnabled() && ++reconnects == spi.getReconnectCount()))
                break;
            if (!connected) {
                // Don't retry if we can not establish connection.
                if (connectTries >= 2)
                    break;
                // Reconnect for the second time, if connection is not established.
                connectTries++;
                continue;
            }
            boolean timedOut = e instanceof SocketTimeoutException || X.hasCause(e, SocketTimeoutException.class);
            if (!spi.failureDetectionTimeoutEnabled() && timedOut) {
                // Retry with a doubled acknowledgement timeout while it is still accepted.
                ackTimeout *= 2;
                if (!checkAckTimeout(ackTimeout))
                    break;
            }
        }
    }
    if (log.isDebugEnabled())
        log.debug("Failed to join to address [addr=" + addr + ", recon=" + recon + ", errs=" + failures + ']');
    return null;
}
Also used : SSLException(javax.net.ssl.SSLException) TcpDiscoveryClientReconnectMessage(org.apache.ignite.spi.discovery.tcp.messages.TcpDiscoveryClientReconnectMessage) DiscoveryDataPacket(org.apache.ignite.spi.discovery.tcp.internal.DiscoveryDataPacket) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) StreamCorruptedException(java.io.StreamCorruptedException) IgniteSpiException(org.apache.ignite.spi.IgniteSpiException) UUID(java.util.UUID) TcpDiscoveryJoinRequestMessage(org.apache.ignite.spi.discovery.tcp.messages.TcpDiscoveryJoinRequestMessage) T3(org.apache.ignite.internal.util.typedef.T3) TcpDiscoveryHandshakeRequest(org.apache.ignite.spi.discovery.tcp.messages.TcpDiscoveryHandshakeRequest) TcpDiscoveryHandshakeResponse(org.apache.ignite.spi.discovery.tcp.messages.TcpDiscoveryHandshakeResponse) IOException(java.io.IOException) IgniteSpiOperationTimeoutHelper(org.apache.ignite.spi.IgniteSpiOperationTimeoutHelper) SocketTimeoutException(java.net.SocketTimeoutException) TcpDiscoveryAbstractMessage(org.apache.ignite.spi.discovery.tcp.messages.TcpDiscoveryAbstractMessage) Socket(java.net.Socket) TcpDiscoveryNode(org.apache.ignite.spi.discovery.tcp.internal.TcpDiscoveryNode) Nullable(org.jetbrains.annotations.Nullable)

Example 5 with IgniteSpiOperationTimeoutHelper

use of org.apache.ignite.spi.IgniteSpiOperationTimeoutHelper in project ignite by apache.

the class ServerImpl method sendJoinRequestMessage.

/**
 * Attempts to hand a join request to one of the addresses currently resolved by the SPI (addresses originate
 * from {@link org.apache.ignite.spi.discovery.tcp.ipfinder.TcpDiscoveryIpFinder}). Addresses are tried in
 * iteration order and the method returns as soon as one of them answers {@code RES_OK}.
 * <p>
 * When a full pass over the addresses ends without {@code RES_OK}, the method either sleeps and starts another
 * pass (a concurrent start was detected, or the IP finder is not shared and does not contain the local address)
 * or gives up and returns {@code false}.
 *
 * @return {@code true} if send succeeded, {@code false} if there were no addresses to try or no further
 *      attempt is to be made.
 * @throws IgniteSpiException If any error occurs; in particular when the thread is interrupted while waiting
 *      between passes, or when {@code spi.joinTimeout} is positive and has elapsed without a responsive address.
 */
@SuppressWarnings({ "BusyWait" })
private boolean sendJoinRequestMessage() throws IgniteSpiException {
    DiscoveryDataPacket exchangePacket = new DiscoveryDataPacket(getLocalNodeId());
    TcpDiscoveryAbstractMessage joinReq = new TcpDiscoveryJoinRequestMessage(locNode, spi.collectExchangeData(exchangePacket));

    // Moment at which the IP finder addresses were first seen to be unresponsive (0 = not in such a period).
    long silentSince = 0;

    for (;;) {
        Collection<InetSocketAddress> addrs = spi.resolvedAddresses();

        if (F.isEmpty(addrs))
            return false;

        boolean tryAgain = false;
        Collection<Exception> failures = new ArrayList<>();

        for (InetSocketAddress addr : addrs) {
            try {
                IgniteSpiOperationTimeoutHelper timeoutHelper = new IgniteSpiOperationTimeoutHelper(spi, true);
                Integer res;

                // The serialize version is switched only for the duration of the direct send.
                try {
                    SecurityUtils.serializeVersion(1);
                    res = sendMessageDirectly(joinReq, addr, timeoutHelper);
                } finally {
                    SecurityUtils.restoreDefaultSerializeVersion();
                }

                assert res != null;

                // The address answered: it is no longer "non-responsive" and the silent period is over.
                noResAddrs.remove(addr);
                silentSince = 0;

                int code = res;

                if (code == RES_OK) {
                    if (log.isDebugEnabled())
                        log.debug("Join request message has been sent to address [addr=" + addr + ", req=" + joinReq + ']');

                    // Request delivered; the caller now waits for the topology to respond.
                    return true;
                } else if (code == RES_WAIT) {
                    // Concurrent startup: send the join request again later.
                    tryAgain = true;
                } else if (code == RES_CONTINUE_JOIN) {
                    // Concurrent startup: move on to the next node, retrying only for addresses not in fromAddrs.
                    if (!fromAddrs.contains(addr))
                        tryAgain = true;
                } else {
                    if (log.isDebugEnabled())
                        log.debug("Unexpected response to join request: " + code);

                    tryAgain = true;
                }
            } catch (IgniteSpiException e) {
                failures.add(e);

                if (log.isDebugEnabled()) {
                    IOException ioCause = X.cause(e, IOException.class);
                    String reason = ioCause != null ? ioCause.getMessage() : e.getMessage();
                    String failureMsg = "Failed to send join request message [addr=" + addr + ", msg=" + reason + ']';

                    log.debug(failureMsg);
                    onException(failureMsg, ioCause);
                }

                noResAddrs.add(addr);
            }
        }

        if (tryAgain) {
            if (log.isDebugEnabled())
                log.debug("Concurrent discovery SPI start has been detected (local node should wait).");
        } else {
            // Nothing to retry: stop unless the IP finder is non-shared and lacks the local address.
            if (spi.ipFinder.isShared() || ipFinderHasLocAddr)
                return false;

            IgniteCheckedException combined = null;

            if (!failures.isEmpty()) {
                combined = new IgniteCheckedException("Multiple connection attempts failed.");

                for (Exception failure : failures)
                    combined.addSuppressed(failure);

                if (X.hasCause(combined, ConnectException.class)) {
                    LT.warn(log, "Failed to connect to any address from IP finder "
                        + "(make sure IP finder addresses are correct and firewalls are disabled on all host machines): "
                        + toOrderedList(addrs), true);
                }
            }

            if (spi.joinTimeout > 0) {
                if (silentSince == 0) {
                    silentSince = U.currentTimeMillis();
                } else if (U.currentTimeMillis() - silentSince > spi.joinTimeout) {
                    throw new IgniteSpiException("Failed to connect to any address from IP finder within join timeout "
                        + "(make sure IP finder addresses are correct, and operating system firewalls are disabled "
                        + "on all host machines, or consider increasing 'joinTimeout' configuration property): " + addrs, combined);
                }
            }
        }

        // Both continuation paths pause before the next pass over the addresses.
        try {
            U.sleep(2000);
        } catch (IgniteInterruptedCheckedException interrupted) {
            throw new IgniteSpiException("Thread has been interrupted.", interrupted);
        }
    }
}
Also used : InetSocketAddress(java.net.InetSocketAddress) ArrayList(java.util.ArrayList) IOException(java.io.IOException) StreamCorruptedException(java.io.StreamCorruptedException) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) SSLException(javax.net.ssl.SSLException) IgniteSpiException(org.apache.ignite.spi.IgniteSpiException) SocketException(java.net.SocketException) SocketTimeoutException(java.net.SocketTimeoutException) IgniteInterruptedCheckedException(org.apache.ignite.internal.IgniteInterruptedCheckedException) ConnectException(java.net.ConnectException) IOException(java.io.IOException) ObjectStreamException(java.io.ObjectStreamException) IgniteException(org.apache.ignite.IgniteException) IgniteFutureTimeoutCheckedException(org.apache.ignite.internal.IgniteFutureTimeoutCheckedException) NoSuchElementException(java.util.NoSuchElementException) IgniteSystemProperties.getInteger(org.apache.ignite.IgniteSystemProperties.getInteger) IgniteSpiOperationTimeoutHelper(org.apache.ignite.spi.IgniteSpiOperationTimeoutHelper) IgniteInterruptedCheckedException(org.apache.ignite.internal.IgniteInterruptedCheckedException) DiscoveryDataPacket(org.apache.ignite.spi.discovery.tcp.internal.DiscoveryDataPacket) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) TcpDiscoveryAbstractMessage(org.apache.ignite.spi.discovery.tcp.messages.TcpDiscoveryAbstractMessage) IgniteSpiException(org.apache.ignite.spi.IgniteSpiException) TcpDiscoveryJoinRequestMessage(org.apache.ignite.spi.discovery.tcp.messages.TcpDiscoveryJoinRequestMessage)

Aggregations

IgniteCheckedException (org.apache.ignite.IgniteCheckedException)5 IgniteSpiOperationTimeoutHelper (org.apache.ignite.spi.IgniteSpiOperationTimeoutHelper)5 IOException (java.io.IOException)4 InetSocketAddress (java.net.InetSocketAddress)3 SocketTimeoutException (java.net.SocketTimeoutException)3 SSLException (javax.net.ssl.SSLException)3 IgniteInterruptedCheckedException (org.apache.ignite.internal.IgniteInterruptedCheckedException)3 IgniteSpiException (org.apache.ignite.spi.IgniteSpiException)3 Nullable (org.jetbrains.annotations.Nullable)3 StreamCorruptedException (java.io.StreamCorruptedException)2 ConnectException (java.net.ConnectException)2 Socket (java.net.Socket)2 ArrayList (java.util.ArrayList)2 UUID (java.util.UUID)2 IgniteException (org.apache.ignite.IgniteException)2 IpcEndpoint (org.apache.ignite.internal.util.ipc.IpcEndpoint)2 IpcSharedMemoryServerEndpoint (org.apache.ignite.internal.util.ipc.shmem.IpcSharedMemoryServerEndpoint)2 GridCommunicationClient (org.apache.ignite.internal.util.nio.GridCommunicationClient)2 IgniteSpiOperationTimeoutException (org.apache.ignite.spi.IgniteSpiOperationTimeoutException)2 DiscoveryDataPacket (org.apache.ignite.spi.discovery.tcp.internal.DiscoveryDataPacket)2