Search in sources :

Example 86 with IgniteSpiException

use of org.apache.ignite.spi.IgniteSpiException in project ignite by apache.

the class ServerImpl method spiStart.

/**
 * {@inheritDoc}
 * <p>
 * Resets the SPI state to {@code DISCONNECTED}, derives the connection-check timings, creates the utility
 * pool, starts the ring message worker and the TCP server thread, registers or validates addresses in the
 * IP finder and finally joins the topology via {@code joinTopology()}.
 *
 * @param igniteInstanceName Ignite instance name (not read by this implementation; the pool is named after
 *      {@code spi.ignite().name()}).
 * @throws IgniteSpiException If debug mode is on while info logging is disabled, if a non-shared IP finder
 *      has no registered addresses, or if joining the topology fails.
 */
@Override
public void spiStart(String igniteInstanceName) throws IgniteSpiException {
    synchronized (mux) {
        spiState = DISCONNECTED;
    }
    // Forget ring activity timestamps.
    lastRingMsgReceivedTime = 0;
    lastRingMsgSentTime = 0;
    // Fundamental timeout value for actions related to connection check.
    connCheckTick = effectiveExchangeTimeout() / 3;
    // Since we take in account time of last sent message, the interval should be quite short to give enough piece
    // of failure detection timeout as send-and-acknowledge timeout of the message to send.
    connCheckInterval = Math.min(connCheckTick, MAX_CON_CHECK_INTERVAL);
    utilityPool = new IgniteThreadPoolExecutor("disco-pool", spi.ignite().name(), 0, 4, 2000, new LinkedBlockingQueue<>());
    if (debugMode) {
        // Debug mode is rejected unless info logging is enabled.
        if (!log.isInfoEnabled())
            throw new IgniteSpiException("Info log level should be enabled for TCP discovery to work " + "in debug mode.");
        debugLogQ = new ConcurrentLinkedDeque<>();
        U.quietAndWarn(log, "TCP discovery SPI is configured in debug mode.");
    }
    // Clear addresses collections.
    fromAddrs.clear();
    noResAddrs.clear();
    // The message worker is started before the local node is initialized.
    msgWorker = new RingMessageWorker(log);
    msgWorkerThread = new MessageWorkerDiscoveryThread(msgWorker, log);
    msgWorkerThread.start();
    // Reuse an existing TCP server if there is one.
    if (tcpSrvr == null)
        tcpSrvr = new TcpServer(log);
    spi.initLocalNode(tcpSrvr.port, true);
    // With several local addresses, explain (on debug level) how timeouts are applied per address.
    if (spi.locNodeAddrs.size() > 1 && log.isDebugEnabled()) {
        if (spi.failureDetectionTimeoutEnabled()) {
            log.debug("This node " + spi.locNode.id() + " has " + spi.locNodeAddrs.size() + " TCP " + "addresses. Note that TcpDiscoverySpi.failureDetectionTimeout works per address sequentially. " + "Setting of several addresses can prolong detection of current node failure.");
        } else {
            // The message previously read "TPC addresses"; it now matches the sibling branch.
            log.debug("This node " + spi.locNode.id() + " has " + spi.locNodeAddrs.size() + " TCP " + "addresses. With exception of connRecoveryTimeout, timeouts and setting like sockTimeout, " + "ackTimeout, reconCnt in TcpDiscoverySpi work per address sequentially. Setting of several " + "addresses can prolong detection of current node failure.");
        }
    }
    locNode = spi.locNode;
    // Start TCP server thread after local node is initialized.
    new TcpServerThread(tcpSrvr, log).start();
    ring.localNode(locNode);
    if (spi.ipFinder.isShared())
        registerLocalNodeAddress();
    else {
        // A non-shared IP finder must already contain at least one address.
        if (F.isEmpty(spi.ipFinder.getRegisteredAddresses()))
            throw new IgniteSpiException("Non-shared IP finder must have IP addresses specified in " + "TcpDiscoveryIpFinder.getRegisteredAddresses() configuration property " + "(specify list of IP addresses in configuration).");
        ipFinderHasLocAddr = spi.ipFinderHasLocalAddress();
    }
    // Statistics are printed only when a positive frequency is configured and info logging is on.
    if (spi.getStatisticsPrintFrequency() > 0 && log.isInfoEnabled()) {
        statsPrinter = new StatisticsPrinter();
        statsPrinter.start();
    }
    joinTopology();
    // Order 1 is what joinTopology() assigns when the local node is the first in the topology.
    if (locNode.order() == 1)
        U.enhanceThreadName(msgWorkerThread, "crd");
    // The IP finder cleaner runs only for shared IP finders.
    if (spi.ipFinder.isShared()) {
        ipFinderCleaner = new IpFinderCleaner();
        ipFinderCleaner.start();
    }
    spi.printStartInfo();
}
Also used : IgniteThreadPoolExecutor(org.apache.ignite.thread.IgniteThreadPoolExecutor) IgniteSpiException(org.apache.ignite.spi.IgniteSpiException) LinkedBlockingQueue(java.util.concurrent.LinkedBlockingQueue)

Example 87 with IgniteSpiException

use of org.apache.ignite.spi.IgniteSpiException in project ignite by apache.

the class ServerImpl method sendJoinRequestMessage.

/**
 * Tries to send join request message to a random node presenting in topology.
 * Address is provided by {@link org.apache.ignite.spi.discovery.tcp.ipfinder.TcpDiscoveryIpFinder} and message is
 * sent to first node connection succeeded to.
 * <p>
 * Every address returned by {@code spi.resolvedAddresses()} is tried in turn. If no address answered with
 * {@code RES_OK}, the whole pass is repeated after {@code spi.getReconnectDelay()} when a retry was requested,
 * or when the IP finder is non-shared and does not contain the local address (in which case the pass is
 * repeated until {@code spi.joinTimeout} elapses, if it is positive).
 *
 * @param joinMsg Join request message.
 * @return {@code true} if send succeeded; {@code false} if there are no addresses to try or no pass
 *      succeeded and no further retry applies.
 * @throws IgniteSpiException If a node answered {@code RES_JOIN_IMPOSSIBLE}, the join timeout elapsed,
 *      or the thread was interrupted while sleeping between passes.
 */
private boolean sendJoinRequestMessage(TcpDiscoveryJoinRequestMessage joinMsg) throws IgniteSpiException {
    // Time when join process started.
    long joinStartNanos = 0;
    while (true) {
        Collection<InetSocketAddress> addrs = spi.resolvedAddresses();
        // Nothing to connect to.
        if (F.isEmpty(addrs))
            return false;
        boolean retry = false;
        boolean joinImpossible = false;
        // Failures collected during this pass; attached to the join-timeout exception below.
        Collection<Exception> errs = new ArrayList<>();
        for (InetSocketAddress addr : addrs) {
            try {
                IgniteSpiOperationTimeoutHelper timeoutHelper = new IgniteSpiOperationTimeoutHelper(spi, true);
                Integer res;
                try {
                    // Serialize version 1 is set only for the duration of the direct send and restored afterwards.
                    SecurityUtils.serializeVersion(1);
                    res = sendMessageDirectly(joinMsg, addr, timeoutHelper);
                } finally {
                    SecurityUtils.restoreDefaultSerializeVersion();
                }
                assert res != null;
                // The address answered, so it is no longer considered non-responsive.
                noResAddrs.remove(addr);
                // Reset the join timer only if the response is neither RES_WAIT nor RES_CONTINUE_JOIN,
                // otherwise two CONNECTING nodes can stuck in infinite loop sending join reqs to each other forever
                if (res != RES_WAIT && res != RES_CONTINUE_JOIN)
                    joinStartNanos = 0;
                switch(res) {
                    case RES_WAIT:
                        // Concurrent startup, try sending join request again or wait if no success.
                        retry = true;
                        break;
                    case RES_OK:
                        if (log.isDebugEnabled())
                            log.debug("Join request message has been sent to address [addr=" + addr + ", req=" + joinMsg + ']');
                        // Join request sending succeeded, wait for response from topology.
                        return true;
                    case RES_JOIN_IMPOSSIBLE:
                        // Reported as an exception right after the try block below.
                        joinImpossible = true;
                        break;
                    default:
                        // Concurrent startup, try next node.
                        if (res == RES_CONTINUE_JOIN) {
                            // Retry only if this address is not present in fromAddrs.
                            if (!fromAddrs.contains(addr))
                                retry = true;
                        } else {
                            if (log.isDebugEnabled())
                                log.debug("Unexpected response to join request: " + res);
                            retry = true;
                        }
                        break;
                }
            } catch (IgniteSpiException e) {
                errs.add(e);
                // NOTE(review): onException(...) is invoked only when debug logging is enabled - confirm this is intended.
                if (log.isDebugEnabled()) {
                    IOException ioe = X.cause(e, IOException.class);
                    log.debug("Failed to send join request message [addr=" + addr + ", msg=" + (ioe != null ? ioe.getMessage() : e.getMessage()) + ']');
                    onException("Failed to send join request message [addr=" + addr + ", msg=" + (ioe != null ? ioe.getMessage() : e.getMessage()) + ']', ioe);
                }
                // Remember the address as non-responsive.
                noResAddrs.add(addr);
            }
            // Abort the whole join as soon as any node reports that joining is impossible.
            if (joinImpossible)
                throw new IgniteSpiException("Impossible to continue join, " + "check if local discovery and communication ports " + "are not blocked with firewall [addr=" + addr + ", req=" + joinMsg + ", discoLocalPort=" + spi.getLocalPort() + ", discoLocalPortRange=" + spi.getLocalPortRange() + ']');
        }
        if (retry) {
            if (log.isDebugEnabled())
                log.debug("Concurrent discovery SPI start has been detected (local node should wait).");
            try {
                U.sleep(spi.getReconnectDelay());
            } catch (IgniteInterruptedCheckedException e) {
                throw new IgniteSpiException("Thread has been interrupted.", e);
            }
        } else if (!spi.ipFinder.isShared() && !ipFinderHasLocAddr) {
            // Non-shared IP finder without the local address: keep trying until the join timeout (if any).
            IgniteCheckedException e = null;
            if (!errs.isEmpty()) {
                // Aggregate all failures of this pass as suppressed exceptions.
                e = new IgniteCheckedException("Multiple connection attempts failed.");
                for (Exception err : errs) e.addSuppressed(err);
            }
            if (X.hasCause(e, ConnectException.class)) {
                LT.warn(log, "Failed to connect to any address from IP finder " + "(make sure IP finder addresses are correct and firewalls are disabled on all host machines): " + toOrderedList(addrs), true);
            }
            if (spi.joinTimeout > 0) {
                // The timer starts on the first unsuccessful pass and is reset above once a node responds
                // with something other than RES_WAIT / RES_CONTINUE_JOIN.
                if (joinStartNanos == 0)
                    joinStartNanos = System.nanoTime();
                else if (U.millisSinceNanos(joinStartNanos) > spi.joinTimeout)
                    throw new IgniteSpiException("Failed to connect to any address from IP finder within join timeout " + "(make sure IP finder addresses are correct, and operating system firewalls are disabled " + "on all host machines, or consider increasing 'joinTimeout' configuration property): " + addrs, e);
            }
            try {
                U.sleep(spi.getReconnectDelay());
            } catch (IgniteInterruptedCheckedException ex) {
                throw new IgniteSpiException("Thread has been interrupted.", ex);
            }
        } else
            // No retry requested and no reason to keep polling: report that nothing was sent.
            break;
    }
    return false;
}
Also used : InetSocketAddress(java.net.InetSocketAddress) ArrayList(java.util.ArrayList) IOException(java.io.IOException) StreamCorruptedException(java.io.StreamCorruptedException) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) SSLException(javax.net.ssl.SSLException) IgniteSpiException(org.apache.ignite.spi.IgniteSpiException) SocketException(java.net.SocketException) SocketTimeoutException(java.net.SocketTimeoutException) IgniteInterruptedCheckedException(org.apache.ignite.internal.IgniteInterruptedCheckedException) ConnectException(java.net.ConnectException) IOException(java.io.IOException) ObjectStreamException(java.io.ObjectStreamException) IgniteException(org.apache.ignite.IgniteException) IgniteFutureTimeoutCheckedException(org.apache.ignite.internal.IgniteFutureTimeoutCheckedException) NoSuchElementException(java.util.NoSuchElementException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) IgniteSystemProperties.getInteger(org.apache.ignite.IgniteSystemProperties.getInteger) IgniteSpiOperationTimeoutHelper(org.apache.ignite.spi.IgniteSpiOperationTimeoutHelper) IgniteInterruptedCheckedException(org.apache.ignite.internal.IgniteInterruptedCheckedException) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) IgniteSpiException(org.apache.ignite.spi.IgniteSpiException) ConnectException(java.net.ConnectException)

Example 88 with IgniteSpiException

use of org.apache.ignite.spi.IgniteSpiException in project ignite by apache.

the class ServerImpl method sendMessageDirectly.

/**
 * Establishes connection to an address, performs the discovery handshake, sends the message and returns
 * the receipt read from the recipient.
 * <p>
 * The attempt is repeated in a loop; each iteration opens a new socket and always closes it in
 * {@code finally}. The loop is left without a receipt when the handshake response comes from a node in the
 * failed list (join requests only) or from the local node itself, or when one of the retry limits in the
 * {@code IOException}/{@code IgniteCheckedException} handler is reached.
 *
 * @param msg Message to send.
 * @param addr Address to send message to.
 * @param timeoutHelper Operation timeout helper.
 * @return Receipt read from the recipient, or {@code RES_OK} if a join request was written to the socket
 *      but the loop ended before a receipt was read. No visible return statement yields {@code null}.
 * @throws IgniteSpiException If the message could not be sent; collected errors are attached as suppressed.
 * @throws IgniteException If the secure connection could not be established after 3 attempts failing with
 *      an {@code SSLException} cause.
 */
@Nullable
private Integer sendMessageDirectly(TcpDiscoveryAbstractMessage msg, InetSocketAddress addr, IgniteSpiOperationTimeoutHelper timeoutHelper) throws IgniteSpiException {
    assert msg != null;
    assert addr != null;
    // Lazily created list of failures, reported with the final exception.
    Collection<Throwable> errs = null;
    // Working copy of the ack timeout; doubled on socket timeouts when failure detection timeout is disabled.
    long ackTimeout0 = spi.getAckTimeout();
    int connectAttempts = 1;
    int sslConnectAttempts = 3;
    boolean joinReqSent;
    UUID locNodeId = getLocalNodeId();
    int reconCnt = 0;
    while (true) {
        // Need to set to false on each new iteration,
        // since remote node may leave in the middle of the first iteration.
        joinReqSent = false;
        boolean openSock = false;
        Socket sock = null;
        try {
            sock = spi.openSocket(addr, timeoutHelper);
            openSock = true;
            TcpDiscoveryHandshakeRequest req = new TcpDiscoveryHandshakeRequest(locNodeId);
            // Handshake.
            spi.writeToSocket(sock, req, timeoutHelper.nextTimeoutChunk(spi.getSocketTimeout()));
            TcpDiscoveryHandshakeResponse res = spi.readMessage(sock, null, timeoutHelper.nextTimeoutChunk(ackTimeout0));
            if (msg instanceof TcpDiscoveryJoinRequestMessage) {
                boolean ignore = false;
                // The handshake response carries the discovery data packet compression flag; when it is
                // not set, the join request's discovery data is unzipped before sending.
                if (!res.isDiscoveryDataPacketCompression())
                    ((TcpDiscoveryJoinRequestMessage) msg).gridDiscoveryData().unzipData(log);
                // Do not send a join request to a node that is in the local failed list.
                synchronized (mux) {
                    for (TcpDiscoveryNode failedNode : failedNodes.keySet()) {
                        if (failedNode.id().equals(res.creatorNodeId())) {
                            if (log.isDebugEnabled())
                                log.debug("Ignore response from node from failed list: " + res);
                            ignore = true;
                            break;
                        }
                    }
                }
                if (ignore)
                    break;
            }
            // The address turned out to be the local node: nothing to send.
            if (locNodeId.equals(res.creatorNodeId())) {
                if (log.isDebugEnabled())
                    log.debug("Handshake response from local node: " + res);
                break;
            }
            // Send message; the write is timed for statistics.
            long tsNanos = System.nanoTime();
            spi.writeToSocket(sock, msg, timeoutHelper.nextTimeoutChunk(spi.getSocketTimeout()));
            long tsNanos0 = System.nanoTime();
            if (debugMode)
                debugLog(msg, "Message has been sent directly to address [msg=" + msg + ", addr=" + addr + ", rmtNodeId=" + res.creatorNodeId() + ']');
            if (log.isDebugEnabled())
                log.debug("Message has been sent directly to address [msg=" + msg + ", addr=" + addr + ", rmtNodeId=" + res.creatorNodeId() + ']');
            // Connection has been established, but
            // join request may not be unmarshalled on remote host.
            // E.g. due to class not found issue.
            joinReqSent = msg instanceof TcpDiscoveryJoinRequestMessage;
            int receipt = spi.readReceipt(sock, timeoutHelper.nextTimeoutChunk(ackTimeout0));
            spi.stats.onMessageSent(msg, U.nanosToMillis(tsNanos0 - tsNanos));
            return receipt;
        } catch (ClassCastException e) {
            // The failure is recorded and the loop goes on to the next attempt; no attempt counter
            // is applied on this path.
            if (log.isDebugEnabled())
                U.error(log, "Class cast exception on direct send: " + addr, e);
            onException("Class cast exception on direct send: " + addr, e);
            if (errs == null)
                errs = new ArrayList<>();
            errs.add(e);
        } catch (IOException | IgniteCheckedException e) {
            if (log.isDebugEnabled())
                log.error("Exception on direct send: " + e.getMessage(), e);
            onException("Exception on direct send: " + e.getMessage(), e);
            if (errs == null)
                errs = new ArrayList<>();
            errs.add(e);
            // SSL failures get their own budget of attempts and fail hard when it is exhausted.
            if (X.hasCause(e, SSLException.class)) {
                if (--sslConnectAttempts == 0)
                    throw new IgniteException("Unable to establish secure connection. " + "Was remote cluster configured with SSL? [rmtAddr=" + addr + ", errMsg=\"" + e.getMessage() + "\"]", e);
                continue;
            }
            if (X.hasCause(e, StreamCorruptedException.class)) {
                // StreamCorruptedException could be caused by remote node failover
                if (connectAttempts < 2) {
                    connectAttempts++;
                    continue;
                }
                if (log.isDebugEnabled())
                    log.debug("Connect failed with StreamCorruptedException, skip address: " + addr);
                break;
            }
            // Give up once the failure detection timeout is reached (when it is enabled)...
            if (spi.failureDetectionTimeoutEnabled() && timeoutHelper.checkFailureTimeoutReached(e))
                break;
            // ...or once the configured reconnect count is reached (when it is not).
            if (!spi.failureDetectionTimeoutEnabled() && ++reconCnt == spi.getReconnectCount())
                break;
            if (!openSock) {
                // Reconnect for the second time, if connection is not established.
                if (connectAttempts < 2) {
                    connectAttempts++;
                    continue;
                }
                // Don't retry if we can not establish connection.
                break;
            }
            // On a socket timeout, double the ack timeout and stop if checkAckTimeout() rejects the new value.
            if (!spi.failureDetectionTimeoutEnabled() && (e instanceof SocketTimeoutException || X.hasCause(e, SocketTimeoutException.class))) {
                ackTimeout0 *= 2;
                if (!checkAckTimeout(ackTimeout0))
                    break;
            }
        } finally {
            U.closeQuiet(sock);
        }
    }
    if (joinReqSent) {
        // The log message used to claim RES_WAIT although RES_OK is what is actually returned.
        if (log.isDebugEnabled())
            log.debug("Join request has been sent, but receipt has not been read (returning RES_OK).");
        // The join request was written to the socket, so the send is reported as successful even
        // though no receipt was read.
        return RES_OK;
    }
    throw new IgniteSpiException("Failed to send message to address [addr=" + addr + ", msg=" + msg + ']', U.exceptionWithSuppressed("Failed to send message to address " + "[addr=" + addr + ", msg=" + msg + ']', errs));
}
Also used : TcpDiscoveryHandshakeRequest(org.apache.ignite.spi.discovery.tcp.messages.TcpDiscoveryHandshakeRequest) TcpDiscoveryHandshakeResponse(org.apache.ignite.spi.discovery.tcp.messages.TcpDiscoveryHandshakeResponse) IOException(java.io.IOException) SSLException(javax.net.ssl.SSLException) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) SocketTimeoutException(java.net.SocketTimeoutException) IgniteException(org.apache.ignite.IgniteException) StreamCorruptedException(java.io.StreamCorruptedException) IgniteSpiException(org.apache.ignite.spi.IgniteSpiException) UUID(java.util.UUID) ServerSocket(java.net.ServerSocket) SSLServerSocket(javax.net.ssl.SSLServerSocket) SSLSocket(javax.net.ssl.SSLSocket) Socket(java.net.Socket) TcpDiscoveryNode(org.apache.ignite.spi.discovery.tcp.internal.TcpDiscoveryNode) TcpDiscoveryJoinRequestMessage(org.apache.ignite.spi.discovery.tcp.messages.TcpDiscoveryJoinRequestMessage) Nullable(org.jetbrains.annotations.Nullable)

Example 89 with IgniteSpiException

use of org.apache.ignite.spi.IgniteSpiException in project ignite by apache.

the class ServerImpl method joinTopology.

/**
 * Tries to join this node to topology.
 * <p>
 * Moves the SPI state to {@code CONNECTING}, builds a join request for the local node and keeps sending it
 * until the state becomes {@code CONNECTED}. If {@code sendJoinRequestMessage} reports that nothing was sent,
 * the local node initializes itself as the first node of the topology (order 1, topology version 1).
 * Otherwise the method waits on {@code mux} for up to {@code spi.netTimeout} milliseconds for the state to
 * change and either finishes, fails with a state-specific error, or repeats the join.
 *
 * @throws IgniteSpiException If any error occurs.
 */
private void joinTopology() throws IgniteSpiException {
    synchronized (mux) {
        assert spiState == CONNECTING || spiState == DISCONNECTED;
        spiState = CONNECTING;
    }
    SecurityCredentials locCred = (SecurityCredentials) locNode.getAttributes().get(IgniteNodeAttributes.ATTR_SECURITY_CREDENTIALS);
    // Tracks whether local authentication has already been performed before the join loop.
    boolean auth = false;
    if (spi.nodeAuth != null && spi.nodeAuth.isGlobalNodeAuthentication()) {
        localAuthentication(locCred);
        auth = true;
    }
    // Marshal credentials for backward compatibility and security.
    marshalCredentials(locNode, locCred);
    DiscoveryDataPacket discoveryData = spi.collectExchangeData(new DiscoveryDataPacket(getLocalNodeId()));
    TcpDiscoveryJoinRequestMessage joinReqMsg = new TcpDiscoveryJoinRequestMessage(locNode, discoveryData);
    // Attach a tracing span, tagged with the local node id and consistent id, to the join request.
    joinReqMsg.spanContainer().span(tracing.create(TraceableMessagesTable.traceName(joinReqMsg.getClass())).addTag(SpanTags.tag(SpanTags.EVENT_NODE, SpanTags.ID), () -> locNode.id().toString()).addTag(SpanTags.tag(SpanTags.EVENT_NODE, SpanTags.CONSISTENT_ID), () -> locNode.consistentId().toString()).addLog(() -> "Created"));
    tracing.messages().beforeSend(joinReqMsg);
    while (true) {
        if (!sendJoinRequestMessage(joinReqMsg)) {
            if (log.isDebugEnabled())
                log.debug("Join request message has not been sent (local node is the first in the topology).");
            // Authenticate locally now if it was not done before the loop.
            if (!auth && spi.nodeAuth != null)
                localAuthentication(locCred);
            // TODO IGNITE-11272
            // The reset of discovery structures is submitted as a task to the message worker.
            FutureTask<Void> fut = msgWorker.addTask(new FutureTask<Void>() {

                @Override
                protected Void body() {
                    // Drop pending messages and failed/leaving node bookkeeping.
                    pendingCustomMsgs.clear();
                    msgWorker.pendingMsgs.reset(null, null, null);
                    msgWorker.next = null;
                    failedNodes.clear();
                    leavingNodes.clear();
                    failedNodesMsgSent.clear();
                    locNode.attributes().remove(IgniteNodeAttributes.ATTR_SECURITY_CREDENTIALS);
                    // The local node becomes the first node: order 1, topology version 1.
                    locNode.order(1);
                    locNode.internalOrder(1);
                    spi.gridStartTime = U.currentTimeMillis();
                    locNode.visible(true);
                    ring.clear();
                    ring.topologyVersion(1);
                    synchronized (mux) {
                        topHist.clear();
                        spiState = CONNECTED;
                        // Wake up threads waiting on mux for a state change.
                        mux.notifyAll();
                    }
                    notifyDiscovery(EVT_NODE_JOINED, 1, locNode, joinReqMsg.spanContainer());
                    return null;
                }
            });
            try {
                // Block until the message worker has executed the task.
                fut.get();
            } catch (IgniteCheckedException e) {
                throw new IgniteSpiException(e);
            }
            msgWorker.nullifyDiscoData();
            break;
        }
        if (log.isDebugEnabled())
            log.debug("Join request message has been sent (waiting for coordinator response).");
        synchronized (mux) {
            long timeout = spi.netTimeout;
            long thresholdNanos = System.nanoTime() + U.millisToNanos(timeout);
            // Wait until the state leaves CONNECTING or the network timeout expires; the remaining time
            // is recomputed after every wake-up.
            while (spiState == CONNECTING && timeout > 0) {
                try {
                    mux.wait(timeout);
                    timeout = U.nanosToMillis(thresholdNanos - System.nanoTime());
                } catch (InterruptedException e) {
                    // Restore the interrupt flag before failing.
                    Thread.currentThread().interrupt();
                    throw new IgniteSpiException("Thread has been interrupted.", e);
                }
            }
            // Dispatch on the resulting state; failure states read the join response from joinRes.
            if (spiState == CONNECTED)
                break;
            else if (spiState == DUPLICATE_ID)
                throw spi.duplicateIdError((TcpDiscoveryDuplicateIdMessage) joinRes.get());
            else if (spiState == AUTH_FAILED)
                throw spi.authenticationFailedError((TcpDiscoveryAuthFailedMessage) joinRes.get());
            else if (spiState == CHECK_FAILED)
                throw spi.checkFailedError((TcpDiscoveryCheckFailedMessage) joinRes.get());
            else if (spiState == RING_FAILED) {
                throw new IgniteSpiException("Unable to connect to next nodes in a ring, it seems local node is " + "experiencing connectivity issues or the rest of the cluster is undergoing massive restarts. " + "Failing local node join to avoid case when one node fails a big part of cluster. To disable" + " this behavior set TcpDiscoverySpi.setConnectionRecoveryTimeout() to 0. " + "[connRecoveryTimeout=" + spi.connRecoveryTimeout + ", effectiveConnRecoveryTimeout=" + spi.getEffectiveConnectionRecoveryTimeout() + ']');
            } else if (spiState == LOOPBACK_PROBLEM) {
                TcpDiscoveryLoopbackProblemMessage msg = (TcpDiscoveryLoopbackProblemMessage) joinRes.get();
                // Word the error according to which side uses the loopback address.
                boolean locHostLoopback = spi.locHost.isLoopbackAddress();
                String firstNode = locHostLoopback ? "local" : "remote";
                String secondNode = locHostLoopback ? "remote" : "local";
                throw new IgniteSpiException("Failed to add node to topology because " + firstNode + " node is configured to use loopback address, but " + secondNode + " node is not " + "(consider changing 'localAddress' configuration parameter) " + "[locNodeAddrs=" + U.addressesAsString(locNode) + ", rmtNodeAddrs=" + U.addressesAsString(msg.addresses(), msg.hostNames()) + ", creatorNodeId=" + msg.creatorNodeId() + ']');
            } else
                // Any other state (including still CONNECTING after the timeout): warn and repeat the join.
                LT.warn(log, "Node has not been connected to topology and will repeat join process. " + "Check remote nodes logs for possible error messages. " + "Note that large topology may require significant time to start. " + "Increase 'TcpDiscoverySpi.networkTimeout' configuration property " + "if getting this message on the starting nodes [networkTimeout=" + spi.netTimeout + ']');
        }
    }
    assert locNode.order() != 0;
    assert locNode.internalOrder() != 0;
    if (log.isDebugEnabled())
        log.debug("Discovery SPI has been connected to topology with order: " + locNode.internalOrder());
    // Close the join request span, tagging it with the order the node received.
    joinReqMsg.spanContainer().span().addTag(SpanTags.tag(SpanTags.NODE, SpanTags.ORDER), () -> String.valueOf(locNode.order())).addLog(() -> "Joined to ring").end();
}
Also used : TcpDiscoveryAuthFailedMessage(org.apache.ignite.spi.discovery.tcp.messages.TcpDiscoveryAuthFailedMessage) TcpDiscoveryLoopbackProblemMessage(org.apache.ignite.spi.discovery.tcp.messages.TcpDiscoveryLoopbackProblemMessage) SecurityCredentials(org.apache.ignite.plugin.security.SecurityCredentials) DiscoveryDataPacket(org.apache.ignite.spi.discovery.tcp.internal.DiscoveryDataPacket) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) IgniteSpiException(org.apache.ignite.spi.IgniteSpiException) TcpDiscoveryJoinRequestMessage(org.apache.ignite.spi.discovery.tcp.messages.TcpDiscoveryJoinRequestMessage)

Example 90 with IgniteSpiException

use of org.apache.ignite.spi.IgniteSpiException in project ignite by apache.

the class TcpDiscoverySpi method resolvedAddresses.

/**
 * Fetches the addresses registered in the IP finder, resolves the unresolved ones and returns them
 * without the local node addresses.
 * <p>
 * Fetching is retried every {@code getReconnectDelay()} milliseconds while it fails. When the applicable
 * timeout is positive and has elapsed, the retries stop and an empty collection is used instead.
 * Addresses rejected by {@code addressFilter} are skipped; addresses whose host cannot be resolved are
 * kept as they are. Unless {@code skipAddrsRandomization} is set, the result is shuffled.
 *
 * @return Resolved addresses without local addresses (potentially empty but never null).
 * @throws org.apache.ignite.spi.IgniteSpiException If the thread is interrupted while waiting for a retry.
 */
protected Collection<InetSocketAddress> resolvedAddresses() throws IgniteSpiException {
    // Moment the lookup started; used to enforce the timeout below.
    final long startedNanos = System.nanoTime();
    final List<InetSocketAddress> resolvedAddrs = new ArrayList<>();

    // A connected client is bounded by the network timeout, everything else by the join timeout.
    final long maxWait;
    if (isClientMode() && impl.getSpiState().equalsIgnoreCase("connected"))
        maxWait = netTimeout;
    else
        maxWait = joinTimeout;

    // Phase 1: obtain the registered addresses, retrying on failure.
    Collection<InetSocketAddress> registered;
    for (;;) {
        try {
            registered = registeredAddresses();
            break;
        } catch (IgniteSpiException e) {
            LT.error(log, e, "Failed to get registered addresses from IP finder " + "(retrying every " + getReconnectDelay() + "ms;" + " change 'reconnectDelay' to configure the frequency of retries) " + "[maxTimeout=" + maxWait + "]", true);
        }

        boolean timedOut = maxWait > 0 && U.millisSinceNanos(startedNanos) > maxWait;
        if (timedOut) {
            LT.warn(log, "Unable to get registered addresses from IP finder, timeout is reached " + "(consider increasing 'joinTimeout' for join process or 'netTimeout' for reconnection) " + "[joinTimeout=" + joinTimeout + ", netTimeout=" + netTimeout + "]");
            // Continue with the (still empty) result list.
            registered = resolvedAddrs;
            break;
        }

        try {
            U.sleep(getReconnectDelay());
        } catch (IgniteInterruptedCheckedException e) {
            throw new IgniteSpiException("Thread has been interrupted.", e);
        }
    }

    // Phase 2: filter, resolve and drop local addresses.
    for (InetSocketAddress candidate : registered) {
        assert candidate != null;

        if (addressFilter != null && !addressFilter.apply(candidate))
            continue;

        InetSocketAddress target;
        try {
            if (candidate.isUnresolved())
                target = new InetSocketAddress(InetAddress.getByName(candidate.getHostName()), candidate.getPort());
            else
                target = candidate;
        } catch (UnknownHostException ignored) {
            LT.warn(log, "Failed to resolve address from IP finder (host is unknown): " + candidate);
            // Keep the unresolved address anyway.
            resolvedAddrs.add(candidate);
            continue;
        }

        boolean isLocal = locNodeAddrs != null && locNodeAddrs.contains(target);
        if (!isLocal)
            resolvedAddrs.add(target);
    }

    // Phase 3: randomize the order unless randomization is switched off.
    if (!(resolvedAddrs.isEmpty() || skipAddrsRandomization))
        Collections.shuffle(resolvedAddrs);

    return resolvedAddrs;
}
Also used : IgniteInterruptedCheckedException(org.apache.ignite.internal.IgniteInterruptedCheckedException) UnknownHostException(java.net.UnknownHostException) InetSocketAddress(java.net.InetSocketAddress) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) IgniteSpiException(org.apache.ignite.spi.IgniteSpiException)

Aggregations

IgniteSpiException (org.apache.ignite.spi.IgniteSpiException)131 IgniteCheckedException (org.apache.ignite.IgniteCheckedException)59 IOException (java.io.IOException)32 InetSocketAddress (java.net.InetSocketAddress)22 ClusterNode (org.apache.ignite.cluster.ClusterNode)21 IgniteInterruptedCheckedException (org.apache.ignite.internal.IgniteInterruptedCheckedException)21 IgniteException (org.apache.ignite.IgniteException)20 ArrayList (java.util.ArrayList)14 ClusterTopologyCheckedException (org.apache.ignite.internal.cluster.ClusterTopologyCheckedException)14 HashMap (java.util.HashMap)13 UUID (java.util.UUID)13 Nullable (org.jetbrains.annotations.Nullable)12 Test (org.junit.Test)12 File (java.io.File)10 Message (org.apache.ignite.plugin.extensions.communication.Message)10 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)9 SSLException (javax.net.ssl.SSLException)8 IgniteConfiguration (org.apache.ignite.configuration.IgniteConfiguration)8 SocketTimeoutException (java.net.SocketTimeoutException)7 Ignite (org.apache.ignite.Ignite)7