Search in sources :

Example 1 with IgniteDiscoveryThread

use of org.apache.ignite.spi.discovery.IgniteDiscoveryThread in project ignite by apache.

the class GridDiscoveryManager method start.

/**
 * {@inheritDoc}
 */
@Override
public void start() throws IgniteCheckedException {
    // Publish the required and the configured off-heap sizes as node attributes.
    ctx.addNodeAttribute(ATTR_OFFHEAP_SIZE, requiredOffheap());
    ctx.addNodeAttribute(ATTR_DATA_REGIONS_OFFHEAP_SIZE, configuredOffheap());
    DiscoverySpi spi = getSpi();
    // Cache SPI capabilities and node flags that are consulted while processing discovery events.
    discoOrdered = discoOrdered();
    histSupported = historySupported();
    isLocDaemon = ctx.isDaemon();
    // Segmentation resolvers are honoured only when the node is not in client mode and at least one is configured.
    hasRslvrs = !ctx.config().isClientMode() && !F.isEmpty(ctx.config().getSegmentationResolvers());
    segChkFreq = ctx.config().getSegmentCheckFrequency();
    if (hasRslvrs) {
        // A negative frequency is rejected; a positive value below 2000 ms only produces a warning.
        if (segChkFreq < 0)
            throw new IgniteCheckedException("Segment check frequency cannot be negative: " + segChkFreq);
        if (segChkFreq > 0 && segChkFreq < 2000)
            U.warn(log, "Configuration parameter 'segmentCheckFrequency' is too low " + "(at least 2000 ms recommended): " + segChkFreq);
        int segResAttemp = ctx.config().getSegmentationResolveAttempts();
        if (segResAttemp < 1)
            throw new IgniteCheckedException("Segment resolve attempts cannot be negative or zero: " + segResAttemp);
        checkSegmentOnStart();
    }
    spi.setMetricsProvider(createMetricsProvider());
    // The node authenticator is installed only when security is enabled.
    if (ctx.security().enabled()) {
        if (isSecurityCompatibilityMode())
            ctx.addNodeAttribute(ATTR_SECURITY_COMPATIBILITY_MODE, true);
        spi.setAuthenticator(new DiscoverySpiNodeAuthenticator() {

            // Delegates to the security processor; a checked failure is rethrown via U.convertException.
            @Override
            public SecurityContext authenticateNode(ClusterNode node, SecurityCredentials cred) {
                try {
                    return ctx.security().authenticateNode(node, cred);
                } catch (IgniteCheckedException e) {
                    throw U.convertException(e);
                }
            }

            // Delegates to the security processor.
            @Override
            public boolean isGlobalNodeAuthentication() {
                return ctx.security().isGlobalNodeAuthentication();
            }
        });
    }
    // Inject resources into the communication failure resolver, if one is configured.
    if (ctx.config().getCommunicationFailureResolver() != null)
        ctx.resource().injectGeneric(ctx.config().getCommunicationFailureResolver());
    // Shared reference between DiscoverySpiListener and DiscoverySpiDataExchange.
    AtomicReference<IgniteFuture<?>> lastStateChangeEvtLsnrFutRef = new AtomicReference<>();
    spi.setListener(new DiscoverySpiListener() {

        private long gridStartTime;

        private final Marshaller marshaller = MarshallerUtils.jdkMarshaller(ctx.igniteInstanceName());

        /**
         * {@inheritDoc}
         * <p>
         * Notifies every registered local-node-initialization listener and then, when a consistent ID
         * is available and the node is an {@link IgniteClusterNode}, assigns that ID to the node.
         */
        @Override
        public void onLocalNodeInitialized(ClusterNode locNode) {
            for (IgniteInClosure<ClusterNode> initLsnr : locNodeInitLsnrs) {
                initLsnr.apply(locNode);
            }
            // Nothing more to do unless the node can accept a consistent ID and one is set.
            if (!(locNode instanceof IgniteClusterNode) || consistentId == null)
                return;
            ((IgniteClusterNode) locNode).setConsistentId(consistentId);
        }

        /**
         * {@inheritDoc}
         * <p>
         * Submits the notification to the notification worker and returns a future tracking its
         * processing. The future is additionally stored as the last state-change future for custom
         * events, and for node joined/left/failed events when persistence is not enabled.
         */
        @Override
        public IgniteFuture<?> onDiscovery(DiscoveryNotification notification) {
            GridFutureAdapter<?> procFut = new GridFutureAdapter<>();
            // With security enabled the task has to run in the initiator's security context.
            NotificationTask task = ctx.security().enabled() ? new SecurityAwareNotificationTask(notification) : new NotificationTask(notification);
            discoNtfWrk.submit(procFut, task);
            IgniteFuture<?> res = new IgniteFutureImpl<>(procFut);
            // TODO could be optimized with more specific conditions.
            int evtType = notification.type();
            boolean nodeSetChanged = evtType == EVT_NODE_JOINED || evtType == EVT_NODE_LEFT || evtType == EVT_NODE_FAILED;
            if (evtType == EVT_DISCOVERY_CUSTOM_EVT || (nodeSetChanged && !CU.isPersistenceEnabled(ctx.config())))
                lastStateChangeEvtLsnrFutRef.set(res);
            return res;
        }

        /**
         * Processes a single discovery notification: updates the minor topology version, the
         * discovery cache history and the current topology snapshot, notifies the cluster state and
         * cache processors plus direct custom event listeners, and finally hands the event over to
         * the discovery worker. Invoked from {@code NotificationTask#run()} while holding
         * {@code discoEvtMux}.
         *
         * @param notification Notification.
         */
        private void onDiscovery0(DiscoveryNotification notification) {
            int type = notification.type();
            ClusterNode node = notification.getNode();
            long topVer = notification.getTopVer();
            // Unwrap the custom message payload, if the notification carries one.
            DiscoveryCustomMessage customMsg = notification.getCustomMsgData() == null ? null : ((CustomMessageWrapper) notification.getCustomMsgData()).delegate();
            if (skipMessage(notification.type(), customMsg))
                return;
            final ClusterNode locNode = localNode();
            if (notification.getTopHist() != null)
                topHist = notification.getTopHist();
            // The minor version is reset (and the version considered changed) for every event type except
            // metrics update, segmentation, client disconnect/reconnect and custom events.
            boolean verChanged;
            if (type == EVT_NODE_METRICS_UPDATED)
                verChanged = false;
            else {
                if (type != EVT_NODE_SEGMENTED && type != EVT_CLIENT_NODE_DISCONNECTED && type != EVT_CLIENT_NODE_RECONNECTED && type != EVT_DISCOVERY_CUSTOM_EVT) {
                    minorTopVer = 0;
                    verChanged = true;
                } else
                    verChanged = false;
            }
            // A node is gone: refresh alive information in every cached DiscoCache and the client nodes.
            if (type == EVT_NODE_FAILED || type == EVT_NODE_LEFT) {
                for (DiscoCache c : discoCacheHist.values()) c.updateAlives(node);
                updateClientNodes(node.id());
            }
            boolean locJoinEvt = type == EVT_NODE_JOINED && node.id().equals(locNode.id());
            // The state processor may return a finish message when a node leaves; it is dispatched at the end.
            ChangeGlobalStateFinishMessage stateFinishMsg = null;
            if (type == EVT_NODE_FAILED || type == EVT_NODE_LEFT)
                stateFinishMsg = ctx.state().onNodeLeft(node);
            final AffinityTopologyVersion nextTopVer;
            if (type == EVT_DISCOVERY_CUSTOM_EVT) {
                assert customMsg != null;
                // Custom events may request an increment of the minor topology version.
                boolean incMinorTopVer;
                if (customMsg instanceof ChangeGlobalStateMessage) {
                    incMinorTopVer = ctx.state().onStateChangeMessage(new AffinityTopologyVersion(topVer, minorTopVer), (ChangeGlobalStateMessage) customMsg, discoCache());
                } else if (customMsg instanceof ChangeGlobalStateFinishMessage) {
                    ctx.state().onStateFinishMessage((ChangeGlobalStateFinishMessage) customMsg);
                    Snapshot snapshot = topSnap.get();
                    // Topology version does not change, but need create DiscoCache with new state.
                    DiscoCache discoCache = snapshot.discoCache.copy(snapshot.topVer, ctx.state().clusterState());
                    topSnap.set(new Snapshot(snapshot.topVer, discoCache));
                    incMinorTopVer = false;
                } else {
                    incMinorTopVer = ctx.cache().onCustomEvent(customMsg, new AffinityTopologyVersion(topVer, minorTopVer), node);
                }
                if (incMinorTopVer) {
                    minorTopVer++;
                    verChanged = true;
                }
                nextTopVer = new AffinityTopologyVersion(topVer, minorTopVer);
                // The cache processor is notified about a custom event only when it bumped the minor version.
                if (incMinorTopVer)
                    ctx.cache().onDiscoveryEvent(type, customMsg, node, nextTopVer, ctx.state().clusterState());
            } else {
                nextTopVer = new AffinityTopologyVersion(topVer, minorTopVer);
                ctx.cache().onDiscoveryEvent(type, customMsg, node, nextTopVer, ctx.state().clusterState());
            }
            DiscoCache discoCache;
            // Version changed: build a DiscoCache for the new version, store it in the history and publish
            // it as the current snapshot. Per the original note, the SPI notifies the manager about all
            // events from this listener.
            if (verChanged) {
                Snapshot snapshot = topSnap.get();
                if (customMsg == null) {
                    discoCache = createDiscoCache(nextTopVer, ctx.state().clusterState(), locNode, notification.getTopSnapshot());
                } else if (customMsg instanceof ChangeGlobalStateMessage) {
                    discoCache = createDiscoCache(nextTopVer, ctx.state().pendingState((ChangeGlobalStateMessage) customMsg), locNode, notification.getTopSnapshot());
                } else
                    discoCache = customMsg.createDiscoCache(GridDiscoveryManager.this, nextTopVer, snapshot.discoCache);
                discoCacheHist.put(nextTopVer, discoCache);
                assert snapshot.topVer.compareTo(nextTopVer) < 0 : "Topology version out of order [this.topVer=" + topSnap + ", topVer=" + topVer + ", node=" + node + ", nextTopVer=" + nextTopVer + ", evt=" + U.gridEventName(type) + ']';
                topSnap.set(new Snapshot(nextTopVer, discoCache));
            } else
                // Current version.
                discoCache = discoCache();
            // For the local join, or for a node that is neither client nor daemon, the state processor may
            // ask for the DiscoCache to be rebuilt with the current cluster state.
            if (locJoinEvt || !node.isClient() && !node.isDaemon()) {
                if (type == EVT_NODE_LEFT || type == EVT_NODE_FAILED || type == EVT_NODE_JOINED) {
                    boolean discoCacheRecalculationRequired = ctx.state().autoAdjustInMemoryClusterState(node.id(), notification.getTopSnapshot(), discoCache, topVer, minorTopVer);
                    if (discoCacheRecalculationRequired) {
                        discoCache = createDiscoCache(nextTopVer, ctx.state().clusterState(), locNode, notification.getTopSnapshot());
                        discoCacheHist.put(nextTopVer, discoCache);
                        topSnap.set(new Snapshot(nextTopVer, discoCache));
                    }
                }
            }
            if (type == EVT_DISCOVERY_CUSTOM_EVT) {
                // Walk up the message class hierarchy so listeners registered for superclasses are notified too.
                for (Class cls = customMsg.getClass(); cls != null; cls = cls.getSuperclass()) {
                    List<CustomEventListener<DiscoveryCustomMessage>> list = customEvtLsnrs.get(cls);
                    if (list != null) {
                        for (CustomEventListener<DiscoveryCustomMessage> lsnr : list) {
                            try {
                                lsnr.onCustomEvent(nextTopVer, node, customMsg);
                            } catch (Exception e) {
                                // A failing listener is logged and does not prevent the remaining ones from running.
                                U.error(log, "Failed to notify direct custom event listener: " + customMsg, e);
                            }
                        }
                    }
                }
            }
            SecurityContext secCtx = remoteSecurityContext(ctx);
            // If this is a local join event, just save it and do not notify listeners.
            if (locJoinEvt) {
                if (gridStartTime == 0)
                    gridStartTime = getSpi().getGridStartTime();
                topSnap.set(new Snapshot(nextTopVer, discoCache));
                // Counts down the latch that start() waits on.
                startLatch.countDown();
                DiscoveryEvent discoEvt = new DiscoveryEvent();
                discoEvt.node(ctx.discovery().localNode());
                discoEvt.eventNode(node);
                discoEvt.type(EVT_NODE_JOINED);
                discoEvt.topologySnapshot(topVer, new ArrayList<>(F.view(notification.getTopSnapshot(), FILTER_NOT_DAEMON)));
                if (notification.getSpanContainer() != null)
                    discoEvt.span(notification.getSpanContainer().span());
                discoWrk.discoCache = discoCache;
                if (!ctx.clientDisconnected()) {
                    // The security processor must be notified first, since {@link IgniteSecurity#onLocalJoin}
                    // finishes local node security context initialization that can be demanded by other Ignite
                    // components.
                    ctx.security().onLocalJoin();
                    if (!isLocDaemon) {
                        ctx.cache().context().versions().onLocalJoin(topVer);
                        ctx.cache().context().coordinators().onLocalJoin(discoEvt, discoCache);
                        ctx.cache().context().exchange().onLocalJoin(discoEvt, discoCache);
                        ctx.service().onLocalJoin(discoEvt, discoCache);
                        ctx.encryption().onLocalJoin();
                        ctx.cluster().onLocalJoin();
                    }
                }
                IgniteInternalFuture<Boolean> transitionWaitFut = ctx.state().onLocalJoin(discoCache);
                // Complete the local join future with the join event, cache and state information.
                locJoin.onDone(new DiscoveryLocalJoinData(discoEvt, discoCache, transitionWaitFut, ctx.state().clusterState().active()));
                return;
            } else if (type == EVT_CLIENT_NODE_DISCONNECTED) {
                assert locNode.isClient() : locNode;
                assert node.isClient() : node;
                ((IgniteKernal) ctx.grid()).onDisconnected();
                // Fail a still pending local join future and replace it with a fresh one.
                if (!locJoin.isDone())
                    locJoin.onDone(new IgniteCheckedException("Node disconnected"));
                locJoin = new GridFutureAdapter<>();
                registeredCaches.clear();
                registeredCacheGrps.clear();
                // Drop the whole discovery cache history, asserting that each key was still present.
                for (AffinityTopologyVersion histVer : discoCacheHist.keySet()) {
                    Object rmvd = discoCacheHist.remove(histVer);
                    assert rmvd != null : histVer;
                }
                topHist.clear();
                // Reset the snapshot to version ZERO with a topology consisting of the local node only.
                topSnap.set(new Snapshot(AffinityTopologyVersion.ZERO, createDiscoCache(AffinityTopologyVersion.ZERO, ctx.state().clusterState(), locNode, Collections.singleton(locNode))));
            } else if (type == EVT_CLIENT_NODE_RECONNECTED) {
                assert locNode.isClient() : locNode;
                assert node.isClient() : node;
                ctx.security().onLocalJoin();
                // A different grid start time is reported to the kernal as a cluster restart.
                boolean clusterRestarted = gridStartTime != getSpi().getGridStartTime();
                gridStartTime = getSpi().getGridStartTime();
                ((IgniteKernal) ctx.grid()).onReconnected(clusterRestarted);
                ctx.cache().context().coordinators().onLocalJoin(localJoinEvent(), discoCache);
                ctx.cache().context().exchange().onLocalJoin(localJoinEvent(), discoCache);
                ctx.service().onLocalJoin(localJoinEvent(), discoCache);
                DiscoCache discoCache0 = discoCache;
                // The reconnect event is queued only after the client reconnect future completes successfully.
                ctx.cluster().clientReconnectFuture().listen(new CI1<IgniteFuture<?>>() {

                    @Override
                    public void apply(IgniteFuture<?> fut) {
                        try {
                            fut.get();
                            discoWrk.addEvent(new NotificationEvent(EVT_CLIENT_NODE_RECONNECTED, nextTopVer, node, discoCache0, notification.getTopSnapshot(), null, notification.getSpanContainer(), secCtx));
                        } catch (IgniteException ignore) {
                        // No-op.
                        }
                    }
                });
                return;
            }
            // While the client is disconnected only disconnect and segmentation events are passed to the worker.
            if (type == EVT_CLIENT_NODE_DISCONNECTED || type == EVT_NODE_SEGMENTED || !ctx.clientDisconnected())
                discoWrk.addEvent(new NotificationEvent(type, nextTopVer, node, discoCache, notification.getTopSnapshot(), customMsg, notification.getSpanContainer(), secCtx));
            // Dispatch the state finish message produced by the node-left handling above as a custom event.
            if (stateFinishMsg != null)
                discoWrk.addEvent(new NotificationEvent(EVT_DISCOVERY_CUSTOM_EVT, nextTopVer, node, discoCache, notification.getTopSnapshot(), stateFinishMsg, notification.getSpanContainer(), secCtx));
            if (type == EVT_CLIENT_NODE_DISCONNECTED)
                discoWrk.awaitDisconnectEvent();
        }

        /**
         * A {@link NotificationTask} that runs in a security context owned by the initiator of the
         * discovery event.
         */
        class SecurityAwareNotificationTask extends NotificationTask {

            /**
             * @param notification Discovery notification to process.
             */
            public SecurityAwareNotificationTask(DiscoveryNotification notification) {
                super(notification);
            }

            /**
             * Runs the parent task within the security context of the subject carried by a
             * {@link SecurityAwareCustomMessageWrapper}; for any other notification the security
             * context resolved for {@code notification.getNode()} is used instead.
             */
            @Override
            public void run() {
                DiscoverySpiCustomMessage msg = notification.getCustomMsgData();
                if (!(msg instanceof SecurityAwareCustomMessageWrapper)) {
                    // No subject attached to the message: use the context of the notification's node.
                    SecurityContext nodeSecCtx = nodeSecurityContext(marshaller, U.resolveClassLoader(ctx.config()), notification.getNode());
                    try (OperationSecurityContext ignored = ctx.security().withContext(nodeSecCtx)) {
                        super.run();
                    }
                    return;
                }
                UUID subjId = ((SecurityAwareCustomMessageWrapper) msg).securitySubjectId();
                try (OperationSecurityContext ignored = ctx.security().withContext(subjId)) {
                    super.run();
                }
            }
        }

        /**
         * Task that handles one discovery notification asynchronously.
         */
        class NotificationTask implements Runnable {

            /**
             * Notification to be processed by this task.
             */
            protected final DiscoveryNotification notification;

            /**
             * @param ntf Notification to be processed by this task.
             */
            public NotificationTask(DiscoveryNotification ntf) {
                notification = ntf;
            }

            /**
             * {@inheritDoc}
             * <p>
             * Processes the notification while holding {@code discoEvtMux}.
             */
            @Override
            public void run() {
                synchronized (discoEvtMux) {
                    onDiscovery0(notification);
                }
            }
        }
    });
    spi.setDataExchange(new DiscoverySpiDataExchange() {

        /**
         * {@inheritDoc}
         * <p>
         * Lets every component contribute joining-node data when the local node is the one joining,
         * and grid-node data otherwise. In the latter case the last state-change event future is
         * awaited first.
         */
        @Override
        public DiscoveryDataBag collect(DiscoveryDataBag dataBag) {
            assert dataBag != null;
            assert dataBag.joiningNodeId() != null;
            final boolean locJoining = ctx.localNodeId().equals(dataBag.joiningNodeId());
            if (!locJoining)
                waitForLastStateChangeEventFuture();
            for (GridComponent comp : ctx.components()) {
                if (locJoining)
                    comp.collectJoiningNodeData(dataBag);
                else
                    comp.collectGridNodeData(dataBag);
            }
            return dataBag;
        }

        /**
         * {@inheritDoc}
         * <p>
         * Hands the exchanged discovery data to the components. The cluster state processor always
         * receives its data first; every other component that declares a discovery data type follows.
         */
        @Override
        public void onExchange(DiscoveryDataBag dataBag) {
            assert dataBag != null;
            assert dataBag.joiningNodeId() != null;
            final boolean locJoining = ctx.localNodeId().equals(dataBag.joiningNodeId());
            final IGridClusterStateProcessor clusterStateProc = ctx.state();
            if (locJoining) {
                // NodeAdded msg reached joining node after round-trip over the ring.
                clusterStateProc.onGridDataReceived(dataBag.gridDiscoveryData(clusterStateProc.discoveryDataType().ordinal()));
                for (GridComponent comp : ctx.components()) {
                    if (comp.discoveryDataType() == null || comp == clusterStateProc)
                        continue;
                    comp.onGridDataReceived(dataBag.gridDiscoveryData(comp.discoveryDataType().ordinal()));
                }
                return;
            }
            // Discovery data from newly joined node has to be applied to the current old node.
            JoiningNodeDiscoveryData stateData = dataBag.newJoinerDiscoveryData(clusterStateProc.discoveryDataType().ordinal());
            assert stateData != null;
            clusterStateProc.onJoiningNodeDataReceived(stateData);
            for (GridComponent comp : ctx.components()) {
                if (comp.discoveryDataType() == null || comp == clusterStateProc)
                    continue;
                JoiningNodeDiscoveryData compData = dataBag.newJoinerDiscoveryData(comp.discoveryDataType().ordinal());
                // Unlike the state processor, other components may have no joiner data at all.
                if (compData != null)
                    comp.onJoiningNodeDataReceived(compData);
            }
        }

        /**
         * Waits for the last recorded state-change listener future, if any, and clears the shared
         * reference afterwards. When called from an {@link IgniteDiscoveryThread} that exposes a
         * worker, the wait is wrapped into the worker's blocking section.
         */
        private void waitForLastStateChangeEventFuture() {
            IgniteFuture<?> pendingFut = lastStateChangeEvtLsnrFutRef.get();
            if (pendingFut == null)
                return;
            Thread cur = Thread.currentThread();
            GridWorker blockedWorker = null;
            if (cur instanceof IgniteDiscoveryThread)
                blockedWorker = ((IgniteDiscoveryThread) cur).worker();
            if (blockedWorker != null)
                blockedWorker.blockingSectionBegin();
            try {
                pendingFut.get();
            } finally {
                // Guaranteed to be invoked in the same thread as DiscoverySpiListener#onDiscovery.
                // No additional synchronization for reference is required.
                lastStateChangeEvtLsnrFutRef.set(null);
                if (blockedWorker != null)
                    blockedWorker.blockingSectionEnd();
            }
        }
    });
    // The notifier thread running the notification worker is started before the SPI itself.
    new DiscoveryMessageNotifierThread(discoNtfWrk).start();
    startSpi();
    registeredDiscoSpi = true;
    // Block until startLatch is counted down, which happens when the local join event is processed.
    try {
        U.await(startLatch);
    } catch (IgniteInterruptedException e) {
        throw new IgniteCheckedException("Failed to start discovery manager (thread has been interrupted).", e);
    }
    // Start segment check worker only if frequency is greater than 0.
    if (hasRslvrs && segChkFreq > 0) {
        segChkWrk = new SegmentCheckWorker();
        segChkThread = new IgniteThread(segChkWrk);
        segChkThread.setUncaughtExceptionHandler(new OomExceptionHandler(ctx));
        segChkThread.start();
    }
    locNode = spi.getLocalNode();
    // Check the attributes of the remote nodes in the current discovery cache.
    checkAttributes(discoCache().remoteNodes());
    // Start discovery worker.
    new IgniteThread(discoWrk).start();
    if (log.isDebugEnabled())
        log.debug(startInfo());
}
Also used : CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) CI1(org.apache.ignite.internal.util.typedef.CI1) IgniteInternalFuture(org.apache.ignite.internal.IgniteInternalFuture) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) IgniteException(org.apache.ignite.IgniteException) GridFutureAdapter(org.apache.ignite.internal.util.future.GridFutureAdapter) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) List(java.util.List) UUID(java.util.UUID) OperationSecurityContext(org.apache.ignite.internal.processors.security.OperationSecurityContext) ClusterNode(org.apache.ignite.cluster.ClusterNode) AffinityTopologyVersion(org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion) DiscoverySpiNodeAuthenticator(org.apache.ignite.spi.discovery.DiscoverySpiNodeAuthenticator) IgniteInterruptedException(org.apache.ignite.IgniteInterruptedException) IgniteDiscoveryThread(org.apache.ignite.spi.discovery.IgniteDiscoveryThread) SecurityCredentials(org.apache.ignite.plugin.security.SecurityCredentials) DiscoverySpiDataExchange(org.apache.ignite.spi.discovery.DiscoverySpiDataExchange) IgniteThread(org.apache.ignite.thread.IgniteThread) OomExceptionHandler(org.apache.ignite.thread.OomExceptionHandler) GridComponent(org.apache.ignite.internal.GridComponent) IgniteFuture(org.apache.ignite.lang.IgniteFuture) DiscoveryEvent(org.apache.ignite.events.DiscoveryEvent) JoiningNodeDiscoveryData(org.apache.ignite.spi.discovery.DiscoveryDataBag.JoiningNodeDiscoveryData) DiscoveryDataBag(org.apache.ignite.spi.discovery.DiscoveryDataBag) IGridClusterStateProcessor(org.apache.ignite.internal.processors.cluster.IGridClusterStateProcessor) IgniteKernal(org.apache.ignite.internal.IgniteKernal) Marshaller(org.apache.ignite.marshaller.Marshaller) ChangeGlobalStateMessage(org.apache.ignite.internal.processors.cluster.ChangeGlobalStateMessage) 
AtomicReference(java.util.concurrent.atomic.AtomicReference) DiscoverySpiListener(org.apache.ignite.spi.discovery.DiscoverySpiListener) GridWorker(org.apache.ignite.internal.util.worker.GridWorker) IgniteClientDisconnectedException(org.apache.ignite.IgniteClientDisconnectedException) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) IgniteSpiException(org.apache.ignite.spi.IgniteSpiException) IgniteInterruptedException(org.apache.ignite.IgniteInterruptedException) IgniteInterruptedCheckedException(org.apache.ignite.internal.IgniteInterruptedCheckedException) IgniteClientDisconnectedCheckedException(org.apache.ignite.internal.IgniteClientDisconnectedCheckedException) IgniteException(org.apache.ignite.IgniteException) NodeStoppingException(org.apache.ignite.internal.NodeStoppingException) IgniteThread(org.apache.ignite.thread.IgniteThread) IgniteDiscoveryThread(org.apache.ignite.spi.discovery.IgniteDiscoveryThread) DiscoverySpiCustomMessage(org.apache.ignite.spi.discovery.DiscoverySpiCustomMessage) DiscoverySpi(org.apache.ignite.spi.discovery.DiscoverySpi) TcpDiscoverySpi(org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi) SecurityContext(org.apache.ignite.internal.processors.security.SecurityContext) SecurityUtils.remoteSecurityContext(org.apache.ignite.internal.processors.security.SecurityUtils.remoteSecurityContext) SecurityUtils.withRemoteSecurityContext(org.apache.ignite.internal.processors.security.SecurityUtils.withRemoteSecurityContext) OperationSecurityContext(org.apache.ignite.internal.processors.security.OperationSecurityContext) SecurityUtils.nodeSecurityContext(org.apache.ignite.internal.processors.security.SecurityUtils.nodeSecurityContext) IgniteInClosure(org.apache.ignite.lang.IgniteInClosure) DiscoveryNotification(org.apache.ignite.spi.discovery.DiscoveryNotification) ChangeGlobalStateFinishMessage(org.apache.ignite.internal.processors.cluster.ChangeGlobalStateFinishMessage)

Example 2 with IgniteDiscoveryThread

use of org.apache.ignite.spi.discovery.IgniteDiscoveryThread in project ignite by apache.

the class CacheObjectBinaryProcessorImpl method metadata0.

/**
 * Looks up binary metadata for the given type. On a client node with no local entry the
 * metadata is requested first, unless the current thread is forbidden to request it. A pending
 * update or file-store write is awaited before returning, except on discovery threads and on
 * threads forbidden to request metadata, which get the cached value immediately.
 *
 * @param typeId Type ID.
 * @return Metadata, or {@code null} if it is unknown, is being removed, or waiting for the
 *      metadata write failed.
 * @throws IgniteException In case of error.
 */
@Nullable
public BinaryMetadata metadata0(final int typeId) {
    BinaryMetadataHolder holder = metadataLocCache.get(typeId);
    final IgniteThread thread = IgniteThread.current();
    final boolean mayRequest = holder == null && (thread == null || !thread.isForbiddenToRequestBinaryMetadata());
    if (mayRequest && ctx.clientNode()) {
        try {
            transport.requestUpToDateMetadata(typeId).get();
            holder = metadataLocCache.get(typeId);
        } catch (IgniteCheckedException ignored) {
            // No-op.
        }
    }
    if (holder == null)
        return null;
    if (holder.removing()) {
        // Wait for the removal to finish; the metadata is reported as absent either way.
        GridFutureAdapter<MetadataUpdateResult> rmvFut = transport.awaitMetadataRemove(typeId);
        try {
            rmvFut.get();
        } catch (IgniteCheckedException ignored) {
            // No-op.
        }
        return null;
    }
    // These threads never wait: hand out whatever is cached.
    if (thread instanceof IgniteDiscoveryThread || (thread != null && thread.isForbiddenToRequestBinaryMetadata()))
        return holder.metadata();
    final boolean updatePending = holder.pendingVersion() - holder.acceptedVersion() > 0;
    if (updatePending) {
        GridFutureAdapter<MetadataUpdateResult> updFut = transport.awaitMetadataUpdate(typeId, holder.pendingVersion());
        if (log.isDebugEnabled() && !updFut.isDone())
            log.debug("Waiting for update for" + " [typeId=" + typeId + ", pendingVer=" + holder.pendingVersion() + ", acceptedVer=" + holder.acceptedVersion() + "]");
        try {
            updFut.get();
        } catch (IgniteCheckedException ignored) {
            // No-op.
        }
    } else if (metadataFileStore != null) {
        try {
            metadataFileStore.waitForWriteCompletion(typeId, holder.pendingVersion());
        } catch (IgniteCheckedException e) {
            log.warning("Failed to wait for metadata write operation for [typeId=" + typeId + ", typeVer=" + holder.acceptedVersion() + ']', e);
            return null;
        }
    }
    return holder.metadata();
}
Also used : IgniteCheckedException(org.apache.ignite.IgniteCheckedException) IgniteThread(org.apache.ignite.thread.IgniteThread) IgniteDiscoveryThread(org.apache.ignite.spi.discovery.IgniteDiscoveryThread) Nullable(org.jetbrains.annotations.Nullable)

Example 3 with IgniteDiscoveryThread

use of org.apache.ignite.spi.discovery.IgniteDiscoveryThread in project ignite by apache.

the class CacheObjectBinaryProcessorImpl method metadata.

/**
 * {@inheritDoc}
 * <p>
 * On a client node, metadata missing locally (or lacking the requested schema) is requested
 * before the lookup is repeated. On other nodes a discovery thread gets the cached metadata
 * immediately; otherwise a pending metadata update is awaited or, as a last resort, a schema
 * update is awaited within {@code waitSchemaTimeout}. Before returning, a pending metadata file
 * write is awaited when a metadata file store is present.
 *
 * @param typeId Type ID.
 * @param schemaId Schema ID.
 * @return Binary type, or {@code null} if no metadata is known for the type or waiting for the
 *      metadata write failed.
 * @throws IgniteClientDisconnectedException If metadata is still missing on a client node and a
 *      reconnect future is set.
 */
@Nullable
@Override
public BinaryType metadata(final int typeId, final int schemaId) {
    BinaryMetadataHolder holder = metadataLocCache.get(typeId);
    if (ctx.clientNode()) {
        if (holder == null || !holder.metadata().hasSchema(schemaId)) {
            if (log.isDebugEnabled())
                log.debug("Waiting for client metadata update" + " [typeId=" + typeId + ", schemaId=" + schemaId + ", pendingVer=" + (holder == null ? "NA" : holder.pendingVersion()) + ", acceptedVer=" + (holder == null ? "NA" : holder.acceptedVersion()) + ']');
            try {
                transport.requestUpToDateMetadata(typeId).get();
            } catch (IgniteCheckedException ignored) {
                // No-op.
            }
            holder = metadataLocCache.get(typeId);
            // Read the field once so the null check and the thrown exception use the same future.
            IgniteFuture<?> reconnectFut0 = reconnectFut;
            if (holder == null && reconnectFut0 != null)
                throw new IgniteClientDisconnectedException(reconnectFut0, "Client node disconnected.");
            if (log.isDebugEnabled())
                log.debug("Finished waiting for client metadata update" + " [typeId=" + typeId + ", schemaId=" + schemaId + ", pendingVer=" + (holder == null ? "NA" : holder.pendingVersion()) + ", acceptedVer=" + (holder == null ? "NA" : holder.acceptedVersion()) + ']');
        }
    } else {
        // A discovery thread never waits here: it gets the cached metadata right away.
        if (holder != null && IgniteThread.current() instanceof IgniteDiscoveryThread)
            return holder.metadata().wrap(binaryCtx);
        else if (holder != null && (holder.pendingVersion() - holder.acceptedVersion() > 0)) {
            if (log.isDebugEnabled())
                log.debug("Waiting for metadata update" + " [typeId=" + typeId + ", schemaId=" + schemaId + ", pendingVer=" + holder.pendingVersion() + ", acceptedVer=" + holder.acceptedVersion() + ']');
            long t0 = System.nanoTime();
            GridFutureAdapter<MetadataUpdateResult> fut = transport.awaitMetadataUpdate(typeId, holder.pendingVersion());
            try {
                fut.get();
            } catch (IgniteCheckedException e) {
                log.error("Failed to wait for metadata update [typeId=" + typeId + ", schemaId=" + schemaId + ']', e);
            }
            // The elapsed time is measured in nanoseconds and reported in milliseconds.
            if (log.isDebugEnabled())
                log.debug("Finished waiting for metadata update" + " [typeId=" + typeId + ", waitTime=" + NANOSECONDS.toMillis(System.nanoTime() - t0) + "ms" + ", schemaId=" + schemaId + ", pendingVer=" + holder.pendingVersion() + ", acceptedVer=" + holder.acceptedVersion() + ']');
            holder = metadataLocCache.get(typeId);
        } else if (holder == null || !holder.metadata().hasSchema(schemaId)) {
            // Last resort waiting.
            U.warn(log, "Schema is missing while no metadata updates are in progress " + "(will wait for schema update within timeout defined by " + IGNITE_WAIT_SCHEMA_UPDATE + " system property)" + " [typeId=" + typeId + ", missingSchemaId=" + schemaId + ", pendingVer=" + (holder == null ? "NA" : holder.pendingVersion()) + ", acceptedVer=" + (holder == null ? "NA" : holder.acceptedVersion()) + ", binMetaUpdateTimeout=" + waitSchemaTimeout + ']');
            long t0 = System.nanoTime();
            GridFutureAdapter<?> fut = transport.awaitSchemaUpdate(typeId, schemaId);
            try {
                fut.get(waitSchemaTimeout);
            } catch (IgniteFutureTimeoutCheckedException e) {
                log.error("Timed out while waiting for schema update [typeId=" + typeId + ", schemaId=" + schemaId + ']');
            } catch (IgniteCheckedException ignored) {
                // No-op.
            }
            holder = metadataLocCache.get(typeId);
            // The elapsed time is measured in nanoseconds and reported in milliseconds.
            if (log.isDebugEnabled() && holder != null && holder.metadata().hasSchema(schemaId))
                log.debug("Found the schema after wait" + " [typeId=" + typeId + ", waitTime=" + NANOSECONDS.toMillis(System.nanoTime() - t0) + "ms" + ", schemaId=" + schemaId + ", pendingVer=" + holder.pendingVersion() + ", acceptedVer=" + holder.acceptedVersion() + ']');
        }
    }
    if (holder != null && metadataFileStore != null) {
        try {
            metadataFileStore.waitForWriteCompletion(typeId, holder.pendingVersion());
        } catch (IgniteCheckedException e) {
            log.warning("Failed to wait for metadata write operation for [typeId=" + typeId + ", typeVer=" + holder.acceptedVersion() + ']', e);
            return null;
        }
    }
    return holder != null ? holder.metadata().wrap(binaryCtx) : null;
}
Also used : IgniteCheckedException(org.apache.ignite.IgniteCheckedException) IgniteClientDisconnectedException(org.apache.ignite.IgniteClientDisconnectedException) GridFutureAdapter(org.apache.ignite.internal.util.future.GridFutureAdapter) IgniteFutureTimeoutCheckedException(org.apache.ignite.internal.IgniteFutureTimeoutCheckedException) IgniteDiscoveryThread(org.apache.ignite.spi.discovery.IgniteDiscoveryThread) Nullable(org.jetbrains.annotations.Nullable)

Example 4 with IgniteDiscoveryThread

use of org.apache.ignite.spi.discovery.IgniteDiscoveryThread in project ignite by apache.

the class GridNioServerWrapper method createNioSession.

/**
 * Returns the established TCP/IP connection between the current node and remote server. A handshake process of
 * negotiation between two communicating nodes will be performed before the {@link GridNioSession} is created.
 * <p>
 * The handshaking process consists of these steps:
 *
 * <ol>
 * <li>The local node opens a new {@link SocketChannel} in the <em>blocking</em> mode.</li>
 * <li>The local node calls {@link SocketChannel#connect(SocketAddress)} to remote node.</li>
 * <li>The remote GridNioAcceptWorker thread accepts new connection.</li>
 * <li>The remote node sends back the {@link NodeIdMessage}.</li>
 * <li>The local node reads NodeIdMessage from created channel.</li>
 * <li>The local node sends the {@link HandshakeMessage2} to remote.</li>
 * <li>The remote node processes {@link HandshakeMessage2} in {@link GridNioServerListener#onMessage(GridNioSession,
 * Object)}.</li>
 * <li>The remote node sends back the {@link RecoveryLastReceivedMessage}.</li>
 * </ol>
 *
 * The handshaking process ends.
 * </p>
 * <p>
 * <em>Note.</em> The {@link HandshakeTimeoutObject} is created to control execution timeout during the
 * whole handshaking process.
 * </p>
 * <p>
 * The node's addresses are tried one by one. For each address the connect/handshake is retried with a
 * fresh {@link ExponentialBackoffTimeoutStrategy} until a session is created, the strategy reports a timeout,
 * or an unrecoverable error occurs. Errors collected along the way are chained via
 * {@link Throwable#addSuppressed(Throwable)} and handed to {@code processSessionCreationError} if no session
 * could be created.
 * </p>
 *
 * @param node Remote node identifier to connect with.
 * @param connIdx Connection index based on configured {@link ConnectionPolicy}.
 * @return A {@link GridNioSession} connection representation, or {@code null} if the recovery descriptor could
 *      not be reserved or the handshake reported {@code ALREADY_CONNECTED}. NOTE(review): {@code null} is also
 *      returned if no session was created and {@code processSessionCreationError} returns normally - confirm
 *      whether that helper always throws.
 * @throws IgniteCheckedException If establishing the connection fails. This includes
 *      {@link ClusterTopologyCheckedException} when the remote node has left the topology or is stopping, and
 *      {@code IgniteTooManyOpenFilesException} when the failure cause is "Too many open files".
 *      {@code NodeUnreachableException} is thrown when an inverse connection has to be requested instead;
 *      {@code IgniteSpiException} is thrown if this node is stopping.
 */
public GridNioSession createNioSession(ClusterNode node, int connIdx) throws IgniteCheckedException {
    // Local node counts as a server when it is neither a client nor a daemon.
    boolean locNodeIsSrv = !locNodeSupplier.get().isClient() && !locNodeSupplier.get().isDaemon();
    // Fail fast (outside of discovery threads) when a server is asked to open a connection to a client
    // for which forceClientToServerConnections(node) holds.
    if (!(Thread.currentThread() instanceof IgniteDiscoveryThread) && locNodeIsSrv) {
        if (node.isClient() && forceClientToServerConnections(node)) {
            String msg = "Failed to connect to node " + node.id() + " because it is started" + " in 'forceClientToServerConnections' mode; inverse connection will be requested.";
            throw new NodeUnreachableException(msg);
        }
    }
    Collection<InetSocketAddress> addrs = nodeAddresses(node, cfg.filterReachableAddresses(), attrs, locNodeSupplier);
    GridNioSession ses = null;
    // First collected error; subsequent errors are attached to it as suppressed exceptions.
    IgniteCheckedException errs = null;
    // Overall time budget for one address: either the failure detection timeout (client-specific value for
    // client nodes) or the total backoff time derived from the connection timeouts and reconnect count.
    long totalTimeout;
    if (cfg.failureDetectionTimeoutEnabled())
        totalTimeout = node.isClient() ? stateProvider.clientFailureDetectionTimeout() : cfg.failureDetectionTimeout();
    else {
        totalTimeout = ExponentialBackoffTimeoutStrategy.totalBackoffTimeout(cfg.connectionTimeout(), cfg.maxConnectionTimeout(), cfg.reconCount());
    }
    // Addresses considered unreachable; compared against the number of attempted addresses at the end.
    Set<InetSocketAddress> failedAddrsSet = new HashSet<>();
    // Number of addresses skipped because they belong to the local node.
    int skippedAddrs = 0;
    for (InetSocketAddress addr : addrs) {
        // Unresolved addresses cannot be connected to: record as failed and move on.
        if (addr.isUnresolved()) {
            failedAddrsSet.add(addr);
            continue;
        }
        // Each address gets its own timeout strategy, i.e. the budget is not shared between addresses.
        TimeoutStrategy connTimeoutStgy = new ExponentialBackoffTimeoutStrategy(totalTimeout, cfg.failureDetectionTimeoutEnabled() ? DFLT_INITIAL_TIMEOUT : cfg.connectionTimeout(), cfg.maxConnectionTimeout());
        while (ses == null) {
            // Reconnection on handshake timeout.
            if (stopping)
                throw new IgniteSpiException("Node is stopping.");
            if (isLocalNodeAddress(addr)) {
                if (log.isDebugEnabled())
                    log.debug("Skipping local address [addr=" + addr + ", locAddrs=" + node.attribute(attrs.addresses()) + ", node=" + node + ']');
                skippedAddrs++;
                break;
            }
            // Last timeout handed out by the strategy; declared here so the catch blocks can log it.
            long timeout = 0;
            connectGate.enter();
            try {
                if (nodeGetter.apply(node.id()) == null)
                    throw new ClusterTopologyCheckedException("Failed to send message (node left topology): " + node);
                SocketChannel ch = socketChannelFactory.get();
                ch.configureBlocking(true);
                ch.socket().setTcpNoDelay(cfg.tcpNoDelay());
                ch.socket().setKeepAlive(true);
                if (cfg.socketReceiveBuffer() > 0)
                    ch.socket().setReceiveBufferSize(cfg.socketReceiveBuffer());
                if (cfg.socketSendBuffer() > 0)
                    ch.socket().setSendBufferSize(cfg.socketSendBuffer());
                ConnectionKey connKey = new ConnectionKey(node.id(), connIdx, -1);
                GridNioRecoveryDescriptor recoveryDesc = outRecoveryDescriptor(node, connKey);
                assert recoveryDesc != null : "Recovery descriptor not found [connKey=" + connKey + ", rmtNode=" + node.id() + ']';
                // The descriptor could not be reserved: drop the fresh channel, close whatever session
                // the descriptor currently references and give up without a session.
                if (!recoveryDesc.reserve()) {
                    U.closeQuiet(ch);
                    // Ensure the session is closed.
                    GridNioSession sesFromRecovery = recoveryDesc.session();
                    if (sesFromRecovery != null) {
                        // Keep calling close() until the session reports a non-zero close time.
                        while (sesFromRecovery.closeTime() == 0) sesFromRecovery.close();
                    }
                    return null;
                }
                long rcvCnt;
                Map<Integer, Object> meta = new HashMap<>();
                GridSslMeta sslMeta = null;
                try {
                    timeout = connTimeoutStgy.nextTimeout();
                    ch.socket().connect(addr, (int) timeout);
                    // Re-check topology membership: the node may have left while we were connecting.
                    if (nodeGetter.apply(node.id()) == null)
                        throw new ClusterTopologyCheckedException("Failed to send message (node left topology): " + node);
                    if (stateProvider.isSslEnabled()) {
                        meta.put(SSL_META.ordinal(), sslMeta = new GridSslMeta());
                        SSLEngine sslEngine = stateProvider.createSSLEngine();
                        // This side initiates the connection, so the engine runs in client mode.
                        sslEngine.setUseClientMode(true);
                        sslMeta.sslEngine(sslEngine);
                    }
                    ClusterNode locNode = locNodeSupplier.get();
                    if (locNode == null)
                        throw new IgniteCheckedException("Local node has not been started or " + "fully initialized [isStopping=" + stateProvider.isStopping() + ']');
                    timeout = connTimeoutStgy.nextTimeout(timeout);
                    // The handshake result is either one of the special marker values checked below or
                    // the received-messages count passed on to the recovery descriptor.
                    rcvCnt = safeTcpHandshake(ch, node.id(), timeout, sslMeta, new HandshakeMessage2(locNode.id(), recoveryDesc.incrementConnectCount(), recoveryDesc.received(), connIdx));
                    if (rcvCnt == ALREADY_CONNECTED)
                        return null;
                    else if (rcvCnt == NODE_STOPPING) {
                        // Safe to remap on remote node stopping.
                        throw new ClusterTopologyCheckedException("Remote node started stop procedure: " + node.id());
                    } else if (rcvCnt == UNKNOWN_NODE)
                        throw new IgniteCheckedException("Remote node does not observe current node " + "in topology : " + node.id());
                    else if (rcvCnt == NEED_WAIT) {
                        // Remote node asked to wait: sleep and retry the handshake for the same address
                        // (presumably covers scenarios with delayed client node join).
                        // The 'continue' still runs both finally blocks but skips the heartbeat update below.
                        if (log.isDebugEnabled())
                            log.debug("NEED_WAIT received, handshake after delay [node = " + node + ", outOfTopologyDelay = " + DFLT_NEED_WAIT_DELAY + "ms]");
                        U.sleep(DFLT_NEED_WAIT_DELAY);
                        continue;
                    } else if (rcvCnt < 0)
                        throw new IgniteCheckedException("Unsupported negative receivedCount [rcvCnt=" + rcvCnt + ", senderNode=" + node + ']');
                    recoveryDesc.onHandshake(rcvCnt);
                    meta.put(CONSISTENT_ID_META, node.consistentId());
                    meta.put(CONN_IDX_META, connKey);
                    meta.put(GridNioServer.RECOVERY_DESC_META_KEY, recoveryDesc);
                    ses = nioSrv.createSession(ch, meta, false, null).get();
                } finally {
                    // No session was created (exception, early return or retry): close the channel
                    // and release the descriptor reserved above.
                    if (ses == null) {
                        U.closeQuiet(ch);
                        if (recoveryDesc != null)
                            recoveryDesc.release();
                    }
                }
            } catch (IgniteSpiOperationTimeoutException e) {
                // Handshake is timed out.
                if (ses != null) {
                    ses.close();
                    ses = null;
                }
                eRegistrySupplier.get().onException("Handshake timed out (will retry with increased timeout) [connTimeoutStrategy=" + connTimeoutStgy + ", addr=" + addr + ']', e);
                if (log.isDebugEnabled())
                    log.debug("Handshake timed out (will retry with increased timeout) [connTimeoutStrategy=" + connTimeoutStgy + ", addr=" + addr + ", err=" + e + ']');
                // Stop retrying this address once the strategy reports a timeout
                // (presumably the total budget is exhausted - see TimeoutStrategy#checkTimeout).
                if (connTimeoutStgy.checkTimeout()) {
                    U.warn(log, "Handshake timed out (will stop attempts to perform the handshake) " + "[node=" + node.id() + ", connTimeoutStrategy=" + connTimeoutStgy + ", err=" + e.getMessage() + ", addr=" + addr + ", failureDetectionTimeoutEnabled=" + cfg.failureDetectionTimeoutEnabled() + ", timeout=" + timeout + ']');
                    String msg = "Failed to connect to node (is node still alive?). " + "Make sure that each ComputeTask and cache Transaction has a timeout set " + "in order to prevent parties from waiting forever in case of network issues " + "[nodeId=" + node.id() + ", addrs=" + addrs + ']';
                    if (errs == null)
                        errs = new IgniteCheckedException(msg, e);
                    else
                        errs.addSuppressed(new IgniteCheckedException(msg, e));
                    break;
                }
            } catch (ClusterTopologyCheckedException e) {
                // Topology-related failures are propagated to the caller unchanged.
                throw e;
            } catch (Exception e) {
                // Most probably IO error on socket connect or handshake.
                if (ses != null) {
                    ses.close();
                    ses = null;
                }
                eRegistrySupplier.get().onException("Client creation failed [addr=" + addr + ", err=" + e + ']', e);
                if (log.isDebugEnabled())
                    log.debug("Client creation failed [addr=" + addr + ", err=" + e + ']');
                if (X.hasCause(e, "Too many open files", SocketException.class))
                    throw new IgniteTooManyOpenFilesException(e);
                // Check if timeout occurred in case of unrecoverable exception.
                if (connTimeoutStgy.checkTimeout()) {
                    U.warn(log, "Connection timed out (will stop attempts to perform the connect) " + "[node=" + node.id() + ", connTimeoutStgy=" + connTimeoutStgy + ", failureDetectionTimeoutEnabled=" + cfg.failureDetectionTimeoutEnabled() + ", timeout=" + timeout + ", err=" + e.getMessage() + ", addr=" + addr + ']');
                    String msg = "Failed to connect to node (is node still alive?). " + "Make sure that each ComputeTask and cache Transaction has a timeout set " + "in order to prevent parties from waiting forever in case of network issues " + "[nodeId=" + node.id() + ", addrs=" + addrs + ']';
                    if (errs == null)
                        errs = new IgniteCheckedException(msg, e);
                    else
                        errs.addSuppressed(new IgniteCheckedException(msg, e));
                    break;
                }
                // Inverse communication protocol works only for client nodes.
                if (node.isClient() && isNodeUnreachableException(e))
                    failedAddrsSet.add(addr);
                // Recoverable errors: pause and retry the same address. Anything else: record the error
                // and proceed to the next address.
                if (isRecoverableException(e))
                    U.sleep(DFLT_RECONNECT_DELAY);
                else {
                    String msg = "Failed to connect to node due to unrecoverable exception (is node still alive?). " + "Make sure that each ComputeTask and cache Transaction has a timeout set " + "in order to prevent parties from waiting forever in case of network issues " + "[nodeId=" + node.id() + ", addrs=" + addrs + ", err= " + e + ']';
                    if (errs == null)
                        errs = new IgniteCheckedException(msg, e);
                    else
                        errs.addSuppressed(new IgniteCheckedException(msg, e));
                    break;
                }
            } finally {
                connectGate.leave();
            }
            // When running on the communication worker's own thread, refresh its heartbeat between attempts.
            CommunicationWorker commWorker0 = commWorker;
            if (commWorker0 != null && commWorker0.runner() == Thread.currentThread())
                commWorker0.updateHeartbeat();
        }
        // A session was established via this address: no need to try the remaining ones.
        if (ses != null)
            break;
    }
    if (ses == null) {
        // NodeUnreachableException is considered only when paired connections are not in effect (disabled
        // locally or not advertised by the remote node). Presumably with paired connections no inverse
        // connection is requested, so there is no point in throwing NodeUnreachableException.
        if (!cfg.usePairedConnections() || !Boolean.TRUE.equals(node.attribute(attrs.pairedConnection()))) {
            if (!(Thread.currentThread() instanceof IgniteDiscoveryThread) && locNodeIsSrv) {
                // Every non-local address of the client node was recorded as failed.
                if (node.isClient() && (addrs.size() - skippedAddrs == failedAddrsSet.size())) {
                    String msg = "Failed to connect to all addresses of node " + node.id() + ": " + failedAddrsSet + "; inverse connection will be requested.";
                    throw new NodeUnreachableException(msg);
                }
            }
        }
        processSessionCreationError(node, addrs, errs == null ? new IgniteCheckedException("No session found") : errs);
    }
    return ses;
}
Also used : SocketChannel(java.nio.channels.SocketChannel) SocketException(java.net.SocketException) GridNioSession(org.apache.ignite.internal.util.nio.GridNioSession) HashMap(java.util.HashMap) InetSocketAddress(java.net.InetSocketAddress) SSLEngine(javax.net.ssl.SSLEngine) GridSslMeta(org.apache.ignite.internal.util.nio.ssl.GridSslMeta) IgniteTooManyOpenFilesException(org.apache.ignite.internal.IgniteTooManyOpenFilesException) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) IgniteSpiOperationTimeoutException(org.apache.ignite.spi.IgniteSpiOperationTimeoutException) IgniteSpiException(org.apache.ignite.spi.IgniteSpiException) HashSet(java.util.HashSet) ExponentialBackoffTimeoutStrategy(org.apache.ignite.spi.ExponentialBackoffTimeoutStrategy) ClusterNode(org.apache.ignite.cluster.ClusterNode) HandshakeMessage2(org.apache.ignite.spi.communication.tcp.messages.HandshakeMessage2) ExponentialBackoffTimeoutStrategy(org.apache.ignite.spi.ExponentialBackoffTimeoutStrategy) TimeoutStrategy(org.apache.ignite.spi.TimeoutStrategy) IgniteDiscoveryThread(org.apache.ignite.spi.discovery.IgniteDiscoveryThread) CommunicationTcpUtils.handshakeTimeoutException(org.apache.ignite.spi.communication.tcp.internal.CommunicationTcpUtils.handshakeTimeoutException) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) SSLException(javax.net.ssl.SSLException) IgniteSpiOperationTimeoutException(org.apache.ignite.spi.IgniteSpiOperationTimeoutException) IgniteSpiException(org.apache.ignite.spi.IgniteSpiException) SocketException(java.net.SocketException) SocketTimeoutException(java.net.SocketTimeoutException) IOException(java.io.IOException) CommunicationTcpUtils.isRecoverableException(org.apache.ignite.spi.communication.tcp.internal.CommunicationTcpUtils.isRecoverableException) IgniteTooManyOpenFilesException(org.apache.ignite.internal.IgniteTooManyOpenFilesException) ClusterTopologyCheckedException(org.apache.ignite.internal.cluster.ClusterTopologyCheckedException) 
AtomicInteger(java.util.concurrent.atomic.AtomicInteger) GridNioRecoveryDescriptor(org.apache.ignite.internal.util.nio.GridNioRecoveryDescriptor) ClusterTopologyCheckedException(org.apache.ignite.internal.cluster.ClusterTopologyCheckedException)

Aggregations

IgniteCheckedException (org.apache.ignite.IgniteCheckedException)4 IgniteDiscoveryThread (org.apache.ignite.spi.discovery.IgniteDiscoveryThread)4 IgniteClientDisconnectedException (org.apache.ignite.IgniteClientDisconnectedException)2 ClusterNode (org.apache.ignite.cluster.ClusterNode)2 GridFutureAdapter (org.apache.ignite.internal.util.future.GridFutureAdapter)2 IgniteSpiException (org.apache.ignite.spi.IgniteSpiException)2 IgniteThread (org.apache.ignite.thread.IgniteThread)2 IOException (java.io.IOException)1 InetSocketAddress (java.net.InetSocketAddress)1 SocketException (java.net.SocketException)1 SocketTimeoutException (java.net.SocketTimeoutException)1 SocketChannel (java.nio.channels.SocketChannel)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 List (java.util.List)1 UUID (java.util.UUID)1 CopyOnWriteArrayList (java.util.concurrent.CopyOnWriteArrayList)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 AtomicReference (java.util.concurrent.atomic.AtomicReference)1