Search in sources :

Example 11 with NetView

use of org.apache.geode.distributed.internal.membership.NetView in project geode by apache.

the class GMSJoinLeave method processViewMessage.

/**
 * Handles an incoming {@link InstallViewMessage}, which either prepares or installs a membership
 * view.
 *
 * <p>
 * The message is dropped when its sender is in neither the current view nor the prepared view.
 * Views older than the current one are acknowledged and otherwise ignored. A "preparing" message
 * records the view as the prepared view; a non-preparing message installs it, or forces a
 * disconnect when this already-joined member is missing from the new view.
 *
 * @param m the view message to process
 */
private void processViewMessage(final InstallViewMessage m) {
    NetView view = m.getView();

    // If our current view doesn't contain the sender then we want to ignore that view, unless
    // the prepared view does contain it. That may happen when a locator re-joins and takes over
    // the coordinator's responsibility.
    boolean senderMissingFromCurrentView = currentView != null && !currentView.contains(m.getSender());
    if (senderMissingFromCurrentView && (this.preparedView == null || !this.preparedView.contains(m.getSender()))) {
        logger.info("Ignoring the view {} from member {}, which is not in my current view {} ", view, m.getSender(), currentView);
        return;
    }

    // Old views are acknowledged but not acted upon.
    if (currentView != null && view.getViewId() < currentView.getViewId()) {
        ackView(m);
        return;
    }

    // While we have not finished joining, check whether this view already lists our address;
    // if so the view should be installed so join() can finish its work.
    boolean viewContainsMyUnjoinedAddress = !this.isJoined
            && view.getMembers().stream().anyMatch(mbr -> localAddress.compareTo(mbr) == 0);

    if (m.isPreparing()) {
        boolean alreadyPreparedSameOrNewer = this.preparedView != null && this.preparedView.getViewId() >= view.getViewId();
        if (alreadyPreparedSameOrNewer) {
            // Answer with the view we have already prepared rather than replacing it.
            services.getMessenger().send(new ViewAckMessage(m.getSender(), this.preparedView));
            return;
        }
        this.preparedView = view;
        if (viewContainsMyUnjoinedAddress) {
            // this will notifyAll the joinResponse
            installView(view);
        }
        ackView(m);
        return;
    }

    // Not preparing: this is the actual install request.
    if (isJoined && currentView != null && !view.contains(this.localAddress)) {
        logger.fatal("This member is no longer in the membership view.  My ID is {} and the new view is {}", localAddress, view);
        forceDisconnect("This node is no longer in the membership view");
        return;
    }
    if (isJoined || viewContainsMyUnjoinedAddress) {
        installView(view);
    }
    // no need to ack a rebroadcast view
    if (!m.isRebroadcast()) {
        ackView(m);
    }
}
Also used : ViewAckMessage(org.apache.geode.distributed.internal.membership.gms.messages.ViewAckMessage) InternalDistributedMember(org.apache.geode.distributed.internal.membership.InternalDistributedMember) NetView(org.apache.geode.distributed.internal.membership.NetView)

Example 12 with NetView

use of org.apache.geode.distributed.internal.membership.NetView in project geode by apache.

the class GMSJoinLeave method findCoordinator.

/**
 * This contacts the locators to find out who the current coordinator is. All locators are
 * contacted. If they don't agree then we choose the oldest coordinator (the candidate that sorts
 * first under {@code compareTo}) and store it in {@code searchState.possibleCoordinator}.
 *
 * <p>
 * Coordinators that were reported together with a membership view are preferred over those
 * reported without one. The newest view seen, its id and the accompanying registrants are
 * recorded in the search state as a side effect.
 *
 * @return {@code true} if a possible coordinator was recorded; {@code false} if no locator
 *         reported a coordinator before the locator wait time elapsed. When the previous attempt
 *         reached no joined locator and a view is already known, the result of
 *         {@code findCoordinatorFromView()} is returned instead.
 * @throws GemFireConfigException if a locator answers with a rejection message
 */
private boolean findCoordinator() {
    SearchState state = searchState;
    assert this.localAddress != null;
    // If the previous attempt did not reach a locator that has joined the system but we do
    // hold a membership view, use that view to look for the coordinator.
    if (!state.hasContactedAJoinedLocator && state.view != null) {
        return findCoordinatorFromView();
    }
    String dhalgo = services.getConfig().getDistributionConfig().getSecurityUDPDHAlgo();
    FindCoordinatorRequest request = new FindCoordinatorRequest(this.localAddress, state.alreadyTried, state.viewId, services.getMessenger().getPublicKey(localAddress), services.getMessenger().getRequestId(), dhalgo);
    Set<InternalDistributedMember> possibleCoordinators = new HashSet<>();
    Set<InternalDistributedMember> coordinatorsWithView = new HashSet<>();
    long giveUpTime = System.currentTimeMillis() + ((long) services.getConfig().getLocatorWaitTime() * 1000L);
    int connectTimeout = (int) services.getConfig().getMemberTimeout() * 2;
    boolean anyResponses = false;
    logger.debug("sending {} to {}", request, locators);
    state.hasContactedAJoinedLocator = false;
    state.locatorsContacted = 0;
    // Keep polling every locator until one of them names a coordinator or the wait time is up.
    // NOTE(review): there is no pause between rounds, so fast-failing connections make this
    // loop spin until giveUpTime - confirm whether a retry sleep is wanted here.
    do {
        for (InetSocketAddress addr : locators) {
            try {
                Object o = tcpClientWrapper.sendCoordinatorFindRequest(addr, request, connectTimeout);
                FindCoordinatorResponse response = (o instanceof FindCoordinatorResponse) ? (FindCoordinatorResponse) o : null;
                if (response != null) {
                    if (response.getRejectionMessage() != null) {
                        throw new GemFireConfigException(response.getRejectionMessage());
                    }
                    setCoordinatorPublicKey(response);
                    state.locatorsContacted++;
                    if (!state.hasContactedAJoinedLocator && response.getSenderId() != null && response.getSenderId().getVmViewId() >= 0) {
                        logger.debug("Locator's address indicates it is part of a distributed system " + "so I will not become membership coordinator on this attempt to join");
                        state.hasContactedAJoinedLocator = true;
                    }
                    if (response.getCoordinator() != null) {
                        anyResponses = true;
                        NetView v = response.getView();
                        int viewId = v == null ? -1 : v.getViewId();
                        // Remember the newest view reported so far along with its registrants.
                        if (viewId > state.viewId) {
                            state.viewId = viewId;
                            state.view = v;
                            state.registrants.clear();
                            if (response.getRegistrants() != null) {
                                state.registrants.addAll(response.getRegistrants());
                            }
                        }
                        if (viewId > -1) {
                            coordinatorsWithView.add(response.getCoordinator());
                        }
                        possibleCoordinators.add(response.getCoordinator());
                    }
                }
            } catch (IOException | ClassNotFoundException problem) {
                // Best effort: an unreachable or misbehaving locator must not abort the search,
                // but leave a trace so the failure can be diagnosed.
                logger.debug("Exception thrown when contacting a locator", problem);
            }
        }
    } while (!anyResponses && System.currentTimeMillis() < giveUpTime);
    if (possibleCoordinators.isEmpty()) {
        return false;
    }
    if (!coordinatorsWithView.isEmpty()) {
        // lets check current coordinators in view only
        possibleCoordinators = coordinatorsWithView;
    }
    // Pick the candidate that compares lowest; with a single candidate the loop body never runs.
    Iterator<InternalDistributedMember> it = possibleCoordinators.iterator();
    InternalDistributedMember oldest = it.next();
    while (it.hasNext()) {
        InternalDistributedMember candidate = it.next();
        if (oldest.compareTo(candidate) > 0) {
            oldest = candidate;
        }
    }
    state.possibleCoordinator = oldest;
    return true;
}
Also used : FindCoordinatorResponse(org.apache.geode.distributed.internal.membership.gms.locator.FindCoordinatorResponse) InetSocketAddress(java.net.InetSocketAddress) NetView(org.apache.geode.distributed.internal.membership.NetView) IOException(java.io.IOException) InternalDistributedMember(org.apache.geode.distributed.internal.membership.InternalDistributedMember) GemFireConfigException(org.apache.geode.GemFireConfigException) FindCoordinatorRequest(org.apache.geode.distributed.internal.membership.gms.locator.FindCoordinatorRequest) HashSet(java.util.HashSet)

Example 13 with NetView

use of org.apache.geode.distributed.internal.membership.NetView in project geode by apache.

the class GMSJoinLeave method remove.

/**
 * Requests removal of a member from the membership view.
 *
 * <p>
 * When the member is in the current view, a {@link RemoveMemberMessage} is processed locally
 * via {@code processRemoveRequest} and, if this member is not the coordinator, re-addressed and
 * sent on. When there is no current view or the member is not in it, a removal message built
 * from the member itself is sent instead.
 *
 * @param m the member to remove
 * @param reason the reason carried in the removal message
 */
@Override
public void remove(InternalDistributedMember m, String reason) {
    NetView v = this.currentView;
    services.getCancelCriterion().checkCancelInProgress(null);

    if (v == null || !v.contains(m)) {
        // The member is not in a view we know of; the message is built with the member as the
        // first constructor argument (presumably the recipient - confirm against
        // RemoveMemberMessage).
        RemoveMemberMessage directNotice = new RemoveMemberMessage(m, m, reason);
        services.getMessenger().send(directNotice);
        return;
    }

    // Leave the member being removed out of the preferred-coordinator selection.
    Set<InternalDistributedMember> filter = new HashSet<>(Collections.singleton(m));
    RemoveMemberMessage request = new RemoveMemberMessage(v.getPreferredCoordinators(filter, getMemberID(), 5), m, reason);
    request.setSender(this.localAddress);
    processRemoveRequest(request);

    if (this.isCoordinator) {
        return;
    }
    // Not the coordinator: forward the request to the preferred coordinators of the view.
    request.resetRecipients();
    request.setRecipients(v.getPreferredCoordinators(Collections.emptySet(), localAddress, 10));
    services.getMessenger().send(request);
}
Also used : InternalDistributedMember(org.apache.geode.distributed.internal.membership.InternalDistributedMember) RemoveMemberMessage(org.apache.geode.distributed.internal.membership.gms.messages.RemoveMemberMessage) NetView(org.apache.geode.distributed.internal.membership.NetView) HashSet(java.util.HashSet)

Example 14 with NetView

use of org.apache.geode.distributed.internal.membership.NetView in project geode by apache.

the class GMSMembershipManager method join.

/**
 * Joins the distributed system.
 *
 * <p>
 * Runs under the latest-view write lock with {@code isJoining} set for the duration of the
 * attempt. On success a copy of the join/leave service's view becomes {@code latestView} and
 * the listener is notified of it.
 *
 * @throws GemFireConfigException - configuration error, or the join attempt returned false
 * @throws SystemConnectException - problem joining
 * @throws DistributionException - any other checked exception raised while joining
 */
private void join() {
    // NOTE(review): both calls are handed null, presumably to clear state left by an earlier
    // shutdown/cancel - confirm against Services and the cancel criterion.
    services.setShutdownCause(null);
    services.getCancelCriterion().cancel(null);

    latestViewWriteLock.lock();
    try {
        // added for bug #44373
        this.isJoining = true;

        // connect
        final long joinStartMillis = System.currentTimeMillis();
        if (!services.getJoinLeave().join()) {
            throw new GemFireConfigException("Unable to join the distributed system.  " + "Operation either timed out, was stopped or Locator does not exist.");
        }
        final long elapsedMillis = System.currentTimeMillis() - joinStartMillis;
        logger.info(LogMarker.DISTRIBUTION, LocalizedMessage.create(LocalizedStrings.GroupMembershipService_JOINED_TOOK__0__MS, elapsedMillis));

        // Keep our own copy of the view we joined with and announce it.
        NetView joinedView = services.getJoinLeave().getView();
        latestView = new NetView(joinedView, joinedView.getViewId());
        listener.viewInstalled(latestView);
    } catch (RuntimeException ex) {
        // Unchecked exceptions pass through untouched.
        throw ex;
    } catch (Exception ex) {
        // Surface a SystemConnectException found two causes down; wrap everything else.
        Throwable nested = ex.getCause();
        if (nested != null && nested.getCause() instanceof SystemConnectException) {
            throw (SystemConnectException) nested.getCause();
        }
        throw new DistributionException(LocalizedStrings.GroupMembershipService_AN_EXCEPTION_WAS_THROWN_WHILE_JOINING.toLocalizedString(), ex);
    } finally {
        // Clear the joining flag first, then release the lock.
        this.isJoining = false;
        latestViewWriteLock.unlock();
    }
}
Also used : GemFireConfigException(org.apache.geode.GemFireConfigException) NetView(org.apache.geode.distributed.internal.membership.NetView) DistributionException(org.apache.geode.distributed.internal.DistributionException) MemberShunnedException(org.apache.geode.internal.tcp.MemberShunnedException) TimeoutException(java.util.concurrent.TimeoutException) ShunnedMemberException(org.apache.geode.distributed.internal.direct.ShunnedMemberException) DistributedSystemDisconnectedException(org.apache.geode.distributed.DistributedSystemDisconnectedException) CancelException(org.apache.geode.CancelException) DistributionException(org.apache.geode.distributed.internal.DistributionException) ForcedDisconnectException(org.apache.geode.ForcedDisconnectException) SystemConnectException(org.apache.geode.SystemConnectException) GemFireConfigException(org.apache.geode.GemFireConfigException) IOException(java.io.IOException) NotSerializableException(java.io.NotSerializableException) ToDataException(org.apache.geode.ToDataException) SystemConnectException(org.apache.geode.SystemConnectException)

Example 15 with NetView

use of org.apache.geode.distributed.internal.membership.NetView in project geode by apache.

the class GMSMembershipManager method directChannelSend.

/**
 * Perform the grossness associated with sending a message over a DirectChannel.
 *
 * <p>
 * When {@code content.forAll()} is true the message goes to every member of {@code latestView}
 * (read under the view read lock) and {@code destinations} is not consulted.
 *
 * @param destinations the list of destinations
 * @param content the message
 * @param theStats the statistics object to update; may be null
 * @return all recipients who did not receive the message (null if all received it). Null is
 *         also returned for a connect failure on a send to all members, and for an
 *         {@link IOException} that is not a {@link NotSerializableException}.
 * @throws NotSerializableException if the message is not serializable
 */
protected Set<InternalDistributedMember> directChannelSend(InternalDistributedMember[] destinations, DistributionMessage content, DMStats theStats) throws NotSerializableException {
    final boolean allDestinations = content.forAll();
    InternalDistributedMember[] keys;
    if (allDestinations) {
        // Snapshot the current view's membership while holding the read lock.
        latestViewReadLock.lock();
        try {
            List<InternalDistributedMember> viewMembers = latestView.getMembers();
            keys = viewMembers.toArray(new InternalDistributedMember[viewMembers.size()]);
        } finally {
            latestViewReadLock.unlock();
        }
    } else {
        keys = destinations;
    }

    try {
        int sentBytes = directChannel.send(this, keys, content, this.services.getConfig().getDistributionConfig().getAckWaitThreshold(), this.services.getConfig().getDistributionConfig().getAckSevereAlertThreshold());
        if (theStats != null) {
            theStats.incSentBytes(sentBytes);
        }
        // Nothing was sent while a cancel is in progress: report the disconnect.
        if (sentBytes == 0 && services.getCancelCriterion().isCancelInProgress()) {
            throw new DistributedSystemDisconnectedException();
        }
    } catch (DistributedSystemDisconnectedException ex) {
        if (services.getShutdownCause() == null) {
            // see bug 41416
            throw ex;
        }
        throw new DistributedSystemDisconnectedException("DistributedSystem is shutting down", services.getShutdownCause());
    } catch (ConnectExceptions ex) {
        // Check if the connect exception is due to system shutting down.
        if (shutdownInProgress()) {
            if (services.getShutdownCause() == null) {
                throw new DistributedSystemDisconnectedException();
            }
            throw new DistributedSystemDisconnectedException("DistributedSystem is shutting down", services.getShutdownCause());
        }
        if (allDestinations) {
            return null;
        }
        // We need to return this list of failures
        List<InternalDistributedMember> members = (List<InternalDistributedMember>) ex.getMembers();

        // SANITY CHECK: If we fail to send a message to an existing member
        // of the view, we have a serious error (bug36202).
        // grab a recent view, excluding shunned members
        NetView view = services.getJoinLeave().getView();

        // Iterate through members and causes in tandem :-(
        Iterator<?> causes = ex.getCauses().iterator();
        for (InternalDistributedMember member : members) {
            Throwable th = (Throwable) causes.next();
            // Only a failure to reach a non-shunned member of the view is logged as fatal.
            if (view.contains(member) && !(th instanceof ShunnedMemberException)) {
                logger.fatal(LocalizedMessage.create(LocalizedStrings.GroupMembershipService_FAILED_TO_SEND_MESSAGE_0_TO_MEMBER_1_VIEW_2, new Object[] { content, member, view }), th);
                // Assert.assertTrue(false, "messaging contract failure");
            }
        }
        return new HashSet<>(members);
    } catch (ToDataException | CancelException e) {
        throw e;
    } catch (IOException e) {
        if (logger.isDebugEnabled()) {
            logger.debug("Membership: directChannelSend caught exception: {}", e.getMessage(), e);
        }
        // Only serialization failures are propagated; other I/O errors fall through to the
        // final return.
        if (e instanceof NotSerializableException) {
            throw (NotSerializableException) e;
        }
    } catch (RuntimeException | Error e) {
        if (logger.isDebugEnabled()) {
            logger.debug("Membership: directChannelSend caught exception: {}", e.getMessage(), e);
        }
        throw e;
    }
    return null;
}
Also used : ShunnedMemberException(org.apache.geode.distributed.internal.direct.ShunnedMemberException) DistributedSystemDisconnectedException(org.apache.geode.distributed.DistributedSystemDisconnectedException) ConnectExceptions(org.apache.geode.internal.tcp.ConnectExceptions) NetView(org.apache.geode.distributed.internal.membership.NetView) InternalGemFireError(org.apache.geode.InternalGemFireError) IOException(java.io.IOException) NotSerializableException(java.io.NotSerializableException) InternalDistributedMember(org.apache.geode.distributed.internal.membership.InternalDistributedMember) ToDataException(org.apache.geode.ToDataException) Iterator(java.util.Iterator) List(java.util.List) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) CancelException(org.apache.geode.CancelException)

Aggregations

NetView (org.apache.geode.distributed.internal.membership.NetView)101 InternalDistributedMember (org.apache.geode.distributed.internal.membership.InternalDistributedMember)65 MembershipTest (org.apache.geode.test.junit.categories.MembershipTest)59 Test (org.junit.Test)59 IntegrationTest (org.apache.geode.test.junit.categories.IntegrationTest)57 HashSet (java.util.HashSet)25 InstallViewMessage (org.apache.geode.distributed.internal.membership.gms.messages.InstallViewMessage)18 Message (org.jgroups.Message)14 ArrayList (java.util.ArrayList)12 JoinResponseMessage (org.apache.geode.distributed.internal.membership.gms.messages.JoinResponseMessage)10 UnitTest (org.apache.geode.test.junit.categories.UnitTest)10 LinkedList (java.util.LinkedList)9 DistributionMessage (org.apache.geode.distributed.internal.DistributionMessage)9 IOException (java.io.IOException)8 FlakyTest (org.apache.geode.test.junit.categories.FlakyTest)8 Properties (java.util.Properties)7 FindCoordinatorResponse (org.apache.geode.distributed.internal.membership.gms.locator.FindCoordinatorResponse)7 CancelException (org.apache.geode.CancelException)6 ConfigurationProperties (org.apache.geode.distributed.ConfigurationProperties)6 JoinRequestMessage (org.apache.geode.distributed.internal.membership.gms.messages.JoinRequestMessage)6