Search in sources :

Example 6 with NetView

use of org.apache.geode.distributed.internal.membership.NetView in project geode by apache.

the class GMSJoinLeave method waitForJoinResponse.

/**
 * Waits for a reply to an outstanding join request and returns the reply the caller should act
 * on.
 * <p>
 * All work happens while holding the monitor of {@code joinResponse}. If a reply carrying a view
 * arrives before this member has joined, the reply slot is cleared and the view is stored in
 * {@code searchState.view}. When the coordinator of that view is already in
 * {@code searchState.alreadyTried}, the method waits one more timeout period for another reply;
 * otherwise no reply is returned.
 *
 * @return the reply found in {@code joinResponse[0]}, or {@code null} if none is available, if
 *         the reply's coordinator has not been tried yet, or if {@code isJoined} is set
 * @throws InterruptedException if the thread is interrupted while waiting
 */
private JoinResponseMessage waitForJoinResponse() throws InterruptedException {
    synchronized (joinResponse) {
        if (joinResponse[0] == null && !isJoined) {
            // Giving up here is not fatal: if a response is still on its way the new view
            // will be received and the join completes that way. See installView()
            joinResponse.wait(joinResponseWaitMillis());
        }
        JoinResponseMessage reply = joinResponse[0];
        if (reply != null && reply.getCurrentView() != null && !isJoined) {
            // consume the reply so that a later wait can observe a fresh one
            joinResponse[0] = null;
            final NetView offeredView = reply.getCurrentView();
            final InternalDistributedMember offeredCoordinator = offeredView.getCoordinator();
            final boolean alreadyAsked = searchState.alreadyTried.contains(offeredCoordinator);
            searchState.view = offeredView;
            if (alreadyAsked) {
                // a join request was already sent to this coordinator; assume this reply came
                // back immediately and give it the same amount of time again
                joinResponse.wait(joinResponseWaitMillis());
                reply = joinResponse[0];
            } else {
                // no usable reply yet - the caller should try this coordinator
                reply = null;
            }
            searchState.view = offeredView;
        }
        return isJoined ? null : reply;
    }
}

/**
 * Computes how long to wait for a join response: the larger of the configured member timeout
 * and one fifth of the configured join timeout.
 */
private long joinResponseWaitMillis() {
    return Math.max(services.getConfig().getMemberTimeout(), services.getConfig().getJoinTimeout() / 5);
}
Also used : InternalDistributedMember(org.apache.geode.distributed.internal.membership.InternalDistributedMember) NetView(org.apache.geode.distributed.internal.membership.NetView) JoinResponseMessage(org.apache.geode.distributed.internal.membership.gms.messages.JoinResponseMessage)

Example 7 with NetView

use of org.apache.geode.distributed.internal.membership.NetView in project geode by apache.

the class GMSJoinLeave method findCoordinatorFromView.

/**
 * Asks members of the view held in {@code searchState} who the membership coordinator is and
 * records the best candidate in {@code searchState.possibleCoordinator}.
 * <p>
 * Requests go to the members of {@code searchState.view} (capped at {@code MAX_DISCOVERY_NODES}
 * when that constant is positive), plus any {@code searchState.registrants}, minus
 * {@code localAddress}. When a UDP Diffie-Hellman algorithm is configured, one request is sent
 * per recipient; otherwise a single request is addressed to all recipients. Unless the
 * {@code "findCoordinatorFromView"} unit-testing flag is set, the method then waits up to
 * {@code DISCOVERY_TIMEOUT} on {@code searchState.responses} before evaluating the replies.
 * <p>
 * The wait is deliberately a single timed wait rather than a loop (hence the suppressed
 * {@code WA_NOT_IN_LOOP} warning): whatever replies have arrived by the time it ends are used.
 *
 * @return {@code true} if a possible coordinator was selected; {@code false} if none was found
 *         or the thread was interrupted while waiting (the interrupt flag is restored)
 */
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "WA_NOT_IN_LOOP")
boolean findCoordinatorFromView() {
    List<FindCoordinatorResponse> result;
    SearchState state = searchState;
    NetView v = state.view;
    List<InternalDistributedMember> recipients = new ArrayList<>(v.getMembers());
    if (recipients.size() > MAX_DISCOVERY_NODES && MAX_DISCOVERY_NODES > 0) {
        // copy the capped range so later additions/removals do not go through a subList view
        recipients = new ArrayList<>(recipients.subList(0, MAX_DISCOVERY_NODES));
    }
    if (state.registrants != null) {
        recipients.addAll(state.registrants);
    }
    // never ask ourselves
    recipients.remove(localAddress);
    boolean testing = unitTesting.contains("findCoordinatorFromView");
    synchronized (state.responses) {
        if (!testing) {
            state.responses.clear();
        }
        String dhalgo = services.getConfig().getDistributionConfig().getSecurityUDPDHAlgo();
        if (!dhalgo.isEmpty()) {
            // Usually this happens when locator re-joins the cluster and it has saved view.
            // Each recipient gets its own request (and its own request id).
            for (InternalDistributedMember mbr : recipients) {
                Set<InternalDistributedMember> r = new HashSet<>();
                r.add(mbr);
                FindCoordinatorRequest req = newFindCoordinatorRequest(state, dhalgo);
                req.setRecipients(r);
                services.getMessenger().send(req, v);
            }
        } else if (!recipients.isEmpty()) {
            // one request addressed to the whole recipient list; skipped when there is nobody
            // to ask so that no message with an empty recipient list is handed to the messenger
            FindCoordinatorRequest req = newFindCoordinatorRequest(state, dhalgo);
            req.setRecipients(recipients);
            services.getMessenger().send(req, v);
        }
        try {
            if (!testing) {
                state.responses.wait(DISCOVERY_TIMEOUT);
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            return false;
        }
        // snapshot the replies and reset the shared collection while still holding its monitor
        result = new ArrayList<>(state.responses);
        state.responses.clear();
    }
    InternalDistributedMember coord = null;
    if (localAddress.getNetMember().preferredForCoordinator()) {
        // it's possible that all other potential coordinators are gone
        // and this new member must become the coordinator
        coord = localAddress;
    }
    // true while the current candidate is one that has not yet joined (or there is none)
    boolean coordIsNoob = true;
    for (FindCoordinatorResponse resp : result) {
        InternalDistributedMember mbr = resp.getCoordinator();
        if (!state.alreadyTried.contains(mbr)) {
            boolean mbrIsNoob = (mbr.getVmViewId() < 0);
            if (mbrIsNoob) {
                // member has not yet joined: only considered while no joined member has been
                // found, and then the one that orders first by compareTo wins
                if (coordIsNoob && (coord == null || coord.compareTo(mbr, false) > 0)) {
                    coord = mbr;
                }
            } else {
                // member has already joined: beats any not-yet-joined candidate, and among
                // joined members the one with the highest view id wins
                if (coordIsNoob || mbr.getVmViewId() > coord.getVmViewId()) {
                    coord = mbr;
                    coordIsNoob = false;
                }
            }
        }
    }
    state.possibleCoordinator = coord;
    return coord != null;
}

/**
 * Builds a find-coordinator request from the given search state, stamped with this member's
 * public key and a request id obtained from the messenger for this call.
 */
private FindCoordinatorRequest newFindCoordinatorRequest(SearchState state, String dhalgo) {
    return new FindCoordinatorRequest(localAddress, state.alreadyTried, state.viewId, services.getMessenger().getPublicKey(localAddress), services.getMessenger().getRequestId(), dhalgo);
}
Also used : FindCoordinatorResponse(org.apache.geode.distributed.internal.membership.gms.locator.FindCoordinatorResponse) NetView(org.apache.geode.distributed.internal.membership.NetView) ArrayList(java.util.ArrayList) InternalDistributedMember(org.apache.geode.distributed.internal.membership.InternalDistributedMember) FindCoordinatorRequest(org.apache.geode.distributed.internal.membership.gms.locator.FindCoordinatorRequest) HashSet(java.util.HashSet)

Example 8 with NetView

use of org.apache.geode.distributed.internal.membership.NetView in project geode by apache.

the class GMSMembershipManager method addSurpriseMember.

/**
 * Logic for handling a direct connection event (message received from a member not in the view).
 * Does not employ the startup queue.
 * <p>
 * The bookkeeping runs while holding {@code latestViewWriteLock}, which this method acquires and
 * releases itself. If the member is already known (in {@code latestView} or in
 * {@code surpriseMembers}) the method simply returns; a member whose view id is older than the
 * latest view is shunned and its removal is requested on a separate thread; otherwise the member
 * is recorded as a surprise member and appended to a copy of the latest view. The listener
 * notification and the shunned-member warning are issued after the lock has been released.
 * <p>
 * NOTE(review): earlier documentation of this method said it must be called with
 * {@code latestViewLock} held and that it waits for a stable view; neither is visible in the
 * code below - confirm what callers are expected to do.
 *
 * @param dm the member joining
 * @return {@code true} if the member is (or already was) accepted, or if shutdown is in
 *         progress; {@code false} if the member is shunned
 */
public boolean addSurpriseMember(DistributedMember dm) {
    final InternalDistributedMember member = (InternalDistributedMember) dm;
    boolean ignoredAsShunned = false;
    latestViewWriteLock.lock();
    try {
        // nothing to do if the member is already known, either through the view or as an
        // earlier surprise member
        if (latestView.contains(member) || surpriseMembers.containsKey(member)) {
            return true;
        }
        if (member.getVmViewId() < 0) {
            logger.warn("adding a surprise member that has not yet joined the distributed system: " + member, new Exception("stack trace"));
        }
        if (latestView.getViewId() > member.getVmViewId()) {
            // tell the process that it should shut down distribution.
            // Run in a separate thread so we don't hold the view lock during the request. Bug #44995
            final Thread remover = new Thread(Thread.currentThread().getThreadGroup(), "Removing shunned GemFire node " + member) {

                @Override
                public void run() {
                    // fix for bug #42548
                    // this is an old member that shouldn't be added
                    logger.warn(LocalizedMessage.create(LocalizedStrings.GroupMembershipService_Invalid_Surprise_Member, new Object[] { member, latestView }));
                    requestMemberRemoval(member, "this member is no longer in the view but is initiating connections");
                }
            };
            remover.start();
            addShunnedMember(member);
            return false;
        }
        // Recording the member here ensures it is not removed if a new
        // view comes in and the member is still not visible.
        surpriseMembers.put(member, System.currentTimeMillis());
        if (shutdownInProgress()) {
            // Force disconnect, esp. the TCPConduit
            final String shutdownMessage = LocalizedStrings.GroupMembershipService_THIS_DISTRIBUTED_SYSTEM_IS_SHUTTING_DOWN.toLocalizedString();
            if (directChannel != null) {
                try {
                    directChannel.closeEndpoint(member, shutdownMessage);
                } catch (DistributedSystemDisconnectedException e) {
                    // ignore - happens during shutdown
                }
            }
            // for good luck
            destroyMember(member, shutdownMessage);
            // allow during shutdown
            return true;
        }
        if (!isShunned(member)) {
            // Ensure that the member is accounted for in the view by conjuring up a new view
            // that includes it. The listener is about to be told about a new member and
            // should rightfully expect that member to be in our membership view.
            // The member goes at the end of the list, which should keep it from being chosen
            // as an elder; this gets corrected when it finally shows up in a real view.
            final NetView extendedView = new NetView(latestView, latestView.getViewId());
            extendedView.add(member);
            latestView = extendedView;
        } else {
            ignoredAsShunned = true;
            surpriseMembers.remove(member);
        }
    } finally {
        latestViewWriteLock.unlock();
    }
    if (ignoredAsShunned) {
        // fix for bug #41538 - deadlock while alerting (warning is logged outside the lock)
        logger.warn(LocalizedMessage.create(LocalizedStrings.GroupMembershipService_MEMBERSHIP_IGNORING_SURPRISE_CONNECT_FROM_SHUNNED_MEMBER_0, member));
        return false;
    }
    listener.newMemberConnected(member);
    return true;
}
Also used : DistributedSystemDisconnectedException(org.apache.geode.distributed.DistributedSystemDisconnectedException) InternalDistributedMember(org.apache.geode.distributed.internal.membership.InternalDistributedMember) NetView(org.apache.geode.distributed.internal.membership.NetView) MemberShunnedException(org.apache.geode.internal.tcp.MemberShunnedException) TimeoutException(java.util.concurrent.TimeoutException) ShunnedMemberException(org.apache.geode.distributed.internal.direct.ShunnedMemberException) DistributedSystemDisconnectedException(org.apache.geode.distributed.DistributedSystemDisconnectedException) CancelException(org.apache.geode.CancelException) DistributionException(org.apache.geode.distributed.internal.DistributionException) ForcedDisconnectException(org.apache.geode.ForcedDisconnectException) SystemConnectException(org.apache.geode.SystemConnectException) GemFireConfigException(org.apache.geode.GemFireConfigException) IOException(java.io.IOException) NotSerializableException(java.io.NotSerializableException) ToDataException(org.apache.geode.ToDataException)

Example 9 with NetView

use of org.apache.geode.distributed.internal.membership.NetView in project geode by apache.

the class GMSJoinLeave method becomeCoordinator.

/**
 * Transitions this member into the coordinator role. This must be invoked under a synch on
 * viewInstallationLock that was held at the time the decision was made to become coordinator so
 * that the decision is atomic with actually becoming coordinator.
 * <p>
 * Does nothing if this member is already the coordinator. When there is no current view, an
 * initial view containing only this member is created and installed and the member is marked as
 * joined; otherwise the next view is obtained from {@code addMemberToNetView(oldCoordinator)}.
 * In both cases the view creator is then started with that view, followed by the view
 * broadcaster.
 *
 * @param oldCoordinator may be null
 */
private void becomeCoordinator(InternalDistributedMember oldCoordinator) {
    assert Thread.holdsLock(viewInstallationLock);
    if (isCoordinator) {
        return;
    }
    logger.info("This member is becoming the membership coordinator with address {}", localAddress);
    isCoordinator = true;
    final NetView viewToCreate;
    if (currentView != null) {
        // there is an existing view: create and send out a new one
        viewToCreate = addMemberToNetView(oldCoordinator);
    } else {
        // no view yet: create the initial membership view with this member at view id 0
        viewToCreate = new NetView(this.localAddress);
        viewToCreate.setFailureDetectionPort(localAddress, services.getHealthMonitor().getFailureDetectionPort());
        this.localAddress.setVmViewId(0);
        installView(viewToCreate);
        isJoined = true;
    }
    createAndStartViewCreator(viewToCreate);
    startViewBroadcaster();
}
Also used : NetView(org.apache.geode.distributed.internal.membership.NetView)

Example 10 with NetView

use of org.apache.geode.distributed.internal.membership.NetView in project geode by apache.

the class GMSJoinLeave method processLeaveRequest.

/**
 * Process a Leave request from another member. This may cause this member to become the new
 * membership coordinator. If this is the coordinator a new view will be triggered.
 * <p>
 * The request is only recorded when there is no current view yet, ignored when it concerns a
 * member that is not in the view and has an older view id, and treated as an instruction to
 * disconnect when it names this member.
 *
 * @param incomingRequest the request to be processed
 */
private void processLeaveRequest(LeaveRequestMessage incomingRequest) {
    logger.info("received leave request from {} for {}", incomingRequest.getSender(), incomingRequest.getMemberID());
    final NetView view = currentView;
    if (view == null) {
        // no view installed yet - keep the request for later
        recordViewRequest(incomingRequest);
        return;
    }
    final InternalDistributedMember departing = incomingRequest.getMemberID();
    if (logger.isDebugEnabled()) {
        logger.debug("JoinLeave.processLeaveRequest invoked.  isCoordinator=" + isCoordinator + "; isStopping=" + isStopping + "; cancelInProgress=" + services.getCancelCriterion().isCancelInProgress());
    }
    final boolean fromOldMember = !view.contains(departing) && departing.getVmViewId() < view.getViewId();
    if (fromOldMember) {
        logger.debug("ignoring leave request from old member");
        return;
    }
    if (departing.equals(this.localAddress)) {
        logger.info("I am being told to leave the distributed system by {}", incomingRequest.getSender());
        forceDisconnect(incomingRequest.getReason());
        return;
    }
    if (isCoordinator || isStopping || services.getCancelCriterion().isCancelInProgress()) {
        // already coordinator, or shutting down: hand the request to the view machinery
        // unless a stop/cancel is under way
        if (!isStopping && !services.getCancelCriterion().isCancelInProgress()) {
            recordViewRequest(incomingRequest);
            this.viewProcessor.processLeaveRequest(departing);
            this.prepareProcessor.processLeaveRequest(departing);
        }
        return;
    }
    logger.debug("Checking to see if I should become coordinator");
    // Build the view as it would look once the departing, removed and left members are gone
    // and see who its coordinator would be.
    final NetView projected = new NetView(view, view.getViewId() + 1);
    projected.remove(departing);
    synchronized (removedMembers) {
        projected.removeAll(removedMembers);
        projected.addCrashedMembers(removedMembers);
    }
    synchronized (leftMembers) {
        leftMembers.add(departing);
        projected.removeAll(leftMembers);
    }
    if (projected.getCoordinator().equals(localAddress)) {
        synchronized (viewInstallationLock) {
            becomeCoordinator(departing);
        }
    }
}
Also used : InternalDistributedMember(org.apache.geode.distributed.internal.membership.InternalDistributedMember) NetView(org.apache.geode.distributed.internal.membership.NetView)

Aggregations

NetView (org.apache.geode.distributed.internal.membership.NetView)101 InternalDistributedMember (org.apache.geode.distributed.internal.membership.InternalDistributedMember)65 MembershipTest (org.apache.geode.test.junit.categories.MembershipTest)59 Test (org.junit.Test)59 IntegrationTest (org.apache.geode.test.junit.categories.IntegrationTest)57 HashSet (java.util.HashSet)25 InstallViewMessage (org.apache.geode.distributed.internal.membership.gms.messages.InstallViewMessage)18 Message (org.jgroups.Message)14 ArrayList (java.util.ArrayList)12 JoinResponseMessage (org.apache.geode.distributed.internal.membership.gms.messages.JoinResponseMessage)10 UnitTest (org.apache.geode.test.junit.categories.UnitTest)10 LinkedList (java.util.LinkedList)9 DistributionMessage (org.apache.geode.distributed.internal.DistributionMessage)9 IOException (java.io.IOException)8 FlakyTest (org.apache.geode.test.junit.categories.FlakyTest)8 Properties (java.util.Properties)7 FindCoordinatorResponse (org.apache.geode.distributed.internal.membership.gms.locator.FindCoordinatorResponse)7 CancelException (org.apache.geode.CancelException)6 ConfigurationProperties (org.apache.geode.distributed.ConfigurationProperties)6 JoinRequestMessage (org.apache.geode.distributed.internal.membership.gms.messages.JoinRequestMessage)6