use of org.apache.geode.distributed.internal.membership.NetView in project geode by apache.
the class GMSJoinLeave method waitForJoinResponse.
/**
 * Waits on the {@code joinResponse} monitor for a reply to a join request and returns the reply
 * that is available afterwards.
 * <p>
 * If the reply carries a view and this member has not joined yet, the reply slot is cleared and
 * the view is stored in {@code searchState.view}. When the coordinator of that view has already
 * been tried, the method waits one more time for a follow-up reply; otherwise the reply is
 * discarded so that the coordinator of the received view can be tried.
 *
 * @return the reply to process, or {@code null} if no reply is available, if the reply only
 *         supplied a view with an untried coordinator, or if this member is joined by the time
 *         the method finishes
 * @throws InterruptedException if the thread is interrupted while waiting on the monitor
 */
private JoinResponseMessage waitForJoinResponse() throws InterruptedException {
  JoinResponseMessage reply;
  synchronized (joinResponse) {
    if (joinResponse[0] == null && !isJoined) {
      // Giving up on the wait is not fatal: if a response is still on the way
      // the new view will be received and the join completes that way.
      // See installView()
      final long memberTimeout = services.getConfig().getMemberTimeout();
      final long joinTimeoutShare = services.getConfig().getJoinTimeout() / 5;
      joinResponse.wait(Math.max(memberTimeout, joinTimeoutShare));
    }

    reply = joinResponse[0];
    if (reply != null && reply.getCurrentView() != null && !isJoined) {
      // consume the reply so that a later one can be detected
      joinResponse[0] = null;

      // the reply carried a view rather than completing the join
      final NetView receivedView = reply.getCurrentView();
      final InternalDistributedMember viewCoordinator = receivedView.getCoordinator();
      final boolean coordinatorAlreadyTried = searchState.alreadyTried.contains(viewCoordinator);

      searchState.view = reply.getCurrentView();
      if (coordinatorAlreadyTried) {
        // a join request was already sent to this coordinator, so give it some more time,
        // assuming this reply arrived immediately: wait for the same timeout again
        final long memberTimeout = services.getConfig().getMemberTimeout();
        final long joinTimeoutShare = services.getConfig().getJoinTimeout() / 5;
        joinResponse.wait(Math.max(memberTimeout, joinTimeoutShare));
        reply = joinResponse[0];
      } else {
        // the caller should try this coordinator next
        reply = null;
      }
      searchState.view = receivedView;
    }

    // a view may have been installed while waiting, in which case no reply is needed
    return isJoined ? null : reply;
  }
}
use of org.apache.geode.distributed.internal.membership.NetView in project geode by apache.
the class GMSJoinLeave method findCoordinatorFromView.
/**
 * Asks members of the view held in {@code searchState} who the membership coordinator is and
 * stores the best candidate in {@code searchState.possibleCoordinator}.
 * <p>
 * The request is sent to the view members (limited to {@code MAX_DISCOVERY_NODES} when that
 * limit is positive) plus any {@code state.registrants}, excluding this member itself. When a
 * UDP Diffie-Hellman algorithm is configured, one request is sent per recipient; otherwise a
 * single request addressed to all recipients is sent. The method then waits up to
 * {@code DISCOVERY_TIMEOUT} on {@code state.responses} (skipped when the
 * {@code "findCoordinatorFromView"} unit-testing hook is set) and evaluates the responses
 * collected so far.
 * <p>
 * The single {@code wait} is deliberately not placed in a loop; hence the suppressed warning.
 *
 * @return {@code true} if a candidate coordinator was selected; {@code false} if there is no
 *         candidate or the thread was interrupted while waiting (the interrupt flag is restored)
 */
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "WA_NOT_IN_LOOP")
boolean findCoordinatorFromView() {
  ArrayList<FindCoordinatorResponse> result;
  SearchState state = searchState;
  NetView v = state.view;

  // Build the list of members to query.
  List<InternalDistributedMember> recipients = new ArrayList<>(v.getMembers());
  if (recipients.size() > MAX_DISCOVERY_NODES && MAX_DISCOVERY_NODES > 0) {
    // copy the sub-list so later additions/removals do not write through a view
    recipients = new ArrayList<>(recipients.subList(0, MAX_DISCOVERY_NODES));
  }
  if (state.registrants != null) {
    recipients.addAll(state.registrants);
  }
  recipients.remove(localAddress);

  boolean testing = unitTesting.contains("findCoordinatorFromView");
  synchronized (state.responses) {
    if (!testing) {
      state.responses.clear();
    }

    String dhalgo = services.getConfig().getDistributionConfig().getSecurityUDPDHAlgo();
    if (!dhalgo.isEmpty()) {
      // Send the request to each recipient individually.
      // Usually this happens when locator re-joins the cluster and it has saved view.
      for (InternalDistributedMember mbr : recipients) {
        Set<InternalDistributedMember> r = new HashSet<>();
        r.add(mbr);
        FindCoordinatorRequest req = new FindCoordinatorRequest(localAddress, state.alreadyTried,
            state.viewId, services.getMessenger().getPublicKey(localAddress),
            services.getMessenger().getRequestId(), dhalgo);
        req.setRecipients(r);
        services.getMessenger().send(req, v);
      }
    } else {
      FindCoordinatorRequest req = new FindCoordinatorRequest(localAddress, state.alreadyTried,
          state.viewId, services.getMessenger().getPublicKey(localAddress),
          services.getMessenger().getRequestId(), dhalgo);
      req.setRecipients(recipients);
      services.getMessenger().send(req, v);
    }

    try {
      if (!testing) {
        state.responses.wait(DISCOVERY_TIMEOUT);
      }
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      return false;
    }

    // snapshot the responses under the lock, then reset the shared collection
    result = new ArrayList<>(state.responses);
    state.responses.clear();
  }

  InternalDistributedMember coord = null;
  if (localAddress.getNetMember().preferredForCoordinator()) {
    // it's possible that all other potential coordinators are gone
    // and this new member must become the coordinator
    coord = localAddress;
  }

  // true while the current candidate (if any) is a member that has not yet joined
  boolean coordIsNoob = true;
  for (FindCoordinatorResponse resp : result) {
    InternalDistributedMember mbr = resp.getCoordinator();
    if (!state.alreadyTried.contains(mbr)) {
      boolean mbrIsNoob = (mbr.getVmViewId() < 0);
      if (mbrIsNoob) {
        // member has not yet joined: only considered while no joined candidate exists,
        // and then the one that sorts lowest wins
        if (coordIsNoob && (coord == null || coord.compareTo(mbr, false) > 0)) {
          coord = mbr;
        }
      } else {
        // member has already joined: it beats any not-yet-joined candidate, and among
        // joined candidates the one with the highest view id wins
        if (coordIsNoob || mbr.getVmViewId() > coord.getVmViewId()) {
          coord = mbr;
          coordIsNoob = false;
        }
      }
    }
  }

  state.possibleCoordinator = coord;
  return coord != null;
}
use of org.apache.geode.distributed.internal.membership.NetView in project geode by apache.
the class GMSMembershipManager method addSurpriseMember.
/**
 * Logic for handling a direct connection event (message received from a member not in the view).
 * Does not employ the startup queue.
 * <p>
 * This method acquires {@code latestViewWriteLock} itself while it inspects and updates the
 * membership state; the listener notification and the shunned-member warning are issued after
 * the lock has been released. If the member is already in the view or already recorded as a
 * surprise member, the method simply returns; else it adds the member.
 *
 * @param dm the member joining
 * @return {@code true} if the member is already known, was accepted as a surprise member, or
 *         connected while shutdown is in progress; {@code false} if its view id is older than
 *         the current view's id or it is shunned
 */
public boolean addSurpriseMember(DistributedMember dm) {
  final InternalDistributedMember member = (InternalDistributedMember) dm;
  boolean shunned = false;

  latestViewWriteLock.lock();
  try {
    // nothing to do if the member is already known
    if (latestView.contains(member) || surpriseMembers.containsKey(member)) {
      return true;
    }

    if (member.getVmViewId() < 0) {
      logger.warn("adding a surprise member that has not yet joined the distributed system: "
          + member, new Exception("stack trace"));
    }

    if (latestView.getViewId() > member.getVmViewId()) {
      // tell the process that it should shut down distribution.
      // Run in a separate thread so we don't hold the view lock during the request. Bug #44995
      final Runnable removalTask = new Runnable() {
        @Override
        public void run() {
          // fix for bug #42548
          // this is an old member that shouldn't be added
          logger.warn(LocalizedMessage.create(
              LocalizedStrings.GroupMembershipService_Invalid_Surprise_Member,
              new Object[] {member, latestView}));
          requestMemberRemoval(member,
              "this member is no longer in the view but is initiating connections");
        }
      };
      final Thread remover = new Thread(Thread.currentThread().getThreadGroup(), removalTask,
          "Removing shunned GemFire node " + member);
      remover.start();
      addShunnedMember(member);
      return false;
    }

    // Recording the member here ensures it is not removed if a new
    // view comes in and the member is still not visible.
    final Long firstSeen = Long.valueOf(System.currentTimeMillis());
    surpriseMembers.put(member, firstSeen);

    if (shutdownInProgress()) {
      // Force disconnect, esp. the TCPConduit
      final String reason =
          LocalizedStrings.GroupMembershipService_THIS_DISTRIBUTED_SYSTEM_IS_SHUTTING_DOWN
              .toLocalizedString();
      if (directChannel != null) {
        try {
          directChannel.closeEndpoint(member, reason);
        } catch (DistributedSystemDisconnectedException e) {
          // ignore - happens during shutdown
        }
      }
      // for good luck
      destroyMember(member, reason);
      // allow during shutdown
      return true;
    }

    if (isShunned(member)) {
      shunned = true;
      surpriseMembers.remove(member);
    } else {
      // Make sure the member is accounted for in the view: the listener is about to be told
      // about a new member and should rightfully expect it to be in our membership view, so
      // conjure up a view that includes it. The member is appended at the end of the list,
      // which should ensure it is not chosen as an elder. This gets corrected when the
      // member finally shows up in a real view.
      final NetView extendedView = new NetView(latestView, latestView.getViewId());
      extendedView.add(member);
      latestView = extendedView;
    }
  } finally {
    latestViewWriteLock.unlock();
  }

  if (shunned) {
    // fix for bug #41538 - deadlock while alerting
    logger.warn(LocalizedMessage.create(
        LocalizedStrings.GroupMembershipService_MEMBERSHIP_IGNORING_SURPRISE_CONNECT_FROM_SHUNNED_MEMBER_0,
        member));
    return false;
  }

  listener.newMemberConnected(member);
  return true;
}
use of org.apache.geode.distributed.internal.membership.NetView in project geode by apache.
the class GMSJoinLeave method becomeCoordinator.
/**
 * Makes this member the membership coordinator. Callers must hold the monitor of
 * viewInstallationLock, acquired when the decision to become coordinator was made, so that the
 * decision and the transition happen atomically.
 * <p>
 * Does nothing if this member already is the coordinator. If there is no current view yet, an
 * initial view containing only this member is created and installed and the member is marked as
 * joined; otherwise a new view is derived via {@code addMemberToNetView(oldCoordinator)}. In
 * both cases the view creator and the view broadcaster are then started.
 *
 * @param oldCoordinator may be null
 */
private void becomeCoordinator(InternalDistributedMember oldCoordinator) {
  assert Thread.holdsLock(viewInstallationLock);

  if (isCoordinator) {
    return;
  }

  logger.info("This member is becoming the membership coordinator with address {}", localAddress);
  isCoordinator = true;

  final NetView viewToPublish;
  if (currentView != null) {
    // there is an existing view: create and send out a new one
    viewToPublish = addMemberToNetView(oldCoordinator);
  } else {
    // no view yet: create the initial membership view with this member only
    viewToPublish = new NetView(this.localAddress);
    viewToPublish.setFailureDetectionPort(localAddress,
        services.getHealthMonitor().getFailureDetectionPort());
    this.localAddress.setVmViewId(0);
    installView(viewToPublish);
    isJoined = true;
  }

  createAndStartViewCreator(viewToPublish);
  startViewBroadcaster();
}
use of org.apache.geode.distributed.internal.membership.NetView in project geode by apache.
the class GMSJoinLeave method processLeaveRequest.
/**
 * Processes a leave request received from another member.
 * <p>
 * If no view is installed yet the request is only recorded. Requests for members that are not
 * in the current view and whose view id is older than the view's id are ignored. A request
 * naming this member triggers a forced disconnect. Otherwise, when this member is not the
 * coordinator (and is not stopping or cancelling), it checks whether it would be the
 * coordinator of the view without the departed members and, if so, becomes the coordinator.
 * In the remaining case, unless stopping or cancelling, the request is recorded and passed to
 * the view processor and the prepare processor.
 *
 * @param incomingRequest the request to be processed
 */
private void processLeaveRequest(LeaveRequestMessage incomingRequest) {
  logger.info("received leave request from {} for {}", incomingRequest.getSender(),
      incomingRequest.getMemberID());

  final NetView view = currentView;
  if (view == null) {
    // no view yet: keep the request for later
    recordViewRequest(incomingRequest);
    return;
  }

  final InternalDistributedMember departing = incomingRequest.getMemberID();

  if (logger.isDebugEnabled()) {
    logger.debug("JoinLeave.processLeaveRequest invoked. isCoordinator=" + isCoordinator + "; isStopping=" + isStopping + "; cancelInProgress=" + services.getCancelCriterion().isCancelInProgress());
  }

  final boolean fromOldMember =
      !view.contains(departing) && departing.getVmViewId() < view.getViewId();
  if (fromOldMember) {
    logger.debug("ignoring leave request from old member");
    return;
  }

  if (departing.equals(this.localAddress)) {
    logger.info("I am being told to leave the distributed system by {}",
        incomingRequest.getSender());
    forceDisconnect(incomingRequest.getReason());
    return;
  }

  if (!isCoordinator && !isStopping && !services.getCancelCriterion().isCancelInProgress()) {
    logger.debug("Checking to see if I should become coordinator");

    // Build the view as it would look without the departing, removed and left members.
    final NetView projected = new NetView(view, view.getViewId() + 1);
    projected.remove(departing);
    synchronized (removedMembers) {
      projected.removeAll(removedMembers);
      projected.addCrashedMembers(removedMembers);
    }
    synchronized (leftMembers) {
      leftMembers.add(departing);
      projected.removeAll(leftMembers);
    }

    // NOTE(review): getCoordinator() is dereferenced without a null check - confirm it cannot
    // return null when the projected view ends up empty.
    if (projected.getCoordinator().equals(localAddress)) {
      synchronized (viewInstallationLock) {
        becomeCoordinator(departing);
      }
    }
  } else if (!isStopping && !services.getCancelCriterion().isCancelInProgress()) {
    recordViewRequest(incomingRequest);
    this.viewProcessor.processLeaveRequest(departing);
    this.prepareProcessor.processLeaveRequest(departing);
  }
}
Aggregations