use of org.apache.geode.distributed.internal.membership.gms.messages.SuspectRequest in project geode by apache.
the class GMSHealthMonitor method recordSuspectRequests.
/**
 * Records the given suspect requests against the given view.
 *
 * <p>The requests are merged into the set kept for {@code cv} in
 * {@code viewVsSuspectedMembers}; the set is created on first use. Per the original note, this
 * bookkeeping matters on preferred coordinators, because the elder coordinator might be the next
 * member to show up in the suspected list.
 *
 * @param sMembers the suspect requests to remember
 * @param cv the view the requests are recorded under
 */
private void recordSuspectRequests(List<SuspectRequest> sMembers, NetView cv) {
  // All access to the per-view bookkeeping goes through this monitor.
  synchronized (viewVsSuspectedMembers) {
    Set<SuspectRequest> recorded =
        viewVsSuspectedMembers.computeIfAbsent(cv, view -> new HashSet<>());
    recorded.addAll(sMembers);
  }
}
use of org.apache.geode.distributed.internal.membership.gms.messages.SuspectRequest in project geode by apache.
the class GMSHealthMonitor method initiateSuspicion.
/**
 * Starts suspect processing for a single member, unless that member is already leaving.
 *
 * @param mbr the member to raise suspicion about
 * @param reason the reason carried in the suspect request
 */
private void initiateSuspicion(InternalDistributedMember mbr, String reason) {
  // A member that is on its way out does not need to be suspected.
  if (!services.getJoinLeave().isMemberLeaving(mbr)) {
    // Kept as a mutable list because sendSuspectRequest receives it directly.
    List<SuspectRequest> requests = new ArrayList<>();
    requests.add(new SuspectRequest(mbr, reason));
    sendSuspectRequest(requests);
  }
}
use of org.apache.geode.distributed.internal.membership.gms.messages.SuspectRequest in project geode by apache.
the class GMSHealthMonitorJUnitTest method testRemoveMemberCalledAfterDoingFinalCheckOnCoordinator.
/**
 * Verifies that a remove-member request is issued after the final check on a suspected member
 * has timed out. The suspect is {@code mockMembers.get(0)}, which the original notes identify
 * as the coordinator, so this test is trying to remove the coordinator.
 */
@Test
public void testRemoveMemberCalledAfterDoingFinalCheckOnCoordinator() throws Exception {
  NetView view = new NetView(mockMembers.get(0), 2, mockMembers);

  // Original notes: members 0 and 1 are the preferred coordinators. The local member is
  // mocked as member 1, i.e. the next preferred coordinator.
  when(messenger.getMemberID()).thenReturn(mockMembers.get(1));
  gmsHealthMonitor.started();
  gmsHealthMonitor.installView(view);
  Thread.sleep(memberTimeout / GMSHealthMonitor.LOGICAL_INTERVAL);

  ArrayList<InternalDistributedMember> recipients = new ArrayList<>();
  recipients.add(mockMembers.get(0));
  recipients.add(mockMembers.get(1));

  // The suspect request targets member 0 (the member being removed).
  ArrayList<SuspectRequest> suspects = new ArrayList<>();
  suspects.add(new SuspectRequest(mockMembers.get(0), "Not Responding"));

  // The suspect message is sent by the member right after our own slot in the mock list.
  SuspectMembersMessage suspectMessage = new SuspectMembersMessage(recipients, suspects);
  suspectMessage.setSender(mockMembers.get(myAddressIndex + 1));
  gmsHealthMonitor.processMessage(suspectMessage);

  // Removal happens only after the final check; wait memberTimeout plus 200 ms of slack.
  Thread.sleep(memberTimeout + 200);

  verify(joinLeave, atLeastOnce()).remove(any(InternalDistributedMember.class), any(String.class));
  Assert.assertTrue(gmsHealthMonitor.getStats().getSuspectsReceived() > 0);
}
use of org.apache.geode.distributed.internal.membership.gms.messages.SuspectRequest in project geode by apache.
the class GMSHealthMonitor method processSuspectMembersRequest.
/**
 * Handles a suspect request received from another member.
 *
 * <p>Steps, in order:
 * <ol>
 * <li>Count the request in the stats and give up if no view is installed yet.</li>
 * <li>If the sender is not in the current view (and its view id is not newer than ours), log
 * it, ask join/leave to remove the sender, and stop.</li>
 * <li>Unless this member is playing dead, answer any suspicion aimed at the local address with
 * a heartbeat to the sender and drop that entry from the request.</li>
 * <li>If the local member is the coordinator of the current view, run the availability check on
 * the suspects.</li>
 * <li>Otherwise record the suspects and work out who would coordinate if every suspect recorded
 * for this view were gone. If that is the local member, it runs the availability check on all
 * recorded suspects (so this member may end up acting as the new coordinator); if not, the
 * suspects are just recorded.</li>
 * </ol>
 *
 * @param incomingRequest the suspect message to process
 */
private void processSuspectMembersRequest(SuspectMembersMessage incomingRequest) {
  this.stats.incSuspectsReceived();

  // Work on a single snapshot of the view for the whole call.
  final NetView view = currentView;
  if (view == null) {
    return;
  }

  List<SuspectRequest> sMembers = incomingRequest.getMembers();
  InternalDistributedMember sender = incomingRequest.getSender();
  int senderViewId = sender.getVmViewId();

  boolean senderIsNonMember =
      view.getViewId() >= senderViewId && !view.contains(incomingRequest.getSender());
  if (senderIsNonMember) {
    logger.info("Membership ignoring suspect request for " + incomingRequest + " from non-member " + incomingRequest.getSender());
    services.getJoinLeave().remove(sender, "this process is initiating suspect processing but is no longer a member");
    return;
  }

  // take care of any suspicion of this member by sending a heartbeat back
  if (!playingDead) {
    Iterator<SuspectRequest> pending = incomingRequest.getMembers().iterator();
    while (pending.hasNext()) {
      SuspectRequest request = pending.next();
      if (!request.getSuspectMember().equals(localAddress)) {
        continue;
      }
      HeartbeatMessage heartbeat = new HeartbeatMessage(-1);
      heartbeat.setRecipient(sender);
      try {
        services.getMessenger().send(heartbeat);
        this.stats.incHeartbeatsSent();
        // Answered with a heartbeat, so drop the entry from the request's member list.
        pending.remove();
      } catch (CancelException e) {
        // Sending was cancelled; abandon processing of this request.
        return;
      }
    }
  }

  // Current coordinator: check the suspects directly.
  if (view.getCoordinator().equals(localAddress)) {
    logReceivedSuspicions(sender, incomingRequest.getMembers());
    checkIfAvailable(sender, sMembers, view);
    return;
  }

  // Not the coordinator: record the suspects, then see who would coordinate a follow-up view
  // that no longer contains any member suspected in this view.
  NetView projectedView = new NetView(view, view.getViewId() + 1);
  ArrayList<SuspectRequest> allRecorded = new ArrayList<>();
  synchronized (viewVsSuspectedMembers) {
    recordSuspectRequests(sMembers, view);
    for (final SuspectRequest recorded : viewVsSuspectedMembers.get(view)) {
      projectedView.remove(recorded.getSuspectMember());
      allRecorded.add(recorded);
    }
  }

  InternalDistributedMember nextCoordinator = projectedView.getCoordinator();
  if (nextCoordinator == null || !nextCoordinator.equals(localAddress)) {
    recordSuspectRequests(sMembers, view);
    return;
  }

  // The local member would be the new coordinator, so it performs the check itself.
  logReceivedSuspicions(sender, incomingRequest.getMembers());
  checkIfAvailable(sender, allRecorded, view);
}

/** Logs one info line per suspect request received from {@code sender}. */
private void logReceivedSuspicions(InternalDistributedMember sender,
    List<SuspectRequest> requests) {
  for (SuspectRequest request : requests) {
    logger.info("received suspect message from {} for {}: {}", sender, request.getSuspectMember(), request.getReason());
  }
}
use of org.apache.geode.distributed.internal.membership.gms.messages.SuspectRequest in project geode by apache.
the class GMSHealthMonitor method start.
/**
 * Creates the executors used for failure detection and schedules the periodic monitor task.
 *
 * <p>All threads are daemon threads created in {@code Services.getThreadGroup()}. The monitor
 * runs at a fixed rate of {@code memberTimeout / LOGICAL_INTERVAL} milliseconds, with the same
 * initial delay.
 */
public void start() {
  scheduler = Executors.newScheduledThreadPool(1,
      task -> newFailureDetectionThread(task, "Geode Failure Detection Scheduler"));

  // Each pool numbers its own threads starting from 0.
  final AtomicInteger checkThreadIds = new AtomicInteger();
  checkExecutor = Executors.newCachedThreadPool(
      task -> newFailureDetectionThread(task,
          "Geode Failure Detection thread " + checkThreadIds.getAndIncrement()));

  long interval = memberTimeout / LOGICAL_INTERVAL;
  Monitor monitor = new Monitor(memberTimeout);
  monitorFuture = scheduler.scheduleAtFixedRate(monitor, interval, interval, TimeUnit.MILLISECONDS);

  // NOTE: the original source carried a commented-out start-up of a
  // "Geode Suspect Message Collector" thread (suspectRequestCollectorThread) at this point;
  // it is not started here.

  final AtomicInteger serverThreadIds = new AtomicInteger();
  serverSocketExecutor = Executors.newCachedThreadPool(
      task -> newFailureDetectionThread(task,
          "Geode Failure Detection Server thread " + serverThreadIds.getAndIncrement()));
}

/** Builds a daemon thread with the given name in the services thread group. */
private Thread newFailureDetectionThread(Runnable task, String name) {
  Thread thread = new Thread(Services.getThreadGroup(), task, name);
  thread.setDaemon(true);
  return thread;
}
Aggregations