Search in sources :

Example 6 with SuspectRequest

use of org.apache.geode.distributed.internal.membership.gms.messages.SuspectRequest in project geode by apache.

the class GMSHealthMonitor method sendSuspectRequest.

/**
 * Sends the given suspect requests to other members in a {@link SuspectMembersMessage}.
 *
 * <p>If the current view has more than four members, the recipients are chosen with
 * {@code currentView.getPreferredCoordinators(...)}; otherwise the message goes to every member
 * of the current view. The suspects-sent statistic is incremented once the messenger's
 * {@code send} call returns. A {@link CancelException} raised while sending ends the method
 * quietly.
 *
 * @param requests the suspect requests to put in the message
 */
private void sendSuspectRequest(final List<SuspectRequest> requests) {
    // the background suspect-collector thread is currently disabled
    // synchronized (suspectRequests) {
    // if (suspectRequests.size() > 0) {
    // for (SuspectRequest sr: suspectRequests) {
    // if (!requests.contains(sr)) {
    // requests.add(sr);
    // }
    // }
    // suspectRequests.clear();
    // }
    // }
    logger.debug("Sending suspect request for members {}", requests);
    List<InternalDistributedMember> recipients;
    if (currentView.size() > 4) {
        // The filter holds every member already recorded in suspectedMemberInView plus the
        // members named by these requests.
        // NOTE(review): presumably getPreferredCoordinators excludes the filtered members from
        // its result - confirm against NetView.
        HashSet<InternalDistributedMember> filter = new HashSet<>();
        for (Enumeration<InternalDistributedMember> e = suspectedMemberInView.keys(); e.hasMoreElements(); ) {
            filter.add(e.nextElement());
        }
        for (SuspectRequest request : requests) {
            filter.add(request.getSuspectMember());
        }
        recipients = currentView.getPreferredCoordinators(filter, services.getJoinLeave().getMemberID(), 5);
    } else {
        recipients = currentView.getMembers();
    }
    SuspectMembersMessage smm = new SuspectMembersMessage(recipients, requests);
    Set<InternalDistributedMember> failedRecipients;
    try {
        failedRecipients = services.getMessenger().send(smm);
        this.stats.incSuspectsSent();
    } catch (CancelException e) {
        // nothing more to do if sending was cancelled
        return;
    }
    if (failedRecipients != null && !failedRecipients.isEmpty()) {
        // report only the members the send failed for, not the full recipient list
        logger.info("Unable to send suspect message to {}", failedRecipients);
    }
}
Also used : InternalDistributedMember(org.apache.geode.distributed.internal.membership.InternalDistributedMember) CancelException(org.apache.geode.CancelException) SuspectMembersMessage(org.apache.geode.distributed.internal.membership.gms.messages.SuspectMembersMessage) SuspectRequest(org.apache.geode.distributed.internal.membership.gms.messages.SuspectRequest) HashSet(java.util.HashSet)

Example 7 with SuspectRequest

use of org.apache.geode.distributed.internal.membership.gms.messages.SuspectRequest in project geode by apache.

the class GMSHealthMonitor method checkIfAvailable.

/**
   * Schedules a "final" health check for each suspected member. Per the original contract: when
   * failure-detection socket information for the member is available in the view, the check
   * connects to that socket and asks whether it is the expected member; otherwise a heartbeat
   * request is sent and a reply is awaited.
   *
   * Suspects that are not in the given view, are already in membersInFinalCheck, or are the
   * local member are skipped.
   */
private void checkIfAvailable(final InternalDistributedMember initiator, List<SuspectRequest> sMembers, final NetView cv) {
    for (final SuspectRequest request : sMembers) {
        final InternalDistributedMember suspect = request.getSuspectMember();
        // Note: suspectedMemberInView is maintained by the heartbeat monitoring code so that
        // it can stop watching members that have already been suspected. Because that code
        // updates the collection, it is not used here to decide whether a member is
        // currently undergoing a final check.
        final boolean needsFinalCheck = cv.contains(suspect)
            && !membersInFinalCheck.contains(suspect)
            && !suspect.equals(localAddress);
        if (needsFinalCheck) {
            final String reason = request.getReason();
            logger.debug("Scheduling final check for member {}; reason={}", suspect, reason);
            final Runnable finalCheck = () -> {
                try {
                    inlineCheckIfAvailable(initiator, cv, true, suspect, reason);
                } catch (CancelException e) {
                    // shutting down
                } catch (Exception e) {
                    logger.info("Unexpected exception while verifying member", e);
                } finally {
                    // the check is over, so stop tracking the member as suspected
                    GMSHealthMonitor.this.suspectedMemberInView.remove(suspect);
                }
            };
            checkExecutor.execute(finalCheck);
        }
    }
}
Also used : InternalDistributedMember(org.apache.geode.distributed.internal.membership.InternalDistributedMember) CancelException(org.apache.geode.CancelException) SuspectRequest(org.apache.geode.distributed.internal.membership.gms.messages.SuspectRequest) CancelException(org.apache.geode.CancelException) RejectedExecutionException(java.util.concurrent.RejectedExecutionException) SystemConnectException(org.apache.geode.SystemConnectException) SocketTimeoutException(java.net.SocketTimeoutException) GemFireConfigException(org.apache.geode.GemFireConfigException) IOException(java.io.IOException)

Example 8 with SuspectRequest

use of org.apache.geode.distributed.internal.membership.gms.messages.SuspectRequest in project geode by apache.

the class GMSHealthMonitorJUnitTest method testRemoveMemberCalled.

/***
   * Verifies that, after a suspect message has been processed and the final check has had time
   * to run (ping timeout), a remove-member request is issued through joinLeave.
   */
@Test
public void testRemoveMemberCalled() throws Exception {
    System.out.println("testRemoveMemberCalled starting");
    // member 0 is the view creator, the messenger's member ID, the recipient and the sender
    final InternalDistributedMember localMember = mockMembers.get(0);
    NetView view = new NetView(localMember, 2, mockMembers);
    when(messenger.getMemberID()).thenReturn(localMember);
    gmsHealthMonitor.started();
    gmsHealthMonitor.installView(view);
    Thread.sleep(memberTimeout / GMSHealthMonitor.LOGICAL_INTERVAL);

    ArrayList<InternalDistributedMember> recipients = new ArrayList<>();
    recipients.add(localMember);

    // member 1 is the one being suspected
    ArrayList<SuspectRequest> suspects = new ArrayList<>();
    suspects.add(new SuspectRequest(mockMembers.get(1), "Not Responding"));

    SuspectMembersMessage suspectMessage = new SuspectMembersMessage(recipients, suspects);
    suspectMessage.setSender(localMember);
    gmsHealthMonitor.processMessage(suspectMessage);

    // NOTE(review): memberTimeout is used as milliseconds in Thread.sleep above, so
    // TimeUnit.SECONDS here looks like a very generous upper bound - confirm the intended unit.
    Awaitility.await("waiting for remove(member) to be invoked").atMost(3 * memberTimeout, TimeUnit.SECONDS).until(() -> {
        verify(joinLeave, atLeastOnce()).remove(any(InternalDistributedMember.class), any(String.class));
    });
    Assert.assertTrue(gmsHealthMonitor.getStats().getSuspectsReceived() > 0);
}
Also used : InternalDistributedMember(org.apache.geode.distributed.internal.membership.InternalDistributedMember) NetView(org.apache.geode.distributed.internal.membership.NetView) ArrayList(java.util.ArrayList) SuspectMembersMessage(org.apache.geode.distributed.internal.membership.gms.messages.SuspectMembersMessage) SuspectRequest(org.apache.geode.distributed.internal.membership.gms.messages.SuspectRequest) FlakyTest(org.apache.geode.test.junit.categories.FlakyTest) Test(org.junit.Test) MembershipTest(org.apache.geode.test.junit.categories.MembershipTest) IntegrationTest(org.apache.geode.test.junit.categories.IntegrationTest)

Example 9 with SuspectRequest

use of org.apache.geode.distributed.internal.membership.gms.messages.SuspectRequest in project geode by apache.

the class GMSHealthMonitorJUnitTest method testRemoveMemberNotCalledBeforeTimeout.

/***
   * Shouldn't send remove member message before doing final check, or before ping Timeout.
   *
   * Processes a suspect message, sleeps for slightly less than memberTimeout, and then verifies
   * that joinLeave.remove(...) was never invoked while the suspect-received statistic was
   * incremented.
   */
@Test
public void testRemoveMemberNotCalledBeforeTimeout() {
    System.out.println("testRemoveMemberNotCalledBeforeTimeout starting");
    NetView v = new NetView(mockMembers.get(0), 2, mockMembers);
    // member 0 is returned as the member ID by both the messenger and joinLeave,
    // i.e. it plays the coordinator and the local member
    when(messenger.getMemberID()).thenReturn(mockMembers.get(0));
    when(joinLeave.getMemberID()).thenReturn(mockMembers.get(0));
    gmsHealthMonitor.started();
    gmsHealthMonitor.installView(v);
    ArrayList<InternalDistributedMember> recipient = new ArrayList<InternalDistributedMember>();
    recipient.add(mockMembers.get(0));
    ArrayList<SuspectRequest> as = new ArrayList<SuspectRequest>();
    // member 1 is the member being suspected
    SuspectRequest sr = new SuspectRequest(mockMembers.get(1), "Not Responding");
    as.add(sr);
    SuspectMembersMessage sm = new SuspectMembersMessage(recipient, as);
    sm.setSender(mockMembers.get(0));
    gmsHealthMonitor.processMessage(sm);
    try {
        // removal happens only after the final check / ping timeout, so stop just short of it
        Thread.sleep(memberTimeout - 100);
    } catch (InterruptedException e) {
        // do not swallow the interrupt: restore the flag so the test runner can observe it
        Thread.currentThread().interrupt();
    }
    System.out.println("testRemoveMemberNotCalledBeforeTimeout ending");
    verify(joinLeave, never()).remove(any(InternalDistributedMember.class), any(String.class));
    Assert.assertTrue(gmsHealthMonitor.getStats().getSuspectsReceived() > 0);
}
Also used : InternalDistributedMember(org.apache.geode.distributed.internal.membership.InternalDistributedMember) NetView(org.apache.geode.distributed.internal.membership.NetView) ArrayList(java.util.ArrayList) SuspectMembersMessage(org.apache.geode.distributed.internal.membership.gms.messages.SuspectMembersMessage) SuspectRequest(org.apache.geode.distributed.internal.membership.gms.messages.SuspectRequest) FlakyTest(org.apache.geode.test.junit.categories.FlakyTest) Test(org.junit.Test) MembershipTest(org.apache.geode.test.junit.categories.MembershipTest) IntegrationTest(org.apache.geode.test.junit.categories.IntegrationTest)

Aggregations

SuspectRequest (org.apache.geode.distributed.internal.membership.gms.messages.SuspectRequest)9 InternalDistributedMember (org.apache.geode.distributed.internal.membership.InternalDistributedMember)7 ArrayList (java.util.ArrayList)6 NetView (org.apache.geode.distributed.internal.membership.NetView)5 SuspectMembersMessage (org.apache.geode.distributed.internal.membership.gms.messages.SuspectMembersMessage)5 CancelException (org.apache.geode.CancelException)4 FlakyTest (org.apache.geode.test.junit.categories.FlakyTest)3 IntegrationTest (org.apache.geode.test.junit.categories.IntegrationTest)3 MembershipTest (org.apache.geode.test.junit.categories.MembershipTest)3 Test (org.junit.Test)3 IOException (java.io.IOException)2 SocketTimeoutException (java.net.SocketTimeoutException)2 HashSet (java.util.HashSet)2 RejectedExecutionException (java.util.concurrent.RejectedExecutionException)2 GemFireConfigException (org.apache.geode.GemFireConfigException)2 SystemConnectException (org.apache.geode.SystemConnectException)2 HeartbeatMessage (org.apache.geode.distributed.internal.membership.gms.messages.HeartbeatMessage)2 DataInputStream (java.io.DataInputStream)1 DataOutputStream (java.io.DataOutputStream)1 InputStream (java.io.InputStream)1