use of org.apache.geode.distributed.DistributedSystemDisconnectedException in project geode by apache.
the class GMSMembershipManager method processView.
/**
* Analyze a given view object and generate membership events as appropriate.
* <p>
* All state changes happen while holding {@code latestViewWriteLock}. Under the lock the method:
* <ol>
* <li>recomputes {@code disableMulticastForRollingUpgrade} from the versions of the surprise
* members and of the members of {@code newView};</li>
* <li>returns without further work if {@code newViewId} is lower than {@code latestViewId};</li>
* <li>installs a copy of {@code newView} as {@code latestView};</li>
* <li>calls {@code listener.newMemberConnected} for members that were neither in the prior view
* nor surprise members;</li>
* <li>calls {@code removeWithViewLock} for members of the prior view that are missing from
* {@code newView} and are not surprise members;</li>
* <li>expires surprise members and suspected members whose recorded time is too old;</li>
* <li>calls {@code listener.viewInstalled} with the installed view.</li>
* </ol>
*
* @param newViewId the id of the view being processed; compared against {@code latestViewId}
* @param newView the view to analyze
*/
protected void processView(long newViewId, NetView newView) {
// Sanity check...
// NOTE(review): the info-level message about this member missing from the view is nested inside
// the isDebugEnabled() guard, so it is only logged when debug logging is on - confirm intent.
if (logger.isDebugEnabled()) {
StringBuilder msg = new StringBuilder(200);
msg.append("Membership: Processing view ");
msg.append(newView);
// NOTE(review): the "}" in the next literal has no opening counterpart in this message.
msg.append("} on ").append(address.toString());
logger.debug(msg);
if (!newView.contains(address)) {
logger.info(LocalizedMessage.create(LocalizedStrings.GroupMembershipService_THE_MEMBER_WITH_ID_0_IS_NO_LONGER_IN_MY_OWN_VIEW_1, new Object[] { address, newView }));
}
}
// We perform the update under a global lock so that other
// incoming events will not be lost in terms of our global view.
latestViewWriteLock.lock();
try {
// first determine the version for multicast message serialization
Version version = Version.CURRENT;
// NOTE(review): this loop only moves to a surprise member's version when it compares GREATER
// than the running value, whereas the view-member loop below moves to versions that compare
// LOWER - confirm that the asymmetry is intended.
for (final Entry<InternalDistributedMember, Long> internalDistributedMemberLongEntry : surpriseMembers.entrySet()) {
InternalDistributedMember mbr = internalDistributedMemberLongEntry.getKey();
Version itsVersion = mbr.getVersionObject();
if (itsVersion != null && version.compareTo(itsVersion) < 0) {
version = itsVersion;
}
}
for (InternalDistributedMember mbr : newView.getMembers()) {
Version itsVersion = mbr.getVersionObject();
if (itsVersion != null && itsVersion.compareTo(version) < 0) {
version = mbr.getVersionObject();
}
}
// multicast is disabled whenever the selected version differs from Version.CURRENT.
// Note that this flag is updated before the stale-view check below, i.e. even for views that
// are subsequently ignored.
disableMulticastForRollingUpgrade = !version.equals(Version.CURRENT);
if (newViewId < latestViewId) {
// ignore this view since it is old news
return;
}
// Save previous view, for delta analysis
NetView priorView = latestView;
// update the view to reflect our changes, so that
// callbacks will see the new (updated) view.
latestViewId = newViewId;
latestView = new NetView(newView, newView.getViewId());
// look for additions
for (int i = 0; i < newView.getMembers().size(); i++) {
// additions
InternalDistributedMember m = newView.getMembers().get(i);
// Once a member has been seen via a view, remove them from the
// newborn set. Replace the netmember of the surpriseMember ID
// in case it was a partial ID and is being retained by DistributionManager
// or some other object
boolean wasSurprise = surpriseMembers.containsKey(m);
if (wasSurprise) {
// Walk the entries to reach the key instance stored in the map (not the equal instance
// from the view), update its net member, then drop the entry via the iterator.
for (Iterator<Map.Entry<InternalDistributedMember, Long>> iterator = surpriseMembers.entrySet().iterator(); iterator.hasNext(); ) {
Entry<InternalDistributedMember, Long> entry = iterator.next();
if (entry.getKey().equals(m)) {
entry.getKey().setNetMember(m.getNetMember());
iterator.remove();
break;
}
}
}
// if it's in a view, it's no longer suspect
suspectedMembers.remove(m);
if (priorView.contains(m) || wasSurprise) {
// already seen
continue;
}
// unblock any waiters for this particular member.
// i.e. signal any waiting threads in tcpconduit.
// The latch is only consulted when a peer auth-init setting is configured (non-empty).
String authInit = this.services.getConfig().getDistributionConfig().getSecurityPeerAuthInit();
boolean isSecure = authInit != null && authInit.length() != 0;
if (isSecure) {
CountDownLatch currentLatch;
if ((currentLatch = memberLatch.get(m)) != null) {
currentLatch.countDown();
}
}
if (shutdownInProgress()) {
addShunnedMember(m);
// no additions processed after shutdown begins
continue;
} else {
// bug #45158 - no longer shun a process that is now in view
boolean wasShunned = endShun(m);
if (wasShunned && logger.isDebugEnabled()) {
logger.debug("No longer shunning {} as it is in the current membership view", m);
}
}
logger.info(LocalizedMessage.create(LocalizedStrings.GroupMembershipService_MEMBERSHIP_PROCESSING_ADDITION__0_, m));
try {
listener.newMemberConnected(m);
} catch (VirtualMachineError err) {
SystemFailure.initiateFailure(err);
// If initiateFailure ever returns, rethrow the error:
// don't let this thread continue.
throw err;
} catch (DistributedSystemDisconnectedException e) {
// don't log shutdown exceptions
} catch (Throwable t) {
// Whenever you catch Error or Throwable, you must also
// catch VirtualMachineError (see above). However, there is
// _still_ a possibility that you are dealing with a cascading
// error condition, so you also need to check to see if the JVM
// is still usable:
SystemFailure.checkFailure();
// The failure is logged and processing continues with the next member.
logger.info(LocalizedMessage.create(LocalizedStrings.GroupMembershipService_MEMBERSHIP_FAULT_WHILE_PROCESSING_VIEW_ADDITION_OF__0, m), t);
}
}
// look for departures
for (int i = 0; i < priorView.getMembers().size(); i++) {
// departures
InternalDistributedMember m = priorView.getMembers().get(i);
if (newView.contains(m)) {
// still alive
continue;
}
if (surpriseMembers.containsKey(m)) {
// member has not yet appeared in a view
continue;
}
try {
// The second argument is true when the new view lists the member as crashed or when the
// member is currently in suspectedMembers.
removeWithViewLock(m, newView.getCrashedMembers().contains(m) || suspectedMembers.containsKey(m), "departed membership view");
} catch (VirtualMachineError err) {
SystemFailure.initiateFailure(err);
// If initiateFailure ever returns, rethrow the error:
// don't let this thread continue.
throw err;
} catch (Throwable t) {
// Whenever you catch Error or Throwable, you must also
// catch VirtualMachineError (see above). However, there is
// _still_ a possibility that you are dealing with a cascading
// error condition, so you also need to check to see if the JVM
// is still usable:
SystemFailure.checkFailure();
// The failure is logged and processing continues with the next member.
logger.info(LocalizedMessage.create(LocalizedStrings.GroupMembershipService_MEMBERSHIP_FAULT_WHILE_PROCESSING_VIEW_REMOVAL_OF__0, m), t);
}
}
// end of departures
// expire surprise members, add others to view
// A surprise member whose recorded time is older than surpriseMemberTimeout ms is removed;
// the remaining ones are added to latestView if they are not already in it.
long oldestAllowed = System.currentTimeMillis() - this.surpriseMemberTimeout;
for (Iterator<Map.Entry<InternalDistributedMember, Long>> it = surpriseMembers.entrySet().iterator(); it.hasNext(); ) {
Map.Entry<InternalDistributedMember, Long> entry = it.next();
Long birthtime = entry.getValue();
if (birthtime.longValue() < oldestAllowed) {
it.remove();
InternalDistributedMember m = entry.getKey();
logger.info(LocalizedMessage.create(LocalizedStrings.GroupMembershipService_MEMBERSHIP_EXPIRING_MEMBERSHIP_OF_SURPRISE_MEMBER_0, m));
// Unlike the departure loop above, this call is not wrapped in a try/catch.
removeWithViewLock(m, true, "not seen in membership view in " + this.surpriseMemberTimeout + "ms");
} else {
if (!latestView.contains(entry.getKey())) {
latestView.add(entry.getKey());
}
}
}
// expire suspected members
/*
* the timeout interval for suspected members (180000 ms = 3 minutes)
*/
final long suspectMemberTimeout = 180000;
oldestAllowed = System.currentTimeMillis() - suspectMemberTimeout;
// Raw iterator: entries whose recorded time is older than the cutoff are simply dropped.
for (Iterator it = suspectedMembers.entrySet().iterator(); it.hasNext(); ) {
Map.Entry entry = (Map.Entry) it.next();
Long birthtime = (Long) entry.getValue();
if (birthtime.longValue() < oldestAllowed) {
it.remove();
}
}
try {
listener.viewInstalled(latestView);
} catch (DistributedSystemDisconnectedException se) {
// swallowed on purpose: the exception is neither logged nor rethrown here
}
} finally {
latestViewWriteLock.unlock();
}
}
use of org.apache.geode.distributed.DistributedSystemDisconnectedException in project geode by apache.
the class RemoteOperationMessage method process.
/**
 * Handles this message on receipt: runs the operation against the target region and, unless
 * replying has been suppressed, sends the acknowledgement from the {@code finally} block.
 * Note: any hang in this method may cause a distributed deadlock for those threads waiting for an
 * acknowledgement.
 * <p>
 * Failures raised while processing are not propagated to the caller (apart from
 * {@link VirtualMachineError}, which is rethrown); they are wrapped in a {@code ReplyException}
 * and travel back to the sender with the reply.
 *
 * @param dm the distribution manager this message is being processed on
 */
@Override
public void process(final DistributionManager dm) {
  Throwable failure = null;
  boolean replyNeeded = true;
  LocalRegion region = null;
  long startTime = 0;
  try {
    if (checkCacheClosing(dm) || checkDSClosing(dm)) {
      // the reply carrying this exception is sent from the finally block
      failure = new CacheClosedException(LocalizedStrings.PartitionMessage_REMOTE_CACHE_IS_CLOSED_0.toLocalizedString(dm.getId()));
      return;
    }

    InternalCache cache = getCache(dm);
    region = getRegionByPath(cache);
    if (region == null && failIfRegionMissing()) {
      // region is missing and this message requires it: report that to the sender
      // (reply sent in finally block below)
      failure = new RegionDestroyedException(LocalizedStrings.RemoteOperationMessage_0_COULD_NOT_FIND_REGION_1.toLocalizedString(dm.getDistributionManagerId(), regionPath), regionPath);
      return;
    }

    // placeholder that stays in effect until the operation finishes normally
    failure = UNHANDLED_EXCEPTION;

    // [bruce] the region might be null here, so the txmgr is obtained from the cache instance
    TXManagerImpl txMgr = getTXManager(cache);
    TXStateProxy tx = txMgr.masqueradeAs(this);
    if (tx != null) {
      try {
        if (txMgr.isClosed()) {
          // NO DISTRIBUTED MESSAGING CAN BE DONE HERE!
          replyNeeded = false;
        } else if (tx.isInProgress()) {
          replyNeeded = operateOnRegion(dm, region, startTime);
          tx.updateProxyServer(this.getSender());
        }
      } finally {
        txMgr.unmasquerade(tx);
      }
    } else {
      replyNeeded = operateOnRegion(dm, region, startTime);
    }

    // completed without an exception
    failure = null;
  } catch (RemoteOperationException roe) {
    failure = roe;
  } catch (DistributedSystemDisconnectedException shutdown) {
    // bug 37026: replying here is too noisy; the cache is closed, so there is
    // no point trying to send a reply
    failure = null;
    replyNeeded = false;
    if (logger.isDebugEnabled()) {
      logger.debug("shutdown caught, abandoning message: {}", shutdown.getMessage(), shutdown);
    }
  } catch (RegionDestroyedException destroyed) {
    // [bruce] RDE does not always mean that the sender's region is also
    // destroyed, so an exception must be sent back. If the sender's
    // region is also destroyed, it does no harm to send it an exception.
    failure = new ForceReattemptException(LocalizedStrings.PartitionMessage_REGION_IS_DESTROYED_IN_0.toLocalizedString(dm.getDistributionManagerId()), destroyed);
  } catch (VirtualMachineError err) {
    SystemFailure.initiateFailure(err);
    // If initiateFailure ever returns, rethrow the error:
    // don't let this thread continue.
    throw err;
  } catch (Throwable t) {
    // Whenever you catch Error or Throwable, you must also
    // catch VirtualMachineError (see above). However, there is
    // _still_ a possibility that you are dealing with a cascading
    // error condition, so you also need to check to see if the JVM
    // is still usable:
    SystemFailure.checkFailure();
    if (replyNeeded) {
      // don't pass arbitrary runtime exceptions and errors back if this
      // cache/vm is closing
      failure = checkDSClosing(dm)
          ? new ForceReattemptException(LocalizedStrings.PartitionMessage_DISTRIBUTED_SYSTEM_IS_DISCONNECTING.toLocalizedString())
          : t;
    } else {
      // nothing will be sent back, so there is nothing to report
      failure = null;
    }
    if (logger.isTraceEnabled(LogMarker.DM) && (t instanceof RuntimeException)) {
      logger.trace(LogMarker.DM, "Exception caught while processing message", t);
    }
  } finally {
    if (replyNeeded) {
      // a null ReplyException is passed when processing recorded no failure
      ReplyException rex = failure != null ? new ReplyException(failure) : null;
      sendReply(getSender(), this.processorId, dm, rex, region, startTime);
    }
  }
}
use of org.apache.geode.distributed.DistributedSystemDisconnectedException in project geode by apache.
the class BaseCommand method readRequest.
/**
 * Obtains the connection's request message object and receives the incoming request into it.
 * <p>
 * A failure while reading is handed to the matching {@code handle...} helper instead of being
 * propagated; only {@link VirtualMachineError} is rethrown. After a handled failure the method
 * still returns whatever message reference it had obtained so far, so a non-null result does not
 * by itself mean the request was received completely.
 *
 * @param servConn the server connection to read the request from
 * @return the message obtained from {@code servConn}; {@code null} if no message object had been
 *         obtained when a failure was handled
 */
static Message readRequest(ServerConnection servConn) {
  Message request = null;
  try {
    request = servConn.getRequestMessage();
    request.recv(servConn, MAX_INCOMING_DATA, INCOMING_DATA_LIMITER, INCOMING_MSG_LIMITER);
  } catch (EOFException endOfStream) {
    handleEOFException(null, servConn, endOfStream);
    // TODO: Check if there is any need for explicitly returning
  } catch (InterruptedIOException interruptedIo) {
    // Solaris only
    handleInterruptedIOException(servConn, interruptedIo);
  } catch (IOException ioFailure) {
    handleIOException(null, servConn, ioFailure);
  } catch (DistributedSystemDisconnectedException shutdown) {
    handleShutdownException(null, servConn, shutdown);
  } catch (VirtualMachineError vmError) {
    SystemFailure.initiateFailure(vmError);
    // If initiateFailure ever returns, rethrow the error:
    // don't let this thread continue.
    throw vmError;
  } catch (Throwable unexpected) {
    // make sure the JVM is still usable before handling an arbitrary Throwable
    SystemFailure.checkFailure();
    handleThrowable(null, servConn, unexpected);
  }
  // single exit: reached both after a successful read and after a handled failure
  return request;
}
use of org.apache.geode.distributed.DistributedSystemDisconnectedException in project geode by apache.
the class BaseCommand method execute.
/**
 * Runs this command for one client request.
 * <p>
 * When {@code shouldMasqueradeForTx} returns true, the command runs while masquerading as the
 * client's transaction, and the masquerade is undone in a {@code finally} block. Exceptions
 * raised by the command are routed to the {@code BaseCommand} handlers instead of propagating;
 * only {@link VirtualMachineError}, or a failure reported by
 * {@code SystemFailure.checkFailure()}, escapes. The {@code EntryLogger} source is always cleared
 * on exit.
 *
 * @param clientMessage the request received from the client
 * @param serverConnection the connection the request arrived on
 */
@Override
public void execute(Message clientMessage, ServerConnection serverConnection) {
  // Read the request and update the statistics
  long start = DistributionStats.getStatTime();
  if (EntryLogger.isEnabled() && serverConnection != null) {
    EntryLogger.setSource(serverConnection.getMembershipID(), "c2s");
  }
  boolean shouldMasquerade = shouldMasqueradeForTx(clientMessage, serverConnection);
  try {
    if (shouldMasquerade) {
      InternalCache cache = serverConnection.getCache();
      InternalDistributedMember member = (InternalDistributedMember) serverConnection.getProxyID().getDistributedMember();
      TXManagerImpl txMgr = cache.getTxManager();
      TXStateProxy tx = null;
      try {
        tx = txMgr.masqueradeAs(clientMessage, member, false);
        cmdExecute(clientMessage, serverConnection, start);
        tx.updateProxyServer(txMgr.getMemberId());
      } finally {
        // tx is still null here if masqueradeAs itself failed
        txMgr.unmasquerade(tx);
      }
    } else {
      cmdExecute(clientMessage, serverConnection, start);
    }
  } catch (TransactionException | CopyException | SerializationException | CacheWriterException | CacheLoaderException | GemFireSecurityException | PartitionOfflineException | MessageTooLargeException e) {
    // these failures are reported without disconnecting the client
    handleExceptionNoDisconnect(clientMessage, serverConnection, e);
  } catch (EOFException eof) {
    BaseCommand.handleEOFException(clientMessage, serverConnection, eof);
  } catch (InterruptedIOException e) {
    // Solaris only
    BaseCommand.handleInterruptedIOException(serverConnection, e);
  } catch (IOException e) {
    BaseCommand.handleIOException(clientMessage, serverConnection, e);
  } catch (DistributedSystemDisconnectedException e) {
    BaseCommand.handleShutdownException(clientMessage, serverConnection, e);
  } catch (VirtualMachineError err) {
    SystemFailure.initiateFailure(err);
    // If initiateFailure ever returns, rethrow the error:
    // don't let this thread continue.
    throw err;
  } catch (Throwable e) {
    // Whenever you catch Error or Throwable, you must also
    // catch VirtualMachineError (see above). However, there is
    // _still_ a possibility that you are dealing with a cascading
    // error condition, so you also need to check to see if the JVM
    // is still usable (this check was missing here, unlike in readRequest):
    SystemFailure.checkFailure();
    BaseCommand.handleThrowable(clientMessage, serverConnection, e);
  } finally {
    EntryLogger.clearSource();
  }
}
use of org.apache.geode.distributed.DistributedSystemDisconnectedException in project geode by apache.
the class TCPConduit method getConnection.
/**
 * Return a connection to the given member. This method must continue to attempt to create a
 * connection to the given member as long as that member is in the membership view and the system
 * is not shutting down.
 *
 * @param memberAddress the IDS associated with the remoteId
 * @param preserveOrder whether this is an ordered or unordered connection
 * @param retry false if this is the first attempt
 * @param startTime the time this operation started
 * @param ackTimeout the ack-wait-threshold * 1000 for the operation to be transmitted (or zero)
 * @param ackSATimeout the ack-severe-alert-threshold * 1000 for the operation to be transmitted
 *        (or zero)
 *
 * @return the connection
 * @throws java.io.IOException if the member is not in the view or is shunned, if a retry is
 *         attempted after shutdown has begun, or if a connection problem was flagged as not
 *         worth retrying
 * @throws DistributedSystemDisconnectedException if the conduit is stopped, or a connection
 *         problem occurred while shutdown is in progress
 */
public Connection getConnection(InternalDistributedMember memberAddress, final boolean preserveOrder, boolean retry, long startTime, long ackTimeout, long ackSATimeout) throws java.io.IOException, DistributedSystemDisconnectedException {
  if (stopped) {
    throw new DistributedSystemDisconnectedException(LocalizedStrings.TCPConduit_THE_CONDUIT_IS_STOPPED.toLocalizedString());
  }
  Connection conn = null;
  InternalDistributedMember memberInTrouble = null;
  boolean breakLoop = false;
  for (;;) {
    stopper.checkCancelInProgress(null);
    // Clear the interrupt flag for the duration of this attempt; it is restored in the finally
    // block below.
    boolean interrupted = Thread.interrupted();
    try {
      // If this is not the first time through the loop there were problems:
      // tear down the connection so that it gets rebuilt.
      if (retry || conn != null) {
        // not first time in loop
        if (!membershipManager.memberExists(memberAddress) || membershipManager.isShunned(memberAddress) || membershipManager.shutdownInProgress()) {
          throw new IOException(LocalizedStrings.TCPConduit_TCPIP_CONNECTION_LOST_AND_MEMBER_IS_NOT_IN_VIEW.toLocalizedString());
        }
        // Pause just a tiny bit...
        try {
          Thread.sleep(100);
        } catch (InterruptedException e) {
          interrupted = true;
          stopper.checkCancelInProgress(e);
        }
        // try again after sleep
        if (!membershipManager.memberExists(memberAddress) || membershipManager.isShunned(memberAddress)) {
          // OK, the member left. Just register an error.
          throw new IOException(LocalizedStrings.TCPConduit_TCPIP_CONNECTION_LOST_AND_MEMBER_IS_NOT_IN_VIEW.toLocalizedString());
        }
        // Print a warning (once)
        if (memberInTrouble == null) {
          memberInTrouble = memberAddress;
          logger.warn(LocalizedMessage.create(LocalizedStrings.TCPConduit_ATTEMPTING_TCPIP_RECONNECT_TO__0, memberInTrouble));
        } else {
          if (logger.isDebugEnabled()) {
            logger.debug("Attempting TCP/IP reconnect to {}", memberInTrouble);
          }
        }
        // Close the connection (it will get rebuilt later).
        this.stats.incReconnectAttempts();
        if (conn != null) {
          try {
            if (logger.isDebugEnabled()) {
              logger.debug("Closing old connection. conn={} before retrying. memberInTrouble={}", conn, memberInTrouble);
            }
            conn.closeForReconnect("closing before retrying");
          } catch (CancelException ex) {
            throw ex;
          } catch (Exception ignored) {
            // best-effort close: the connection is about to be replaced anyway
          }
        }
      }
      // end of "not first time in loop" handling
      Exception problem = null;
      try {
        // Get (or regenerate) the connection
        // bug36202: this could generate a ConnectionException, so it
        // must be caught and retried
        boolean retryForOldConnection;
        boolean debugRetry = false;
        do {
          retryForOldConnection = false;
          conn = getConTable().get(memberAddress, preserveOrder, startTime, ackTimeout, ackSATimeout);
          if (conn == null) {
            // conduit may be closed - otherwise an ioexception would be thrown
            problem = new IOException(LocalizedStrings.TCPConduit_UNABLE_TO_RECONNECT_TO_SERVER_POSSIBLE_SHUTDOWN_0.toLocalizedString(memberAddress));
          } else if (conn.isClosing() || !conn.getRemoteAddress().equals(memberAddress)) {
            if (logger.isDebugEnabled()) {
              logger.debug("Got an old connection for {}: {}@{}", memberAddress, conn, conn.hashCode());
            }
            conn.closeOldConnection("closing old connection");
            conn = null;
            retryForOldConnection = true;
            debugRetry = true;
          }
        } while (retryForOldConnection);
        if (debugRetry && logger.isDebugEnabled()) {
          logger.debug("Done removing old connections");
        }
        // we have a connection; fall through and return it
      } catch (ConnectionException e) {
        // Race condition between acquiring the connection and attempting
        // to use it: another thread closed it.
        problem = e;
        // [sumedh] No need to retry since Connection.createSender has already
        // done retries and now member is really unreachable for some reason
        // even though it may be in the view
        breakLoop = true;
      } catch (IOException e) {
        problem = e;
        // bug #43962 don't keep trying to connect to an alert listener
        if (AlertAppender.isThreadAlerting()) {
          if (logger.isDebugEnabled()) {
            logger.debug("Giving up connecting to alert listener {}", memberAddress);
          }
          breakLoop = true;
        }
      }
      if (problem != null) {
        // Some problems are not recoverable; check and error out early.
        if (!membershipManager.memberExists(memberAddress) || membershipManager.isShunned(memberAddress)) {
          // Bracket our original warning
          if (memberInTrouble != null) {
            // make this msg info to bracket warning
            logger.info(LocalizedMessage.create(LocalizedStrings.TCPConduit_ENDING_RECONNECT_ATTEMPT_BECAUSE_0_HAS_DISAPPEARED, memberInTrouble));
          }
          throw new IOException(LocalizedStrings.TCPConduit_PEER_HAS_DISAPPEARED_FROM_VIEW.toLocalizedString(memberAddress));
        }
        if (membershipManager.shutdownInProgress()) {
          // Bracket our original warning
          if (memberInTrouble != null) {
            // make this msg info to bracket warning
            logger.info(LocalizedMessage.create(LocalizedStrings.TCPConduit_ENDING_RECONNECT_ATTEMPT_TO_0_BECAUSE_SHUTDOWN_HAS_STARTED, memberInTrouble));
          }
          stopper.checkCancelInProgress(null);
          throw new DistributedSystemDisconnectedException(LocalizedStrings.TCPConduit_ABANDONED_BECAUSE_SHUTDOWN_IS_IN_PROGRESS.toLocalizedString());
        }
        // Log the warning only now, so that the member can be named in the message.
        if (memberInTrouble == null) {
          logger.warn(LocalizedMessage.create(LocalizedStrings.TCPConduit_ERROR_SENDING_MESSAGE_TO_0_WILL_REATTEMPT_1, new Object[] { memberAddress, problem }));
          memberInTrouble = memberAddress;
        } else {
          if (logger.isDebugEnabled()) {
            logger.debug("Error sending message to {}", memberAddress, problem);
          }
        }
        if (breakLoop) {
          // getMessage() may legitimately be null (e.g. ClosedChannelException), so guard the
          // prefix test; a message-less problem is logged like any other non-alert failure.
          String problemMessage = problem.getMessage();
          if (problemMessage == null || !problemMessage.startsWith("Cannot form connection to alert listener")) {
            logger.warn(LocalizedMessage.create(LocalizedStrings.TCPConduit_THROWING_IOEXCEPTION_AFTER_FINDING_BREAKLOOP_TRUE), problem);
          }
          if (problem instanceof IOException) {
            throw (IOException) problem;
          } else {
            // wrap non-IO problems so callers only have to deal with IOException
            throw new IOException(LocalizedStrings.TCPConduit_PROBLEM_CONNECTING_TO_0.toLocalizedString(memberAddress), problem);
          }
        }
        // Retry the operation (indefinitely)
        continue;
      }
      // Make sure our logging is bracketed if there was a problem
      if (memberInTrouble != null) {
        logger.info(LocalizedMessage.create(LocalizedStrings.TCPConduit_SUCCESSFULLY_RECONNECTED_TO_MEMBER_0, memberInTrouble));
        if (logger.isTraceEnabled()) {
          logger.trace("new connection is {} memberAddress={}", conn, memberAddress);
        }
      }
      return conn;
    } finally {
      // restore the interrupt status that was cleared (or observed) during this attempt
      if (interrupted) {
        Thread.currentThread().interrupt();
      }
    }
  }
  // for(;;)
}
Aggregations