Use of org.apache.geode.CancelException in the Apache Geode project.
Class PartitionMessage, method process.
/**
 * Upon receipt of the message, both process the message and send an acknowledgement, not
 * necessarily in that order. Note: Any hang in this message may cause a distributed deadlock for
 * those threads waiting for an acknowledgement.
 *
 * <p>
 * The reply, when one is sent, is issued from the {@code finally} block and carries whatever
 * failure was recorded in {@code thr} while processing. The EntryLogger source that is set on
 * entry is cleared on every exit path, whether or not a reply is sent.
 *
 * @param dm the distribution manager this message is being processed on
 * @throws PartitionedRegionException if the region does not exist (typically, if it has been
 *         destroyed)
 */
@Override
public void process(final DistributionManager dm) {
  // Failure to report back to the sender; null means "reply without an exception".
  Throwable thr = null;
  boolean sendReply = true;
  PartitionedRegion pr = null;
  long startTime = 0;
  EntryLogger.setSource(getSender(), "PR");
  try {
    if (checkCacheClosing(dm) || checkDSClosing(dm)) {
      thr = new CacheClosedException(LocalizedStrings.PartitionMessage_REMOTE_CACHE_IS_CLOSED_0.toLocalizedString(dm.getId()));
      // reply sent in finally block below
      return;
    }
    pr = getPartitionedRegion();
    if (pr == null && failIfRegionMissing()) {
      // if the distributed system is disconnecting, don't send a reply saying
      // the partitioned region can't be found (bug 36585)
      thr = new ForceReattemptException(LocalizedStrings.PartitionMessage_0_COULD_NOT_FIND_PARTITIONED_REGION_WITH_ID_1.toLocalizedString(dm.getDistributionManagerId(), regionId));
      // reply sent in finally block below
      return;
    }
    if (pr != null) {
      startTime = getStartPartitionMessageProcessingTime(pr);
    }
    // Sentinel: stays in place until the operation completes, so that any exit that is not
    // translated by a catch clause below still reports a failure to the sender.
    thr = UNHANDLED_EXCEPTION;
    InternalCache cache = getInternalCache();
    if (cache == null) {
      throw new ForceReattemptException(LocalizedStrings.PartitionMessage_REMOTE_CACHE_IS_CLOSED_0.toLocalizedString());
    }
    TXManagerImpl txMgr = getTXManagerImpl(cache);
    TXStateProxy tx = txMgr.masqueradeAs(this);
    if (tx == null) {
      sendReply = operateOnPartitionedRegion(dm, pr, startTime);
    } else {
      try {
        if (txMgr.isClosed()) {
          // NO DISTRIBUTED MESSAGING CAN BE DONE HERE!
          sendReply = false;
        } else if (tx.isInProgress()) {
          sendReply = operateOnPartitionedRegion(dm, pr, startTime);
          tx.updateProxyServer(this.getSender());
        }
      } finally {
        // Always undo the masquerade, even if the operation threw.
        txMgr.unmasquerade(tx);
      }
    }
    // Completed normally: clear the sentinel.
    thr = null;
  } catch (ForceReattemptException fre) {
    thr = fre;
  } catch (DataLocationException dle) {
    thr = new ForceReattemptException(dle.getMessage(), dle);
  } catch (DistributedSystemDisconnectedException se) {
    // bug 37026: replying with an exception here is too noisy; the cache is closed, so there
    // is no point trying to send a reply at all.
    thr = null;
    sendReply = false;
    if (logger.isDebugEnabled()) {
      logger.debug("shutdown caught, abandoning message: {}", se.getMessage(), se);
    }
  } catch (RegionDestroyedException | RegionNotFoundException rde) {
    // [bruce] RDE does not always mean that the sender's region is also
    // destroyed, so we must send back an exception. If the sender's
    // region is also destroyed, who cares if we send it an exception
    thr = new ForceReattemptException(LocalizedStrings.PartitionMessage_REGION_IS_DESTROYED_IN_0.toLocalizedString(dm.getDistributionManagerId()), rde);
  } catch (VirtualMachineError err) {
    SystemFailure.initiateFailure(err);
    // If initiateFailure ever returns, rethrow the error so this thread does not continue.
    throw err;
  } catch (Throwable t) {
    // Whenever you catch Error or Throwable, you must also
    // catch VirtualMachineError (see above). However, there is
    // _still_ a possibility that you are dealing with a cascading
    // error condition, so you also need to check to see if the JVM
    // is still usable:
    SystemFailure.checkFailure();
    thr = null;
    if (sendReply) {
      if (!checkDSClosing(dm)) {
        thr = t;
      } else {
        // don't pass arbitrary runtime exceptions and errors back if this
        // cache/vm is closing
        thr = new ForceReattemptException(LocalizedStrings.PartitionMessage_DISTRIBUTED_SYSTEM_IS_DISCONNECTING.toLocalizedString());
      }
    }
    if (logger.isTraceEnabled(LogMarker.DM) && t instanceof RuntimeException) {
      logger.trace(LogMarker.DM, "Exception caught while processing message: {}", t.getMessage(), t);
    }
  } finally {
    try {
      if (sendReply) {
        ReplyException rex = null;
        if (thr != null) {
          // don't transmit the exception if this message was to a listener
          // and this listener is shutting down
          boolean excludeException = this.notificationOnly && ((thr instanceof CancelException) || (thr instanceof ForceReattemptException));
          if (!excludeException) {
            rex = new ReplyException(thr);
          }
        }
        // Send the reply if the operateOnPartitionedRegion returned true
        sendReply(getSender(), this.processorId, dm, rex, pr, startTime);
      }
    } finally {
      // Pair the setSource(...) at the top of this method on every exit path. Previously the
      // source was cleared only after a reply had been sent successfully, so it stayed set when
      // no reply was due or when sendReply(...) threw.
      // NOTE(review): presumably the source is per-thread state that would otherwise carry over
      // to the next message handled by this thread - confirm against EntryLogger.
      EntryLogger.clearSource();
    }
  }
}
Use of org.apache.geode.CancelException in the Apache Geode project.
Class PartitionedRegionRebalanceOp, method execute.
/**
 * Runs the rebalance (or, when {@code isRebalance} is false, the recovery) for the leader region.
 *
 * <p>
 * The resource observer is told that the operation started before any work is done and that it
 * finished from the {@code finally} block, so both notifications happen even on the early exits.
 * Unless this is a simulation, the leader region's recovery lock is held while the director is
 * stepped and is released in the {@code finally} block.
 *
 * @return the details of the rebalance; an empty set when the colocated regions check fails, no
 *         rebalance is necessary, or the operation was cancelled
 */
public Set<PartitionRebalanceInfo> execute() {
  final long startNanos = System.nanoTime();
  final InternalResourceManager manager =
      InternalResourceManager.getInternalResourceManager(leaderRegion.getCache());
  final MembershipListener membershipListener = new MembershipChangeListener();
  if (!isRebalance) {
    InternalResourceManager.getResourceObserver().recoveryStarted(targetRegion);
  } else {
    InternalResourceManager.getResourceObserver().rebalancingStarted(targetRegion);
  }

  RecoveryLock recoveryLock = null;
  try {
    // Bail out before taking the lock if the colocated regions check fails or there is
    // nothing to do.
    if (!checkAndSetColocatedRegions() || !isRebalanceNecessary()) {
      return Collections.emptySet();
    }
    if (!simulate) {
      recoveryLock = leaderRegion.getRecoveryLock();
      recoveryLock.lock();
    }
    // Ask again now that the lock (if any) is held; the answer may have changed in the meantime.
    if (!isRebalanceNecessary()) {
      return Collections.emptySet();
    }

    // Register a listener so we are told when members leave or join. A membership change makes
    // the loop below rebuild the model and restart from fresh details.
    // TODO rebalance - we should really add a membership listener to ALL of
    // the colocated regions.
    leaderRegion.getRegionAdvisor().addMembershipListener(membershipListener);

    final InternalCache cache = leaderRegion.getCache();
    final Map<PartitionedRegion, InternalPRInfo> initialDetails = fetchDetails(cache);
    final BucketOperatorWrapper serialOperator = getBucketOperator(initialDetails);
    final ParallelBucketOperator parallelOperator = new ParallelBucketOperator(
        MAX_PARALLEL_OPERATIONS, cache.getDistributionManager().getWaitingThreadPool(),
        serialOperator);
    PartitionedRegionLoadModel loadModel = buildModel(parallelOperator, initialDetails, manager);
    for (PartitionRebalanceDetailsImpl before : serialOperator.getDetailSet()) {
      before.setPartitionMemberDetailsBefore(
          loadModel.getPartitionedMemberDetails(before.getRegionPath()));
    }
    director.initialize(loadModel);

    // Keep stepping until the director reports that no further step is possible.
    do {
      if (cancelled.get()) {
        return Collections.emptySet();
      }
      if (membershipChange) {
        membershipChange = false;
        // Membership changed: let in-flight operations finish, then rebuild the model from
        // freshly fetched details.
        debug("Rebalancing {} detected membership changes. Refetching details", leaderRegion);
        if (this.stats != null) {
          this.stats.incRebalanceMembershipChanges(1);
        }
        loadModel.waitForOperations();
        loadModel = buildModel(parallelOperator, fetchDetails(cache), manager);
        director.membershipChanged(loadModel);
      }
      leaderRegion.checkClosed();
      cache.getCancelCriterion().checkCancelInProgress(null);
      if (logger.isDebugEnabled()) {
        logger.debug("Rebalancing {} Model:{}\n", leaderRegion, loadModel);
      }
    } while (director.nextStep());

    debug("Rebalancing {} complete. Model:{}\n", leaderRegion, loadModel);
    final long elapsedNanos = System.nanoTime() - startNanos;
    for (PartitionRebalanceDetailsImpl after : serialOperator.getDetailSet()) {
      // Elapsed time is only recorded for real runs, not simulations.
      if (!simulate) {
        after.setTime(elapsedNanos);
      }
      after.setPartitionMemberDetailsAfter(
          loadModel.getPartitionedMemberDetails(after.getRegionPath()));
    }
    return Collections.<PartitionRebalanceInfo>unmodifiableSet(serialOperator.getDetailSet());
  } finally {
    if (recoveryLock != null) {
      try {
        recoveryLock.unlock();
      } catch (CancelException ignored) {
        // lock service has been destroyed
      } catch (Exception unlockFailure) {
        logger.error(LocalizedMessage.create(LocalizedStrings.PartitionedRegionRebalanceOp_UNABLE_TO_RELEASE_RECOVERY_LOCK), unlockFailure);
      }
    }

    // Each cleanup step below is isolated so that a failure in one does not skip the next.
    try {
      if (!isRebalance) {
        InternalResourceManager.getResourceObserver().recoveryFinished(targetRegion);
      } else {
        InternalResourceManager.getResourceObserver().rebalancingFinished(targetRegion);
      }
    } catch (Exception observerFailure) {
      logger.error(LocalizedMessage.create(LocalizedStrings.PartitionedRegionRebalanceOp_ERROR_IN_RESOURCE_OBSERVER), observerFailure);
    }

    try {
      leaderRegion.getRegionAdvisor().removeMembershipListener(membershipListener);
    } catch (Exception listenerFailure) {
      logger.error(LocalizedMessage.create(LocalizedStrings.PartitionedRegionRebalanceOp_ERROR_IN_RESOURCE_OBSERVER), listenerFailure);
    }
  }
}
Use of org.apache.geode.CancelException in the Apache Geode project.
Class IndexCreationMsg, method process.
/**
 * Process this index creation message on the receiver.
 *
 * <p>
 * Looks up the partitioned region for {@code regionId}, polling up to 30 times at 500 ms
 * intervals when it is not yet available, and then delegates to
 * {@code operateOnPartitionedRegion}. A reply is sent from the {@code finally} block only when
 * {@code sendReply} is still true and {@code processorId} is non-zero; a failure recorded in
 * {@code thr} is wrapped in a {@code ReplyException}.
 *
 * @param dm the distribution manager, used for cancellation checks and passed on when replying
 */
@Override
public void process(final DistributionManager dm) {
final boolean isDebugEnabled = logger.isDebugEnabled();
// Failure to report back to the sender; null means the reply carries no exception.
Throwable thr = null;
boolean sendReply = true;
PartitionedRegion pr = null;
try {
if (isDebugEnabled) {
logger.debug("Trying to get pr with id: {}", this.regionId);
}
try {
if (isDebugEnabled) {
logger.debug("Again trying to get pr with id : {}", this.regionId);
}
pr = PartitionedRegion.getPRFromId(this.regionId);
if (isDebugEnabled) {
logger.debug("Index creation message got the pr {}", pr);
}
if (null == pr) {
boolean wait = true;
int attempts = 0;
while (wait && attempts < 30) {
// At most 30 attempts, each preceded by a 500 ms sleep (about 15 seconds in total).
// This check sits outside the inner try, so a CancelException raised here is handled by
// the outer catch (CancelException letPRInitialized) below, not by the per-attempt catch.
dm.getCancelCriterion().checkCancelInProgress(null);
if (isDebugEnabled) {
logger.debug("Waiting for Partitioned Region to be intialized with id {}for processing index creation messages", this.regionId);
}
try {
// Remember and clear any pending interrupt so the sleep can run; it is restored in the
// finally block below.
boolean interrupted = Thread.interrupted();
try {
Thread.sleep(500);
} catch (InterruptedException e) {
interrupted = true;
dm.getCancelCriterion().checkCancelInProgress(e);
} finally {
if (interrupted)
Thread.currentThread().interrupt();
}
pr = PartitionedRegion.getPRFromId(this.regionId);
if (null != pr) {
wait = false;
if (isDebugEnabled) {
logger.debug("Indexcreation message got the pr {}", pr);
}
}
attempts++;
} catch (CancelException ignorAndLoopWait) {
// Deliberately ignored: loop again. Note that attempts is not incremented on this path.
if (isDebugEnabled) {
logger.debug("IndexCreationMsg waiting for pr to be properly created with prId : {}", this.regionId);
}
}
}
}
} catch (CancelException letPRInitialized) {
// A CancelException escaped the lookup above. NOTE(review): presumably this is attributed
// to the PR still being initialized - confirm. Keep polling until a lookup completes
// without a CancelException, or until the cancel check below throws.
if (logger.isDebugEnabled()) {
logger.debug("Waiting for notification from pr being properly created on {}", this.regionId);
}
boolean wait = true;
while (wait) {
// Outside the inner try: a CancelException from this check leaves this catch block and is
// handled by catch (Throwable t) further down.
dm.getCancelCriterion().checkCancelInProgress(null);
try {
boolean interrupted = Thread.interrupted();
try {
Thread.sleep(500);
} catch (InterruptedException e) {
interrupted = true;
dm.getCancelCriterion().checkCancelInProgress(e);
} finally {
if (interrupted)
Thread.currentThread().interrupt();
}
pr = PartitionedRegion.getPRFromId(this.regionId);
// The loop ends after the first lookup that returns, even if it returned null; the null
// case is reported just below.
wait = false;
if (logger.isDebugEnabled()) {
logger.debug("Indexcreation message got the pr {}", pr);
}
} catch (CancelException ignorAndLoopWait) {
if (logger.isDebugEnabled()) {
logger.debug("IndexCreationMsg waiting for pr to be properly created with prId : {}", this.regionId);
}
}
}
}
// Still no region after waiting: fail. The exception is caught by catch (Throwable t) below
// and sent back to the sender as the reply exception.
if (pr == null) /* && failIfRegionMissing() */
{
String msg = LocalizedStrings.IndexCreationMsg_COULD_NOT_GET_PARTITIONED_REGION_FROM_ID_0_FOR_MESSAGE_1_RECEIVED_ON_MEMBER_2_MAP_3.toLocalizedString(new Object[] { Integer.valueOf(this.regionId), this, dm.getId(), PartitionedRegion.dumpPRId() });
throw new PartitionedRegionException(msg, new RegionNotFoundException(msg));
}
// The return value decides whether the finally block sends the reply.
sendReply = operateOnPartitionedRegion(dm, pr, 0);
} catch (PRLocallyDestroyedException pre) {
if (isDebugEnabled) {
logger.debug("Region is locally Destroyed ");
}
thr = pre;
} catch (VirtualMachineError err) {
SystemFailure.initiateFailure(err);
// If initiateFailure ever returns, rethrow the error so this thread does not continue.
throw err;
} catch (Throwable t) {
// Whenever you catch Error or Throwable, you must also
// catch VirtualMachineError (see above). However, there is
// _still_ a possibility that you are dealing with a cascading
// error condition, so you also need to check to see if the JVM
// is still usable:
SystemFailure.checkFailure();
// log the exception at fine level if there is no reply to the message
if (this.processorId == 0) {
logger.debug("{} exception while processing message:{}", this, t.getMessage(), t);
} else if (logger.isDebugEnabled(LogMarker.DM) && (t instanceof RuntimeException)) {
logger.debug(LogMarker.DM, "Exception caught while processing message: {}", t.getMessage(), t);
}
// NOTE(review): when t is a RegionDestroyedException, pr is non-null and pr.isClosed is false,
// thr is left null, so the reply is sent without any exception - confirm this is intended.
if (t instanceof RegionDestroyedException && pr != null) {
if (pr.isClosed) {
logger.info(LocalizedMessage.create(LocalizedStrings.IndexCreationMsg_REGION_IS_LOCALLY_DESTROYED_THROWING_REGIONDESTROYEDEXCEPTION_FOR__0, pr));
thr = new RegionDestroyedException(LocalizedStrings.IndexCreationMsg_REGION_IS_LOCALLY_DESTROYED_ON_0.toLocalizedString(dm.getId()), pr.getFullPath());
}
} else {
thr = t;
}
} finally {
// Reply only when still required and the message carries a non-zero processor id.
if (sendReply && this.processorId != 0) {
ReplyException rex = null;
if (thr != null) {
rex = new ReplyException(thr);
}
sendReply(getSender(), this.processorId, dm, rex, pr, 0);
}
}
}
Use of org.apache.geode.CancelException in the Apache Geode project.
Class PRSanityCheckMessage, method schedule.
/**
 * Send a sanity check message and schedule a timer to send another one in
 * gemfire.PRSanityCheckInterval (default 5000) milliseconds. This can be enabled with
 * gemfire.PRSanityCheckEnabled=true.
 *
 * <p>
 * Nothing is sent when the feature is disabled, when the PR root region cannot be found, or when
 * there are no recipients. A zero or negative interval disables the delayed follow-up message;
 * the immediate message is still sent.
 *
 * @param pr the partitioned region whose id and region identifier are carried in the message
 */
public static void schedule(final PartitionedRegion pr) {
  if (!Boolean.getBoolean(DistributionConfig.GEMFIRE_PREFIX + "PRSanityCheckEnabled")) {
    return;
  }
  final DM dm = pr.getDistributionManager();
  DistributedRegion prRoot = (DistributedRegion) PartitionedRegionHelper.getPRRoot(pr.getCache(), false);
  if (prRoot == null) {
    return;
  }
  final Set recipients = prRoot.getDistributionAdvisor().adviseGeneric();
  if (recipients.isEmpty()) {
    return;
  }

  // Immediate message.
  PRSanityCheckMessage instance = new PRSanityCheckMessage(recipients, pr.getPRId(), null, pr.getRegionIdentifier());
  dm.putOutgoing(instance);

  final int sanityCheckInterval = Integer.getInteger(DistributionConfig.GEMFIRE_PREFIX + "PRSanityCheckInterval", 5000);
  // Previously only 0 disabled the follow-up, so a negative configured value was passed on as
  // the timer delay. NOTE(review): SystemTimer presumably rejects a negative delay the way
  // java.util.Timer does - confirm.
  if (sanityCheckInterval <= 0) {
    return;
  }

  // Delayed follow-up message, sent once by a single-use timer.
  final PRSanityCheckMessage delayedInstance = new PRSanityCheckMessage(recipients, pr.getPRId(), null, pr.getRegionIdentifier());
  final SystemTimer tm = new SystemTimer(dm.getSystem(), true);
  SystemTimer.SystemTimerTask st = new SystemTimer.SystemTimerTask() {
    @Override
    public void run2() {
      try {
        if (!pr.isLocallyDestroyed && !pr.isClosed && !pr.isDestroyed()) {
          dm.putOutgoing(delayedInstance);
        }
      } catch (CancelException ignored) {
        // cache is closed - can't send the message
      } finally {
        // The timer exists only for this one task.
        tm.cancel();
      }
    }
  };
  try {
    tm.schedule(st, sanityCheckInterval);
  } catch (RuntimeException e) {
    // The task will never run, so it cannot cancel the timer itself; do it here.
    tm.cancel();
    throw e;
  }
}
Use of org.apache.geode.CancelException in the Apache Geode project.
Class DumpB2NRegion, method process.
/**
 * Waits for the partitioned region identified by {@code regionId} to become available and
 * finish initializing, then hands the message to {@code super.process(dm)}.
 *
 * <p>
 * The region lookup is retried every 2 seconds. Once the 10 second deadline computed on entry
 * has passed without the region being found, a reply carrying a {@code TimeoutException} is sent
 * and processing stops. A {@code CancelException}, {@code PRLocallyDestroyedException} or
 * {@code RegionDestroyedException} raised along the way is sent back to the sender wrapped in a
 * {@code ReplyException}.
 *
 * @param dm the distribution manager, used for cancellation checks and passed on when replying
 */
@Override
public void process(final DistributionManager dm) {
  PartitionedRegion pr = null;
  // Get the region, or die trying...
  final long finish = System.currentTimeMillis() + 10 * 1000;
  try {
    for (;;) {
      dm.getCancelCriterion().checkCancelInProgress(null);
      pr = PartitionedRegion.getPRFromId(this.regionId);
      if (pr != null) {
        break;
      }
      if (System.currentTimeMillis() > finish) {
        ReplyException rex = new ReplyException(new TimeoutException("Waited too long for region to initialize"));
        sendReply(getSender(), this.processorId, dm, rex, null, 0);
        return;
      }
      // wait a little
      // Remember and clear any pending interrupt so the sleep can run; restored in finally.
      boolean interrupted = Thread.interrupted();
      try {
        Thread.sleep(2000);
      } catch (InterruptedException e) {
        interrupted = true;
        // pr is always null at this point (a non-null pr breaks out of the loop above), so the
        // previous pr.checkReadiness() call could only throw a NullPointerException. Check for
        // cancellation through the distribution manager instead.
        dm.getCancelCriterion().checkCancelInProgress(e);
      } finally {
        if (interrupted) {
          Thread.currentThread().interrupt();
        }
      }
    }
    // Now, wait for the PR to finish initializing
    pr.waitForData();
    // OK, now it's safe to process this.
    super.process(dm);
  } catch (CancelException e) {
    sendReply(this.sender, this.processorId, dm, new ReplyException(e), pr, 0);
  } catch (PRLocallyDestroyedException e) {
    sendReply(this.sender, this.processorId, dm, new ReplyException(e), pr, 0);
    return;
  } catch (RegionDestroyedException rde) {
    sendReply(this.sender, this.processorId, dm, new ReplyException(rde), pr, 0);
    return;
  }
}
Aggregations