use of org.infinispan.distribution.ch.ConsistentHashFactory in project infinispan by infinispan.
the class TestingUtil method waitForNoRebalance.
/**
 * Waits until none of the given caches has a rebalance in progress.
 * <p>
 * Each cache is polled every 100 ms until all of the following hold:
 * <ul>
 * <li>the topology phase is {@code NO_REBALANCE};</li>
 * <li>the current CH has exactly as many members as there are entries in {@code caches}
 * (only the counts are compared, not the addresses);</li>
 * <li>every segment has at least {@code min(numOwners, members - zeroCapacityCaches)} owners;</li>
 * <li>if the topology id is greater than 1, rebalancing the current CH again yields an equal CH.</li>
 * </ul>
 * A single 60 second deadline, measured from the start of this call, is shared by all the caches.
 *
 * @param caches the caches that are expected to make up the cluster
 * @throws RuntimeException if the deadline passes before a cache reaches the state described above
 */
public static void waitForNoRebalance(Cache... caches) {
   // Needs to be rather large to prevent sporadic failures on CI
   final int REBALANCE_TIMEOUT_SECONDS = 60;
   final long giveup = System.nanoTime() + TimeUnit.SECONDS.toNanos(REBALANCE_TIMEOUT_SECONDS);
   int zeroCapacityCaches = 0;
   for (Cache<?, ?> c : caches) {
      // Unwrap before reading the configuration, the same way the polling loop below does,
      // so that both loops query the same cache instance.
      c = unwrapSecureCache(c);
      if (c.getCacheConfiguration().clustering().hash().capacityFactor() == 0f
            || c.getCacheManager().getCacheManagerConfiguration().isZeroCapacityNode()) {
         zeroCapacityCaches++;
      }
   }
   for (Cache<?, ?> c : caches) {
      c = unwrapSecureCache(c);
      int numOwners = c.getCacheConfiguration().clustering().hash().numOwners();
      DistributionManager distributionManager = c.getAdvancedCache().getDistributionManager();
      Address cacheAddress = c.getAdvancedCache().getRpcManager().getAddress();
      // The factory is picked from the global and cache configurations only, neither of which depends on the
      // polled topology, so it is resolved once per cache instead of once per poll.
      ConsistentHashFactory chf = StateTransferManagerImpl.pickConsistentHashFactory(
            extractGlobalConfiguration(c.getCacheManager()), c.getCacheConfiguration());
      CacheTopology cacheTopology;
      while (true) {
         cacheTopology = distributionManager.getCacheTopology();
         boolean rebalanceInProgress;
         boolean chContainsAllMembers;
         boolean currentChIsBalanced;
         if (cacheTopology != null) {
            rebalanceInProgress = cacheTopology.getPhase() != CacheTopology.Phase.NO_REBALANCE;
            ConsistentHash currentCH = cacheTopology.getCurrentCH();
            chContainsAllMembers = currentCH.getMembers().size() == caches.length;
            currentChIsBalanced = true;
            // Zero-capacity members are subtracted from the number of owners a segment is required to have
            int actualNumOwners = Math.min(numOwners, currentCH.getMembers().size() - zeroCapacityCaches);
            for (int i = 0; i < currentCH.getNumSegments(); i++) {
               if (currentCH.locateOwnersForSegment(i).size() < actualNumOwners) {
                  currentChIsBalanced = false;
                  break;
               }
            }
            // We need to check that the topologyId > 1 to account for nodes restarting
            if (chContainsAllMembers && !rebalanceInProgress && cacheTopology.getTopologyId() > 1) {
               // If rebalancing the current CH would change it, treat that as a rebalance still to come
               rebalanceInProgress = !chf.rebalance(currentCH).equals(currentCH);
            }
            if (chContainsAllMembers && !rebalanceInProgress && currentChIsBalanced)
               break;
         } else {
            // No topology installed yet: keep polling, and report the missing member list on timeout
            rebalanceInProgress = false;
            chContainsAllMembers = false;
            currentChIsBalanced = true;
         }
         // Overflow-safe deadline check: compare the difference of the nanoTime values, not the values themselves
         if (System.nanoTime() - giveup > 0) {
            String message;
            if (!chContainsAllMembers) {
               Address[] addresses = new Address[caches.length];
               for (int i = 0; i < caches.length; i++) {
                  addresses[i] = caches[i].getCacheManager().getAddress();
               }
               message = String.format("Cache %s timed out waiting for rebalancing to complete on node %s, "
                           + "expected member list is %s, current member list is %s!",
                     c.getName(), cacheAddress, Arrays.toString(addresses),
                     cacheTopology == null ? "N/A" : cacheTopology.getCurrentCH().getMembers());
            } else {
               message = String.format("Cache %s timed out waiting for rebalancing to complete on node %s, "
                           + "current topology is %s. rebalanceInProgress=%s, currentChIsBalanced=%s",
                     c.getName(), c.getCacheManager().getAddress(), cacheTopology, rebalanceInProgress,
                     currentChIsBalanced);
            }
            log.error(message);
            throw new RuntimeException(message);
         }
         LockSupport.parkNanos(TimeUnit.MILLISECONDS.toNanos(100));
      }
      log.trace("Node " + cacheAddress + " finished state transfer, has topology " + cacheTopology);
   }
}
use of org.infinispan.distribution.ch.ConsistentHashFactory in project infinispan by infinispan.
the class LocalCacheStatus method doHandleTopologyUpdate.
/**
 * Records a received cache topology in the {@code LocalCacheStatus} and forwards it to the cache's
 * {@code CacheTopologyHandler}.
 * <p>
 * The availability mode is applied before the topology when it is not {@code AVAILABLE} (and not {@code null}),
 * and after the topology when it is {@code AVAILABLE}.
 *
 * @return a stage completed with {@code true} if the topology was applied, {@code false} if it was ignored.
 */
private CompletionStage<Boolean> doHandleTopologyUpdate(String cacheName, CacheTopology cacheTopology, AvailabilityMode availabilityMode, int viewId, Address sender, LocalCacheStatus cacheStatus) {
   CacheTopology previousTopology;
   synchronized (cacheStatus) {
      if (cacheTopology == null) {
         // Nothing to apply, but still answer true so that the join request is not re-sent.
         return CompletableFutures.completedTrue();
      }

      // Make all the persistent UUIDs of the new topology known locally
      registerPersistentUUID(cacheTopology);

      previousTopology = cacheStatus.getCurrentTopology();
      boolean lateUpdate = previousTopology != null
            && cacheTopology.getTopologyId() <= previousTopology.getTopologyId();
      if (lateUpdate) {
         log.debugf("Ignoring late consistent hash update for cache %s, current topology is %s: %s", cacheName, previousTopology.getTopologyId(), cacheTopology);
         return CompletableFutures.completedFalse();
      }

      boolean accepted = updateCacheTopology(cacheName, cacheTopology, viewId, sender, cacheStatus);
      if (!accepted) {
         return CompletableFutures.completedFalse();
      }
   }

   CacheTopologyHandler topologyHandler = cacheStatus.getHandler();
   ConsistentHash currentCH = cacheTopology.getCurrentCH();
   ConsistentHash pendingCH = cacheTopology.getPendingCH();
   ConsistentHash unionCH;
   if (pendingCH == null) {
      unionCH = null;
   } else {
      ConsistentHashFactory chf = cacheStatus.getJoinInfo().getConsistentHashFactory();
      switch (cacheTopology.getPhase()) {
         case READ_NEW_WRITE_ALL:
            // In this phase pendingCH is the read CH. When members are being removed, the union must list
            // the pendingCH owners ahead of the currentCH owners, because the primary owners of the read CH
            // and the write CH have to match.
            unionCH = chf.union(pendingCH, currentCH);
            break;
         default:
            unionCH = chf.union(currentCH, pendingCH);
            break;
      }
   }

   List<PersistentUUID> persistentUUIDs = persistentUUIDManager.mapAddresses(cacheTopology.getActualMembers());
   CacheTopology unionTopology = new CacheTopology(cacheTopology.getTopologyId(), cacheTopology.getRebalanceId(),
         currentCH, pendingCH, unionCH, cacheTopology.getPhase(), cacheTopology.getActualMembers(),
         persistentUUIDs);

   boolean updateAvailabilityModeFirst = availabilityMode != AvailabilityMode.AVAILABLE;

   CompletionStage<Void> pipeline = resetLocalTopologyBeforeRebalance(cacheName, cacheTopology, previousTopology, topologyHandler);

   // Step 1: log the routing table and, for a non-AVAILABLE mode, switch the availability mode up front
   pipeline = pipeline.thenCompose(ignored -> {
      unionTopology.logRoutingTableInformation(cacheName);
      if (!updateAvailabilityModeFirst || availabilityMode == null) {
         return CompletableFutures.completedNull();
      }
      return cacheStatus.getPartitionHandlingManager().setAvailabilityMode(availabilityMode);
   });

   // Step 2: hand the topology to the handler, either as a rebalance start or as a plain CH update
   pipeline = pipeline.thenCompose(ignored -> {
      boolean startConflictResolution = cacheTopology.getPhase() == CacheTopology.Phase.CONFLICT_RESOLUTION;
      if (startConflictResolution || unionCH == null) {
         return topologyHandler.updateConsistentHash(unionTopology);
      }
      if (previousTopology != null && previousTopology.getRebalanceId() == cacheTopology.getRebalanceId()) {
         return topologyHandler.updateConsistentHash(unionTopology);
      }
      // This CH_UPDATE command was sent after a REBALANCE_START command, but it got here first.
      // Start the rebalance right away; the REBALANCE_START command is ignored once it arrives.
      log.tracef("This topology update has a pending CH, starting the rebalance now");
      return topologyHandler.rebalance(unionTopology);
   });

   // Step 3: for the AVAILABLE mode, the availability mode is applied only after the topology
   if (!updateAvailabilityModeFirst) {
      pipeline = pipeline.thenCompose(ignored -> cacheStatus.getPartitionHandlingManager().setAvailabilityMode(availabilityMode));
   }
   return pipeline.thenApply(ignored -> true);
}
use of org.infinispan.distribution.ch.ConsistentHashFactory in project infinispan by infinispan.
the class ClusterCacheStatus method startQueuedRebalance.
/**
* Processes the rebalance request stored in {@code queuedRebalanceMembers}, if there is one.
* <p>
* Returns without changing the topology while conflict resolution is in progress, while rebalancing is
* disabled, while a {@code rebalanceConfirmationCollector} is still set, or when the queued member list is
* empty. When nothing is queued, only the stable topology is brought up to date with the current topology, if needed.
* <p>
* Otherwise one of three things happens, depending on {@code rebalanceType} and on whether the balanced CH
* differs from the current CH: a new topology is installed and broadcast without a rebalance, a rebalance is
* started by broadcasting a topology with a pending CH, or only the stable topology is updated.
*/
public synchronized void startQueuedRebalance() {
// We cannot start rebalance until queued CR is complete
if (conflictResolution != null) {
log.tracef("Postponing rebalance for cache %s as conflict resolution is in progress", cacheName);
return;
}
if (queuedRebalanceMembers == null) {
// We don't have a queued rebalance. We may need to broadcast a stable topology update
if (stableTopology == null || stableTopology.getTopologyId() < currentTopology.getTopologyId()) {
stableTopology = currentTopology;
log.updatingStableTopology(cacheName, stableTopology);
clusterTopologyManager.broadcastStableTopologyUpdate(cacheName, stableTopology);
}
return;
}
CacheTopology cacheTopology = getCurrentTopology();
// The next three early returns leave queuedRebalanceMembers untouched
if (!isRebalanceEnabled()) {
log.tracef("Postponing rebalance for cache %s, rebalancing is disabled", cacheName);
return;
}
if (rebalanceConfirmationCollector != null) {
log.tracef("Postponing rebalance for cache %s, there's already a topology change in progress: %s", cacheName, rebalanceConfirmationCollector);
return;
}
if (queuedRebalanceMembers.isEmpty()) {
log.tracef("Ignoring request to rebalance cache %s, it doesn't have any member", cacheName);
return;
}
// No current topology yet: delegate to createInitialCacheTopology() instead of rebalancing
if (cacheTopology == null) {
createInitialCacheTopology();
return;
}
List<Address> newMembers = updateMembersPreservingOrder(cacheTopology.getMembers(), queuedRebalanceMembers);
// The queued request is cleared here, so the two "Ignoring request" returns below drop it for good
queuedRebalanceMembers = null;
log.tracef("Rebalancing consistent hash for cache %s, members are %s", cacheName, newMembers);
int newTopologyId = cacheTopology.getTopologyId() + 1;
int newRebalanceId = cacheTopology.getRebalanceId() + 1;
ConsistentHash currentCH = cacheTopology.getCurrentCH();
if (currentCH == null) {
// There was one node in the cache before, and it left after the rebalance was triggered
// but before the rebalance actually started.
log.tracef("Ignoring request to rebalance cache %s, it doesn't have a consistent hash", cacheName);
return;
}
if (!expectedMembers.containsAll(newMembers)) {
// newMembers is reduced in place to the members missing from expectedMembers, only for the log message
newMembers.removeAll(expectedMembers);
log.tracef("Ignoring request to rebalance cache %s, we have new leavers: %s", cacheName, newMembers);
return;
}
ConsistentHashFactory chFactory = getJoinInfo().getConsistentHashFactory();
// This update will only add the joiners to the CH, we have already checked that we don't have leavers
ConsistentHash updatedMembersCH = chFactory.updateMembers(currentCH, newMembers, getCapacityFactors());
ConsistentHash balancedCH = chFactory.rebalance(updatedMembersCH);
// True when the current CH still has members that are not in expectedMembers
boolean removeMembers = !expectedMembers.containsAll(currentCH.getMembers());
if (removeMembers) {
// Leavers should have been removed before starting a rebalance, but that might have failed
// e.g. if all the remaining members had capacity factor 0
Collection<Address> unwantedMembers = new LinkedList<>(currentCH.getMembers());
unwantedMembers.removeAll(expectedMembers);
CLUSTER.debugf("Removing unwanted members from the current consistent hash: %s", unwantedMembers);
// From here on the CH with the updated member list is used as the current CH
currentCH = updatedMembersCH;
}
// At most one of these three flags ends up true; they select the branch taken below
boolean updateTopology = false;
boolean rebalance = false;
boolean updateStableTopology = false;
if (rebalanceType == RebalanceType.NONE) {
// With RebalanceType.NONE the balanced CH is installed directly, without a rebalance
updateTopology = true;
} else if (balancedCH.equals(currentCH)) {
if (log.isTraceEnabled())
log.tracef("The balanced CH is the same as the current CH, not rebalancing");
// A new topology is still needed to drop a leftover pending CH or the unwanted members
updateTopology = cacheTopology.getPendingCH() != null || removeMembers;
// After a cluster view change that leaves only 1 node, we don't need either a topology update or a rebalance
// but we must still update the stable topology
updateStableTopology = cacheTopology.getPendingCH() == null && (stableTopology == null || cacheTopology.getTopologyId() != stableTopology.getTopologyId());
} else {
rebalance = true;
}
if (updateTopology) {
// Same rebalance id as before, balancedCH as the current CH, no pending CH, phase NO_REBALANCE
CacheTopology newTopology = new CacheTopology(newTopologyId, cacheTopology.getRebalanceId(), balancedCH, null, CacheTopology.Phase.NO_REBALANCE, balancedCH.getMembers(), persistentUUIDManager.mapAddresses(balancedCH.getMembers()));
log.tracef("Updating cache %s topology without rebalance: %s", cacheName, newTopology);
setCurrentTopology(newTopology);
CLUSTER.updatingTopology(cacheName, newTopology, availabilityMode);
eventLogger.info(EventLogCategory.CLUSTER, MESSAGES.cacheMembersUpdated(newTopology.getMembers(), newTopology.getTopologyId()));
clusterTopologyManager.broadcastTopologyUpdate(cacheName, newTopology, getAvailabilityMode());
} else if (rebalance) {
// The first phase of the rebalance depends on the rebalance type; NONE was handled above
CacheTopology.Phase newPhase;
switch(rebalanceType) {
case FOUR_PHASE:
newPhase = CacheTopology.Phase.READ_OLD_WRITE_ALL;
break;
case TWO_PHASE:
newPhase = CacheTopology.Phase.TRANSITORY;
break;
default:
throw new IllegalStateException();
}
// New rebalance id, currentCH stays the current CH and balancedCH becomes the pending CH
CacheTopology newTopology = new CacheTopology(newTopologyId, newRebalanceId, currentCH, balancedCH, newPhase, balancedCH.getMembers(), persistentUUIDManager.mapAddresses(balancedCH.getMembers()));
log.tracef("Updating cache %s topology for rebalance: %s", cacheName, newTopology);
setCurrentTopology(newTopology);
rebalanceInProgress = true;
assert rebalanceConfirmationCollector == null;
// NOTE(review): endRebalance is presumably invoked once all the members have confirmed - confirm
rebalanceConfirmationCollector = new RebalanceConfirmationCollector(cacheName, newTopology.getTopologyId(), newTopology.getMembers(), this::endRebalance);
CLUSTER.startingRebalancePhase(cacheName, newTopology);
eventLogger.info(EventLogCategory.CLUSTER, MESSAGES.cacheRebalanceStart(newTopology.getMembers(), newTopology.getPhase(), newTopology.getTopologyId()));
clusterTopologyManager.broadcastRebalanceStart(cacheName, newTopology);
} else if (updateStableTopology) {
// Neither a topology update nor a rebalance: only promote the current topology to stable
stableTopology = currentTopology;
clusterTopologyManager.broadcastStableTopologyUpdate(cacheName, stableTopology);
}
}
use of org.infinispan.distribution.ch.ConsistentHashFactory in project infinispan by infinispan.
the class ClusterCacheStatus method restartConflictResolution.
/**
 * Restarts the conflict resolution that is in progress so that it runs with the given members.
 * <p>
 * Nothing is restarted when conflict resolution on merge is off or none is in progress, when only one member
 * is left (the current attempt is then cancelled and cleared), or when the running conflict resolution reports
 * that no restart is required for these members.
 *
 * @param members the members the conflict resolution should run with
 * @return {@code true} if a new conflict resolution topology was installed, broadcast and queued,
 * {@code false} otherwise
 */
@Override
public synchronized boolean restartConflictResolution(List<Address> members) {
   // A null conflictResolution means there is no CR in progress
   boolean conflictResolutionRunning = resolveConflictsOnMerge() && conflictResolution != null;
   if (!conflictResolutionRunning) {
      return false;
   }

   // With a single node left there is nothing to reattempt: cancel the CR and clean up
   boolean singleMember = members.size() == 1;
   if (singleMember) {
      log.debugf("Cache %s cancelling conflict resolution as only one cluster member: members=%s", cacheName, members);
      conflictResolution.cancelCurrentAttempt();
      conflictResolution = null;
      return false;
   }

   // The CR already runs with these members, so it can carry on as it is
   boolean restartRequired = conflictResolution.restartRequired(members);
   if (!restartRequired) {
      if (log.isTraceEnabled()) {
         log.tracef("Cache %s not restarting conflict resolution, existing conflict topology contains all members (%s)", cacheName, members);
      }
      return false;
   }

   // Derive the new CH from the CH of the conflict topology that is being replaced
   CacheTopology previousConflictTopology = conflictResolution.topology;
   ConsistentHashFactory chf = getJoinInfo().getConsistentHashFactory();
   ConsistentHash newHash = chf.updateMembers(previousConflictTopology.getCurrentCH(), members, capacityFactors);

   // Next topology id, same rebalance id, no pending CH
   int nextTopologyId = currentTopology.getTopologyId() + 1;
   int rebalanceId = currentTopology.getRebalanceId();
   CacheTopology restartedTopology = new CacheTopology(nextTopologyId, rebalanceId, newHash, null,
         CacheTopology.Phase.CONFLICT_RESOLUTION, members, persistentUUIDManager.mapAddresses(members));

   currentTopology = restartedTopology;
   log.debugf("Cache %s restarting conflict resolution with topology %s", cacheName, currentTopology);
   clusterTopologyManager.broadcastTopologyUpdate(cacheName, restartedTopology, availabilityMode);
   queueConflictResolution(restartedTopology, conflictResolution.preferredNodes);
   return true;
}
use of org.infinispan.distribution.ch.ConsistentHashFactory in project infinispan by infinispan.
the class StateTransferGetGroupKeysTest method createConfigurationBuilder.
/**
 * Creates the clustered cache configuration for {@code cacheMode}: in-memory state is fetched, groups are
 * enabled, there is a single segment, and the consistent hash factory is a {@code CustomConsistentHashFactory}
 * built with the scattered trait for scattered cache modes and with the default trait otherwise.
 *
 * @return the configuration builder
 */
private ConfigurationBuilder createConfigurationBuilder() {
   ConfigurationBuilder builder = getDefaultClusteredCacheConfig(cacheMode, false);
   builder.clustering().stateTransfer().fetchInMemoryState(true);
   builder.clustering().hash().groups().enabled(true);
   builder.clustering().hash().numSegments(1);

   // Only the trait differs between the two kinds of cache mode
   ConsistentHashFactory chf;
   if (!cacheMode.isScattered()) {
      chf = new CustomConsistentHashFactory(new BaseControlledConsistentHashFactory.DefaultTrait(), cacheMode);
   } else {
      chf = new CustomConsistentHashFactory(new BaseControlledConsistentHashFactory.ScatteredTrait(), cacheMode);
   }
   builder.clustering().hash().consistentHashFactory(chf);
   return builder;
}
Aggregations