Use of org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap in project ignite by apache.
From the class CacheGroupMetricsImpl, method nodePartitionsCountByState.
/**
 * Count of partitions with a given state on the node.
 *
 * @param nodeId Node id.
 * @param state State.
 */
private int nodePartitionsCountByState(UUID nodeId, GridDhtPartitionState state) {
    int parts = ctx.topology().partitions();

    GridDhtPartitionMap partMap = ctx.topology().partitionMap(false).get(nodeId);

    int cnt = 0;

    for (int part = 0; part < parts; part++) {
        if (partMap.get(part) == state)
            cnt++;
    }

    return cnt;
}
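The method above scans every partition index for a single state. As an illustration only (not code from the ignite project), the same GridDhtPartitionMap can be tallied across all states in one pass over map(), the accessor used in the allOwners() snippets below; the helper name partitionStateHistogram and the import path for GridDhtPartitionState are assumptions and may differ between Ignite versions.

import java.util.EnumMap;
import java.util.Map;
import org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap;
import org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState;

/** Hypothetical helper: counts how many partitions a node holds in each state. */
static Map<GridDhtPartitionState, Integer> partitionStateHistogram(GridDhtPartitionMap partMap) {
    Map<GridDhtPartitionState, Integer> histogram = new EnumMap<>(GridDhtPartitionState.class);

    // Iterate only over the partitions the node actually reports instead of probing every index.
    for (Map.Entry<Integer, GridDhtPartitionState> e : partMap.map().entrySet())
        histogram.merge(e.getValue(), 1, Integer::sum);

    return histogram;
}

A caller could then read histogram.getOrDefault(OWNING, 0) rather than invoking a counting method once per state.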
Use of org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap in project ignite by apache.
From the class GridDhtPartitionTopologyImpl, method beforeExchange.
/**
 * {@inheritDoc}
 */
@Override public void beforeExchange(GridDhtPartitionsExchangeFuture exchFut, boolean affReady, boolean updateMoving)
    throws IgniteCheckedException {
    ctx.database().checkpointReadLock();

    try {
        U.writeLock(lock);

        try {
            if (stopping)
                return;

            assert lastTopChangeVer.equals(exchFut.initialVersion()) : "Invalid topology version [topVer=" +
                lastTopChangeVer + ", exchId=" + exchFut.exchangeId() + ']';

            ExchangeDiscoveryEvents evts = exchFut.context().events();

            if (affReady) {
                assert grp.affinity().lastVersion().equals(evts.topologyVersion()) : "Invalid affinity version [" +
                    "grp=" + grp.cacheOrGroupName() +
                    ", affVer=" + grp.affinity().lastVersion() +
                    ", evtsVer=" + evts.topologyVersion() + ']';

                lastTopChangeVer = readyTopVer = evts.topologyVersion();

                discoCache = evts.discoveryCache();
            }

            if (log.isDebugEnabled()) {
                log.debug("Partition map beforeExchange [grp=" + grp.cacheOrGroupName() +
                    ", exchId=" + exchFut.exchangeId() + ", fullMap=" + fullMapString() + ']');
            }

            long updateSeq = this.updateSeq.incrementAndGet();

            if (exchFut.exchangeType() == ALL && !exchFut.rebalanced())
                cntrMap.clear();

            initializeFullMap(updateSeq);

            boolean grpStarted = exchFut.cacheGroupAddedOnExchange(grp.groupId(), grp.receivedFrom());

            if (evts.hasServerLeft()) {
                for (DiscoveryEvent evt : evts.events()) {
                    if (ExchangeDiscoveryEvents.serverLeftEvent(evt))
                        removeNode(evt.eventNode().id());
                }
            }
            else if (affReady && grpStarted && exchFut.exchangeType() == NONE) {
                assert !exchFut.context().mergeExchanges() : exchFut;
                assert node2part != null && node2part.valid() : exchFut;

                // Initialize node maps if group was started from joining client.
                final List<ClusterNode> nodes = exchFut.firstEventCache().cacheGroupAffinityNodes(grp.groupId());

                for (ClusterNode node : nodes) {
                    if (!node2part.containsKey(node.id()) && ctx.discovery().alive(node)) {
                        final GridDhtPartitionMap partMap = new GridDhtPartitionMap(node.id(),
                            1L,
                            exchFut.initialVersion(),
                            new GridPartitionStateMap(),
                            false);

                        final AffinityAssignment aff = grp.affinity().cachedAffinity(exchFut.initialVersion());

                        for (Integer p0 : aff.primaryPartitions(node.id()))
                            partMap.put(p0, OWNING);

                        for (Integer p0 : aff.backupPartitions(node.id()))
                            partMap.put(p0, OWNING);

                        node2part.put(node.id(), partMap);
                    }
                }
            }

            if (grp.affinityNode()) {
                if (grpStarted || exchFut.firstEvent().type() == EVT_DISCOVERY_CUSTOM_EVT ||
                    exchFut.serverNodeDiscoveryEvent()) {
                    AffinityTopologyVersion affVer;
                    List<List<ClusterNode>> affAssignment;

                    if (affReady) {
                        affVer = evts.topologyVersion();

                        assert grp.affinity().lastVersion().equals(affVer) : "Invalid affinity [topVer=" +
                            grp.affinity().lastVersion() + ", grp=" + grp.cacheOrGroupName() +
                            ", affVer=" + affVer + ", fut=" + exchFut + ']';

                        affAssignment = grp.affinity().readyAssignments(affVer);
                    }
                    else {
                        assert !exchFut.context().mergeExchanges();

                        affVer = exchFut.initialVersion();

                        affAssignment = grp.affinity().idealAssignmentRaw();
                    }

                    initPartitions(affVer, affAssignment, exchFut, updateSeq);
                }
            }

            consistencyCheck();

            if (updateMoving) {
                assert grp.affinity().lastVersion().equals(evts.topologyVersion());

                createMovingPartitions(grp.affinity().readyAffinity(evts.topologyVersion()));
            }

            if (log.isDebugEnabled()) {
                log.debug("Partition map after beforeExchange [grp=" + grp.cacheOrGroupName() + ", " +
                    "exchId=" + exchFut.exchangeId() + ", fullMap=" + fullMapString() + ']');
            }

            if (log.isTraceEnabled()) {
                log.trace("Partition states after beforeExchange [grp=" + grp.cacheOrGroupName() +
                    ", exchId=" + exchFut.exchangeId() + ", states=" + dumpPartitionStates() + ']');
            }
        }
        finally {
            lock.writeLock().unlock();
        }
    }
    finally {
        ctx.database().checkpointReadUnlock();
    }
}
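The GridDhtPartitionMap-specific part of beforeExchange is the block that seeds a map for a joining client's affinity nodes missing from node2part. The following is a minimal standalone sketch of just that construction, reusing the constructor and AffinityAssignment accessors shown above; the helper name seedOwningMap is hypothetical and the import paths are assumptions that may differ between Ignite versions.

import java.util.UUID;
import org.apache.ignite.internal.processors.affinity.AffinityAssignment;
import org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion;
import org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap;
import org.apache.ignite.internal.util.GridPartitionStateMap;
import static org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState.OWNING;

/** Hypothetical helper: builds a partition map marking all of a node's primary and backup partitions as OWNING. */
static GridDhtPartitionMap seedOwningMap(UUID nodeId, AffinityTopologyVersion topVer, AffinityAssignment aff) {
    // Update sequence 1 and the exchange's initial version, mirroring the beforeExchange() block above.
    GridDhtPartitionMap partMap = new GridDhtPartitionMap(nodeId, 1L, topVer, new GridPartitionStateMap(), false);

    for (Integer p : aff.primaryPartitions(nodeId))
        partMap.put(p, OWNING);

    for (Integer p : aff.backupPartitions(nodeId))
        partMap.put(p, OWNING);

    return partMap;
}

In the original method this map is created only for nodes that are still alive and absent from node2part, so departed nodes are not resurrected.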
Use of org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap in project ignite by apache.
From the class GridDhtPartitionTopologyImpl, method allOwners.
/**
 * {@inheritDoc}
 */
@Override public List<List<ClusterNode>> allOwners() {
    lock.readLock().lock();

    try {
        int parts = partitions();

        List<List<ClusterNode>> res = new ArrayList<>(parts);

        for (int i = 0; i < parts; i++)
            res.add(new ArrayList<>());

        List<ClusterNode> allNodes = discoCache.cacheGroupAffinityNodes(grp.groupId());

        for (int i = 0; i < allNodes.size(); i++) {
            ClusterNode node = allNodes.get(i);

            GridDhtPartitionMap nodeParts = node2part.get(node.id());

            if (nodeParts != null) {
                for (Map.Entry<Integer, GridDhtPartitionState> e : nodeParts.map().entrySet()) {
                    if (e.getValue() == OWNING) {
                        int part = e.getKey();

                        List<ClusterNode> owners = res.get(part);

                        owners.add(node);
                    }
                }
            }
        }

        return res;
    }
    finally {
        lock.readLock().unlock();
    }
}
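Since the returned list is indexed by partition id, it is straightforward to invert it into per-node statistics. Below is a minimal sketch that consumes only the List<List<ClusterNode>> shape returned above; the helper name ownedPartitionsPerNode is hypothetical.

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import org.apache.ignite.cluster.ClusterNode;

/** Hypothetical helper: counts how many partitions each node owns, given the result of allOwners(). */
static Map<UUID, Integer> ownedPartitionsPerNode(List<List<ClusterNode>> owners) {
    Map<UUID, Integer> cnts = new HashMap<>();

    for (List<ClusterNode> partOwners : owners)
        for (ClusterNode node : partOwners)
            cnts.merge(node.id(), 1, Integer::sum);

    return cnts;
}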
Use of org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap in project ignite by apache.
From the class GridDhtPartitionTopologyImpl, method resetOwners.
/**
 * {@inheritDoc}
 */
@Override public Map<UUID, Set<Integer>> resetOwners(Map<Integer, Set<UUID>> ownersByUpdCounters, Set<Integer> haveHist,
    GridDhtPartitionsExchangeFuture exchFut) {
    Map<UUID, Set<Integer>> res = new HashMap<>();

    Collection<DiscoveryEvent> evts = exchFut.events().events();

    Set<UUID> joinedNodes = U.newHashSet(evts.size());

    for (DiscoveryEvent evt : evts) {
        if (evt.type() == EVT_NODE_JOINED)
            joinedNodes.add(evt.eventNode().id());
    }

    ctx.database().checkpointReadLock();

    try {
        Map<UUID, Set<Integer>> addToWaitGroups = new HashMap<>();

        lock.writeLock().lock();

        try {
            // First process local partitions.
            UUID locNodeId = ctx.localNodeId();

            for (Map.Entry<Integer, Set<UUID>> entry : ownersByUpdCounters.entrySet()) {
                int part = entry.getKey();
                Set<UUID> maxCounterPartOwners = entry.getValue();

                GridDhtLocalPartition locPart = localPartition(part);

                if (locPart == null || locPart.state() != OWNING)
                    continue;

                // Partition state should be mutated only on joining nodes, if any exist for the exchange.
                if (joinedNodes.isEmpty() && !maxCounterPartOwners.contains(locNodeId)) {
                    rebalancePartition(part, !haveHist.contains(part), exchFut);

                    res.computeIfAbsent(locNodeId, n -> new HashSet<>()).add(part);
                }
            }

            // Then process node maps.
            for (Map.Entry<Integer, Set<UUID>> entry : ownersByUpdCounters.entrySet()) {
                int part = entry.getKey();
                Set<UUID> maxCounterPartOwners = entry.getValue();

                for (Map.Entry<UUID, GridDhtPartitionMap> remotes : node2part.entrySet()) {
                    UUID remoteNodeId = remotes.getKey();

                    if (!joinedNodes.isEmpty() && !joinedNodes.contains(remoteNodeId))
                        continue;

                    GridDhtPartitionMap partMap = remotes.getValue();

                    GridDhtPartitionState state = partMap.get(part);

                    if (state != OWNING)
                        continue;

                    if (!maxCounterPartOwners.contains(remoteNodeId)) {
                        partMap.put(part, MOVING);

                        partMap.updateSequence(partMap.updateSequence() + 1, partMap.topologyVersion());

                        if (partMap.nodeId().equals(locNodeId))
                            updateSeq.setIfGreater(partMap.updateSequence());

                        res.computeIfAbsent(remoteNodeId, n -> new HashSet<>()).add(part);
                    }
                }
            }

            for (Map.Entry<UUID, Set<Integer>> entry : res.entrySet()) {
                UUID nodeId = entry.getKey();
                Set<Integer> rebalancedParts = entry.getValue();

                addToWaitGroups.put(nodeId, new HashSet<>(rebalancedParts));

                if (!rebalancedParts.isEmpty()) {
                    Set<Integer> historical = rebalancedParts.stream()
                        .filter(haveHist::contains)
                        .collect(Collectors.toSet());

                    // Filter out partitions having WAL history.
                    rebalancedParts.removeAll(historical);

                    U.warn(log, "Partitions have been scheduled for rebalancing due to outdated update counter " +
                        "[grp=" + grp.cacheOrGroupName() +
                        ", readyTopVer=" + readyTopVer +
                        ", topVer=" + exchFut.initialVersion() +
                        ", nodeId=" + nodeId +
                        ", partsFull=" + S.compact(rebalancedParts) +
                        ", partsHistorical=" + S.compact(historical) + "]");
                }
            }

            node2part = new GridDhtPartitionFullMap(node2part, updateSeq.incrementAndGet());
        }
        finally {
            lock.writeLock().unlock();
        }

        List<List<ClusterNode>> ideal = ctx.affinity().affinity(groupId()).idealAssignmentRaw();

        for (Map.Entry<UUID, Set<Integer>> entry : addToWaitGroups.entrySet()) {
            // Add to wait groups to ensure late assignment switch after all partitions are rebalanced.
            for (Integer part : entry.getValue()) {
                ctx.cache().context().affinity().addToWaitGroup(groupId(), part, topologyVersionFuture().initialVersion(),
                    ideal.get(part));
            }
        }
    }
    finally {
        ctx.database().checkpointReadUnlock();
    }

    return res;
}
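At the end of the write-locked block, rebalanced partitions are split into historical (WAL-based) and full rebalance by filtering against haveHist. That split can be expressed as a single partitioning step over plain collections; this is a sketch of the idea, not the project's code, and the helper name splitByHistory is hypothetical.

import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * Hypothetical helper: splits rebalanced partitions into those with WAL history (historical rebalance)
 * and those needing a full rebalance, mirroring the haveHist filtering in resetOwners().
 */
static Map<Boolean, List<Integer>> splitByHistory(Set<Integer> rebalancedParts, Set<Integer> haveHist) {
    // Key true -> historical rebalance, key false -> full rebalance.
    return rebalancedParts.stream().collect(Collectors.partitioningBy(haveHist::contains));
}

The true bucket corresponds to partsHistorical in the warning message above, the false bucket to partsFull.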
Use of org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap in project ignite by apache.
From the class GridClientPartitionTopology, method allOwners.
/**
 * {@inheritDoc}
 */
@Override public List<List<ClusterNode>> allOwners() {
    lock.readLock().lock();

    try {
        int parts = partitions();

        List<List<ClusterNode>> res = new ArrayList<>(parts);

        for (int i = 0; i < parts; i++)
            res.add(new ArrayList<>());

        List<ClusterNode> allNodes = discoCache.cacheGroupAffinityNodes(grpId);

        for (int i = 0; i < allNodes.size(); i++) {
            ClusterNode node = allNodes.get(i);

            GridDhtPartitionMap nodeParts = node2part.get(node.id());

            if (nodeParts != null) {
                for (Map.Entry<Integer, GridDhtPartitionState> e : nodeParts.map().entrySet()) {
                    if (e.getValue() == OWNING) {
                        int part = e.getKey();

                        List<ClusterNode> owners = res.get(part);

                        owners.add(node);
                    }
                }
            }
        }

        return res;
    }
    finally {
        lock.readLock().unlock();
    }
}
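This client-side topology returns the same shape as the server-side allOwners(), so the result can also be scanned for partitions that currently have no OWNING copy, which may indicate data loss or pending rebalance. A minimal sketch assuming only the returned list; the helper name partitionsWithoutOwners is hypothetical.

import java.util.ArrayList;
import java.util.List;
import org.apache.ignite.cluster.ClusterNode;

/** Hypothetical helper: returns ids of partitions for which no node is currently in the OWNING state. */
static List<Integer> partitionsWithoutOwners(List<List<ClusterNode>> owners) {
    List<Integer> empty = new ArrayList<>();

    for (int part = 0; part < owners.size(); part++) {
        if (owners.get(part).isEmpty())
            empty.add(part);
    }

    return empty;
}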