Use of org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap in project ignite by apache.
The class GridDhtPartitionTopologyImpl, method removeNode.
/**
 * Removes node from local {@link #node2part} map and recalculates {@link #diffFromAffinity}.
 *
 * @param nodeId Node to remove.
 */
private void removeNode(UUID nodeId) {
    assert nodeId != null;
    assert lock.isWriteLockedByCurrentThread();

    ClusterNode oldest = discoCache.oldestAliveServerNode();

    assert oldest != null || ctx.kernalContext().clientNode();

    ClusterNode loc = ctx.localNode();

    if (node2part != null) {
        if (loc.equals(oldest) && !node2part.nodeId().equals(loc.id()))
            node2part = new GridDhtPartitionFullMap(loc.id(), loc.order(), updateSeq.get(), node2part, false);
        else
            node2part = new GridDhtPartitionFullMap(node2part, node2part.updateSequence());

        GridDhtPartitionMap parts = node2part.remove(nodeId);

        if (!grp.isReplicated()) {
            if (parts != null) {
                for (Integer p : parts.keySet()) {
                    Set<UUID> diffIds = diffFromAffinity.get(p);

                    if (diffIds != null)
                        diffIds.remove(nodeId);
                }
            }
        }

        consistencyCheck();
    }
}
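To make the interplay of the two maps above easier to follow outside of Ignite, here is a minimal sketch using plain java.util collections as stand-ins for GridDhtPartitionFullMap and GridDhtPartitionMap. The class and member names (NodeRemovalSketch, node2part, diffFromAffinity) are illustrative only and not part of the Ignite API.

import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.UUID;

/** Simplified model: node -> (partition -> state), plus a partition -> "nodes differing from affinity" index. */
public class NodeRemovalSketch {
    private final Map<UUID, Map<Integer, String>> node2part = new HashMap<>();
    private final Map<Integer, Set<UUID>> diffFromAffinity = new HashMap<>();

    /** Removes a node and drops it from every per-partition diff set it appeared in. */
    void removeNode(UUID nodeId) {
        Map<Integer, String> parts = node2part.remove(nodeId);

        if (parts != null) {
            for (Integer p : parts.keySet()) {
                Set<UUID> diffIds = diffFromAffinity.get(p);

                // Drop empty sets to keep the index compact.
                if (diffIds != null && diffIds.remove(nodeId) && diffIds.isEmpty())
                    diffFromAffinity.remove(p);
            }
        }
    }
}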
Use of org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap in project ignite by apache.
The class GridDhtPartitionTopologyImpl, method rebuildDiff.
/**
 * Rebuilds {@link #diffFromAffinity} from given assignment.
 *
 * @param affAssignment New affinity assignment.
 */
private void rebuildDiff(AffinityAssignment affAssignment) {
    assert lock.isWriteLockedByCurrentThread();

    if (node2part == null)
        return;

    if (FAST_DIFF_REBUILD) {
        Collection<UUID> affNodes = F.nodeIds(ctx.discovery().cacheGroupAffinityNodes(grp.groupId(),
            affAssignment.topologyVersion()));

        for (Map.Entry<Integer, Set<UUID>> e : diffFromAffinity.entrySet()) {
            int p = e.getKey();

            Iterator<UUID> iter = e.getValue().iterator();

            while (iter.hasNext()) {
                UUID nodeId = iter.next();

                if (!affNodes.contains(nodeId) || affAssignment.getIds(p).contains(nodeId))
                    iter.remove();
            }
        }
    }
    else {
        for (Map.Entry<UUID, GridDhtPartitionMap> e : node2part.entrySet()) {
            UUID nodeId = e.getKey();

            for (Map.Entry<Integer, GridDhtPartitionState> e0 : e.getValue().entrySet()) {
                Integer p0 = e0.getKey();

                GridDhtPartitionState state = e0.getValue();

                Set<UUID> ids = diffFromAffinity.get(p0);

                if ((state == MOVING || state == OWNING || state == RENTING) && !affAssignment.getIds(p0).contains(nodeId)) {
                    if (ids == null)
                        diffFromAffinity.put(p0, ids = U.newHashSet(3));

                    ids.add(nodeId);
                }
                else {
                    if (ids != null)
                        ids.remove(nodeId);
                }
            }
        }
    }

    diffFromAffinityVer = affAssignment.topologyVersion();
}
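The slow path above can be illustrated in isolation. The following sketch, under the assumption that a node's partition map can be reduced to a (partition -> state) mapping and affinity to a (partition -> ideal owner set) mapping, rebuilds the diff index the same way: any node holding a partition in MOVING, OWNING or RENTING state without being an ideal owner is recorded. All types and names here are simplified stand-ins, not Ignite classes.

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.UUID;

public class DiffRebuildSketch {
    enum PartState { MOVING, OWNING, RENTING, EVICTED }

    /**
     * @param node2part Node id -> (partition -> state), as reported by the nodes.
     * @param affinity  Partition -> ideal owner ids for the current topology.
     * @return Partition -> nodes that hold it (MOVING/OWNING/RENTING) but are not ideal owners.
     */
    static Map<Integer, Set<UUID>> rebuildDiff(
        Map<UUID, Map<Integer, PartState>> node2part,
        Map<Integer, Set<UUID>> affinity
    ) {
        Map<Integer, Set<UUID>> diff = new HashMap<>();

        for (Map.Entry<UUID, Map<Integer, PartState>> e : node2part.entrySet()) {
            UUID nodeId = e.getKey();

            for (Map.Entry<Integer, PartState> e0 : e.getValue().entrySet()) {
                int p = e0.getKey();
                PartState state = e0.getValue();

                boolean holdsData = state == PartState.MOVING || state == PartState.OWNING || state == PartState.RENTING;

                // Record nodes that hold partition data but are not ideal owners for that partition.
                if (holdsData && !affinity.getOrDefault(p, Set.of()).contains(nodeId))
                    diff.computeIfAbsent(p, k -> new HashSet<>()).add(nodeId);
            }
        }

        return diff;
    }
}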
Use of org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap in project ignite by apache.
The class GridDhtPartitionTopologyImpl, method createMovingPartitions.
/**
 * @param aff Affinity.
 */
private void createMovingPartitions(AffinityAssignment aff) {
    for (Map.Entry<UUID, GridDhtPartitionMap> e : node2part.entrySet()) {
        GridDhtPartitionMap map = e.getValue();

        addMoving(map, aff.backupPartitions(e.getKey()));
        addMoving(map, aff.primaryPartitions(e.getKey()));
    }
}
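addMoving itself is not shown in this excerpt, so the sketch below only approximates its intent: for every partition the affinity assigns to a node (primary or backup) that the node's map does not yet track, a MOVING entry is added. The helper name, the assignedParts parameter and the putIfAbsent behavior are assumptions made for illustration, not the actual Ignite implementation.

import java.util.Map;
import java.util.Set;
import java.util.UUID;

public class MovingPartitionsSketch {
    enum PartState { MOVING, OWNING }

    /** For every partition a node is assigned (primary or backup) but does not track yet, mark it MOVING. */
    static void createMovingPartitions(
        Map<UUID, Map<Integer, PartState>> node2part,
        Map<UUID, Set<Integer>> assignedParts // Union of primary and backup partitions per node (assumed input).
    ) {
        for (Map.Entry<UUID, Map<Integer, PartState>> e : node2part.entrySet()) {
            Map<Integer, PartState> map = e.getValue();

            // Do not downgrade partitions that already have a state (e.g. OWNING).
            for (Integer p : assignedParts.getOrDefault(e.getKey(), Set.of()))
                map.putIfAbsent(p, PartState.MOVING);
        }
    }
}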
Use of org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap in project ignite by apache.
The class GridDhtPartitionTopologyImpl, method update.
/** {@inheritDoc} */
@Override public boolean update(
    @Nullable AffinityTopologyVersion exchangeVer,
    GridDhtPartitionFullMap partMap,
    @Nullable CachePartitionFullCountersMap incomeCntrMap,
    Set<Integer> partsToReload,
    @Nullable Map<Integer, Long> partSizes,
    @Nullable AffinityTopologyVersion msgTopVer,
    @Nullable GridDhtPartitionsExchangeFuture exchFut,
    @Nullable Set<Integer> lostParts
) {
    if (log.isDebugEnabled()) {
        log.debug("Updating full partition map [grp=" + grp.cacheOrGroupName() +
            ", exchVer=" + exchangeVer + ", fullMap=" + fullMapString() + ']');
    }

    assert partMap != null;

    ctx.database().checkpointReadLock();

    try {
        lock.writeLock().lock();

        try {
            if (log.isTraceEnabled() && exchangeVer != null) {
                log.trace("Partition states before full update [grp=" + grp.cacheOrGroupName() +
                    ", exchVer=" + exchangeVer + ", states=" + dumpPartitionStates() + ']');
            }

            // Ignore a message not related to an exchange if an exchange is in progress.
            if (stopping || !lastTopChangeVer.initialized() ||
                (exchangeVer == null && !lastTopChangeVer.equals(readyTopVer)))
                return false;

            if (incomeCntrMap != null) {
                // Update local counters in partitions.
                for (int i = 0; i < locParts.length(); i++) {
                    cntrMap.updateCounter(i, incomeCntrMap.updateCounter(i));

                    GridDhtLocalPartition part = locParts.get(i);

                    if (part == null)
                        continue;

                    if (part.state() == OWNING || part.state() == MOVING) {
                        long updCntr = incomeCntrMap.updateCounter(part.id());
                        long curCntr = part.updateCounter();

                        // Avoid zero counter update to empty partition to prevent lazy init.
                        if (updCntr != 0 || curCntr != 0) {
                            part.updateCounter(updCntr);

                            if (updCntr > curCntr) {
                                if (log.isDebugEnabled())
                                    log.debug("Partition update counter has updated [grp=" + grp.cacheOrGroupName() +
                                        ", p=" + part.id() + ", state=" + part.state() +
                                        ", prevCntr=" + curCntr + ", nextCntr=" + updCntr + "]");
                            }
                        }
                    }
                }
            }

            // TODO FIXME https://issues.apache.org/jira/browse/IGNITE-11800
            if (exchangeVer != null) {
                // Ignore if exchange already finished or new exchange started.
                if (readyTopVer.after(exchangeVer) || lastTopChangeVer.after(exchangeVer)) {
                    U.warn(log, "Stale exchange id for full partition map update (will ignore) [grp=" +
                        grp.cacheOrGroupName() + ", lastTopChange=" + lastTopChangeVer +
                        ", readTopVer=" + readyTopVer + ", exchVer=" + exchangeVer + ']');

                    return false;
                }
            }

            boolean fullMapUpdated = node2part == null;

            if (node2part != null) {
                // Merge maps.
                for (GridDhtPartitionMap part : node2part.values()) {
                    GridDhtPartitionMap newPart = partMap.get(part.nodeId());

                    if (shouldOverridePartitionMap(part, newPart)) {
                        fullMapUpdated = true;

                        if (log.isDebugEnabled()) {
                            log.debug("Overriding partition map in full update map [node=" + part.nodeId() +
                                ", grp=" + grp.cacheOrGroupName() + ", exchVer=" + exchangeVer +
                                ", curPart=" + mapString(part) + ", newPart=" + mapString(newPart) + ']');
                        }

                        if (newPart.nodeId().equals(ctx.localNodeId()))
                            updateSeq.setIfGreater(newPart.updateSequence());
                    }
                    else {
                        // If the current partition map for some node is newer, keep the newer value.
                        if (log.isDebugEnabled()) {
                            log.debug("Partitions map for the node keeps newer value than message [node=" + part.nodeId() +
                                ", grp=" + grp.cacheOrGroupName() + ", exchVer=" + exchangeVer +
                                ", curPart=" + mapString(part) + ", newPart=" + mapString(newPart) + ']');
                        }

                        partMap.put(part.nodeId(), part);
                    }
                }

                // Check that we have new nodes.
                for (GridDhtPartitionMap part : partMap.values()) {
                    if (fullMapUpdated)
                        break;

                    fullMapUpdated = !node2part.containsKey(part.nodeId());
                }

                GridDhtPartitionsExchangeFuture topFut = exchFut == null ? ctx.exchange().lastFinishedFuture() : exchFut;

                // topFut can be null if lastFinishedFuture has completed with an error.
                if (topFut != null) {
                    for (Iterator<UUID> it = partMap.keySet().iterator(); it.hasNext(); ) {
                        UUID nodeId = it.next();

                        final ClusterNode node = topFut.events().discoveryCache().node(nodeId);

                        if (node == null) {
                            if (log.isTraceEnabled())
                                log.trace("Removing left node from full map update [grp=" + grp.cacheOrGroupName() +
                                    ", exchTopVer=" + exchangeVer + ", futVer=" + topFut.initialVersion() +
                                    ", nodeId=" + nodeId + ", partMap=" + partMap + ']');

                            it.remove();
                        }
                    }
                }
            }
            else {
                GridDhtPartitionMap locNodeMap = partMap.get(ctx.localNodeId());

                if (locNodeMap != null)
                    updateSeq.setIfGreater(locNodeMap.updateSequence());
            }

            if (!fullMapUpdated) {
                if (log.isTraceEnabled()) {
                    log.trace("No updates for full partition map (will ignore) [grp=" + grp.cacheOrGroupName() +
                        ", lastExch=" + lastTopChangeVer + ", exchVer=" + exchangeVer +
                        ", curMap=" + node2part + ", newMap=" + partMap + ']');
                }

                return false;
            }

            if (exchangeVer != null) {
                assert exchangeVer.compareTo(readyTopVer) >= 0 && exchangeVer.compareTo(lastTopChangeVer) >= 0;

                lastTopChangeVer = readyTopVer = exchangeVer;

                // Apply lost partitions from the full message.
                if (lostParts != null) {
                    this.lostParts = new HashSet<>(lostParts);

                    for (Integer part : lostParts) {
                        GridDhtLocalPartition locPart = localPartition(part);

                        // A new partition should be created instead.
                        if (locPart != null && locPart.state() != EVICTED) {
                            locPart.markLost();

                            GridDhtPartitionMap locMap = partMap.get(ctx.localNodeId());

                            locMap.put(part, LOST);
                        }
                    }
                }
            }

            node2part = partMap;

            if (log.isDebugEnabled()) {
                log.debug("Partition map after processFullMessage [grp=" + grp.cacheOrGroupName() +
                    ", exchId=" + (exchFut == null ? null : exchFut.exchangeId()) +
                    ", fullMap=" + fullMapString() + ']');
            }

            if (exchangeVer == null && !grp.isReplicated() &&
                (readyTopVer.initialized() && readyTopVer.compareTo(diffFromAffinityVer) >= 0)) {
                AffinityAssignment affAssignment = grp.affinity().readyAffinity(readyTopVer);

                for (Map.Entry<UUID, GridDhtPartitionMap> e : partMap.entrySet()) {
                    for (Map.Entry<Integer, GridDhtPartitionState> e0 : e.getValue().entrySet()) {
                        int p = e0.getKey();

                        Set<UUID> diffIds = diffFromAffinity.get(p);

                        if ((e0.getValue() == MOVING || e0.getValue() == OWNING || e0.getValue() == RENTING) &&
                            !affAssignment.getIds(p).contains(e.getKey())) {
                            if (diffIds == null)
                                diffFromAffinity.put(p, diffIds = U.newHashSet(3));

                            diffIds.add(e.getKey());
                        }
                        else {
                            if (diffIds != null && diffIds.remove(e.getKey())) {
                                if (diffIds.isEmpty())
                                    diffFromAffinity.remove(p);
                            }
                        }
                    }
                }

                diffFromAffinityVer = readyTopVer;
            }

            boolean changed = false;

            GridDhtPartitionMap nodeMap = partMap.get(ctx.localNodeId());

            // Only if a real exchange occurred.
            if (exchangeVer != null && nodeMap != null && grp.persistenceEnabled() && readyTopVer.initialized()) {
                assert exchFut != null;

                for (Map.Entry<Integer, GridDhtPartitionState> e : nodeMap.entrySet()) {
                    int p = e.getKey();

                    GridDhtPartitionState state = e.getValue();

                    if (state == OWNING) {
                        GridDhtLocalPartition locPart = locParts.get(p);

                        assert locPart != null : grp.cacheOrGroupName();

                        if (locPart.state() == MOVING) {
                            boolean success = locPart.own();

                            assert success : locPart;

                            changed |= success;
                        }
                    }
                    else if (state == MOVING) {
                        GridDhtLocalPartition locPart = locParts.get(p);

                        rebalancePartition(p, partsToReload.contains(p) ||
                            locPart != null && locPart.state() == MOVING && exchFut.localJoinExchange(), exchFut);

                        changed = true;
                    }
                }
            }

            long updateSeq = this.updateSeq.incrementAndGet();

            if (readyTopVer.initialized() && readyTopVer.equals(lastTopChangeVer)) {
                AffinityAssignment aff = grp.affinity().readyAffinity(readyTopVer);

                // Evictions on exchange are checked in the exchange worker thread before rebalancing.
                if (exchangeVer == null)
                    changed |= checkEvictions(updateSeq, aff);

                updateRebalanceVersion(aff.topologyVersion(), aff.assignment());
            }

            if (partSizes != null)
                this.globalPartSizes = partSizes;

            consistencyCheck();

            if (log.isDebugEnabled()) {
                log.debug("Partition map after full update [grp=" + grp.cacheOrGroupName() +
                    ", map=" + fullMapString() + ']');
            }

            if (log.isTraceEnabled() && exchangeVer != null) {
                log.trace("Partition states after full update [grp=" + grp.cacheOrGroupName() +
                    ", exchVer=" + exchangeVer + ", states=" + dumpPartitionStates() + ']');
            }

            if (changed) {
                if (log.isDebugEnabled())
                    log.debug("Partitions have been scheduled to resend [reason=Full map update, grp=" +
                        grp.cacheOrGroupName() + ']');

                ctx.exchange().scheduleResendPartitions();
            }

            return changed;
        }
        finally {
            lock.writeLock().unlock();
        }
    }
    finally {
        ctx.database().checkpointReadUnlock();
    }
}
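The merge step in the middle of update() (the shouldOverridePartitionMap branch) boils down to keeping, per node, whichever partition map is newer. A rough standalone sketch of that idea, assuming "newer" simply means a higher update sequence (the real check applies more conditions), could look like the following; NodeMap and merge are hypothetical names, not Ignite API.

import java.util.Map;
import java.util.UUID;

public class FullMapMergeSketch {
    /** Minimal stand-in for a per-node partition map carrying an update sequence. */
    record NodeMap(long updateSeq, Map<Integer, String> parts) { }

    /**
     * Merges an incoming full map into the current one, node by node: the incoming entry wins only if its
     * update sequence is newer, otherwise the current entry is copied back into the merged result.
     * Returns true if anything in the incoming map actually superseded the current state.
     */
    static boolean merge(Map<UUID, NodeMap> cur, Map<UUID, NodeMap> incoming) {
        boolean updated = false;

        for (Map.Entry<UUID, NodeMap> e : cur.entrySet()) {
            NodeMap newPart = incoming.get(e.getKey());

            if (newPart != null && newPart.updateSeq() > e.getValue().updateSeq())
                updated = true;
            else
                incoming.put(e.getKey(), e.getValue()); // Keep the newer local view in the merged result.
        }

        // Nodes present only in the incoming map also count as an update.
        for (UUID nodeId : incoming.keySet())
            updated |= !cur.containsKey(nodeId);

        return updated;
    }
}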
Use of org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionMap in project ignite by apache.
The class GridDhtPartitionTopologyImpl, method detectLostPartitions.
/** {@inheritDoc} */
@Override public boolean detectLostPartitions(AffinityTopologyVersion resTopVer, GridDhtPartitionsExchangeFuture fut) {
    ctx.database().checkpointReadLock();

    try {
        lock.writeLock().lock();

        try {
            if (node2part == null)
                return false;

            // Do not trigger lost partition events on activation.
            DiscoveryEvent discoEvt = fut.activateCluster() ? null : fut.firstEvent();

            final GridClusterStateProcessor state = grp.shared().kernalContext().state();

            boolean isInMemoryCluster = CU.isInMemoryCluster(
                grp.shared().kernalContext().discovery().allNodes(),
                grp.shared().kernalContext().marshallerContext().jdkMarshaller(),
                U.resolveClassLoader(grp.shared().kernalContext().config())
            );

            boolean compatibleWithIgnorePlc = isInMemoryCluster &&
                state.isBaselineAutoAdjustEnabled() && state.baselineAutoAdjustTimeout() == 0L;

            // Calculate how data loss is handled.
            boolean safe = grp.config().getPartitionLossPolicy() != IGNORE || !compatibleWithIgnorePlc;

            int parts = grp.affinity().partitions();

            Set<Integer> recentlyLost = null;

            boolean changed = false;

            for (int part = 0; part < parts; part++) {
                boolean lost = F.contains(lostParts, part);

                if (!lost) {
                    boolean hasOwner = false;

                    // Detect if all owners have left.
                    for (GridDhtPartitionMap partMap : node2part.values()) {
                        if (partMap.get(part) == OWNING) {
                            hasOwner = true;

                            break;
                        }
                    }

                    if (!hasOwner) {
                        lost = true;

                        // Do not detect and record lost partitions in IGNORE mode.
                        if (safe) {
                            if (lostParts == null)
                                lostParts = new TreeSet<>();

                            lostParts.add(part);

                            if (discoEvt != null) {
                                if (recentlyLost == null)
                                    recentlyLost = new HashSet<>();

                                recentlyLost.add(part);

                                if (grp.eventRecordable(EventType.EVT_CACHE_REBALANCE_PART_DATA_LOST)) {
                                    grp.addRebalanceEvent(part, EVT_CACHE_REBALANCE_PART_DATA_LOST,
                                        discoEvt.eventNode(), discoEvt.type(), discoEvt.timestamp());
                                }
                            }
                        }
                    }
                }

                if (lost) {
                    GridDhtLocalPartition locPart = localPartition(part, resTopVer, false, true);

                    if (locPart != null) {
                        if (locPart.state() == LOST)
                            continue;

                        final GridDhtPartitionState prevState = locPart.state();

                        changed = safe ? locPart.markLost() : locPart.own();

                        if (changed) {
                            long updSeq = updateSeq.incrementAndGet();

                            updateLocal(locPart.id(), locPart.state(), updSeq, resTopVer);

                            // If a partition was lost while rebalancing, reset its counter to force demander mode.
                            if (prevState == MOVING)
                                locPart.resetUpdateCounter();
                        }
                    }

                    // Update remote maps according to the policy.
                    for (Map.Entry<UUID, GridDhtPartitionMap> entry : node2part.entrySet()) {
                        if (entry.getKey().equals(ctx.localNodeId()))
                            continue;

                        GridDhtPartitionState p0 = entry.getValue().get(part);

                        if (p0 != null && p0 != EVICTED)
                            entry.getValue().put(part, safe ? LOST : OWNING);
                    }
                }
            }

            if (recentlyLost != null) {
                U.warn(log, "Detected lost partitions" + (!safe ? " (will ignore)" : "") +
                    " [grp=" + grp.cacheOrGroupName() + ", parts=" + S.compact(recentlyLost) +
                    ", topVer=" + resTopVer + "]");
            }

            return changed;
        }
        finally {
            lock.writeLock().unlock();
        }
    }
    finally {
        ctx.database().checkpointReadUnlock();
    }
}
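Stripped of events, loss policy and local partition handling, the core detection loop above asks one question per partition: does any node still report it as OWNING? A self-contained sketch of just that check, with simplified stand-in types and names, is shown below.

import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.UUID;

public class LostPartitionSketch {
    enum PartState { MOVING, OWNING, RENTING, LOST, EVICTED }

    /** Returns the partitions (0..parts-1) that no node currently reports as OWNING. */
    static Set<Integer> detectLost(int parts, Map<UUID, Map<Integer, PartState>> node2part) {
        Set<Integer> lost = new HashSet<>();

        for (int p = 0; p < parts; p++) {
            boolean hasOwner = false;

            // Scan every node's reported map until an owner is found.
            for (Map<Integer, PartState> partMap : node2part.values()) {
                if (partMap.get(p) == PartState.OWNING) {
                    hasOwner = true;

                    break;
                }
            }

            if (!hasOwner)
                lost.add(p);
        }

        return lost;
    }
}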