use of org.apache.ignite.internal.processors.affinity.AffinityAssignment in project ignite by apache.
the class GridDhtAtomicCache method updateSingle.
/**
* Updates locked entries one-by-one.
*
* @param nearNode Originating node.
* @param hasNear {@code True} if originating node has near cache.
* @param req Update request.
* @param res Update response.
* @param locked Locked entries.
* @param ver Assigned update version.
* @param dhtFut Optional DHT future.
* @param replicate Whether DR is enabled for that cache.
* @param taskName Task name.
* @param expiry Expiry policy.
* @param sndPrevVal If {@code true} sends previous value to backups.
* @return Return value.
* @throws GridCacheEntryRemovedException Should never be thrown.
*/
private DhtAtomicUpdateResult updateSingle(ClusterNode nearNode, boolean hasNear, GridNearAtomicAbstractUpdateRequest req, GridNearAtomicUpdateResponse res, List<GridDhtCacheEntry> locked, GridCacheVersion ver, @Nullable GridDhtAtomicAbstractUpdateFuture dhtFut, boolean replicate, String taskName, @Nullable IgniteCacheExpiryPolicy expiry, boolean sndPrevVal) throws GridCacheEntryRemovedException {
GridCacheReturn retVal = null;
Collection<IgniteBiTuple<GridDhtCacheEntry, GridCacheVersion>> deleted = null;
AffinityTopologyVersion topVer = req.topologyVersion();
boolean intercept = ctx.config().getInterceptor() != null;
AffinityAssignment affAssignment = ctx.affinity().assignment(topVer);
// Avoid iterator creation.
for (int i = 0; i < req.size(); i++) {
KeyCacheObject k = req.key(i);
GridCacheOperation op = req.operation();
// No GridCacheEntryRemovedException can be thrown.
try {
GridDhtCacheEntry entry = locked.get(i);
GridCacheVersion newConflictVer = req.conflictVersion(i);
long newConflictTtl = req.conflictTtl(i);
long newConflictExpireTime = req.conflictExpireTime(i);
assert !(newConflictVer instanceof GridCacheVersionEx) : newConflictVer;
Object writeVal = op == TRANSFORM ? req.entryProcessor(i) : req.writeValue(i);
// Get readers before innerUpdate (reader cleared after remove).
GridDhtCacheEntry.ReaderId[] readers = entry.readersLocked();
GridCacheUpdateAtomicResult updRes = entry.innerUpdate(ver, nearNode.id(), locNodeId, op, writeVal, req.invokeArguments(),
    writeThrough() && !req.skipStore(), !req.skipStore(), sndPrevVal || req.returnValue(), req.keepBinary(), expiry,
    /*event*/true, /*metrics*/true, /*primary*/true, /*verCheck*/false,
    topVer, req.filter(), replicate ? DR_PRIMARY : DR_NONE,
    newConflictTtl, newConflictExpireTime, newConflictVer, /*conflictResolve*/true,
    intercept, req.subjectId(), taskName, /*prevVal*/null, /*updateCntr*/null, dhtFut);
if (dhtFut != null) {
if (updRes.sendToDht()) {
// Send to backups even in case of remove-remove scenarios.
GridCacheVersionConflictContext<?, ?> conflictCtx = updRes.conflictResolveResult();
if (conflictCtx == null)
newConflictVer = null;
else if (conflictCtx.isMerge())
// Conflict version is discarded in case of merge.
newConflictVer = null;
EntryProcessor<Object, Object, Object> entryProcessor = null;
dhtFut.addWriteEntry(affAssignment, entry, updRes.newValue(), entryProcessor, updRes.newTtl(), updRes.conflictExpireTime(), newConflictVer, sndPrevVal, updRes.oldValue(), updRes.updateCounter());
if (readers != null)
dhtFut.addNearWriteEntries(nearNode, readers, entry, updRes.newValue(), entryProcessor, updRes.newTtl(), updRes.conflictExpireTime());
} else {
if (log.isDebugEnabled())
log.debug("Entry did not pass the filter or conflict resolution (will skip write) " + "[entry=" + entry + ", filter=" + Arrays.toString(req.filter()) + ']');
}
}
if (hasNear) {
if (updRes.sendToDht()) {
if (!ctx.affinity().partitionBelongs(nearNode, entry.partition(), topVer)) {
// If the same value as in the request was put, there is no need to send it back.
if (op == TRANSFORM || writeVal != updRes.newValue()) {
res.addNearValue(i, updRes.newValue(), updRes.newTtl(), updRes.conflictExpireTime());
} else
res.addNearTtl(i, updRes.newTtl(), updRes.conflictExpireTime());
if (updRes.newValue() != null) {
IgniteInternalFuture<Boolean> f = entry.addReader(nearNode.id(), req.messageId(), topVer);
assert f == null : f;
}
} else if (GridDhtCacheEntry.ReaderId.contains(readers, nearNode.id())) {
// Reader became primary or backup.
entry.removeReader(nearNode.id(), req.messageId());
} else
res.addSkippedIndex(i);
} else
res.addSkippedIndex(i);
}
if (updRes.removeVersion() != null) {
if (deleted == null)
deleted = new ArrayList<>(req.size());
deleted.add(F.t(entry, updRes.removeVersion()));
}
if (op == TRANSFORM) {
assert !req.returnValue();
IgniteBiTuple<Object, Exception> compRes = updRes.computedResult();
if (compRes != null && (compRes.get1() != null || compRes.get2() != null)) {
if (retVal == null)
retVal = new GridCacheReturn(nearNode.isLocal());
retVal.addEntryProcessResult(ctx, k, null, compRes.get1(), compRes.get2(), req.keepBinary());
}
} else {
// Create only once.
if (retVal == null) {
CacheObject ret = updRes.oldValue();
retVal = new GridCacheReturn(ctx, nearNode.isLocal(), req.keepBinary(), req.returnValue() ? ret : null, updRes.success());
}
}
} catch (IgniteCheckedException e) {
res.addFailedKey(k, e);
}
}
return new DhtAtomicUpdateResult(retVal, deleted, dhtFut);
}
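The snippet above is the primary-node side of an atomic update: the AffinityAssignment resolved for the request's topology version via ctx.affinity().assignment(topVer) is handed to dhtFut.addWriteEntry, which lets the DHT future route the updated entry to the partition's backups. The stand-alone sketch below models only that per-partition mapping; SimpleAssignment and its methods are illustrative names rather than the internal AffinityAssignment class, and it assumes the usual Ignite convention that the first node mapped to a partition is the primary and the remaining nodes are backups.

import java.util.*;

/** Simplified stand-in for an affinity assignment; a hypothetical model, not the Ignite class. */
final class SimpleAssignment {
    /** For each partition, an ordered node list: index 0 is the primary, the rest are backups. */
    private final List<List<UUID>> assignment;

    SimpleAssignment(List<List<UUID>> assignment) {
        this.assignment = assignment;
    }

    /** Primary node for the given partition. */
    UUID primary(int part) {
        return assignment.get(part).get(0);
    }

    /** Backup nodes for the given partition (everything after the primary). */
    List<UUID> backups(int part) {
        List<UUID> nodes = assignment.get(part);

        return nodes.subList(1, nodes.size());
    }

    /** All node ids mapped to the partition, analogous to AffinityAssignment#getIds(int). */
    Set<UUID> getIds(int part) {
        return new HashSet<>(assignment.get(part));
    }
}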
use of org.apache.ignite.internal.processors.affinity.AffinityAssignment in project ignite by apache.
the class GridDhtPartitionTopologyImpl method update.
/**
* {@inheritDoc}
*/
@SuppressWarnings({ "MismatchedQueryAndUpdateOfCollection" })
@Override
public boolean update(@Nullable GridDhtPartitionExchangeId exchId, GridDhtPartitionMap parts, boolean force) {
if (log.isDebugEnabled()) {
log.debug("Updating single partition map [grp=" + grp.cacheOrGroupName() + ", exchId=" + exchId + ", parts=" + mapString(parts) + ']');
}
if (!ctx.discovery().alive(parts.nodeId())) {
if (log.isDebugEnabled()) {
log.debug("Received partition update for non-existing node (will ignore) [grp=" + grp.cacheOrGroupName() + ", exchId=" + exchId + ", parts=" + parts + ']');
}
return false;
}
ctx.database().checkpointReadLock();
try {
lock.writeLock().lock();
try {
if (stopping)
return false;
if (!force) {
if (lastTopChangeVer.initialized() && exchId != null && lastTopChangeVer.compareTo(exchId.topologyVersion()) > 0) {
U.warn(log, "Stale exchange id for single partition map update (will ignore) [" + "grp=" + grp.cacheOrGroupName() + ", lastTopChange=" + lastTopChangeVer + ", readTopVer=" + readyTopVer + ", exch=" + exchId.topologyVersion() + ']');
return false;
}
}
if (node2part == null)
// Create invalid partition map.
node2part = new GridDhtPartitionFullMap();
GridDhtPartitionMap cur = node2part.get(parts.nodeId());
if (force) {
if (cur != null && cur.topologyVersion().initialized())
parts.updateSequence(cur.updateSequence(), cur.topologyVersion());
} else if (isStaleUpdate(cur, parts)) {
U.warn(log, "Stale update for single partition map update (will ignore) [" + "grp=" + grp.cacheOrGroupName() + ", exchId=" + exchId + ", curMap=" + cur + ", newMap=" + parts + ']');
return false;
}
long updateSeq = this.updateSeq.incrementAndGet();
node2part.newUpdateSequence(updateSeq);
boolean changed = false;
if (cur == null || !cur.equals(parts))
changed = true;
node2part.put(parts.nodeId(), parts);
// During exchange, the diff is calculated after all messages are received and affinity is initialized.
if (exchId == null && !grp.isReplicated()) {
if (readyTopVer.initialized() && readyTopVer.compareTo(diffFromAffinityVer) >= 0) {
AffinityAssignment affAssignment = grp.affinity().readyAffinity(readyTopVer);
// Add new mappings.
for (Map.Entry<Integer, GridDhtPartitionState> e : parts.entrySet()) {
int p = e.getKey();
Set<UUID> diffIds = diffFromAffinity.get(p);
if ((e.getValue() == MOVING || e.getValue() == OWNING || e.getValue() == RENTING) && !affAssignment.getIds(p).contains(parts.nodeId())) {
if (diffIds == null)
diffFromAffinity.put(p, diffIds = U.newHashSet(3));
if (diffIds.add(parts.nodeId()))
changed = true;
} else {
if (diffIds != null && diffIds.remove(parts.nodeId())) {
changed = true;
if (diffIds.isEmpty())
diffFromAffinity.remove(p);
}
}
}
// Remove obsolete mappings.
if (cur != null) {
for (Integer p : F.view(cur.keySet(), F0.notIn(parts.keySet()))) {
Set<UUID> ids = diffFromAffinity.get(p);
if (ids != null && ids.remove(parts.nodeId())) {
changed = true;
if (ids.isEmpty())
diffFromAffinity.remove(p);
}
}
}
diffFromAffinityVer = readyTopVer;
}
}
if (readyTopVer.initialized() && readyTopVer.equals(lastTopChangeVer)) {
AffinityAssignment aff = grp.affinity().readyAffinity(readyTopVer);
if (exchId == null)
changed |= checkEvictions(updateSeq, aff);
updateRebalanceVersion(aff.assignment());
}
consistencyCheck();
if (log.isDebugEnabled())
log.debug("Partition map after single update [grp=" + grp.cacheOrGroupName() + ", map=" + fullMapString() + ']');
if (changed && exchId == null)
ctx.exchange().scheduleResendPartitions();
return changed;
} finally {
lock.writeLock().unlock();
}
} finally {
ctx.database().checkpointReadUnlock();
}
}
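In this method the AffinityAssignment obtained from grp.affinity().readyAffinity(readyTopVer) drives the diffFromAffinity bookkeeping: a node is recorded for a partition when it reports that partition as MOVING, OWNING or RENTING while affAssignment.getIds(p) does not contain it, and is removed again once the report and the assignment agree. A minimal self-contained model of that logic follows; the class and method names are illustrative only.

import java.util.*;

/** Minimal model of the "diff from affinity" bookkeeping shown above; names are illustrative. */
final class DiffFromAffinitySketch {
    enum State { MOVING, OWNING, RENTING, EVICTED }

    /**
     * Records {@code nodeId} in {@code diff} for every partition it claims (MOVING/OWNING/RENTING)
     * without being part of the affinity assignment, and removes it otherwise.
     *
     * @return {@code true} if the diff map changed.
     */
    static boolean apply(UUID nodeId, Map<Integer, State> reported,
        Map<Integer, Set<UUID>> affinityIds, Map<Integer, Set<UUID>> diff) {
        boolean changed = false;

        for (Map.Entry<Integer, State> e : reported.entrySet()) {
            int p = e.getKey();

            boolean claims = e.getValue() == State.MOVING || e.getValue() == State.OWNING || e.getValue() == State.RENTING;
            boolean assigned = affinityIds.getOrDefault(p, Collections.<UUID>emptySet()).contains(nodeId);

            if (claims && !assigned)
                changed |= diff.computeIfAbsent(p, k -> new HashSet<>()).add(nodeId);
            else {
                Set<UUID> ids = diff.get(p);

                if (ids != null && ids.remove(nodeId)) {
                    changed = true;

                    if (ids.isEmpty())
                        diff.remove(p);
                }
            }
        }

        return changed;
    }
}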
use of org.apache.ignite.internal.processors.affinity.AffinityAssignment in project ignite by apache.
the class GridDhtPartitionTopologyImpl method updateLocal.
/**
* Updates state of partition in local {@link #node2part} map and recalculates {@link #diffFromAffinity}.
*
* @param p Partition.
* @param state Partition state.
* @param updateSeq Update sequence.
* @param affVer Affinity version.
* @return Update sequence.
*/
@SuppressWarnings({ "MismatchedQueryAndUpdateOfCollection" })
private long updateLocal(int p, GridDhtPartitionState state, long updateSeq, AffinityTopologyVersion affVer) {
assert lock.isWriteLockedByCurrentThread();
ClusterNode oldest = discoCache.oldestAliveServerNode();
assert oldest != null || ctx.kernalContext().clientNode();
// If this node became the oldest node.
if (ctx.localNode().equals(oldest) && node2part != null) {
long seq = node2part.updateSequence();
if (seq != updateSeq) {
if (seq > updateSeq) {
long seq0 = this.updateSeq.get();
if (seq0 < seq) {
// Update global counter if necessary.
boolean b = this.updateSeq.compareAndSet(seq0, seq + 1);
assert b : "Invalid update sequence [updateSeq=" + updateSeq + ", grp=" + grp.cacheOrGroupName() + ", seq=" + seq + ", curUpdateSeq=" + this.updateSeq.get() + ", node2part=" + node2part.toFullString() + ']';
updateSeq = seq + 1;
} else
updateSeq = seq;
}
node2part.updateSequence(updateSeq);
}
}
if (node2part != null) {
UUID locNodeId = ctx.localNodeId();
GridDhtPartitionMap map = node2part.get(locNodeId);
if (map == null) {
map = new GridDhtPartitionMap(locNodeId, updateSeq, affVer, GridPartitionStateMap.EMPTY, false);
node2part.put(locNodeId, map);
}
map.updateSequence(updateSeq, affVer);
map.put(p, state);
if (!grp.isReplicated() && (state == MOVING || state == OWNING || state == RENTING)) {
AffinityAssignment assignment = grp.affinity().cachedAffinity(diffFromAffinityVer);
if (!assignment.getIds(p).contains(ctx.localNodeId())) {
Set<UUID> diffIds = diffFromAffinity.get(p);
if (diffIds == null)
diffFromAffinity.put(p, diffIds = U.newHashSet(3));
diffIds.add(ctx.localNodeId());
}
}
}
return updateSeq;
}
use of org.apache.ignite.internal.processors.affinity.AffinityAssignment in project ignite by apache.
the class GridDhtPartitionTopologyImpl method updateLocal.
/**
* Updates state of partition in local {@link #node2part} map and recalculates {@link #diffFromAffinity}.
*
* @param p Partition.
* @param state Partition state.
* @param updateSeq Update sequence.
* @param affVer Affinity version.
* @return Update sequence.
*/
private long updateLocal(int p, GridDhtPartitionState state, long updateSeq, AffinityTopologyVersion affVer) {
assert lock.isWriteLockedByCurrentThread();
ClusterNode oldest = discoCache.oldestAliveServerNode();
assert oldest != null || ctx.kernalContext().clientNode();
// If this node became the oldest node.
if (ctx.localNode().equals(oldest) && node2part != null) {
long seq = node2part.updateSequence();
if (seq != updateSeq) {
if (seq > updateSeq) {
long seq0 = this.updateSeq.get();
if (seq0 < seq) {
// Update global counter if necessary.
boolean b = this.updateSeq.compareAndSet(seq0, seq + 1);
assert b : "Invalid update sequence [updateSeq=" + updateSeq + ", grp=" + grp.cacheOrGroupName() + ", seq=" + seq + ", curUpdateSeq=" + this.updateSeq.get() + ", node2part=" + node2part.toFullString() + ']';
updateSeq = seq + 1;
} else
updateSeq = seq;
}
node2part.updateSequence(updateSeq);
}
}
if (node2part != null) {
UUID locNodeId = ctx.localNodeId();
GridDhtPartitionMap map = node2part.get(locNodeId);
if (map == null) {
map = new GridDhtPartitionMap(locNodeId, updateSeq, affVer, GridPartitionStateMap.EMPTY, false);
node2part.put(locNodeId, map);
} else
map.updateSequence(updateSeq, affVer);
map.put(p, state);
if (!grp.isReplicated() && (state == MOVING || state == OWNING || state == RENTING)) {
AffinityAssignment assignment = grp.affinity().cachedAffinity(diffFromAffinityVer);
if (!assignment.getIds(p).contains(ctx.localNodeId())) {
Set<UUID> diffIds = diffFromAffinity.get(p);
if (diffIds == null)
diffFromAffinity.put(p, diffIds = U.newHashSet(3));
diffIds.add(ctx.localNodeId());
}
}
}
return updateSeq;
}
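Both updateLocal variants answer the same question for the local node: is it contained in assignment.getIds(p) for the partition whose state just changed? At the public API level the equivalent check can be made through org.apache.ignite.cache.affinity.Affinity, as in the sketch below; the cache name and the helper name isAssignedOwner are example values, not part of the code shown above.

import java.util.Collection;

import org.apache.ignite.Ignite;
import org.apache.ignite.cache.affinity.Affinity;
import org.apache.ignite.cluster.ClusterNode;

/** Checks partition ownership with the public Affinity API; purely illustrative helper. */
final class OwnershipCheck {
    static boolean isAssignedOwner(Ignite ignite, String cacheName, int part) {
        Affinity<Object> aff = ignite.affinity(cacheName);

        ClusterNode locNode = ignite.cluster().localNode();

        // Nodes currently mapped to the partition by affinity: primary first, then backups.
        Collection<ClusterNode> owners = aff.mapPartitionToPrimaryAndBackups(part);

        return owners.contains(locNode);
    }
}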
use of org.apache.ignite.internal.processors.affinity.AffinityAssignment in project ignite by apache.
the class GridDhtPartitionTopologyImpl method update.
/**
* {@inheritDoc}
*/
@Override
public boolean update(@Nullable AffinityTopologyVersion exchangeVer, GridDhtPartitionFullMap partMap, @Nullable CachePartitionFullCountersMap incomeCntrMap, Set<Integer> partsToReload, @Nullable Map<Integer, Long> partSizes, @Nullable AffinityTopologyVersion msgTopVer, @Nullable GridDhtPartitionsExchangeFuture exchFut, @Nullable Set<Integer> lostParts) {
if (log.isDebugEnabled()) {
log.debug("Updating full partition map " + "[grp=" + grp.cacheOrGroupName() + ", exchVer=" + exchangeVer + ", fullMap=" + fullMapString() + ']');
}
assert partMap != null;
ctx.database().checkpointReadLock();
try {
lock.writeLock().lock();
try {
if (log.isTraceEnabled() && exchangeVer != null) {
log.trace("Partition states before full update [grp=" + grp.cacheOrGroupName() + ", exchVer=" + exchangeVer + ", states=" + dumpPartitionStates() + ']');
}
if (stopping || !lastTopChangeVer.initialized() || // Ignore a message not related to an exchange while an exchange is in progress.
(exchangeVer == null && !lastTopChangeVer.equals(readyTopVer)))
return false;
if (incomeCntrMap != null) {
// Update local counters in partitions.
for (int i = 0; i < locParts.length(); i++) {
cntrMap.updateCounter(i, incomeCntrMap.updateCounter(i));
GridDhtLocalPartition part = locParts.get(i);
if (part == null)
continue;
if (part.state() == OWNING || part.state() == MOVING) {
long updCntr = incomeCntrMap.updateCounter(part.id());
long curCntr = part.updateCounter();
// Avoid zero counter update to empty partition to prevent lazy init.
if (updCntr != 0 || curCntr != 0) {
part.updateCounter(updCntr);
if (updCntr > curCntr) {
if (log.isDebugEnabled())
log.debug("Partition update counter has updated [grp=" + grp.cacheOrGroupName() + ", p=" + part.id() + ", state=" + part.state() + ", prevCntr=" + curCntr + ", nextCntr=" + updCntr + "]");
}
}
}
}
}
// TODO FIXME https://issues.apache.org/jira/browse/IGNITE-11800
if (exchangeVer != null) {
// Ignore if exchange already finished or new exchange started.
if (readyTopVer.after(exchangeVer) || lastTopChangeVer.after(exchangeVer)) {
U.warn(log, "Stale exchange id for full partition map update (will ignore) [" + "grp=" + grp.cacheOrGroupName() + ", lastTopChange=" + lastTopChangeVer + ", readTopVer=" + readyTopVer + ", exchVer=" + exchangeVer + ']');
return false;
}
}
boolean fullMapUpdated = node2part == null;
if (node2part != null) {
// Merge maps.
for (GridDhtPartitionMap part : node2part.values()) {
GridDhtPartitionMap newPart = partMap.get(part.nodeId());
if (shouldOverridePartitionMap(part, newPart)) {
fullMapUpdated = true;
if (log.isDebugEnabled()) {
log.debug("Overriding partition map in full update map [" + "node=" + part.nodeId() + ", grp=" + grp.cacheOrGroupName() + ", exchVer=" + exchangeVer + ", curPart=" + mapString(part) + ", newPart=" + mapString(newPart) + ']');
}
if (newPart.nodeId().equals(ctx.localNodeId()))
updateSeq.setIfGreater(newPart.updateSequence());
} else {
// If for some nodes current partition has a newer map, then we keep the newer value.
if (log.isDebugEnabled()) {
log.debug("Partitions map for the node keeps newer value than message [" + "node=" + part.nodeId() + ", grp=" + grp.cacheOrGroupName() + ", exchVer=" + exchangeVer + ", curPart=" + mapString(part) + ", newPart=" + mapString(newPart) + ']');
}
partMap.put(part.nodeId(), part);
}
}
// Check that we have new nodes.
for (GridDhtPartitionMap part : partMap.values()) {
if (fullMapUpdated)
break;
fullMapUpdated = !node2part.containsKey(part.nodeId());
}
GridDhtPartitionsExchangeFuture topFut = exchFut == null ? ctx.exchange().lastFinishedFuture() : exchFut;
// topFut can be null if lastFinishedFuture has completed with error.
if (topFut != null) {
for (Iterator<UUID> it = partMap.keySet().iterator(); it.hasNext(); ) {
UUID nodeId = it.next();
final ClusterNode node = topFut.events().discoveryCache().node(nodeId);
if (node == null) {
if (log.isTraceEnabled())
log.trace("Removing left node from full map update [grp=" + grp.cacheOrGroupName() + ", exchTopVer=" + exchangeVer + ", futVer=" + topFut.initialVersion() + ", nodeId=" + nodeId + ", partMap=" + partMap + ']');
it.remove();
}
}
}
} else {
GridDhtPartitionMap locNodeMap = partMap.get(ctx.localNodeId());
if (locNodeMap != null)
updateSeq.setIfGreater(locNodeMap.updateSequence());
}
if (!fullMapUpdated) {
if (log.isTraceEnabled()) {
log.trace("No updates for full partition map (will ignore) [" + "grp=" + grp.cacheOrGroupName() + ", lastExch=" + lastTopChangeVer + ", exchVer=" + exchangeVer + ", curMap=" + node2part + ", newMap=" + partMap + ']');
}
return false;
}
if (exchangeVer != null) {
assert exchangeVer.compareTo(readyTopVer) >= 0 && exchangeVer.compareTo(lastTopChangeVer) >= 0;
lastTopChangeVer = readyTopVer = exchangeVer;
// Apply lost partitions from full message.
if (lostParts != null) {
this.lostParts = new HashSet<>(lostParts);
for (Integer part : lostParts) {
GridDhtLocalPartition locPart = localPartition(part);
// New partition should be created instead.
if (locPart != null && locPart.state() != EVICTED) {
locPart.markLost();
GridDhtPartitionMap locMap = partMap.get(ctx.localNodeId());
locMap.put(part, LOST);
}
}
}
}
node2part = partMap;
if (log.isDebugEnabled()) {
log.debug("Partition map after processFullMessage [grp=" + grp.cacheOrGroupName() + ", exchId=" + (exchFut == null ? null : exchFut.exchangeId()) + ", fullMap=" + fullMapString() + ']');
}
if (exchangeVer == null && !grp.isReplicated() && (readyTopVer.initialized() && readyTopVer.compareTo(diffFromAffinityVer) >= 0)) {
AffinityAssignment affAssignment = grp.affinity().readyAffinity(readyTopVer);
for (Map.Entry<UUID, GridDhtPartitionMap> e : partMap.entrySet()) {
for (Map.Entry<Integer, GridDhtPartitionState> e0 : e.getValue().entrySet()) {
int p = e0.getKey();
Set<UUID> diffIds = diffFromAffinity.get(p);
if ((e0.getValue() == MOVING || e0.getValue() == OWNING || e0.getValue() == RENTING) && !affAssignment.getIds(p).contains(e.getKey())) {
if (diffIds == null)
diffFromAffinity.put(p, diffIds = U.newHashSet(3));
diffIds.add(e.getKey());
} else {
if (diffIds != null && diffIds.remove(e.getKey())) {
if (diffIds.isEmpty())
diffFromAffinity.remove(p);
}
}
}
}
diffFromAffinityVer = readyTopVer;
}
boolean changed = false;
GridDhtPartitionMap nodeMap = partMap.get(ctx.localNodeId());
// Only if a real exchange occurred.
if (exchangeVer != null && nodeMap != null && grp.persistenceEnabled() && readyTopVer.initialized()) {
assert exchFut != null;
for (Map.Entry<Integer, GridDhtPartitionState> e : nodeMap.entrySet()) {
int p = e.getKey();
GridDhtPartitionState state = e.getValue();
if (state == OWNING) {
GridDhtLocalPartition locPart = locParts.get(p);
assert locPart != null : grp.cacheOrGroupName();
if (locPart.state() == MOVING) {
boolean success = locPart.own();
assert success : locPart;
changed |= success;
}
} else if (state == MOVING) {
GridDhtLocalPartition locPart = locParts.get(p);
rebalancePartition(p, partsToReload.contains(p) || locPart != null && locPart.state() == MOVING && exchFut.localJoinExchange(), exchFut);
changed = true;
}
}
}
long updateSeq = this.updateSeq.incrementAndGet();
if (readyTopVer.initialized() && readyTopVer.equals(lastTopChangeVer)) {
AffinityAssignment aff = grp.affinity().readyAffinity(readyTopVer);
// Evictions on exchange are checked in exchange worker thread before rebalancing.
if (exchangeVer == null)
changed |= checkEvictions(updateSeq, aff);
updateRebalanceVersion(aff.topologyVersion(), aff.assignment());
}
if (partSizes != null)
this.globalPartSizes = partSizes;
consistencyCheck();
if (log.isDebugEnabled()) {
log.debug("Partition map after full update [grp=" + grp.cacheOrGroupName() + ", map=" + fullMapString() + ']');
}
if (log.isTraceEnabled() && exchangeVer != null) {
log.trace("Partition states after full update [grp=" + grp.cacheOrGroupName() + ", exchVer=" + exchangeVer + ", states=" + dumpPartitionStates() + ']');
}
if (changed) {
if (log.isDebugEnabled())
log.debug("Partitions have been scheduled to resend [reason=" + "Full map update [grp" + grp.cacheOrGroupName() + "]");
ctx.exchange().scheduleResendPartitions();
}
return changed;
} finally {
lock.writeLock().unlock();
}
} finally {
ctx.database().checkpointReadUnlock();
}
}
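This full-map variant first merges the incoming partMap into the local node2part map, keeping whichever per-node map is newer (shouldOverridePartitionMap), and only then recomputes diffFromAffinity against readyAffinity(readyTopVer), mirroring the single-map update above. The sketch below reduces the merge to a comparison of update sequences; the names are hypothetical and the real check also takes topology versions into account.

import java.util.HashMap;
import java.util.Map;
import java.util.UUID;

/** Simplified full-map merge keyed by per-node update sequence; not the Ignite implementation. */
final class FullMapMergeSketch {
    /** Keeps, for every node, whichever of the current or incoming entry carries the higher update sequence. */
    static Map<UUID, Long> merge(Map<UUID, Long> cur, Map<UUID, Long> incoming) {
        Map<UUID, Long> res = new HashMap<>(incoming);

        for (Map.Entry<UUID, Long> e : cur.entrySet()) {
            Long incomingSeq = incoming.get(e.getKey());

            // Keep the current entry when the node is unknown to the message or our copy is newer.
            if (incomingSeq == null || incomingSeq < e.getValue())
                res.put(e.getKey(), e.getValue());
        }

        return res;
    }
}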