Use of org.apache.ignite.internal.processors.affinity.AffinityAssignment in project ignite by apache.
The class GridDhtPreloader, method processAffinityAssignmentRequest.
/**
* @param node Node.
* @param req Request.
*/
private void processAffinityAssignmentRequest(final ClusterNode node, final GridDhtAffinityAssignmentRequest req) {
final AffinityTopologyVersion topVer = req.topologyVersion();
if (log.isDebugEnabled())
log.debug("Processing affinity assignment request [node=" + node + ", req=" + req + ']');
cctx.affinity().affinityReadyFuture(req.topologyVersion()).listen(new CI1<IgniteInternalFuture<AffinityTopologyVersion>>() {
@Override
public void apply(IgniteInternalFuture<AffinityTopologyVersion> fut) {
if (log.isDebugEnabled())
log.debug("Affinity is ready for topology version, will send response [topVer=" + topVer + ", node=" + node + ']');
AffinityAssignment assignment = cctx.affinity().assignment(topVer);
GridDhtAffinityAssignmentResponse res = new GridDhtAffinityAssignmentResponse(req.futureId(), cctx.cacheId(), topVer, assignment.assignment());
if (cctx.affinity().affinityCache().centralizedAffinityFunction()) {
assert assignment.idealAssignment() != null;
res.idealAffinityAssignment(assignment.idealAssignment());
}
try {
cctx.io().send(node, res, AFFINITY_POOL);
} catch (IgniteCheckedException e) {
U.error(log, "Failed to send affinity assignment response to remote node [node=" + node + ']', e);
}
}
});
}
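The response above carries assignment.assignment(), the per-partition lists of affinity nodes for the requested topology version. For comparison only, here is a minimal sketch (not part of the source above) that reads the same partition-to-node mapping through the public Affinity facade; the standalone node startup and the cache name "myCache" are placeholder assumptions.
import java.util.Collection;

import org.apache.ignite.Ignite;
import org.apache.ignite.Ignition;
import org.apache.ignite.cache.affinity.Affinity;
import org.apache.ignite.cluster.ClusterNode;

public class AffinityAssignmentExample {
    public static void main(String[] args) {
        // Hypothetical setup: start a node with defaults and create a cache named "myCache".
        try (Ignite ignite = Ignition.start()) {
            ignite.getOrCreateCache("myCache");

            Affinity<Object> aff = ignite.affinity("myCache");

            // For every partition, print its primary and backup nodes -- the same
            // per-partition node lists that AffinityAssignment.assignment() exposes internally.
            for (int p = 0; p < aff.partitions(); p++) {
                Collection<ClusterNode> nodes = aff.mapPartitionToPrimaryAndBackups(p);

                System.out.println("Partition " + p + " -> " + nodes);
            }
        }
    }
}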
Use of org.apache.ignite.internal.processors.affinity.AffinityAssignment in project ignite by apache.
The class GridDhtPartitionTopologyImpl, method afterExchange.
/**
* {@inheritDoc}
*/
@Override
public boolean afterExchange(GridDhtPartitionsExchangeFuture exchFut) {
boolean changed = false;
int num = grp.affinity().partitions();
AffinityTopologyVersion topVer = exchFut.context().events().topologyVersion();
assert grp.affinity().lastVersion().equals(topVer) : "Affinity is not initialized " + "[grp=" + grp.cacheOrGroupName() + ", topVer=" + topVer + ", affVer=" + grp.affinity().lastVersion() + ", fut=" + exchFut + ']';
ctx.database().checkpointReadLock();
try {
lock.writeLock().lock();
try {
if (stopping)
return false;
assert readyTopVer.initialized() : readyTopVer;
assert lastTopChangeVer.equals(readyTopVer);
if (log.isDebugEnabled()) {
log.debug("Partition map before afterExchange [grp=" + grp.cacheOrGroupName() + ", exchId=" + exchFut.exchangeId() + ", fullMap=" + fullMapString() + ']');
}
long updateSeq = this.updateSeq.incrementAndGet();
for (int p = 0; p < num; p++) {
GridDhtLocalPartition locPart = localPartition0(p, topVer, false, false, false);
if (partitionLocalNode(p, topVer)) {
// This partition will be created during the next topology event, which obviously has not happened at this point.
if (locPart == null) {
if (log.isDebugEnabled()) {
log.debug("Skipping local partition afterExchange (will not create) [" + "grp=" + grp.cacheOrGroupName() + ", p=" + p + ']');
}
continue;
}
GridDhtPartitionState state = locPart.state();
if (state == MOVING) {
if (grp.rebalanceEnabled()) {
Collection<ClusterNode> owners = owners(p);
// If there are no other owners, then become an owner.
if (F.isEmpty(owners)) {
boolean owned = locPart.own();
assert owned : "Failed to own partition [grp=" + grp.cacheOrGroupName() + ", locPart=" + locPart + ']';
updateSeq = updateLocal(p, locPart.state(), updateSeq, topVer);
changed = true;
if (grp.eventRecordable(EVT_CACHE_REBALANCE_PART_DATA_LOST)) {
DiscoveryEvent discoEvt = exchFut.events().lastEvent();
grp.addRebalanceEvent(p, EVT_CACHE_REBALANCE_PART_DATA_LOST, discoEvt.eventNode(), discoEvt.type(), discoEvt.timestamp());
}
if (log.isDebugEnabled()) {
log.debug("Owned partition [grp=" + grp.cacheOrGroupName() + ", part=" + locPart + ']');
}
} else if (log.isDebugEnabled())
log.debug("Will not own partition (there are owners to rebalance from) [grp=" + grp.cacheOrGroupName() + ", locPart=" + locPart + ", owners = " + owners + ']');
} else
updateSeq = updateLocal(p, locPart.state(), updateSeq, topVer);
}
} else {
if (locPart != null) {
GridDhtPartitionState state = locPart.state();
if (state == MOVING) {
locPart.rent(false);
updateSeq = updateLocal(p, locPart.state(), updateSeq, topVer);
changed = true;
if (log.isDebugEnabled()) {
log.debug("Evicting " + state + " partition (it does not belong to affinity) [" + "grp=" + grp.cacheOrGroupName() + ", part=" + locPart + ']');
}
}
}
}
}
AffinityAssignment aff = grp.affinity().readyAffinity(topVer);
if (node2part != null && node2part.valid())
changed |= checkEvictions(updateSeq, aff);
updateRebalanceVersion(aff.assignment());
consistencyCheck();
} finally {
lock.writeLock().unlock();
}
} finally {
ctx.database().checkpointReadUnlock();
}
return changed;
}
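The partitionLocalNode(p, topVer) check above decides whether the local node should own a partition after the exchange. A rough public-API analogue, shown as a hedged sketch with a placeholder cache name, is to ask the Affinity facade which partitions the local node is assigned as primary or backup.
import org.apache.ignite.Ignite;
import org.apache.ignite.Ignition;
import org.apache.ignite.cache.affinity.Affinity;
import org.apache.ignite.cluster.ClusterNode;

public class LocalPartitionsExample {
    public static void main(String[] args) {
        // Hypothetical setup: a default node and a cache named "myCache".
        try (Ignite ignite = Ignition.start()) {
            ignite.getOrCreateCache("myCache");

            Affinity<Object> aff = ignite.affinity("myCache");
            ClusterNode locNode = ignite.cluster().localNode();

            // Partitions assigned to the local node as primary, as backup, and in total.
            int[] primary = aff.primaryPartitions(locNode);
            int[] backup = aff.backupPartitions(locNode);
            int[] all = aff.allPartitions(locNode);

            System.out.println("Local node: primary=" + primary.length
                + ", backup=" + backup.length + ", all=" + all.length);
        }
    }
}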
Use of org.apache.ignite.internal.processors.affinity.AffinityAssignment in project ignite by apache.
The class GridDhtPartitionTopologyImpl, method nodes.
/**
* {@inheritDoc}
*/
@Override
public List<ClusterNode> nodes(int p, AffinityTopologyVersion topVer) {
AffinityAssignment affAssignment = grp.affinity().cachedAffinity(topVer);
List<ClusterNode> affNodes = affAssignment.get(p);
List<ClusterNode> nodes = nodes0(p, affAssignment, affNodes);
return nodes != null ? nodes : affNodes;
}
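nodes(p, topVer) returns the affinity node list for the partition unless the current topology contributes extra owners. The following hypothetical helper (the class and method names are illustrative, not from the source) condenses that fallback path, assuming an AffinityAssignment obtained as above:
import java.util.List;

import org.apache.ignite.cluster.ClusterNode;
import org.apache.ignite.internal.processors.affinity.AffinityAssignment;

class AffinityNodesHelper {
    /** Returns the affinity node list for a partition, as nodes(p, topVer) does when nodes0(...) adds nothing. */
    static List<ClusterNode> affinityNodes(AffinityAssignment assignment, int part) {
        // get(part) is the per-partition node list of the assignment (primary first).
        return assignment.get(part);
    }
}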
Use of org.apache.ignite.internal.processors.affinity.AffinityAssignment in project ignite by apache.
The class GridDhtPartitionTopologyImpl, method update.
/**
* {@inheritDoc}
*/
@SuppressWarnings({ "MismatchedQueryAndUpdateOfCollection" })
@Override
public boolean update(@Nullable AffinityTopologyVersion exchangeVer, GridDhtPartitionFullMap partMap, @Nullable CachePartitionFullCountersMap incomeCntrMap, Set<Integer> partsToReload, @Nullable AffinityTopologyVersion msgTopVer) {
if (log.isDebugEnabled()) {
log.debug("Updating full partition map [grp=" + grp.cacheOrGroupName() + ", exchVer=" + exchangeVer + ", fullMap=" + fullMapString() + ']');
}
assert partMap != null;
ctx.database().checkpointReadLock();
try {
lock.writeLock().lock();
try {
if (stopping || !lastTopChangeVer.initialized() || // Ignore a message not related to an exchange while an exchange is in progress.
(exchangeVer == null && !lastTopChangeVer.equals(readyTopVer)))
return false;
if (incomeCntrMap != null) {
// Update local counters in partitions.
for (int i = 0; i < locParts.length(); i++) {
GridDhtLocalPartition part = locParts.get(i);
if (part == null)
continue;
if (part.state() == OWNING || part.state() == MOVING) {
long updCntr = incomeCntrMap.updateCounter(part.id());
if (updCntr != 0 && updCntr > part.updateCounter())
part.updateCounter(updCntr);
}
}
}
if (exchangeVer != null) {
// Ignore if exchange already finished or new exchange started.
if (readyTopVer.compareTo(exchangeVer) > 0 || lastTopChangeVer.compareTo(exchangeVer) > 0) {
U.warn(log, "Stale exchange id for full partition map update (will ignore) [" + "grp=" + grp.cacheOrGroupName() + ", lastTopChange=" + lastTopChangeVer + ", readTopVer=" + readyTopVer + ", exchVer=" + exchangeVer + ']');
return false;
}
}
if (msgTopVer != null && lastTopChangeVer.compareTo(msgTopVer) > 0) {
U.warn(log, "Stale version for full partition map update message (will ignore) [" + "grp=" + grp.cacheOrGroupName() + ", lastTopChange=" + lastTopChangeVer + ", readTopVer=" + readyTopVer + ", msgVer=" + msgTopVer + ']');
return false;
}
boolean fullMapUpdated = (node2part == null);
if (node2part != null) {
for (GridDhtPartitionMap part : node2part.values()) {
GridDhtPartitionMap newPart = partMap.get(part.nodeId());
if (shouldOverridePartitionMap(part, newPart)) {
fullMapUpdated = true;
if (log.isDebugEnabled()) {
log.debug("Overriding partition map in full update map [" + "grp=" + grp.cacheOrGroupName() + ", exchVer=" + exchangeVer + ", curPart=" + mapString(part) + ", newPart=" + mapString(newPart) + ']');
}
if (newPart.nodeId().equals(ctx.localNodeId()))
updateSeq.setIfGreater(newPart.updateSequence());
} else {
// If the current partition map for some node is newer,
// keep the newer value.
partMap.put(part.nodeId(), part);
}
}
// Check that we have new nodes.
for (GridDhtPartitionMap part : partMap.values()) {
if (fullMapUpdated)
break;
fullMapUpdated = !node2part.containsKey(part.nodeId());
}
// Remove entry if node left.
for (Iterator<UUID> it = partMap.keySet().iterator(); it.hasNext(); ) {
UUID nodeId = it.next();
if (!ctx.discovery().alive(nodeId)) {
if (log.isDebugEnabled())
log.debug("Removing left node from full map update [grp=" + grp.cacheOrGroupName() + ", nodeId=" + nodeId + ", partMap=" + partMap + ']');
it.remove();
}
}
} else {
GridDhtPartitionMap locNodeMap = partMap.get(ctx.localNodeId());
if (locNodeMap != null)
updateSeq.setIfGreater(locNodeMap.updateSequence());
}
if (!fullMapUpdated) {
if (log.isDebugEnabled()) {
log.debug("No updates for full partition map (will ignore) [" + "grp=" + grp.cacheOrGroupName() + ", lastExch=" + lastTopChangeVer + ", exchVer=" + exchangeVer + ", curMap=" + node2part + ", newMap=" + partMap + ']');
}
return false;
}
if (exchangeVer != null) {
assert exchangeVer.compareTo(readyTopVer) >= 0 && exchangeVer.compareTo(lastTopChangeVer) >= 0;
lastTopChangeVer = readyTopVer = exchangeVer;
}
node2part = partMap;
if (exchangeVer == null && !grp.isReplicated() && (readyTopVer.initialized() && readyTopVer.compareTo(diffFromAffinityVer) >= 0)) {
AffinityAssignment affAssignment = grp.affinity().readyAffinity(readyTopVer);
for (Map.Entry<UUID, GridDhtPartitionMap> e : partMap.entrySet()) {
for (Map.Entry<Integer, GridDhtPartitionState> e0 : e.getValue().entrySet()) {
int p = e0.getKey();
Set<UUID> diffIds = diffFromAffinity.get(p);
if ((e0.getValue() == MOVING || e0.getValue() == OWNING || e0.getValue() == RENTING) && !affAssignment.getIds(p).contains(e.getKey())) {
if (diffIds == null)
diffFromAffinity.put(p, diffIds = U.newHashSet(3));
diffIds.add(e.getKey());
} else {
if (diffIds != null && diffIds.remove(e.getKey())) {
if (diffIds.isEmpty())
diffFromAffinity.remove(p);
}
}
}
}
diffFromAffinityVer = readyTopVer;
}
boolean changed = false;
GridDhtPartitionMap nodeMap = partMap.get(ctx.localNodeId());
if (nodeMap != null && grp.persistenceEnabled() && readyTopVer.initialized()) {
for (Map.Entry<Integer, GridDhtPartitionState> e : nodeMap.entrySet()) {
int p = e.getKey();
GridDhtPartitionState state = e.getValue();
if (state == OWNING) {
GridDhtLocalPartition locPart = locParts.get(p);
assert locPart != null : grp.cacheOrGroupName();
if (locPart.state() == MOVING) {
boolean success = locPart.own();
assert success : locPart;
changed |= success;
}
} else if (state == MOVING) {
GridDhtLocalPartition locPart = locParts.get(p);
if (!partsToReload.contains(p)) {
if (locPart == null || locPart.state() == EVICTED)
locPart = createPartition(p);
if (locPart.state() == OWNING) {
locPart.moving();
changed = true;
}
} else {
if (locPart == null || locPart.state() == EVICTED) {
createPartition(p);
changed = true;
} else if (locPart.state() == OWNING || locPart.state() == MOVING) {
if (locPart.state() == OWNING)
locPart.moving();
locPart.clearAsync();
changed = true;
} else if (locPart.state() == RENTING) {
// Try to prevent partition eviction.
if (locPart.reserve()) {
try {
locPart.moving();
locPart.clearAsync();
} finally {
locPart.release();
}
} else {
// Otherwise just recreate the partition.
assert locPart.state() == EVICTED;
createPartition(p);
}
changed = true;
}
}
}
}
}
long updateSeq = this.updateSeq.incrementAndGet();
if (readyTopVer.initialized() && readyTopVer.equals(lastTopChangeVer)) {
AffinityAssignment aff = grp.affinity().readyAffinity(readyTopVer);
if (exchangeVer == null)
changed |= checkEvictions(updateSeq, aff);
updateRebalanceVersion(aff.assignment());
}
consistencyCheck();
if (log.isDebugEnabled()) {
log.debug("Partition map after full update [grp=" + grp.cacheOrGroupName() + ", map=" + fullMapString() + ']');
}
if (changed)
ctx.exchange().scheduleResendPartitions();
return changed;
} finally {
lock.writeLock().unlock();
}
} finally {
ctx.database().checkpointReadUnlock();
}
}
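A central step in update(...) is the diffFromAffinity bookkeeping: for each partition, it records the node IDs that report the partition in a MOVING, OWNING or RENTING state but are not affinity nodes for it. The following hypothetical helper (names and the caller-supplied set of reporting nodes are assumptions) condenses that loop for a single partition:
import java.util.HashSet;
import java.util.Set;
import java.util.UUID;

import org.apache.ignite.internal.processors.affinity.AffinityAssignment;

class DiffFromAffinityHelper {
    /**
     * @param assignment Affinity assignment for the ready topology version.
     * @param part Partition number.
     * @param reportingNodeIds IDs of nodes whose partition map shows the partition as MOVING, OWNING or RENTING.
     * @return Node IDs that hold the partition but are not affinity nodes for it.
     */
    static Set<UUID> diffFromAffinity(AffinityAssignment assignment, int part, Set<UUID> reportingNodeIds) {
        Set<UUID> diff = new HashSet<>();

        for (UUID nodeId : reportingNodeIds) {
            // getIds(part) returns the IDs of the affinity nodes for the partition.
            if (!assignment.getIds(part).contains(nodeId))
                diff.add(nodeId);
        }

        return diff;
    }
}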
Use of org.apache.ignite.internal.processors.affinity.AffinityAssignment in project ignite by apache.
The class GridDhtAtomicCache, method updatePartialBatch.
/**
* @param hasNear {@code True} if the originating node has a near cache.
* @param firstEntryIdx Index of the first entry in the request keys collection.
* @param entries Entries to update.
* @param ver Version to set.
* @param nearNode Originating node.
* @param writeVals Write values.
* @param putMap Values to put.
* @param rmvKeys Keys to remove.
* @param entryProcessorMap Entry processors.
* @param dhtFut DHT update future, if there are backups.
* @param req Request.
* @param res Response.
* @param replicate Whether replication is enabled.
* @param batchRes Batch update result.
* @param taskName Task name.
* @param expiry Expiry policy.
* @param sndPrevVal If {@code true}, the previous value is sent to backups.
* @return Deleted entries.
*/
@SuppressWarnings("ForLoopReplaceableByForEach")
@Nullable
private GridDhtAtomicAbstractUpdateFuture updatePartialBatch(final boolean hasNear, final int firstEntryIdx, final List<GridDhtCacheEntry> entries, final GridCacheVersion ver, final ClusterNode nearNode, @Nullable final List<CacheObject> writeVals, @Nullable final Map<KeyCacheObject, CacheObject> putMap, @Nullable final Collection<KeyCacheObject> rmvKeys, @Nullable final Map<KeyCacheObject, EntryProcessor<Object, Object, Object>> entryProcessorMap, @Nullable GridDhtAtomicAbstractUpdateFuture dhtFut, final GridNearAtomicAbstractUpdateRequest req, final GridNearAtomicUpdateResponse res, final boolean replicate, final DhtAtomicUpdateResult batchRes, final String taskName, @Nullable final IgniteCacheExpiryPolicy expiry, final boolean sndPrevVal) {
assert putMap == null ^ rmvKeys == null;
assert req.conflictVersions() == null : "Cannot be called when there are conflict entries in the batch.";
AffinityTopologyVersion topVer = req.topologyVersion();
CacheStorePartialUpdateException storeErr = null;
try {
GridCacheOperation op;
if (putMap != null) {
try {
Map<? extends KeyCacheObject, IgniteBiTuple<? extends CacheObject, GridCacheVersion>> view = F.viewReadOnly(putMap, new C1<CacheObject, IgniteBiTuple<? extends CacheObject, GridCacheVersion>>() {
@Override
public IgniteBiTuple<? extends CacheObject, GridCacheVersion> apply(CacheObject val) {
return F.t(val, ver);
}
});
ctx.store().putAll(null, view);
} catch (CacheStorePartialUpdateException e) {
storeErr = e;
}
op = UPDATE;
} else {
try {
ctx.store().removeAll(null, rmvKeys);
} catch (CacheStorePartialUpdateException e) {
storeErr = e;
}
op = DELETE;
}
boolean intercept = ctx.config().getInterceptor() != null;
AffinityAssignment affAssignment = ctx.affinity().assignment(topVer);
// Avoid iterator creation.
for (int i = 0; i < entries.size(); i++) {
GridDhtCacheEntry entry = entries.get(i);
assert entry.lockedByCurrentThread();
if (entry.obsolete()) {
assert req.operation() == DELETE : "Entry can become obsolete only after remove: " + entry;
continue;
}
if (storeErr != null && storeErr.failedKeys().contains(entry.key().value(ctx.cacheObjectContext(), false)))
continue;
try {
// We are holding java-level locks on entries at this point.
CacheObject writeVal = op == UPDATE ? writeVals.get(i) : null;
assert writeVal != null || op == DELETE : "null write value found.";
// Get readers before innerUpdate (reader cleared after remove).
GridDhtCacheEntry.ReaderId[] readers = entry.readersLocked();
GridCacheUpdateAtomicResult updRes = entry.innerUpdate(ver, nearNode.id(), locNodeId, op, writeVal, null, /*write-through*/
false, /*read-through*/
false, /*retval*/
sndPrevVal, req.keepBinary(), expiry, /*event*/
true, /*metrics*/
true, /*primary*/
true, /*verCheck*/
false, topVer, null, replicate ? DR_PRIMARY : DR_NONE, CU.TTL_NOT_CHANGED, CU.EXPIRE_TIME_CALCULATE, null, /*conflict resolve*/
false, /*intercept*/
false, req.subjectId(), taskName, null, null, dhtFut);
assert !updRes.success() || updRes.newTtl() == CU.TTL_NOT_CHANGED || expiry != null : "success=" + updRes.success() + ", newTtl=" + updRes.newTtl() + ", expiry=" + expiry;
if (intercept) {
if (op == UPDATE) {
ctx.config().getInterceptor().onAfterPut(new CacheLazyEntry(ctx, entry.key(), updRes.newValue(), req.keepBinary()));
} else {
assert op == DELETE : op;
// Old value should be already loaded for 'CacheInterceptor.onBeforeRemove'.
ctx.config().getInterceptor().onAfterRemove(new CacheLazyEntry(ctx, entry.key(), updRes.oldValue(), req.keepBinary()));
}
}
batchRes.addDeleted(entry, updRes, entries);
if (dhtFut != null) {
EntryProcessor<Object, Object, Object> entryProcessor = entryProcessorMap == null ? null : entryProcessorMap.get(entry.key());
dhtFut.addWriteEntry(affAssignment, entry, writeVal, entryProcessor, updRes.newTtl(), CU.EXPIRE_TIME_CALCULATE, null, sndPrevVal, updRes.oldValue(), updRes.updateCounter());
if (readers != null)
dhtFut.addNearWriteEntries(nearNode, readers, entry, writeVal, entryProcessor, updRes.newTtl(), CU.EXPIRE_TIME_CALCULATE);
}
if (hasNear) {
if (!ctx.affinity().partitionBelongs(nearNode, entry.partition(), topVer)) {
int idx = firstEntryIdx + i;
if (req.operation() == TRANSFORM) {
res.addNearValue(idx, writeVal, updRes.newTtl(), CU.EXPIRE_TIME_CALCULATE);
} else
res.addNearTtl(idx, updRes.newTtl(), CU.EXPIRE_TIME_CALCULATE);
if (writeVal != null || entry.hasValue()) {
IgniteInternalFuture<Boolean> f = entry.addReader(nearNode.id(), req.messageId(), topVer);
assert f == null : f;
}
} else if (GridDhtCacheEntry.ReaderId.contains(readers, nearNode.id())) {
// Reader became primary or backup.
entry.removeReader(nearNode.id(), req.messageId());
} else
res.addSkippedIndex(firstEntryIdx + i);
}
} catch (GridCacheEntryRemovedException e) {
assert false : "Entry cannot become obsolete while holding lock.";
e.printStackTrace();
}
}
} catch (IgniteCheckedException e) {
res.addFailedKeys(putMap != null ? putMap.keySet() : rmvKeys, e);
}
if (storeErr != null) {
ArrayList<KeyCacheObject> failed = new ArrayList<>(storeErr.failedKeys().size());
for (Object failedKey : storeErr.failedKeys()) failed.add(ctx.toCacheKeyObject(failedKey));
res.addFailedKeys(failed, storeErr.getCause());
}
return dhtFut;
}
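The near-cache branch above uses ctx.affinity().partitionBelongs(nearNode, entry.partition(), topVer) to decide whether the originating node is itself an affinity node for the entry or needs near-entry updates instead. A minimal public-API sketch of the same question follows; the cache name and key are placeholder assumptions.
import org.apache.ignite.Ignite;
import org.apache.ignite.Ignition;
import org.apache.ignite.cache.affinity.Affinity;
import org.apache.ignite.cluster.ClusterNode;

public class NearUpdateCheckExample {
    public static void main(String[] args) {
        // Hypothetical setup: a default node and a cache named "myCache".
        try (Ignite ignite = Ignition.start()) {
            ignite.getOrCreateCache("myCache");

            Affinity<Integer> aff = ignite.affinity("myCache");
            ClusterNode locNode = ignite.cluster().localNode();

            int key = 42;

            // A node that is neither primary nor backup for the key would rely on its
            // near cache (if configured) and would need near-entry updates.
            boolean affinityNode = aff.isPrimaryOrBackup(locNode, key);

            System.out.println("Key " + key + " -> partition " + aff.partition(key)
                + ", local node is affinity node: " + affinityNode);
        }
    }
}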