Use of org.apache.ignite.internal.processors.affinity.AffinityAssignment in project ignite by apache.
Class GridDhtPartitionDemander, method handleSupplyMessage.
/**
* Handles a Supply message from {@code nodeId} with the specified {@code topicId}.
*
* A Supply message contains entries used to populate the rebalancing partitions.
*
* This is a cyclic process:
* populate the rebalancing partitions with entries from the Supply message;
* if not all partitions specified in {@link #rebalanceFut} have been rebalanced or marked as missed,
* send a new Demand message to request the next batch of entries.
*
* @param topicId Topic id.
* @param nodeId Node id.
* @param supply Supply message.
*/
public void handleSupplyMessage(int topicId, final UUID nodeId, final GridDhtPartitionSupplyMessage supply) {
AffinityTopologyVersion topVer = supply.topologyVersion();
final RebalanceFuture fut = rebalanceFut;
ClusterNode node = ctx.node(nodeId);
if (node == null)
return;
// Topology already changed (for the future that the supply message is based on).
if (topologyChanged(fut))
return;
if (!fut.isActual(supply.rebalanceId())) {
// Supply message is based on another future.
return;
}
if (log.isDebugEnabled())
log.debug("Received supply message [grp=" + grp.cacheOrGroupName() + ", msg=" + supply + ']');
// Check whether there were class loading errors on unmarshal
if (supply.classError() != null) {
U.warn(log, "Rebalancing from node cancelled [grp=" + grp.cacheOrGroupName() + ", node=" + nodeId + "]. Class got undeployed during preloading: " + supply.classError());
fut.cancel(nodeId);
return;
}
final GridDhtPartitionTopology top = grp.topology();
if (grp.sharedGroup()) {
for (GridCacheContext cctx : grp.caches()) {
if (cctx.statisticsEnabled()) {
long keysCnt = supply.keysForCache(cctx.cacheId());
if (keysCnt != -1)
cctx.cache().metrics0().onRebalancingKeysCountEstimateReceived(keysCnt);
// Cannot be calculated per cache.
cctx.cache().metrics0().onRebalanceBatchReceived(supply.messageSize());
}
}
} else {
GridCacheContext cctx = grp.singleCacheContext();
if (cctx.statisticsEnabled()) {
if (supply.estimatedKeysCount() != -1)
cctx.cache().metrics0().onRebalancingKeysCountEstimateReceived(supply.estimatedKeysCount());
cctx.cache().metrics0().onRebalanceBatchReceived(supply.messageSize());
}
}
try {
AffinityAssignment aff = grp.affinity().cachedAffinity(topVer);
GridCacheContext cctx = grp.sharedGroup() ? null : grp.singleCacheContext();
// Preload.
for (Map.Entry<Integer, CacheEntryInfoCollection> e : supply.infos().entrySet()) {
int p = e.getKey();
if (aff.get(p).contains(ctx.localNode())) {
GridDhtLocalPartition part = top.localPartition(p, topVer, true);
assert part != null;
boolean last = supply.last().containsKey(p);
if (part.state() == MOVING) {
boolean reserved = part.reserve();
assert reserved : "Failed to reserve partition [igniteInstanceName=" + ctx.igniteInstanceName() + ", grp=" + grp.cacheOrGroupName() + ", part=" + part + ']';
part.lock();
try {
// Loop through all received entries and try to preload them.
for (GridCacheEntryInfo entry : e.getValue().infos()) {
if (!preloadEntry(node, p, entry, topVer)) {
if (log.isDebugEnabled())
log.debug("Got entries for invalid partition during " + "preloading (will skip) [p=" + p + ", entry=" + entry + ']');
break;
}
if (grp.sharedGroup() && (cctx == null || cctx.cacheId() != entry.cacheId()))
cctx = ctx.cacheContext(entry.cacheId());
if (cctx != null && cctx.statisticsEnabled())
cctx.cache().metrics0().onRebalanceKeyReceived();
}
// If the message was the last one for this partition, take ownership.
if (last) {
top.own(part);
fut.partitionDone(nodeId, p);
if (log.isDebugEnabled())
log.debug("Finished rebalancing partition: " + part);
}
} finally {
part.unlock();
part.release();
}
} else {
if (last)
fut.partitionDone(nodeId, p);
if (log.isDebugEnabled())
log.debug("Skipping rebalancing partition (state is not MOVING): " + part);
}
} else {
fut.partitionDone(nodeId, p);
if (log.isDebugEnabled())
log.debug("Skipping rebalancing partition (it does not belong on current node): " + p);
}
}
// Only request partitions based on latest topology version.
for (Integer miss : supply.missed()) {
if (aff.get(miss).contains(ctx.localNode()))
fut.partitionMissed(nodeId, miss);
}
for (Integer miss : supply.missed()) fut.partitionDone(nodeId, miss);
GridDhtPartitionDemandMessage d = new GridDhtPartitionDemandMessage(supply.rebalanceId(), supply.topologyVersion(), grp.groupId());
d.timeout(grp.config().getRebalanceTimeout());
d.topic(rebalanceTopics.get(topicId));
if (!topologyChanged(fut) && !fut.isDone()) {
// Send demand message.
try {
ctx.io().sendOrderedMessage(node, rebalanceTopics.get(topicId), d.convertIfNeeded(node.version()), grp.ioPolicy(), grp.config().getRebalanceTimeout());
} catch (ClusterTopologyCheckedException e) {
if (log.isDebugEnabled()) {
log.debug("Node left during rebalancing [grp=" + grp.cacheOrGroupName() + ", node=" + node.id() + ", msg=" + e.getMessage() + ']');
}
}
}
} catch (IgniteSpiException | IgniteCheckedException e) {
LT.error(log, e, "Error during rebalancing [grp=" + grp.cacheOrGroupName() + ", srcNode=" + node.id() + ", err=" + e + ']');
}
}
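The key AffinityAssignment call in this handler is aff.get(p), which returns the affinity node list for a supplied partition; an entry batch is only preloaded when that list contains the local node. The minimal sketch below isolates that check; the class and method names (SupplyFilterSketch, ownedLocally) are illustrative, not part of the Ignite API.

import java.util.List;

import org.apache.ignite.cluster.ClusterNode;
import org.apache.ignite.internal.processors.affinity.AffinityAssignment;

final class SupplyFilterSketch {
    /** @return {@code true} if partition {@code p} is assigned to {@code locNode} in the cached affinity. */
    static boolean ownedLocally(AffinityAssignment aff, int p, ClusterNode locNode) {
        // Affinity nodes for the partition at the cached topology version.
        List<ClusterNode> affNodes = aff.get(p);

        return affNodes.contains(locNode);
    }
}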
Use of org.apache.ignite.internal.processors.affinity.AffinityAssignment in project ignite by apache.
Class CacheGroupMetricsImpl, method getAffinityPartitionsAssignmentMap.
/**
*/
public Map<Integer, List<String>> getAffinityPartitionsAssignmentMap() {
if (ctx.affinity().lastVersion().topologyVersion() < 0)
return Collections.emptyMap();
AffinityAssignment assignment = ctx.affinity().cachedAffinity(AffinityTopologyVersion.NONE);
int part = 0;
Map<Integer, List<String>> assignmentMap = new LinkedHashMap<>();
for (List<ClusterNode> partAssignment : assignment.assignment()) {
List<String> partNodeIds = new ArrayList<>(partAssignment.size());
for (ClusterNode node : partAssignment) partNodeIds.add(node.id().toString());
assignmentMap.put(part, partNodeIds);
part++;
}
return assignmentMap;
}
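Here assignment() returns one node list per partition, indexed by partition number, which is why the metric above can keep a simple running counter as the partition id. The hypothetical helper below inverts that view into a node-to-partitions map, assuming only the AffinityAssignment calls already shown above.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;

import org.apache.ignite.cluster.ClusterNode;
import org.apache.ignite.internal.processors.affinity.AffinityAssignment;

final class AssignmentByNodeSketch {
    /** @return Node id mapped to the list of partitions assigned to that node. */
    static Map<UUID, List<Integer>> partitionsByNode(AffinityAssignment assignment) {
        Map<UUID, List<Integer>> res = new HashMap<>();

        // One node list per partition; the list index is the partition id.
        List<List<ClusterNode>> assign = assignment.assignment();

        for (int p = 0; p < assign.size(); p++) {
            for (ClusterNode node : assign.get(p))
                res.computeIfAbsent(node.id(), id -> new ArrayList<>()).add(p);
        }

        return res;
    }
}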
Use of org.apache.ignite.internal.processors.affinity.AffinityAssignment in project ignite by apache.
Class GridDhtPartitionTopologyImpl, method beforeExchange.
/**
* {@inheritDoc}
*/
@Override
public void beforeExchange(GridDhtPartitionsExchangeFuture exchFut, boolean affReady, boolean updateMoving) throws IgniteCheckedException {
ctx.database().checkpointReadLock();
try {
U.writeLock(lock);
try {
if (stopping)
return;
assert lastTopChangeVer.equals(exchFut.initialVersion()) : "Invalid topology version [topVer=" + lastTopChangeVer + ", exchId=" + exchFut.exchangeId() + ']';
ExchangeDiscoveryEvents evts = exchFut.context().events();
if (affReady) {
assert grp.affinity().lastVersion().equals(evts.topologyVersion()) : "Invalid affinity version [" + "grp=" + grp.cacheOrGroupName() + ", affVer=" + grp.affinity().lastVersion() + ", evtsVer=" + evts.topologyVersion() + ']';
lastTopChangeVer = readyTopVer = evts.topologyVersion();
discoCache = evts.discoveryCache();
}
if (log.isDebugEnabled()) {
log.debug("Partition map beforeExchange [grp=" + grp.cacheOrGroupName() + ", exchId=" + exchFut.exchangeId() + ", fullMap=" + fullMapString() + ']');
}
long updateSeq = this.updateSeq.incrementAndGet();
if (exchFut.exchangeType() == ALL && !exchFut.rebalanced())
cntrMap.clear();
initializeFullMap(updateSeq);
boolean grpStarted = exchFut.cacheGroupAddedOnExchange(grp.groupId(), grp.receivedFrom());
if (evts.hasServerLeft()) {
for (DiscoveryEvent evt : evts.events()) {
if (ExchangeDiscoveryEvents.serverLeftEvent(evt))
removeNode(evt.eventNode().id());
}
} else if (affReady && grpStarted && exchFut.exchangeType() == NONE) {
assert !exchFut.context().mergeExchanges() : exchFut;
assert node2part != null && node2part.valid() : exchFut;
// Initialize node maps if the group was started by a joining client.
final List<ClusterNode> nodes = exchFut.firstEventCache().cacheGroupAffinityNodes(grp.groupId());
for (ClusterNode node : nodes) {
if (!node2part.containsKey(node.id()) && ctx.discovery().alive(node)) {
final GridDhtPartitionMap partMap = new GridDhtPartitionMap(node.id(), 1L, exchFut.initialVersion(), new GridPartitionStateMap(), false);
final AffinityAssignment aff = grp.affinity().cachedAffinity(exchFut.initialVersion());
for (Integer p0 : aff.primaryPartitions(node.id())) partMap.put(p0, OWNING);
for (Integer p0 : aff.backupPartitions(node.id())) partMap.put(p0, OWNING);
node2part.put(node.id(), partMap);
}
}
}
if (grp.affinityNode()) {
if (grpStarted || exchFut.firstEvent().type() == EVT_DISCOVERY_CUSTOM_EVT || exchFut.serverNodeDiscoveryEvent()) {
AffinityTopologyVersion affVer;
List<List<ClusterNode>> affAssignment;
if (affReady) {
affVer = evts.topologyVersion();
assert grp.affinity().lastVersion().equals(affVer) : "Invalid affinity [topVer=" + grp.affinity().lastVersion() + ", grp=" + grp.cacheOrGroupName() + ", affVer=" + affVer + ", fut=" + exchFut + ']';
affAssignment = grp.affinity().readyAssignments(affVer);
} else {
assert !exchFut.context().mergeExchanges();
affVer = exchFut.initialVersion();
affAssignment = grp.affinity().idealAssignmentRaw();
}
initPartitions(affVer, affAssignment, exchFut, updateSeq);
}
}
consistencyCheck();
if (updateMoving) {
assert grp.affinity().lastVersion().equals(evts.topologyVersion());
createMovingPartitions(grp.affinity().readyAffinity(evts.topologyVersion()));
}
if (log.isDebugEnabled()) {
log.debug("Partition map after beforeExchange [grp=" + grp.cacheOrGroupName() + ", " + "exchId=" + exchFut.exchangeId() + ", fullMap=" + fullMapString() + ']');
}
if (log.isTraceEnabled()) {
log.trace("Partition states after beforeExchange [grp=" + grp.cacheOrGroupName() + ", exchId=" + exchFut.exchangeId() + ", states=" + dumpPartitionStates() + ']');
}
} finally {
lock.writeLock().unlock();
}
} finally {
ctx.database().checkpointReadUnlock();
}
}
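For a freshly joined node the code above marks both its primary and backup partitions as OWNING, using aff.primaryPartitions(nodeId) and aff.backupPartitions(nodeId). A short sketch of collecting that combined set follows, assuming both methods return Set<Integer> as they are iterated above; the helper name is hypothetical.

import java.util.HashSet;
import java.util.Set;
import java.util.UUID;

import org.apache.ignite.internal.processors.affinity.AffinityAssignment;

final class NodePartitionsSketch {
    /** @return All partitions (primary and backup) assigned to {@code nodeId}. */
    static Set<Integer> assignedPartitions(AffinityAssignment aff, UUID nodeId) {
        // Copy into a new set because the returned sets may be unmodifiable views.
        Set<Integer> parts = new HashSet<>(aff.primaryPartitions(nodeId));

        parts.addAll(aff.backupPartitions(nodeId));

        return parts;
    }
}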
Use of org.apache.ignite.internal.processors.affinity.AffinityAssignment in project ignite by apache.
Class GridDhtPartitionTopologyImpl, method nodes.
/**
* {@inheritDoc}
*/
@Override
public List<ClusterNode> nodes(int p, AffinityTopologyVersion topVer) {
AffinityAssignment affAssignment = grp.affinity().cachedAffinity(topVer);
List<ClusterNode> affNodes = affAssignment.get(p);
List<ClusterNode> nodes = nodes0(p, affAssignment, affNodes);
return nodes != null ? nodes : affNodes;
}
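In this method cachedAffinity(topVer).get(p) yields the affinity node list for the partition, nodes0 may widen it with current owners, and the plain affinity list is the fallback. By convention the first element of the affinity list is the primary node; the hypothetical helper below extracts it as an illustration of the same API.

import java.util.List;

import org.apache.ignite.cluster.ClusterNode;
import org.apache.ignite.internal.processors.affinity.AffinityAssignment;

final class PrimaryNodeSketch {
    /** @return Primary affinity node for partition {@code p}, or {@code null} if no node is assigned. */
    static ClusterNode primary(AffinityAssignment aff, int p) {
        List<ClusterNode> affNodes = aff.get(p);

        return affNodes.isEmpty() ? null : affNodes.get(0);
    }
}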
Use of org.apache.ignite.internal.processors.affinity.AffinityAssignment in project ignite by apache.
Class GridDhtPartitionTopologyImpl, method afterExchange.
/**
* {@inheritDoc}
*/
@Override
public boolean afterExchange(GridDhtPartitionsExchangeFuture exchFut) {
boolean changed = false;
int partitions = grp.affinity().partitions();
AffinityTopologyVersion topVer = exchFut.context().events().topologyVersion();
assert grp.affinity().lastVersion().equals(topVer) : "Affinity is not initialized " + "[grp=" + grp.cacheOrGroupName() + ", topVer=" + topVer + ", affVer=" + grp.affinity().lastVersion() + ", fut=" + exchFut + ']';
ctx.database().checkpointReadLock();
try {
lock.writeLock().lock();
try {
if (stopping)
return false;
assert readyTopVer.initialized() : readyTopVer;
assert lastTopChangeVer.equals(readyTopVer);
if (log.isDebugEnabled()) {
log.debug("Partition map before afterExchange [grp=" + grp.cacheOrGroupName() + ", exchId=" + exchFut.exchangeId() + ", fullMap=" + fullMapString() + ']');
}
if (log.isTraceEnabled()) {
log.trace("Partition states before afterExchange [grp=" + grp.cacheOrGroupName() + ", exchVer=" + exchFut.exchangeId() + ", states=" + dumpPartitionStates() + ']');
}
long updateSeq = this.updateSeq.incrementAndGet();
// Skip partition updates if this is not a real exchange.
if (!ctx.localNode().isClient() && exchFut.exchangeType() == ALL) {
for (int p = 0; p < partitions; p++) {
GridDhtLocalPartition locPart = localPartition0(p, topVer, false, true);
if (partitionLocalNode(p, topVer)) {
// Prepare the partition for rebalancing if that did not happen during the full map update phase.
if (locPart == null || locPart.state() == RENTING || locPart.state() == EVICTED)
locPart = rebalancePartition(p, true, exchFut);
GridDhtPartitionState state = locPart.state();
if (state == MOVING) {
if (grp.rebalanceEnabled()) {
Collection<ClusterNode> owners = owners(p);
// If there are no owners left, a new exchange should be started with lost partition detection.
if (!F.isEmpty(owners)) {
if (log.isDebugEnabled())
log.debug("Will not own partition (there are owners to rebalance from) " + "[grp=" + grp.cacheOrGroupName() + ", p=" + p + ", owners = " + owners + ']');
}
} else
updateSeq = updateLocal(p, locPart.state(), updateSeq, topVer);
}
} else {
if (locPart != null) {
GridDhtPartitionState state = locPart.state();
if (state == MOVING) {
locPart.rent();
updateSeq = updateLocal(p, locPart.state(), updateSeq, topVer);
changed = true;
if (log.isDebugEnabled()) {
log.debug("Evicting MOVING partition (it does not belong to affinity) [" + "grp=" + grp.cacheOrGroupName() + ", p=" + locPart.id() + ']');
}
}
}
}
}
}
AffinityAssignment aff = grp.affinity().readyAffinity(topVer);
if (node2part != null && node2part.valid())
changed |= checkEvictions(updateSeq, aff);
updateRebalanceVersion(aff.topologyVersion(), aff.assignment());
consistencyCheck();
if (log.isTraceEnabled()) {
log.trace("Partition states after afterExchange [grp=" + grp.cacheOrGroupName() + ", exchVer=" + exchFut.exchangeId() + ", states=" + dumpPartitionStates() + ']');
}
} finally {
lock.writeLock().unlock();
}
} finally {
ctx.database().checkpointReadUnlock();
}
return changed;
}
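The eviction branch above rents a MOVING partition once the ready affinity no longer assigns it to the local node, and aff.assignment() is also what feeds updateRebalanceVersion. As an illustration only, the hypothetical helper below walks the full assignment and collects the partitions a given node is not assigned, mirroring that check.

import java.util.ArrayList;
import java.util.List;

import org.apache.ignite.cluster.ClusterNode;
import org.apache.ignite.internal.processors.affinity.AffinityAssignment;

final class EvictionCandidatesSketch {
    /** @return Partitions not assigned to {@code locNode} in the given affinity. */
    static List<Integer> evictionCandidates(AffinityAssignment aff, ClusterNode locNode) {
        List<Integer> res = new ArrayList<>();

        List<List<ClusterNode>> assign = aff.assignment();

        for (int p = 0; p < assign.size(); p++) {
            // ClusterNode equality is by node id, as in the contains(ctx.localNode()) checks above.
            if (!assign.get(p).contains(locNode))
                res.add(p);
        }

        return res;
    }
}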