use of org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion in project ignite by apache.
the class GridDhtPartitionDemander method addAssignments.
/**
* Initiates new rebalance process from given {@code assignments}.
* If previous rebalance is not finished method cancels it.
* In case of delayed rebalance method schedules new with configured delay.
*
* @param assignments Assignments.
* @param force {@code True} if dummy reassign.
* @param rebalanceId Rebalance id.
* @param next Runnable responsible for cache rebalancing start.
* @param forcedRebFut External future for forced rebalance.
* @return Rebalancing runnable.
*/
Runnable addAssignments(final GridDhtPreloaderAssignments assignments, boolean force, long rebalanceId, final Runnable next, @Nullable final GridCompoundFuture<Boolean, Boolean> forcedRebFut) {
if (log.isDebugEnabled())
log.debug("Adding partition assignments: " + assignments);
assert force == (forcedRebFut != null);
long delay = grp.config().getRebalanceDelay();
if ((delay == 0 || force) && assignments != null) {
final RebalanceFuture oldFut = rebalanceFut;
final RebalanceFuture fut = new RebalanceFuture(grp, assignments, log, rebalanceId);
if (!oldFut.isInitial())
oldFut.cancel();
else
fut.listen(f -> oldFut.onDone(f.result()));
if (forcedRebFut != null)
forcedRebFut.add(fut);
rebalanceFut = fut;
for (final GridCacheContext cctx : grp.caches()) {
if (cctx.statisticsEnabled()) {
final CacheMetricsImpl metrics = cctx.cache().metrics0();
metrics.clearRebalanceCounters();
metrics.startRebalance(0);
rebalanceFut.listen(f -> metrics.clearRebalanceCounters());
}
}
fut.sendRebalanceStartedEvent();
if (assignments.cancelled()) {
// Pending exchange.
if (log.isDebugEnabled())
log.debug("Rebalancing skipped due to cancelled assignments.");
fut.onDone(false);
fut.sendRebalanceFinishedEvent();
return null;
}
if (assignments.isEmpty()) {
// Nothing to rebalance.
if (log.isDebugEnabled())
log.debug("Rebalancing skipped due to empty assignments.");
fut.onDone(true);
((GridFutureAdapter) grp.preloader().syncFuture()).onDone();
fut.sendRebalanceFinishedEvent();
return null;
}
return () -> {
if (next != null)
fut.listen(f -> {
try {
if (// Not cancelled.
f.get())
// Starts next cache rebalancing (according to the order).
next.run();
} catch (IgniteCheckedException e) {
if (log.isDebugEnabled())
log.debug(e.getMessage());
}
});
requestPartitions(fut, assignments);
};
} else if (delay > 0) {
for (GridCacheContext cctx : grp.caches()) {
if (cctx.statisticsEnabled()) {
final CacheMetricsImpl metrics = cctx.cache().metrics0();
metrics.startRebalance(delay);
}
}
GridTimeoutObject obj = lastTimeoutObj.get();
if (obj != null)
ctx.time().removeTimeoutObject(obj);
final GridDhtPartitionsExchangeFuture exchFut = lastExchangeFut;
assert exchFut != null : "Delaying rebalance process without topology event.";
obj = new GridTimeoutObjectAdapter(delay) {
@Override
public void onTimeout() {
exchFut.listen(new CI1<IgniteInternalFuture<AffinityTopologyVersion>>() {
@Override
public void apply(IgniteInternalFuture<AffinityTopologyVersion> f) {
ctx.exchange().forceRebalance(exchFut.exchangeId());
}
});
}
};
lastTimeoutObj.set(obj);
ctx.time().addTimeoutObject(obj);
}
return null;
}
use of org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion in project ignite by apache.
the class GridDhtPartitionDemander method handleSupplyMessage.
/**
* Handles supply message from {@code nodeId} with specified {@code topicId}.
*
* Supply message contains entries to populate rebalancing partitions.
*
* There is a cyclic process:
* Populate rebalancing partitions with entries from Supply message.
* If not all partitions specified in {@link #rebalanceFut} were rebalanced or marked as missed
* send new Demand message to request next batch of entries.
*
* @param topicId Topic id.
* @param nodeId Node id.
* @param supply Supply message.
*/
public void handleSupplyMessage(int topicId, final UUID nodeId, final GridDhtPartitionSupplyMessage supply) {
AffinityTopologyVersion topVer = supply.topologyVersion();
final RebalanceFuture fut = rebalanceFut;
ClusterNode node = ctx.node(nodeId);
if (node == null)
return;
if (// Topology already changed (for the future that supply message based on).
topologyChanged(fut))
return;
if (!fut.isActual(supply.rebalanceId())) {
// Supple message based on another future.
return;
}
if (log.isDebugEnabled())
log.debug("Received supply message [grp=" + grp.cacheOrGroupName() + ", msg=" + supply + ']');
// Check whether there were class loading errors on unmarshal
if (supply.classError() != null) {
U.warn(log, "Rebalancing from node cancelled [grp=" + grp.cacheOrGroupName() + ", node=" + nodeId + "]. Class got undeployed during preloading: " + supply.classError());
fut.cancel(nodeId);
return;
}
final GridDhtPartitionTopology top = grp.topology();
if (grp.sharedGroup()) {
for (GridCacheContext cctx : grp.caches()) {
if (cctx.statisticsEnabled()) {
long keysCnt = supply.keysForCache(cctx.cacheId());
if (keysCnt != -1)
cctx.cache().metrics0().onRebalancingKeysCountEstimateReceived(keysCnt);
// Can not be calculated per cache.
cctx.cache().metrics0().onRebalanceBatchReceived(supply.messageSize());
}
}
} else {
GridCacheContext cctx = grp.singleCacheContext();
if (cctx.statisticsEnabled()) {
if (supply.estimatedKeysCount() != -1)
cctx.cache().metrics0().onRebalancingKeysCountEstimateReceived(supply.estimatedKeysCount());
cctx.cache().metrics0().onRebalanceBatchReceived(supply.messageSize());
}
}
try {
AffinityAssignment aff = grp.affinity().cachedAffinity(topVer);
GridCacheContext cctx = grp.sharedGroup() ? null : grp.singleCacheContext();
// Preload.
for (Map.Entry<Integer, CacheEntryInfoCollection> e : supply.infos().entrySet()) {
int p = e.getKey();
if (aff.get(p).contains(ctx.localNode())) {
GridDhtLocalPartition part = top.localPartition(p, topVer, true);
assert part != null;
boolean last = supply.last().containsKey(p);
if (part.state() == MOVING) {
boolean reserved = part.reserve();
assert reserved : "Failed to reserve partition [igniteInstanceName=" + ctx.igniteInstanceName() + ", grp=" + grp.cacheOrGroupName() + ", part=" + part + ']';
part.lock();
try {
// Loop through all received entries and try to preload them.
for (GridCacheEntryInfo entry : e.getValue().infos()) {
if (!preloadEntry(node, p, entry, topVer)) {
if (log.isDebugEnabled())
log.debug("Got entries for invalid partition during " + "preloading (will skip) [p=" + p + ", entry=" + entry + ']');
break;
}
if (grp.sharedGroup() && (cctx == null || cctx.cacheId() != entry.cacheId()))
cctx = ctx.cacheContext(entry.cacheId());
if (cctx != null && cctx.statisticsEnabled())
cctx.cache().metrics0().onRebalanceKeyReceived();
}
// then we take ownership.
if (last) {
top.own(part);
fut.partitionDone(nodeId, p);
if (log.isDebugEnabled())
log.debug("Finished rebalancing partition: " + part);
}
} finally {
part.unlock();
part.release();
}
} else {
if (last)
fut.partitionDone(nodeId, p);
if (log.isDebugEnabled())
log.debug("Skipping rebalancing partition (state is not MOVING): " + part);
}
} else {
fut.partitionDone(nodeId, p);
if (log.isDebugEnabled())
log.debug("Skipping rebalancing partition (it does not belong on current node): " + p);
}
}
// Only request partitions based on latest topology version.
for (Integer miss : supply.missed()) {
if (aff.get(miss).contains(ctx.localNode()))
fut.partitionMissed(nodeId, miss);
}
for (Integer miss : supply.missed()) fut.partitionDone(nodeId, miss);
GridDhtPartitionDemandMessage d = new GridDhtPartitionDemandMessage(supply.rebalanceId(), supply.topologyVersion(), grp.groupId());
d.timeout(grp.config().getRebalanceTimeout());
d.topic(rebalanceTopics.get(topicId));
if (!topologyChanged(fut) && !fut.isDone()) {
// Send demand message.
try {
ctx.io().sendOrderedMessage(node, rebalanceTopics.get(topicId), d.convertIfNeeded(node.version()), grp.ioPolicy(), grp.config().getRebalanceTimeout());
} catch (ClusterTopologyCheckedException e) {
if (log.isDebugEnabled()) {
log.debug("Node left during rebalancing [grp=" + grp.cacheOrGroupName() + ", node=" + node.id() + ", msg=" + e.getMessage() + ']');
}
}
}
} catch (IgniteSpiException | IgniteCheckedException e) {
LT.error(log, e, "Error during rebalancing [grp=" + grp.cacheOrGroupName() + ", srcNode=" + node.id() + ", err=" + e + ']');
}
}
use of org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion in project ignite by apache.
the class IgniteClusterActivateDeactivateTest method startNodesAndBlockStatusChange.
/**
* @param srvs Number of servers.
* @param clients Number of clients.
* @param stateChangeFrom Index of node initiating changes.
* @param initiallyActive If {@code true} start cluster in active state (otherwise in inactive).
* @param blockMsgNodes Nodes whcis block exchange messages.
* @return State change future.
* @throws Exception If failed.
*/
private IgniteInternalFuture<?> startNodesAndBlockStatusChange(int srvs, int clients, final int stateChangeFrom, final boolean initiallyActive, int... blockMsgNodes) throws Exception {
active = initiallyActive;
testSpi = true;
startWithCaches1(srvs, clients);
int minorVer = 1;
if (initiallyActive && persistenceEnabled()) {
ignite(0).cluster().active(true);
minorVer++;
}
if (blockMsgNodes.length == 0)
blockMsgNodes = new int[] { 1 };
final AffinityTopologyVersion STATE_CHANGE_TOP_VER = new AffinityTopologyVersion(srvs + clients, minorVer);
List<TestRecordingCommunicationSpi> spis = new ArrayList<>();
for (int idx : blockMsgNodes) {
TestRecordingCommunicationSpi spi = TestRecordingCommunicationSpi.spi(ignite(idx));
spis.add(spi);
blockExchangeSingleMessage(spi, STATE_CHANGE_TOP_VER);
}
IgniteInternalFuture<?> stateChangeFut = GridTestUtils.runAsync(() -> ignite(stateChangeFrom).cluster().active(!initiallyActive));
for (TestRecordingCommunicationSpi spi : spis) spi.waitForBlocked();
U.sleep(500);
assertFalse(stateChangeFut.isDone());
return stateChangeFut;
}
use of org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion in project ignite by apache.
the class GridCommonAbstractTest method awaitPartitionMapExchange.
/**
* @param waitEvicts If {@code true} will wait for evictions finished.
* @param waitNode2PartUpdate If {@code true} will wait for nodes node2part info update finished.
* @param nodes Optional nodes. If {@code null} method will wait for all nodes, for non null collection nodes will
* be filtered
* @param printPartState If {@code true} will print partition state if evictions not happened.
* @throws InterruptedException If interrupted.
*/
@SuppressWarnings("BusyWait")
protected void awaitPartitionMapExchange(boolean waitEvicts, boolean waitNode2PartUpdate, @Nullable Collection<ClusterNode> nodes, boolean printPartState) throws InterruptedException {
long timeout = getPartitionMapExchangeTimeout();
long startTime = -1;
Set<String> names = new HashSet<>();
Ignite crd = null;
for (Ignite g : G.allGrids()) {
ClusterNode node = g.cluster().localNode();
if (crd == null || node.order() < crd.cluster().localNode().order()) {
crd = g;
if (node.order() == 1)
break;
}
}
if (crd == null)
return;
AffinityTopologyVersion waitTopVer = ((IgniteKernal) crd).context().discovery().topologyVersionEx();
if (waitTopVer.topologyVersion() <= 0)
waitTopVer = new AffinityTopologyVersion(1, 0);
for (Ignite g : G.allGrids()) {
if (nodes != null && !nodes.contains(g.cluster().localNode()))
continue;
IgniteKernal g0 = (IgniteKernal) g;
names.add(g0.configuration().getIgniteInstanceName());
if (startTime != -1) {
if (startTime != g0.context().discovery().gridStartTime())
fail("Found nodes from different clusters, probable some test does not stop nodes " + "[allNodes=" + names + ']');
} else
startTime = g0.context().discovery().gridStartTime();
if (g.cluster().localNode().isDaemon())
continue;
IgniteInternalFuture<?> exchFut = g0.context().cache().context().exchange().affinityReadyFuture(waitTopVer);
if (exchFut != null && !exchFut.isDone()) {
try {
exchFut.get(timeout);
} catch (IgniteCheckedException e) {
log.error("Failed to wait for exchange [topVer=" + waitTopVer + ", node=" + g0.name() + ']', e);
}
}
for (IgniteCacheProxy<?, ?> c : g0.context().cache().jcaches()) {
CacheConfiguration cfg = c.context().config();
if (cfg == null)
continue;
if (cfg.getCacheMode() != LOCAL && cfg.getRebalanceMode() != NONE && g.cluster().nodes().size() > 1) {
AffinityFunction aff = cfg.getAffinity();
GridDhtCacheAdapter<?, ?> dht = dht(c);
GridDhtPartitionTopology top = dht.topology();
for (int p = 0; p < aff.partitions(); p++) {
long start = 0;
for (int i = 0; ; i++) {
boolean match = false;
GridCachePartitionExchangeManager<?, ?> exchMgr = dht.context().shared().exchange();
AffinityTopologyVersion readyVer = exchMgr.readyAffinityVersion();
// Otherwise, there may be an assertion when printing top.readyTopologyVersion().
try {
IgniteInternalFuture<?> fut = exchMgr.affinityReadyFuture(readyVer);
if (fut != null)
fut.get();
} catch (IgniteCheckedException e) {
throw new IgniteException(e);
}
if (readyVer.topologyVersion() > 0 && c.context().started()) {
// Must map on updated version of topology.
Collection<ClusterNode> affNodes = dht.context().affinity().assignment(readyVer).idealAssignment().get(p);
int affNodesCnt = affNodes.size();
GridDhtTopologyFuture topFut = top.topologyVersionFuture();
Collection<ClusterNode> owners = (topFut != null && topFut.isDone()) ? top.owners(p, AffinityTopologyVersion.NONE) : Collections.<ClusterNode>emptyList();
int ownerNodesCnt = owners.size();
GridDhtLocalPartition loc = top.localPartition(p, readyVer, false);
if (affNodesCnt != ownerNodesCnt || !affNodes.containsAll(owners) || (waitEvicts && loc != null && loc.state() != GridDhtPartitionState.OWNING)) {
LT.warn(log(), "Waiting for topology map update [" + "igniteInstanceName=" + g.name() + ", cache=" + cfg.getName() + ", cacheId=" + dht.context().cacheId() + ", topVer=" + top.readyTopologyVersion() + ", p=" + p + ", affNodesCnt=" + affNodesCnt + ", ownersCnt=" + ownerNodesCnt + ", affNodes=" + F.nodeIds(affNodes) + ", owners=" + F.nodeIds(owners) + ", topFut=" + topFut + ", locNode=" + g.cluster().localNode() + ']');
} else
match = true;
} else {
LT.warn(log(), "Waiting for topology map update [" + "igniteInstanceName=" + g.name() + ", cache=" + cfg.getName() + ", cacheId=" + dht.context().cacheId() + ", topVer=" + top.readyTopologyVersion() + ", started=" + dht.context().started() + ", p=" + p + ", readVer=" + readyVer + ", locNode=" + g.cluster().localNode() + ']');
}
if (!match) {
if (i == 0)
start = System.currentTimeMillis();
if (System.currentTimeMillis() - start > timeout) {
U.dumpThreads(log);
if (printPartState)
printPartitionState(c);
throw new IgniteException("Timeout of waiting for topology map update [" + "igniteInstanceName=" + g.name() + ", cache=" + cfg.getName() + ", cacheId=" + dht.context().cacheId() + ", topVer=" + top.readyTopologyVersion() + ", p=" + p + ", readVer=" + readyVer + ", locNode=" + g.cluster().localNode() + ']');
}
// Busy wait.
Thread.sleep(20);
continue;
}
if (i > 0)
log().warning("Finished waiting for topology map update [igniteInstanceName=" + g.name() + ", p=" + p + ", duration=" + (System.currentTimeMillis() - start) + "ms]");
break;
}
}
if (waitNode2PartUpdate) {
long start = System.currentTimeMillis();
boolean failed = true;
while (failed) {
failed = false;
for (GridDhtPartitionMap pMap : top.partitionMap(true).values()) {
if (failed)
break;
for (Map.Entry entry : pMap.entrySet()) {
if (System.currentTimeMillis() - start > timeout) {
U.dumpThreads(log);
throw new IgniteException("Timeout of waiting for partition state update [" + "igniteInstanceName=" + g.name() + ", cache=" + cfg.getName() + ", cacheId=" + dht.context().cacheId() + ", topVer=" + top.readyTopologyVersion() + ", locNode=" + g.cluster().localNode() + ']');
}
if (entry.getValue() != GridDhtPartitionState.OWNING) {
LT.warn(log(), "Waiting for correct partition state part=" + entry.getKey() + ", should be OWNING [state=" + entry.getValue() + "], node=" + g.name() + ", cache=" + c.getName());
// Busy wait.
Thread.sleep(200);
failed = true;
break;
}
}
}
}
}
}
}
}
log.info("awaitPartitionMapExchange finished");
}
use of org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion in project ignite by apache.
the class GridAffinityAssignmentJolBenchmark method measure.
/**
* @param aff Aff.
* @param parts Parts.
* @param nodeCnt Node count.
* @param backups Backups.
*/
private static void measure(RendezvousAffinityFunction aff, int parts, int nodeCnt, int backups) throws Exception {
List<ClusterNode> nodes = new ArrayList<>();
for (int i = 0; i < nodeCnt; i++) {
ClusterNode node = node(i);
nodes.add(node);
}
AffinityFunctionContext ctx = new GridAffinityFunctionContextImpl(nodes, new ArrayList<>(), new DiscoveryEvent(), new AffinityTopologyVersion(), backups);
List<List<ClusterNode>> assignment = aff.assignPartitions(ctx);
setOptimization(false);
GridAffinityAssignmentV2 ga = new GridAffinityAssignmentV2(new AffinityTopologyVersion(1, 0), assignment, new ArrayList<>());
System.gc();
long totalSize = GraphLayout.parseInstance(ga).totalSize();
System.out.println("Optimized, parts " + parts + " nodeCount " + nodeCnt + " backups " + backups + " " + totalSize);
setOptimization(true);
GridAffinityAssignmentV2 ga2 = new GridAffinityAssignmentV2(new AffinityTopologyVersion(1, 0), assignment, new ArrayList<>());
System.gc();
long totalSize2 = GraphLayout.parseInstance(ga2).totalSize();
System.out.println("Deoptimized, parts " + parts + " nodeCount " + nodeCnt + " backups " + backups + " " + totalSize2);
if (totalSize > totalSize2)
throw new Exception("Optimized AffinityAssignment size " + totalSize + " is more than deoptimized " + totalSize2);
}
Aggregations