Use of org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtPartitionTopology in project ignite by apache.
In class GridDhtPreloader, method assign:
/** {@inheritDoc} */
@Override
public GridDhtPreloaderAssignments assign(GridDhtPartitionsExchangeFuture exchFut) {
// No assignments for disabled preloader.
GridDhtPartitionTopology top = cctx.dht().topology();
if (!cctx.rebalanceEnabled() || !cctx.shared().kernalContext().state().active())
return new GridDhtPreloaderAssignments(exchFut, top.topologyVersion());
int partCnt = cctx.affinity().partitions();
assert exchFut.forcePreload() || exchFut.dummyReassign() || exchFut.exchangeId().topologyVersion().equals(top.topologyVersion()) : "Topology version mismatch [exchId=" + exchFut.exchangeId() + ", cache=" + cctx.name() + ", topVer=" + top.topologyVersion() + ']';
GridDhtPreloaderAssignments assigns = new GridDhtPreloaderAssignments(exchFut, top.topologyVersion());
AffinityTopologyVersion topVer = assigns.topologyVersion();
for (int p = 0; p < partCnt; p++) {
if (cctx.shared().exchange().hasPendingExchange()) {
if (log.isDebugEnabled())
log.debug("Skipping assignments creation, exchange worker has pending assignments: " + exchFut.exchangeId());
assigns.cancelled(true);
return assigns;
}
// If partition belongs to local node.
if (cctx.affinity().partitionLocalNode(p, topVer)) {
GridDhtLocalPartition part = top.localPartition(p, topVer, true);
assert part != null;
assert part.id() == p;
if (part.state() != MOVING) {
if (log.isDebugEnabled())
log.debug("Skipping partition assignment (state is not MOVING): " + part);
continue; // For.
}
Collection<ClusterNode> picked = pickedOwners(p, topVer);
if (picked.isEmpty()) {
top.own(part);
if (cctx.events().isRecordable(EVT_CACHE_REBALANCE_PART_DATA_LOST)) {
DiscoveryEvent discoEvt = exchFut.discoveryEvent();
cctx.events().addPreloadEvent(p, EVT_CACHE_REBALANCE_PART_DATA_LOST, discoEvt.eventNode(), discoEvt.type(), discoEvt.timestamp());
}
if (log.isDebugEnabled())
log.debug("Owning partition as there are no other owners: " + part);
} else {
ClusterNode n = F.rand(picked);
GridDhtPartitionDemandMessage msg = assigns.get(n);
if (msg == null) {
assigns.put(n, msg = new GridDhtPartitionDemandMessage(top.updateSequence(), exchFut.exchangeId().topologyVersion(), cctx.cacheId()));
}
msg.addPartition(p);
}
}
}
return assigns;
}
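The interesting part of assign() is how it groups partitions by supplier: for every MOVING partition it picks one owner at random and appends the partition to that node's demand message, creating the message lazily on first use. Below is a self-contained, plain-Java sketch of that grouping idea; the node names and the Map-based stand-in for GridDhtPartitionDemandMessage are illustrative assumptions, not Ignite APIs.

// Self-contained sketch (plain Java, not the Ignite API) of the grouping pattern above:
// pick a random supplier per partition and accumulate partitions per node, mirroring
// assigns.get(n) / assigns.put(n, msg) / msg.addPartition(p).
import java.util.*;

public class DemandGrouping {
    public static void main(String[] args) {
        Random rnd = new Random();

        // Hypothetical input: partition id -> candidate owner node ids.
        Map<Integer, List<String>> owners = new HashMap<>();
        owners.put(0, Arrays.asList("nodeA", "nodeB"));
        owners.put(1, Collections.singletonList("nodeB"));
        owners.put(2, Arrays.asList("nodeA", "nodeC"));

        // Node id -> partitions demanded from that node (stand-in for GridDhtPartitionDemandMessage).
        Map<String, Set<Integer>> demand = new HashMap<>();

        for (Map.Entry<Integer, List<String>> e : owners.entrySet()) {
            List<String> picked = e.getValue();

            if (picked.isEmpty())
                continue; // In the real code the local node owns such a partition instead.

            String n = picked.get(rnd.nextInt(picked.size()));

            Set<Integer> parts = demand.get(n);

            if (parts == null)
                demand.put(n, parts = new TreeSet<>());

            parts.add(e.getKey());
        }

        System.out.println(demand);
    }
}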
Use of org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtPartitionTopology in project ignite by apache.
In class IgniteTxHandler, method prepareNearTx:
/**
* @param nearNode Node that initiated the transaction.
* @param req Near prepare request.
* @return Prepare future or {@code null} if the operation needs to be retried.
*/
@Nullable
private IgniteInternalFuture<GridNearTxPrepareResponse> prepareNearTx(final ClusterNode nearNode, final GridNearTxPrepareRequest req) {
IgniteTxEntry firstEntry;
try {
IgniteTxEntry firstWrite = unmarshal(req.writes());
IgniteTxEntry firstRead = unmarshal(req.reads());
firstEntry = firstWrite != null ? firstWrite : firstRead;
} catch (IgniteCheckedException e) {
return new GridFinishedFuture<>(e);
}
assert firstEntry != null : req;
GridDhtTxLocal tx = null;
GridCacheVersion mappedVer = ctx.tm().mappedVersion(req.version());
if (mappedVer != null) {
tx = ctx.tm().tx(mappedVer);
if (tx == null)
U.warn(log, "Missing local transaction for mapped near version [nearVer=" + req.version() + ", mappedVer=" + mappedVer + ']');
else {
if (req.concurrency() == PESSIMISTIC)
tx.nearFutureId(req.futureId());
}
} else {
GridDhtPartitionTopology top = null;
if (req.firstClientRequest()) {
assert req.concurrency() == OPTIMISTIC : req;
assert CU.clientNode(nearNode) : nearNode;
top = firstEntry.context().topology();
top.readLock();
if (req.allowWaitTopologyFuture()) {
GridDhtTopologyFuture topFut = top.topologyVersionFuture();
if (!topFut.isDone()) {
top.readUnlock();
return null;
}
}
}
try {
if (top != null) {
boolean retry = false;
GridDhtTopologyFuture topFut = top.topologyVersionFuture();
if (!req.allowWaitTopologyFuture() && !topFut.isDone()) {
retry = true;
if (txPrepareMsgLog.isDebugEnabled()) {
txPrepareMsgLog.debug("Topology change is in progress, need remap transaction [" + "txId=" + req.version() + ", node=" + nearNode.id() + ", reqTopVer=" + req.topologyVersion() + ", locTopVer=" + top.readyTopologyVersion() + ", req=" + req + ']');
}
}
if (!retry && needRemap(req.topologyVersion(), top.readyTopologyVersion(), req)) {
retry = true;
if (txPrepareMsgLog.isDebugEnabled()) {
txPrepareMsgLog.debug("Topology version mismatch for near prepare, need remap transaction [" + "txId=" + req.version() + ", node=" + nearNode.id() + ", reqTopVer=" + req.topologyVersion() + ", locTopVer=" + top.readyTopologyVersion() + ", req=" + req + ']');
}
}
if (retry) {
GridNearTxPrepareResponse res = new GridNearTxPrepareResponse(req.partition(), req.version(), req.futureId(), req.miniId(), req.version(), req.version(), null, null, top.lastTopologyChangeVersion(), req.onePhaseCommit(), req.deployInfo() != null);
try {
ctx.io().send(nearNode, res, req.policy());
if (txPrepareMsgLog.isDebugEnabled()) {
txPrepareMsgLog.debug("Sent remap response for near prepare [txId=" + req.version() + ", node=" + nearNode.id() + ']');
}
} catch (ClusterTopologyCheckedException ignored) {
if (txPrepareMsgLog.isDebugEnabled()) {
txPrepareMsgLog.debug("Failed to send remap response for near prepare, node failed [" + "txId=" + req.version() + ", node=" + nearNode.id() + ']');
}
} catch (IgniteCheckedException e) {
U.error(txPrepareMsgLog, "Failed to send remap response for near prepare " + "[txId=" + req.version() + ", node=" + nearNode.id() + ", req=" + req + ']', e);
}
return new GridFinishedFuture<>(res);
}
assert topFut.isDone();
}
tx = new GridDhtTxLocal(ctx, req.topologyVersion(), nearNode.id(), req.version(), req.futureId(), req.miniId(), req.threadId(), req.implicitSingle(), req.implicitSingle(), req.system(), req.explicitLock(), req.policy(), req.concurrency(), req.isolation(), req.timeout(), req.isInvalidate(), true, req.onePhaseCommit(), req.txSize(), req.transactionNodes(), req.subjectId(), req.taskNameHash());
tx = ctx.tm().onCreated(null, tx);
if (tx != null)
tx.topologyVersion(req.topologyVersion());
else
U.warn(log, "Failed to create local transaction (was transaction rolled back?) [xid=" + req.version() + ", req=" + req + ']');
} finally {
if (tx != null)
req.txState(tx.txState());
if (top != null)
top.readUnlock();
}
}
if (tx != null) {
req.txState(tx.txState());
if (req.explicitLock())
tx.explicitLock(true);
tx.transactionNodes(req.transactionNodes());
if (req.near())
tx.nearOnOriginatingNode(true);
if (req.onePhaseCommit()) {
assert req.last() : req;
tx.onePhaseCommit(true);
}
if (req.needReturnValue())
tx.needReturnValue(true);
IgniteInternalFuture<GridNearTxPrepareResponse> fut = tx.prepareAsync(req);
if (tx.isRollbackOnly() && !tx.commitOnPrepare()) {
if (tx.state() != TransactionState.ROLLED_BACK && tx.state() != TransactionState.ROLLING_BACK)
tx.rollbackDhtLocalAsync();
}
final GridDhtTxLocal tx0 = tx;
fut.listen(new CI1<IgniteInternalFuture<?>>() {
@Override
public void apply(IgniteInternalFuture<?> txFut) {
try {
txFut.get();
} catch (IgniteCheckedException e) {
// Just in case.
tx0.setRollbackOnly();
if (!X.hasCause(e, IgniteTxOptimisticCheckedException.class) && !X.hasCause(e, IgniteFutureCancelledException.class) && !ctx.kernalContext().isStopping())
U.error(log, "Failed to prepare DHT transaction: " + tx0, e);
}
}
});
return fut;
} else
return new GridFinishedFuture<>((GridNearTxPrepareResponse) null);
}
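A recurring pattern in prepareNearTx() is the topology read-lock discipline: take top.readLock(), check the topology future, and make sure every exit path (the early return as well as normal completion) releases the lock. The following sketch reproduces that discipline with standard java.util.concurrent locks; the class and method names are made up for illustration and are not part of Ignite.

// Minimal sketch of the lock discipline used above, with java.util.concurrent
// primitives instead of GridDhtPartitionTopology (all names here are illustrative only).
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

public class ReadLockDiscipline {
    private final ReadWriteLock topLock = new ReentrantReadWriteLock();
    private volatile boolean topologyReady; // Stands in for topFut.isDone().

    /** Returns null when the caller should retry later, like prepareNearTx() does. */
    public String tryPrepare() {
        topLock.readLock().lock();

        if (!topologyReady) {
            // An early return must release the lock explicitly, as top.readUnlock() does above.
            topLock.readLock().unlock();

            return null;
        }

        try {
            return doPrepare(); // Work done while the topology is frozen for readers.
        }
        finally {
            topLock.readLock().unlock(); // Mirrors the finally { top.readUnlock(); } block.
        }
    }

    private String doPrepare() {
        return "prepared";
    }
}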
Use of org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtPartitionTopology in project ignite by apache.
In class GridDhtPartitionDemander, method handleSupplyMessage:
/**
* Handles supply message from {@code nodeId} with specified {@code topicId}.
*
* Supply message contains entries to populate rebalancing partitions.
*
* There is a cyclic process:
* Populate rebalancing partitions with entries from the Supply message.
* If not all partitions specified in {@link #rebalanceFut} were rebalanced or marked as missed,
* send a new Demand message to request the next batch of entries.
*
* @param topicId Topic id.
* @param nodeId Node id.
* @param supply Supply message.
*/
public void handleSupplyMessage(int topicId, final UUID nodeId, final GridDhtPartitionSupplyMessage supply) {
AffinityTopologyVersion topVer = supply.topologyVersion();
final RebalanceFuture fut = rebalanceFut;
ClusterNode node = ctx.node(nodeId);
if (node == null)
return;
// Topology already changed (for the future that the supply message is based on).
if (topologyChanged(fut))
return;
if (!fut.isActual(supply.rebalanceId())) {
// Supply message is based on another future.
return;
}
if (log.isDebugEnabled())
log.debug("Received supply message [grp=" + grp.cacheOrGroupName() + ", msg=" + supply + ']');
// Check whether there were class loading errors on unmarshal
if (supply.classError() != null) {
U.warn(log, "Rebalancing from node cancelled [grp=" + grp.cacheOrGroupName() + ", node=" + nodeId + "]. Class got undeployed during preloading: " + supply.classError());
fut.cancel(nodeId);
return;
}
final GridDhtPartitionTopology top = grp.topology();
if (grp.sharedGroup()) {
for (GridCacheContext cctx : grp.caches()) {
if (cctx.statisticsEnabled()) {
long keysCnt = supply.keysForCache(cctx.cacheId());
if (keysCnt != -1)
cctx.cache().metrics0().onRebalancingKeysCountEstimateReceived(keysCnt);
// Can not be calculated per cache.
cctx.cache().metrics0().onRebalanceBatchReceived(supply.messageSize());
}
}
} else {
GridCacheContext cctx = grp.singleCacheContext();
if (cctx.statisticsEnabled()) {
if (supply.estimatedKeysCount() != -1)
cctx.cache().metrics0().onRebalancingKeysCountEstimateReceived(supply.estimatedKeysCount());
cctx.cache().metrics0().onRebalanceBatchReceived(supply.messageSize());
}
}
try {
AffinityAssignment aff = grp.affinity().cachedAffinity(topVer);
GridCacheContext cctx = grp.sharedGroup() ? null : grp.singleCacheContext();
// Preload.
for (Map.Entry<Integer, CacheEntryInfoCollection> e : supply.infos().entrySet()) {
int p = e.getKey();
if (aff.get(p).contains(ctx.localNode())) {
GridDhtLocalPartition part = top.localPartition(p, topVer, true);
assert part != null;
boolean last = supply.last().containsKey(p);
if (part.state() == MOVING) {
boolean reserved = part.reserve();
assert reserved : "Failed to reserve partition [igniteInstanceName=" + ctx.igniteInstanceName() + ", grp=" + grp.cacheOrGroupName() + ", part=" + part + ']';
part.lock();
try {
// Loop through all received entries and try to preload them.
for (GridCacheEntryInfo entry : e.getValue().infos()) {
if (!preloadEntry(node, p, entry, topVer)) {
if (log.isDebugEnabled())
log.debug("Got entries for invalid partition during " + "preloading (will skip) [p=" + p + ", entry=" + entry + ']');
break;
}
if (grp.sharedGroup() && (cctx == null || cctx.cacheId() != entry.cacheId()))
cctx = ctx.cacheContext(entry.cacheId());
if (cctx != null && cctx.statisticsEnabled())
cctx.cache().metrics0().onRebalanceKeyReceived();
}
// If the message was the last one for this partition, then we take ownership.
if (last) {
top.own(part);
fut.partitionDone(nodeId, p);
if (log.isDebugEnabled())
log.debug("Finished rebalancing partition: " + part);
}
} finally {
part.unlock();
part.release();
}
} else {
if (last)
fut.partitionDone(nodeId, p);
if (log.isDebugEnabled())
log.debug("Skipping rebalancing partition (state is not MOVING): " + part);
}
} else {
fut.partitionDone(nodeId, p);
if (log.isDebugEnabled())
log.debug("Skipping rebalancing partition (it does not belong on current node): " + p);
}
}
// Only request partitions based on latest topology version.
for (Integer miss : supply.missed()) {
if (aff.get(miss).contains(ctx.localNode()))
fut.partitionMissed(nodeId, miss);
}
for (Integer miss : supply.missed())
fut.partitionDone(nodeId, miss);
GridDhtPartitionDemandMessage d = new GridDhtPartitionDemandMessage(supply.rebalanceId(), supply.topologyVersion(), grp.groupId());
d.timeout(grp.config().getRebalanceTimeout());
d.topic(rebalanceTopics.get(topicId));
if (!topologyChanged(fut) && !fut.isDone()) {
// Send demand message.
try {
ctx.io().sendOrderedMessage(node, rebalanceTopics.get(topicId), d.convertIfNeeded(node.version()), grp.ioPolicy(), grp.config().getRebalanceTimeout());
} catch (ClusterTopologyCheckedException e) {
if (log.isDebugEnabled()) {
log.debug("Node left during rebalancing [grp=" + grp.cacheOrGroupName() + ", node=" + node.id() + ", msg=" + e.getMessage() + ']');
}
}
}
} catch (IgniteSpiException | IgniteCheckedException e) {
LT.error(log, e, "Error during rebalancing [grp=" + grp.cacheOrGroupName() + ", srcNode=" + node.id() + ", err=" + e + ']');
}
}
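The per-partition section of handleSupplyMessage() follows a strict reserve, lock, preload, unlock, release ordering so the MOVING partition cannot be concurrently evicted or cleared while entries are applied. Here is a minimal stand-alone sketch of that nesting using a semaphore and a lock; it is an analogy built on standard concurrency primitives, not the GridDhtLocalPartition implementation.

// Illustrative sketch of the reserve/lock ... unlock/release nesting above,
// using standard concurrency primitives rather than GridDhtLocalPartition.
import java.util.concurrent.Semaphore;
import java.util.concurrent.locks.ReentrantLock;

public class ReserveLockPattern {
    private final Semaphore reservations = new Semaphore(Integer.MAX_VALUE); // part.reserve()/release()
    private final ReentrantLock partLock = new ReentrantLock();              // part.lock()/unlock()

    public void applyBatch(Runnable batch) {
        boolean reserved = reservations.tryAcquire(); // Keeps the "partition" from being evicted.

        assert reserved;

        partLock.lock();

        try {
            batch.run(); // Preload entries while holding the lock.
        }
        finally {
            partLock.unlock();      // Inner resource released first...
            reservations.release(); // ...then the reservation, matching the code above.
        }
    }
}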
Use of org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtPartitionTopology in project ignite by apache.
In class GridDhtPartitionsExchangeFuture, method assignPartitionsStates:
/**
*/
private void assignPartitionsStates() {
for (Map.Entry<Integer, CacheGroupDescriptor> e : cctx.affinity().cacheGroups().entrySet()) {
CacheGroupDescriptor grpDesc = e.getValue();
if (grpDesc.config().getCacheMode() == CacheMode.LOCAL)
continue;
if (!CU.isPersistentCache(grpDesc.config(), cctx.gridConfig().getDataStorageConfiguration()))
continue;
CacheGroupContext grpCtx = cctx.cache().cacheGroup(e.getKey());
GridDhtPartitionTopology top = grpCtx != null ? grpCtx.topology() : cctx.exchange().clientTopology(e.getKey(), events().discoveryCache());
assignPartitionStates(top);
}
}
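assignPartitionsStates() filters cache groups twice (LOCAL mode and non-persistent groups are skipped) and then chooses between the live group topology and a client topology when no group context is started locally. The sketch below captures that selection with simplified stand-in types; GroupDesc and Topology are hypothetical and only mirror the shape of the real descriptors.

// Simplified stand-ins (not Ignite classes) for the per-group selection above: skip LOCAL
// and non-persistent groups, and fall back to a secondary (client) topology lookup when
// there is no started group context on this node.
import java.util.*;

public class TopologySelection {
    interface Topology { /* stand-in for GridDhtPartitionTopology */ }

    static class GroupDesc {
        final boolean local;
        final boolean persistent;
        final Topology liveTop;   // Null when the group has no started context on this node.
        final Topology clientTop; // Always available, like cctx.exchange().clientTopology(...).

        GroupDesc(boolean local, boolean persistent, Topology liveTop, Topology clientTop) {
            this.local = local;
            this.persistent = persistent;
            this.liveTop = liveTop;
            this.clientTop = clientTop;
        }
    }

    static List<Topology> topologiesToProcess(Collection<GroupDesc> groups) {
        List<Topology> res = new ArrayList<>();

        for (GroupDesc g : groups) {
            if (g.local || !g.persistent)
                continue; // Same two filters as assignPartitionsStates().

            // Prefer the live group topology; fall back to the client topology otherwise.
            res.add(g.liveTop != null ? g.liveTop : g.clientTop);
        }

        return res;
    }
}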
Use of org.apache.ignite.internal.processors.cache.distributed.dht.GridDhtPartitionTopology in project ignite by apache.
In class GridCommonAbstractTest, method awaitPartitionMapExchange:
/**
* @param waitEvicts If {@code true}, waits for evictions to finish.
* @param waitNode2PartUpdate If {@code true}, waits for nodes' node2part info updates to finish.
* @param nodes Optional nodes. If {@code null}, the method waits for all nodes; for a non-null
* collection, only the listed nodes are checked.
* @param printPartState If {@code true}, prints the partition state if evictions did not happen.
* @throws InterruptedException If interrupted.
*/
@SuppressWarnings("BusyWait")
protected void awaitPartitionMapExchange(boolean waitEvicts, boolean waitNode2PartUpdate, @Nullable Collection<ClusterNode> nodes, boolean printPartState) throws InterruptedException {
long timeout = getPartitionMapExchangeTimeout();
long startTime = -1;
Set<String> names = new HashSet<>();
Ignite crd = null;
for (Ignite g : G.allGrids()) {
ClusterNode node = g.cluster().localNode();
if (crd == null || node.order() < crd.cluster().localNode().order()) {
crd = g;
if (node.order() == 1)
break;
}
}
if (crd == null)
return;
AffinityTopologyVersion waitTopVer = ((IgniteKernal) crd).context().discovery().topologyVersionEx();
if (waitTopVer.topologyVersion() <= 0)
waitTopVer = new AffinityTopologyVersion(1, 0);
for (Ignite g : G.allGrids()) {
if (nodes != null && !nodes.contains(g.cluster().localNode()))
continue;
IgniteKernal g0 = (IgniteKernal) g;
names.add(g0.configuration().getIgniteInstanceName());
if (startTime != -1) {
if (startTime != g0.context().discovery().gridStartTime())
fail("Found nodes from different clusters, probable some test does not stop nodes " + "[allNodes=" + names + ']');
} else
startTime = g0.context().discovery().gridStartTime();
if (g.cluster().localNode().isDaemon())
continue;
IgniteInternalFuture<?> exchFut = g0.context().cache().context().exchange().affinityReadyFuture(waitTopVer);
if (exchFut != null && !exchFut.isDone()) {
try {
exchFut.get(timeout);
} catch (IgniteCheckedException e) {
log.error("Failed to wait for exchange [topVer=" + waitTopVer + ", node=" + g0.name() + ']', e);
}
}
for (IgniteCacheProxy<?, ?> c : g0.context().cache().jcaches()) {
CacheConfiguration cfg = c.context().config();
if (cfg == null)
continue;
if (cfg.getCacheMode() != LOCAL && cfg.getRebalanceMode() != NONE && g.cluster().nodes().size() > 1) {
AffinityFunction aff = cfg.getAffinity();
GridDhtCacheAdapter<?, ?> dht = dht(c);
GridDhtPartitionTopology top = dht.topology();
for (int p = 0; p < aff.partitions(); p++) {
long start = 0;
for (int i = 0; ; i++) {
boolean match = false;
GridCachePartitionExchangeManager<?, ?> exchMgr = dht.context().shared().exchange();
AffinityTopologyVersion readyVer = exchMgr.readyAffinityVersion();
// Wait for the affinity ready future to complete; otherwise, there may be an assertion when printing top.readyTopologyVersion().
try {
IgniteInternalFuture<?> fut = exchMgr.affinityReadyFuture(readyVer);
if (fut != null)
fut.get();
} catch (IgniteCheckedException e) {
throw new IgniteException(e);
}
if (readyVer.topologyVersion() > 0 && c.context().started()) {
// Must map on updated version of topology.
Collection<ClusterNode> affNodes = dht.context().affinity().assignment(readyVer).idealAssignment().get(p);
int affNodesCnt = affNodes.size();
GridDhtTopologyFuture topFut = top.topologyVersionFuture();
Collection<ClusterNode> owners = (topFut != null && topFut.isDone()) ? top.owners(p, AffinityTopologyVersion.NONE) : Collections.<ClusterNode>emptyList();
int ownerNodesCnt = owners.size();
GridDhtLocalPartition loc = top.localPartition(p, readyVer, false);
if (affNodesCnt != ownerNodesCnt || !affNodes.containsAll(owners) || (waitEvicts && loc != null && loc.state() != GridDhtPartitionState.OWNING)) {
LT.warn(log(), "Waiting for topology map update [" + "igniteInstanceName=" + g.name() + ", cache=" + cfg.getName() + ", cacheId=" + dht.context().cacheId() + ", topVer=" + top.readyTopologyVersion() + ", p=" + p + ", affNodesCnt=" + affNodesCnt + ", ownersCnt=" + ownerNodesCnt + ", affNodes=" + F.nodeIds(affNodes) + ", owners=" + F.nodeIds(owners) + ", topFut=" + topFut + ", locNode=" + g.cluster().localNode() + ']');
} else
match = true;
} else {
LT.warn(log(), "Waiting for topology map update [" + "igniteInstanceName=" + g.name() + ", cache=" + cfg.getName() + ", cacheId=" + dht.context().cacheId() + ", topVer=" + top.readyTopologyVersion() + ", started=" + dht.context().started() + ", p=" + p + ", readVer=" + readyVer + ", locNode=" + g.cluster().localNode() + ']');
}
if (!match) {
if (i == 0)
start = System.currentTimeMillis();
if (System.currentTimeMillis() - start > timeout) {
U.dumpThreads(log);
if (printPartState)
printPartitionState(c);
throw new IgniteException("Timeout of waiting for topology map update [" + "igniteInstanceName=" + g.name() + ", cache=" + cfg.getName() + ", cacheId=" + dht.context().cacheId() + ", topVer=" + top.readyTopologyVersion() + ", p=" + p + ", readVer=" + readyVer + ", locNode=" + g.cluster().localNode() + ']');
}
// Busy wait.
Thread.sleep(20);
continue;
}
if (i > 0)
log().warning("Finished waiting for topology map update [igniteInstanceName=" + g.name() + ", p=" + p + ", duration=" + (System.currentTimeMillis() - start) + "ms]");
break;
}
}
if (waitNode2PartUpdate) {
long start = System.currentTimeMillis();
boolean failed = true;
while (failed) {
failed = false;
for (GridDhtPartitionMap pMap : top.partitionMap(true).values()) {
if (failed)
break;
for (Map.Entry entry : pMap.entrySet()) {
if (System.currentTimeMillis() - start > timeout) {
U.dumpThreads(log);
throw new IgniteException("Timeout of waiting for partition state update [" + "igniteInstanceName=" + g.name() + ", cache=" + cfg.getName() + ", cacheId=" + dht.context().cacheId() + ", topVer=" + top.readyTopologyVersion() + ", locNode=" + g.cluster().localNode() + ']');
}
if (entry.getValue() != GridDhtPartitionState.OWNING) {
LT.warn(log(), "Waiting for correct partition state part=" + entry.getKey() + ", should be OWNING [state=" + entry.getValue() + "], node=" + g.name() + ", cache=" + c.getName());
// Busy wait.
Thread.sleep(200);
failed = true;
break;
}
}
}
}
}
}
}
}
log.info("awaitPartitionMapExchange finished");
}
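awaitPartitionMapExchange() relies on a busy-wait loop: the start timestamp is recorded on the first failed check, the thread sleeps 20 ms between retries, and an exception is thrown once the timeout elapses. A generic version of that loop, reduced to a single condition, might look like the sketch below; the BooleanSupplier-based API and the IllegalStateException are assumptions for illustration, not helpers from the test framework.

// Standalone sketch of the busy-wait-with-timeout loop used above (the 20 ms sleep and
// the timeout check mirror the test code; the condition parameter is hypothetical).
import java.util.function.BooleanSupplier;

public class BusyWait {
    public static void await(BooleanSupplier condition, long timeoutMs) throws InterruptedException {
        long start = 0;

        for (int i = 0; ; i++) {
            if (condition.getAsBoolean())
                break;

            if (i == 0)
                start = System.currentTimeMillis(); // Start counting on the first failed check.

            if (System.currentTimeMillis() - start > timeoutMs)
                throw new IllegalStateException("Timeout of waiting for condition");

            Thread.sleep(20); // Busy wait, as in awaitPartitionMapExchange().
        }
    }
}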