Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology in project ignite by apache.
The class GridNearPessimisticTxPrepareFuture, method preparePessimistic.
/**
*/
@SuppressWarnings("unchecked")
private void preparePessimistic() {
    // Non-mvcc implicit-single tx goes fast commit way.
    assert !tx.implicitSingle() || tx.txState().mvccEnabled();
    Map<UUID, GridDistributedTxMapping> mappings = new HashMap<>();
    AffinityTopologyVersion topVer = tx.topologyVersion();
    boolean hasNearCache = false;
    Map<UUID, Collection<UUID>> txNodes;
    if (tx.txState().mvccEnabled()) {
        Collection<GridDistributedTxMapping> mvccMappings = tx.implicitSingle()
            ? Collections.singleton(tx.mappings().singleMapping()) : tx.mappings().mappings();
        txNodes = new HashMap<>(mvccMappings.size());
        for (GridDistributedTxMapping m : mvccMappings) {
            mappings.put(m.primary().id(), m);
            txNodes.put(m.primary().id(), m.backups());
        }
    } else {
        GridDhtTxMapping txMapping = new GridDhtTxMapping();
        for (IgniteTxEntry txEntry : tx.allEntries()) {
            txEntry.clearEntryReadVersion();
            GridCacheContext cacheCtx = txEntry.context();
            if (cacheCtx.isNear())
                hasNearCache = true;
            List<ClusterNode> nodes;
            if (!cacheCtx.isLocal()) {
                GridDhtPartitionTopology top = cacheCtx.topology();
                nodes = top.nodes(cacheCtx.affinity().partition(txEntry.key()), topVer);
            } else
                nodes = cacheCtx.affinity().nodesByKey(txEntry.key(), topVer);
            if (F.isEmpty(nodes)) {
                onDone(new ClusterTopologyServerNotFoundException("Failed to map keys to nodes (partition " +
                    "is not mapped to any node) [key=" + txEntry.key() +
                    ", partition=" + cacheCtx.affinity().partition(txEntry.key()) + ", topVer=" + topVer + ']'));
                return;
            }
            ClusterNode primary = nodes.get(0);
            GridDistributedTxMapping nodeMapping = mappings.get(primary.id());
            if (nodeMapping == null)
                mappings.put(primary.id(), nodeMapping = new GridDistributedTxMapping(primary));
            txEntry.nodeId(primary.id());
            nodeMapping.add(txEntry);
            txMapping.addMapping(nodes);
        }
        txNodes = txMapping.transactionNodes();
    }
    tx.transactionNodes(txNodes);
    if (!hasNearCache)
        checkOnePhase(txNodes);
    long timeout = tx.remainingTime();
    if (timeout == -1) {
        onDone(new IgniteTxTimeoutCheckedException("Transaction timed out and was rolled back: " + tx));
        return;
    }
    int miniId = 0;
    for (final GridDistributedTxMapping m : mappings.values()) {
        final ClusterNode primary = m.primary();
        if (primary.isLocal()) {
            if (m.hasNearCacheEntries() && m.hasColocatedCacheEntries()) {
                GridNearTxPrepareRequest nearReq = createRequest(txNodes, m, timeout,
                    m.nearEntriesReads(), m.nearEntriesWrites());
                prepareLocal(nearReq, m, ++miniId, true);
                GridNearTxPrepareRequest colocatedReq = createRequest(txNodes, m, timeout,
                    m.colocatedEntriesReads(), m.colocatedEntriesWrites());
                prepareLocal(colocatedReq, m, ++miniId, false);
            } else {
                GridNearTxPrepareRequest req = createRequest(txNodes, m, timeout, m.reads(), m.writes());
                prepareLocal(req, m, ++miniId, m.hasNearCacheEntries());
            }
        } else {
            GridNearTxPrepareRequest req = createRequest(txNodes, m, timeout, m.reads(), m.writes());
            final MiniFuture fut = new MiniFuture(m, ++miniId);
            req.miniId(fut.futureId());
            add((IgniteInternalFuture) fut);
            try {
                cctx.io().send(primary, req, tx.ioPolicy());
                if (msgLog.isDebugEnabled()) {
                    msgLog.debug("Near pessimistic prepare, sent request [txId=" + tx.nearXidVersion() +
                        ", node=" + primary.id() + ']');
                }
            } catch (ClusterTopologyCheckedException e) {
                e.retryReadyFuture(cctx.nextAffinityReadyFuture(topVer));
                fut.onNodeLeft(e);
            } catch (IgniteCheckedException e) {
                if (msgLog.isDebugEnabled()) {
                    msgLog.debug("Near pessimistic prepare, failed send request [txId=" + tx.nearXidVersion() +
                        ", node=" + primary.id() + ", err=" + e + ']');
                }
                fut.onError(e);
                break;
            }
        }
    }
    markInitialized();
}
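The central use of GridDhtPartitionTopology above is the key-to-nodes mapping: for a non-local cache, the partition owners for the current topology version are taken from the DHT partition topology rather than computed from affinity alone. The following is a minimal sketch of just that step, written as a hypothetical helper (the method name, parameter types and error handling are illustrative, not part of the class above).
// Hypothetical helper distilled from the mapping step above: resolve the nodes that own
// the key's partition on the given topology version. The first node in the result is the primary.
private List<ClusterNode> mapKeyToNodes(GridCacheContext cacheCtx, Object key, AffinityTopologyVersion topVer)
    throws ClusterTopologyServerNotFoundException {
    List<ClusterNode> nodes;
    if (!cacheCtx.isLocal()) {
        // The DHT partition topology answers "which nodes own this partition right now".
        GridDhtPartitionTopology top = cacheCtx.topology();
        nodes = top.nodes(cacheCtx.affinity().partition(key), topVer);
    } else
        // Local caches are resolved directly through the affinity manager.
        nodes = cacheCtx.affinity().nodesByKey(key, topVer);
    if (F.isEmpty(nodes))
        throw new ClusterTopologyServerNotFoundException("Partition is not mapped to any node: " + key);
    return nodes;
}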
Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology in project ignite by apache.
The class TxCrossCacheMapOnInvalidTopologyTest, method doTestCrossCacheTxMapOnInvalidTopology.
/**
* Test scenario: a cross-cache tx is started after a node has left in the middle of rebalance; the first cache is
* fully rebalanced and the second is only partially rebalanced.
*
* The first cache map request triggers a client-compatible remap for pessimistic txs;
* the second cache map request should use the new topology version.
*
* For an optimistic tx, remap is enforced if the transaction has more than one mapping or all enlisted caches have
* compatible assignments.
*
* Success: the tx finishes on the ideal topology version across all mapped nodes.
*
* @param concurrency Concurrency.
* @param isolation Isolation.
*/
private void doTestCrossCacheTxMapOnInvalidTopology(TransactionConcurrency concurrency, TransactionIsolation isolation) throws Exception {
    try {
        IgniteEx crd = startGrid(0);
        IgniteEx g1 = startGrid(1);
        awaitPartitionMapExchange();
        IgniteEx client = startClientGrid("client");
        assertNotNull(client.cache(CACHE1));
        assertNotNull(client.cache(CACHE2));
        try (IgniteDataStreamer<Object, Object> streamer = crd.dataStreamer(CACHE1)) {
            // Put 500 keys per partition.
            for (int k = 0; k < PARTS_CNT * 500; k++)
                streamer.addData(k, new byte[10]);
        }
        try (IgniteDataStreamer<Object, Object> streamer = crd.dataStreamer(CACHE2)) {
            // Put 500 keys per partition.
            for (int k = 0; k < PARTS_CNT * 500; k++)
                streamer.addData(k, new byte[10]);
        }
        TestRecordingCommunicationSpi crdSpi = TestRecordingCommunicationSpi.spi(crd);
        final AffinityTopologyVersion joinVer = new AffinityTopologyVersion(4, 0);
        AffinityTopologyVersion leftVer = new AffinityTopologyVersion(5, 0);
        AffinityTopologyVersion idealVer = new AffinityTopologyVersion(5, 1);
        AtomicReference<Set<Integer>> full = new AtomicReference<>();
        GridConcurrentSkipListSet<Integer> leftVerParts = new GridConcurrentSkipListSet<>();
        crdSpi.blockMessages((node, m) -> {
            if (m instanceof GridDhtPartitionSupplyMessage) {
                GridDhtPartitionSupplyMessage msg = (GridDhtPartitionSupplyMessage) m;
                // Allow full rebalance for cache 1 and system cache.
                if (msg.groupId() != CU.cacheId(CACHE2))
                    return false;
                // Allow only first batch for cache 2.
                if (msg.topologyVersion().equals(joinVer)) {
                    if (full.get() == null) {
                        Map<Integer, Long> last = U.field(msg, "last");
                        full.set(last.keySet());
                        return false;
                    }
                    return true;
                }
                if (msg.topologyVersion().equals(leftVer)) {
                    Map<Integer, Long> last = U.field(msg, "last");
                    leftVerParts.addAll(last.keySet());
                    return true;
                }
            } else if (m instanceof GridDhtPartitionsFullMessage) {
                GridDhtPartitionsFullMessage msg = (GridDhtPartitionsFullMessage) m;
                // Delay full message for ideal topology switch.
                GridDhtPartitionExchangeId exchId = msg.exchangeId();
                if (exchId != null && exchId.topologyVersion().equals(idealVer))
                    return true;
            }
            return false;
        });
        TestRecordingCommunicationSpi g1Spi = TestRecordingCommunicationSpi.spi(g1);
        g1Spi.blockMessages((node, msg) -> {
            if (msg instanceof GridDhtPartitionSupplyMessage) {
                GridDhtPartitionSupplyMessage m = (GridDhtPartitionSupplyMessage) msg;
                return m.groupId() == CU.cacheId(CACHE2);
            }
            return false;
        });
        startGrid(2);
        crdSpi.waitForBlocked();
        g1Spi.waitForBlocked();
        // Wait for partial owning.
        assertTrue("Timed out while waiting for rebalance", GridTestUtils.waitForCondition(() -> {
            // Await full rebalance for cache 1.
            GridDhtPartitionTopology top0 = grid(2).cachex(CACHE1).context().topology();
            for (int p = 0; p < PARTS_CNT; p++) {
                if (top0.localPartition(p).state() != OWNING)
                    return false;
            }
            // Await partial rebalance for cache 2.
            GridDhtPartitionTopology top1 = grid(2).cachex(CACHE2).context().topology();
            for (Integer part : full.get()) {
                if (top1.localPartition(part).state() != OWNING)
                    return false;
            }
            return true;
        }, 10_000));
        // At this point cache 1 is fully rebalanced and cache 2 is partially rebalanced.
        // Stop supplier in the middle of rebalance.
        g1.close();
        // Wait for topologies and calculate required partitions.
        grid(0).cachex(CACHE1).context().affinity().affinityReadyFuture(leftVer).get();
        grid(2).cachex(CACHE1).context().affinity().affinityReadyFuture(leftVer).get();
        grid(0).cachex(CACHE2).context().affinity().affinityReadyFuture(leftVer).get();
        grid(2).cachex(CACHE2).context().affinity().affinityReadyFuture(leftVer).get();
        AffinityAssignment assignment0 = grid(0).cachex(CACHE1).context().affinity().assignment(leftVer);
        AffinityAssignment assignment = grid(0).cachex(CACHE2).context().affinity().assignment(leftVer);
        // Search for partitions with compatible (stable) and incompatible (moving) assignments.
        // stablePart: partition of cache1 mapped to the same primary on both the late and the ideal topology.
        int stablePart = -1;
        // movingPart: partition of cache2 mapped to different primaries on the late and the ideal topology.
        int movingPart = -1;
        for (int p = 0; p < assignment0.assignment().size(); p++) {
            // Use the cache1 assignment here: stablePart is a cache1 partition.
            List<ClusterNode> curr = assignment0.assignment().get(p);
            List<ClusterNode> ideal = assignment0.idealAssignment().get(p);
            if (curr.equals(ideal) && curr.get(0).order() == 1) {
                stablePart = p;
                break;
            }
        }
        assertFalse(stablePart == -1);
        for (int p = 0; p < assignment.assignment().size(); p++) {
            List<ClusterNode> curr = assignment.assignment().get(p);
            List<ClusterNode> ideal = assignment.idealAssignment().get(p);
            if (!curr.equals(ideal) && curr.get(0).order() == 1) {
                movingPart = p;
                break;
            }
        }
        assertFalse(movingPart == -1);
        TestRecordingCommunicationSpi.spi(client).blockMessages(new IgniteBiPredicate<ClusterNode, Message>() {
            @Override
            public boolean apply(ClusterNode node, Message msg) {
                if (concurrency == PESSIMISTIC)
                    return msg instanceof GridNearLockRequest;
                else
                    return msg instanceof GridNearTxPrepareRequest;
            }
        });
        final int finalStablePart = stablePart;
        final int finalMovingPart = movingPart;
        IgniteInternalFuture<?> txFut = multithreadedAsync(() -> {
            try (Transaction tx = client.transactions().txStart(concurrency, isolation)) {
                // Will map on crd (order=1).
                client.cache(CACHE1).put(finalStablePart, 0);
                // The next request will remap to the ideal topology, but it is not ready on any node except crd.
                client.cache(CACHE2).put(finalMovingPart, 0);
                tx.commit();
            }
        }, 1, "tx-thread");
        // Wait until all missing supply messages are blocked.
        assertTrue(GridTestUtils.waitForCondition(() -> leftVerParts.size() == PARTS_CNT - full.get().size(), 5_000));
        // Delay first lock request on late topology.
        TestRecordingCommunicationSpi.spi(client).waitForBlocked();
        // At this point only supply messages should be blocked.
        // Unblock to continue rebalance and trigger ideal topology switch.
        crdSpi.stopBlock(true, null, false, true);
        // Wait until ideal topology is ready on crd.
        crd.context().cache().context().exchange().affinityReadyFuture(idealVer).get(10_000);
        // Other node must wait for full message.
        assertFalse(GridTestUtils.waitForCondition(
            () -> grid(2).context().cache().context().exchange().affinityReadyFuture(idealVer).isDone(), 1_000));
        // Map on unstable topology (PME is in progress on other node).
        TestRecordingCommunicationSpi.spi(client).stopBlock();
        // Capture local transaction.
        IgniteInternalTx tx0 = client.context().cache().context().tm().activeTransactions().iterator().next();
        // Expected behavior: tx must hang (both pessimistic and optimistic) because topology is not ready.
        try {
            txFut.get(3_000);
            fail("TX must not complete");
        } catch (IgniteFutureTimeoutCheckedException e) {
            // Expected.
        }
        crdSpi.stopBlock();
        txFut.get();
        // Check transaction map version. Should be mapped on ideal topology.
        assertEquals(tx0.topologyVersionSnapshot(), idealVer);
        awaitPartitionMapExchange();
        checkFutures();
    } finally {
        stopAllGrids();
    }
}
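The rebalance progress checks above all follow the same pattern: obtain the GridDhtPartitionTopology for a cache on a given node and inspect per-partition states via localPartition(p).state(). A small hedged helper capturing that pattern (the helper name and signature are illustrative, not part of the test) could look like this, assuming the test's PARTS_CNT constant and the statically imported OWNING state:
// Hypothetical helper distilled from the waitForCondition block above: true when every
// partition of the given cache is OWNING on the node with the given test index.
private boolean allPartitionsOwned(int nodeIdx, String cacheName) {
    GridDhtPartitionTopology top = grid(nodeIdx).cachex(cacheName).context().topology();
    for (int p = 0; p < PARTS_CNT; p++) {
        if (top.localPartition(p).state() != OWNING)
            return false;
    }
    return true;
}
It could then back the rebalance wait above, e.g. assertTrue(GridTestUtils.waitForCondition(() -> allPartitionsOwned(2, CACHE1), 10_000)).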
Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology in project ignite by apache.
The class IgniteCacheClientNodePartitionsExchangeTest, method waitForTopologyUpdate.
/**
* @param expNodes Expected number of nodes.
* @param topVer Expected topology version.
* @throws Exception If failed.
*/
private void waitForTopologyUpdate(int expNodes, final AffinityTopologyVersion topVer) throws Exception {
    List<Ignite> nodes = G.allGrids();
    assertEquals(expNodes, nodes.size());
    for (Ignite ignite : nodes) {
        final IgniteKernal kernal = (IgniteKernal) ignite;
        GridTestUtils.waitForCondition(new GridAbsPredicate() {
            @Override
            public boolean apply() {
                return topVer.equals(kernal.context().cache().context().exchange().readyAffinityVersion());
            }
        }, 10_000);
        assertEquals("Unexpected affinity version for " + ignite.name(), topVer,
            kernal.context().cache().context().exchange().readyAffinityVersion());
    }
    Iterator<Ignite> it = nodes.iterator();
    Ignite ignite0 = it.next();
    Affinity<Integer> aff0 = ignite0.affinity(DEFAULT_CACHE_NAME);
    while (it.hasNext()) {
        Ignite ignite = it.next();
        Affinity<Integer> aff = ignite.affinity(DEFAULT_CACHE_NAME);
        assertEquals(aff0.partitions(), aff.partitions());
        for (int part = 0; part < aff.partitions(); part++)
            assertEquals(aff0.mapPartitionToPrimaryAndBackups(part), aff.mapPartitionToPrimaryAndBackups(part));
    }
    for (Ignite ignite : nodes) {
        final IgniteKernal kernal = (IgniteKernal) ignite;
        for (IgniteInternalCache cache : kernal.context().cache().caches()) {
            GridDhtPartitionTopology top = cache.context().topology();
            waitForReadyTopology(top, topVer);
            assertEquals("Unexpected topology version [node=" + ignite.name() + ", cache=" + cache.name() + ']',
                topVer, top.readyTopologyVersion());
        }
    }
    awaitPartitionMapExchange();
}
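For context, a hedged usage sketch of this helper: the node counts, grid indices and topology version values below are illustrative assumptions, not taken from the actual test methods, and would depend on how many nodes a concrete test has started.
// Hypothetical usage: after a client joins a two-server cluster the major topology version
// grows to 3; a client join does not change affinity, so the minor version stays 0.
startGrid(0);
startGrid(1);
startClientGrid("client");
waitForTopologyUpdate(3, new AffinityTopologyVersion(3, 0));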
Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology in project ignite by apache.
The class IgnitePdsPartitionsStateRecoveryTest, method testPartitionsStateConsistencyAfterRecoveryNoCheckpoints.
/**
* Test checks that partition state is recovered properly if only logical updates exist.
*
* @throws Exception If failed.
*/
@Test
public void testPartitionsStateConsistencyAfterRecoveryNoCheckpoints() throws Exception {
    Assume.assumeFalse("https://issues.apache.org/jira/browse/IGNITE-10603", MvccFeatureChecker.forcedMvcc());
    IgniteEx ignite = startGrid(0);
    ignite.cluster().active(true);
    IgniteCache<Object, Object> cache = ignite.cache(DEFAULT_CACHE_NAME);
    forceCheckpoint();
    for (int key = 0; key < 4096; key++) {
        int[] payload = new int[4096];
        Arrays.fill(payload, key);
        cache.put(key, payload);
    }
    GridDhtPartitionTopology topology = ignite.cachex(DEFAULT_CACHE_NAME).context().topology();
    Assert.assertFalse(topology.hasMovingPartitions());
    log.info("Stopping grid...");
    stopGrid(0);
    ignite = startGrid(0);
    awaitPartitionMapExchange();
    topology = ignite.cachex(DEFAULT_CACHE_NAME).context().topology();
    Assert.assertFalse("Node restored moving partitions after join to topology.", topology.hasMovingPartitions());
}
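Both recovery tests in this class assert the same invariant through the topology API: after restart the node must not report any MOVING partitions. A hedged sketch of a helper that wraps this check (the helper name and signature are hypothetical, not part of the test class) which the test above and the one below could share:
// Hypothetical helper wrapping the recovery assertion used in both tests: after restart the
// given node must not report any MOVING partitions for the given cache.
private static void assertNoMovingPartitions(IgniteEx node, String cacheName) {
    GridDhtPartitionTopology top = node.cachex(cacheName).context().topology();
    Assert.assertFalse("Node restored moving partitions after join to topology.", top.hasMovingPartitions());
}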
Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology in project ignite by apache.
The class IgnitePdsPartitionsStateRecoveryTest, method testPartitionsStateConsistencyAfterRecovery.
/**
* Test checks that partition state is recovered properly if last checkpoint was skipped and there are logical updates to apply.
*
* @throws Exception If failed.
*/
@Test
public void testPartitionsStateConsistencyAfterRecovery() throws Exception {
    IgniteEx ignite = startGrid(0);
    ignite.cluster().active(true);
    IgniteCache<Object, Object> cache = ignite.cache(DEFAULT_CACHE_NAME);
    for (int key = 0; key < 4096; key++)
        cache.put(key, key);
    forceCheckpoint();
    for (int key = 0; key < 4096; key++) {
        int[] payload = new int[4096];
        Arrays.fill(payload, key);
        cache.put(key, payload);
    }
    GridDhtPartitionTopology topology = ignite.cachex(DEFAULT_CACHE_NAME).context().topology();
    Assert.assertFalse(topology.hasMovingPartitions());
    log.info("Stopping grid...");
    stopGrid(0);
    ignite = startGrid(0);
    awaitPartitionMapExchange();
    topology = ignite.cachex(DEFAULT_CACHE_NAME).context().topology();
    Assert.assertFalse("Node restored moving partitions after join to topology.", topology.hasMovingPartitions());
}
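A hedged variation on the final assertion above: instead of hasMovingPartitions(), a test could walk the local partitions and require each one to be OWNING. This sketch assumes the OWNING state constant is statically imported as in the earlier cross-cache test, and that GridDhtLocalPartition is available from the same topology package; localPartition(p) can return null for partitions the node does not host, so those are skipped.
// Hypothetical stricter check: every local partition of the restarted node must be OWNING.
GridDhtPartitionTopology top = ignite.cachex(DEFAULT_CACHE_NAME).context().topology();
int parts = ignite.affinity(DEFAULT_CACHE_NAME).partitions();
for (int p = 0; p < parts; p++) {
    GridDhtLocalPartition locPart = top.localPartition(p);
    // Skip partitions this node does not keep locally.
    if (locPart != null)
        Assert.assertEquals("Partition is not owned after recovery: " + p, OWNING, locPart.state());
}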