Use of org.apache.ignite.internal.processors.cache.distributed.near.GridNearLockRequest in project ignite by apache.
The class GridDhtColocatedLockFuture, method proceedMapping0.
/**
* Gets the next near lock mapping and either acquires DHT locks locally or sends a near lock request to
* the remote primary node.
*
* @throws IgniteCheckedException If mapping cannot be completed.
*/
private void proceedMapping0() throws IgniteCheckedException {
GridNearLockMapping map;
// Fail fast if future is completed (in case of async rollback)
if (isDone()) {
clear();
return;
}
// Fail fast if the transaction has timed out.
if (tx != null && tx.remainingTime() == -1) {
GridDhtColocatedLockFuture.this.onDone(false, tx.timeoutException());
clear();
return;
}
synchronized (this) {
map = mappings.poll();
}
// If there are no more mappings to process (or prepare has timed out), there is nothing left to send.
if (map == null)
return;
final GridNearLockRequest req = map.request();
final Collection<KeyCacheObject> mappedKeys = map.distributedKeys();
final ClusterNode node = map.node();
if (filter != null && filter.length != 0)
req.filter(filter, cctx);
if (node.isLocal())
lockLocally(mappedKeys, req.topologyVersion());
else {
final MiniFuture fut = new MiniFuture(node, mappedKeys, ++miniId);
req.miniId(fut.futureId());
// Append new future.
add(fut);
try {
cctx.io().send(node, req, cctx.ioPolicy());
if (msgLog.isDebugEnabled()) {
msgLog.debug("Collocated lock fut, sent request [txId=" + lockVer + ", inTx=" + inTx() + ", node=" + node.id() + ']');
}
} catch (ClusterTopologyCheckedException ex) {
assert fut != null;
fut.onResult(ex);
}
}
}
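The pattern above is one MiniFuture per remote node, registered on the enclosing compound future and correlated with its request via miniId. A minimal, self-contained sketch of that shape on java.util.concurrent types (all names illustrative, not Ignite API):

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.CompletableFuture;

// Hypothetical sketch: one "mini" future per remote request; the compound
// future completes only when every mini future is completed by a response
// (or failed by a topology error, cf. fut.onResult(ex) above).
final class CompoundLockFuture {
    private final List<CompletableFuture<Boolean>> minis = new ArrayList<>();

    // Each request would carry this mini future's id so the matching
    // response can complete exactly this future (cf. req.miniId(fut.futureId())).
    CompletableFuture<Boolean> addMini() {
        CompletableFuture<Boolean> mini = new CompletableFuture<>();
        minis.add(mini);
        return mini;
    }

    CompletableFuture<Void> whenAllDone() {
        return CompletableFuture.allOf(minis.toArray(new CompletableFuture[0]));
    }
}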
Use of org.apache.ignite.internal.processors.cache.distributed.near.GridNearLockRequest in project ignite by apache.
The class GridDhtColocatedLockFuture, method map0.
/**
* @param keys Keys to map.
* @param remap Remap flag.
* @param topLocked Topology locked flag.
* @throws IgniteCheckedException If mapping failed.
*/
private synchronized void map0(Collection<KeyCacheObject> keys, boolean remap, boolean topLocked) throws IgniteCheckedException {
try {
AffinityTopologyVersion topVer = this.topVer;
assert topVer != null;
assert topVer.topologyVersion() > 0;
if (CU.affinityNodes(cctx, topVer).isEmpty()) {
onDone(new ClusterTopologyServerNotFoundException("Failed to map keys for cache " + "(all partition nodes left the grid): " + cctx.name()));
return;
}
boolean clientNode = cctx.kernalContext().clientNode();
assert !remap || (clientNode && (tx == null || !tx.hasRemoteLocks()));
// First assume this node is primary for all keys passed in.
if (!clientNode && mapAsPrimary(keys, topVer))
return;
mappings = new ArrayDeque<>();
// Assign keys to primary nodes.
GridNearLockMapping map = null;
for (KeyCacheObject key : keys) {
GridNearLockMapping updated = map(key, map, topVer);
// If new mapping was created, add to collection.
if (updated != map) {
mappings.add(updated);
if (tx != null && updated.node().isLocal())
tx.colocatedLocallyMapped(true);
}
map = updated;
}
if (isDone()) {
if (log.isDebugEnabled())
log.debug("Abandoning (re)map because future is done: " + this);
return;
}
if (log.isDebugEnabled())
log.debug("Starting (re)map for mappings [mappings=" + mappings + ", fut=" + this + ']');
boolean hasRmtNodes = false;
boolean first = true;
// Create mini futures.
for (Iterator<GridNearLockMapping> iter = mappings.iterator(); iter.hasNext(); ) {
GridNearLockMapping mapping = iter.next();
ClusterNode node = mapping.node();
Collection<KeyCacheObject> mappedKeys = mapping.mappedKeys();
boolean loc = node.equals(cctx.localNode());
assert !mappedKeys.isEmpty();
GridNearLockRequest req = null;
Collection<KeyCacheObject> distributedKeys = new ArrayList<>(mappedKeys.size());
for (KeyCacheObject key : mappedKeys) {
IgniteTxKey txKey = cctx.txKey(key);
GridDistributedCacheEntry entry = null;
if (tx != null) {
IgniteTxEntry txEntry = tx.entry(txKey);
if (txEntry != null) {
entry = (GridDistributedCacheEntry) txEntry.cached();
if (entry != null && loc == entry.detached()) {
entry = cctx.colocated().entryExx(key, topVer, true);
txEntry.cached(entry);
}
}
}
boolean explicit;
while (true) {
try {
if (entry == null)
entry = cctx.colocated().entryExx(key, topVer, true);
if (!cctx.isAll(entry, filter)) {
if (log.isDebugEnabled())
log.debug("Entry being locked did not pass filter (will not lock): " + entry);
onComplete(false, false);
return;
}
assert loc ^ entry.detached() : "Invalid entry [loc=" + loc + ", entry=" + entry + ']';
GridCacheMvccCandidate cand = addEntry(entry);
// Will either return value from dht cache or null if this is a miss.
IgniteBiTuple<GridCacheVersion, CacheObject> val = entry.detached() ? null : ((GridDhtCacheEntry) entry).versionedValue(topVer);
GridCacheVersion dhtVer = null;
if (val != null) {
dhtVer = val.get1();
valMap.put(key, val);
}
if (cand != null && !cand.reentry()) {
if (req == null) {
boolean clientFirst = false;
if (first) {
clientFirst = clientNode && !topLocked && (tx == null || !tx.hasRemoteLocks());
first = false;
}
assert !implicitTx() && !implicitSingleTx() : tx;
req = new GridNearLockRequest(cctx.cacheId(), topVer, cctx.nodeId(), threadId, futId, lockVer,
    inTx(), read, retval, isolation(), isInvalidate(), timeout, mappedKeys.size(),
    inTx() ? tx.size() : mappedKeys.size(), inTx() && tx.syncMode() == FULL_SYNC,
    inTx() ? tx.taskNameHash() : 0, read ? createTtl : -1L, read ? accessTtl : -1L,
    skipStore, keepBinary, clientFirst, false, cctx.deploymentEnabled(), inTx() ? tx.label() : null);
mapping.request(req);
}
distributedKeys.add(key);
if (tx != null)
tx.addKeyMapping(txKey, mapping.node());
// Include DHT version to match remote DHT entry.
req.addKeyBytes(key, retval, dhtVer, cctx);
}
explicit = inTx() && cand == null;
if (explicit)
tx.addKeyMapping(txKey, mapping.node());
break;
} catch (GridCacheEntryRemovedException ignored) {
if (log.isDebugEnabled())
log.debug("Got removed entry in lockAsync(..) method (will retry): " + entry);
entry = null;
}
}
// Mark the mapping with the explicit lock flag.
if (explicit) {
boolean marked = tx != null && tx.markExplicit(node.id());
assert tx == null || marked;
}
}
if (!distributedKeys.isEmpty()) {
mapping.distributedKeys(distributedKeys);
hasRmtNodes |= !mapping.node().isLocal();
} else {
assert mapping.request() == null;
iter.remove();
}
}
} finally {
// Notify waiters that mappings are ready; see cancel().
if (tx != null) {
mappingsReady = true;
notifyAll();
}
}
proceedMapping();
}
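map0 walks the keys in order and folds runs of consecutive keys that share a primary node into a single mapping, so one lock request goes out per node rather than per key. A simplified sketch of that grouping, with a caller-supplied key-to-primary function (types illustrative, not Ignite API):

import java.util.AbstractMap;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Deque;
import java.util.List;
import java.util.Map;
import java.util.function.Function;

final class LockMappings {
    // Mirrors the "if (updated != map) mappings.add(updated)" loop in map0:
    // a new mapping is opened only when the primary node changes.
    static <K, N> Deque<Map.Entry<N, List<K>>> group(Collection<K> keys, Function<K, N> primary) {
        Deque<Map.Entry<N, List<K>>> mappings = new ArrayDeque<>();
        for (K key : keys) {
            N node = primary.apply(key);
            Map.Entry<N, List<K>> last = mappings.peekLast();
            if (last == null || !last.getKey().equals(node)) {
                last = new AbstractMap.SimpleEntry<>(node, new ArrayList<>());
                mappings.addLast(last);
            }
            last.getValue().add(key);
        }
        return mappings;
    }
}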
Use of org.apache.ignite.internal.processors.cache.distributed.near.GridNearLockRequest in project ignite by apache.
The class IgniteCacheNearLockValueSelfTest, method testDhtVersion.
/**
* @throws Exception If failed.
*/
@Test
public void testDhtVersion() throws Exception {
CacheConfiguration<Object, Object> pCfg = new CacheConfiguration<>("partitioned");
pCfg.setAtomicityMode(CacheAtomicityMode.TRANSACTIONAL);
try (IgniteCache<Object, Object> cache = ignite(0).getOrCreateCache(pCfg, new NearCacheConfiguration<>())) {
cache.put("key1", "val1");
for (int i = 0; i < 3; i++) {
try (Transaction tx = ignite(0).transactions().txStart(PESSIMISTIC, REPEATABLE_READ)) {
cache.get("key1");
tx.commit();
}
TestRecordingCommunicationSpi comm = (TestRecordingCommunicationSpi) ignite(0).configuration().getCommunicationSpi();
Collection<GridNearLockRequest> reqs = (Collection) comm.recordedMessages(false);
assertEquals(1, reqs.size());
GridCacheAdapter<Object, Object> primary = ((IgniteKernal) grid(1)).internalCache("partitioned");
GridCacheEntryEx dhtEntry = primary.peekEx(primary.context().toCacheKeyObject("key1"));
assertNotNull(dhtEntry);
GridNearLockRequest req = reqs.iterator().next();
assertEquals(dhtEntry.version(), req.dhtVersion(0));
// Check entry version in near cache after commit.
GridCacheAdapter<Object, Object> near = ((IgniteKernal) grid(0)).internalCache("partitioned");
GridNearCacheEntry nearEntry = (GridNearCacheEntry) near.peekEx(near.context().toCacheKeyObject("key1"));
assertNotNull(nearEntry);
assertEquals(dhtEntry.version(), nearEntry.dhtVersion());
}
}
}
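The cast to Collection<GridNearLockRequest> only yields anything if recording was enabled beforehand: TestRecordingCommunicationSpi must be installed as the communication SPI and told to record GridNearLockRequest. A sketch of that wiring, assuming it lives in the test's configuration callback (placement is an assumption; the real test may enable recording elsewhere):

@Override protected IgniteConfiguration getConfiguration(String igniteInstanceName) throws Exception {
    IgniteConfiguration cfg = super.getConfiguration(igniteInstanceName);
    TestRecordingCommunicationSpi commSpi = new TestRecordingCommunicationSpi();
    // Record near lock requests so the test can later inspect their DHT versions.
    commSpi.record(GridNearLockRequest.class);
    cfg.setCommunicationSpi(commSpi);
    return cfg;
}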
Use of org.apache.ignite.internal.processors.cache.distributed.near.GridNearLockRequest in project ignite by apache.
The class TxPartitionCounterStateConsistencyTest, method testPartitionConsistencyDuringRebalanceAndConcurrentUpdates_LateAffinitySwitch.
/**
* Tests tx load concurrently with PME for switching late affinity.
* <p>
* Scenario: a transaction over two keys is mapped locally on the late affinity topology, then mapped and
* prepared remotely on the ideal topology; the first key maps to a non-moving partition, the second to a
* moving partition.
* <p>
* Success: the key over the moving partition is prepared on the new owner (chosen after the late affinity
* switch); otherwise txs could be prepared on different primaries after the late affinity switch.
*/
@Test
public void testPartitionConsistencyDuringRebalanceAndConcurrentUpdates_LateAffinitySwitch() throws Exception {
backups = 1;
customDiscoSpi = new BlockTcpDiscoverySpi().setIpFinder(IP_FINDER);
Field rndAddrsField = U.findField(BlockTcpDiscoverySpi.class, "skipAddrsRandomization");
assertNotNull(rndAddrsField);
rndAddrsField.set(customDiscoSpi, true);
// Start coordinator with custom discovery SPI.
IgniteEx crd = startGrid(0);
IgniteEx g1 = startGrid(1);
startGrid(2);
crd.cluster().baselineAutoAdjustEnabled(false);
crd.cluster().active(true);
// Same name pattern as in test configuration.
String consistentId = "node" + getTestIgniteInstanceName(3);
List<Integer> g1Keys = primaryKeys(g1.cache(DEFAULT_CACHE_NAME), 10);
List<Integer> movingFromG1 = movingKeysAfterJoin(g1, DEFAULT_CACHE_NAME, 10, null, consistentId);
// Retain only stable keys.
g1Keys.removeAll(movingFromG1);
// The key will move from grid0 to grid3.
Integer key = movingKeysAfterJoin(crd, DEFAULT_CACHE_NAME, 1, null, consistentId).get(0);
IgniteEx g3 = startGrid(3);
assertEquals(consistentId, g3.localNode().consistentId());
resetBaselineTopology();
awaitPartitionMapExchange();
assertTrue(crd.affinity(DEFAULT_CACHE_NAME).isPrimary(g1.localNode(), g1Keys.get(0)));
stopGrid(3);
Ignite client = startClientGrid(CLIENT_GRID_NAME);
IgniteCache<Object, Object> cache = client.cache(DEFAULT_CACHE_NAME);
IgniteCache<Object, Object> cache2 = client.getOrCreateCache(cacheConfiguration(DEFAULT_CACHE_NAME + "2"));
// Put one key per partition.
for (int k = 0; k < partitions(); k++) {
cache.put(k, 0);
cache2.put(k, 0);
}
CountDownLatch resumeDiscoSndLatch = new CountDownLatch(1);
BlockTcpDiscoverySpi crdDiscoSpi = (BlockTcpDiscoverySpi) grid(0).configuration().getDiscoverySpi();
CyclicBarrier sync = new CyclicBarrier(2);
crdDiscoSpi.setClosure((node, msg) -> {
if (msg instanceof CacheAffinityChangeMessage) {
U.awaitQuiet(sync);
U.awaitQuiet(resumeDiscoSndLatch);
}
return null;
});
// Start a new node; the closure above holds its late affinity switch until the locks are mapped.
IgniteInternalFuture fut = GridTestUtils.runAsync(() -> {
try {
startGrid(SERVER_NODES);
awaitPartitionMapExchange();
} catch (Exception e) {
fail(X.getFullStackTrace(e));
}
});
sync.await();
TestRecordingCommunicationSpi clientSpi = TestRecordingCommunicationSpi.spi(client);
clientSpi.blockMessages((node, msg) -> msg instanceof GridNearLockRequest);
IgniteInternalFuture txFut = GridTestUtils.runAsync(() -> {
try (Transaction tx = client.transactions().txStart()) {
Map<Integer, Integer> map = new LinkedHashMap<>();
// First key: lockAll is sent with clientFirst=true; mapped to a stable partition.
map.put(g1Keys.get(0), g1Keys.get(0));
// Second key: lockAll is sent with clientFirst=false; mapped to a moving partition.
map.put(key, key);
cache.putAll(map);
cache2.putAll(new LinkedHashMap<>(map));
// Will start preparing in the middle of PME.
tx.commit();
}
});
IgniteInternalFuture lockFut = GridTestUtils.runAsync(() -> {
try {
// Wait for first lock request sent on local (late) topology.
clientSpi.waitForBlocked();
// Continue late switch PME.
resumeDiscoSndLatch.countDown();
crdDiscoSpi.setClosure(null);
// Wait for the late affinity switch to complete.
awaitPartitionMapExchange();
// Continue tx mapping and preparing.
clientSpi.stopBlock();
} catch (InterruptedException e) {
fail(X.getFullStackTrace(e));
}
});
fut.get();
txFut.get();
lockFut.get();
assertPartitionsSame(idleVerify(crd, DEFAULT_CACHE_NAME));
// The tx must be prepared on the new owner.
PartitionUpdateCounter cntr = counter(key, grid(3).name());
assertNotNull(cntr);
assertEquals(cntr.toString(), 2, cntr.reserved());
PartitionUpdateCounter cntr2 = counter(key, DEFAULT_CACHE_NAME + "2", grid(3).name());
assertNotNull(cntr2);
assertEquals(cntr2.toString(), 2, cntr2.reserved());
}
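The test leans on a standard Ignite test choreography: block a message class on the sender, wait until one is captured, change the topology while it is held, then release it. Reduced to its core (a sketch, assuming a started client node and a test method that declares throws Exception):

TestRecordingCommunicationSpi spi = TestRecordingCommunicationSpi.spi(client);
spi.blockMessages((node, msg) -> msg instanceof GridNearLockRequest);
// ... asynchronously start the tx that will send the lock request ...
spi.waitForBlocked(); // Rendezvous: the request is now held back.
// ... complete the late affinity switch while the request is in flight ...
spi.stopBlock(); // Release held messages; tx mapping resumes.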
Use of org.apache.ignite.internal.processors.cache.distributed.near.GridNearLockRequest in project ignite by apache.
The class TxCrossCacheMapOnInvalidTopologyTest, method doTestCrossCacheTxMapOnInvalidTopology.
/**
* Test scenario: a cross-cache tx is started after a node has left in the middle of rebalance; the first
* cache is fully rebalanced and the second is only partially rebalanced.
*
* The first cache map request will trigger a client-compatible remap for pessimistic txs;
* the second cache map request should use the new topology version.
*
* For optimistic txs, a remap is enforced if the transaction has more than one mapping or all enlisted
* caches have compatible assignments.
*
* Success: the tx is finished on the ideal topology version over all mapped nodes.
*
* @param concurrency Concurrency.
* @param isolation Isolation.
*/
private void doTestCrossCacheTxMapOnInvalidTopology(TransactionConcurrency concurrency, TransactionIsolation isolation) throws Exception {
try {
IgniteEx crd = startGrid(0);
IgniteEx g1 = startGrid(1);
awaitPartitionMapExchange();
IgniteEx client = startClientGrid("client");
assertNotNull(client.cache(CACHE1));
assertNotNull(client.cache(CACHE2));
try (IgniteDataStreamer<Object, Object> streamer = crd.dataStreamer(CACHE1)) {
// Put 500 keys per partition.
for (int k = 0; k < PARTS_CNT * 500; k++) streamer.addData(k, new byte[10]);
}
try (IgniteDataStreamer<Object, Object> streamer = crd.dataStreamer(CACHE2)) {
// Put 500 keys per partition.
for (int k = 0; k < PARTS_CNT * 500; k++) streamer.addData(k, new byte[10]);
}
TestRecordingCommunicationSpi crdSpi = TestRecordingCommunicationSpi.spi(crd);
final AffinityTopologyVersion joinVer = new AffinityTopologyVersion(4, 0);
AffinityTopologyVersion leftVer = new AffinityTopologyVersion(5, 0);
AffinityTopologyVersion idealVer = new AffinityTopologyVersion(5, 1);
AtomicReference<Set<Integer>> full = new AtomicReference<>();
GridConcurrentSkipListSet<Integer> leftVerParts = new GridConcurrentSkipListSet<>();
crdSpi.blockMessages((node, m) -> {
if (m instanceof GridDhtPartitionSupplyMessage) {
GridDhtPartitionSupplyMessage msg = (GridDhtPartitionSupplyMessage) m;
// Allow full rebalance for cache 1 and system cache.
if (msg.groupId() != CU.cacheId(CACHE2))
return false;
// Allow only first batch for cache 2.
if (msg.topologyVersion().equals(joinVer)) {
if (full.get() == null) {
Map<Integer, Long> last = U.field(msg, "last");
full.set(last.keySet());
return false;
}
return true;
}
if (msg.topologyVersion().equals(leftVer)) {
Map<Integer, Long> last = U.field(msg, "last");
leftVerParts.addAll(last.keySet());
return true;
}
} else if (m instanceof GridDhtPartitionsFullMessage) {
GridDhtPartitionsFullMessage msg = (GridDhtPartitionsFullMessage) m;
// Delay full message for ideal topology switch.
GridDhtPartitionExchangeId exchId = msg.exchangeId();
if (exchId != null && exchId.topologyVersion().equals(idealVer))
return true;
}
return false;
});
TestRecordingCommunicationSpi g1Spi = TestRecordingCommunicationSpi.spi(g1);
g1Spi.blockMessages((node, msg) -> {
if (msg instanceof GridDhtPartitionSupplyMessage) {
GridDhtPartitionSupplyMessage m = (GridDhtPartitionSupplyMessage) msg;
return m.groupId() == CU.cacheId(CACHE2);
}
return false;
});
startGrid(2);
crdSpi.waitForBlocked();
g1Spi.waitForBlocked();
// Wait for partial ownership.
assertTrue("Timed out while waiting for rebalance", GridTestUtils.waitForCondition(() -> {
// Await full rebalance for cache 1.
GridDhtPartitionTopology top0 = grid(2).cachex(CACHE1).context().topology();
for (int p = 0; p < PARTS_CNT; p++) {
if (top0.localPartition(p).state() != OWNING)
return false;
}
// Await partial rebalance for cache 2.
GridDhtPartitionTopology top1 = grid(2).cachex(CACHE2).context().topology();
for (Integer part : full.get()) {
if (top1.localPartition(part).state() != OWNING)
return false;
}
return true;
}, 10_000));
// At this point cache 1 is fully rebalanced and cache 2 is partially rebalanced.
// Stop supplier in the middle of rebalance.
g1.close();
// Wait for topologies and calculate required partitions.
grid(0).cachex(CACHE1).context().affinity().affinityReadyFuture(leftVer).get();
grid(2).cachex(CACHE1).context().affinity().affinityReadyFuture(leftVer).get();
grid(0).cachex(CACHE2).context().affinity().affinityReadyFuture(leftVer).get();
grid(2).cachex(CACHE2).context().affinity().affinityReadyFuture(leftVer).get();
AffinityAssignment assignment0 = grid(0).cachex(CACHE1).context().affinity().assignment(leftVer);
AffinityAssignment assignment = grid(0).cachex(CACHE2).context().affinity().assignment(leftVer);
// Search for partitions with the required assignments.
// Partition of cache1 mapped to the same primary on both late and ideal topologies.
int stablePart = -1;
// Partition of cache2 mapped to different primaries on late and ideal topologies.
int movingPart = -1;
for (int p = 0; p < assignment0.assignment().size(); p++) {
List<ClusterNode> curr = assignment0.assignment().get(p);
List<ClusterNode> ideal = assignment0.idealAssignment().get(p);
if (curr.equals(ideal) && curr.get(0).order() == 1) {
stablePart = p;
break;
}
}
assertFalse(stablePart == -1);
for (int p = 0; p < assignment.assignment().size(); p++) {
List<ClusterNode> curr = assignment.assignment().get(p);
List<ClusterNode> ideal = assignment.idealAssignment().get(p);
if (!curr.equals(ideal) && curr.get(0).order() == 1) {
movingPart = p;
break;
}
}
assertFalse(movingPart == -1);
TestRecordingCommunicationSpi.spi(client).blockMessages(new IgniteBiPredicate<ClusterNode, Message>() {
@Override
public boolean apply(ClusterNode node, Message msg) {
if (concurrency == PESSIMISTIC)
return msg instanceof GridNearLockRequest;
else
return msg instanceof GridNearTxPrepareRequest;
}
});
final int finalStablePart = stablePart;
final int finalMovingPart = movingPart;
IgniteInternalFuture<?> txFut = multithreadedAsync(() -> {
try (Transaction tx = client.transactions().txStart(concurrency, isolation)) {
// Will map on crd(order=1).
client.cache(CACHE1).put(finalStablePart, 0);
// The next request will remap to the ideal topology, which is not ready on any node except crd.
client.cache(CACHE2).put(finalMovingPart, 0);
tx.commit();
}
}, 1, "tx-thread");
// Wait until all missing supply messages are blocked.
assertTrue(GridTestUtils.waitForCondition(() -> leftVerParts.size() == PARTS_CNT - full.get().size(), 5_000));
// Delay first lock request on late topology.
TestRecordingCommunicationSpi.spi(client).waitForBlocked();
// At this point only supply messages should be blocked.
// Unblock to continue rebalance and trigger ideal topology switch.
crdSpi.stopBlock(true, null, false, true);
// Wait until ideal topology is ready on crd.
crd.context().cache().context().exchange().affinityReadyFuture(idealVer).get(10_000);
// Other node must wait for full message.
assertFalse(GridTestUtils.waitForCondition(() -> grid(2).context().cache().context().exchange().affinityReadyFuture(idealVer).isDone(), 1_000));
// Map on unstable topology (PME is in progress on other node).
TestRecordingCommunicationSpi.spi(client).stopBlock();
// Capture local transaction.
IgniteInternalTx tx0 = client.context().cache().context().tm().activeTransactions().iterator().next();
// Expected behavior: tx must hang (both pessimistic and optimistic) because topology is not ready.
try {
txFut.get(3_000);
fail("TX must not complete");
} catch (IgniteFutureTimeoutCheckedException e) {
// Expected.
}
crdSpi.stopBlock();
txFut.get();
// Check transaction map version. Should be mapped on ideal topology.
assertEquals(tx0.topologyVersionSnapshot(), idealVer);
awaitPartitionMapExchange();
checkFutures();
} finally {
stopAllGrids();
}
}
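doTestCrossCacheTxMapOnInvalidTopology is a parameterized helper; concrete @Test methods would invoke it once per concurrency/isolation combination, for example (method names and chosen pairs are illustrative):

@Test
public void testCrossCacheTxMapOnInvalidTopologyPessimistic() throws Exception {
    doTestCrossCacheTxMapOnInvalidTopology(PESSIMISTIC, REPEATABLE_READ);
}

@Test
public void testCrossCacheTxMapOnInvalidTopologyOptimistic() throws Exception {
    doTestCrossCacheTxMapOnInvalidTopology(OPTIMISTIC, SERIALIZABLE);
}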