Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopologyImpl in project ignite by apache.
The class CacheRentingStateRepairTest, method testRebalanceRentingPartitionAndNodeJoin.
/**
 * @param client {@code True} for client node join.
 * @param delay Delay.
 *
 * @throws Exception If failed.
 */
private void testRebalanceRentingPartitionAndNodeJoin(boolean client, long delay) throws Exception {
    try {
        IgniteEx g0 = startGrids(2);

        g0.cluster().baselineAutoAdjustEnabled(false);
        g0.cluster().active(true);

        awaitPartitionMapExchange();

        List<Integer> parts = evictingPartitionsAfterJoin(g0, g0.cache(DEFAULT_CACHE_NAME), 20);

        int delayEvictPart = parts.get(0);

        List<Integer> keys = partitionKeys(g0.cache(DEFAULT_CACHE_NAME), delayEvictPart, 2_000, 0);

        for (Integer key : keys)
            g0.cache(DEFAULT_CACHE_NAME).put(key, key);

        GridDhtPartitionTopologyImpl top =
            (GridDhtPartitionTopologyImpl)dht(g0.cache(DEFAULT_CACHE_NAME)).topology();

        GridDhtLocalPartition part = top.localPartition(delayEvictPart);

        assertNotNull(part);

        // Prevent eviction until the partition is released below.
        part.reserve();

        startGrid(2);

        resetBaselineTopology();

        part.release();

        // Wait for eviction. The same could be achieved by calling awaitPartitionMapExchange(true, true, null, true);
        part.rent().get();
        CountDownLatch l1 = new CountDownLatch(1);
        CountDownLatch l2 = new CountDownLatch(1);

        // Create a race between processing of the final supply message and partition clearing.
        // The evicted partition will be recreated using the supplied factory.
        top.partitionFactory((ctx, grp, id, recovery) -> id != delayEvictPart
            ? new GridDhtLocalPartition(ctx, grp, id, recovery)
            : new GridDhtLocalPartition(ctx, grp, id, recovery) {
                @Override public void beforeApplyBatch(boolean last) {
                    if (last) {
                        l1.countDown();

                        U.awaitQuiet(l2);

                        // Delay rebalance finish to enforce the race with clearing.
                        if (delay > 0)
                            doSleep(delay);
                    }
                }
            });

        stopGrid(2);

        // Trigger rebalance for delayEvictPart after eviction.
        resetBaselineTopology();
        IgniteInternalFuture<?> fut = multithreadedAsync(new Runnable() {
            @Override public void run() {
                try {
                    l1.await();

                    // Trigger partition clear on next topology version.
                    if (client)
                        startClientGrid(CLIENT);
                    else
                        startGrid(2);

                    // Finish partition rebalance after initiating clear.
                    l2.countDown();
                }
                catch (Exception e) {
                    fail(X.getFullStackTrace(e));
                }
            }
        }, 1);

        fut.get();

        awaitPartitionMapExchange(true, true, null, true);

        assertPartitionsSame(idleVerify(g0));
    }
    finally {
        stopAllGrids();
    }
}
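The technique shared by this and several of the snippets below is swapping in a custom partition factory so that every partition created afterwards carries a test hook. A minimal, hedged sketch of the pattern in isolation; the helper name installBeforeApplyBatchHook is illustrative and assumes a GridCommonAbstractTest subclass with an already started node (only APIs already used in the snippet above are relied on):

/**
 * Minimal sketch (not from the original test): installs a partition factory that wraps
 * every partition created afterwards with a test hook. Assumes a started {@code IgniteEx}
 * node and the {@code dht()} helper available in {@code GridCommonAbstractTest} subclasses.
 */
private void installBeforeApplyBatchHook(IgniteEx node) {
    GridDhtPartitionTopologyImpl top =
        (GridDhtPartitionTopologyImpl)dht(node.cache(DEFAULT_CACHE_NAME)).topology();

    top.partitionFactory((ctx, grp, id, recovery) ->
        new GridDhtLocalPartition(ctx, grp, id, recovery) {
            @Override public void beforeApplyBatch(boolean last) {
                // Hook point: block, delay or count down a latch here to provoke
                // a race with rebalancing or partition clearing.
                super.beforeApplyBatch(last);
            }
        });
}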
Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopologyImpl in project ignite by apache.
The class GridCacheRebalancingPartitionCountersTest, method checkUpdCounter.
/**
*/
private void checkUpdCounter(IgniteEx ignite, List<String> issues, HashMap<Integer, Long> partMap) {
    final CacheGroupContext grpCtx = ignite.context().cache().cacheGroup(CU.cacheId(CACHE_NAME));

    assertNotNull(grpCtx);

    GridDhtPartitionTopologyImpl top = (GridDhtPartitionTopologyImpl)grpCtx.topology();

    List<GridDhtLocalPartition> locParts = top.localPartitions();

    for (GridDhtLocalPartition part : locParts) {
        Long cnt = partMap.get(part.id());

        // The first node visited for this partition seeds the expected counter value.
        if (cnt == null)
            partMap.put(part.id(), part.updateCounter());

        if ((cnt != null && part.updateCounter() != cnt) || part.updateCounter() == 0)
            issues.add("Node name = " + ignite.name() + ", part = " + part.id() +
                ", updCounter = " + part.updateCounter());
    }
}
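A hedged sketch of how this checker might be driven from a test body (the caller below is illustrative and not part of the original class): the first node visited seeds partMap, every further node is compared against it, and the collected issues are then asserted to be empty.

/**
 * Hypothetical caller (not part of the original test): runs the counter check on every
 * started node and fails if any mismatch or zero counter was collected.
 */
private void checkUpdCountersOnAllNodes(int nodeCnt) {
    List<String> issues = new ArrayList<>();
    HashMap<Integer, Long> partMap = new HashMap<>();

    // The first node seeds partMap; subsequent nodes are compared against it.
    for (int i = 0; i < nodeCnt; i++)
        checkUpdCounter(grid(i), issues, partMap);

    assertTrue("Update counter issues: " + issues, issues.isEmpty());
}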
Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopologyImpl in project ignite by apache.
The class TxRollbackOnMapOnInvalidTopologyTest, method doTestRollback.
/**
 * Test scenario: mock a partition so that its primary check fails, then start a new node.
 * Expected result: the transaction is rolled back.
 *
 * @param near Near node.
 * @param node Owner node.
 * @throws Exception If failed.
 */
private void doTestRollback(Ignite near, IgniteEx node) throws Exception {
    List<Integer> primKeys = primaryKeys(node.cache(DEFAULT_CACHE_NAME), 100);
    List<Integer> movingKeys = movingKeysAfterJoin(node, DEFAULT_CACHE_NAME, 100);

    primKeys.removeAll(movingKeys);

    // After the removal, primKeys contains only keys from stable partitions (those that do not move after the join).
    int part = primKeys.get(0);

    IgniteEx grid = (IgniteEx)grid(node.affinity(DEFAULT_CACHE_NAME).mapPartitionToNode(part));

    GridDhtPartitionTopologyImpl top =
        (GridDhtPartitionTopologyImpl)grid.cachex(DEFAULT_CACHE_NAME).context().topology();

    AffinityTopologyVersion failCheckVer = new AffinityTopologyVersion(GRIDS + 2, 1);

    top.partitionFactory((ctx, grp, id, recovery) -> new GridDhtLocalPartition(ctx, grp, id, recovery) {
        @Override public boolean primary(AffinityTopologyVersion topVer) {
            // Fail the primary check only for the chosen partition on the expected topology version.
            return !(id == part && topVer.equals(failCheckVer)) && super.primary(topVer);
        }
    });

    // Re-create the mocked partition.
    GridDhtLocalPartition p0 = top.localPartition(part);

    p0.rent().get();
    assertTrue(p0.state() == EVICTED);

    ReadWriteLock lock = U.field(top, "lock");

    lock.writeLock().lock();

    p0 = top.getOrCreatePartition(part);
    p0.own();

    lock.writeLock().unlock();

    startGrid(GRIDS);

    awaitPartitionMapExchange();

    try (Transaction tx = near.transactions().txStart()) {
        near.cache(DEFAULT_CACHE_NAME).put(part, part);

        tx.commit();

        fail();
    }
    catch (TransactionRollbackException ignore) {
        // Expected.
    }
    catch (Exception e) {
        fail(X.getFullStackTrace(e));
    }
}
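The private scenario above is normally invoked from small @Test wrappers. A hedged sketch of such a wrapper (the method name and the choice of node are illustrative, not taken from the original class):

/**
 * Hypothetical wrapper (the original class defines its own {@code @Test} methods):
 * runs the rollback scenario with a server node acting as the near (originating) node.
 */
@Test
public void testRollbackOnServerNearNode() throws Exception {
    IgniteEx owner = grid(0);

    doTestRollback(owner, owner);
}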
Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopologyImpl in project ignite by apache.
The class CacheGroupContext, method start.
/**
* @throws IgniteCheckedException If failed.
*/
public void start() throws IgniteCheckedException {
    GridAffinityAssignmentCache affCache = ctx.affinity().groupAffinity(grpId);

    aff = affCache == null ? GridAffinityAssignmentCache.create(ctx.kernalContext(), ccfg.getAffinity(), ccfg) : affCache;

    if (ccfg.getCacheMode() != LOCAL) {
        top = ctx.kernalContext().resource().resolve(new GridDhtPartitionTopologyImpl(ctx, this));

        metrics.onTopologyInitialized();
    }

    try {
        offheapMgr = ctx.kernalContext().resource().resolve(
            persistenceEnabled ? new GridCacheOffheapManager() : new IgniteCacheOffheapManagerImpl());
    }
    catch (Exception e) {
        throw new IgniteCheckedException("Failed to initialize offheap manager", e);
    }

    offheapMgr.start(ctx, this);

    if (!isRecoveryMode()) {
        initializeIO();

        ctx.affinity().onCacheGroupCreated(this);
        ctx.evict().onCacheGroupStarted(this);
    }
}
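The topology instance constructed here is what the test snippets above cast to GridDhtPartitionTopologyImpl. A hedged sketch of how calling code typically reaches it, using only calls already shown in the checkUpdCounter example (the node variable and cache name are illustrative assumptions):

// Hedged sketch (not from CacheGroupContext): resolving the topology that start() created.
CacheGroupContext grp = node.context().cache().cacheGroup(CU.cacheId("myCache"));

GridDhtPartitionTopologyImpl top = (GridDhtPartitionTopologyImpl)grp.topology();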
Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopologyImpl in project ignite by apache.
The class IgnitePdsCacheEntriesExpirationTest, method testDeadlockBetweenCachePutAndEntryExpiration.
/**
 * Verifies a deadlock scenario between a thread modifying a cache entry (acquires cp read lock and entry lock),
 * the TTL thread expiring the entry (acquires cp read lock and entry lock) and the checkpoint thread (acquires
 * cp write lock).
 *
 * The checkpoint thread is not actually used but is emulated by the test to avoid a test hang (the interruptible
 * API for acquiring the write lock is used).
 *
 * For more details see <a href="https://ggsystems.atlassian.net/browse/GG-23135">GG-23135</a>.
 *
 * <p> <strong>Important note:</strong> the implementation of this test relies heavily on the structure of existing code in
 * {@link GridCacheOffheapManager.GridCacheDataStore#purgeExpiredInternal(GridCacheContext, IgniteInClosure2X, int)}
 * and {@link GridCacheMapEntry#onExpired(CacheObject, GridCacheVersion)} methods.
 *
 * Any changes to those methods could break the logic inside the test, so if new failures of the test occur, the
 * test code itself may require refactoring. </p>
*
* @throws Exception If failed.
*/
@Test
public void testDeadlockBetweenCachePutAndEntryExpiration() throws Exception {
    AtomicBoolean timeoutReached = new AtomicBoolean(false);
    AtomicBoolean cpWriteLocked = new AtomicBoolean(false);

    AtomicInteger partId = new AtomicInteger();

    CountDownLatch ttlLatch = new CountDownLatch(2);

    IgniteEx srv0 = startGrids(2);

    srv0.cluster().active(true);

    awaitPartitionMapExchange();

    srv0.getOrCreateCache(DEFAULT_CACHE_NAME);

    GridDhtPartitionTopologyImpl top =
        (GridDhtPartitionTopologyImpl)srv0.cachex(DEFAULT_CACHE_NAME).context().topology();
    top.partitionFactory((ctx, grp, id, recovery) -> {
        partId.set(id);

        return new GridDhtLocalPartition(ctx, grp, id, recovery) {
            /**
             * This method is modified to bring the threads into a deadlock situation.
             * The idea is the following: the updater thread (see code below), on its way to the
             * {@link GridCacheMapEntry#onExpired(CacheObject, GridCacheVersion)} call, stops here
             * (already having the entry lock acquired) and waits until the checkpoint write lock is acquired
             * by another special thread emulating the checkpointer thread (cp-write-lock-holder, see code below).
             * After that it enables the ttl-cleanup-worker thread to proceed
             * (by counting down ttlLatch, see the next overridden method) and reproduce the deadlock scenario.
             */
            @Override public IgniteCacheOffheapManager.CacheDataStore dataStore() {
                Thread t = Thread.currentThread();
                String tName = t.getName();

                if (tName == null || !tName.contains("updater"))
                    return super.dataStore();

                boolean unswapFoundInST = false;

                for (StackTraceElement e : t.getStackTrace()) {
                    if (e.getMethodName().contains("unswap")) {
                        unswapFoundInST = true;

                        break;
                    }
                }

                if (!unswapFoundInST)
                    return super.dataStore();

                while (!cpWriteLocked.get()) {
                    try {
                        Thread.sleep(10);
                    }
                    catch (InterruptedException ignored) {
                        log.warning(">>> Thread caught InterruptedException while waiting " +
                            "for cp write lock to be locked");
                    }
                }

                ttlLatch.countDown();

                return super.dataStore();
            }

            /**
             * This method is modified to bring the threads into a deadlock situation.
             * The idea is the following: the internal ttl-cleanup-worker thread wakes up to clean up expired entries,
             * reaches this method after calling purgeExpiredInternal (thus having the checkpoint read lock acquired)
             * and stops on ttlLatch until the updater thread comes in, acquires the entry lock and gets stuck
             * on acquiring the cp read lock
             * (because of the special cp-write-lock-holder thread already holding the cp write lock).
             *
             * So the situation of three threads stuck in a deadlock is reproduced.
             */
            @Override public boolean reserve() {
                Thread t = Thread.currentThread();
                String tName = t.getName();

                if (tName == null || !tName.contains("ttl-cleanup-worker"))
                    return super.reserve();

                boolean purgeExpiredFoundInST = false;

                for (StackTraceElement e : t.getStackTrace()) {
                    if (e.getMethodName().contains("purgeExpiredInternal")) {
                        purgeExpiredFoundInST = true;

                        break;
                    }
                }

                if (!purgeExpiredFoundInST)
                    return super.reserve();

                ttlLatch.countDown();

                try {
                    ttlLatch.await();
                }
                catch (InterruptedException ignored) {
                    log.warning(">>> Thread caught InterruptedException while waiting for ttl latch" +
                        " to be released by updater thread");
                }

                return super.reserve();
            }
        };
    });
    stopGrid(1);

    // Change BLT to force new partition creation with modified GridDhtLocalPartition class.
    srv0.cluster().setBaselineTopology(srv0.cluster().topologyVersion());

    Thread.sleep(500);

    IgniteCache<Object, Object> cache = srv0.getOrCreateCache(DEFAULT_CACHE_NAME);

    GridCacheDatabaseSharedManager db = (GridCacheDatabaseSharedManager)srv0.context().cache().context().database();

    CheckpointReadWriteLock checkpointReadWriteLock =
        U.field(db.checkpointManager.checkpointTimeoutLock(), "checkpointReadWriteLock");

    ReentrantReadWriteLockWithTracking rwLock = U.field(checkpointReadWriteLock, "checkpointLock");

    int key = 0;

    while (true) {
        if (srv0.affinity(DEFAULT_CACHE_NAME).partition(key) != partId.get())
            key++;
        else
            break;
    }

    cache.put(key, 1);

    int finalKey = key;

    IgniteInternalFuture updateFut = GridTestUtils.runAsync(() -> {
        log.info(">>> Updater thread has started, updating key " + finalKey);

        int i = 10;

        while (!timeoutReached.get()) {
            cache.put(finalKey, i++);

            try {
                Thread.sleep(300);
            }
            catch (InterruptedException e) {
                log.warning(">>> Updater thread sleep was interrupted");
            }
        }
    }, "updater-thread");

    IgniteInternalFuture writeLockHolderFut = GridTestUtils.runAsync(() -> {
        while (ttlLatch.getCount() != 1) {
            try {
                Thread.sleep(20);
            }
            catch (InterruptedException e) {
                log.warning(">>> Write lock holder thread sleep was interrupted");

                break;
            }
        }

        try {
            cpWriteLocked.set(true);

            rwLock.writeLock().lockInterruptibly();

            ttlLatch.await();
        }
        catch (InterruptedException e) {
            log.warning(">>> Write lock holder thread was interrupted while obtaining write lock.");
        }
        finally {
            rwLock.writeLock().unlock();
        }
    }, "cp-write-lock-holder");

    GridTestUtils.runAsync(() -> {
        long start = System.currentTimeMillis();

        while (System.currentTimeMillis() - start < TIMEOUT)
            doSleep(1_000);

        timeoutReached.set(true);
    });

    try {
        updateFut.get(TIMEOUT * 2);
    }
    catch (IgniteFutureTimeoutCheckedException ignored) {
        fail("Failed to wait for futures for doubled timeout");
    }
    finally {
        while (ttlLatch.getCount() > 0)
            ttlLatch.countDown();

        writeLockHolderFut.cancel();
        updateFut.cancel();
    }
}