use of org.apache.ignite.internal.IgniteFutureTimeoutCheckedException in project gridgain by gridgain.
the class IgniteCacheCrossCacheTxFailoverTest method crossCacheTxFailover.
/**
* @param cacheMode Cache mode.
* @param sameAff If {@code false}, uses a different number of partitions for the caches.
* @param concurrency Transaction concurrency.
* @param isolation Transaction isolation.
* @throws Exception If failed.
*/
private void crossCacheTxFailover(CacheMode cacheMode, boolean sameAff, final TransactionConcurrency concurrency, final TransactionIsolation isolation) throws Exception {
IgniteKernal ignite0 = (IgniteKernal) ignite(0);
final AtomicBoolean stop = new AtomicBoolean();
try {
ignite0.createCache(cacheConfiguration(CACHE1, cacheMode, 256));
ignite0.createCache(cacheConfiguration(CACHE2, cacheMode, sameAff ? 256 : 128));
final AtomicInteger threadIdx = new AtomicInteger();
IgniteInternalFuture<?> fut = runMultiThreadedAsync(new Callable<Void>() {
@Override
public Void call() throws Exception {
int idx = threadIdx.getAndIncrement();
Ignite ignite = ignite(idx % GRID_CNT);
log.info("Started update thread [node=" + ignite.name() + ", client=" + ignite.configuration().isClientMode() + ']');
IgniteCache<TestKey, TestValue> cache1 = ignite.cache(CACHE1);
IgniteCache<TestKey, TestValue> cache2 = ignite.cache(CACHE2);
assertNotSame(cache1, cache2);
IgniteTransactions txs = ignite.transactions();
ThreadLocalRandom rnd = ThreadLocalRandom.current();
long iter = 0;
while (!stop.get()) {
boolean sameKey = rnd.nextBoolean();
try {
try (Transaction tx = txs.txStart(concurrency, isolation)) {
if (sameKey) {
TestKey key = new TestKey(rnd.nextLong(KEY_RANGE));
cacheOperation(rnd, cache1, key);
cacheOperation(rnd, cache2, key);
} else {
TestKey key1 = new TestKey(rnd.nextLong(KEY_RANGE));
TestKey key2 = new TestKey(key1.key() + 1);
cacheOperation(rnd, cache1, key1);
cacheOperation(rnd, cache2, key2);
}
tx.commit();
}
} catch (CacheException | IgniteException e) {
log.info("Update error: " + e);
}
if (iter++ % 500 == 0)
log.info("Iteration: " + iter);
}
return null;
}
/**
* @param rnd Random.
* @param cache Cache.
* @param key Key.
*/
private void cacheOperation(ThreadLocalRandom rnd, IgniteCache<TestKey, TestValue> cache, TestKey key) {
switch(rnd.nextInt(4)) {
case 0:
cache.put(key, new TestValue(rnd.nextLong()));
break;
case 1:
cache.remove(key);
break;
case 2:
cache.invoke(key, new TestEntryProcessor(rnd.nextBoolean() ? 1L : null));
break;
case 3:
cache.get(key);
break;
default:
assert false;
}
}
}, 10, "tx-thread");
long stopTime = System.currentTimeMillis() + SF.applyLB(3 * 60_000, 20_000);
long topVer = ignite0.cluster().topologyVersion();
boolean failed = false;
while (System.currentTimeMillis() < stopTime) {
log.info("Start node.");
IgniteKernal ignite = (IgniteKernal) startGrid(GRID_CNT);
assertFalse(ignite.configuration().isClientMode());
topVer++;
IgniteInternalFuture<?> affFut = ignite.context().cache().context().exchange().affinityReadyFuture(new AffinityTopologyVersion(topVer));
try {
if (affFut != null)
affFut.get(30_000);
} catch (IgniteFutureTimeoutCheckedException ignored) {
log.error("Failed to wait for affinity future after start: " + topVer);
failed = true;
break;
}
Thread.sleep(500);
log.info("Stop node.");
stopGrid(GRID_CNT);
topVer++;
affFut = ignite0.context().cache().context().exchange().affinityReadyFuture(new AffinityTopologyVersion(topVer));
try {
if (affFut != null)
affFut.get(30_000);
} catch (IgniteFutureTimeoutCheckedException ignored) {
log.error("Failed to wait for affinity future after stop: " + topVer);
failed = true;
break;
}
}
stop.set(true);
fut.get();
assertFalse("Test failed, see log for details.", failed);
} finally {
stop.set(true);
ignite0.destroyCache(CACHE1);
ignite0.destroyCache(CACHE2);
AffinityTopologyVersion topVer = ignite0.context().cache().context().exchange().lastTopologyFuture().get();
for (Ignite ignite : G.allGrids()) ((IgniteKernal) ignite).context().cache().context().exchange().affinityReadyFuture(topVer).get();
awaitPartitionMapExchange();
}
}
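The core pattern in the snippet above is a bounded wait on an internal future, where IgniteFutureTimeoutCheckedException marks the wait as failed instead of letting the test hang. Below is a minimal, self-contained sketch of that pattern; the BoundedWaitSketch class, the waitWithTimeout helper name, and the way the caller reacts to a timeout are illustrative assumptions, not part of the test above.
import org.apache.ignite.IgniteCheckedException;
import org.apache.ignite.internal.IgniteFutureTimeoutCheckedException;
import org.apache.ignite.internal.IgniteInternalFuture;

class BoundedWaitSketch {
    /**
     * Waits for the given future for at most {@code timeoutMs} milliseconds.
     *
     * @return {@code true} if the future completed in time, {@code false} on timeout.
     * @throws IgniteCheckedException If the future completed with an error.
     */
    static boolean waitWithTimeout(IgniteInternalFuture<?> fut, long timeoutMs) throws IgniteCheckedException {
        try {
            fut.get(timeoutMs);

            return true;
        }
        catch (IgniteFutureTimeoutCheckedException ignored) {
            // Timed out: report failure to the caller instead of hanging the test.
            return false;
        }
    }
}
In the test above this corresponds to the affFut.get(30_000) calls: a timeout sets the failed flag and breaks the restart loop rather than blocking indefinitely.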
use of org.apache.ignite.internal.IgniteFutureTimeoutCheckedException in project gridgain by gridgain.
the class IgnitePdsCacheEntriesExpirationTest method testDeadlockBetweenCachePutAndEntryExpiration.
/**
* Verifies the scenario of a deadlock between a thread modifying a cache entry (acquires cp read lock and entry lock),
* the TTL thread expiring the entry (acquires cp read lock and entry lock), and the checkpoint thread (acquires cp
* write lock).
*
* The checkpoint thread is not actually used but is emulated by the test to avoid a test hang (the interruptible API
* for acquiring the write lock is used).
*
* For more details see <a href="https://ggsystems.atlassian.net/browse/GG-23135">GG-23135</a>.
*
* <p> <strong>Important note:</strong> The implementation of this test relies heavily on the structure of the existing code in
* {@link GridCacheOffheapManager.GridCacheDataStore#purgeExpiredInternal(GridCacheContext, IgniteInClosure2X, int)}
* and {@link GridCacheMapEntry#onExpired(CacheObject, GridCacheVersion)} methods.
*
* Any changes to those methods could break the logic inside the test, so if new failures of the test occur, the test
* code itself may require refactoring. </p>
*
* @throws Exception If failed.
*/
@Test
public void testDeadlockBetweenCachePutAndEntryExpiration() throws Exception {
AtomicBoolean timeoutReached = new AtomicBoolean(false);
AtomicBoolean cpWriteLocked = new AtomicBoolean(false);
AtomicInteger partId = new AtomicInteger();
CountDownLatch ttlLatch = new CountDownLatch(2);
IgniteEx srv0 = startGrids(2);
srv0.cluster().active(true);
awaitPartitionMapExchange();
srv0.getOrCreateCache(DEFAULT_CACHE_NAME);
GridDhtPartitionTopologyImpl top = (GridDhtPartitionTopologyImpl) srv0.cachex(DEFAULT_CACHE_NAME).context().topology();
top.partitionFactory((ctx, grp, id, recovery) -> {
partId.set(id);
return new GridDhtLocalPartition(ctx, grp, id, recovery) {
/**
* This method is modified to bring the threads into a deadlock situation.
* The idea is the following: the updater thread (see code below) on its way to the
* {@link GridCacheMapEntry#onExpired(CacheObject, GridCacheVersion)} call stops here
* (already having the entry lock acquired) and waits until the checkpoint write lock is acquired
* by another special thread emulating the checkpoint thread (cp-write-lock-holder, see code below).
* After that it enables the ttl-cleanup-worker thread to proceed
* (by counting down ttlLatch, see the next overridden method) and reproduces the deadlock scenario.
*/
@Override
public IgniteCacheOffheapManager.CacheDataStore dataStore() {
Thread t = Thread.currentThread();
String tName = t.getName();
if (tName == null || !tName.contains("updater"))
return super.dataStore();
boolean unswapFoundInST = false;
for (StackTraceElement e : t.getStackTrace()) {
if (e.getMethodName().contains("unswap")) {
unswapFoundInST = true;
break;
}
}
if (!unswapFoundInST)
return super.dataStore();
while (!cpWriteLocked.get()) {
try {
Thread.sleep(10);
} catch (InterruptedException ignored) {
log.warning(">>> Thread caught InterruptedException while waiting " + "for cp write lock to be locked");
}
}
ttlLatch.countDown();
return super.dataStore();
}
/**
* This method is modified to bring the threads into a deadlock situation.
* The idea is the following: the internal ttl-cleanup-worker thread wakes up to clean up expired entries,
* reaches this method after calling purgeExpiredInternal (thus having the checkpoint read lock acquired)
* and stops on ttlLatch until the updater thread comes in, acquires the entry lock and gets stuck
* acquiring the cp read lock
* (because the special cp-write-lock-holder thread already holds the cp write lock).
*
* So the situation of three threads stuck in a deadlock is reproduced.
*/
@Override
public boolean reserve() {
Thread t = Thread.currentThread();
String tName = t.getName();
if (tName == null || !tName.contains("ttl-cleanup-worker"))
return super.reserve();
boolean purgeExpiredFoundInST = false;
for (StackTraceElement e : t.getStackTrace()) {
if (e.getMethodName().contains("purgeExpiredInternal")) {
purgeExpiredFoundInST = true;
break;
}
}
if (!purgeExpiredFoundInST)
return super.reserve();
ttlLatch.countDown();
try {
ttlLatch.await();
} catch (InterruptedException ignored) {
log.warning(">>> Thread caught InterruptedException while waiting for ttl latch" + " to be released by updater thread");
}
return super.reserve();
}
};
});
stopGrid(1);
// change BLT to force new partition creation with modified GridDhtLocalPartition class
srv0.cluster().setBaselineTopology(srv0.cluster().topologyVersion());
Thread.sleep(500);
IgniteCache<Object, Object> cache = srv0.getOrCreateCache(DEFAULT_CACHE_NAME);
GridCacheDatabaseSharedManager db = (GridCacheDatabaseSharedManager) srv0.context().cache().context().database();
CheckpointReadWriteLock checkpointReadWriteLock = U.field(db.checkpointManager.checkpointTimeoutLock(), "checkpointReadWriteLock");
ReentrantReadWriteLockWithTracking rwLock = U.field(checkpointReadWriteLock, "checkpointLock");
int key = 0;
while (true) {
if (srv0.affinity(DEFAULT_CACHE_NAME).partition(key) != partId.get())
key++;
else
break;
}
cache.put(key, 1);
int finalKey = key;
IgniteInternalFuture updateFut = GridTestUtils.runAsync(() -> {
log.info(">>> Updater thread has started, updating key " + finalKey);
int i = 10;
while (!timeoutReached.get()) {
cache.put(finalKey, i++);
try {
Thread.sleep(300);
} catch (InterruptedException e) {
log.warning(">>> Updater thread sleep was interrupted");
}
}
}, "updater-thread");
IgniteInternalFuture writeLockHolderFut = GridTestUtils.runAsync(() -> {
while (ttlLatch.getCount() != 1) {
try {
Thread.sleep(20);
} catch (InterruptedException e) {
log.warning(">>> Write lock holder thread sleep was interrupted");
break;
}
}
try {
cpWriteLocked.set(true);
rwLock.writeLock().lockInterruptibly();
ttlLatch.await();
} catch (InterruptedException e) {
log.warning(">>> Write lock holder thread was interrupted while obtaining write lock.");
} finally {
rwLock.writeLock().unlock();
}
}, "cp-write-lock-holder");
GridTestUtils.runAsync(() -> {
long start = System.currentTimeMillis();
while (System.currentTimeMillis() - start < TIMEOUT) doSleep(1_000);
timeoutReached.set(true);
});
try {
updateFut.get(TIMEOUT * 2);
} catch (IgniteFutureTimeoutCheckedException ignored) {
fail("Failed to wait for futures for doubled timeout");
} finally {
while (ttlLatch.getCount() > 0) ttlLatch.countDown();
writeLockHolderFut.cancel();
updateFut.cancel();
}
}
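The tail of this test shows the other common use of the exception: bound the overall wait on a background future and, on expiry, fail and cancel whatever is still running. A condensed sketch of that bail-out structure follows, assuming a hypothetical TimedCancelSketch class and TIMEOUT constant; it is not the test's own helper, just the shape of the pattern.
import org.apache.ignite.IgniteCheckedException;
import org.apache.ignite.internal.IgniteFutureTimeoutCheckedException;
import org.apache.ignite.internal.IgniteInternalFuture;
import org.apache.ignite.testframework.GridTestUtils;

class TimedCancelSketch {
    /** Illustrative deadline, analogous to the test's TIMEOUT constant. */
    private static final long TIMEOUT = 30_000;

    static void runWithDeadline(Runnable task) throws IgniteCheckedException {
        IgniteInternalFuture<?> fut = GridTestUtils.runAsync(task);

        try {
            // Give the task a doubled timeout; not finishing in time is treated as a reproduced hang.
            fut.get(TIMEOUT * 2);
        }
        catch (IgniteFutureTimeoutCheckedException ignored) {
            throw new AssertionError("Task did not finish within the doubled timeout.");
        }
        finally {
            // Always attempt to cancel so a stuck task does not leak its thread.
            fut.cancel();
        }
    }
}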
use of org.apache.ignite.internal.IgniteFutureTimeoutCheckedException in project gridgain by gridgain.
the class DiscoveryClientSocketTest method startSslClient.
/**
* Test starts an SSL client socket and writes data until a write on the socket blocks. When the socket is blocked
* on write, the test tries to close it.
*/
public void startSslClient() {
try {
Socket clientSocket = sslSockFactory.createSocket(HOST, PORT_TO_LNSR);
info("Client started.");
fakeTcpDiscoverySpi.configureSocketOptions(clientSocket);
long handshakeStartTime = System.currentTimeMillis();
// Need to send a message so that the SSL handshake completes.
clientSocket.getOutputStream().write(U.IGNITE_HEADER);
readHandshake(clientSocket);
long handshakeInterval = System.currentTimeMillis() - handshakeStartTime;
info("Handshake time: " + handshakeInterval + "ms");
int iter = 0;
try {
while (true) {
iter++;
IgniteInternalFuture writeFut = GridTestUtils.runAsync(() -> {
try {
clientSocket.getOutputStream().write(new byte[4 * 1024]);
} catch (IOException e) {
assertEquals("Socket closed", e.getMessage());
}
});
writeFut.get(10 * handshakeInterval);
}
} catch (IgniteFutureTimeoutCheckedException e) {
info("Socket stuck on write, when passed too much through itself [kBytes=" + (iter * 4) + ", time=" + (System.currentTimeMillis() - handshakeStartTime) + ']');
}
info("Try to close a socket.");
long startClose = System.currentTimeMillis();
// Do not use try-with-resources here, because the JVM has a bug in its TLS implementation and requires closing the socket streams explicitly.
U.closeQuiet(clientSocket);
info("Socket closed [time=" + (System.currentTimeMillis() - startClose) + ']');
} catch (Exception e) {
fail(e.getMessage());
}
}
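The write-blocking detection above boils down to running each write asynchronously and interpreting a timed-out wait as the send buffer being full. A stripped-down sketch of that detection loop is shown below; the BlockedWriteSketch class, the 4 KB chunk size, and the per-write timeout are illustrative, and the handshake and close logic from the test is omitted.
import java.io.IOException;
import java.net.Socket;

import org.apache.ignite.IgniteCheckedException;
import org.apache.ignite.internal.IgniteFutureTimeoutCheckedException;
import org.apache.ignite.internal.IgniteInternalFuture;
import org.apache.ignite.testframework.GridTestUtils;

class BlockedWriteSketch {
    /** Writes 4 KB chunks until a single write fails to complete within {@code writeTimeoutMs}. */
    static void writeUntilBlocked(Socket sock, long writeTimeoutMs) throws IgniteCheckedException {
        while (true) {
            IgniteInternalFuture<?> writeFut = GridTestUtils.runAsync(() -> {
                try {
                    sock.getOutputStream().write(new byte[4 * 1024]);
                }
                catch (IOException ignored) {
                    // Expected once the socket is closed from another thread.
                }
            });

            try {
                writeFut.get(writeTimeoutMs);
            }
            catch (IgniteFutureTimeoutCheckedException ignored) {
                // The write did not return in time: the send buffer is full and the peer is not reading.
                return;
            }
        }
    }
}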
use of org.apache.ignite.internal.IgniteFutureTimeoutCheckedException in project gridgain by gridgain.
the class DiscoveryClientSocketTest method sslSocketTest.
/**
* Creates an SSL server socket and a client to check that closing works correctly when writes exceed reads.
*
* @throws Exception If failed.
*/
@Test
public void sslSocketTest() throws Exception {
try (ServerSocket listen = sslSrvSockFactory.createServerSocket(PORT_TO_LNSR)) {
info("Server started.");
IgniteInternalFuture clientFut = GridTestUtils.runAsync(this::startSslClient);
Socket connection = listen.accept();
try {
fakeTcpDiscoverySpi.configureSocketOptions(connection);
readHandshake(connection);
connection.getOutputStream().write(U.IGNITE_HEADER);
clientFut.get(10_000);
} catch (IgniteFutureTimeoutCheckedException e) {
U.dumpThreads(log);
U.closeQuiet(connection);
fail("Can't wait connection closed from client side.");
} catch (Exception e) {
U.closeQuiet(connection);
info("Ex: " + e.getMessage() + " (Socket closed)");
}
}
}