Use of org.apache.ignite.internal.IgniteFutureTimeoutCheckedException in project ignite by apache.
From the class IgniteCacheOffheapEvictQueryTest, method testEvictAndRemove.
/**
 * @throws Exception If failed.
 */
@Test
public void testEvictAndRemove() throws Exception {
    final int KEYS_CNT = 3000;
    final int THREADS_CNT = 250;

    final IgniteCache<Integer, Integer> c = startGrid().cache(DEFAULT_CACHE_NAME);

    for (int i = 0; i < KEYS_CNT; i++) {
        c.put(i, i);

        if ((i & 1) == 0)
            c.localEvict(F.asList(i));
    }

    X.println("___ Cache loaded...");

    final CyclicBarrier b = new CyclicBarrier(THREADS_CNT, new Runnable() {
        @Override public void run() {
            X.println("___ go!");
        }
    });

    final AtomicInteger keys = new AtomicInteger(KEYS_CNT);

    IgniteInternalFuture<?> fut = multithreadedAsync(new Runnable() {
        @Override public void run() {
            Random rnd = new GridRandom();

            try {
                b.await();
            }
            catch (InterruptedException e) {
                throw new IgniteInterruptedException(e);
            }
            catch (BrokenBarrierException e) {
                throw new IllegalStateException(e);
            }

            while (keys.get() > 0) {
                int k = rnd.nextInt(KEYS_CNT);

                try {
                    switch (rnd.nextInt(4)) {
                        case 0:
                            c.localEvict(F.asList(k));

                            break;

                        case 1:
                            c.get(k);

                            break;

                        case 2:
                            if (c.remove(k))
                                keys.decrementAndGet();

                            break;

                        case 3:
                            c.query(new SqlFieldsQuery("select _val from Integer where _key between ? and ?")
                                .setArgs(k, k + 20).setLocal(true)).getAll();

                            break;
                    }
                }
                catch (CacheException e) {
                    String msgStart = "Failed to get value for key:";

                    for (Throwable th = e; th != null; th = th.getCause()) {
                        String msg = th.getMessage();

                        if (msg != null && msg.startsWith(msgStart)) {
                            int dot = msg.indexOf('.', msgStart.length());

                            assertTrue(dot != -1);

                            final Integer failedKey = Integer.parseInt(msg.substring(msgStart.length(), dot).trim());

                            X.println("___ failed key: " + failedKey);

                            break;
                        }
                    }

                    LT.warn(log, e.getMessage());

                    return;
                }
            }
        }
    }, THREADS_CNT);

    try {
        fut.get(60_000);

        if (c.size(CachePeekMode.ALL) != 0)
            fail("Not all keys removed.");

        X.println("___ all keys removed");
    }
    catch (IgniteFutureTimeoutCheckedException ignored) {
        X.println("___ timeout");
        X.println("___ keys: " + keys.get());

        keys.set(0);

        fut.get();
    }
}
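The shutdown handling above boils down to a two-stage wait on an IgniteInternalFuture: a bounded get() that surfaces IgniteFutureTimeoutCheckedException, then an unbounded get() once the workers have been told to stop. A minimal sketch of just that pattern, assuming GridTestUtils.runAsync from Ignite's test framework as the async helper (the stop flag and worker body are illustrative):

import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.ignite.internal.IgniteFutureTimeoutCheckedException;
import org.apache.ignite.internal.IgniteInternalFuture;
import org.apache.ignite.testframework.GridTestUtils;

public class TwoStageWaitSketch {
    /** Flag the hypothetical worker polls to know when to stop. */
    private static final AtomicBoolean stop = new AtomicBoolean();

    public static void main(String[] args) throws Exception {
        IgniteInternalFuture<?> fut = GridTestUtils.runAsync(() -> {
            while (!stop.get()) {
                // ... do work ...
            }
        });

        try {
            fut.get(60_000); // Bounded wait: times out with IgniteFutureTimeoutCheckedException.
        }
        catch (IgniteFutureTimeoutCheckedException ignored) {
            stop.set(true); // Ask the worker to finish.

            fut.get(); // Unbounded wait for a clean shutdown.
        }
    }
}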
Use of org.apache.ignite.internal.IgniteFutureTimeoutCheckedException in project ignite by apache.
From the class GridDhtPartitionsExchangeFuture, method waitPartitionRelease.
/**
 * The main purpose of this method is to wait for all ongoing updates (transactional and atomic), initiated on
 * the previous topology version, to finish, preventing inconsistencies during rebalancing and preventing two
 * different nodes from simultaneously owning the same lock.
 * This method can also be used to wait for tx recovery only, in the case of a PME-free switch.
 *
 * @param latchId Distributed latch id.
 * @param distributed If {@code true}, the node should wait for partition release completion on all other nodes.
 * @param doRollback If {@code true}, tries to roll back transactions which lock partitions. Avoids unnecessary calls
 *      of {@link org.apache.ignite.internal.processors.cache.transactions.IgniteTxManager#rollbackOnTopologyChange}.
 * @throws IgniteCheckedException If failed.
 */
private void waitPartitionRelease(String latchId, boolean distributed, boolean doRollback) throws IgniteCheckedException {
    Latch releaseLatch = null;

    IgniteInternalFuture<?> partReleaseFut;

    cctx.exchange().exchangerBlockingSectionBegin();

    try {
        // Wait for other nodes only on first phase.
        if (distributed)
            releaseLatch = cctx.exchange().latch().getOrCreate(latchId, initialVersion());

        partReleaseFut = context().exchangeFreeSwitch() && isBaselineNodeFailed()
            ? cctx.partitionRecoveryFuture(initialVersion(), firstDiscoEvt.eventNode())
            : cctx.partitionReleaseFuture(initialVersion());

        // Assign to class variable so it will be included into toString() method.
        this.partReleaseFut = partReleaseFut;
    }
    finally {
        cctx.exchange().exchangerBlockingSectionEnd();
    }

    if (log.isTraceEnabled())
        log.trace("Before waiting for partition release future: " + this);

    int dumpCnt = 0;

    long nextDumpTime = 0;

    IgniteConfiguration cfg = cctx.gridConfig();

    long waitStartNanos = System.nanoTime();

    long waitTimeout = 2 * cfg.getNetworkTimeout();

    boolean txRolledBack = !doRollback;

    while (true) {
        // Read txTimeoutOnPME from configuration after every iteration.
        long curTimeout = cfg.getTransactionConfiguration().getTxTimeoutOnPartitionMapExchange();

        cctx.exchange().exchangerBlockingSectionBegin();

        try {
            // This avoids unnecessary waiting for rollback.
            partReleaseFut.get(curTimeout > 0 && !txRolledBack
                ? Math.min(curTimeout, waitTimeout)
                : waitTimeout, TimeUnit.MILLISECONDS);

            break;
        }
        catch (IgniteFutureTimeoutCheckedException ignored) {
            // Print pending transactions and locks that might have led to the hang.
            if (nextDumpTime <= U.currentTimeMillis()) {
                dumpPendingObjects(partReleaseFut, curTimeout <= 0 && !txRolledBack);

                nextDumpTime = U.currentTimeMillis() + nextDumpTimeout(dumpCnt++, waitTimeout);
            }

            long passedMillis = U.millisSinceNanos(waitStartNanos);

            if (!txRolledBack && curTimeout > 0 && passedMillis >= curTimeout) {
                txRolledBack = true;

                cctx.tm().rollbackOnTopologyChange(initialVersion());
            }
        }
        catch (IgniteCheckedException e) {
            U.warn(log, "Unable to await partitions release future", e);

            throw e;
        }
        finally {
            cctx.exchange().exchangerBlockingSectionEnd();
        }
    }

    long waitEndNanos = System.nanoTime();

    if (log.isInfoEnabled()) {
        long waitTime = U.nanosToMillis(waitEndNanos - waitStartNanos);

        String futInfo = RELEASE_FUTURE_DUMP_THRESHOLD > 0 && waitTime > RELEASE_FUTURE_DUMP_THRESHOLD
            ? partReleaseFut.toString()
            : "NA";

        String mode = distributed ? "DISTRIBUTED" : "LOCAL";

        log.info("Finished waiting for partition release future [topVer=" + exchangeId().topologyVersion() +
            ", waitTime=" + waitTime + "ms, futInfo=" + futInfo + ", mode=" + mode + "]");
    }

    if (!context().exchangeFreeSwitch()) {
        IgniteInternalFuture<?> locksFut = cctx.mvcc().finishLocks(exchId.topologyVersion());

        nextDumpTime = 0;
        dumpCnt = 0;

        while (true) {
            cctx.exchange().exchangerBlockingSectionBegin();

            try {
                locksFut.get(50, TimeUnit.MILLISECONDS);

                break;
            }
            catch (IgniteFutureTimeoutCheckedException ignored) {
                if (nextDumpTime <= U.currentTimeMillis()) {
                    U.warn(log, "Failed to wait for locks release future. " +
                        "Dumping pending objects that might be the cause: " + cctx.localNodeId());

                    U.warn(log, "Locked keys:");

                    for (IgniteTxKey key : cctx.mvcc().lockedKeys())
                        U.warn(log, "Locked key: " + key);

                    for (IgniteTxKey key : cctx.mvcc().nearLockedKeys())
                        U.warn(log, "Locked near key: " + key);

                    Map<IgniteTxKey, Collection<GridCacheMvccCandidate>> locks =
                        cctx.mvcc().unfinishedLocks(exchId.topologyVersion());

                    for (Map.Entry<IgniteTxKey, Collection<GridCacheMvccCandidate>> e : locks.entrySet())
                        U.warn(log, "Awaited locked entry [key=" + e.getKey() + ", mvcc=" + e.getValue() + ']');

                    nextDumpTime = U.currentTimeMillis() + nextDumpTimeout(dumpCnt++, waitTimeout);

                    if (getBoolean(IGNITE_THREAD_DUMP_ON_EXCHANGE_TIMEOUT, false))
                        U.dumpThreads(log);
                }

                // Sometimes FinishLockFuture is not rechecked, causing a frozen PME.
                // Recheck every 50 milliseconds.
                cctx.mvcc().recheckPendingLocks();
            }
            finally {
                cctx.exchange().exchangerBlockingSectionEnd();
            }
        }

        timeBag.finishGlobalStage("Wait partitions release [latch=" + latchId + "]");
    }

    if (releaseLatch == null) {
        assert !distributed : "Partitions release latch must be initialized in distributed mode.";

        return;
    }

    releaseLatch.countDown();

    // For compatibility with older versions, where joining nodes do not wait for the latch.
    if (localJoinExchange() && !cctx.exchange().latch().canSkipJoiningNodes(initialVersion()))
        return;

    try {
        String troubleshootingHint;

        if (crd.isLocal())
            troubleshootingHint = "Some nodes have not sent acknowledgement for latch completion. " +
                "This is possible due to unfinished atomic updates, transactions " +
                "or explicit locks that were not released on those nodes. " +
                "Please check logs for errors on nodes with ids reported in the latch `pendingAcks` collection.";
        else
            troubleshootingHint = "For more details please check coordinator node logs [crdNode=" + crd.toString() + "]";

        while (true) {
            try {
                cctx.exchange().exchangerBlockingSectionBegin();

                try {
                    releaseLatch.await(waitTimeout, TimeUnit.MILLISECONDS);
                }
                finally {
                    cctx.exchange().exchangerBlockingSectionEnd();
                }

                if (log.isInfoEnabled())
                    log.info("Finished waiting for partitions release latch: " + releaseLatch);

                break;
            }
            catch (IgniteFutureTimeoutCheckedException ignored) {
                U.warn(log, "Unable to await partitions release latch within timeout. " + troubleshootingHint +
                    " [latch=" + releaseLatch + "]");

                // Try to resend ack.
                releaseLatch.countDown();
            }
        }
    }
    catch (IgniteCheckedException e) {
        U.warn(log, "Stop waiting for partitions release latch: " + e.getMessage());
    }

    timeBag.finishGlobalStage("Wait partitions release latch [latch=" + latchId + "]");
}
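Stripped of exchange-specific details, the first wait loop above implements a reusable pattern: wait in bounded slices, and treat each IgniteFutureTimeoutCheckedException as a cue to emit diagnostics rather than to fail. A simplified sketch, with hypothetical dumpPendingState() and backoff() helpers standing in for dumpPendingObjects() and nextDumpTimeout():

import java.util.concurrent.TimeUnit;
import org.apache.ignite.IgniteCheckedException;
import org.apache.ignite.internal.IgniteFutureTimeoutCheckedException;
import org.apache.ignite.internal.IgniteInternalFuture;

// Sketch only: the two helpers below are placeholders, not Ignite API.
class BoundedWaitSketch {
    static void awaitWithDumps(IgniteInternalFuture<?> fut, long sliceMillis) throws IgniteCheckedException {
        long nextDumpTime = 0;
        int dumpCnt = 0;

        while (true) {
            try {
                fut.get(sliceMillis, TimeUnit.MILLISECONDS);

                return; // Completed within this slice.
            }
            catch (IgniteFutureTimeoutCheckedException ignored) {
                // Dump at most once per backoff interval, not on every timed-out slice.
                if (nextDumpTime <= System.currentTimeMillis()) {
                    dumpPendingState(fut);

                    nextDumpTime = System.currentTimeMillis() + backoff(dumpCnt++);
                }
            }
        }
    }

    /** Hypothetical diagnostic hook. */
    static void dumpPendingState(IgniteInternalFuture<?> fut) {
        System.out.println("Still waiting for: " + fut);
    }

    /** Hypothetical escalating interval: 1s, 2s, 4s, ... capped at 30s. */
    static long backoff(int attempt) {
        return Math.min(30_000L, 1_000L << Math.min(attempt, 5));
    }
}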
Use of org.apache.ignite.internal.IgniteFutureTimeoutCheckedException in project ignite by apache.
From the class GridCachePartitionExchangeManager, method onKernalStart.
/**
 * @param active Cluster state.
 * @param reconnect Reconnect flag.
 * @return Topology version of the local join exchange if the cluster is active;
 *      topology version {@code NONE} if the cluster is not active or this is a reconnect.
 * @throws IgniteCheckedException If failed.
 */
public AffinityTopologyVersion onKernalStart(boolean active, boolean reconnect) throws IgniteCheckedException {
    for (ClusterNode n : cctx.discovery().remoteNodes())
        cctx.versions().onReceived(n.id(), n.metrics().getLastDataVersion());

    DiscoveryLocalJoinData locJoin = cctx.discovery().localJoin();

    GridDhtPartitionsExchangeFuture fut = null;

    if (reconnect)
        reconnectExchangeFut = new GridFutureAdapter<>();

    if (active) {
        DiscoveryEvent discoEvt = locJoin.event();
        DiscoCache discoCache = locJoin.discoCache();

        GridDhtPartitionExchangeId exchId = initialExchangeId();

        fut = exchangeFuture(exchId, reconnect ? null : discoEvt, reconnect ? null : discoCache, null, null);
    }
    else if (reconnect)
        reconnectExchangeFut.onDone();

    new IgniteThread(cctx.igniteInstanceName(), "exchange-worker", exchWorker).start();

    if (reconnect) {
        if (fut != null) {
            fut.listen(new CI1<IgniteInternalFuture<AffinityTopologyVersion>>() {
                @Override public void apply(IgniteInternalFuture<AffinityTopologyVersion> fut) {
                    try {
                        fut.get();

                        for (CacheGroupContext grp : cctx.cache().cacheGroups())
                            grp.preloader().onInitialExchangeComplete(null);

                        reconnectExchangeFut.onDone();
                    }
                    catch (IgniteCheckedException e) {
                        for (CacheGroupContext grp : cctx.cache().cacheGroups())
                            grp.preloader().onInitialExchangeComplete(e);

                        reconnectExchangeFut.onDone(e);
                    }
                }
            });
        }
    }
    else if (fut != null) {
        if (log.isDebugEnabled())
            log.debug("Beginning to wait on local exchange future: " + fut);

        boolean first = true;

        while (true) {
            try {
                fut.get(cctx.preloadExchangeTimeout());

                break;
            }
            catch (IgniteFutureTimeoutCheckedException ignored) {
                if (first) {
                    U.warn(log, "Failed to wait for initial partition map exchange. " +
                        "Possible reasons are: " + U.nl() +
                        " ^-- Transactions in deadlock." + U.nl() +
                        " ^-- Long running transactions (ignore if this is the case)." + U.nl() +
                        " ^-- Unreleased explicit locks.");

                    first = false;
                }
                else
                    U.warn(log, "Still waiting for initial partition map exchange [fut=" + fut + ']');
            }
            catch (IgniteNeedReconnectException e) {
                throw e;
            }
            catch (Exception e) {
                if (fut.reconnectOnError(e))
                    throw new IgniteNeedReconnectException(cctx.localNode(), e);

                throw e;
            }
        }

        for (CacheGroupContext grp : cctx.cache().cacheGroups()) {
            if (locJoin.joinTopologyVersion().equals(grp.localStartVersion()))
                grp.preloader().onInitialExchangeComplete(null);
        }

        if (log.isDebugEnabled())
            log.debug("Finished waiting for initial exchange: " + fut.exchangeId());

        return fut.initialVersion();
    }

    return NONE;
}
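On the reconnect path above, the method deliberately avoids blocking and instead chains a listener onto the exchange future, relaying its outcome into reconnectExchangeFut. A minimal sketch of that relay idiom on Ignite's internal futures (IgniteInternalFuture, GridFutureAdapter and IgniteCheckedException are real Ignite classes; the method itself is illustrative):

import org.apache.ignite.IgniteCheckedException;
import org.apache.ignite.internal.IgniteInternalFuture;
import org.apache.ignite.internal.util.future.GridFutureAdapter;

class FutureRelaySketch {
    /** Relays completion of {@code src} into {@code dst} without blocking any thread. */
    static <T> void relay(IgniteInternalFuture<T> src, GridFutureAdapter<T> dst) {
        src.listen(f -> {
            try {
                dst.onDone(f.get()); // Does not block: the listener fires only once f is done.
            }
            catch (IgniteCheckedException e) {
                dst.onDone(e);
            }
        });
    }
}

This is the same non-blocking shape the snippet uses to complete reconnectExchangeFut from inside the exchange future's listener.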
Use of org.apache.ignite.internal.IgniteFutureTimeoutCheckedException in project ignite by apache.
From the class CheckpointTimeoutLock, method checkpointReadLock.
/**
 * Gets the checkpoint read lock. While this lock is held, the checkpoint thread will not begin capturing
 * memory state.
 *
 * @throws IgniteException If failed.
 */
public void checkpointReadLock() {
    if (checkpointReadWriteLock.isWriteLockHeldByCurrentThread())
        return;

    long timeout = checkpointReadLockTimeout;

    long start = U.currentTimeMillis();

    boolean interrupted = false;

    try {
        for (;;) {
            try {
                if (timeout > 0 && (U.currentTimeMillis() - start) >= timeout)
                    failCheckpointReadLock();

                try {
                    if (timeout > 0) {
                        if (!checkpointReadWriteLock.tryReadLock(timeout - (U.currentTimeMillis() - start),
                            TimeUnit.MILLISECONDS))
                            failCheckpointReadLock();
                    }
                    else
                        checkpointReadWriteLock.readLock();
                }
                catch (InterruptedException e) {
                    interrupted = true;

                    continue;
                }

                if (stop) {
                    checkpointReadWriteLock.readUnlock();

                    throw new IgniteException(new NodeStoppingException("Failed to perform cache update: node is stopping."));
                }

                if (checkpointReadWriteLock.getReadHoldCount() > 1 || safeToUpdatePageMemories() || checkpointer.runner() == null)
                    break;
                else {
                    CheckpointProgress pages = checkpointer.scheduleCheckpoint(0, "too many dirty pages");

                    checkpointReadWriteLock.readUnlock();

                    if (timeout > 0 && U.currentTimeMillis() - start >= timeout)
                        failCheckpointReadLock();

                    try {
                        pages.futureFor(LOCK_RELEASED).getUninterruptibly();
                    }
                    catch (IgniteFutureTimeoutCheckedException e) {
                        failCheckpointReadLock();
                    }
                    catch (IgniteCheckedException e) {
                        throw new IgniteException("Failed to wait for checkpoint begin.", e);
                    }
                }
            }
            catch (CheckpointReadLockTimeoutException e) {
                log.error(e.getMessage(), e);

                timeout = 0;
            }
        }
    }
    finally {
        if (interrupted)
            Thread.currentThread().interrupt();
    }
}
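The catch structure around futureFor(LOCK_RELEASED) above separates a slow checkpoint (timeout, escalated via failCheckpointReadLock()) from a broken one (any other checked failure, rethrown as IgniteException). That shape can be sketched in isolation as follows (the future parameter and the escalation hook are placeholders):

import org.apache.ignite.IgniteCheckedException;
import org.apache.ignite.IgniteException;
import org.apache.ignite.internal.IgniteFutureTimeoutCheckedException;
import org.apache.ignite.internal.IgniteInternalFuture;

class CheckpointWaitSketch {
    static void awaitCheckpointPhase(IgniteInternalFuture<?> phaseFut) {
        try {
            phaseFut.getUninterruptibly();
        }
        catch (IgniteFutureTimeoutCheckedException e) {
            // Slow path: the wait timed out; escalate instead of propagating.
            onReadLockTimeout(); // Hypothetical stand-in for failCheckpointReadLock().
        }
        catch (IgniteCheckedException e) {
            // Broken path: anything else is a hard failure.
            throw new IgniteException("Failed to wait for checkpoint begin.", e);
        }
    }

    /** Hypothetical escalation hook. */
    static void onReadLockTimeout() {
        throw new IgniteException("Checkpoint read lock acquisition timed out.");
    }
}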
Use of org.apache.ignite.internal.IgniteFutureTimeoutCheckedException in project ignite by apache.
From the class TxRollbackOnTimeoutTest, method testRandomMixedTxConfigurations.
/**
 * Tests timeouts with random values and different tx configurations.
 *
 * @throws Exception If failed.
 */
@Test
public void testRandomMixedTxConfigurations() throws Exception {
    final Ignite client = startClient();

    final AtomicBoolean stop = new AtomicBoolean();

    final long seed = System.currentTimeMillis();

    final Random r = new Random(seed);

    log.info("Using seed: " + seed);

    final int threadsCnt = Runtime.getRuntime().availableProcessors() * 2;

    for (int k = 0; k < threadsCnt; k++)
        grid(0).cache(CACHE_NAME).put(k, 0L);

    final TransactionConcurrency[] TC_VALS = TransactionConcurrency.values();
    final TransactionIsolation[] TI_VALS = TransactionIsolation.values();

    final LongAdder cntr0 = new LongAdder();
    final LongAdder cntr1 = new LongAdder();
    final LongAdder cntr2 = new LongAdder();
    final LongAdder cntr3 = new LongAdder();

    final IgniteInternalFuture<?> fut = multithreadedAsync(new Runnable() {
        @Override public void run() {
            while (!stop.get()) {
                int nodeId = r.nextInt(GRID_CNT + 1);

                Ignite node = nodeId == GRID_CNT || nearCacheEnabled() ? client : grid(nodeId);

                TransactionConcurrency conc = TC_VALS[r.nextInt(TC_VALS.length)];
                TransactionIsolation isolation = TI_VALS[r.nextInt(TI_VALS.length)];

                int k = r.nextInt(threadsCnt);

                long timeout = r.nextInt(200) + 50;

                // Roughly 50% of transactions should time out.
                try (Transaction tx = node.transactions().txStart(conc, isolation, timeout, 1)) {
                    cntr0.add(1);

                    final Long v = (Long)node.cache(CACHE_NAME).get(k);

                    assertNotNull("Expecting not null value: " + tx, v);

                    final int delay = r.nextInt(400);

                    if (delay > 0)
                        sleep(delay);

                    node.cache(CACHE_NAME).put(k, v + 1);

                    tx.commit();

                    cntr1.add(1);
                }
                catch (TransactionTimeoutException e) {
                    cntr2.add(1);
                }
                catch (CacheException e) {
                    assertEquals(TransactionTimeoutException.class, X.getCause(e).getClass());

                    cntr2.add(1);
                }
                catch (Exception e) {
                    cntr3.add(1);
                }
            }
        }
    }, threadsCnt, "tx-async-thread");

    sleep(DURATION);

    stop.set(true);

    try {
        fut.get(30_000);
    }
    catch (IgniteFutureTimeoutCheckedException e) {
        error("Transactions hang", e);

        for (Ignite node : G.allGrids())
            ((IgniteKernal)node).dumpDebugInfo();

        // Try to interrupt hanging threads.
        fut.cancel();

        throw e;
    }

    log.info("Tx test stats: started=" + cntr0.sum() + ", completed=" + cntr1.sum() +
        ", failed=" + cntr3.sum() + ", timedOut=" + cntr2.sum());

    assertEquals("Expected the finished count to equal the started count",
        cntr0.sum(), cntr1.sum() + cntr2.sum() + cntr3.sum());
}
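The rescue path in the catch block above is worth keeping as a template for any test that waits on a potentially hung future: dump debug state from every node, then cancel before rethrowing. A compact sketch (G.allGrids(), IgniteKernal.dumpDebugInfo() and IgniteInternalFuture.cancel() are real Ignite APIs; the wrapper method is illustrative):

import org.apache.ignite.Ignite;
import org.apache.ignite.IgniteCheckedException;
import org.apache.ignite.internal.IgniteFutureTimeoutCheckedException;
import org.apache.ignite.internal.IgniteInternalFuture;
import org.apache.ignite.internal.IgniteKernal;
import org.apache.ignite.internal.util.typedef.G;

class HangDiagnosticsSketch {
    static void awaitOrDiagnose(IgniteInternalFuture<?> fut, long timeoutMillis) throws IgniteCheckedException {
        try {
            fut.get(timeoutMillis);
        }
        catch (IgniteFutureTimeoutCheckedException e) {
            // Capture per-node state while the hang is still observable.
            for (Ignite node : G.allGrids())
                ((IgniteKernal)node).dumpDebugInfo();

            // Try to interrupt hanging threads, then let the caller see the timeout.
            fut.cancel();

            throw e;
        }
    }
}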