Search in sources :

Example 1 with Latch

use of org.apache.ignite.internal.processors.cache.distributed.dht.preloader.latch.Latch in project ignite by apache.

the class GridDhtPartitionsExchangeFuture method waitPartitionRelease.

/**
 * The main purpose of this method is to wait for all ongoing updates (transactional and atomic), initiated on
 * the previous topology version, to finish to prevent inconsistencies during rebalancing and to prevent two
 * different simultaneous owners of the same lock.
 * Also, this method can be used to wait for tx recovery only in case of PME-free switch.
 * <p>
 * The wait runs in up to three steps:
 * <ol>
 *     <li>wait for the partition release (or partition recovery) future, rolling back transactions once the
 *     configured tx timeout on partition map exchange has elapsed (only if {@code doRollback} is set);</li>
 *     <li>unless this is an exchange-free switch, wait for the locks release future, rechecking pending locks
 *     every 50 milliseconds;</li>
 *     <li>if a release latch was obtained (distributed mode), count it down and wait for it to complete,
 *     re-sending the acknowledgement after every timeout.</li>
 * </ol>
 * Every blocking call is wrapped into {@code exchangerBlockingSectionBegin()} /
 * {@code exchangerBlockingSectionEnd()}.
 *
 * @param latchId Distributed latch Id.
 * @param distributed If {@code true} then node should wait for partition release completion on all other nodes.
 * @param doRollback If {@code true} tries to rollback transactions which lock partitions. Avoids unnecessary calls
 *      of {@link org.apache.ignite.internal.processors.cache.transactions.IgniteTxManager#rollbackOnTopologyChange}
 *
 * @throws IgniteCheckedException If failed.
 */
private void waitPartitionRelease(String latchId, boolean distributed, boolean doRollback) throws IgniteCheckedException {
    // Stays null in local (non-distributed) mode; checked after the futures are awaited.
    Latch releaseLatch = null;
    IgniteInternalFuture<?> partReleaseFut;
    cctx.exchange().exchangerBlockingSectionBegin();
    try {
        // Wait for other nodes only on first phase.
        if (distributed)
            releaseLatch = cctx.exchange().latch().getOrCreate(latchId, initialVersion());
        // Exchange-free switch with a failed baseline node waits for the recovery future of the node that
        // triggered the first discovery event; every other case waits for the regular release future.
        partReleaseFut = context().exchangeFreeSwitch() && isBaselineNodeFailed() ? cctx.partitionRecoveryFuture(initialVersion(), firstDiscoEvt.eventNode()) : cctx.partitionReleaseFuture(initialVersion());
        // Assign to class variable so it will be included into toString() method.
        this.partReleaseFut = partReleaseFut;
    } finally {
        cctx.exchange().exchangerBlockingSectionEnd();
    }
    if (log.isTraceEnabled())
        log.trace("Before waiting for partition release future: " + this);
    // Number of dumps done so far and the earliest time (ms) the next dump is allowed; both feed nextDumpTimeout().
    int dumpCnt = 0;
    long nextDumpTime = 0;
    IgniteConfiguration cfg = cctx.gridConfig();
    long waitStartNanos = System.nanoTime();
    // A single wait attempt lasts at most twice the network timeout.
    long waitTimeout = 2 * cfg.getNetworkTimeout();
    // Starting as "already rolled back" when doRollback is false disables the rollback branch below.
    boolean txRolledBack = !doRollback;
    while (true) {
        // Read txTimeoutOnPME from configuration after every iteration.
        long curTimeout = cfg.getTransactionConfiguration().getTxTimeoutOnPartitionMapExchange();
        cctx.exchange().exchangerBlockingSectionBegin();
        try {
            // This avoids unnecessary waiting for rollback.
            // While a rollback is still pending and a tx timeout is configured, wake up no later than that timeout.
            partReleaseFut.get(curTimeout > 0 && !txRolledBack ? Math.min(curTimeout, waitTimeout) : waitTimeout, TimeUnit.MILLISECONDS);
            break;
        } catch (IgniteFutureTimeoutCheckedException ignored) {
            // Print pending transactions and locks that might have led to hang.
            if (nextDumpTime <= U.currentTimeMillis()) {
                dumpPendingObjects(partReleaseFut, curTimeout <= 0 && !txRolledBack);
                nextDumpTime = U.currentTimeMillis() + nextDumpTimeout(dumpCnt++, waitTimeout);
            }
            long passedMillis = U.millisSinceNanos(waitStartNanos);
            // Roll back at most once, and only after the configured tx timeout has elapsed since the wait started.
            if (!txRolledBack && curTimeout > 0 && passedMillis >= curTimeout) {
                txRolledBack = true;
                cctx.tm().rollbackOnTopologyChange(initialVersion());
            }
        } catch (IgniteCheckedException e) {
            // Any non-timeout failure is logged and propagated to the caller.
            U.warn(log, "Unable to await partitions release future", e);
            throw e;
        } finally {
            cctx.exchange().exchangerBlockingSectionEnd();
        }
    }
    long waitEndNanos = System.nanoTime();
    if (log.isInfoEnabled()) {
        long waitTime = U.nanosToMillis(waitEndNanos - waitStartNanos);
        // The future is printed only when the threshold is positive and the wait exceeded it; otherwise "NA".
        String futInfo = RELEASE_FUTURE_DUMP_THRESHOLD > 0 && waitTime > RELEASE_FUTURE_DUMP_THRESHOLD ? partReleaseFut.toString() : "NA";
        String mode = distributed ? "DISTRIBUTED" : "LOCAL";
        // NOTE(review): this inner check repeats the enclosing log.isInfoEnabled() condition.
        if (log.isInfoEnabled())
            log.info("Finished waiting for partition release future [topVer=" + exchangeId().topologyVersion() + ", waitTime=" + waitTime + "ms, futInfo=" + futInfo + ", mode=" + mode + "]");
    }
    // The locks release wait is skipped entirely for an exchange-free switch.
    if (!context().exchangeFreeSwitch()) {
        IgniteInternalFuture<?> locksFut = cctx.mvcc().finishLocks(exchId.topologyVersion());
        // Restart the dump throttling for the second wait loop.
        nextDumpTime = 0;
        dumpCnt = 0;
        while (true) {
            cctx.exchange().exchangerBlockingSectionBegin();
            try {
                locksFut.get(50, TimeUnit.MILLISECONDS);
                break;
            } catch (IgniteFutureTimeoutCheckedException ignored) {
                // Dump locked keys and unfinished locks, throttled by nextDumpTime.
                if (nextDumpTime <= U.currentTimeMillis()) {
                    U.warn(log, "Failed to wait for locks release future. " + "Dumping pending objects that might be the cause: " + cctx.localNodeId());
                    U.warn(log, "Locked keys:");
                    for (IgniteTxKey key : cctx.mvcc().lockedKeys()) U.warn(log, "Locked key: " + key);
                    for (IgniteTxKey key : cctx.mvcc().nearLockedKeys()) U.warn(log, "Locked near key: " + key);
                    Map<IgniteTxKey, Collection<GridCacheMvccCandidate>> locks = cctx.mvcc().unfinishedLocks(exchId.topologyVersion());
                    for (Map.Entry<IgniteTxKey, Collection<GridCacheMvccCandidate>> e : locks.entrySet()) U.warn(log, "Awaited locked entry [key=" + e.getKey() + ", mvcc=" + e.getValue() + ']');
                    nextDumpTime = U.currentTimeMillis() + nextDumpTimeout(dumpCnt++, waitTimeout);
                    // Thread dump is opt-in via IGNITE_THREAD_DUMP_ON_EXCHANGE_TIMEOUT (defaults to false here).
                    if (getBoolean(IGNITE_THREAD_DUMP_ON_EXCHANGE_TIMEOUT, false))
                        U.dumpThreads(log);
                }
                // Sometimes FinishLockFuture is not rechecked causing frozen PME.
                // Will recheck every 50 milliseconds.
                cctx.mvcc().recheckPendingLocks();
            } finally {
                cctx.exchange().exchangerBlockingSectionEnd();
            }
        }
        timeBag.finishGlobalStage("Wait partitions release [latch=" + latchId + "]");
    }
    // No latch was obtained, which is only expected in local mode: nothing more to wait for.
    if (releaseLatch == null) {
        assert !distributed : "Partitions release latch must be initialized in distributed mode.";
        return;
    }
    // Acknowledge that the local wait is complete.
    releaseLatch.countDown();
    // For compatibility with old version where joining nodes are not waiting for latch.
    if (localJoinExchange() && !cctx.exchange().latch().canSkipJoiningNodes(initialVersion()))
        return;
    try {
        // Hint appended to the timeout warning; its wording depends on whether this node is the coordinator.
        String troubleshootingHint;
        if (crd.isLocal())
            // NOTE(review): the message below contains the typo "unfinishined"; it is runtime text and is left untouched here.
            troubleshootingHint = "Some nodes have not sent acknowledgement for latch completion. " + "It's possible due to unfinishined atomic updates, transactions " + "or not released explicit locks on that nodes. " + "Please check logs for errors on nodes with ids reported in latch `pendingAcks` collection";
        else
            troubleshootingHint = "For more details please check coordinator node logs [crdNode=" + crd.toString() + "]";
        // Keep waiting until the latch completes; each timeout produces a warning and a repeated ack.
        while (true) {
            try {
                cctx.exchange().exchangerBlockingSectionBegin();
                try {
                    releaseLatch.await(waitTimeout, TimeUnit.MILLISECONDS);
                } finally {
                    cctx.exchange().exchangerBlockingSectionEnd();
                }
                if (log.isInfoEnabled())
                    log.info("Finished waiting for partitions release latch: " + releaseLatch);
                break;
            } catch (IgniteFutureTimeoutCheckedException ignored) {
                U.warn(log, "Unable to await partitions release latch within timeout. " + troubleshootingHint + " [latch=" + releaseLatch + "]");
                // Try to resend ack.
                releaseLatch.countDown();
            }
        }
    } catch (IgniteCheckedException e) {
        // Any non-timeout failure ends the latch wait with a warning only; it is not rethrown.
        U.warn(log, "Stop waiting for partitions release latch: " + e.getMessage());
    }
    timeBag.finishGlobalStage("Wait partitions release latch [latch=" + latchId + "]");
}
Also used : IgniteCheckedException(org.apache.ignite.IgniteCheckedException) IgniteConfiguration(org.apache.ignite.configuration.IgniteConfiguration) Latch(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.latch.Latch) CountDownLatch(java.util.concurrent.CountDownLatch) IgniteFutureTimeoutCheckedException(org.apache.ignite.internal.IgniteFutureTimeoutCheckedException) IgniteTxKey(org.apache.ignite.internal.processors.cache.transactions.IgniteTxKey) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap) GridCacheMvccCandidate(org.apache.ignite.internal.processors.cache.GridCacheMvccCandidate)

Example 2 with Latch

use of org.apache.ignite.internal.processors.cache.distributed.dht.preloader.latch.Latch in project ignite by apache.

the class IgniteExchangeLatchManagerCoordinatorFailTest method testCoordinatorFailoverAfterServerLatchCompleted.

/**
 * Creates and counts down a latch on each of five grid nodes, awaits it on the first four of them,
 * stops node 0 and then awaits the latch once more on the four remaining nodes.
 * <p>
 * NOTE(review): node 0 is presumably the coordinator, judging by the test name - confirm against the test setup.
 *
 * @throws Exception if failed.
 */
@Test
public void testCoordinatorFailoverAfterServerLatchCompleted() throws Exception {
    Latch[] nodeLatches = new Latch[5];
    int lastIdx = nodeLatches.length - 1;

    // Obtain the latch on every node and acknowledge it right away.
    int nodeIdx = 0;
    while (nodeIdx <= lastIdx) {
        ExchangeLatchManager mgr = grid(nodeIdx).context().cache().context().exchange().latch();
        Latch created = mgr.getOrCreate(LATCH_DROP_NAME, latchTopVer);
        nodeLatches[nodeIdx] = created;
        info("Created latch: " + nodeIdx);
        created.countDown();
        nodeIdx++;
    }

    // Await on every node except the last one while node 0 is still running.
    for (int idx = 0; idx != lastIdx; idx++) {
        info("Waiting for latch: " + idx);
        nodeLatches[idx].await(10_000, TimeUnit.MILLISECONDS);
    }

    stopGrid(0);

    // Await again on every node that is still running after node 0 has been stopped.
    for (int idx = 1; idx <= lastIdx; idx++) {
        info("Waiting for latch after stop: " + idx);
        nodeLatches[idx].await(10_000, TimeUnit.MILLISECONDS);
    }
}
Also used : Latch(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.latch.Latch) CountDownLatch(java.util.concurrent.CountDownLatch) ExchangeLatchManager(org.apache.ignite.internal.processors.cache.distributed.dht.preloader.latch.ExchangeLatchManager) GridCommonAbstractTest(org.apache.ignite.testframework.junits.common.GridCommonAbstractTest) Test(org.junit.Test)

Aggregations

CountDownLatch (java.util.concurrent.CountDownLatch)2 Latch (org.apache.ignite.internal.processors.cache.distributed.dht.preloader.latch.Latch)2 HashMap (java.util.HashMap)1 LinkedHashMap (java.util.LinkedHashMap)1 Map (java.util.Map)1 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)1 ConcurrentMap (java.util.concurrent.ConcurrentMap)1 IgniteCheckedException (org.apache.ignite.IgniteCheckedException)1 IgniteConfiguration (org.apache.ignite.configuration.IgniteConfiguration)1 IgniteFutureTimeoutCheckedException (org.apache.ignite.internal.IgniteFutureTimeoutCheckedException)1 GridCacheMvccCandidate (org.apache.ignite.internal.processors.cache.GridCacheMvccCandidate)1 ExchangeLatchManager (org.apache.ignite.internal.processors.cache.distributed.dht.preloader.latch.ExchangeLatchManager)1 IgniteTxKey (org.apache.ignite.internal.processors.cache.transactions.IgniteTxKey)1 GridCommonAbstractTest (org.apache.ignite.testframework.junits.common.GridCommonAbstractTest)1 Test (org.junit.Test)1