Search in sources :

Example 6 with GridDhtLocalPartition

use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition in project ignite by apache.

the class GridCacheDatabaseSharedManager method applyLogicalUpdates.

/**
 * Replays the write-ahead log starting from the last checkpoint start pointer ({@code status.startPtr}) and
 * applies the logical records it contains. Data entries, page deltas and partition clearing records are handed
 * to the striped executor; the remaining record types are processed inline on the calling thread. After the
 * replay loop the method waits for the submitted tasks and notifies the database lifecycle listeners.
 *
 * @param status Last registered checkpoint status.
 * @param cacheGroupsPredicate Cache groups predicate. In this method it is only passed to
 *      {@code RestoreLogicalState} (presumably used there to filter records by cache group - confirm).
 * @param recordTypePredicate Predicate passed to the WAL replay together with the start pointer.
 * @param restoreMeta Metastore restore phase if {@code true}.
 * @return Restore state object that was used to iterate over the WAL; it also references the collected
 *      partition recovery states map.
 * @throws IgniteCheckedException If failed to apply updates.
 * @throws StorageException If IO exception occurred while reading write-ahead log.
 */
private RestoreLogicalState applyLogicalUpdates(CheckpointStatus status, IgnitePredicate<Integer> cacheGroupsPredicate, IgniteBiPredicate<WALRecord.RecordType, WALPointer> recordTypePredicate, boolean restoreMeta) throws IgniteCheckedException {
    if (log.isInfoEnabled())
        log.info("Applying lost " + (restoreMeta ? "metastore" : "cache") + " updates since last checkpoint record [lastMarked=" + status.startPtr + ", lastCheckpointId=" + status.cpStartId + ']');
    // Field lookup is switched off for the cache phase only; it is switched back on in the finally block below.
    if (!restoreMeta)
        cctx.kernalContext().query().skipFieldLookup(true);
    long start = U.currentTimeMillis();
    // Holds the first error raised by any asynchronous apply task (compareAndSet(null, e) keeps only the first).
    AtomicReference<Throwable> applyError = new AtomicReference<>();
    // Number of data entries processed by the striped tasks (used for the final log message only).
    AtomicLong applied = new AtomicLong();
    long lastArchivedSegment = cctx.wal().lastArchivedSegment();
    StripedExecutor exec = cctx.kernalContext().pools().getStripedExecutorService();
    // NOTE(review): presumably bounds the number of in-flight striped tasks - confirm in stripedApply().
    Semaphore semaphore = new Semaphore(semaphorePertmits(exec));
    // Partition states collected from checkpoint records and partition meta state records, keyed by (group, partition).
    Map<GroupPartitionId, Integer> partitionRecoveryStates = new HashMap<>();
    WALIterator it = cctx.wal().replay(status.startPtr, recordTypePredicate);
    RestoreLogicalState restoreLogicalState = new RestoreLogicalState(status, it, lastArchivedSegment, cacheGroupsPredicate, partitionRecoveryStates);
    final IgniteTxManager txManager = cctx.tm();
    try {
        while (restoreLogicalState.hasNext()) {
            WALRecord rec = restoreLogicalState.next();
            // A null record ends the replay.
            if (rec == null)
                break;
            switch(rec.type()) {
                case TX_RECORD:
                    // Transaction states are collected during the metastore phase only.
                    if (restoreMeta) {
                        // Also restore tx states.
                        TxRecord txRec = (TxRecord) rec;
                        txManager.collectTxStates(txRec);
                    }
                    break;
                case // Calculate initial partition states
                CHECKPOINT_RECORD:
                    CheckpointRecord cpRec = (CheckpointRecord) rec;
                    for (Map.Entry<Integer, CacheState> entry : cpRec.cacheGroupStates().entrySet()) {
                        CacheState cacheState = entry.getValue();
                        for (int i = 0; i < cacheState.size(); i++) {
                            int partId = cacheState.partitionByIndex(i);
                            byte state = cacheState.stateByIndex(i);
                            // Ignore undefined state.
                            if (state != -1) {
                                partitionRecoveryStates.put(new GroupPartitionId(entry.getKey(), partId), (int) state);
                            }
                        }
                    }
                    break;
                case ROLLBACK_TX_RECORD:
                    RollbackRecord rbRec = (RollbackRecord) rec;
                    // This local 'ctx' stays in scope for the rest of the switch block and shadows the field.
                    CacheGroupContext ctx = cctx.cache().cacheGroup(rbRec.groupId());
                    // Applied inline for existing, non-local groups only.
                    if (ctx != null && !ctx.isLocal()) {
                        GridDhtLocalPartition part = ctx.topology().forceCreatePartition(rbRec.partitionId());
                        ctx.offheap().dataStore(part).updateInitialCounter(rbRec.start(), rbRec.range());
                    }
                    break;
                case MVCC_DATA_RECORD:
                case DATA_RECORD:
                case DATA_RECORD_V2:
                case ENCRYPTED_DATA_RECORD:
                case ENCRYPTED_DATA_RECORD_V2:
                case ENCRYPTED_DATA_RECORD_V3:
                    DataRecord dataRec = (DataRecord) rec;
                    int entryCnt = dataRec.entryCount();
                    for (int i = 0; i < entryCnt; i++) {
                        DataEntry dataEntry = dataRec.get(i);
                        // In the cache phase, entries the tx manager reports as uncommitted are skipped.
                        if (!restoreMeta && txManager.uncommitedTx(dataEntry))
                            continue;
                        int cacheId = dataEntry.cacheId();
                        DynamicCacheDescriptor cacheDesc = cctx.cache().cacheDescriptor(cacheId);
                        // Can empty in case recovery node on blt changed.
                        if (cacheDesc == null)
                            continue;
                        // The entry is applied asynchronously; the task is submitted with the group id and partition id.
                        stripedApply(() -> {
                            GridCacheContext cacheCtx = cctx.cacheContext(cacheId);
                            if (skipRemovedIndexUpdates(cacheCtx.groupId(), PageIdAllocator.INDEX_PARTITION))
                                cctx.kernalContext().query().markAsRebuildNeeded(cacheCtx, true);
                            try {
                                applyUpdate(cacheCtx, dataEntry);
                            } catch (IgniteCheckedException e) {
                                U.error(log, "Failed to apply data entry, dataEntry=" + dataEntry + ", ptr=" + dataRec.position());
                                applyError.compareAndSet(null, e);
                            }
                            // Incremented after the try/catch, so entries that failed to apply are counted as well.
                            applied.incrementAndGet();
                        }, cacheDesc.groupId(), dataEntry.partitionId(), exec, semaphore);
                    }
                    break;
                case MVCC_TX_RECORD:
                    MvccTxRecord txRecord = (MvccTxRecord) rec;
                    byte txState = convertToTxState(txRecord.state());
                    cctx.coordinators().updateState(txRecord.mvccVersion(), txState, true);
                    break;
                case PART_META_UPDATE_STATE:
                    // Records the state carried by the record for this (group, partition) in the recovery states map.
                    PartitionMetaStateRecord metaStateRecord = (PartitionMetaStateRecord) rec;
                    GroupPartitionId groupPartitionId = new GroupPartitionId(metaStateRecord.groupId(), metaStateRecord.partitionId());
                    restoreLogicalState.partitionRecoveryStates.put(groupPartitionId, (int) metaStateRecord.state());
                    break;
                case METASTORE_DATA_RECORD:
                    MetastoreDataRecord metastoreDataRecord = (MetastoreDataRecord) rec;
                    metaStorage.applyUpdate(metastoreDataRecord.key(), metastoreDataRecord.value());
                    break;
                case META_PAGE_UPDATE_NEXT_SNAPSHOT_ID:
                case META_PAGE_UPDATE_LAST_SUCCESSFUL_SNAPSHOT_ID:
                case META_PAGE_UPDATE_LAST_SUCCESSFUL_FULL_SNAPSHOT_ID:
                case META_PAGE_UPDATE_LAST_ALLOCATED_INDEX:
                    // Meta page deltas are applied asynchronously, submitted with the group id and the partition of the page.
                    PageDeltaRecord pageDelta = (PageDeltaRecord) rec;
                    stripedApplyPage((pageMem) -> {
                        try {
                            applyPageDelta(pageMem, pageDelta, false);
                        } catch (IgniteCheckedException e) {
                            U.error(log, "Failed to apply page delta, " + pageDelta);
                            applyError.compareAndSet(null, e);
                        }
                    }, pageDelta.groupId(), partId(pageDelta.pageId()), exec, semaphore);
                    break;
                case MASTER_KEY_CHANGE_RECORD_V2:
                    cctx.kernalContext().encryption().applyKeys((MasterKeyChangeRecordV2) rec);
                    break;
                case REENCRYPTION_START_RECORD:
                    cctx.kernalContext().encryption().applyReencryptionStartRecord((ReencryptionStartRecord) rec);
                    break;
                case INDEX_ROOT_PAGE_RENAME_RECORD:
                    IndexRenameRootPageRecord record = (IndexRenameRootPageRecord) rec;
                    int cacheId = record.cacheId();
                    GridCacheContext cacheCtx = cctx.cacheContext(cacheId);
                    // The rename is skipped when no cache context exists for the cache id.
                    if (cacheCtx != null) {
                        IgniteCacheOffheapManager offheap = cacheCtx.offheap();
                        for (int i = 0; i < record.segments(); i++) offheap.renameRootPageForIndex(cacheId, record.oldTreeName(), record.newTreeName(), i);
                    }
                    break;
                case PARTITION_CLEARING_START_RECORD:
                    PartitionClearingStartRecord rec0 = (PartitionClearingStartRecord) rec;
                    // 'this.ctx' is required here: the plain name 'ctx' refers to the local declared in the ROLLBACK_TX_RECORD case.
                    CacheGroupContext grp = this.ctx.cache().cacheGroup(rec0.groupId());
                    if (grp != null) {
                        GridDhtLocalPartition part;
                        try {
                            part = grp.topology().forceCreatePartition(rec0.partitionId());
                        } catch (IgniteCheckedException e) {
                            throw new IgniteException("Cannot get or create a partition [groupId=" + rec0.groupId() + ", partitionId=" + rec0.partitionId() + "]", e);
                        }
                        // The clearing itself runs asynchronously; the task blocks on the eviction future before the final updateClearVersion().
                        stripedApply(() -> {
                            try {
                                part.updateClearVersion(rec0.clearVersion());
                                IgniteInternalFuture<?> clearFut = grp.shared().evict().evictPartitionAsync(grp, part, new GridFutureAdapter<>());
                                clearFut.get();
                                part.updateClearVersion();
                            } catch (IgniteCheckedException e) {
                                U.error(log, "Failed to apply partition clearing record, " + rec0);
                                applyError.compareAndSet(null, e);
                            }
                        }, rec0.groupId(), rec0.partitionId(), exec, semaphore);
                    }
                    break;
                // All other record types are ignored.
                default:
            }
        }
    } finally {
        // Close the iterator first; field lookup is re-enabled afterwards (only if close() did not throw).
        it.close();
        if (!restoreMeta)
            cctx.kernalContext().query().skipFieldLookup(false);
    }
    // Reached only if the replay loop completed without throwing.
    // NOTE(review): presumably waits for all striped tasks and rethrows the error stored in applyError - confirm.
    awaitApplyComplete(exec, applyError);
    if (log.isInfoEnabled())
        log.info("Finished applying WAL changes [updatesApplied=" + applied + ", time=" + (U.currentTimeMillis() - start) + " ms]");
    for (DatabaseLifecycleListener lsnr : getDatabaseListeners(cctx.kernalContext())) lsnr.afterLogicalUpdatesApplied(this, restoreLogicalState);
    return restoreLogicalState;
}
Also used : WALRecord(org.apache.ignite.internal.pagemem.wal.record.WALRecord) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) PageDeltaRecord(org.apache.ignite.internal.pagemem.wal.record.delta.PageDeltaRecord) Semaphore(java.util.concurrent.Semaphore) MvccTxRecord(org.apache.ignite.internal.pagemem.wal.record.MvccTxRecord) DataEntry(org.apache.ignite.internal.pagemem.wal.record.DataEntry) MvccDataEntry(org.apache.ignite.internal.pagemem.wal.record.MvccDataEntry) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) WALIterator(org.apache.ignite.internal.pagemem.wal.WALIterator) IgniteException(org.apache.ignite.IgniteException) GridDhtLocalPartition(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition) DataRecord(org.apache.ignite.internal.pagemem.wal.record.DataRecord) MetastoreDataRecord(org.apache.ignite.internal.pagemem.wal.record.MetastoreDataRecord) GroupPartitionId(org.apache.ignite.internal.processors.cache.persistence.partstate.GroupPartitionId) GridCacheContext(org.apache.ignite.internal.processors.cache.GridCacheContext) PartitionMetaStateRecord(org.apache.ignite.internal.pagemem.wal.record.delta.PartitionMetaStateRecord) DynamicCacheDescriptor(org.apache.ignite.internal.processors.cache.DynamicCacheDescriptor) AtomicReference(java.util.concurrent.atomic.AtomicReference) CheckpointRecord(org.apache.ignite.internal.pagemem.wal.record.CheckpointRecord) IgniteTxManager(org.apache.ignite.internal.processors.cache.transactions.IgniteTxManager) CacheState(org.apache.ignite.internal.pagemem.wal.record.CacheState) TxRecord(org.apache.ignite.internal.pagemem.wal.record.TxRecord) MvccTxRecord(org.apache.ignite.internal.pagemem.wal.record.MvccTxRecord) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) IgniteSystemProperties.getInteger(org.apache.ignite.IgniteSystemProperties.getInteger) AtomicLong(java.util.concurrent.atomic.AtomicLong) 
PartitionClearingStartRecord(org.apache.ignite.internal.pagemem.wal.record.PartitionClearingStartRecord) IgniteCacheOffheapManager(org.apache.ignite.internal.processors.cache.IgniteCacheOffheapManager) StripedExecutor(org.apache.ignite.internal.util.StripedExecutor) MetastoreDataRecord(org.apache.ignite.internal.pagemem.wal.record.MetastoreDataRecord) RollbackRecord(org.apache.ignite.internal.pagemem.wal.record.RollbackRecord) CacheGroupContext(org.apache.ignite.internal.processors.cache.CacheGroupContext) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) IndexRenameRootPageRecord(org.apache.ignite.internal.pagemem.wal.record.IndexRenameRootPageRecord)

Example 7 with GridDhtLocalPartition

use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition in project ignite by apache.

the class GridCacheOffheapManager method saveStoreMetadata.

/**
 * Writes the current metadata of a partition data store (update counter, size, global remove id, update counter
 * gaps, re-encryption progress, partition state and, for shared groups, per-cache sizes) into the partition meta
 * page and logs a WAL delta record when the page content changed.
 *
 * @param store Store to save metadata.
 * @param ctx Context whose partition statistics map is updated when {@code needSnapshot} is {@code true}.
 * @param beforeDestroy If {@code true}, the partition state written for a non-local group is
 *      {@code EVICTED} regardless of the actual partition state.
 * @param needSnapshot If {@code true}, the candidate page count is refreshed and the partition is registered
 *      for the snapshot (empty partitions are registered via {@code tryAddEmptyPartitionToSnapshot}).
 * @throws IgniteCheckedException If failed.
 */
private void saveStoreMetadata(CacheDataStore store, Context ctx, boolean beforeDestroy, boolean needSnapshot) throws IgniteCheckedException {
    RowStore rowStore0 = store.rowStore();
    // Metadata is saved only for a store with a row store, and only once partition states are restored (or the group is local).
    if (rowStore0 != null && (partitionStatesRestored || grp.isLocal())) {
        ((CacheFreeList) rowStore0.freeList()).saveMetadata(grp.statisticsHolderData());
        PartitionMetaStorage<SimpleDataRow> partStore = store.partStorage();
        // Values are read once up front and written to the meta page below.
        long updCntr = store.updateCounter();
        long size = store.fullSize();
        long rmvId = globalRemoveId().get();
        byte[] updCntrsBytes = store.partUpdateCounter().getBytes();
        PageMemoryEx pageMem = (PageMemoryEx) grp.dataRegion().pageMemory();
        IgniteWriteAheadLogManager wal = this.ctx.wal();
        GridEncryptionManager encMgr = this.ctx.kernalContext().encryption();
        // The meta page is touched only if there is something to persist: data, a non-zero counter,
        // a non-sequential counter or a non-zero re-encryption state of an encrypted group.
        if (size > 0 || updCntr > 0 || !store.partUpdateCounter().sequential() || (grp.config().isEncryptionEnabled() && encMgr.getEncryptionState(grp.groupId(), store.partId()) > 0)) {
            GridDhtPartitionState state = null;
            // localPartition will not acquire writeLock here because create=false.
            GridDhtLocalPartition part = null;
            if (!grp.isLocal()) {
                if (beforeDestroy)
                    state = GridDhtPartitionState.EVICTED;
                else {
                    part = getPartition(store);
                    if (part != null && part.state() != GridDhtPartitionState.EVICTED)
                        state = part.state();
                }
                // Do not save meta for evicted partitions on next checkpoints.
                if (state == null)
                    return;
            }
            int grpId = grp.groupId();
            long partMetaId = pageMem.partitionMetaPageId(grpId, store.partId());
            long partMetaPage = pageMem.acquirePage(grpId, partMetaId);
            try {
                long partMetaPageAddr = pageMem.writeLock(grpId, partMetaId, partMetaPage);
                // A zero address means the write lock was not obtained: warn and give up (the page is still released in finally).
                if (partMetaPageAddr == 0L) {
                    U.warn(log, "Failed to acquire write lock for meta page [metaPage=" + partMetaPage + ", beforeDestroy=" + beforeDestroy + ", size=" + size + ", updCntr=" + updCntr + ", state=" + state + ']');
                    return;
                }
                // Tracks whether the meta page was modified; passed to writeUnlock() and gates the WAL delta record.
                boolean changed = false;
                try {
                    PagePartitionMetaIOV3 io = PageIO.getPageIO(partMetaPageAddr);
                    long link = io.getGapsLink(partMetaPageAddr);
                    // Synchronize the stored update counter gaps row with the current serialized counter state:
                    // remove a stale row, insert a missing one, or replace it when the bytes differ.
                    if (updCntrsBytes == null && link != 0) {
                        partStore.removeDataRowByLink(link, grp.statisticsHolderData());
                        io.setGapsLink(partMetaPageAddr, (link = 0));
                        changed = true;
                    } else if (updCntrsBytes != null && link == 0) {
                        SimpleDataRow row = new SimpleDataRow(store.partId(), updCntrsBytes);
                        partStore.insertDataRow(row, grp.statisticsHolderData());
                        io.setGapsLink(partMetaPageAddr, (link = row.link()));
                        changed = true;
                    } else if (updCntrsBytes != null && link != 0) {
                        byte[] prev = partStore.readRow(link);
                        assert prev != null : "Read null gaps using link=" + link;
                        if (!Arrays.equals(prev, updCntrsBytes)) {
                            partStore.removeDataRowByLink(link, grp.statisticsHolderData());
                            SimpleDataRow row = new SimpleDataRow(store.partId(), updCntrsBytes);
                            partStore.insertDataRow(row, grp.statisticsHolderData());
                            io.setGapsLink(partMetaPageAddr, (link = row.link()));
                            changed = true;
                        }
                    }
                    if (changed)
                        partStore.saveMetadata(grp.statisticsHolderData());
                    // NOTE(review): the setters below presumably return true only when the stored value changed - confirm in the IO class.
                    changed |= io.setUpdateCounter(partMetaPageAddr, updCntr);
                    changed |= io.setGlobalRemoveId(partMetaPageAddr, rmvId);
                    changed |= io.setSize(partMetaPageAddr, size);
                    int encryptIdx = 0;
                    int encryptCnt = 0;
                    if (grp.config().isEncryptionEnabled()) {
                        long reencryptState = encMgr.getEncryptionState(grpId, store.partId());
                        if (reencryptState != 0) {
                            encryptIdx = ReencryptStateUtils.pageIndex(reencryptState);
                            encryptCnt = ReencryptStateUtils.pageCount(reencryptState);
                            // When the page index has reached the page count, the re-encryption state is reset to zero.
                            if (encryptIdx == encryptCnt) {
                                encMgr.setEncryptionState(grp, store.partId(), 0, 0);
                                encryptIdx = encryptCnt = 0;
                            }
                            changed |= io.setEncryptedPageIndex(partMetaPageAddr, encryptIdx);
                            changed |= io.setEncryptedPageCount(partMetaPageAddr, encryptCnt);
                        }
                    }
                    // State can be null only for local groups (non-local groups returned earlier in that case).
                    if (state != null)
                        changed |= io.setPartitionState(partMetaPageAddr, (byte) state.ordinal());
                    else
                        assert grp.isLocal() : grp.cacheOrGroupName();
                    long cntrsPageId;
                    if (grp.sharedGroup()) {
                        long initCntrPageId = io.getCountersPageId(partMetaPageAddr);
                        Map<Integer, Long> newSizes = store.cacheSizes();
                        Map<Integer, Long> prevSizes = readSharedGroupCacheSizes(pageMem, grpId, initCntrPageId);
                        if (prevSizes != null && prevSizes.equals(newSizes))
                            // Preventing modification of sizes pages for store
                            cntrsPageId = initCntrPageId;
                        else {
                            cntrsPageId = writeSharedGroupCacheSizes(pageMem, grpId, initCntrPageId, store.partId(), newSizes);
                            // The counters page id is stored in the meta page only when it was not set before.
                            if (initCntrPageId == 0 && cntrsPageId != 0) {
                                io.setCountersPageId(partMetaPageAddr, cntrsPageId);
                                changed = true;
                            }
                        }
                    } else
                        cntrsPageId = 0L;
                    int pageCnt;
                    if (needSnapshot) {
                        pageCnt = this.ctx.pageStore().pages(grpId, store.partId());
                        // An empty partition gets a candidate page count of 0.
                        io.setCandidatePageCount(partMetaPageAddr, size == 0 ? 0 : pageCnt);
                        if (state == OWNING) {
                            assert part != null;
                            if (!addPartition(part, ctx.partitionStatMap(), partMetaPageAddr, io, grpId, store.partId(), this.ctx.pageStore().pages(grpId, store.partId()), store.fullSize()))
                                U.warn(log, "Partition was concurrently evicted grpId=" + grpId + ", partitionId=" + part.id());
                        } else if (state == MOVING || state == RENTING) {
                            if (ctx.partitionStatMap().forceSkipIndexPartition(grpId)) {
                                if (log.isInfoEnabled())
                                    log.info("Will not include SQL indexes to snapshot because there is " + "a partition not in " + OWNING + " state [grp=" + grp.cacheOrGroupName() + ", partId=" + store.partId() + ", state=" + state + ']');
                            }
                        }
                        // With needSnapshot the page is always treated as changed.
                        changed = true;
                    } else
                        pageCnt = io.getCandidatePageCount(partMetaPageAddr);
                    // The delta record carries the values written above; state is encoded as -1 when it is null.
                    if (changed && isWalDeltaRecordNeeded(pageMem, grpId, partMetaId, partMetaPage, wal, null))
                        wal.log(new MetaPageUpdatePartitionDataRecordV3(grpId, partMetaId, updCntr, rmvId, // TODO: Partition size may be long
                        (int) size, cntrsPageId, state == null ? -1 : (byte) state.ordinal(), pageCnt, link, encryptIdx, encryptCnt));
                    if (changed) {
                        partStore.saveMetadata(grp.statisticsHolderData());
                        io.setPartitionMetaStoreReuseListRoot(partMetaPageAddr, partStore.metaPageId());
                    }
                } finally {
                    // The last argument tells page memory whether the page was modified under the lock.
                    pageMem.writeUnlock(grpId, partMetaId, partMetaPage, null, changed);
                }
            } finally {
                pageMem.releasePage(grpId, partMetaId, partMetaPage);
            }
        } else if (needSnapshot)
            // Nothing to persist for this store, but a snapshot still has to know about the empty partition.
            tryAddEmptyPartitionToSnapshot(store, ctx);
    } else if (needSnapshot)
        tryAddEmptyPartitionToSnapshot(store, ctx);
}
Also used : GridEncryptionManager(org.apache.ignite.internal.managers.encryption.GridEncryptionManager) IgniteWriteAheadLogManager(org.apache.ignite.internal.pagemem.wal.IgniteWriteAheadLogManager) CacheFreeList(org.apache.ignite.internal.processors.cache.persistence.freelist.CacheFreeList) CacheDataRowStore(org.apache.ignite.internal.processors.cache.tree.CacheDataRowStore) PagePartitionMetaIOV3(org.apache.ignite.internal.processors.cache.persistence.tree.io.PagePartitionMetaIOV3) SimpleDataRow(org.apache.ignite.internal.processors.cache.persistence.freelist.SimpleDataRow) MetaPageUpdatePartitionDataRecordV3(org.apache.ignite.internal.pagemem.wal.record.delta.MetaPageUpdatePartitionDataRecordV3) AtomicLong(java.util.concurrent.atomic.AtomicLong) GridDhtPartitionState(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState) PageMemoryEx(org.apache.ignite.internal.processors.cache.persistence.pagemem.PageMemoryEx) GridDhtLocalPartition(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition)

Example 8 with GridDhtLocalPartition

use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition in project ignite by apache.

the class GridCacheOffheapManager method preloadPartition.

/**
 * {@inheritDoc}
 */
@Override
public void preloadPartition(int partId) throws IgniteCheckedException {
    if (!grp.isLocal()) {
        // Resolve the partition from the group topology; the caller is expected to hold a reservation on it.
        GridDhtLocalPartition reservedPart = grp.topology().localPartition(partId, AffinityTopologyVersion.NONE, false, false);
        assert reservedPart != null && reservedPart.reservations() > 0;
        reservedPart.dataStore().preload();
    } else {
        // For a local group the data store is resolved with a null partition.
        dataStore(null).preload();
    }
}
Also used : GridDhtLocalPartition(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition)

Example 9 with GridDhtLocalPartition

use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition in project ignite by apache.

the class GridCacheOffheapManager method restoreStateOfPartition.

/**
 * {@inheritDoc}
 */
@Override
public long restoreStateOfPartition(int p, @Nullable Integer recoveryState) throws IgniteCheckedException {
    // Nothing to restore for local groups, non-affinity nodes, non-persistent regions or already restored states.
    if (grp.isLocal() || !grp.affinityNode() || !grp.dataRegion().config().isPersistenceEnabled() || partitionStatesRestored)
        return 0;
    PageMemoryEx pageMemory = (PageMemoryEx) grp.dataRegion().pageMemory();
    long startedAt = U.currentTimeMillis();
    long elapsed = 0;
    if (log.isDebugEnabled())
        log.debug("Started restoring partition state [grp=" + grp.cacheOrGroupName() + ", p=" + p + ']');
    if (ctx.pageStore().exists(grp.groupId(), p)) {
        ctx.pageStore().ensure(grp.groupId(), p);
        // A partition file with at most one page is skipped.
        if (ctx.pageStore().pages(grp.groupId(), p) <= 1) {
            if (log.isDebugEnabled())
                log.debug("Skipping partition on recovery (pages less than or equals 1) " + "[grp=" + grp.cacheOrGroupName() + ", p=" + p + ']');
            return 0;
        }
        if (log.isDebugEnabled())
            log.debug("Creating partition on recovery (exists in page store) " + "[grp=" + grp.cacheOrGroupName() + ", p=" + p + ']');
        GridDhtLocalPartition locPart = grp.topology().forceCreatePartition(p);
        // The existing (having a data file) partition is initialized before the checkpoint read lock is taken.
        locPart.dataStore().init();
        ctx.database().checkpointReadLock();
        try {
            long metaPageId = pageMemory.partitionMetaPageId(grp.groupId(), p);
            long metaPage = pageMemory.acquirePage(grp.groupId(), metaPageId);
            try {
                long metaPageAddr = pageMemory.writeLock(grp.groupId(), metaPageId, metaPage);
                boolean dirty = false;
                try {
                    PagePartitionMetaIO metaIo = PagePartitionMetaIO.VERSIONS.forPage(metaPageAddr);
                    if (recoveryState == null) {
                        // No state came from the WAL: use the state stored in the partition meta page.
                        int storedStateId = metaIo.getPartitionState(metaPageAddr);
                        updateState(locPart, storedStateId);
                        if (log.isDebugEnabled())
                            log.debug("Restored partition state (from page memory) " + "[grp=" + grp.cacheOrGroupName() + ", p=" + p + ", state=" + locPart.state() + ", updCntr=" + locPart.initialUpdateCounter() + ", stateId=" + storedStateId + ", size=" + locPart.fullSize() + ']');
                    } else {
                        // The state from the WAL wins: write it to the meta page and apply it to the partition.
                        dirty = metaIo.setPartitionState(metaPageAddr, (byte) recoveryState.intValue());
                        updateState(locPart, recoveryState);
                        if (log.isDebugEnabled())
                            log.debug("Restored partition state (from WAL) " + "[grp=" + grp.cacheOrGroupName() + ", p=" + p + ", state=" + locPart.state() + ", updCntr=" + locPart.initialUpdateCounter() + ", size=" + locPart.fullSize() + ']');
                    }
                } finally {
                    pageMemory.writeUnlock(grp.groupId(), metaPageId, metaPage, null, dirty);
                }
            } finally {
                pageMemory.releasePage(grp.groupId(), metaPageId, metaPage);
            }
        } finally {
            ctx.database().checkpointReadUnlock();
        }
        elapsed = U.currentTimeMillis() - startedAt;
    } else if (recoveryState == null) {
        if (log.isDebugEnabled())
            log.debug("Skipping partition on recovery (no page store OR wal state) " + "[grp=" + grp.cacheOrGroupName() + ", p=" + p + ']');
    } else {
        // No partition file, but the WAL supplied a state: pre-create the partition with that state.
        GridDhtLocalPartition locPart = grp.topology().forceCreatePartition(p);
        updateState(locPart, recoveryState);
        elapsed = U.currentTimeMillis() - startedAt;
        if (log.isDebugEnabled())
            log.debug("Restored partition state (from WAL) " + "[grp=" + grp.cacheOrGroupName() + ", p=" + p + ", state=" + locPart.state() + ", updCntr=" + locPart.initialUpdateCounter() + ", size=" + locPart.fullSize() + ']');
    }
    if (log.isDebugEnabled())
        log.debug("Finished restoring partition state " + "[grp=" + grp.cacheOrGroupName() + ", p=" + p + ", time=" + U.humanReadableDuration(U.currentTimeMillis() - startedAt) + ']');
    return elapsed;
}
Also used : PageMemoryEx(org.apache.ignite.internal.processors.cache.persistence.pagemem.PageMemoryEx) GridDhtLocalPartition(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition) PagePartitionMetaIO(org.apache.ignite.internal.processors.cache.persistence.tree.io.PagePartitionMetaIO)

Example 10 with GridDhtLocalPartition

use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition in project ignite by apache.

the class IgniteTxLocalAdapter method calculatePartitionUpdateCounters.

/**
 * Calculates partition update counters for the current transaction. Every partition with a non-zero
 * accumulated counter is supplied with a pair (init, delta), where init is the initial update counter and
 * delta is the number of updates made by the current transaction for that partition.
 *
 * @throws IgniteTxRollbackCheckedException If the local node is no longer primary for one of the partitions.
 */
public void calculatePartitionUpdateCounters() throws IgniteTxRollbackCheckedException {
    TxCounters txCntrs = txCounters(false);
    // Proceed only when counters exist and their update counters are still empty.
    if (txCntrs == null || !F.isEmpty(txCntrs.updateCounters()))
        return;
    List<PartitionUpdateCountersMessage> collectedMsgs = new ArrayList<>();
    for (Map.Entry<Integer, Map<Integer, AtomicLong>> cacheEntry : txCntrs.accumulatedUpdateCounters().entrySet()) {
        int cacheId = cacheEntry.getKey();
        Map<Integer, AtomicLong> accByPart = cacheEntry.getValue();
        assert accByPart != null;
        if (F.isEmpty(accByPart))
            continue;
        PartitionUpdateCountersMessage cacheMsg = new PartitionUpdateCountersMessage(cacheId, accByPart.size());
        GridCacheContext cacheCtx = cctx.cacheContext(cacheId);
        GridDhtPartitionTopology top = cacheCtx.topology();
        assert top != null;
        for (Map.Entry<Integer, AtomicLong> partEntry : accByPart.entrySet()) {
            AtomicLong accumulated = partEntry.getValue();
            assert accumulated != null;
            long delta = accumulated.get();
            assert delta >= 0;
            // Partitions without updates are not reported.
            if (delta == 0)
                continue;
            int partId = partEntry.getKey();
            GridDhtLocalPartition locPart = top.localPartition(partId);
            // Verify primary tx mapping.
            // LOST state is possible if tx is started over LOST partition.
            boolean primaryMapped = locPart != null && (locPart.state() == OWNING || locPart.state() == LOST) && locPart.primary(top.readyTopologyVersion());
            if (primaryMapped) {
                cacheMsg.add(partId, locPart.getAndIncrementUpdateCounter(delta), delta);
                continue;
            }
            // Local node is no longer primary for the partition, need to rollback a transaction.
            if (locPart != null && !locPart.primary(top.readyTopologyVersion())) {
                log.warning("Failed to prepare a transaction on outdated topology, rolling back " + "[tx=" + CU.txString(this) + ", readyTopVer=" + top.readyTopologyVersion() + ", lostParts=" + top.lostPartitions() + ", part=" + locPart.toString() + ']');
                throw new IgniteTxRollbackCheckedException("Failed to prepare a transaction on outdated " + "topology, please try again [timeout=" + timeout() + ", tx=" + CU.txString(this) + ']');
            }
            // Any other invalid mapping is a hard error.
            throw new AssertionError("Invalid primary mapping [tx=" + CU.txString(this) + ", readyTopVer=" + top.readyTopologyVersion() + ", lostParts=" + top.lostPartitions() + ", part=" + (locPart == null ? "NULL" : locPart.toString()) + ']');
        }
        if (cacheMsg.size() > 0)
            collectedMsgs.add(cacheMsg);
    }
    txCntrs.updateCounters(collectedMsgs);
}
Also used : GridCacheContext(org.apache.ignite.internal.processors.cache.GridCacheContext) GridDhtPartitionTopology(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology) ArrayList(java.util.ArrayList) IgniteTxRollbackCheckedException(org.apache.ignite.internal.transactions.IgniteTxRollbackCheckedException) AtomicLong(java.util.concurrent.atomic.AtomicLong) PartitionUpdateCountersMessage(org.apache.ignite.internal.processors.cache.distributed.dht.PartitionUpdateCountersMessage) GridDhtLocalPartition(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition) Map(java.util.Map)

Aggregations

GridDhtLocalPartition (org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition)95 GridDhtPartitionTopology (org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionTopology)21 IgniteCheckedException (org.apache.ignite.IgniteCheckedException)19 IgniteEx (org.apache.ignite.internal.IgniteEx)19 CacheGroupContext (org.apache.ignite.internal.processors.cache.CacheGroupContext)19 ArrayList (java.util.ArrayList)18 Map (java.util.Map)18 Test (org.junit.Test)18 AffinityTopologyVersion (org.apache.ignite.internal.processors.affinity.AffinityTopologyVersion)16 GridCommonAbstractTest (org.apache.ignite.testframework.junits.common.GridCommonAbstractTest)16 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)15 ClusterNode (org.apache.ignite.cluster.ClusterNode)15 GridCacheContext (org.apache.ignite.internal.processors.cache.GridCacheContext)15 HashMap (java.util.HashMap)14 HashSet (java.util.HashSet)13 AtomicLong (java.util.concurrent.atomic.AtomicLong)13 CacheDataRow (org.apache.ignite.internal.processors.cache.persistence.CacheDataRow)13 Ignite (org.apache.ignite.Ignite)12 KeyCacheObject (org.apache.ignite.internal.processors.cache.KeyCacheObject)12 IgniteException (org.apache.ignite.IgniteException)11