
Example 1 with GroupPartitionId

Use of org.apache.ignite.internal.processors.cache.persistence.partstate.GroupPartitionId in project ignite by apache.

The class GridCacheDatabaseSharedManager, method applyLogicalUpdates:

/**
 * @param status Last registered checkpoint status.
 * @param cacheGroupsPredicate Predicate selecting the cache group IDs whose updates should be applied.
 * @param recordTypePredicate Predicate filtering WAL records by record type and pointer.
 * @param restoreMeta Metastore restore phase if {@code true}.
 * @throws IgniteCheckedException If failed to apply updates.
 * @throws StorageException If an I/O exception occurred while reading the write-ahead log.
 */
private RestoreLogicalState applyLogicalUpdates(CheckpointStatus status, IgnitePredicate<Integer> cacheGroupsPredicate, IgniteBiPredicate<WALRecord.RecordType, WALPointer> recordTypePredicate, boolean restoreMeta) throws IgniteCheckedException {
    if (log.isInfoEnabled())
        log.info("Applying lost " + (restoreMeta ? "metastore" : "cache") + " updates since last checkpoint record [lastMarked=" + status.startPtr + ", lastCheckpointId=" + status.cpStartId + ']');
    if (!restoreMeta)
        cctx.kernalContext().query().skipFieldLookup(true);
    long start = U.currentTimeMillis();
    AtomicReference<Throwable> applyError = new AtomicReference<>();
    AtomicLong applied = new AtomicLong();
    long lastArchivedSegment = cctx.wal().lastArchivedSegment();
    StripedExecutor exec = cctx.kernalContext().pools().getStripedExecutorService();
    Semaphore semaphore = new Semaphore(semaphorePertmits(exec));
    Map<GroupPartitionId, Integer> partitionRecoveryStates = new HashMap<>();
    WALIterator it = cctx.wal().replay(status.startPtr, recordTypePredicate);
    RestoreLogicalState restoreLogicalState = new RestoreLogicalState(status, it, lastArchivedSegment, cacheGroupsPredicate, partitionRecoveryStates);
    final IgniteTxManager txManager = cctx.tm();
    try {
        while (restoreLogicalState.hasNext()) {
            WALRecord rec = restoreLogicalState.next();
            if (rec == null)
                break;
            switch(rec.type()) {
                case TX_RECORD:
                    if (restoreMeta) {
                        // Also restore tx states.
                        TxRecord txRec = (TxRecord) rec;
                        txManager.collectTxStates(txRec);
                    }
                    break;
                // Calculate initial partition states.
                case CHECKPOINT_RECORD:
                    CheckpointRecord cpRec = (CheckpointRecord) rec;
                    for (Map.Entry<Integer, CacheState> entry : cpRec.cacheGroupStates().entrySet()) {
                        CacheState cacheState = entry.getValue();
                        for (int i = 0; i < cacheState.size(); i++) {
                            int partId = cacheState.partitionByIndex(i);
                            byte state = cacheState.stateByIndex(i);
                            // Ignore undefined state.
                            if (state != -1) {
                                partitionRecoveryStates.put(new GroupPartitionId(entry.getKey(), partId), (int) state);
                            }
                        }
                    }
                    break;
                case ROLLBACK_TX_RECORD:
                    RollbackRecord rbRec = (RollbackRecord) rec;
                    CacheGroupContext ctx = cctx.cache().cacheGroup(rbRec.groupId());
                    if (ctx != null && !ctx.isLocal()) {
                        GridDhtLocalPartition part = ctx.topology().forceCreatePartition(rbRec.partitionId());
                        ctx.offheap().dataStore(part).updateInitialCounter(rbRec.start(), rbRec.range());
                    }
                    break;
                case MVCC_DATA_RECORD:
                case DATA_RECORD:
                case DATA_RECORD_V2:
                case ENCRYPTED_DATA_RECORD:
                case ENCRYPTED_DATA_RECORD_V2:
                case ENCRYPTED_DATA_RECORD_V3:
                    DataRecord dataRec = (DataRecord) rec;
                    int entryCnt = dataRec.entryCount();
                    for (int i = 0; i < entryCnt; i++) {
                        DataEntry dataEntry = dataRec.get(i);
                        if (!restoreMeta && txManager.uncommitedTx(dataEntry))
                            continue;
                        int cacheId = dataEntry.cacheId();
                        DynamicCacheDescriptor cacheDesc = cctx.cache().cacheDescriptor(cacheId);
                        // Can be null in case the recovering node's baseline topology (BLT) changed.
                        if (cacheDesc == null)
                            continue;
                        stripedApply(() -> {
                            GridCacheContext cacheCtx = cctx.cacheContext(cacheId);
                            if (skipRemovedIndexUpdates(cacheCtx.groupId(), PageIdAllocator.INDEX_PARTITION))
                                cctx.kernalContext().query().markAsRebuildNeeded(cacheCtx, true);
                            try {
                                applyUpdate(cacheCtx, dataEntry);
                            } catch (IgniteCheckedException e) {
                                U.error(log, "Failed to apply data entry, dataEntry=" + dataEntry + ", ptr=" + dataRec.position());
                                applyError.compareAndSet(null, e);
                            }
                            applied.incrementAndGet();
                        }, cacheDesc.groupId(), dataEntry.partitionId(), exec, semaphore);
                    }
                    break;
                case MVCC_TX_RECORD:
                    MvccTxRecord txRecord = (MvccTxRecord) rec;
                    byte txState = convertToTxState(txRecord.state());
                    cctx.coordinators().updateState(txRecord.mvccVersion(), txState, true);
                    break;
                case PART_META_UPDATE_STATE:
                    PartitionMetaStateRecord metaStateRecord = (PartitionMetaStateRecord) rec;
                    GroupPartitionId groupPartitionId = new GroupPartitionId(metaStateRecord.groupId(), metaStateRecord.partitionId());
                    restoreLogicalState.partitionRecoveryStates.put(groupPartitionId, (int) metaStateRecord.state());
                    break;
                case METASTORE_DATA_RECORD:
                    MetastoreDataRecord metastoreDataRecord = (MetastoreDataRecord) rec;
                    metaStorage.applyUpdate(metastoreDataRecord.key(), metastoreDataRecord.value());
                    break;
                case META_PAGE_UPDATE_NEXT_SNAPSHOT_ID:
                case META_PAGE_UPDATE_LAST_SUCCESSFUL_SNAPSHOT_ID:
                case META_PAGE_UPDATE_LAST_SUCCESSFUL_FULL_SNAPSHOT_ID:
                case META_PAGE_UPDATE_LAST_ALLOCATED_INDEX:
                    PageDeltaRecord pageDelta = (PageDeltaRecord) rec;
                    stripedApplyPage((pageMem) -> {
                        try {
                            applyPageDelta(pageMem, pageDelta, false);
                        } catch (IgniteCheckedException e) {
                            U.error(log, "Failed to apply page delta, " + pageDelta);
                            applyError.compareAndSet(null, e);
                        }
                    }, pageDelta.groupId(), partId(pageDelta.pageId()), exec, semaphore);
                    break;
                case MASTER_KEY_CHANGE_RECORD_V2:
                    cctx.kernalContext().encryption().applyKeys((MasterKeyChangeRecordV2) rec);
                    break;
                case REENCRYPTION_START_RECORD:
                    cctx.kernalContext().encryption().applyReencryptionStartRecord((ReencryptionStartRecord) rec);
                    break;
                case INDEX_ROOT_PAGE_RENAME_RECORD:
                    IndexRenameRootPageRecord record = (IndexRenameRootPageRecord) rec;
                    int cacheId = record.cacheId();
                    GridCacheContext cacheCtx = cctx.cacheContext(cacheId);
                    if (cacheCtx != null) {
                        IgniteCacheOffheapManager offheap = cacheCtx.offheap();
                        for (int i = 0; i < record.segments(); i++)
                            offheap.renameRootPageForIndex(cacheId, record.oldTreeName(), record.newTreeName(), i);
                    }
                    break;
                case PARTITION_CLEARING_START_RECORD:
                    PartitionClearingStartRecord rec0 = (PartitionClearingStartRecord) rec;
                    CacheGroupContext grp = this.ctx.cache().cacheGroup(rec0.groupId());
                    if (grp != null) {
                        GridDhtLocalPartition part;
                        try {
                            part = grp.topology().forceCreatePartition(rec0.partitionId());
                        } catch (IgniteCheckedException e) {
                            throw new IgniteException("Cannot get or create a partition [groupId=" + rec0.groupId() + ", partitionId=" + rec0.partitionId() + "]", e);
                        }
                        stripedApply(() -> {
                            try {
                                part.updateClearVersion(rec0.clearVersion());
                                IgniteInternalFuture<?> clearFut = grp.shared().evict().evictPartitionAsync(grp, part, new GridFutureAdapter<>());
                                clearFut.get();
                                part.updateClearVersion();
                            } catch (IgniteCheckedException e) {
                                U.error(log, "Failed to apply partition clearing record, " + rec0);
                                applyError.compareAndSet(null, e);
                            }
                        }, rec0.groupId(), rec0.partitionId(), exec, semaphore);
                    }
                    break;
                default:
            }
        }
    } finally {
        it.close();
        if (!restoreMeta)
            cctx.kernalContext().query().skipFieldLookup(false);
    }
    awaitApplyComplete(exec, applyError);
    if (log.isInfoEnabled())
        log.info("Finished applying WAL changes [updatesApplied=" + applied + ", time=" + (U.currentTimeMillis() - start) + " ms]");
    for (DatabaseLifecycleListener lsnr : getDatabaseListeners(cctx.kernalContext()))
        lsnr.afterLogicalUpdatesApplied(this, restoreLogicalState);
    return restoreLogicalState;
}
Also used : WALRecord(org.apache.ignite.internal.pagemem.wal.record.WALRecord) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) PageDeltaRecord(org.apache.ignite.internal.pagemem.wal.record.delta.PageDeltaRecord) Semaphore(java.util.concurrent.Semaphore) MvccTxRecord(org.apache.ignite.internal.pagemem.wal.record.MvccTxRecord) DataEntry(org.apache.ignite.internal.pagemem.wal.record.DataEntry) MvccDataEntry(org.apache.ignite.internal.pagemem.wal.record.MvccDataEntry) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) WALIterator(org.apache.ignite.internal.pagemem.wal.WALIterator) IgniteException(org.apache.ignite.IgniteException) GridDhtLocalPartition(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition) DataRecord(org.apache.ignite.internal.pagemem.wal.record.DataRecord) MetastoreDataRecord(org.apache.ignite.internal.pagemem.wal.record.MetastoreDataRecord) GroupPartitionId(org.apache.ignite.internal.processors.cache.persistence.partstate.GroupPartitionId) GridCacheContext(org.apache.ignite.internal.processors.cache.GridCacheContext) PartitionMetaStateRecord(org.apache.ignite.internal.pagemem.wal.record.delta.PartitionMetaStateRecord) DynamicCacheDescriptor(org.apache.ignite.internal.processors.cache.DynamicCacheDescriptor) AtomicReference(java.util.concurrent.atomic.AtomicReference) CheckpointRecord(org.apache.ignite.internal.pagemem.wal.record.CheckpointRecord) IgniteTxManager(org.apache.ignite.internal.processors.cache.transactions.IgniteTxManager) CacheState(org.apache.ignite.internal.pagemem.wal.record.CacheState) TxRecord(org.apache.ignite.internal.pagemem.wal.record.TxRecord) MvccTxRecord(org.apache.ignite.internal.pagemem.wal.record.MvccTxRecord) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) IgniteSystemProperties.getInteger(org.apache.ignite.IgniteSystemProperties.getInteger) AtomicLong(java.util.concurrent.atomic.AtomicLong) PartitionClearingStartRecord(org.apache.ignite.internal.pagemem.wal.record.PartitionClearingStartRecord) IgniteCacheOffheapManager(org.apache.ignite.internal.processors.cache.IgniteCacheOffheapManager) StripedExecutor(org.apache.ignite.internal.util.StripedExecutor) MetastoreDataRecord(org.apache.ignite.internal.pagemem.wal.record.MetastoreDataRecord) RollbackRecord(org.apache.ignite.internal.pagemem.wal.record.RollbackRecord) CacheGroupContext(org.apache.ignite.internal.processors.cache.CacheGroupContext) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) IndexRenameRootPageRecord(org.apache.ignite.internal.pagemem.wal.record.IndexRenameRootPageRecord)
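
To make the CHECKPOINT_RECORD branch above easier to follow in isolation: it seeds the recovery map with the partition states captured at the last checkpoint, keyed by GroupPartitionId. Below is a minimal sketch of just that step, factored into a standalone helper; the helper class and method names are hypothetical and not part of Ignite, while all calls on CheckpointRecord, CacheState and GroupPartitionId mirror the example above.

import java.util.HashMap;
import java.util.Map;

import org.apache.ignite.internal.pagemem.wal.record.CacheState;
import org.apache.ignite.internal.pagemem.wal.record.CheckpointRecord;
import org.apache.ignite.internal.processors.cache.persistence.partstate.GroupPartitionId;

/** Hypothetical helper illustrating how a checkpoint record seeds the partition recovery states. */
final class RecoveryStateSketch {
    /** Collects (group, partition) -> state pairs from a checkpoint record, skipping undefined (-1) states. */
    static Map<GroupPartitionId, Integer> initialPartitionStates(CheckpointRecord cpRec) {
        Map<GroupPartitionId, Integer> states = new HashMap<>();

        for (Map.Entry<Integer, CacheState> entry : cpRec.cacheGroupStates().entrySet()) {
            CacheState cacheState = entry.getValue();

            for (int i = 0; i < cacheState.size(); i++) {
                byte state = cacheState.stateByIndex(i);

                // Ignore undefined state, exactly as applyLogicalUpdates() does.
                if (state != -1)
                    states.put(new GroupPartitionId(entry.getKey(), cacheState.partitionByIndex(i)), (int) state);
            }
        }

        return states;
    }
}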

Example 2 with GroupPartitionId

Use of org.apache.ignite.internal.processors.cache.persistence.partstate.GroupPartitionId in project ignite by apache.

The class CheckpointHistory, method searchCheckpointEntry:

/**
 * Tries to find a WAL pointer (checkpoint entry) for each given partition counter start.
 *
 * @param searchCntrMap Search map of (group ID, partition ID) pairs to partition counters.
 * @return Map of group-partition to checkpoint entry, or an empty map if nothing was found.
 */
public Map<GroupPartitionId, CheckpointEntry> searchCheckpointEntry(Map<T2<Integer, Integer>, Long> searchCntrMap) {
    if (F.isEmpty(searchCntrMap))
        return Collections.emptyMap();
    Map<T2<Integer, Integer>, Long> modifiedSearchMap = new HashMap<>(searchCntrMap);
    Map<GroupPartitionId, CheckpointEntry> res = new HashMap<>();
    for (Long cpTs : checkpoints(true)) {
        try {
            CheckpointEntry cpEntry = entry(cpTs);
            Iterator<Map.Entry<T2<Integer, Integer>, Long>> iter = modifiedSearchMap.entrySet().iterator();
            while (iter.hasNext()) {
                Map.Entry<T2<Integer, Integer>, Long> entry = iter.next();
                Long foundCntr = cpEntry.partitionCounter(wal, entry.getKey().get1(), entry.getKey().get2());
                if (foundCntr != null && foundCntr <= entry.getValue()) {
                    iter.remove();
                    res.put(new GroupPartitionId(entry.getKey().get1(), entry.getKey().get2()), cpEntry);
                }
            }
            if (F.isEmpty(modifiedSearchMap))
                return res;
        } catch (IgniteCheckedException e) {
            log.warning("Checkpoint data is unavailable in WAL [cpTs=" + U.format(cpTs) + ']', e);
            break;
        }
    }
    if (!F.isEmpty(modifiedSearchMap))
        return Collections.emptyMap();
    return res;
}
Also used : HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) HashMap(java.util.HashMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) NavigableMap(java.util.NavigableMap) ConcurrentSkipListMap(java.util.concurrent.ConcurrentSkipListMap) T2(org.apache.ignite.internal.util.typedef.T2) GroupPartitionId(org.apache.ignite.internal.processors.cache.persistence.partstate.GroupPartitionId)
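
A hypothetical caller sketch for searchCheckpointEntry, assuming a CheckpointHistory instance is at hand; the surrounding class, the method name and the import paths (which assume a recent Ignite version) are placeholders, not part of the Ignite code base.

import java.util.HashMap;
import java.util.Map;

import org.apache.ignite.internal.processors.cache.persistence.checkpoint.CheckpointEntry;
import org.apache.ignite.internal.processors.cache.persistence.checkpoint.CheckpointHistory;
import org.apache.ignite.internal.processors.cache.persistence.partstate.GroupPartitionId;
import org.apache.ignite.internal.util.typedef.T2;

/** Hypothetical caller sketch; not part of the Ignite code base. */
final class CheckpointSearchSketch {
    /** Looks up the checkpoint covering the given partition counter for one (group, partition) pair. */
    static CheckpointEntry findCheckpoint(CheckpointHistory history, int grpId, int partId, long cntr) {
        Map<T2<Integer, Integer>, Long> searchCntrMap = new HashMap<>();
        searchCntrMap.put(new T2<>(grpId, partId), cntr);

        Map<GroupPartitionId, CheckpointEntry> found = history.searchCheckpointEntry(searchCntrMap);

        // The result is keyed by GroupPartitionId, so an equal key built from the same IDs retrieves the entry.
        return found.get(new GroupPartitionId(grpId, partId));
    }
}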

Example 3 with GroupPartitionId

Use of org.apache.ignite.internal.processors.cache.persistence.partstate.GroupPartitionId in project ignite by apache.

The class CheckpointHistory, method addCpGroupStatesToEarliestCpMap:

/**
 * Adds the last checkpoint to the map of the earliest checkpoints.
 *
 * @param entry Checkpoint entry.
 * @param cacheGrpStates Group states map.
 */
private void addCpGroupStatesToEarliestCpMap(CheckpointEntry entry, Map<Integer, GroupState> cacheGrpStates) {
    for (Integer grpId : cacheGrpStates.keySet()) {
        GroupState grpState = cacheGrpStates.get(grpId);
        for (int pIdx = 0; pIdx < grpState.size(); pIdx++) {
            int part = grpState.getPartitionByIndex(pIdx);
            GroupPartitionId grpPartKey = new GroupPartitionId(grpId, part);
            addPartitionToEarliestCheckpoints(grpPartKey, entry);
        }
    }
}
Also used : GroupState(org.apache.ignite.internal.processors.cache.persistence.checkpoint.CheckpointEntry.GroupState) GroupPartitionId(org.apache.ignite.internal.processors.cache.persistence.partstate.GroupPartitionId)

Example 4 with GroupPartitionId

Use of org.apache.ignite.internal.processors.cache.persistence.partstate.GroupPartitionId in project ignite by apache.

The class CheckpointHistory, method addCpCacheStatesToEarliestCpMap:

/**
 * Adds the last checkpoint to the map of the earliest checkpoints.
 *
 * @param entry Checkpoint entry.
 * @param cacheStates Cache states map.
 */
private void addCpCacheStatesToEarliestCpMap(CheckpointEntry entry, Map<Integer, CacheState> cacheStates) {
    for (Integer grpId : cacheStates.keySet()) {
        CacheState cacheState = cacheStates.get(grpId);
        for (int pIdx = 0; pIdx < cacheState.size(); pIdx++) {
            int part = cacheState.partitionByIndex(pIdx);
            GroupPartitionId grpPartKey = new GroupPartitionId(grpId, part);
            addPartitionToEarliestCheckpoints(grpPartKey, entry);
        }
    }
}
Also used : CacheState(org.apache.ignite.internal.pagemem.wal.record.CacheState) GroupPartitionId(org.apache.ignite.internal.processors.cache.persistence.partstate.GroupPartitionId)
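
Examples 3 and 4 are structurally identical: each flattens a per-group state object into one GroupPartitionId key per partition. The pattern works because GroupPartitionId is a value object whose equals and hashCode are defined over (groupId, partitionId), which is also why it serves as the key of partitionRecoveryStates and earliestCp in the other examples. A tiny self-contained illustration follows; the group ID 1234 and the string values are arbitrary placeholders.

import java.util.HashMap;
import java.util.Map;

import org.apache.ignite.internal.processors.cache.persistence.partstate.GroupPartitionId;

/** Minimal demo of GroupPartitionId as a composite map key; not part of the Ignite code base. */
final class GroupPartitionIdKeyDemo {
    public static void main(String[] args) {
        Map<GroupPartitionId, String> earliest = new HashMap<>();

        earliest.put(new GroupPartitionId(1234, 0), "checkpoint-A");

        // A fresh instance with the same (groupId, partitionId) coordinates finds the same entry.
        System.out.println(earliest.get(new GroupPartitionId(1234, 0))); // prints "checkpoint-A"
    }
}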

Example 5 with GroupPartitionId

Use of org.apache.ignite.internal.processors.cache.persistence.partstate.GroupPartitionId in project ignite by apache.

The class CheckpointHistory, method searchAndReserveCheckpoints:

/**
 * Finds and reserves the earliest valid checkpoint for each of the given groups and partitions.
 *
 * @param groupsAndPartitions Groups and partitions for which to find and reserve the earliest valid checkpoint.
 * @return Checkpoint history result: a map of groupId to (reservation reason explaining why the reservation
 * cannot be made deeper, map of partitionId to the earliest valid checkpoint for the history search), plus the
 * reserved checkpoint.
 */
public CheckpointHistoryResult searchAndReserveCheckpoints(final Map<Integer, Set<Integer>> groupsAndPartitions) {
    if (F.isEmpty(groupsAndPartitions) || reservationDisabled)
        return new CheckpointHistoryResult(Collections.emptyMap(), null);
    final Map<Integer, T2<ReservationReason, Map<Integer, CheckpointEntry>>> res = new HashMap<>();
    CheckpointEntry oldestCpForReservation = null;
    synchronized (earliestCp) {
        CheckpointEntry oldestHistCpEntry = firstCheckpoint();
        for (Integer grpId : groupsAndPartitions.keySet()) {
            CheckpointEntry oldestGrpCpEntry = null;
            for (Integer part : groupsAndPartitions.get(grpId)) {
                CheckpointEntry cpEntry = earliestCp.get(new GroupPartitionId(grpId, part));
                if (cpEntry == null)
                    continue;
                if (oldestCpForReservation == null || oldestCpForReservation.timestamp() > cpEntry.timestamp())
                    oldestCpForReservation = cpEntry;
                if (oldestGrpCpEntry == null || oldestGrpCpEntry.timestamp() > cpEntry.timestamp())
                    oldestGrpCpEntry = cpEntry;
                res.computeIfAbsent(grpId, partCpMap -> new T2<>(ReservationReason.NO_MORE_HISTORY, new HashMap<>())).get2().put(part, cpEntry);
            }
            if (oldestGrpCpEntry == null || oldestGrpCpEntry != oldestHistCpEntry)
                res.computeIfAbsent(grpId, (partCpMap) -> new T2<>(ReservationReason.CHECKPOINT_NOT_APPLICABLE, null)).set1(ReservationReason.CHECKPOINT_NOT_APPLICABLE);
        }
    }
    if (oldestCpForReservation != null) {
        if (!wal.reserve(oldestCpForReservation.checkpointMark())) {
            log.warning("Could not reserve cp " + oldestCpForReservation.checkpointMark());
            for (Map.Entry<Integer, T2<ReservationReason, Map<Integer, CheckpointEntry>>> entry : res.entrySet())
                entry.setValue(new T2<>(ReservationReason.WAL_RESERVATION_ERROR, null));
            oldestCpForReservation = null;
        }
    }
    return new CheckpointHistoryResult(res, oldestCpForReservation);
}
Also used : HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) NavigableMap(java.util.NavigableMap) ConcurrentSkipListMap(java.util.concurrent.ConcurrentSkipListMap) T2(org.apache.ignite.internal.util.typedef.T2) GroupPartitionId(org.apache.ignite.internal.processors.cache.persistence.partstate.GroupPartitionId)
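
A hypothetical caller sketch for searchAndReserveCheckpoints: the input is a plain map of group ID to the set of partitions whose history should be reserved. The surrounding class, method and variable names are placeholders, and the import paths assume a recent Ignite version.

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import org.apache.ignite.internal.processors.cache.persistence.checkpoint.CheckpointHistory;
import org.apache.ignite.internal.processors.cache.persistence.checkpoint.CheckpointHistoryResult;

/** Hypothetical caller sketch; not part of the Ignite code base. */
final class ReserveCheckpointsSketch {
    /** Requests reservation of the earliest valid checkpoints for partitions 0 and 1 of one cache group. */
    static CheckpointHistoryResult reserveFor(CheckpointHistory history, int grpId) {
        Map<Integer, Set<Integer>> groupsAndPartitions = new HashMap<>();

        Set<Integer> parts = new HashSet<>();
        parts.add(0);
        parts.add(1);

        groupsAndPartitions.put(grpId, parts);

        // Returns per-group reservation reasons and, when reservation succeeded, the reserved checkpoint.
        return history.searchAndReserveCheckpoints(groupsAndPartitions);
    }
}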

Aggregations

GroupPartitionId (org.apache.ignite.internal.processors.cache.persistence.partstate.GroupPartitionId): 19 usages
Map (java.util.Map): 10 usages
IgniteException (org.apache.ignite.IgniteException): 10 usages
File (java.io.File): 9 usages
HashMap (java.util.HashMap): 9 usages
Set (java.util.Set): 7 usages
UUID (java.util.UUID): 7 usages
IgniteCheckedException (org.apache.ignite.IgniteCheckedException): 7 usages
IgniteEx (org.apache.ignite.internal.IgniteEx): 7 usages
HashSet (java.util.HashSet): 6 usages
Test (org.junit.Test): 6 usages
ArrayList (java.util.ArrayList): 5 usages
Collections (java.util.Collections): 5 usages
List (java.util.List): 5 usages
CountDownLatch (java.util.concurrent.CountDownLatch): 5 usages
AtomicInteger (java.util.concurrent.atomic.AtomicInteger): 5 usages
IgniteInterruptedCheckedException (org.apache.ignite.internal.IgniteInterruptedCheckedException): 5 usages
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 4 usages
IgniteInternalFuture (org.apache.ignite.internal.IgniteInternalFuture): 4 usages
GridCacheSharedContext (org.apache.ignite.internal.processors.cache.GridCacheSharedContext): 4 usages