Search in sources :

Example 1 with MvccTxRecord

use of org.apache.ignite.internal.pagemem.wal.record.MvccTxRecord in project ignite by apache.

In the class TxRecordSerializer, the method readMvccTx.

/**
 * Deserializes a {@link MvccTxRecord} from the given input.
 * <p>
 * Values are consumed in the following order: transaction state (single byte), near xid version,
 * write version, MVCC version, the participating nodes map and, last, the timestamp. The map is
 * stored as an {@code int} entry count followed, for every entry, by the primary node ({@code short}),
 * an {@code int} backup count and that many backup nodes ({@code short} each).
 *
 * @param in Input to read the record from.
 * @return Record built from the values that were read.
 * @throws IOException If reading from the input fails.
 */
public MvccTxRecord readMvccTx(ByteBufferBackedDataInput in) throws IOException {
    TransactionState state = TransactionState.fromOrdinal(in.readByte());

    GridCacheVersion nearXidVer = RecordV1Serializer.readVersion(in, true);
    GridCacheVersion writeVer = RecordV1Serializer.readVersion(in, true);
    MvccVersion mvccVer = readMvccVersion(in);

    int primaryCnt = in.readInt();
    Map<Short, Collection<Short>> nodes = U.newHashMap(primaryCnt);

    for (int p = 0; p < primaryCnt; p++) {
        short primary = in.readShort();
        int backupCnt = in.readInt();

        // Pre-sized with the backup count read from the input.
        Collection<Short> backups = new ArrayList<>(backupCnt);

        for (int b = 0; b < backupCnt; b++)
            backups.add(in.readShort());

        nodes.put(primary, backups);
    }

    long ts = in.readLong();

    return new MvccTxRecord(state, nearXidVer, writeVer, nodes, mvccVer, ts);
}
Also used : TransactionState(org.apache.ignite.transactions.TransactionState) ArrayList(java.util.ArrayList) MvccTxRecord(org.apache.ignite.internal.pagemem.wal.record.MvccTxRecord) GridCacheVersion(org.apache.ignite.internal.processors.cache.version.GridCacheVersion) MvccVersion(org.apache.ignite.internal.processors.cache.mvcc.MvccVersion) Collection(java.util.Collection)

Example 2 with MvccTxRecord

use of org.apache.ignite.internal.pagemem.wal.record.MvccTxRecord in project ignite by apache.

In the class GridCacheDatabaseSharedManager, the method applyLogicalUpdates.

/**
 * Replays the write-ahead log starting from {@code status.startPtr} and applies the logical records
 * found there: tx states, checkpoint and partition-meta partition states, rollback counters, data
 * entries, MVCC tx states, metastore data, selected meta-page deltas, encryption records, index root
 * page renames and partition clearing records.
 * <p>
 * Data entries, page deltas and partition clearing are submitted through {@code stripedApply} /
 * {@code stripedApplyPage} together with the striped executor and a semaphore; the remaining record
 * types are handled inline on the calling thread. Once the WAL iterator is closed the method calls
 * {@code awaitApplyComplete(exec, applyError)} and then notifies every database lifecycle listener.
 *
 * @param status Last registered checkpoint status.
 * @param cacheGroupsPredicate Cache groups predicate; passed as-is to the {@code RestoreLogicalState} constructor.
 * @param recordTypePredicate Record predicate; passed as-is to {@code cctx.wal().replay(...)}.
 * @param restoreMeta Metastore restore phase if {@code true}.
 * @return The {@code RestoreLogicalState} that was used to iterate the WAL, including the collected
 *      partition recovery states.
 * @throws IgniteCheckedException If failed to apply updates.
 * @throws StorageException If IO exception occurred while reading write-ahead log.
 */
private RestoreLogicalState applyLogicalUpdates(CheckpointStatus status, IgnitePredicate<Integer> cacheGroupsPredicate, IgniteBiPredicate<WALRecord.RecordType, WALPointer> recordTypePredicate, boolean restoreMeta) throws IgniteCheckedException {
    if (log.isInfoEnabled())
        log.info("Applying lost " + (restoreMeta ? "metastore" : "cache") + " updates since last checkpoint record [lastMarked=" + status.startPtr + ", lastCheckpointId=" + status.cpStartId + ']');
    // Enabled only for the non-metastore phase; switched back off in the finally block below.
    if (!restoreMeta)
        cctx.kernalContext().query().skipFieldLookup(true);
    long start = U.currentTimeMillis();
    // Holds the first failure reported by a striped task (see compareAndSet(null, e) below).
    AtomicReference<Throwable> applyError = new AtomicReference<>();
    // Number of data entries processed by striped tasks; only used for the final log message.
    AtomicLong applied = new AtomicLong();
    long lastArchivedSegment = cctx.wal().lastArchivedSegment();
    StripedExecutor exec = cctx.kernalContext().pools().getStripedExecutorService();
    // Handed to every stripedApply/stripedApplyPage call; permit count comes from semaphorePertmits(exec).
    Semaphore semaphore = new Semaphore(semaphorePertmits(exec));
    // Shared with restoreLogicalState: filled here from checkpoint and partition meta records.
    Map<GroupPartitionId, Integer> partitionRecoveryStates = new HashMap<>();
    WALIterator it = cctx.wal().replay(status.startPtr, recordTypePredicate);
    RestoreLogicalState restoreLogicalState = new RestoreLogicalState(status, it, lastArchivedSegment, cacheGroupsPredicate, partitionRecoveryStates);
    final IgniteTxManager txManager = cctx.tm();
    try {
        while (restoreLogicalState.hasNext()) {
            WALRecord rec = restoreLogicalState.next();
            // A null record ends the replay.
            if (rec == null)
                break;
            switch(rec.type()) {
                case TX_RECORD:
                    if (restoreMeta) {
                        // Also restore tx states.
                        TxRecord txRec = (TxRecord) rec;
                        txManager.collectTxStates(txRec);
                    }
                    break;
                case // Calculate initial partition states
                CHECKPOINT_RECORD:
                    CheckpointRecord cpRec = (CheckpointRecord) rec;
                    for (Map.Entry<Integer, CacheState> entry : cpRec.cacheGroupStates().entrySet()) {
                        CacheState cacheState = entry.getValue();
                        for (int i = 0; i < cacheState.size(); i++) {
                            int partId = cacheState.partitionByIndex(i);
                            byte state = cacheState.stateByIndex(i);
                            // Ignore undefined state.
                            if (state != -1) {
                                partitionRecoveryStates.put(new GroupPartitionId(entry.getKey(), partId), (int) state);
                            }
                        }
                    }
                    break;
                case ROLLBACK_TX_RECORD:
                    RollbackRecord rbRec = (RollbackRecord) rec;
                    // NOTE: this local 'ctx' stays in scope for the rest of the switch and shadows the 'ctx' field.
                    CacheGroupContext ctx = cctx.cache().cacheGroup(rbRec.groupId());
                    if (ctx != null && !ctx.isLocal()) {
                        GridDhtLocalPartition part = ctx.topology().forceCreatePartition(rbRec.partitionId());
                        // Passes the record's start and range to the partition data store counter.
                        ctx.offheap().dataStore(part).updateInitialCounter(rbRec.start(), rbRec.range());
                    }
                    break;
                case MVCC_DATA_RECORD:
                case DATA_RECORD:
                case DATA_RECORD_V2:
                case ENCRYPTED_DATA_RECORD:
                case ENCRYPTED_DATA_RECORD_V2:
                case ENCRYPTED_DATA_RECORD_V3:
                    DataRecord dataRec = (DataRecord) rec;
                    int entryCnt = dataRec.entryCount();
                    for (int i = 0; i < entryCnt; i++) {
                        DataEntry dataEntry = dataRec.get(i);
                        // Outside the metastore phase, entries the tx manager reports as uncommitted are skipped.
                        if (!restoreMeta && txManager.uncommitedTx(dataEntry))
                            continue;
                        int cacheId = dataEntry.cacheId();
                        DynamicCacheDescriptor cacheDesc = cctx.cache().cacheDescriptor(cacheId);
                        // Can be empty in case recovery node on blt changed.
                        if (cacheDesc == null)
                            continue;
                        stripedApply(() -> {
                            GridCacheContext cacheCtx = cctx.cacheContext(cacheId);
                            if (skipRemovedIndexUpdates(cacheCtx.groupId(), PageIdAllocator.INDEX_PARTITION))
                                cctx.kernalContext().query().markAsRebuildNeeded(cacheCtx, true);
                            try {
                                applyUpdate(cacheCtx, dataEntry);
                            } catch (IgniteCheckedException e) {
                                U.error(log, "Failed to apply data entry, dataEntry=" + dataEntry + ", ptr=" + dataRec.position());
                                // Only the first failure is retained.
                                applyError.compareAndSet(null, e);
                            }
                            // Incremented whether or not applyUpdate succeeded.
                            applied.incrementAndGet();
                        }, cacheDesc.groupId(), dataEntry.partitionId(), exec, semaphore);
                    }
                    break;
                case MVCC_TX_RECORD:
                    MvccTxRecord txRecord = (MvccTxRecord) rec;
                    byte txState = convertToTxState(txRecord.state());
                    cctx.coordinators().updateState(txRecord.mvccVersion(), txState, true);
                    break;
                case PART_META_UPDATE_STATE:
                    PartitionMetaStateRecord metaStateRecord = (PartitionMetaStateRecord) rec;
                    GroupPartitionId groupPartitionId = new GroupPartitionId(metaStateRecord.groupId(), metaStateRecord.partitionId());
                    // A later partition meta record overwrites any state stored earlier for the same partition.
                    restoreLogicalState.partitionRecoveryStates.put(groupPartitionId, (int) metaStateRecord.state());
                    break;
                case METASTORE_DATA_RECORD:
                    MetastoreDataRecord metastoreDataRecord = (MetastoreDataRecord) rec;
                    metaStorage.applyUpdate(metastoreDataRecord.key(), metastoreDataRecord.value());
                    break;
                case META_PAGE_UPDATE_NEXT_SNAPSHOT_ID:
                case META_PAGE_UPDATE_LAST_SUCCESSFUL_SNAPSHOT_ID:
                case META_PAGE_UPDATE_LAST_SUCCESSFUL_FULL_SNAPSHOT_ID:
                case META_PAGE_UPDATE_LAST_ALLOCATED_INDEX:
                    PageDeltaRecord pageDelta = (PageDeltaRecord) rec;
                    stripedApplyPage((pageMem) -> {
                        try {
                            applyPageDelta(pageMem, pageDelta, false);
                        } catch (IgniteCheckedException e) {
                            U.error(log, "Failed to apply page delta, " + pageDelta);
                            // Only the first failure is retained.
                            applyError.compareAndSet(null, e);
                        }
                    }, pageDelta.groupId(), partId(pageDelta.pageId()), exec, semaphore);
                    break;
                case MASTER_KEY_CHANGE_RECORD_V2:
                    cctx.kernalContext().encryption().applyKeys((MasterKeyChangeRecordV2) rec);
                    break;
                case REENCRYPTION_START_RECORD:
                    cctx.kernalContext().encryption().applyReencryptionStartRecord((ReencryptionStartRecord) rec);
                    break;
                case INDEX_ROOT_PAGE_RENAME_RECORD:
                    IndexRenameRootPageRecord record = (IndexRenameRootPageRecord) rec;
                    int cacheId = record.cacheId();
                    GridCacheContext cacheCtx = cctx.cacheContext(cacheId);
                    if (cacheCtx != null) {
                        IgniteCacheOffheapManager offheap = cacheCtx.offheap();
                        // The rename is applied once per segment index reported by the record.
                        for (int i = 0; i < record.segments(); i++) offheap.renameRootPageForIndex(cacheId, record.oldTreeName(), record.newTreeName(), i);
                    }
                    break;
                case PARTITION_CLEARING_START_RECORD:
                    PartitionClearingStartRecord rec0 = (PartitionClearingStartRecord) rec;
                    // 'this.ctx' is the field; the bare name 'ctx' refers to the local declared in the ROLLBACK_TX_RECORD case.
                    CacheGroupContext grp = this.ctx.cache().cacheGroup(rec0.groupId());
                    if (grp != null) {
                        GridDhtLocalPartition part;
                        try {
                            part = grp.topology().forceCreatePartition(rec0.partitionId());
                        } catch (IgniteCheckedException e) {
                            throw new IgniteException("Cannot get or create a partition [groupId=" + rec0.groupId() + ", partitionId=" + rec0.partitionId() + "]", e);
                        }
                        stripedApply(() -> {
                            try {
                                part.updateClearVersion(rec0.clearVersion());
                                IgniteInternalFuture<?> clearFut = grp.shared().evict().evictPartitionAsync(grp, part, new GridFutureAdapter<>());
                                // Blocks the striped task until the eviction future completes.
                                clearFut.get();
                                part.updateClearVersion();
                            } catch (IgniteCheckedException e) {
                                U.error(log, "Failed to apply partition clearing record, " + rec0);
                                // Only the first failure is retained.
                                applyError.compareAndSet(null, e);
                            }
                        }, rec0.groupId(), rec0.partitionId(), exec, semaphore);
                    }
                    break;
                // All other record types are ignored here.
                default:
            }
        }
    } finally {
        // Always release the WAL iterator and undo the skipFieldLookup(true) set at the top.
        it.close();
        if (!restoreMeta)
            cctx.kernalContext().query().skipFieldLookup(false);
    }
    // NOTE(review): presumably waits for the submitted striped tasks and surfaces applyError — confirm in awaitApplyComplete.
    awaitApplyComplete(exec, applyError);
    if (log.isInfoEnabled())
        log.info("Finished applying WAL changes [updatesApplied=" + applied + ", time=" + (U.currentTimeMillis() - start) + " ms]");
    for (DatabaseLifecycleListener lsnr : getDatabaseListeners(cctx.kernalContext())) lsnr.afterLogicalUpdatesApplied(this, restoreLogicalState);
    return restoreLogicalState;
}
Also used : WALRecord(org.apache.ignite.internal.pagemem.wal.record.WALRecord) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) PageDeltaRecord(org.apache.ignite.internal.pagemem.wal.record.delta.PageDeltaRecord) Semaphore(java.util.concurrent.Semaphore) MvccTxRecord(org.apache.ignite.internal.pagemem.wal.record.MvccTxRecord) DataEntry(org.apache.ignite.internal.pagemem.wal.record.DataEntry) MvccDataEntry(org.apache.ignite.internal.pagemem.wal.record.MvccDataEntry) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) WALIterator(org.apache.ignite.internal.pagemem.wal.WALIterator) IgniteException(org.apache.ignite.IgniteException) GridDhtLocalPartition(org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition) DataRecord(org.apache.ignite.internal.pagemem.wal.record.DataRecord) MetastoreDataRecord(org.apache.ignite.internal.pagemem.wal.record.MetastoreDataRecord) GroupPartitionId(org.apache.ignite.internal.processors.cache.persistence.partstate.GroupPartitionId) GridCacheContext(org.apache.ignite.internal.processors.cache.GridCacheContext) PartitionMetaStateRecord(org.apache.ignite.internal.pagemem.wal.record.delta.PartitionMetaStateRecord) DynamicCacheDescriptor(org.apache.ignite.internal.processors.cache.DynamicCacheDescriptor) AtomicReference(java.util.concurrent.atomic.AtomicReference) CheckpointRecord(org.apache.ignite.internal.pagemem.wal.record.CheckpointRecord) IgniteTxManager(org.apache.ignite.internal.processors.cache.transactions.IgniteTxManager) CacheState(org.apache.ignite.internal.pagemem.wal.record.CacheState) TxRecord(org.apache.ignite.internal.pagemem.wal.record.TxRecord) MvccTxRecord(org.apache.ignite.internal.pagemem.wal.record.MvccTxRecord) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) IgniteSystemProperties.getInteger(org.apache.ignite.IgniteSystemProperties.getInteger) AtomicLong(java.util.concurrent.atomic.AtomicLong) 
PartitionClearingStartRecord(org.apache.ignite.internal.pagemem.wal.record.PartitionClearingStartRecord) IgniteCacheOffheapManager(org.apache.ignite.internal.processors.cache.IgniteCacheOffheapManager) StripedExecutor(org.apache.ignite.internal.util.StripedExecutor) MetastoreDataRecord(org.apache.ignite.internal.pagemem.wal.record.MetastoreDataRecord) RollbackRecord(org.apache.ignite.internal.pagemem.wal.record.RollbackRecord) CacheGroupContext(org.apache.ignite.internal.processors.cache.CacheGroupContext) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) IndexRenameRootPageRecord(org.apache.ignite.internal.pagemem.wal.record.IndexRenameRootPageRecord)

Example 3 with MvccTxRecord

use of org.apache.ignite.internal.pagemem.wal.record.MvccTxRecord in project ignite by apache.

In the class GridCacheDatabaseSharedManager, the method applyUpdatesOnRecovery.

/**
 * Applies updates read from the given WAL iterator, subject to the supplied filters.
 * <p>
 * The whole replay runs inside {@code cctx.walState().runWithOutWAL(...)}. Iteration stops at the
 * first record rejected by {@code recPredicate}. Only data records ({@code MVCC_DATA_RECORD},
 * {@code DATA_RECORD}, {@code DATA_RECORD_V2}) and {@code MVCC_TX_RECORD} are processed; every other
 * record type is ignored. Each processed record is handled under the checkpoint read lock.
 *
 * @param it WAL iterator; when {@code null} the method returns immediately.
 * @param recPredicate WAL record filter; the first record it rejects ends the replay.
 * @param entryPredicate Entry filter; data entries it rejects are skipped.
 * @throws IgniteCheckedException If failed.
 */
public void applyUpdatesOnRecovery(@Nullable WALIterator it, IgniteBiPredicate<WALPointer, WALRecord> recPredicate, IgnitePredicate<DataEntry> entryPredicate) throws IgniteCheckedException {
    if (it == null)
        return;

    cctx.walState().runWithOutWAL(() -> {
        while (it.hasNext()) {
            IgniteBiTuple<WALPointer, WALRecord> ptrAndRec = it.next();
            WALRecord walRec = ptrAndRec.get2();

            // The first rejected record terminates the replay (it is not merely skipped).
            if (!recPredicate.apply(ptrAndRec.get1(), walRec))
                break;

            switch (walRec.type()) {
                case MVCC_DATA_RECORD:
                case DATA_RECORD:
                case DATA_RECORD_V2: {
                    checkpointReadLock();

                    try {
                        DataRecord dataRec = (DataRecord) walRec;
                        int total = dataRec.entryCount();

                        for (int idx = 0; idx < total; idx++) {
                            DataEntry entry = dataRec.get(idx);

                            if (!entryPredicate.apply(entry))
                                continue;

                            // The lock is taken again per entry, in addition to the record-level one.
                            checkpointReadLock();

                            try {
                                int cacheId = entry.cacheId();
                                GridCacheContext cacheCtx = cctx.cacheContext(cacheId);

                                if (cacheCtx == null) {
                                    if (log != null)
                                        log.warning("Cache is not started. Updates cannot be applied " + "[cacheId=" + cacheId + ']');
                                }
                                else
                                    applyUpdate(cacheCtx, entry);
                            }
                            finally {
                                checkpointReadUnlock();
                            }
                        }
                    }
                    catch (IgniteCheckedException e) {
                        // Rethrown unchecked so it can leave the closure.
                        throw new IgniteException(e);
                    }
                    finally {
                        checkpointReadUnlock();
                    }

                    break;
                }

                case MVCC_TX_RECORD: {
                    checkpointReadLock();

                    try {
                        MvccTxRecord mvccTxRec = (MvccTxRecord) walRec;
                        byte convertedState = convertToTxState(mvccTxRec.state());

                        cctx.coordinators().updateState(mvccTxRec.mvccVersion(), convertedState, true);
                    }
                    finally {
                        checkpointReadUnlock();
                    }

                    break;
                }

                default:
            }
        }
    });
}
Also used : WALRecord(org.apache.ignite.internal.pagemem.wal.record.WALRecord) DataEntry(org.apache.ignite.internal.pagemem.wal.record.DataEntry) MvccDataEntry(org.apache.ignite.internal.pagemem.wal.record.MvccDataEntry) GridCacheContext(org.apache.ignite.internal.processors.cache.GridCacheContext) IgniteCheckedException(org.apache.ignite.IgniteCheckedException) IgniteException(org.apache.ignite.IgniteException) DataRecord(org.apache.ignite.internal.pagemem.wal.record.DataRecord) MetastoreDataRecord(org.apache.ignite.internal.pagemem.wal.record.MetastoreDataRecord) WALPointer(org.apache.ignite.internal.processors.cache.persistence.wal.WALPointer) MvccTxRecord(org.apache.ignite.internal.pagemem.wal.record.MvccTxRecord)

Example 4 with MvccTxRecord

use of org.apache.ignite.internal.pagemem.wal.record.MvccTxRecord in project ignite by apache.

In the class IgniteTxManager, the method logTxRecord.

/**
 * Writes the transaction's current state to the WAL when applicable.
 * <p>
 * Nothing is logged, and {@code null} is returned, when there is no WAL manager, when no baseline
 * topology is set, or when the local node's consistent ID is not among the baseline consistent IDs.
 * Otherwise an {@link MvccTxRecord} is logged if the transaction state reports MVCC as enabled, and a
 * plain {@link TxRecord} if not.
 *
 * @param tx Transaction.
 * @return Pointer returned by the WAL for the logged record, or {@code null} if nothing was logged.
 * @throws IgniteException If the WAL failed to log the record; the original exception is the cause.
 */
@Nullable
WALPointer logTxRecord(IgniteTxAdapter tx) {
    if (cctx.wal() == null)
        return null;

    BaselineTopology baselineTop = cctx.kernalContext().state().clusterState().baselineTopology();

    if (baselineTop == null)
        return null;

    // Tx state changes are logged only if the local node belongs to the baseline.
    boolean locNodeInBaseline = baselineTop.consistentIds().contains(cctx.localNode().consistentId());

    if (!locNodeInBaseline)
        return null;

    Map<Short, Collection<Short>> compactNodes = tx.consistentIdMapper.mapToCompactIds(tx.topVer, tx.txNodes, baselineTop);

    TxRecord txRec = tx.txState().mvccEnabled()
        ? new MvccTxRecord(tx.state(), tx.nearXidVersion(), tx.writeVersion(), compactNodes, tx.mvccSnapshot())
        : new TxRecord(tx.state(), tx.nearXidVersion(), tx.writeVersion(), compactNodes);

    try {
        return cctx.wal().log(txRec);
    }
    catch (IgniteCheckedException e) {
        U.error(log, "Failed to log TxRecord: " + txRec, e);

        throw new IgniteException("Failed to log TxRecord: " + txRec, e);
    }
}
Also used : IgniteCheckedException(org.apache.ignite.IgniteCheckedException) IgniteException(org.apache.ignite.IgniteException) BaselineTopology(org.apache.ignite.internal.processors.cluster.BaselineTopology) Collection(java.util.Collection) TxRecord(org.apache.ignite.internal.pagemem.wal.record.TxRecord) MvccTxRecord(org.apache.ignite.internal.pagemem.wal.record.MvccTxRecord) MvccTxRecord(org.apache.ignite.internal.pagemem.wal.record.MvccTxRecord) Nullable(org.jetbrains.annotations.Nullable)

Aggregations

MvccTxRecord (org.apache.ignite.internal.pagemem.wal.record.MvccTxRecord)4 IgniteCheckedException (org.apache.ignite.IgniteCheckedException)3 IgniteException (org.apache.ignite.IgniteException)3 Collection (java.util.Collection)2 DataEntry (org.apache.ignite.internal.pagemem.wal.record.DataEntry)2 DataRecord (org.apache.ignite.internal.pagemem.wal.record.DataRecord)2 MetastoreDataRecord (org.apache.ignite.internal.pagemem.wal.record.MetastoreDataRecord)2 MvccDataEntry (org.apache.ignite.internal.pagemem.wal.record.MvccDataEntry)2 TxRecord (org.apache.ignite.internal.pagemem.wal.record.TxRecord)2 WALRecord (org.apache.ignite.internal.pagemem.wal.record.WALRecord)2 GridCacheContext (org.apache.ignite.internal.processors.cache.GridCacheContext)2 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 Map (java.util.Map)1 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)1 Semaphore (java.util.concurrent.Semaphore)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 AtomicLong (java.util.concurrent.atomic.AtomicLong)1 AtomicReference (java.util.concurrent.atomic.AtomicReference)1 IgniteSystemProperties.getInteger (org.apache.ignite.IgniteSystemProperties.getInteger)1