Use of org.apache.ignite.internal.pagemem.wal.record.RollbackRecord in project ignite by apache.
The class RecordDataV2Serializer, method writePlainRecord.
/** {@inheritDoc} */
@Override protected void writePlainRecord(WALRecord rec, ByteBuffer buf) throws IgniteCheckedException {
    if (rec instanceof HeaderRecord)
        throw new UnsupportedOperationException("Writing header records is forbidden since version 2 of serializer");

    switch (rec.type()) {
        case CHECKPOINT_RECORD:
            CheckpointRecord cpRec = (CheckpointRecord)rec;

            WALPointer walPtr = cpRec.checkpointMark();
            UUID cpId = cpRec.checkpointId();

            buf.putLong(cpId.getMostSignificantBits());
            buf.putLong(cpId.getLeastSignificantBits());

            buf.put(walPtr == null ? (byte)0 : 1);

            if (walPtr != null) {
                buf.putLong(walPtr.index());
                buf.putInt(walPtr.fileOffset());
                buf.putInt(walPtr.length());
            }

            putCacheStates(buf, cpRec.cacheGroupStates());

            buf.put(cpRec.end() ? (byte)1 : 0);

            break;

        case MVCC_DATA_RECORD:
        case DATA_RECORD_V2:
            DataRecord dataRec = (DataRecord)rec;

            int entryCnt = dataRec.entryCount();

            buf.putInt(entryCnt);
            buf.putLong(dataRec.timestamp());

            boolean encrypted = isDataRecordEncrypted(dataRec);

            for (int i = 0; i < entryCnt; i++) {
                DataEntry dataEntry = dataRec.get(i);

                if (encrypted)
                    putEncryptedDataEntry(buf, dataEntry);
                else
                    putPlainDataEntry(buf, dataEntry);
            }

            break;

        case SNAPSHOT:
            SnapshotRecord snpRec = (SnapshotRecord)rec;

            buf.putLong(snpRec.getSnapshotId());
            buf.put(snpRec.isFull() ? (byte)1 : 0);

            break;

        case EXCHANGE:
            ExchangeRecord r = (ExchangeRecord)rec;

            buf.putInt(r.getType().ordinal());
            buf.putShort(r.getConstId());
            buf.putLong(r.timestamp());

            break;

        case TX_RECORD:
            txRecordSerializer.write((TxRecord)rec, buf);

            break;

        case MVCC_TX_RECORD:
            txRecordSerializer.write((MvccTxRecord)rec, buf);

            break;

        case ROLLBACK_TX_RECORD:
            RollbackRecord rb = (RollbackRecord)rec;

            buf.putInt(rb.groupId());
            buf.putInt(rb.partitionId());
            buf.putLong(rb.start());
            buf.putLong(rb.range());

            break;

        case TRACKING_PAGE_REPAIR_DELTA:
            TrackingPageRepairDeltaRecord tprDelta = (TrackingPageRepairDeltaRecord)rec;

            buf.putInt(tprDelta.groupId());
            buf.putLong(tprDelta.pageId());

            break;

        default:
            super.writePlainRecord(rec, buf);
    }
}
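For reference, the ROLLBACK_TX_RECORD branch above writes a fixed 24-byte payload in the order groupId (int), partitionId (int), start (long), range (long). Below is a minimal standalone sketch of a reader that mirrors that field order; the RollbackRecordPayload class is hypothetical and is not Ignite's actual deserializer.

import java.nio.ByteBuffer;

/** Hypothetical holder for the ROLLBACK_TX_RECORD payload, read back in write order. */
public class RollbackRecordPayload {
    public final int groupId;
    public final int partitionId;
    public final long start;
    public final long range;

    public RollbackRecordPayload(int groupId, int partitionId, long start, long range) {
        this.groupId = groupId;
        this.partitionId = partitionId;
        this.start = start;
        this.range = range;
    }

    /** Decodes the four fields in the same order the serializer above writes them. */
    public static RollbackRecordPayload read(ByteBuffer buf) {
        return new RollbackRecordPayload(buf.getInt(), buf.getInt(), buf.getLong(), buf.getLong());
    }

    public static void main(String[] args) {
        ByteBuffer buf = ByteBuffer.allocate(24);

        // Mirror the writer: groupId, partitionId, start, range.
        buf.putInt(1).putInt(5).putLong(100L).putLong(10L).flip();

        RollbackRecordPayload p = read(buf);

        // Run with -ea to enable assertions.
        assert p.groupId == 1 && p.partitionId == 5 && p.start == 100L && p.range == 10L;
    }
}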
Use of org.apache.ignite.internal.pagemem.wal.record.RollbackRecord in project ignite by apache.
The class GridCacheDatabaseSharedManager, method applyLogicalUpdates.
/**
 * @param status Last registered checkpoint status.
 * @param cacheGroupsPredicate Filter of cache groups to restore.
 * @param recordTypePredicate Filter of WAL record types to replay.
 * @param restoreMeta Metastore restore phase if {@code true}.
 * @return Restored logical state.
 * @throws IgniteCheckedException If failed to apply updates.
 * @throws StorageException If IO exception occurred while reading write-ahead log.
 */
private RestoreLogicalState applyLogicalUpdates(
    CheckpointStatus status,
    IgnitePredicate<Integer> cacheGroupsPredicate,
    IgniteBiPredicate<WALRecord.RecordType, WALPointer> recordTypePredicate,
    boolean restoreMeta
) throws IgniteCheckedException {
    if (log.isInfoEnabled())
        log.info("Applying lost " + (restoreMeta ? "metastore" : "cache") + " updates since last checkpoint record " +
            "[lastMarked=" + status.startPtr + ", lastCheckpointId=" + status.cpStartId + ']');

    if (!restoreMeta)
        cctx.kernalContext().query().skipFieldLookup(true);

    long start = U.currentTimeMillis();

    AtomicReference<Throwable> applyError = new AtomicReference<>();
    AtomicLong applied = new AtomicLong();

    long lastArchivedSegment = cctx.wal().lastArchivedSegment();

    StripedExecutor exec = cctx.kernalContext().pools().getStripedExecutorService();
    Semaphore semaphore = new Semaphore(semaphorePertmits(exec));

    Map<GroupPartitionId, Integer> partitionRecoveryStates = new HashMap<>();

    WALIterator it = cctx.wal().replay(status.startPtr, recordTypePredicate);

    RestoreLogicalState restoreLogicalState =
        new RestoreLogicalState(status, it, lastArchivedSegment, cacheGroupsPredicate, partitionRecoveryStates);

    final IgniteTxManager txManager = cctx.tm();

    try {
        while (restoreLogicalState.hasNext()) {
            WALRecord rec = restoreLogicalState.next();

            if (rec == null)
                break;

            switch (rec.type()) {
                case TX_RECORD:
                    if (restoreMeta) {
                        // Also restore tx states.
                        TxRecord txRec = (TxRecord)rec;

                        txManager.collectTxStates(txRec);
                    }

                    break;

                // Calculate initial partition states.
                case CHECKPOINT_RECORD:
                    CheckpointRecord cpRec = (CheckpointRecord)rec;

                    for (Map.Entry<Integer, CacheState> entry : cpRec.cacheGroupStates().entrySet()) {
                        CacheState cacheState = entry.getValue();

                        for (int i = 0; i < cacheState.size(); i++) {
                            int partId = cacheState.partitionByIndex(i);
                            byte state = cacheState.stateByIndex(i);

                            // Ignore undefined state.
                            if (state != -1)
                                partitionRecoveryStates.put(new GroupPartitionId(entry.getKey(), partId), (int)state);
                        }
                    }

                    break;

                case ROLLBACK_TX_RECORD:
                    RollbackRecord rbRec = (RollbackRecord)rec;

                    CacheGroupContext ctx = cctx.cache().cacheGroup(rbRec.groupId());

                    if (ctx != null && !ctx.isLocal()) {
                        GridDhtLocalPartition part = ctx.topology().forceCreatePartition(rbRec.partitionId());

                        ctx.offheap().dataStore(part).updateInitialCounter(rbRec.start(), rbRec.range());
                    }

                    break;

                case MVCC_DATA_RECORD:
                case DATA_RECORD:
                case DATA_RECORD_V2:
                case ENCRYPTED_DATA_RECORD:
                case ENCRYPTED_DATA_RECORD_V2:
                case ENCRYPTED_DATA_RECORD_V3:
                    DataRecord dataRec = (DataRecord)rec;

                    int entryCnt = dataRec.entryCount();

                    for (int i = 0; i < entryCnt; i++) {
                        DataEntry dataEntry = dataRec.get(i);

                        if (!restoreMeta && txManager.uncommitedTx(dataEntry))
                            continue;

                        int cacheId = dataEntry.cacheId();

                        DynamicCacheDescriptor cacheDesc = cctx.cache().cacheDescriptor(cacheId);

                        // Can be empty if the node is recovering after the baseline topology changed.
                        if (cacheDesc == null)
                            continue;

                        stripedApply(() -> {
                            GridCacheContext cacheCtx = cctx.cacheContext(cacheId);

                            if (skipRemovedIndexUpdates(cacheCtx.groupId(), PageIdAllocator.INDEX_PARTITION))
                                cctx.kernalContext().query().markAsRebuildNeeded(cacheCtx, true);

                            try {
                                applyUpdate(cacheCtx, dataEntry);
                            }
                            catch (IgniteCheckedException e) {
                                U.error(log, "Failed to apply data entry, dataEntry=" + dataEntry + ", ptr=" + dataRec.position());

                                applyError.compareAndSet(null, e);
                            }

                            applied.incrementAndGet();
                        }, cacheDesc.groupId(), dataEntry.partitionId(), exec, semaphore);
                    }

                    break;

                case MVCC_TX_RECORD:
                    MvccTxRecord txRecord = (MvccTxRecord)rec;

                    byte txState = convertToTxState(txRecord.state());

                    cctx.coordinators().updateState(txRecord.mvccVersion(), txState, true);

                    break;

                case PART_META_UPDATE_STATE:
                    PartitionMetaStateRecord metaStateRecord = (PartitionMetaStateRecord)rec;

                    GroupPartitionId groupPartitionId =
                        new GroupPartitionId(metaStateRecord.groupId(), metaStateRecord.partitionId());

                    restoreLogicalState.partitionRecoveryStates.put(groupPartitionId, (int)metaStateRecord.state());

                    break;

                case METASTORE_DATA_RECORD:
                    MetastoreDataRecord metastoreDataRecord = (MetastoreDataRecord)rec;

                    metaStorage.applyUpdate(metastoreDataRecord.key(), metastoreDataRecord.value());

                    break;

                case META_PAGE_UPDATE_NEXT_SNAPSHOT_ID:
                case META_PAGE_UPDATE_LAST_SUCCESSFUL_SNAPSHOT_ID:
                case META_PAGE_UPDATE_LAST_SUCCESSFUL_FULL_SNAPSHOT_ID:
                case META_PAGE_UPDATE_LAST_ALLOCATED_INDEX:
                    PageDeltaRecord pageDelta = (PageDeltaRecord)rec;

                    stripedApplyPage((pageMem) -> {
                        try {
                            applyPageDelta(pageMem, pageDelta, false);
                        }
                        catch (IgniteCheckedException e) {
                            U.error(log, "Failed to apply page delta, " + pageDelta);

                            applyError.compareAndSet(null, e);
                        }
                    }, pageDelta.groupId(), partId(pageDelta.pageId()), exec, semaphore);

                    break;

                case MASTER_KEY_CHANGE_RECORD_V2:
                    cctx.kernalContext().encryption().applyKeys((MasterKeyChangeRecordV2)rec);

                    break;

                case REENCRYPTION_START_RECORD:
                    cctx.kernalContext().encryption().applyReencryptionStartRecord((ReencryptionStartRecord)rec);

                    break;

                case INDEX_ROOT_PAGE_RENAME_RECORD:
                    IndexRenameRootPageRecord record = (IndexRenameRootPageRecord)rec;

                    int cacheId = record.cacheId();

                    GridCacheContext cacheCtx = cctx.cacheContext(cacheId);

                    if (cacheCtx != null) {
                        IgniteCacheOffheapManager offheap = cacheCtx.offheap();

                        for (int i = 0; i < record.segments(); i++)
                            offheap.renameRootPageForIndex(cacheId, record.oldTreeName(), record.newTreeName(), i);
                    }

                    break;

                case PARTITION_CLEARING_START_RECORD:
                    PartitionClearingStartRecord rec0 = (PartitionClearingStartRecord)rec;

                    CacheGroupContext grp = this.ctx.cache().cacheGroup(rec0.groupId());

                    if (grp != null) {
                        GridDhtLocalPartition part;

                        try {
                            part = grp.topology().forceCreatePartition(rec0.partitionId());
                        }
                        catch (IgniteCheckedException e) {
                            throw new IgniteException("Cannot get or create a partition [groupId=" + rec0.groupId() +
                                ", partitionId=" + rec0.partitionId() + "]", e);
                        }

                        stripedApply(() -> {
                            try {
                                part.updateClearVersion(rec0.clearVersion());

                                IgniteInternalFuture<?> clearFut =
                                    grp.shared().evict().evictPartitionAsync(grp, part, new GridFutureAdapter<>());

                                clearFut.get();

                                part.updateClearVersion();
                            }
                            catch (IgniteCheckedException e) {
                                U.error(log, "Failed to apply partition clearing record, " + rec0);

                                applyError.compareAndSet(null, e);
                            }
                        }, rec0.groupId(), rec0.partitionId(), exec, semaphore);
                    }

                    break;

                default:
                    // No-op.
            }
        }
    }
    finally {
        it.close();

        if (!restoreMeta)
            cctx.kernalContext().query().skipFieldLookup(false);
    }

    awaitApplyComplete(exec, applyError);

    if (log.isInfoEnabled())
        log.info("Finished applying WAL changes [updatesApplied=" + applied +
            ", time=" + (U.currentTimeMillis() - start) + " ms]");

    for (DatabaseLifecycleListener lsnr : getDatabaseListeners(cctx.kernalContext()))
        lsnr.afterLogicalUpdatesApplied(this, restoreLogicalState);

    return restoreLogicalState;
}
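The replay loop above only sees records admitted by recordTypePredicate. As an illustration only (the exact filter used by callers is not shown here), such a predicate could be built from WALRecord.RecordType values like this; the import paths reflect recent Ignite versions and may differ in yours:

import org.apache.ignite.internal.pagemem.wal.record.WALRecord;
import org.apache.ignite.internal.processors.cache.persistence.wal.WALPointer;
import org.apache.ignite.lang.IgniteBiPredicate;

// Illustrative only: replay just a subset of the logical record types handled above.
IgniteBiPredicate<WALRecord.RecordType, WALPointer> logicalRecordsFilter =
    (type, ptr) ->
        type == WALRecord.RecordType.ROLLBACK_TX_RECORD
            || type == WALRecord.RecordType.DATA_RECORD_V2
            || type == WALRecord.RecordType.CHECKPOINT_RECORD
            || type == WALRecord.RecordType.METASTORE_DATA_RECORD;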
Use of org.apache.ignite.internal.pagemem.wal.record.RollbackRecord in project ignite by apache.
The class IgniteTxHandler, method applyPartitionsUpdatesCounters.
/**
 * Applies partition counter updates for transactions.
 * <p>
 * Called after entries are written to WAL on commit, or during rollback to close gaps in the update counter sequence.
 * <p>
 * On rollback, counters should be applied on the primary only after the backup nodes. Otherwise, if the primary
 * fails before sending rollback requests to the backups, the remote transactions can be committed by the recovery
 * protocol, and partition consistency will not be restored when the primary returns to the grid, because a
 * RollbackRecord has already been written (relevant for persistent mode only).
 *
 * @param counters Counter values to be updated.
 * @param rollback {@code True} if applied during rollback.
 * @param rollbackOnPrimary {@code True} if rollback happens on the primary node. Passed to the CQ engine.
 * @throws IgniteCheckedException If failed.
 */
public void applyPartitionsUpdatesCounters(
    Iterable<PartitionUpdateCountersMessage> counters,
    boolean rollback,
    boolean rollbackOnPrimary
) throws IgniteCheckedException {
    if (counters == null)
        return;

    WALPointer ptr = null;

    try {
        for (PartitionUpdateCountersMessage counter : counters) {
            GridCacheContext ctx0 = ctx.cacheContext(counter.cacheId());

            GridDhtPartitionTopology top = ctx0.topology();

            assert top != null;

            AffinityTopologyVersion topVer = top.readyTopologyVersion();

            for (int i = 0; i < counter.size(); i++) {
                boolean invalid = false;

                try {
                    GridDhtLocalPartition part = top.localPartition(counter.partition(i));

                    if (part != null && part.reserve()) {
                        try {
                            // The RENTING check is relevant only for backup nodes.
                            if (part.state() != RENTING) {
                                long start = counter.initialCounter(i);
                                long delta = counter.updatesCount(i);

                                boolean updated = part.updateCounter(start, delta);

                                // Need to log the rolled back range for logical recovery.
                                if (updated && rollback) {
                                    CacheGroupContext grpCtx = part.group();

                                    if (grpCtx.persistenceEnabled() && grpCtx.walEnabled() && !grpCtx.mvccEnabled()) {
                                        RollbackRecord rec = new RollbackRecord(grpCtx.groupId(), part.id(), start, delta);

                                        ptr = ctx.wal().log(rec);
                                    }

                                    for (int cntr = 1; cntr <= delta; cntr++)
                                        ctx0.continuousQueries().skipUpdateCounter(null, part.id(), start + cntr, topVer, rollbackOnPrimary);
                                }
                            }
                            else
                                invalid = true;
                        }
                        finally {
                            part.release();
                        }
                    }
                    else
                        invalid = true;
                }
                catch (GridDhtInvalidPartitionException e) {
                    invalid = true;
                }

                if (log.isDebugEnabled() && invalid) {
                    log.debug("Received partition update counters message for invalid partition, ignoring: " +
                        "[cacheId=" + counter.cacheId() + ", part=" + counter.partition(i) + ']');
                }
            }
        }
    }
    finally {
        if (ptr != null)
            ctx.wal().flush(ptr, false);
    }
}
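Note the flush pattern: ctx.wal().log(rec) returns a WALPointer, only the last one is kept, and a single flush in the finally block makes the whole batch durable, since flushing up to a pointer also covers every earlier record. A minimal sketch of that "log many, flush once" pattern against a hypothetical append-only log interface (Wal and its methods are illustrative, not Ignite API):

import java.util.List;

// Hypothetical append-only log; illustrates the pattern only.
interface Wal {
    long log(byte[] rec);  // appends a record, returns its position
    void flush(long upTo); // fsyncs everything up to and including `upTo`
}

class WalBatchExample {
    /** Logs a batch of records, then makes them all durable with one flush. */
    static void logBatch(Wal wal, List<byte[]> recs) {
        long last = -1;

        for (byte[] r : recs)
            last = wal.log(r);

        if (last >= 0)
            wal.flush(last); // single fsync covers the whole batch
    }
}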
Use of org.apache.ignite.internal.pagemem.wal.record.RollbackRecord in project ignite by apache.
The class WalRecoveryTxLogicalRecordsTest, method testRollbackRecordOverlap.
/**
 * Simple test for rollback record overlap count.
 */
@Test
public void testRollbackRecordOverlap() {
    RollbackRecord r0 = new RollbackRecord(0, 0, 1, 1);
    RollbackRecord r1 = new RollbackRecord(0, 0, 1, 4);

    assertEquals(0, r0.overlap(0, 1));
    assertEquals(1, r0.overlap(1, 2));
    assertEquals(1, r0.overlap(0, 2));
    assertEquals(0, r0.overlap(2, 3));
    assertEquals(1, r0.overlap(1, 2));

    assertEquals(0, r1.overlap(5, 6));
    assertEquals(1, r1.overlap(4, 6));
    assertEquals(0, r1.overlap(0, 1));
    assertEquals(1, r1.overlap(2, 3));
    assertEquals(2, r1.overlap(2, 4));
    assertEquals(3, r1.overlap(2, 7));
    assertEquals(1, r1.overlap(0, 2));
    assertEquals(2, r1.overlap(0, 3));
    assertEquals(3, r1.overlap(0, 4));
    assertEquals(4, r1.overlap(0, 5));
    assertEquals(4, r1.overlap(1, 5));
}
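Judging from these assertions, overlap(from, to) returns the size of the intersection of the record's counter range [start, start + range) with the half-open query range [from, to). A standalone sketch reproducing that behavior (assumed semantics derived from the test, not Ignite's actual implementation):

class RollbackOverlapSketch {
    /**
     * Assumed semantics: a record covering counters [start, start + range)
     * reports the size of its intersection with the half-open range [from, to).
     */
    static long overlap(long start, long range, long from, long to) {
        long lo = Math.max(start, from);
        long hi = Math.min(start + range, to);

        return Math.max(0, hi - lo);
    }

    public static void main(String[] args) {
        // r1 = new RollbackRecord(0, 0, 1, 4) covers counters [1, 5).
        // Run with -ea to enable assertions.
        assert overlap(1, 4, 2, 7) == 3; // intersection [2, 5)
        assert overlap(1, 4, 0, 5) == 4; // intersection [1, 5)
        assert overlap(1, 4, 5, 6) == 0; // disjoint
    }
}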
Use of org.apache.ignite.internal.pagemem.wal.record.RollbackRecord in project ignite by apache.
The class GridDhtPartitionTopologyImpl, method finalizeUpdateCounters.
/** {@inheritDoc} */
@Override public void finalizeUpdateCounters(Set<Integer> parts) {
    // The checkpoint read lock must be acquired before the topology lock.
    ctx.database().checkpointReadLock();

    try {
        WALPointer ptr = null;

        lock.readLock().lock();

        try {
            for (int p : parts) {
                GridDhtLocalPartition part = locParts.get(p);

                if (part != null && part.state().active()) {
                    // We need to close all gaps in the partition update counter sequence. We assume this
                    // finalization happens on exchange, and hence all txs are completed. Therefore each gap
                    // in the update counter sequence is the result of an undelivered DhtTxFinishMessage on
                    // a backup (sequences on primary nodes do not have gaps). Here we close these gaps and
                    // asynchronously notify the continuous query engine about the skipped events.
                    AffinityTopologyVersion topVer = ctx.exchange().readyAffinityVersion();

                    GridLongList gaps = part.finalizeUpdateCounters();

                    if (gaps != null) {
                        for (int j = 0; j < gaps.size() / 2; j++) {
                            long gapStart = gaps.get(j * 2);
                            long gapStop = gaps.get(j * 2 + 1);

                            if (part.group().persistenceEnabled() && part.group().walEnabled() && !part.group().mvccEnabled()) {
                                // A rollback record tracks applied out-of-order updates, while
                                // finalizeUpdateCounters returns gaps (missing updates). The code
                                // below transforms gaps into updates.
                                RollbackRecord rec = new RollbackRecord(part.group().groupId(), part.id(),
                                    gapStart - 1, gapStop - gapStart + 1);

                                try {
                                    ptr = ctx.wal().log(rec);
                                }
                                catch (IgniteCheckedException e) {
                                    throw new IgniteException(e);
                                }
                            }
                        }

                        for (GridCacheContext ctx0 : grp.caches())
                            ctx0.continuousQueries().closeBackupUpdateCountersGaps(ctx0, part.id(), topVer, gaps);
                    }
                }
            }
        }
        finally {
            try {
                if (ptr != null)
                    ctx.wal().flush(ptr, false);
            }
            catch (IgniteCheckedException e) {
                throw new IgniteException(e);
            }
            finally {
                lock.readLock().unlock();
            }
        }
    }
    finally {
        ctx.database().checkpointReadUnlock();
    }
}
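The transformation described in the comment above can be checked with a small worked example: an inclusive gap [gapStart, gapStop] becomes the pair (gapStart - 1, gapStop - gapStart + 1), so the rolled-back counters are start + 1 through start + range. A minimal sketch (hypothetical helper, mirroring the arithmetic in the RollbackRecord constructor call above):

class GapToRollbackSketch {
    /**
     * A gap [gapStart, gapStop] (inclusive bounds, as stored pairwise in the
     * GridLongList above) becomes a (start, range) pair; the rolled-back
     * counters are start + 1 .. start + range inclusive.
     */
    static long[] gapToRollbackRange(long gapStart, long gapStop) {
        return new long[] {gapStart - 1, gapStop - gapStart + 1};
    }

    public static void main(String[] args) {
        // A gap covering missing counters 5..7 (three undelivered updates).
        long[] r = gapToRollbackRange(5, 7);

        // Run with -ea to enable assertions.
        assert r[0] == 4 && r[1] == 3; // start=4, range=3 -> counters 5, 6, 7
    }
}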