Use of org.apache.ignite.internal.processors.cache.persistence.wal.WALPointer in project ignite by apache.
The class GridDhtPartitionTopologyImpl, method finalizeUpdateCounters.
/**
* {@inheritDoc}
*/
@Override
public void finalizeUpdateCounters(Set<Integer> parts) {
// The checkpoint lock must be acquired before acquiring the topology lock.
ctx.database().checkpointReadLock();
try {
WALPointer ptr = null;
lock.readLock().lock();
try {
for (int p : parts) {
GridDhtLocalPartition part = locParts.get(p);
if (part != null && part.state().active()) {
// We need to close all gaps in the partition update counter sequence. We assume this finalization
// happens on exchange and hence all txs are completed. Therefore each gap in the update counter
// sequence is a result of an undelivered DhtTxFinishMessage on a backup (sequences on primary nodes
// do not have gaps). Here we close these gaps and asynchronously notify the continuous query engine
// about the skipped events.
AffinityTopologyVersion topVer = ctx.exchange().readyAffinityVersion();
GridLongList gaps = part.finalizeUpdateCounters();
if (gaps != null) {
for (int j = 0; j < gaps.size() / 2; j++) {
long gapStart = gaps.get(j * 2);
long gapStop = gaps.get(j * 2 + 1);
if (part.group().persistenceEnabled() && part.group().walEnabled() && !part.group().mvccEnabled()) {
// A rollback record tracks applied out-of-order updates, while finalizeUpdateCounters
// returns gaps (missing updates). The code below transforms gaps into updates.
RollbackRecord rec = new RollbackRecord(part.group().groupId(), part.id(), gapStart - 1, gapStop - gapStart + 1);
try {
ptr = ctx.wal().log(rec);
} catch (IgniteCheckedException e) {
throw new IgniteException(e);
}
}
}
for (GridCacheContext ctx0 : grp.caches()) ctx0.continuousQueries().closeBackupUpdateCountersGaps(ctx0, part.id(), topVer, gaps);
}
}
}
} finally {
try {
if (ptr != null)
ctx.wal().flush(ptr, false);
} catch (IgniteCheckedException e) {
throw new IgniteException(e);
} finally {
lock.readLock().unlock();
}
}
} finally {
ctx.database().checkpointReadUnlock();
}
}
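The gap list returned by part.finalizeUpdateCounters() is a flat sequence of (gapStart, gapStop) pairs, while the rollback record logged above takes a (start, range) pair. A minimal standalone sketch of that transformation, using a plain long[] instead of GridLongList (GapRange and toRollbackRanges are hypothetical names, not Ignite API):

import java.util.ArrayList;
import java.util.List;

// Illustrative only: models the gap-to-rollback transformation over plain arrays.
public class GapToRollbackSketch {
    /** A rollback range derived from one gap: start is gapStart - 1, range is the gap length. */
    static final class GapRange {
        final long start;
        final long range;

        GapRange(long start, long range) {
            this.start = start;
            this.range = range;
        }

        @Override public String toString() {
            return "GapRange [start=" + start + ", range=" + range + ']';
        }
    }

    /** Converts flat (gapStart, gapStop) pairs into (start, range) rollback descriptors. */
    static List<GapRange> toRollbackRanges(long[] gaps) {
        List<GapRange> res = new ArrayList<>();

        for (int j = 0; j < gaps.length / 2; j++) {
            long gapStart = gaps[j * 2];
            long gapStop = gaps[j * 2 + 1];

            // Mirrors: new RollbackRecord(grpId, partId, gapStart - 1, gapStop - gapStart + 1).
            res.add(new GapRange(gapStart - 1, gapStop - gapStart + 1));
        }

        return res;
    }

    public static void main(String[] args) {
        // Gaps [10..12] and [20..20] become rollback ranges (9, 3) and (19, 1).
        System.out.println(toRollbackRanges(new long[] {10, 12, 20, 20}));
    }
}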
Use of org.apache.ignite.internal.processors.cache.persistence.wal.WALPointer in project ignite by apache.
The class RecordDataV2Serializer, method readPlainRecord.
/**
* {@inheritDoc}
*/
@Override
WALRecord readPlainRecord(RecordType type, ByteBufferBackedDataInput in, boolean encrypted, int recordSize) throws IOException, IgniteCheckedException {
switch(type) {
case PAGE_RECORD:
int cacheId = in.readInt();
long pageId = in.readLong();
byte[] arr = new byte[recordSize - 4 /* cacheId */ - 8];
in.readFully(arr);
return new PageSnapshot(new FullPageId(pageId, cacheId), arr, encrypted ? realPageSize : pageSize);
case CHECKPOINT_RECORD:
long msb = in.readLong();
long lsb = in.readLong();
boolean hasPtr = in.readByte() != 0;
long idx0 = hasPtr ? in.readLong() : 0;
int off = hasPtr ? in.readInt() : 0;
int len = hasPtr ? in.readInt() : 0;
Map<Integer, CacheState> states = readPartitionStates(in);
boolean end = in.readByte() != 0;
WALPointer walPtr = hasPtr ? new WALPointer(idx0, off, len) : null;
CheckpointRecord cpRec = new CheckpointRecord(new UUID(msb, lsb), walPtr, end);
cpRec.cacheGroupStates(states);
return cpRec;
case DATA_RECORD:
case DATA_RECORD_V2:
int entryCnt = in.readInt();
long timeStamp = in.readLong();
if (entryCnt == 1)
return new DataRecord(readPlainDataEntry(in, type), timeStamp);
else {
List<DataEntry> entries = new ArrayList<>(entryCnt);
for (int i = 0; i < entryCnt; i++) entries.add(readPlainDataEntry(in, type));
return new DataRecord(entries, timeStamp);
}
case MVCC_DATA_RECORD:
entryCnt = in.readInt();
timeStamp = in.readLong();
List<DataEntry> entries = new ArrayList<>(entryCnt);
for (int i = 0; i < entryCnt; i++) entries.add(readMvccDataEntry(in));
return new MvccDataRecord(entries, timeStamp);
case ENCRYPTED_DATA_RECORD:
case ENCRYPTED_DATA_RECORD_V2:
case ENCRYPTED_DATA_RECORD_V3:
entryCnt = in.readInt();
timeStamp = in.readLong();
if (entryCnt == 1)
return new DataRecord(readEncryptedDataEntry(in, type), timeStamp);
else {
entries = new ArrayList<>(entryCnt);
for (int i = 0; i < entryCnt; i++) entries.add(readEncryptedDataEntry(in, type));
return new DataRecord(entries, timeStamp);
}
case SNAPSHOT:
long snpId = in.readLong();
byte full = in.readByte();
return new SnapshotRecord(snpId, full == 1);
case EXCHANGE:
int idx = in.readInt();
short constId = in.readShort();
long ts = in.readLong();
return new ExchangeRecord(constId, ExchangeRecord.Type.values()[idx], ts);
case TX_RECORD:
return txRecordSerializer.readTx(in);
case MVCC_TX_RECORD:
return txRecordSerializer.readMvccTx(in);
case ROLLBACK_TX_RECORD:
int grpId = in.readInt();
int partId = in.readInt();
long start = in.readLong();
long range = in.readLong();
return new RollbackRecord(grpId, partId, start, range);
case TRACKING_PAGE_REPAIR_DELTA:
cacheId = in.readInt();
pageId = in.readLong();
return new TrackingPageRepairDeltaRecord(cacheId, pageId);
default:
return super.readPlainRecord(type, in, encrypted, recordSize);
}
}
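For the CHECKPOINT_RECORD branch, the serialized checkpoint mark is an optional (index, fileOffset, length) triple guarded by a presence byte, following the checkpoint UUID. A self-contained sketch of that read path over a plain DataInput stream; PointerStub is a hypothetical stand-in for WALPointer, and the encoding in main() exists only for the demo:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

// Illustrative only: mirrors how the optional checkpoint mark pointer is read from the record body.
public class CheckpointPointerSketch {
    record PointerStub(long index, int fileOffset, int length) { }

    /** Reads an optional pointer: 1 presence byte, then long index, int offset, int length. */
    static PointerStub readOptionalPointer(DataInputStream in) throws IOException {
        boolean hasPtr = in.readByte() != 0;

        long idx = hasPtr ? in.readLong() : 0;
        int off = hasPtr ? in.readInt() : 0;
        int len = hasPtr ? in.readInt() : 0;

        return hasPtr ? new PointerStub(idx, off, len) : null;
    }

    public static void main(String[] args) throws IOException {
        ByteArrayOutputStream buf = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(buf);

        // Encode a present pointer: segment 42, offset 1024, record length 96.
        out.writeByte(1);
        out.writeLong(42);
        out.writeInt(1024);
        out.writeInt(96);

        PointerStub ptr = readOptionalPointer(new DataInputStream(new ByteArrayInputStream(buf.toByteArray())));

        System.out.println(ptr); // PointerStub[index=42, fileOffset=1024, length=96]
    }
}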
Use of org.apache.ignite.internal.processors.cache.persistence.wal.WALPointer in project ignite by apache.
The class StandaloneWalRecordsIterator, method advanceSegment.
/**
* {@inheritDoc}
*/
@Override
protected AbstractReadFileHandle advanceSegment(@Nullable final AbstractReadFileHandle curWalSegment) throws IgniteCheckedException {
if (curWalSegment != null)
curWalSegment.close();
FileDescriptor fd;
do {
curWalSegmIdx++;
curIdx++;
if (curIdx >= walFileDescriptors.size())
return null;
fd = walFileDescriptors.get(curIdx);
} while (!checkBounds(fd.idx()));
if (log.isDebugEnabled())
log.debug("Reading next file [absIdx=" + curWalSegmIdx + ", file=" + fd.file().getAbsolutePath() + ']');
assert fd != null;
curRec = null;
try {
WALPointer initPtr = null;
if (lowBound.index() == fd.idx())
initPtr = lowBound;
return initReadHandle(fd, initPtr);
} catch (FileNotFoundException e) {
if (log.isInfoEnabled())
log.info("Missing WAL segment in the archive: " + e.getMessage());
return null;
}
}
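advanceSegment walks a pre-scanned, sorted list of WAL segment descriptors, skips those outside the configured bounds, and positions the read handle at lowBound only when that pointer falls inside the segment being opened. A toy model of the same advance-and-filter loop over plain segment indices; all names here are hypothetical, not Ignite API:

import java.util.List;

// Illustrative only: models the segment-advance loop of StandaloneWalRecordsIterator.
public class SegmentAdvanceSketch {
    private final List<Long> segmentIndices; // Sorted absolute WAL segment indices.
    private final long lowIdx;               // Inclusive lower bound (from the "from" pointer).
    private final long highIdx;              // Inclusive upper bound (from the "to" pointer).

    private int cur = -1;

    SegmentAdvanceSketch(List<Long> segmentIndices, long lowIdx, long highIdx) {
        this.segmentIndices = segmentIndices;
        this.lowIdx = lowIdx;
        this.highIdx = highIdx;
    }

    /** @return Next in-bounds segment index, or null when the list is exhausted. */
    Long advance() {
        long idx;

        do {
            cur++;

            if (cur >= segmentIndices.size())
                return null;

            idx = segmentIndices.get(cur);
        }
        while (idx < lowIdx || idx > highIdx);

        return idx;
    }

    public static void main(String[] args) {
        SegmentAdvanceSketch it = new SegmentAdvanceSketch(List.of(3L, 4L, 5L, 6L, 7L), 4, 6);

        for (Long idx; (idx = it.advance()) != null; )
            System.out.println("Opening segment " + idx); // 4, 5, 6
    }
}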
Use of org.apache.ignite.internal.processors.cache.persistence.wal.WALPointer in project ignite by apache.
The class CdcMain, method consumeSegment.
/**
* Reads all available records from segment.
*/
private void consumeSegment(Path segment) {
if (log.isInfoEnabled())
log.info("Processing WAL segment [segment=" + segment + ']');
lastSegmentConsumptionTs.value(System.currentTimeMillis());
IgniteWalIteratorFactory.IteratorParametersBuilder builder = new IgniteWalIteratorFactory.IteratorParametersBuilder().log(log).binaryMetadataFileStoreDir(binaryMeta).marshallerMappingFileStoreDir(marshaller).keepBinary(cdcCfg.isKeepBinary()).filesOrDirs(segment.toFile()).addFilter((type, ptr) -> type == DATA_RECORD_V2);
if (igniteCfg.getDataStorageConfiguration().getPageSize() != 0)
builder.pageSize(igniteCfg.getDataStorageConfiguration().getPageSize());
long segmentIdx = segmentIndex(segment);
curSegmentIdx.value(segmentIdx);
if (initState != null) {
if (segmentIdx > initState.index()) {
throw new IgniteException("Found segment greater then saved state. Some events are missed. Exiting! " + "[state=" + initState + ", segment=" + segmentIdx + ']');
}
if (segmentIdx < initState.index()) {
if (log.isInfoEnabled()) {
log.info("Already processed segment found. Skipping and deleting the file [segment=" + segmentIdx + ", state=" + initState.index() + ']');
}
// The segment has already been processed, so it can be safely deleted.
try {
Files.delete(segment);
return;
} catch (IOException e) {
throw new IgniteException(e);
}
}
builder.from(initState);
initState = null;
}
try (WALIterator it = factory.iterator(builder)) {
boolean interrupted = Thread.interrupted();
while (it.hasNext() && !interrupted) {
Iterator<DataRecord> iter = F.iterator(it.iterator(), t -> (DataRecord) t.get2(), true);
boolean commit = consumer.onRecords(iter);
if (commit) {
assert it.lastRead().isPresent();
WALPointer ptr = it.lastRead().get().next();
if (log.isDebugEnabled())
log.debug("Saving state [ptr=" + ptr + ']');
state.save(ptr);
committedSegmentIdx.value(ptr.index());
committedSegmentOffset.value(ptr.fileOffset());
// Older segments can be deleted once the new state has been saved.
if (!processedSegments.isEmpty()) {
// The segments were successfully processed, so it is safe to delete them.
for (Path processedSegment : processedSegments) {
// Can't delete current segment, because state points to it.
if (processedSegment.equals(segment))
continue;
Files.delete(processedSegment);
}
processedSegments.clear();
}
}
interrupted = Thread.interrupted();
}
if (interrupted)
throw new IgniteException("Change Data Capture Application interrupted");
processedSegments.add(segment);
} catch (IgniteCheckedException | IOException e) {
throw new IgniteException(e);
}
}
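The committed CDC state is the position of the next record to consume: it.lastRead() points at the last record handed to the consumer, and next() advances past it, which is why both the committed segment index and the file offset are taken from the advanced pointer. A small standalone model of that resume-point arithmetic, assuming next() means "offset plus record length" (PointerStub is a hypothetical stand-in for WALPointer):

// Illustrative only: models why CdcMain saves lastRead().next() rather than lastRead() itself.
public class CdcResumePointSketch {
    /** Hypothetical stand-in for WALPointer: segment index, offset within segment, record length. */
    record PointerStub(long index, int fileOffset, int length) {
        /** Pointer immediately after this record, i.e. where consumption should resume. */
        PointerStub next() {
            return new PointerStub(index, fileOffset + length, 0);
        }
    }

    public static void main(String[] args) {
        // Last record fully read by the consumer: segment 7, offset 2048, 128 bytes long.
        PointerStub lastRead = new PointerStub(7, 2048, 128);

        PointerStub resumeFrom = lastRead.next();

        // Restarting the iterator from resumeFrom re-reads nothing and skips nothing.
        System.out.println("committedSegmentIdx=" + resumeFrom.index()
            + ", committedSegmentOffset=" + resumeFrom.fileOffset()); // 7, 2176
    }
}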
Use of org.apache.ignite.internal.processors.cache.persistence.wal.WALPointer in project ignite by apache.
The class GridDistributedTxRemoteAdapter, method commitIfLocked.
/**
* @throws IgniteCheckedException If commit failed.
*/
private void commitIfLocked() throws IgniteCheckedException {
if (state() == COMMITTING) {
for (IgniteTxEntry txEntry : writeEntries()) {
assert txEntry != null : "Missing transaction entry for tx: " + this;
while (true) {
GridCacheEntryEx entry = txEntry.cached();
assert entry != null : "Missing cached entry for transaction entry: " + txEntry;
try {
GridCacheVersion ver = txEntry.explicitVersion() != null ? txEntry.explicitVersion() : xidVer;
// If locks haven't been acquired yet, keep waiting.
if (!entry.lockedBy(ver)) {
if (log.isDebugEnabled())
log.debug("Transaction does not own lock for entry (will wait) [entry=" + entry + ", tx=" + this + ']');
return;
}
// Break out of the while loop.
break;
} catch (GridCacheEntryRemovedException ignore) {
if (log.isDebugEnabled())
log.debug("Got removed entry while committing (will retry): " + txEntry);
try {
txEntry.cached(txEntry.context().cache().entryEx(txEntry.key(), topologyVersion()));
} catch (GridDhtInvalidPartitionException e) {
break;
}
}
}
}
// Only one thread gets to commit.
if (COMMIT_ALLOWED_UPD.compareAndSet(this, 0, 1)) {
IgniteCheckedException err = null;
Map<IgniteTxKey, IgniteTxEntry> writeMap = txState.writeMap();
GridCacheReturnCompletableWrapper wrapper = null;
if (!F.isEmpty(writeMap) || mvccSnapshot != null) {
GridCacheReturn ret = null;
if (!near() && !local() && onePhaseCommit()) {
if (needReturnValue()) {
ret = new GridCacheReturn(null, cctx.localNodeId().equals(otherNodeId()), true, null, null, true);
// Originating node.
UUID origNodeId = otherNodeId();
cctx.tm().addCommittedTxReturn(this, wrapper = new GridCacheReturnCompletableWrapper(!cctx.localNodeId().equals(origNodeId) ? origNodeId : null));
} else
cctx.tm().addCommittedTx(this, this.nearXidVersion(), null);
}
// Register this transaction as completed prior to write-phase to
// ensure proper lock ordering for removed entries.
cctx.tm().addCommittedTx(this);
AffinityTopologyVersion topVer = topologyVersion();
WALPointer ptr = null;
cctx.database().checkpointReadLock();
// Reserved partitions (necessary to prevent race due to updates in RENTING state).
Set<GridDhtLocalPartition> reservedParts = new HashSet<>();
try {
assert !txState.mvccEnabled() || mvccSnapshot != null : "Mvcc is not initialized: " + this;
Collection<IgniteTxEntry> entries = near() || cctx.snapshot().needTxReadLogging() ? allEntries() : writeEntries();
// Data entries to write to the WAL and the TxEntry associated with each of them.
List<T2<DataEntry, IgniteTxEntry>> dataEntries = null;
batchStoreCommit(writeMap().values());
// Note that for near transactions we grab all entries.
for (IgniteTxEntry txEntry : entries) {
GridCacheContext cacheCtx = txEntry.context();
// Prevent stale updates.
GridDhtLocalPartition locPart = cacheCtx.group().topology().localPartition(txEntry.cached().partition());
if (!near()) {
if (locPart == null)
continue;
if (!reservedParts.contains(locPart) && locPart.reserve()) {
assert locPart.state() != EVICTED && locPart.reservations() > 0 : locPart;
reservedParts.add(locPart);
}
if (locPart.state() == RENTING || locPart.state() == EVICTED) {
LT.warn(log(), "Skipping update to partition that is concurrently evicting " + "[grp=" + cacheCtx.group().cacheOrGroupName() + ", part=" + locPart + "]");
continue;
}
}
boolean replicate = cacheCtx.isDrEnabled();
while (true) {
try {
GridCacheEntryEx cached = txEntry.cached();
if (cached == null)
txEntry.cached(cached = cacheCtx.cache().entryEx(txEntry.key(), topologyVersion()));
if (near() && cacheCtx.dr().receiveEnabled()) {
cached.markObsolete(xidVer);
break;
}
GridNearCacheEntry nearCached = null;
if (updateNearCache(cacheCtx, txEntry.key(), topVer))
nearCached = cacheCtx.dht().near().peekExx(txEntry.key());
if (!F.isEmpty(txEntry.entryProcessors()))
txEntry.cached().unswap(false);
IgniteBiTuple<GridCacheOperation, CacheObject> res = applyTransformClosures(txEntry, false, ret);
GridCacheOperation op = res.get1();
CacheObject val = res.get2();
GridCacheVersion explicitVer = txEntry.conflictVersion();
if (explicitVer == null)
explicitVer = writeVersion();
if (txEntry.ttl() == CU.TTL_ZERO)
op = DELETE;
boolean conflictNeedResolve = cacheCtx.conflictNeedResolve();
GridCacheVersionConflictContext conflictCtx = null;
if (conflictNeedResolve) {
IgniteBiTuple<GridCacheOperation, GridCacheVersionConflictContext> drRes = conflictResolve(op, txEntry, val, explicitVer, cached);
assert drRes != null;
conflictCtx = drRes.get2();
if (conflictCtx.isUseOld())
op = NOOP;
else if (conflictCtx.isUseNew()) {
txEntry.ttl(conflictCtx.ttl());
txEntry.conflictExpireTime(conflictCtx.expireTime());
} else if (conflictCtx.isMerge()) {
op = drRes.get1();
val = txEntry.context().toCacheObject(conflictCtx.mergeValue());
explicitVer = writeVersion();
txEntry.ttl(conflictCtx.ttl());
txEntry.conflictExpireTime(conflictCtx.expireTime());
}
} else
// Nullify explicit version so that innerSet/innerRemove will work as usual.
explicitVer = null;
GridCacheVersion dhtVer = cached.isNear() ? writeVersion() : null;
if (!near() && cacheCtx.group().persistenceEnabled() && cacheCtx.group().walEnabled() && op != NOOP && op != RELOAD && (op != READ || cctx.snapshot().needTxReadLogging())) {
if (dataEntries == null)
dataEntries = new ArrayList<>(entries.size());
dataEntries.add(new T2<>(new DataEntry(cacheCtx.cacheId(), txEntry.key(), val, op, nearXidVersion(), addConflictVersion(writeVersion(), txEntry.conflictVersion()), 0, txEntry.key().partition(), txEntry.updateCounter(), DataEntry.flags(CU.txOnPrimary(this))), txEntry));
}
if (op == CREATE || op == UPDATE) {
// Invalidate only for near nodes (backups cannot be invalidated).
if (isSystemInvalidate() || (isInvalidate() && cacheCtx.isNear()))
cached.innerRemove(this, eventNodeId(), nodeId, false, true, true, txEntry.keepBinary(), txEntry.hasOldValue(), txEntry.oldValue(), topVer, null, replicate ? DR_BACKUP : DR_NONE, near() ? null : explicitVer, resolveTaskName(), dhtVer, txEntry.updateCounter());
else {
assert val != null : txEntry;
GridCacheUpdateTxResult updRes = cached.innerSet(this, eventNodeId(), nodeId, val, false, false, txEntry.ttl(), true, true, txEntry.keepBinary(), txEntry.hasOldValue(), txEntry.oldValue(), topVer, null, replicate ? DR_BACKUP : DR_NONE, txEntry.conflictExpireTime(), near() ? null : explicitVer, resolveTaskName(), dhtVer, txEntry.updateCounter());
txEntry.updateCounter(updRes.updateCounter());
if (updRes.loggedPointer() != null)
ptr = updRes.loggedPointer();
// Keep near entry up to date.
if (nearCached != null) {
CacheObject val0 = cached.valueBytes();
nearCached.updateOrEvict(xidVer, val0, cached.expireTime(), cached.ttl(), nodeId, topVer);
}
}
} else if (op == DELETE) {
GridCacheUpdateTxResult updRes = cached.innerRemove(this, eventNodeId(), nodeId, false, true, true, txEntry.keepBinary(), txEntry.hasOldValue(), txEntry.oldValue(), topVer, null, replicate ? DR_BACKUP : DR_NONE, near() ? null : explicitVer, resolveTaskName(), dhtVer, txEntry.updateCounter());
txEntry.updateCounter(updRes.updateCounter());
if (updRes.loggedPointer() != null)
ptr = updRes.loggedPointer();
// Keep near entry up to date.
if (nearCached != null)
nearCached.updateOrEvict(xidVer, null, 0, 0, nodeId, topVer);
} else if (op == RELOAD) {
CacheObject reloaded = cached.innerReload();
if (nearCached != null) {
nearCached.innerReload();
nearCached.updateOrEvict(cached.version(), reloaded, cached.expireTime(), cached.ttl(), nodeId, topVer);
}
} else if (op == READ) {
assert near();
if (log.isDebugEnabled())
log.debug("Ignoring READ entry when committing: " + txEntry);
} else { // No-op.
if (conflictCtx == null || !conflictCtx.isUseOld()) {
if (txEntry.ttl() != CU.TTL_NOT_CHANGED)
cached.updateTtl(null, txEntry.ttl());
if (nearCached != null) {
CacheObject val0 = cached.valueBytes();
nearCached.updateOrEvict(xidVer, val0, cached.expireTime(), cached.ttl(), nodeId, topVer);
}
}
}
// Assert that the transaction still owns the lock, even if we replaced removed entries above.
assert txEntry.op() == READ || onePhaseCommit() ||
// If the lock candidate is not there, the lock was explicit and we simply allow the commit to proceed.
!cached.hasLockCandidateUnsafe(xidVer) || cached.lockedByUnsafe(xidVer) : "Transaction does not own lock for commit [entry=" + cached + ", tx=" + this + ']';
// Break out of while loop.
break;
} catch (GridCacheEntryRemovedException ignored) {
if (log.isDebugEnabled())
log.debug("Attempting to commit a removed entry (will retry): " + txEntry);
// Renew cached entry.
txEntry.cached(cacheCtx.cache().entryEx(txEntry.key(), topologyVersion()));
}
}
}
// Apply cache size deltas.
applyTxSizes();
TxCounters txCntrs = txCounters(false);
// Apply update counters.
if (txCntrs != null)
cctx.tm().txHandler().applyPartitionsUpdatesCounters(txCntrs.updateCounters());
cctx.mvccCaching().onTxFinished(this, true);
if (!near() && !F.isEmpty(dataEntries) && cctx.wal() != null) {
// Set new update counters for data entries received from persisted tx entries.
List<DataEntry> entriesWithCounters = dataEntries.stream().map(tuple -> tuple.get1().partitionCounter(tuple.get2().updateCounter())).collect(Collectors.toList());
ptr = cctx.wal().log(new DataRecord(entriesWithCounters));
}
if (ptr != null)
cctx.wal().flush(ptr, false);
} catch (Throwable ex) {
state(UNKNOWN);
if (X.hasCause(ex, NodeStoppingException.class)) {
U.warn(log, "Failed to commit transaction, node is stopping [tx=" + CU.txString(this) + ", err=" + ex + ']');
return;
}
err = heuristicException(ex);
try {
uncommit();
} catch (Throwable e) {
err.addSuppressed(e);
}
throw err;
} finally {
for (GridDhtLocalPartition locPart : reservedParts) locPart.release();
cctx.database().checkpointReadUnlock();
if (wrapper != null)
wrapper.initialize(ret);
}
}
cctx.tm().commitTx(this);
state(COMMITTED);
}
}
}
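The write path above follows a log-then-flush discipline: pointers returned by innerSet/innerRemove and by the final DataRecord write are folded into a single ptr variable, and only that last pointer is flushed before the commit completes. A compact sketch of the same pattern against a hypothetical in-memory WAL (WriteAheadLog, LogPointer and commit are made-up names, not Ignite's IgniteWriteAheadLogManager API):

import java.util.ArrayList;
import java.util.List;

// Illustrative only: the "keep the latest logged pointer, flush it once" commit pattern used above.
public class LogThenFlushSketch {
    /** A log pointer is just a monotonically increasing position here. */
    record LogPointer(long position) { }

    /** Minimal in-memory WAL: appends records and remembers the highest flushed position. */
    static final class WriteAheadLog {
        private final List<Object> records = new ArrayList<>();
        private long flushedUpTo = -1;

        LogPointer log(Object record) {
            records.add(record);
            return new LogPointer(records.size() - 1);
        }

        void flush(LogPointer ptr) {
            // Flushing the highest pointer makes all earlier records durable as well,
            // because the log is append-only and ordered.
            flushedUpTo = Math.max(flushedUpTo, ptr.position());
        }
    }

    static void commit(WriteAheadLog wal, List<Object> perEntryRecords, Object finalDataRecord) {
        LogPointer ptr = null;

        // Per-entry updates may each log a record; only the latest pointer is kept.
        for (Object rec : perEntryRecords)
            ptr = wal.log(rec);

        // The batched data record written at the end of the commit yields the highest pointer.
        if (finalDataRecord != null)
            ptr = wal.log(finalDataRecord);

        // A single flush before acknowledging the commit is enough.
        if (ptr != null)
            wal.flush(ptr);
    }

    public static void main(String[] args) {
        WriteAheadLog wal = new WriteAheadLog();

        commit(wal, List.of("update k1", "update k2"), "DataRecord[k1, k2]");

        System.out.println("Durable up to position " + wal.flushedUpTo); // 2
    }
}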