use of org.apache.ignite.internal.util.StripedExecutor in project ignite by apache.
the class IgniteKernal method createExecutorDescription.
/**
* Create description of an executor service for logging.
*
* @param execSvcName Name of the service.
* @param execSvc Service to create a description for.
*/
private String createExecutorDescription(String execSvcName, ExecutorService execSvc) {
int poolSize = 0;
int poolActiveThreads = 0;
int poolQSize = 0;
if (execSvc instanceof ThreadPoolExecutor) {
ThreadPoolExecutor exec = (ThreadPoolExecutor) execSvc;
poolSize = exec.getPoolSize();
poolActiveThreads = Math.min(poolSize, exec.getActiveCount());
poolQSize = exec.getQueue().size();
} else if (execSvc instanceof StripedExecutor) {
StripedExecutor exec = (StripedExecutor) execSvc;
poolSize = exec.stripesCount();
poolActiveThreads = exec.activeStripesCount();
poolQSize = exec.queueSize();
}
int poolIdleThreads = poolSize - poolActiveThreads;
return execSvcName + " [active=" + poolActiveThreads + ", idle=" + poolIdleThreads + ", qSize=" + poolQSize + "]";
}
use of org.apache.ignite.internal.util.StripedExecutor in project ignite by apache.
the class GridCacheDatabaseSharedManager method applyLogicalUpdates.
/**
* @param status Last registered checkpoint status.
* @param restoreMeta Metastore restore phase if {@code true}.
* @throws IgniteCheckedException If failed to apply updates.
* @throws StorageException If IO exception occurred while reading write-ahead log.
*/
private RestoreLogicalState applyLogicalUpdates(CheckpointStatus status, IgnitePredicate<Integer> cacheGroupsPredicate, IgniteBiPredicate<WALRecord.RecordType, WALPointer> recordTypePredicate, boolean restoreMeta) throws IgniteCheckedException {
if (log.isInfoEnabled())
log.info("Applying lost " + (restoreMeta ? "metastore" : "cache") + " updates since last checkpoint record [lastMarked=" + status.startPtr + ", lastCheckpointId=" + status.cpStartId + ']');
if (!restoreMeta)
cctx.kernalContext().query().skipFieldLookup(true);
long start = U.currentTimeMillis();
AtomicReference<Throwable> applyError = new AtomicReference<>();
AtomicLong applied = new AtomicLong();
long lastArchivedSegment = cctx.wal().lastArchivedSegment();
StripedExecutor exec = cctx.kernalContext().pools().getStripedExecutorService();
Semaphore semaphore = new Semaphore(semaphorePertmits(exec));
Map<GroupPartitionId, Integer> partitionRecoveryStates = new HashMap<>();
WALIterator it = cctx.wal().replay(status.startPtr, recordTypePredicate);
RestoreLogicalState restoreLogicalState = new RestoreLogicalState(status, it, lastArchivedSegment, cacheGroupsPredicate, partitionRecoveryStates);
final IgniteTxManager txManager = cctx.tm();
try {
while (restoreLogicalState.hasNext()) {
WALRecord rec = restoreLogicalState.next();
if (rec == null)
break;
switch(rec.type()) {
case TX_RECORD:
if (restoreMeta) {
// Also restore tx states.
TxRecord txRec = (TxRecord) rec;
txManager.collectTxStates(txRec);
}
break;
case // Calculate initial partition states
CHECKPOINT_RECORD:
CheckpointRecord cpRec = (CheckpointRecord) rec;
for (Map.Entry<Integer, CacheState> entry : cpRec.cacheGroupStates().entrySet()) {
CacheState cacheState = entry.getValue();
for (int i = 0; i < cacheState.size(); i++) {
int partId = cacheState.partitionByIndex(i);
byte state = cacheState.stateByIndex(i);
// Ignore undefined state.
if (state != -1) {
partitionRecoveryStates.put(new GroupPartitionId(entry.getKey(), partId), (int) state);
}
}
}
break;
case ROLLBACK_TX_RECORD:
RollbackRecord rbRec = (RollbackRecord) rec;
CacheGroupContext ctx = cctx.cache().cacheGroup(rbRec.groupId());
if (ctx != null && !ctx.isLocal()) {
GridDhtLocalPartition part = ctx.topology().forceCreatePartition(rbRec.partitionId());
ctx.offheap().dataStore(part).updateInitialCounter(rbRec.start(), rbRec.range());
}
break;
case MVCC_DATA_RECORD:
case DATA_RECORD:
case DATA_RECORD_V2:
case ENCRYPTED_DATA_RECORD:
case ENCRYPTED_DATA_RECORD_V2:
case ENCRYPTED_DATA_RECORD_V3:
DataRecord dataRec = (DataRecord) rec;
int entryCnt = dataRec.entryCount();
for (int i = 0; i < entryCnt; i++) {
DataEntry dataEntry = dataRec.get(i);
if (!restoreMeta && txManager.uncommitedTx(dataEntry))
continue;
int cacheId = dataEntry.cacheId();
DynamicCacheDescriptor cacheDesc = cctx.cache().cacheDescriptor(cacheId);
// Can empty in case recovery node on blt changed.
if (cacheDesc == null)
continue;
stripedApply(() -> {
GridCacheContext cacheCtx = cctx.cacheContext(cacheId);
if (skipRemovedIndexUpdates(cacheCtx.groupId(), PageIdAllocator.INDEX_PARTITION))
cctx.kernalContext().query().markAsRebuildNeeded(cacheCtx, true);
try {
applyUpdate(cacheCtx, dataEntry);
} catch (IgniteCheckedException e) {
U.error(log, "Failed to apply data entry, dataEntry=" + dataEntry + ", ptr=" + dataRec.position());
applyError.compareAndSet(null, e);
}
applied.incrementAndGet();
}, cacheDesc.groupId(), dataEntry.partitionId(), exec, semaphore);
}
break;
case MVCC_TX_RECORD:
MvccTxRecord txRecord = (MvccTxRecord) rec;
byte txState = convertToTxState(txRecord.state());
cctx.coordinators().updateState(txRecord.mvccVersion(), txState, true);
break;
case PART_META_UPDATE_STATE:
PartitionMetaStateRecord metaStateRecord = (PartitionMetaStateRecord) rec;
GroupPartitionId groupPartitionId = new GroupPartitionId(metaStateRecord.groupId(), metaStateRecord.partitionId());
restoreLogicalState.partitionRecoveryStates.put(groupPartitionId, (int) metaStateRecord.state());
break;
case METASTORE_DATA_RECORD:
MetastoreDataRecord metastoreDataRecord = (MetastoreDataRecord) rec;
metaStorage.applyUpdate(metastoreDataRecord.key(), metastoreDataRecord.value());
break;
case META_PAGE_UPDATE_NEXT_SNAPSHOT_ID:
case META_PAGE_UPDATE_LAST_SUCCESSFUL_SNAPSHOT_ID:
case META_PAGE_UPDATE_LAST_SUCCESSFUL_FULL_SNAPSHOT_ID:
case META_PAGE_UPDATE_LAST_ALLOCATED_INDEX:
PageDeltaRecord pageDelta = (PageDeltaRecord) rec;
stripedApplyPage((pageMem) -> {
try {
applyPageDelta(pageMem, pageDelta, false);
} catch (IgniteCheckedException e) {
U.error(log, "Failed to apply page delta, " + pageDelta);
applyError.compareAndSet(null, e);
}
}, pageDelta.groupId(), partId(pageDelta.pageId()), exec, semaphore);
break;
case MASTER_KEY_CHANGE_RECORD_V2:
cctx.kernalContext().encryption().applyKeys((MasterKeyChangeRecordV2) rec);
break;
case REENCRYPTION_START_RECORD:
cctx.kernalContext().encryption().applyReencryptionStartRecord((ReencryptionStartRecord) rec);
break;
case INDEX_ROOT_PAGE_RENAME_RECORD:
IndexRenameRootPageRecord record = (IndexRenameRootPageRecord) rec;
int cacheId = record.cacheId();
GridCacheContext cacheCtx = cctx.cacheContext(cacheId);
if (cacheCtx != null) {
IgniteCacheOffheapManager offheap = cacheCtx.offheap();
for (int i = 0; i < record.segments(); i++) offheap.renameRootPageForIndex(cacheId, record.oldTreeName(), record.newTreeName(), i);
}
break;
case PARTITION_CLEARING_START_RECORD:
PartitionClearingStartRecord rec0 = (PartitionClearingStartRecord) rec;
CacheGroupContext grp = this.ctx.cache().cacheGroup(rec0.groupId());
if (grp != null) {
GridDhtLocalPartition part;
try {
part = grp.topology().forceCreatePartition(rec0.partitionId());
} catch (IgniteCheckedException e) {
throw new IgniteException("Cannot get or create a partition [groupId=" + rec0.groupId() + ", partitionId=" + rec0.partitionId() + "]", e);
}
stripedApply(() -> {
try {
part.updateClearVersion(rec0.clearVersion());
IgniteInternalFuture<?> clearFut = grp.shared().evict().evictPartitionAsync(grp, part, new GridFutureAdapter<>());
clearFut.get();
part.updateClearVersion();
} catch (IgniteCheckedException e) {
U.error(log, "Failed to apply partition clearing record, " + rec0);
applyError.compareAndSet(null, e);
}
}, rec0.groupId(), rec0.partitionId(), exec, semaphore);
}
break;
default:
}
}
} finally {
it.close();
if (!restoreMeta)
cctx.kernalContext().query().skipFieldLookup(false);
}
awaitApplyComplete(exec, applyError);
if (log.isInfoEnabled())
log.info("Finished applying WAL changes [updatesApplied=" + applied + ", time=" + (U.currentTimeMillis() - start) + " ms]");
for (DatabaseLifecycleListener lsnr : getDatabaseListeners(cctx.kernalContext())) lsnr.afterLogicalUpdatesApplied(this, restoreLogicalState);
return restoreLogicalState;
}
use of org.apache.ignite.internal.util.StripedExecutor in project ignite by apache.
the class GridCacheDatabaseSharedManager method performBinaryMemoryRestore.
/**
* @param status Checkpoint status.
* @param cacheGroupsPredicate Cache groups to restore.
* @throws IgniteCheckedException If failed.
* @throws StorageException In case I/O error occurred during operations with storage.
*/
private RestoreBinaryState performBinaryMemoryRestore(CheckpointStatus status, IgnitePredicate<Integer> cacheGroupsPredicate, IgniteBiPredicate<WALRecord.RecordType, WALPointer> recordTypePredicate, boolean finalizeState) throws IgniteCheckedException {
if (log.isInfoEnabled())
log.info("Checking memory state [lastValidPos=" + status.endPtr + ", lastMarked=" + status.startPtr + ", lastCheckpointId=" + status.cpStartId + ']');
WALPointer recPtr = status.endPtr;
boolean apply = status.needRestoreMemory();
try {
WALRecord startRec = !CheckpointStatus.NULL_PTR.equals(status.startPtr) || apply ? cctx.wal().read(status.startPtr) : null;
if (apply) {
if (finalizeState)
U.quietAndWarn(log, "Ignite node stopped in the middle of checkpoint. Will restore memory state and " + "finish checkpoint on node start.");
cctx.cache().cacheGroupDescriptors().forEach((grpId, desc) -> {
if (!cacheGroupsPredicate.apply(grpId))
return;
try {
DataRegion region = cctx.database().dataRegion(desc.config().getDataRegionName());
if (region == null || !cctx.isLazyMemoryAllocation(region))
return;
region.pageMemory().start();
} catch (IgniteCheckedException e) {
throw new IgniteException(e);
}
});
cctx.pageStore().beginRecover();
if (!(startRec instanceof CheckpointRecord))
throw new StorageException("Checkpoint marker doesn't point to checkpoint record " + "[ptr=" + status.startPtr + ", rec=" + startRec + "]");
WALPointer cpMark = ((CheckpointRecord) startRec).checkpointMark();
if (cpMark != null) {
if (log.isInfoEnabled())
log.info("Restoring checkpoint after logical recovery, will start physical recovery from " + "back pointer: " + cpMark);
recPtr = cpMark;
}
} else
cctx.wal().notchLastCheckpointPtr(status.startPtr);
} catch (NoSuchElementException e) {
throw new StorageException("Failed to read checkpoint record from WAL, persistence consistency " + "cannot be guaranteed. Make sure configuration points to correct WAL folders and WAL folder is " + "properly mounted [ptr=" + status.startPtr + ", walPath=" + persistenceCfg.getWalPath() + ", walArchive=" + persistenceCfg.getWalArchivePath() + "]");
}
AtomicReference<Throwable> applyError = new AtomicReference<>();
StripedExecutor exec = cctx.kernalContext().pools().getStripedExecutorService();
Semaphore semaphore = new Semaphore(semaphorePertmits(exec));
long start = U.currentTimeMillis();
long lastArchivedSegment = cctx.wal().lastArchivedSegment();
WALIterator it = cctx.wal().replay(recPtr, recordTypePredicate);
RestoreBinaryState restoreBinaryState = new RestoreBinaryState(status, it, lastArchivedSegment, cacheGroupsPredicate);
AtomicLong applied = new AtomicLong();
try {
while (restoreBinaryState.hasNext()) {
if (applyError.get() != null)
break;
WALRecord rec = restoreBinaryState.next();
if (rec == null)
break;
switch(rec.type()) {
case PAGE_RECORD:
if (restoreBinaryState.needApplyBinaryUpdate()) {
PageSnapshot pageSnapshot = (PageSnapshot) rec;
// Here we do not require tag check because we may be applying memory changes after
// several repetitive restarts and the same pages may have changed several times.
int groupId = pageSnapshot.fullPageId().groupId();
int partId = partId(pageSnapshot.fullPageId().pageId());
if (skipRemovedIndexUpdates(groupId, partId))
break;
stripedApplyPage((pageMem) -> {
try {
applyPageSnapshot(pageMem, pageSnapshot);
applied.incrementAndGet();
} catch (Throwable t) {
U.error(log, "Failed to apply page snapshot. rec=[" + pageSnapshot + ']');
applyError.compareAndSet(null, (t instanceof IgniteCheckedException) ? (IgniteCheckedException) t : new IgniteCheckedException("Failed to apply page snapshot", t));
}
}, groupId, partId, exec, semaphore);
}
break;
case PART_META_UPDATE_STATE:
PartitionMetaStateRecord metaStateRecord = (PartitionMetaStateRecord) rec;
{
int groupId = metaStateRecord.groupId();
int partId = metaStateRecord.partitionId();
stripedApplyPage((pageMem) -> {
GridDhtPartitionState state = fromOrdinal(metaStateRecord.state());
if (state == null || state == GridDhtPartitionState.EVICTED)
schedulePartitionDestroy(groupId, partId);
else {
try {
cancelOrWaitPartitionDestroy(groupId, partId);
} catch (Throwable t) {
U.error(log, "Failed to cancel or wait partition destroy. rec=[" + metaStateRecord + ']');
applyError.compareAndSet(null, (t instanceof IgniteCheckedException) ? (IgniteCheckedException) t : new IgniteCheckedException("Failed to cancel or wait partition destroy", t));
}
}
}, groupId, partId, exec, semaphore);
}
break;
case PARTITION_DESTROY:
PartitionDestroyRecord destroyRecord = (PartitionDestroyRecord) rec;
{
int groupId = destroyRecord.groupId();
int partId = destroyRecord.partitionId();
stripedApplyPage((pageMem) -> {
pageMem.invalidate(groupId, partId);
schedulePartitionDestroy(groupId, partId);
}, groupId, partId, exec, semaphore);
}
break;
default:
if (restoreBinaryState.needApplyBinaryUpdate() && rec instanceof PageDeltaRecord) {
PageDeltaRecord pageDelta = (PageDeltaRecord) rec;
int groupId = pageDelta.groupId();
int partId = partId(pageDelta.pageId());
if (skipRemovedIndexUpdates(groupId, partId))
break;
stripedApplyPage((pageMem) -> {
try {
applyPageDelta(pageMem, pageDelta, true);
applied.incrementAndGet();
} catch (Throwable t) {
U.error(log, "Failed to apply page delta. rec=[" + pageDelta + ']');
applyError.compareAndSet(null, (t instanceof IgniteCheckedException) ? (IgniteCheckedException) t : new IgniteCheckedException("Failed to apply page delta", t));
}
}, groupId, partId, exec, semaphore);
}
}
}
} finally {
it.close();
awaitApplyComplete(exec, applyError);
}
if (!finalizeState)
return null;
WALPointer lastReadPtr = restoreBinaryState.lastReadRecordPointer();
if (status.needRestoreMemory()) {
if (restoreBinaryState.needApplyBinaryUpdate())
throw new StorageException("Failed to restore memory state (checkpoint marker is present " + "on disk, but checkpoint record is missed in WAL) " + "[cpStatus=" + status + ", lastRead=" + lastReadPtr + "]");
if (log.isInfoEnabled())
log.info("Finished applying memory changes [changesApplied=" + applied + ", time=" + (U.currentTimeMillis() - start) + " ms]");
finalizeCheckpointOnRecovery(status.cpStartTs, status.cpStartId, status.startPtr, exec);
}
return restoreBinaryState;
}
Aggregations