Use of org.apache.ignite.internal.pagemem.PageIdAllocator.INDEX_PARTITION in project ignite by apache.
The class LoadAllWarmUpStrategy, method loadDataInfo.
/**
 * Calculates the cache groups, partitions, and number of pages that can be
 * loaded into the data region. For each group the calculation starts with,
 * and includes, the index partition.
 *
 * @param region Data region.
 * @return Loadable groups and partitions.
 * @throws IgniteCheckedException If failed.
 */
protected Map<CacheGroupContext, List<LoadPartition>> loadDataInfo(DataRegion region) throws IgniteCheckedException {
    // Get cache groups of the data region.
    List<CacheGroupContext> regionGrps = grpCtxSup.get().stream()
        .filter(grpCtx -> region.equals(grpCtx.dataRegion())).collect(toList());

    long availableLoadPageCnt = availableLoadPageCount(region);

    // Compute groups, partitions, and pages to load into the data region.
    Map<CacheGroupContext, List<LoadPartition>> loadableGrps = new LinkedHashMap<>();

    for (int i = 0; i < regionGrps.size() && availableLoadPageCnt > 0; i++) {
        CacheGroupContext grp = regionGrps.get(i);

        // The index partition is loaded first.
        List<GridDhtLocalPartition> locParts = grp.topology().localPartitions();

        for (int j = -1; j < locParts.size() && availableLoadPageCnt > 0; j++) {
            int p = j == -1 ? INDEX_PARTITION : locParts.get(j).id();

            long partPageCnt = grp.shared().pageStore().pages(grp.groupId(), p);

            if (partPageCnt > 0) {
                long pageCnt = (availableLoadPageCnt - partPageCnt) >= 0 ? partPageCnt : availableLoadPageCnt;

                availableLoadPageCnt -= pageCnt;

                loadableGrps.computeIfAbsent(grp, grpCtx -> new ArrayList<>()).add(new LoadPartition(p, pageCnt));
            }
        }
    }

    return loadableGrps;
}
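Stripped of the Ignite types, the method is a page-budget loop: the index partition is charged against the budget first, and the last partition that does not fully fit is truncated to whatever budget remains. A minimal standalone sketch of that accounting (hypothetical Part/Load records, Java 16+, not Ignite API):

import java.util.ArrayList;
import java.util.List;

public class PageBudgetSketch {
    record Part(int id, long pages) {}          // hypothetical stand-in for a partition
    record Load(int partId, long pageCnt) {}    // hypothetical stand-in for LoadPartition

    /** Charges partitions against a page budget; the last partition may be loaded partially. */
    static List<Load> plan(List<Part> parts, long budget) {
        List<Load> res = new ArrayList<>();

        for (Part p : parts) {
            if (budget <= 0)
                break;

            long cnt = Math.min(p.pages(), budget);

            if (cnt > 0) {
                budget -= cnt;
                res.add(new Load(p.id(), cnt));
            }
        }

        return res;
    }

    public static void main(String[] args) {
        // Budget of 100 pages over partitions of 40, 50 and 30 pages:
        // the third partition is truncated to the remaining 10 pages.
        System.out.println(plan(List.of(new Part(-1, 40), new Part(0, 50), new Part(1, 30)), 100));
    }
}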
Use of org.apache.ignite.internal.pagemem.PageIdAllocator.INDEX_PARTITION in project ignite by apache.
The class SnapshotRestoreProcess, method preload.
/**
* @param reqId Request id.
* @return Future which will be completed when the preload ends.
*/
private IgniteInternalFuture<Boolean> preload(UUID reqId) {
    if (ctx.clientNode())
        return new GridFinishedFuture<>();

    SnapshotRestoreContext opCtx0 = opCtx;

    GridFutureAdapter<Boolean> retFut = new GridFutureAdapter<>();

    if (opCtx0 == null)
        return new GridFinishedFuture<>(new IgniteCheckedException("Snapshot restore process has incorrect restore state: " + reqId));

    if (opCtx0.dirs.isEmpty())
        return new GridFinishedFuture<>();

    try {
        if (ctx.isStopping())
            throw new NodeStoppingException("Node is stopping: " + ctx.localNodeId());

        Set<SnapshotMetadata> allMetas = opCtx0.metasPerNode.values().stream().flatMap(List::stream).collect(Collectors.toSet());

        AbstractSnapshotVerificationTask.checkMissedMetadata(allMetas);

        IgniteSnapshotManager snpMgr = ctx.cache().context().snapshotMgr();

        synchronized (this) {
            opCtx0.stopFut = new IgniteFutureImpl<>(retFut.chain(f -> null));
        }

        if (log.isInfoEnabled()) {
            log.info("Starting snapshot preload operation to restore cache groups " +
                "[reqId=" + reqId + ", snapshot=" + opCtx0.snpName +
                ", caches=" + F.transform(opCtx0.dirs, FilePageStoreManager::cacheGroupName) + ']');
        }

        CompletableFuture<Void> metaFut = ctx.localNodeId().equals(opCtx0.opNodeId) ?
            CompletableFuture.runAsync(() -> {
                try {
                    SnapshotMetadata meta = F.first(opCtx0.metasPerNode.get(opCtx0.opNodeId));

                    File binDir = binaryWorkDir(snpMgr.snapshotLocalDir(opCtx0.snpName).getAbsolutePath(), meta.folderName());

                    ctx.cacheObjects().updateMetadata(binDir, opCtx0.stopChecker);
                }
                catch (Throwable t) {
                    log.error("Unable to perform metadata update operation for the cache groups restore process", t);

                    opCtx0.errHnd.accept(t);
                }
            }, snpMgr.snapshotExecutorService()) : CompletableFuture.completedFuture(null);

        Map<String, GridAffinityAssignmentCache> affCache = new HashMap<>();

        for (StoredCacheData data : opCtx0.cfgs.values()) {
            affCache.computeIfAbsent(CU.cacheOrGroupName(data.config()),
                grp -> calculateAffinity(ctx, data.config(), opCtx0.discoCache));
        }

        Map<Integer, Set<PartitionRestoreFuture>> allParts = new HashMap<>();
        Map<Integer, Set<PartitionRestoreFuture>> rmtLoadParts = new HashMap<>();

        ClusterNode locNode = ctx.cache().context().localNode();
        List<SnapshotMetadata> locMetas = opCtx0.metasPerNode.get(locNode.id());

        // First, preload everything from the local node.
        for (File dir : opCtx0.dirs) {
            String cacheOrGrpName = cacheGroupName(dir);
            int grpId = CU.cacheId(cacheOrGrpName);

            File tmpCacheDir = formatTmpDirName(dir);
            tmpCacheDir.mkdir();

            Set<PartitionRestoreFuture> leftParts;

            // Partitions contained in the snapshot.
            Set<Integer> availParts = new HashSet<>();

            for (SnapshotMetadata meta : allMetas) {
                Set<Integer> parts = meta.partitions().get(grpId);

                if (parts != null)
                    availParts.addAll(parts);
            }

            List<List<ClusterNode>> assignment = affCache.get(cacheOrGrpName).idealAssignment().assignment();

            Set<PartitionRestoreFuture> partFuts = availParts.stream()
                .filter(p -> p != INDEX_PARTITION && assignment.get(p).contains(locNode))
                .map(p -> new PartitionRestoreFuture(p, opCtx0.processedParts))
                .collect(Collectors.toSet());

            allParts.put(grpId, partFuts);
            rmtLoadParts.put(grpId, leftParts = new HashSet<>(partFuts));

            if (leftParts.isEmpty())
                continue;

            SnapshotMetadata full = findMetadataWithSamePartitions(locMetas, grpId,
                leftParts.stream().map(p -> p.partId).collect(Collectors.toSet()));

            for (SnapshotMetadata meta : full == null ? locMetas : Collections.singleton(full)) {
                if (leftParts.isEmpty())
                    break;

                File snpCacheDir = new File(ctx.cache().context().snapshotMgr().snapshotLocalDir(opCtx0.snpName),
                    Paths.get(databaseRelativePath(meta.folderName()), dir.getName()).toString());

                leftParts.removeIf(partFut -> {
                    boolean doCopy = ofNullable(meta.partitions().get(grpId))
                        .orElse(Collections.emptySet())
                        .contains(partFut.partId);

                    if (doCopy)
                        copyLocalAsync(ctx.cache().context().snapshotMgr(), opCtx0, snpCacheDir, tmpCacheDir, partFut);

                    return doCopy;
                });

                if (meta == full) {
                    assert leftParts.isEmpty() : leftParts;

                    if (log.isInfoEnabled()) {
                        log.info("The snapshot was taken on the same cluster topology. The index will be copied to " +
                            "the restoring cache group if necessary [reqId=" + reqId + ", snapshot=" + opCtx0.snpName +
                            ", dir=" + dir.getName() + ']');
                    }

                    File idxFile = new File(snpCacheDir, FilePageStoreManager.getPartitionFileName(INDEX_PARTITION));

                    if (idxFile.exists()) {
                        PartitionRestoreFuture idxFut;

                        allParts.computeIfAbsent(grpId, g -> new HashSet<>())
                            .add(idxFut = new PartitionRestoreFuture(INDEX_PARTITION, opCtx0.processedParts));

                        copyLocalAsync(ctx.cache().context().snapshotMgr(), opCtx0, snpCacheDir, tmpCacheDir, idxFut);
                    }
                }
            }
        }

        // Load the remaining partitions from remote nodes.
        List<PartitionRestoreFuture> rmtAwaitParts = rmtLoadParts.values().stream()
            .flatMap(Collection::stream).collect(Collectors.toList());

        // This ensures that only one partition request is sent to each cluster node.
        Map<UUID, Map<Integer, Set<Integer>>> snpAff = snapshotAffinity(
            opCtx0.metasPerNode.entrySet().stream()
                .filter(e -> !e.getKey().equals(ctx.localNodeId()))
                .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)),
            (grpId, partId) -> rmtLoadParts.get(grpId) != null &&
                rmtLoadParts.get(grpId).remove(new PartitionRestoreFuture(partId, opCtx0.processedParts)));

        Map<Integer, File> grpToDir = opCtx0.dirs.stream()
            .collect(Collectors.toMap(d -> CU.cacheId(FilePageStoreManager.cacheGroupName(d)), d -> d));

        try {
            if (log.isInfoEnabled() && !snpAff.isEmpty()) {
                log.info("Trying to request partitions from remote nodes " +
                    "[reqId=" + reqId + ", snapshot=" + opCtx0.snpName +
                    ", map=" + snpAff.entrySet().stream()
                        .collect(Collectors.toMap(Map.Entry::getKey, e -> partitionsMapToCompactString(e.getValue()))) + ']');
            }

            for (Map.Entry<UUID, Map<Integer, Set<Integer>>> m : snpAff.entrySet()) {
                ctx.cache().context().snapshotMgr().requestRemoteSnapshotFiles(m.getKey(), opCtx0.snpName, m.getValue(),
                    opCtx0.stopChecker, (snpFile, t) -> {
                        if (opCtx0.stopChecker.getAsBoolean())
                            throw new IgniteInterruptedException("Snapshot remote operation request cancelled.");

                        if (t == null) {
                            int grpId = CU.cacheId(cacheGroupName(snpFile.getParentFile()));
                            int partId = partId(snpFile.getName());

                            PartitionRestoreFuture partFut = F.find(allParts.get(grpId), null,
                                new IgnitePredicate<PartitionRestoreFuture>() {
                                    @Override public boolean apply(PartitionRestoreFuture f) {
                                        return f.partId == partId;
                                    }
                                });

                            assert partFut != null : snpFile.getAbsolutePath();

                            File tmpCacheDir = formatTmpDirName(grpToDir.get(grpId));
                            Path partFile = Paths.get(tmpCacheDir.getAbsolutePath(), snpFile.getName());

                            try {
                                Files.move(snpFile.toPath(), partFile);

                                partFut.complete(partFile);
                            }
                            catch (Exception e) {
                                opCtx0.errHnd.accept(e);

                                completeListExceptionally(rmtAwaitParts, e);
                            }
                        }
                        else {
                            opCtx0.errHnd.accept(t);

                            completeListExceptionally(rmtAwaitParts, t);
                        }
                    });
            }
        }
        catch (IgniteCheckedException e) {
            opCtx0.errHnd.accept(e);

            completeListExceptionally(rmtAwaitParts, e);
        }

        List<PartitionRestoreFuture> allPartFuts = allParts.values().stream()
            .flatMap(Collection::stream).collect(Collectors.toList());

        int size = allPartFuts.size();

        opCtx0.totalParts = size;

        CompletableFuture.allOf(allPartFuts.toArray(new CompletableFuture[size]))
            .runAfterBothAsync(metaFut, () -> {
                try {
                    if (opCtx0.stopChecker.getAsBoolean())
                        throw new IgniteInterruptedException("The operation has been stopped on temporary directory switch.");

                    for (File src : opCtx0.dirs)
                        Files.move(formatTmpDirName(src).toPath(), src.toPath(), StandardCopyOption.ATOMIC_MOVE);
                }
                catch (IOException e) {
                    throw new IgniteException(e);
                }
            }, snpMgr.snapshotExecutorService())
            .whenComplete((r, t) -> opCtx0.errHnd.accept(t))
            .whenComplete((res, t) -> {
                Throwable t0 = ofNullable(opCtx0.err.get()).orElse(t);

                if (t0 == null)
                    retFut.onDone(true);
                else {
                    log.error("Unable to restore cache group(s) from a snapshot " +
                        "[reqId=" + opCtx.reqId + ", snapshot=" + opCtx.snpName + ']', t0);

                    retFut.onDone(t0);
                }
            });
    }
    catch (Exception ex) {
        opCtx0.errHnd.accept(ex);

        return new GridFinishedFuture<>(ex);
    }

    return retFut;
}
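One subtlety: the snapshotAffinity(...) callback removes already-scheduled partitions by constructing a fresh PartitionRestoreFuture, so removal from the HashSet can only succeed if the future's equals/hashCode are based on the partition id alone. A minimal sketch of that contract (hypothetical simplified class, not the actual Ignite implementation):

import java.nio.file.Path;
import java.util.HashSet;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.CompletableFuture;

// Hypothetical simplification: identity is the partition id only,
// so a set element can be removed via a newly constructed instance.
class PartFuture extends CompletableFuture<Path> {
    final int partId;

    PartFuture(int partId) {
        this.partId = partId;
    }

    @Override public boolean equals(Object o) {
        return this == o || (o instanceof PartFuture && ((PartFuture)o).partId == partId);
    }

    @Override public int hashCode() {
        return Objects.hash(partId);
    }

    public static void main(String[] args) {
        Set<PartFuture> parts = new HashSet<>();
        parts.add(new PartFuture(5));

        // Succeeds even though it is a different instance than the one added.
        System.out.println(parts.remove(new PartFuture(5))); // true
    }
}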
Use of org.apache.ignite.internal.pagemem.PageIdAllocator.INDEX_PARTITION in project ignite by apache.
The class SnapshotPartitionsVerifyHandler, method invoke.
/**
* {@inheritDoc}
*/
@Override public Map<PartitionKeyV2, PartitionHashRecordV2> invoke(SnapshotHandlerContext opCtx) throws IgniteCheckedException {
    SnapshotMetadata meta = opCtx.metadata();

    Set<Integer> grps = F.isEmpty(opCtx.groups()) ? new HashSet<>(meta.partitions().keySet()) :
        opCtx.groups().stream().map(CU::cacheId).collect(Collectors.toSet());

    Set<File> partFiles = new HashSet<>();

    IgniteSnapshotManager snpMgr = cctx.snapshotMgr();

    for (File dir : snpMgr.snapshotCacheDirectories(meta.snapshotName(), meta.folderName())) {
        int grpId = CU.cacheId(cacheGroupName(dir));

        if (!grps.remove(grpId))
            continue;

        Set<Integer> parts = meta.partitions().get(grpId) == null ? Collections.emptySet() :
            new HashSet<>(meta.partitions().get(grpId));

        for (File part : cachePartitionFiles(dir)) {
            int partId = partId(part.getName());

            if (!parts.remove(partId))
                continue;

            partFiles.add(part);
        }

        if (!parts.isEmpty()) {
            throw new IgniteException("Snapshot data doesn't contain required cache group partition " +
                "[grpId=" + grpId + ", snpName=" + meta.snapshotName() + ", consId=" + meta.consistentId() +
                ", missed=" + parts + ", meta=" + meta + ']');
        }
    }

    if (!grps.isEmpty()) {
        throw new IgniteException("Snapshot data doesn't contain required cache groups " +
            "[grps=" + grps + ", snpName=" + meta.snapshotName() + ", consId=" + meta.consistentId() +
            ", meta=" + meta + ']');
    }

    Map<PartitionKeyV2, PartitionHashRecordV2> res = new ConcurrentHashMap<>();

    ThreadLocal<ByteBuffer> buff = ThreadLocal.withInitial(() ->
        ByteBuffer.allocateDirect(meta.pageSize()).order(ByteOrder.nativeOrder()));

    GridKernalContext snpCtx = snpMgr.createStandaloneKernalContext(meta.snapshotName(), meta.folderName());

    for (GridComponent comp : snpCtx)
        comp.start();

    try {
        U.doInParallel(snpMgr.snapshotExecutorService(), partFiles, part -> {
            String grpName = cacheGroupName(part.getParentFile());
            int grpId = CU.cacheId(grpName);
            int partId = partId(part.getName());

            FilePageStoreManager storeMgr = (FilePageStoreManager)cctx.pageStore();

            try (FilePageStore pageStore = (FilePageStore)storeMgr.getPageStoreFactory(grpId, false)
                .createPageStore(getTypeByPartId(partId), part::toPath, val -> {})
            ) {
                if (partId == INDEX_PARTITION) {
                    checkPartitionsPageCrcSum(() -> pageStore, INDEX_PARTITION, FLAG_IDX);

                    return null;
                }

                if (grpId == MetaStorage.METASTORAGE_CACHE_ID) {
                    checkPartitionsPageCrcSum(() -> pageStore, partId, FLAG_DATA);

                    return null;
                }

                ByteBuffer pageBuff = buff.get();
                pageBuff.clear();
                pageStore.read(0, pageBuff, true);

                long pageAddr = GridUnsafe.bufferAddress(pageBuff);

                PagePartitionMetaIO io = PageIO.getPageIO(pageBuff);
                GridDhtPartitionState partState = fromOrdinal(io.getPartitionState(pageAddr));

                if (partState != OWNING) {
                    throw new IgniteCheckedException("Snapshot partitions must be in the OWNING " +
                        "state only: " + partState);
                }

                long updateCntr = io.getUpdateCounter(pageAddr);
                long size = io.getSize(pageAddr);

                if (log.isDebugEnabled()) {
                    log.debug("Partition [grpId=" + grpId + ", id=" + partId + ", counter=" + updateCntr +
                        ", size=" + size + "]");
                }

                // Snapshot partitions must always be in the OWNING state;
                // there are no "primary" partitions in a snapshot.
                PartitionKeyV2 key = new PartitionKeyV2(grpId, partId, grpName);

                PartitionHashRecordV2 hash = calculatePartitionHash(key, updateCntr, meta.consistentId(),
                    GridDhtPartitionState.OWNING, false, size,
                    snpMgr.partitionRowIterator(snpCtx, grpName, partId, pageStore));

                assert hash != null : "OWNING must have hash: " + key;

                res.put(key, hash);
            }
            catch (IOException e) {
                throw new IgniteCheckedException(e);
            }

            return null;
        });
    }
    catch (Throwable t) {
        log.error("Error executing handler: ", t);

        throw t;
    }
    finally {
        for (GridComponent comp : snpCtx)
            comp.stop(true);
    }

    return res;
}
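A detail worth noting is the thread-local direct buffer: U.doInParallel fans the partition files out across the snapshot executor, and each worker reuses a single page-sized, native-ordered buffer instead of allocating one per file. A plain-Java sketch of the same pattern (file names, pool size, and page size are illustrative assumptions):

import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class ThreadLocalBufferSketch {
    static final int PAGE_SIZE = 4096; // assumption: matches the store's page size

    // One direct, native-ordered buffer per worker thread, reused across tasks.
    static final ThreadLocal<ByteBuffer> BUFF = ThreadLocal.withInitial(() ->
        ByteBuffer.allocateDirect(PAGE_SIZE).order(ByteOrder.nativeOrder()));

    public static void main(String[] args) {
        ExecutorService exec = Executors.newFixedThreadPool(4);

        for (String part : List.of("part-0.bin", "part-1.bin", "part-2.bin")) {
            exec.submit(() -> {
                ByteBuffer buf = BUFF.get();

                buf.clear(); // must reset position/limit before every reuse

                // ... read the partition's first page into 'buf' and decode it ...
                System.out.println(Thread.currentThread().getName() + " processed " + part);
            });
        }

        exec.shutdown();
    }
}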
Use of org.apache.ignite.internal.pagemem.PageIdAllocator.INDEX_PARTITION in project ignite by apache.
The class PageMemoryImplTest, method runThrottlingEmptifyCpBufFirst.
/**
 * @throws Exception If failed.
 */
public void runThrottlingEmptifyCpBufFirst(PageMemoryImpl.ThrottlingPolicy plc) throws Exception {
    TestPageStoreManager pageStoreMgr = new TestPageStoreManager();

    final List<FullPageId> allocated = new ArrayList<>();

    int pagesForStartThrottling = 10;

    // Number of pages polled from the checkpoint buffer for throttling.
    AtomicInteger cpBufferPollPages = new AtomicInteger();

    // Create a 1 MB page memory.
    PageMemoryImpl memory = createPageMemory(1, plc, pageStoreMgr, pageStoreMgr, (IgniteInClosure<FullPageId>)fullPageId -> {
        assertEquals(cpBufferPollPages.incrementAndGet(), pageStoreMgr.storedPages.size());
    });

    assert pagesForStartThrottling < memory.checkpointBufferPagesSize() / 3;

    for (int i = 0; i < pagesForStartThrottling + (memory.checkpointBufferPagesSize() * 2 / 3); i++) {
        long id = memory.allocatePage(1, INDEX_PARTITION, FLAG_IDX);

        FullPageId fullId = new FullPageId(id, 1);

        allocated.add(fullId);

        writePage(memory, fullId, (byte)1);
    }

    GridMultiCollectionWrapper<FullPageId> markedPages = memory.beginCheckpoint(new GridFinishedFuture());

    for (int i = 0; i < pagesForStartThrottling + (memory.checkpointBufferPagesSize() * 2 / 3); i++)
        writePage(memory, allocated.get(i), (byte)1);

    doCheckpoint(markedPages, memory, pageStoreMgr);

    // We expect 'pagesForStartThrottling - 1' polls: pagesForStartThrottling pages must be written from the
    // checkpoint buffer before throttling is disabled, but at least one page is always written outside of
    // throttling, and in this test that page is certainly also contained in the checkpoint buffer
    // (because all of the pages are in the checkpoint buffer).
    assertEquals(pagesForStartThrottling - 1, cpBufferPollPages.get());
}
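The helper takes the throttling policy as a parameter so the same accounting check can be reused across policies. Hypothetical JUnit wrappers might look like the following (assumed to live inside PageMemoryImplTest with org.junit.Test imported; the enum constants are an assumption about PageMemoryImpl.ThrottlingPolicy):

// Hypothetical wrappers: each policy reuses the same accounting check.
@Test
public void testThrottlingEmptifyCpBufFirstCheckpointBufferOnly() throws Exception {
    runThrottlingEmptifyCpBufFirst(PageMemoryImpl.ThrottlingPolicy.CHECKPOINT_BUFFER_ONLY);
}

@Test
public void testThrottlingEmptifyCpBufFirstSpeedBased() throws Exception {
    runThrottlingEmptifyCpBufFirst(PageMemoryImpl.ThrottlingPolicy.SPEED_BASED);
}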
Use of org.apache.ignite.internal.pagemem.PageIdAllocator.INDEX_PARTITION in project ignite by apache.
The class PageMemoryImplTest, method testCheckpointBufferCantOverflowWithThrottlingMixedLoad.
/**
* @throws Exception If failed.
*/
private void testCheckpointBufferCantOverflowWithThrottlingMixedLoad(PageMemoryImpl.ThrottlingPolicy plc) throws Exception {
    PageMemoryImpl memory = createPageMemory(plc, null);

    List<FullPageId> pages = new ArrayList<>();

    for (int i = 0; i < (MAX_SIZE - 10) * MB / PAGE_SIZE / 2; i++) {
        long pageId = memory.allocatePage(1, INDEX_PARTITION, FLAG_IDX);

        FullPageId fullPageId = new FullPageId(pageId, 1);

        pages.add(fullPageId);

        acquireAndReleaseWriteLock(memory, fullPageId);
    }

    memory.beginCheckpoint(new GridFinishedFuture());

    CheckpointMetricsTracker mockTracker = Mockito.mock(CheckpointMetricsTracker.class);

    for (FullPageId checkpointPage : pages)
        memory.checkpointWritePage(checkpointPage, ByteBuffer.allocate(PAGE_SIZE), (fullPageId, buffer, tag) -> {
            // No-op.
        }, mockTracker);

    memory.finishCheckpoint();

    for (int i = (int)((MAX_SIZE - 10) * MB / PAGE_SIZE / 2); i < (MAX_SIZE - 20) * MB / PAGE_SIZE; i++) {
        long pageId = memory.allocatePage(1, INDEX_PARTITION, FLAG_IDX);

        FullPageId fullPageId = new FullPageId(pageId, 1);

        pages.add(fullPageId);

        acquireAndReleaseWriteLock(memory, fullPageId);
    }

    memory.beginCheckpoint(new GridFinishedFuture());

    // Mix pages in checkpoint with clean pages.
    Collections.shuffle(pages);

    AtomicBoolean stop = new AtomicBoolean(false);

    try {
        GridTestUtils.runAsync(() -> {
            for (FullPageId page : pages) {
                // Mark dirty 50% of pages.
                if (ThreadLocalRandom.current().nextDouble() < 0.5) {
                    try {
                        acquireAndReleaseWriteLock(memory, page);

                        if (stop.get())
                            break;
                    }
                    catch (IgniteCheckedException e) {
                        log.error("runAsync ended with exception", e);

                        fail();
                    }
                }
            }
        }).get(5_000);
    }
    catch (IgniteFutureTimeoutCheckedException ignore) {
        // Expected.
    }
    finally {
        stop.set(true);
    }

    memory.finishCheckpoint();

    LongAdderMetric totalThrottlingTime = U.field(memory.metrics(), "totalThrottlingTime");

    assertNotNull(totalThrottlingTime);

    assertTrue(totalThrottlingTime.value() > 0);
}
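The heart of the mixed-load scenario is a bounded background writer that keeps dirtying a random half of the pages while the checkpoint drains them, until a stop flag is raised. The same shape can be sketched with plain java.util.concurrent primitives (dirtyPage is a hypothetical stand-in for acquireAndReleaseWriteLock):

import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicBoolean;

public class MixedLoadSketch {
    public static void main(String[] args) throws Exception {
        List<Integer> pages = List.of(1, 2, 3, 4, 5, 6, 7, 8);
        AtomicBoolean stop = new AtomicBoolean(false);

        ExecutorService exec = Executors.newSingleThreadExecutor();

        // Background writer: dirties ~50% of the pages until stopped.
        Future<?> fut = exec.submit(() -> {
            while (!stop.get()) {
                for (Integer page : pages) {
                    if (ThreadLocalRandom.current().nextDouble() < 0.5)
                        dirtyPage(page);

                    if (stop.get())
                        break;
                }
            }
        });

        try {
            fut.get(5_000, TimeUnit.MILLISECONDS); // bounded wait, like GridTestUtils.runAsync(...).get(5_000)
        }
        catch (TimeoutException expected) {
            // Expected: the writer loops until the stop flag is raised.
        }
        finally {
            stop.set(true);
            exec.shutdown();
        }
    }

    static void dirtyPage(int page) {
        // No-op in this sketch; the real test takes and releases a write lock here.
    }
}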