Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState.OWNING in project ignite by apache.
The class GridDhtPartitionTopologyImpl, method resetOwners.
/**
* {@inheritDoc}
*/
@Override
public Map<UUID, Set<Integer>> resetOwners(Map<Integer, Set<UUID>> ownersByUpdCounters, Set<Integer> haveHist, GridDhtPartitionsExchangeFuture exchFut) {
Map<UUID, Set<Integer>> res = new HashMap<>();
Collection<DiscoveryEvent> evts = exchFut.events().events();
Set<UUID> joinedNodes = U.newHashSet(evts.size());
for (DiscoveryEvent evt : evts) {
if (evt.type() == EVT_NODE_JOINED)
joinedNodes.add(evt.eventNode().id());
}
ctx.database().checkpointReadLock();
try {
Map<UUID, Set<Integer>> addToWaitGroups = new HashMap<>();
lock.writeLock().lock();
try {
// First process local partitions.
UUID locNodeId = ctx.localNodeId();
for (Map.Entry<Integer, Set<UUID>> entry : ownersByUpdCounters.entrySet()) {
int part = entry.getKey();
Set<UUID> maxCounterPartOwners = entry.getValue();
GridDhtLocalPartition locPart = localPartition(part);
if (locPart == null || locPart.state() != OWNING)
continue;
// Partition state should be mutated only on joining nodes, if any exist for the exchange.
if (joinedNodes.isEmpty() && !maxCounterPartOwners.contains(locNodeId)) {
rebalancePartition(part, !haveHist.contains(part), exchFut);
res.computeIfAbsent(locNodeId, n -> new HashSet<>()).add(part);
}
}
// Then process node maps.
for (Map.Entry<Integer, Set<UUID>> entry : ownersByUpdCounters.entrySet()) {
int part = entry.getKey();
Set<UUID> maxCounterPartOwners = entry.getValue();
for (Map.Entry<UUID, GridDhtPartitionMap> remotes : node2part.entrySet()) {
UUID remoteNodeId = remotes.getKey();
if (!joinedNodes.isEmpty() && !joinedNodes.contains(remoteNodeId))
continue;
GridDhtPartitionMap partMap = remotes.getValue();
GridDhtPartitionState state = partMap.get(part);
if (state != OWNING)
continue;
if (!maxCounterPartOwners.contains(remoteNodeId)) {
partMap.put(part, MOVING);
partMap.updateSequence(partMap.updateSequence() + 1, partMap.topologyVersion());
if (partMap.nodeId().equals(locNodeId))
updateSeq.setIfGreater(partMap.updateSequence());
res.computeIfAbsent(remoteNodeId, n -> new HashSet<>()).add(part);
}
}
}
for (Map.Entry<UUID, Set<Integer>> entry : res.entrySet()) {
UUID nodeId = entry.getKey();
Set<Integer> rebalancedParts = entry.getValue();
addToWaitGroups.put(nodeId, new HashSet<>(rebalancedParts));
if (!rebalancedParts.isEmpty()) {
Set<Integer> historical = rebalancedParts.stream().filter(haveHist::contains).collect(Collectors.toSet());
// Filter out partitions having WAL history.
rebalancedParts.removeAll(historical);
U.warn(log, "Partitions have been scheduled for rebalancing due to outdated update counter " + "[grp=" + grp.cacheOrGroupName() + ", readyTopVer=" + readyTopVer + ", topVer=" + exchFut.initialVersion() + ", nodeId=" + nodeId + ", partsFull=" + S.compact(rebalancedParts) + ", partsHistorical=" + S.compact(historical) + "]");
}
}
node2part = new GridDhtPartitionFullMap(node2part, updateSeq.incrementAndGet());
} finally {
lock.writeLock().unlock();
}
List<List<ClusterNode>> ideal = ctx.affinity().affinity(groupId()).idealAssignmentRaw();
for (Map.Entry<UUID, Set<Integer>> entry : addToWaitGroups.entrySet()) {
// Add to wait groups to ensure late assignment switch after all partitions are rebalanced.
for (Integer part : entry.getValue()) {
ctx.cache().context().affinity().addToWaitGroup(groupId(), part, topologyVersionFuture().initialVersion(), ideal.get(part));
}
}
} finally {
ctx.database().checkpointReadUnlock();
}
return res;
}
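The essence of this method is a single decision: any node whose copy of a partition is OWNING but which is not among the owners holding the maximal update counter is demoted to MOVING and scheduled for rebalancing. Below is a minimal, self-contained sketch of just that filtering step over plain collections; the PartitionState enum and the simplified signature are illustrative assumptions, not Ignite's internal API.

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.UUID;

/** Minimal sketch of the owner-reset decision (illustrative only, not the Ignite API). */
public class ResetOwnersSketch {
    enum PartitionState { OWNING, MOVING }

    /**
     * Demotes every OWNING copy held by a node that is not among the max-update-counter owners
     * to MOVING and returns a map of nodeId to the partitions scheduled for rebalancing.
     */
    static Map<UUID, Set<Integer>> resetOwners(
        Map<Integer, Set<UUID>> ownersByUpdCounters,
        Map<UUID, Map<Integer, PartitionState>> node2part
    ) {
        Map<UUID, Set<Integer>> res = new HashMap<>();

        for (Map.Entry<Integer, Set<UUID>> e : ownersByUpdCounters.entrySet()) {
            int part = e.getKey();
            Set<UUID> maxCntrOwners = e.getValue();

            for (Map.Entry<UUID, Map<Integer, PartitionState>> n : node2part.entrySet()) {
                Map<Integer, PartitionState> partMap = n.getValue();

                // Only OWNING copies that are behind the maximal update counter are reset.
                if (partMap.get(part) == PartitionState.OWNING && !maxCntrOwners.contains(n.getKey())) {
                    partMap.put(part, PartitionState.MOVING);
                    res.computeIfAbsent(n.getKey(), k -> new HashSet<>()).add(part);
                }
            }
        }

        return res;
    }
}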
Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState.OWNING in project ignite by apache.
The class SnapshotPartitionsVerifyHandler, method invoke.
/**
* {@inheritDoc}
*/
@Override
public Map<PartitionKeyV2, PartitionHashRecordV2> invoke(SnapshotHandlerContext opCtx) throws IgniteCheckedException {
SnapshotMetadata meta = opCtx.metadata();
Set<Integer> grps = F.isEmpty(opCtx.groups()) ? new HashSet<>(meta.partitions().keySet()) : opCtx.groups().stream().map(CU::cacheId).collect(Collectors.toSet());
Set<File> partFiles = new HashSet<>();
IgniteSnapshotManager snpMgr = cctx.snapshotMgr();
for (File dir : snpMgr.snapshotCacheDirectories(meta.snapshotName(), meta.folderName())) {
int grpId = CU.cacheId(cacheGroupName(dir));
if (!grps.remove(grpId))
continue;
Set<Integer> parts = meta.partitions().get(grpId) == null ? Collections.emptySet() : new HashSet<>(meta.partitions().get(grpId));
for (File part : cachePartitionFiles(dir)) {
int partId = partId(part.getName());
if (!parts.remove(partId))
continue;
partFiles.add(part);
}
if (!parts.isEmpty()) {
throw new IgniteException("Snapshot data doesn't contain required cache group partition " + "[grpId=" + grpId + ", snpName=" + meta.snapshotName() + ", consId=" + meta.consistentId() + ", missed=" + parts + ", meta=" + meta + ']');
}
}
if (!grps.isEmpty()) {
throw new IgniteException("Snapshot data doesn't contain required cache groups " + "[grps=" + grps + ", snpName=" + meta.snapshotName() + ", consId=" + meta.consistentId() + ", meta=" + meta + ']');
}
Map<PartitionKeyV2, PartitionHashRecordV2> res = new ConcurrentHashMap<>();
ThreadLocal<ByteBuffer> buff = ThreadLocal.withInitial(() -> ByteBuffer.allocateDirect(meta.pageSize()).order(ByteOrder.nativeOrder()));
GridKernalContext snpCtx = snpMgr.createStandaloneKernalContext(meta.snapshotName(), meta.folderName());
for (GridComponent comp : snpCtx) comp.start();
try {
U.doInParallel(snpMgr.snapshotExecutorService(), partFiles, part -> {
String grpName = cacheGroupName(part.getParentFile());
int grpId = CU.cacheId(grpName);
int partId = partId(part.getName());
FilePageStoreManager storeMgr = (FilePageStoreManager) cctx.pageStore();
try (FilePageStore pageStore = (FilePageStore) storeMgr.getPageStoreFactory(grpId, false).createPageStore(getTypeByPartId(partId), part::toPath, val -> {})) {
if (partId == INDEX_PARTITION) {
checkPartitionsPageCrcSum(() -> pageStore, INDEX_PARTITION, FLAG_IDX);
return null;
}
if (grpId == MetaStorage.METASTORAGE_CACHE_ID) {
checkPartitionsPageCrcSum(() -> pageStore, partId, FLAG_DATA);
return null;
}
ByteBuffer pageBuff = buff.get();
pageBuff.clear();
pageStore.read(0, pageBuff, true);
long pageAddr = GridUnsafe.bufferAddress(pageBuff);
PagePartitionMetaIO io = PageIO.getPageIO(pageBuff);
GridDhtPartitionState partState = fromOrdinal(io.getPartitionState(pageAddr));
if (partState != OWNING) {
throw new IgniteCheckedException("Snapshot partitions must be in the OWNING " + "state only: " + partState);
}
long updateCntr = io.getUpdateCounter(pageAddr);
long size = io.getSize(pageAddr);
if (log.isDebugEnabled()) {
log.debug("Partition [grpId=" + grpId + ", id=" + partId + ", counter=" + updateCntr + ", size=" + size + "]");
}
// Snapshot partitions must always be in OWNING state.
// There are no `primary` partitions in a snapshot.
PartitionKeyV2 key = new PartitionKeyV2(grpId, partId, grpName);
PartitionHashRecordV2 hash = calculatePartitionHash(key, updateCntr, meta.consistentId(), GridDhtPartitionState.OWNING, false, size, snpMgr.partitionRowIterator(snpCtx, grpName, partId, pageStore));
assert hash != null : "OWNING must have hash: " + key;
res.put(key, hash);
} catch (IOException e) {
throw new IgniteCheckedException(e);
}
return null;
});
} catch (Throwable t) {
log.error("Error executing handler: ", t);
throw t;
} finally {
for (GridComponent comp : snpCtx) comp.stop(true);
}
return res;
}
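Two invariants are enforced before hashing: every requested cache group and partition must be present among the snapshot files, and every partition meta page must report the OWNING state. The following standalone sketch isolates the completeness check using plain collections; the method name and exception text are assumptions made for illustration, not the Ignite implementation.

import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

/** Standalone sketch of the snapshot completeness check (illustrative only, not the Ignite code). */
public class SnapshotContentCheck {
    /** Fails if any requested cache group or partition is absent from the discovered snapshot files. */
    static void ensureComplete(Map<Integer, Set<Integer>> expected, Map<Integer, Set<Integer>> found) {
        Map<Integer, Set<Integer>> missedParts = new HashMap<>();

        for (Map.Entry<Integer, Set<Integer>> e : expected.entrySet()) {
            Set<Integer> missed = new HashSet<>(e.getValue());
            missed.removeAll(found.getOrDefault(e.getKey(), Collections.emptySet()));

            if (!missed.isEmpty())
                missedParts.put(e.getKey(), missed);
        }

        Set<Integer> missedGrps = new HashSet<>(expected.keySet());
        missedGrps.removeAll(found.keySet());

        if (!missedGrps.isEmpty() || !missedParts.isEmpty())
            throw new IllegalStateException("Snapshot data is incomplete [missedGroups=" + missedGrps +
                ", missedPartitions=" + missedParts + ']');
    }
}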
Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState.OWNING in project ignite by apache.
The class WalScannerTest, method shouldFindCorrectRecordsForMoreThanOnePages.
/**
* @throws Exception If failed.
*/
@Test
public void shouldFindCorrectRecordsForMoreThanOnePages() throws Exception {
// given: Iterator with random records and the records which should be found by the scanner, for several page ids.
long expPageId1 = 984;
long expPageId2 = 9584;
long expPageId3 = 98344;
int grpId = 123;
PageSnapshot expPageSnapshot = new PageSnapshot(new FullPageId(expPageId1, grpId), dummyPage(1024, expPageId1), 1024);
CheckpointRecord expCheckpoint = new CheckpointRecord(new WALPointer(5738, 0, 0));
FixCountRecord expDeltaPage1 = new FixCountRecord(grpId, expPageId2, 4);
FixCountRecord expDeltaPage2 = new FixCountRecord(grpId, expPageId3, 4);
WALIterator mockedIter = mockWalIterator(
    new IgniteBiTuple<>(NULL_PTR, expPageSnapshot),
    new IgniteBiTuple<>(NULL_PTR, new PageSnapshot(new FullPageId(455, grpId), dummyPage(1024, 455), 1024)),
    new IgniteBiTuple<>(NULL_PTR, expCheckpoint),
    new IgniteBiTuple<>(NULL_PTR, new MetastoreDataRecord("key", new byte[0])),
    new IgniteBiTuple<>(NULL_PTR, new PartitionMetaStateRecord(grpId, 1, OWNING, 1)),
    new IgniteBiTuple<>(NULL_PTR, expDeltaPage1),
    new IgniteBiTuple<>(NULL_PTR, new FixCountRecord(grpId, 98348, 4)),
    new IgniteBiTuple<>(NULL_PTR, new PartitionMetaStateRecord(grpId, 1, OWNING, 1)),
    new IgniteBiTuple<>(NULL_PTR, expDeltaPage2));
IgniteWalIteratorFactory mockedFactory = mock(IgniteWalIteratorFactory.class);
when(mockedFactory.iterator(any(IteratorParametersBuilder.class))).thenReturn(mockedIter);
List<WALRecord> holder = new ArrayList<>();
ScannerHandler recordCaptor = (rec) -> holder.add(rec.get2());
Set<T2<Integer, Long>> groupAndPageIds = new HashSet<>();
groupAndPageIds.add(new T2<>(grpId, expPageId1));
groupAndPageIds.add(new T2<>(grpId, expPageId2));
groupAndPageIds.add(new T2<>(grpId, expPageId3));
// when: Scanning the WAL for the expected pages.
buildWalScanner(withIteratorParameters(), mockedFactory).findAllRecordsFor(groupAndPageIds).forEach(recordCaptor);
// then: Only the expected records should be found.
assertEquals(4, holder.size());
assertEquals(expPageSnapshot, holder.get(0));
assertEquals(expCheckpoint, holder.get(1));
assertEquals(expDeltaPage1, holder.get(2));
assertEquals(expDeltaPage2, holder.get(3));
}
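What this test asserts is the scanner's filtering rule: checkpoint records are always kept, while page snapshots and page delta records are kept only when their (groupId, pageId) pair was requested; everything else (metastore data, partition state changes such as OWNING) is dropped. The sketch below models that rule with simplified record types; Rec, Checkpoint and PageRec are placeholders, not the real WALRecord hierarchy.

import java.util.AbstractMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

/** Simplified model of the scanner's filtering rule (illustrative placeholders only). */
public class WalFilterSketch {
    static class Rec { }

    static class Checkpoint extends Rec { }

    /** Stands in for both page snapshots and page delta records. */
    static class PageRec extends Rec {
        final int grpId;
        final long pageId;

        PageRec(int grpId, long pageId) {
            this.grpId = grpId;
            this.pageId = pageId;
        }
    }

    /** Keeps checkpoint records plus page records whose (grpId, pageId) pair was requested. */
    static List<Rec> filter(List<Rec> wal, Set<Map.Entry<Integer, Long>> groupAndPageIds) {
        return wal.stream()
            .filter(r -> r instanceof Checkpoint
                || (r instanceof PageRec
                    && groupAndPageIds.contains(
                        new AbstractMap.SimpleEntry<>(((PageRec)r).grpId, ((PageRec)r).pageId))))
            .collect(Collectors.toList());
    }
}

Modeled with these simplified types, the nine mocked records and three requested pages above would yield exactly four kept records (the page snapshot, the checkpoint and the two delta records), matching the test's assertions.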
Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState.OWNING in project ignite by apache.
The class SchemaIndexCachePartitionWorker, method processPartition.
/**
* Process partition.
*
* @throws IgniteCheckedException If failed.
*/
private void processPartition() throws IgniteCheckedException {
if (stop())
return;
checkCancelled();
boolean reserved = false;
GridDhtPartitionState partState = locPart.state();
if (partState != EVICTED)
reserved = (partState == OWNING || partState == MOVING || partState == LOST) && locPart.reserve();
if (!reserved)
return;
try {
GridCursor<? extends CacheDataRow> cursor = locPart.dataStore().cursor(cctx.cacheId(), null, null, KEY_ONLY);
boolean locked = false;
try {
int cntr = 0;
while (!stop() && cursor.next()) {
KeyCacheObject key = cursor.get().key();
if (!locked) {
cctx.shared().database().checkpointReadLock();
locked = true;
}
processKey(key);
if (++cntr % batchSize == 0) {
cctx.shared().database().checkpointReadUnlock();
locked = false;
}
cctx.cache().metrics0().addIndexRebuildKeyProcessed(1);
if (locPart.state() == RENTING)
break;
}
wrappedClo.addNumberProcessedKeys(cntr);
} finally {
if (locked)
cctx.shared().database().checkpointReadUnlock();
}
} finally {
locPart.release();
if (partsCnt.getAndUpdate(v -> v > 0 ? v - 1 : 0) > 0)
cctx.group().metrics().decrementIndexBuildCountPartitionsLeft();
}
}
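The loop above illustrates a batching pattern: the checkpoint read lock is taken lazily when the first key arrives and released after every batchSize keys, so a long index rebuild never pins a checkpoint for the whole partition. Here is a generic, hedged sketch of the same pattern using a plain java.util.concurrent ReadWriteLock in place of Ignite's checkpoint lock.

import java.util.Iterator;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.function.Consumer;

/** Generic sketch of the "process keys in batches under a read lock" pattern (illustrative only). */
public class BatchedLockProcessing {
    /** Applies {@code action} to every key under {@code lock.readLock()}, releasing it every {@code batchSize} keys. */
    static <K> void processInBatches(Iterator<K> keys, Consumer<K> action, ReadWriteLock lock, int batchSize) {
        boolean locked = false;
        int cntr = 0;

        try {
            while (keys.hasNext()) {
                K key = keys.next();

                // Take the lock lazily, only when there is work to do.
                if (!locked) {
                    lock.readLock().lock();
                    locked = true;
                }

                action.accept(key);

                // Periodically release the lock so writers (e.g. a checkpointer) can make progress.
                if (++cntr % batchSize == 0) {
                    lock.readLock().unlock();
                    locked = false;
                }
            }
        }
        finally {
            if (locked)
                lock.readLock().unlock();
        }
    }
}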
Use of org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState.OWNING in project ignite by apache.
The class WalScannerTest, method shouldFindCorrectRecords.
/**
* @throws Exception If failed.
*/
@Test
public void shouldFindCorrectRecords() throws Exception {
// given: Iterator with random records and the records which should be found by the scanner.
long expPageId = 984;
int grpId = 123;
PageSnapshot expPageSnapshot = new PageSnapshot(new FullPageId(expPageId, grpId), dummyPage(1024, expPageId), 1024);
CheckpointRecord expCheckpoint = new CheckpointRecord(new WALPointer(5738, 0, 0));
FixCountRecord expDeltaPage = new FixCountRecord(grpId, expPageId, 4);
WALIterator mockedIter = mockWalIterator(
    new IgniteBiTuple<>(NULL_PTR, expPageSnapshot),
    new IgniteBiTuple<>(NULL_PTR, new PageSnapshot(new FullPageId(455, grpId), dummyPage(4096, 455), 1024)),
    new IgniteBiTuple<>(NULL_PTR, expCheckpoint),
    new IgniteBiTuple<>(NULL_PTR, new MetastoreDataRecord("key", new byte[0])),
    new IgniteBiTuple<>(NULL_PTR, new PartitionMetaStateRecord(grpId, 1, OWNING, 1)),
    new IgniteBiTuple<>(NULL_PTR, expDeltaPage),
    new IgniteBiTuple<>(NULL_PTR, new FixCountRecord(grpId, 98348, 4)));
IgniteWalIteratorFactory mockedFactory = mock(IgniteWalIteratorFactory.class);
when(mockedFactory.iterator(any(IteratorParametersBuilder.class))).thenReturn(mockedIter);
// Test scanner handler that holds the found values instead of printing them.
List<WALRecord> holder = new ArrayList<>();
ScannerHandler recordCaptor = (rec) -> holder.add(rec.get2());
Set<T2<Integer, Long>> groupAndPageIds = new HashSet<>();
groupAndPageIds.add(new T2<>(grpId, expPageId));
// when: Scanning the WAL for the expected page.
buildWalScanner(withIteratorParameters(), mockedFactory).findAllRecordsFor(groupAndPageIds).forEach(recordCaptor);
// then: Only the expected records should be found.
assertEquals(3, holder.size());
assertEquals(expPageSnapshot, holder.get(0));
assertEquals(expCheckpoint, holder.get(1));
assertEquals(expDeltaPage, holder.get(2));
}