Use of org.apache.ignite.internal.processors.cache.IgniteRebalanceIterator in project ignite by apache.
The class GridDhtPartitionSupplier, method handleDemandMessage.
/**
* For each demand message the method looks up (or creates) a supply context and starts iterating entries across the requested partitions.
* Each entry from the iterator is placed into the prepared supply message.
*
* If the supply message size in bytes exceeds {@link IgniteConfiguration#getRebalanceBatchSize()},
* the method sends the message to the demander node and saves the partial iteration state to the supply context,
* then restores the context when a new demand message with the same context id arrives.
*
* @param topicId Id of the topic used for the supply-demand communication.
* @param nodeId Id of the node which sent the demand message.
* @param demandMsg Demand message.
*/
public void handleDemandMessage(int topicId, UUID nodeId, GridDhtPartitionDemandMessage demandMsg) {
assert demandMsg != null;
assert nodeId != null;
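// The supply context is keyed by the demander node, topic id and topology version, so follow-up demand messages resume the same iteration.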
T3<UUID, Integer, AffinityTopologyVersion> contextId = new T3<>(nodeId, topicId, demandMsg.topologyVersion());
if (demandMsg.rebalanceId() < 0) {
// Demand node requested context cleanup.
synchronized (scMap) {
SupplyContext sctx = scMap.get(contextId);
if (sctx != null && sctx.rebalanceId == -demandMsg.rebalanceId()) {
clearContext(scMap.remove(contextId), log);
if (log.isDebugEnabled())
log.debug("Supply context cleaned [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ", supplyContext=" + sctx + "]");
} else {
if (log.isDebugEnabled())
log.debug("Stale supply context cleanup message [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ", supplyContext=" + sctx + "]");
}
return;
}
}
ClusterNode demanderNode = grp.shared().discovery().node(nodeId);
if (demanderNode == null) {
if (log.isDebugEnabled())
log.debug("Demand message rejected (demander left cluster) [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + "]");
return;
}
IgniteRebalanceIterator iter = null;
SupplyContext sctx = null;
Set<Integer> remainingParts = null;
GridDhtPartitionSupplyMessage supplyMsg = new GridDhtPartitionSupplyMessage(demandMsg.rebalanceId(), grp.groupId(), demandMsg.topologyVersion(), grp.deploymentEnabled());
try {
synchronized (scMap) {
sctx = scMap.remove(contextId);
if (sctx != null && demandMsg.rebalanceId() < sctx.rebalanceId) {
// Stale message, put the context back and return.
scMap.put(contextId, sctx);
if (log.isDebugEnabled())
log.debug("Stale demand message [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ", actualContext=" + sctx + "]");
return;
}
}
// Demand request should not contain empty partitions if no supply context is associated with it.
if (sctx == null && (demandMsg.partitions() == null || demandMsg.partitions().isEmpty())) {
if (log.isDebugEnabled())
log.debug("Empty demand message (no context and partitions) [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + "]");
return;
}
if (log.isDebugEnabled())
log.debug("Demand message accepted [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + "]");
assert !(sctx != null && !demandMsg.partitions().isEmpty());
long maxBatchesCnt = /* Each thread should gain prefetched batches. */
grp.preloader().batchesPrefetchCount() * grp.shared().gridConfig().getRebalanceThreadPoolSize();
if (sctx == null) {
if (log.isDebugEnabled())
log.debug("Starting supplying rebalancing [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ", fullPartitions=" + S.compact(demandMsg.partitions().fullSet()) + ", histPartitions=" + S.compact(demandMsg.partitions().historicalSet()) + "]");
} else
maxBatchesCnt = 1;
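// Open a new rebalance iterator on the first demand message for this context; otherwise resume from the saved one.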
if (sctx == null || sctx.iterator == null) {
remainingParts = new HashSet<>(demandMsg.partitions().fullSet());
CachePartitionPartialCountersMap histMap = demandMsg.partitions().historicalMap();
for (int i = 0; i < histMap.size(); i++) {
int p = histMap.partitionAt(i);
remainingParts.add(p);
}
iter = grp.offheap().rebalanceIterator(demandMsg.partitions(), demandMsg.topologyVersion());
for (Integer part : demandMsg.partitions().fullSet()) {
if (iter.isPartitionMissing(part))
continue;
GridDhtLocalPartition loc = top.localPartition(part, demandMsg.topologyVersion(), false);
assert loc != null && loc.state() == GridDhtPartitionState.OWNING : "Partition should be in OWNING state: " + loc;
supplyMsg.addEstimatedKeysCount(loc.dataStore().fullSize());
}
for (int i = 0; i < histMap.size(); i++) {
int p = histMap.partitionAt(i);
if (iter.isPartitionMissing(p))
continue;
supplyMsg.addEstimatedKeysCount(histMap.updateCounterAt(i) - histMap.initialUpdateCounterAt(i));
}
} else {
iter = sctx.iterator;
remainingParts = sctx.remainingParts;
}
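// A supply message is flushed to the demander once its serialized size reaches the configured rebalance batch size.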
final int msgMaxSize = grp.preloader().batchSize();
long batchesCnt = 0;
CacheDataRow prevRow = null;
while (iter.hasNext()) {
CacheDataRow row = iter.peek();
// Prevent mvcc entry history splitting into separate batches.
boolean canFlushHistory = !grp.mvccEnabled() || prevRow != null && ((grp.sharedGroup() && row.cacheId() != prevRow.cacheId()) || !row.key().equals(prevRow.key()));
if (canFlushHistory && supplyMsg.messageSize() >= msgMaxSize) {
if (++batchesCnt >= maxBatchesCnt) {
saveSupplyContext(contextId, iter, remainingParts, demandMsg.rebalanceId());
reply(topicId, demanderNode, demandMsg, supplyMsg, contextId);
return;
} else {
if (!reply(topicId, demanderNode, demandMsg, supplyMsg, contextId))
return;
supplyMsg = new GridDhtPartitionSupplyMessage(demandMsg.rebalanceId(), grp.groupId(), demandMsg.topologyVersion(), grp.deploymentEnabled());
}
}
row = iter.next();
prevRow = row;
int part = row.partition();
GridDhtLocalPartition loc = top.localPartition(part, demandMsg.topologyVersion(), false);
assert (loc != null && loc.state() == OWNING && loc.reservations() > 0) || iter.isPartitionMissing(part) : "Partition should be in OWNING state and have at least 1 reservation: " + loc;
if (iter.isPartitionMissing(part) && remainingParts.contains(part)) {
supplyMsg.missed(part);
remainingParts.remove(part);
if (grp.eventRecordable(EVT_CACHE_REBALANCE_PART_MISSED))
grp.addRebalanceMissEvent(part);
if (log.isDebugEnabled())
log.debug("Requested partition is marked as missing [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ", p=" + part + "]");
continue;
}
if (!remainingParts.contains(part))
continue;
GridCacheEntryInfo info = extractEntryInfo(row);
if (info == null)
continue;
supplyMsg.addEntry0(part, iter.historical(part), info, grp.shared(), grp.cacheObjectContext());
if (iter.isPartitionDone(part)) {
supplyMsg.last(part, loc.updateCounter());
remainingParts.remove(part);
if (grp.eventRecordable(EVT_CACHE_REBALANCE_PART_SUPPLIED))
grp.addRebalanceSupplyEvent(part);
}
}
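// Mark any remaining partitions that the iterator reports as done or missing.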
Iterator<Integer> remainingIter = remainingParts.iterator();
while (remainingIter.hasNext()) {
int p = remainingIter.next();
if (iter.isPartitionDone(p)) {
GridDhtLocalPartition loc = top.localPartition(p, demandMsg.topologyVersion(), false);
assert loc != null : "Supply partition is gone: grp=" + grp.cacheOrGroupName() + ", p=" + p;
supplyMsg.last(p, loc.updateCounter());
remainingIter.remove();
if (grp.eventRecordable(EVT_CACHE_REBALANCE_PART_SUPPLIED))
grp.addRebalanceSupplyEvent(p);
} else if (iter.isPartitionMissing(p)) {
supplyMsg.missed(p);
remainingIter.remove();
if (grp.eventRecordable(EVT_CACHE_REBALANCE_PART_MISSED))
grp.addRebalanceMissEvent(p);
}
}
assert remainingParts.isEmpty() : "Partitions after rebalance should be either done or missing: " + remainingParts;
if (sctx != null)
clearContext(sctx, log);
else
iter.close();
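// Everything demanded has been supplied or marked missing; send the final supply message.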
reply(topicId, demanderNode, demandMsg, supplyMsg, contextId);
if (log.isInfoEnabled())
log.info("Finished supplying rebalancing [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + "]");
} catch (Throwable t) {
if (iter != null && !iter.isClosed()) {
try {
iter.close();
} catch (IgniteCheckedException e) {
t.addSuppressed(e);
}
}
if (grp.shared().kernalContext().isStopping())
return;
// Sending supply messages with error requires new protocol.
boolean sendErrMsg = demanderNode.version().compareTo(GridDhtPartitionSupplyMessageV2.AVAILABLE_SINCE) >= 0;
if (t instanceof IgniteSpiException) {
if (log.isDebugEnabled())
log.debug("Failed to send message to node (current node is stopping?) [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ", msg=" + t.getMessage() + ']');
sendErrMsg = false;
} else
U.error(log, "Failed to continue supplying [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ']', t);
try {
if (sctx != null)
clearContext(sctx, log);
} catch (Throwable t1) {
U.error(log, "Failed to cleanup supplying context [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ']', t1);
}
if (!sendErrMsg)
return;
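// A historical iterator failure is recoverable: the demander can fall back to full rebalance for the affected partitions.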
boolean fallbackToFullRebalance = X.hasCause(t, IgniteHistoricalIteratorException.class);
try {
GridDhtPartitionSupplyMessage errMsg;
if (fallbackToFullRebalance) {
// Mark the last checkpoint as not applicable for WAL rebalance.
grp.shared().database().lastCheckpointInapplicableForWalRebalance(grp.groupId());
// Mark all remaining partitions as missed to trigger full rebalance.
if (iter == null && F.isEmpty(remainingParts)) {
remainingParts = new HashSet<>(demandMsg.partitions().fullSet());
remainingParts.addAll(demandMsg.partitions().historicalSet());
}
for (int p : Optional.ofNullable(remainingParts).orElseGet(Collections::emptySet)) supplyMsg.missed(p);
errMsg = supplyMsg;
} else {
errMsg = new GridDhtPartitionSupplyMessageV2(demandMsg.rebalanceId(), grp.groupId(), demandMsg.topologyVersion(), grp.deploymentEnabled(), t);
}
reply(topicId, demanderNode, demandMsg, errMsg, contextId);
} catch (Throwable t1) {
U.error(log, "Failed to send supply error message [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ']', t1);
}
// If a fallback to full rebalance is possible, the error is handled by switching to full rebalance instead of triggering the failure handler.
if (!fallbackToFullRebalance) {
grp.shared().kernalContext().failure().process(new FailureContext(FailureType.CRITICAL_ERROR, new IgniteCheckedException("Failed to continue supplying [" + supplyRoutineInfo(topicId, nodeId, demandMsg) + ']', t)));
}
}
}
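For orientation, here is a minimal, self-contained sketch of the flush-and-resume batching pattern the method above implements. BatchingSupplierSketch, the Row type parameter and the size/send hooks are hypothetical stand-ins, not Ignite API; they mirror messageSize(), getRebalanceBatchSize() and reply(...) only conceptually.
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.function.Consumer;
import java.util.function.ToIntFunction;

class BatchingSupplierSketch<Row> {
    /** Rough per-batch size limit, analogous to the rebalance batch size. */
    private final int maxBatchBytes;

    BatchingSupplierSketch(int maxBatchBytes) {
        this.maxBatchBytes = maxBatchBytes;
    }

    /**
     * Drains the iterator into batches, flushing a batch once it reaches the size limit.
     * The live iterator plays the role of the saved supply context: stopping after a flush
     * and calling supply(...) again with the same iterator resumes from the next row.
     */
    void supply(Iterator<Row> it, ToIntFunction<Row> sizeOf, Consumer<List<Row>> send) {
        List<Row> batch = new ArrayList<>();
        int batchBytes = 0;

        while (it.hasNext()) {
            if (batchBytes >= maxBatchBytes) {
                send.accept(batch);        // Analogous to reply(...) with the current supply message.
                batch = new ArrayList<>(); // Start a fresh "supply message".
                batchBytes = 0;
            }

            Row row = it.next();
            batch.add(row);
            batchBytes += sizeOf.applyAsInt(row);
        }

        send.accept(batch); // Final batch for the remaining rows, analogous to the last supply message.
    }
}
Stopping after a flush and later calling supply(...) again with the same live iterator is the analogue of saving and then restoring a supply context under the same context id.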
Use of org.apache.ignite.internal.processors.cache.IgniteRebalanceIterator in project ignite by apache.
The class WalRecoveryTxLogicalRecordsTest, method testHistoricalRebalanceIterator.
/**
* Checks that a historical rebalance iterator replays all updates of a partition starting from the requested
* update counter, both before and after a node restart.
*
* @throws Exception If failed.
*/
@Test
public void testHistoricalRebalanceIterator() throws Exception {
System.setProperty(IgniteSystemProperties.IGNITE_PDS_WAL_REBALANCE_THRESHOLD, "0");
extraCcfg = new CacheConfiguration(CACHE_NAME + "2");
extraCcfg.setAffinity(new RendezvousAffinityFunction(false, PARTS));
Ignite ignite = startGrid();
try {
ignite.cluster().active(true);
GridCacheDatabaseSharedManager dbMgr = (GridCacheDatabaseSharedManager) ((IgniteEx) ignite).context().cache().context().database();
dbMgr.waitForCheckpoint("test");
// This number depends on wal history size.
int entries = 25;
IgniteCache<Integer, Integer> cache = ignite.cache(CACHE_NAME);
IgniteCache<Integer, Integer> cache2 = ignite.cache(CACHE_NAME + "2");
for (int i = 0; i < entries; i++) {
// Put to partition 0.
cache.put(i * PARTS, i * PARTS);
// Put to partition 1.
cache.put(i * PARTS + 1, i * PARTS + 1);
// Put to another cache.
cache2.put(i, i);
dbMgr.waitForCheckpoint("test");
}
for (int i = 0; i < entries; i++) {
assertEquals((Integer) (i * PARTS), cache.get(i * PARTS));
assertEquals((Integer) (i * PARTS + 1), cache.get(i * PARTS + 1));
assertEquals((Integer) (i), cache2.get(i));
}
CacheGroupContext grp = ((IgniteEx) ignite).context().cache().cacheGroup(CU.cacheId(CACHE_NAME));
IgniteCacheOffheapManager offh = grp.offheap();
AffinityTopologyVersion topVer = grp.affinity().lastVersion();
IgniteDhtDemandedPartitionsMap map;
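// For each starting counter i, request the history of partition 0 (and then partition 1) from i and verify that every later update is replayed.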
for (int i = 0; i < entries; i++) {
map = new IgniteDhtDemandedPartitionsMap();
map.addHistorical(0, i, entries, PARTS);
WALPointer ptr = reserveWalPointerForIterator(grp.shared());
try (IgniteRebalanceIterator it = offh.rebalanceIterator(map, topVer)) {
assertNotNull(it);
assertTrue("Not historical for iteration: " + i, it.historical(0));
for (int j = i; j < entries; j++) {
assertTrue("i=" + i + ", j=" + j, it.hasNextX());
CacheDataRow row = it.next();
assertEquals(j * PARTS, (int) row.key().value(grp.cacheObjectContext(), false));
assertEquals(j * PARTS, (int) row.value().value(grp.cacheObjectContext(), false));
}
assertFalse(it.hasNext());
} finally {
releaseWalPointerForIterator(grp.shared(), ptr);
}
map = new IgniteDhtDemandedPartitionsMap();
map.addHistorical(1, i, entries, PARTS);
ptr = reserveWalPointerForIterator(grp.shared());
try (IgniteRebalanceIterator it = offh.rebalanceIterator(map, topVer)) {
assertNotNull(it);
assertTrue("Not historical for iteration: " + i, it.historical(1));
for (int j = i; j < entries; j++) {
assertTrue(it.hasNextX());
CacheDataRow row = it.next();
assertEquals(j * PARTS + 1, (int) row.key().value(grp.cacheObjectContext(), false));
assertEquals(j * PARTS + 1, (int) row.value().value(grp.cacheObjectContext(), false));
}
assertFalse(it.hasNext());
} finally {
releaseWalPointerForIterator(grp.shared(), ptr);
}
}
stopAllGrids();
// Check that iterator is valid after restart.
ignite = startGrid();
ignite.cluster().active(true);
grp = ((IgniteEx) ignite).context().cache().cacheGroup(CU.cacheId(CACHE_NAME));
offh = grp.offheap();
topVer = grp.affinity().lastVersion();
for (int i = 0; i < entries; i++) {
long start = System.currentTimeMillis();
map = new IgniteDhtDemandedPartitionsMap();
map.addHistorical(0, i, entries, PARTS);
WALPointer ptr = reserveWalPointerForIterator(grp.shared());
try (IgniteRebalanceIterator it = offh.rebalanceIterator(map, topVer)) {
long end = System.currentTimeMillis();
info("Time to get iterator: " + (end - start));
assertTrue("Not historical for iteration: " + i, it.historical(0));
assertNotNull(it);
start = System.currentTimeMillis();
for (int j = i; j < entries; j++) {
assertTrue("i=" + i + ", j=" + j, it.hasNextX());
CacheDataRow row = it.next();
assertEquals(j * PARTS, (int) row.key().value(grp.cacheObjectContext(), false));
assertEquals(j * PARTS, (int) row.value().value(grp.cacheObjectContext(), false));
}
end = System.currentTimeMillis();
info("Time to iterate: " + (end - start));
assertFalse(it.hasNext());
} finally {
releaseWalPointerForIterator(grp.shared(), ptr);
}
map = new IgniteDhtDemandedPartitionsMap();
map.addHistorical(1, i, entries, PARTS);
ptr = reserveWalPointerForIterator(grp.shared());
try (IgniteRebalanceIterator it = offh.rebalanceIterator(map, topVer)) {
assertNotNull(it);
assertTrue("Not historical for iteration: " + i, it.historical(1));
for (int j = i; j < entries; j++) {
assertTrue(it.hasNextX());
CacheDataRow row = it.next();
assertEquals(j * PARTS + 1, (int) row.key().value(grp.cacheObjectContext(), false));
assertEquals(j * PARTS + 1, (int) row.value().value(grp.cacheObjectContext(), false));
}
assertFalse(it.hasNext());
} finally {
releaseWalPointerForIterator(grp.shared(), ptr);
}
}
} finally {
stopAllGrids();
System.clearProperty(IgniteSystemProperties.IGNITE_PDS_WAL_REBALANCE_THRESHOLD);
}
}
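For contrast with the historical iterators exercised above, a hedged sketch of a helper that drains a full (non-historical) rebalance iterator for a single partition. The helper is hypothetical (not part of the test class), and IgniteDhtDemandedPartitionsMap#addFull is assumed to mark the partition for full rebalance.
/**
 * Hypothetical helper, assuming IgniteDhtDemandedPartitionsMap#addFull requests a full rebalance:
 * drains a full (non-historical) rebalance iterator for the given partition and returns the row count.
 */
private int fullIteratorRowCount(CacheGroupContext grp, int part) throws IgniteCheckedException {
    IgniteCacheOffheapManager offh = grp.offheap();
    AffinityTopologyVersion topVer = grp.affinity().lastVersion();

    IgniteDhtDemandedPartitionsMap map = new IgniteDhtDemandedPartitionsMap();
    map.addFull(part);

    int cnt = 0;

    WALPointer ptr = reserveWalPointerForIterator(grp.shared());

    try (IgniteRebalanceIterator it = offh.rebalanceIterator(map, topVer)) {
        assertNotNull(it);

        // A full iterator should not report the partition as historical.
        assertFalse("Unexpected historical iterator: p=" + part, it.historical(part));

        while (it.hasNextX()) {
            // Every returned row must belong to the requested partition.
            assertEquals(part, it.next().partition());

            cnt++;
        }
    } finally {
        releaseWalPointerForIterator(grp.shared(), ptr);
    }

    return cnt;
}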
Use of org.apache.ignite.internal.processors.cache.IgniteRebalanceIterator in project ignite by apache.
The class WalRecoveryTxLogicalRecordsTest, method rows.
/**
* Collects all rows returned by a historical rebalance iterator for the given partition and update counter range.
*
* @param ignite Ignite.
* @param part Partition.
* @param from From counter.
* @param to To counter.
* @return Rows returned by the historical rebalance iterator.
* @throws IgniteCheckedException If the iterator cannot be created or iterated.
*/
private List<CacheDataRow> rows(Ignite ignite, int part, long from, long to) throws IgniteCheckedException {
CacheGroupContext grp = ((IgniteEx) ignite).context().cache().cacheGroup(CU.cacheId(CACHE_NAME));
IgniteCacheOffheapManager offh = grp.offheap();
AffinityTopologyVersion topVer = grp.affinity().lastVersion();
IgniteDhtDemandedPartitionsMap map = new IgniteDhtDemandedPartitionsMap();
map.addHistorical(part, from, to, PARTS);
List<CacheDataRow> rows = new ArrayList<>();
WALPointer ptr = reserveWalPointerForIterator(grp.shared());
try (IgniteRebalanceIterator it = offh.rebalanceIterator(map, topVer)) {
assertNotNull(it);
while (it.hasNextX()) rows.add(it.next());
} finally {
releaseWalPointerForIterator(grp.shared(), ptr);
}
return rows;
}
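A hedged usage sketch of this helper, assuming a started Ignite instance named ignite from one of the tests above; the partition and counter bounds are illustrative only.
// Illustrative counters only: collect the update history of partition 0 between counters 5 and 10
// and sanity-check that every returned row belongs to that partition.
List<CacheDataRow> hist = rows(ignite, 0, 5, 10);

for (CacheDataRow row : hist)
    assertEquals(0, row.partition());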