Use of org.apache.ignite.internal.processors.affinity.AffinityAssignment in the Apache Ignite project.
Class TxCrossCacheMapOnInvalidTopologyTest, method doTestCrossCacheTxMapOnInvalidTopology.
/**
 * Test scenario: a cross-cache tx is started after a node has left in the middle of rebalance, when the first cache
 * is fully rebalanced and the second one is only partially rebalanced.
 *
 * First cache map request will trigger client compatible remap for pessimistic txs,
 * second cache map request should use new topology version.
 *
 * For optimistic tx remap is enforced if more than one mapping in transaction or all enlisted caches have compatible
 * assignments.
 *
 * Success: tx is finished on ideal topology version over all mapped nodes.
 *
 * @param concurrency Concurrency.
 * @param isolation Isolation.
 * @throws Exception If failed.
 */
private void doTestCrossCacheTxMapOnInvalidTopology(TransactionConcurrency concurrency, TransactionIsolation isolation) throws Exception {
    try {
        IgniteEx crd = startGrid(0);
        IgniteEx g1 = startGrid(1);

        awaitPartitionMapExchange();

        IgniteEx client = startClientGrid("client");

        assertNotNull(client.cache(CACHE1));
        assertNotNull(client.cache(CACHE2));

        try (IgniteDataStreamer<Object, Object> streamer = crd.dataStreamer(CACHE1)) {
            // Put 500 keys per partition.
            for (int k = 0; k < PARTS_CNT * 500; k++)
                streamer.addData(k, new byte[10]);
        }

        try (IgniteDataStreamer<Object, Object> streamer = crd.dataStreamer(CACHE2)) {
            // Put 500 keys per partition.
            for (int k = 0; k < PARTS_CNT * 500; k++)
                streamer.addData(k, new byte[10]);
        }

        TestRecordingCommunicationSpi crdSpi = TestRecordingCommunicationSpi.spi(crd);

        // Versions the message filters below are keyed on: the join of the third server node,
        // the subsequent supplier left event and the following ideal affinity switch.
        final AffinityTopologyVersion joinVer = new AffinityTopologyVersion(4, 0);
        AffinityTopologyVersion leftVer = new AffinityTopologyVersion(5, 0);
        AffinityTopologyVersion idealVer = new AffinityTopologyVersion(5, 1);

        // Partitions of cache 2 delivered by the single supply batch that is allowed through.
        AtomicReference<Set<Integer>> full = new AtomicReference<>();

        // Partitions of cache 2 whose supply messages were blocked on the "left" topology version.
        GridConcurrentSkipListSet<Integer> leftVerParts = new GridConcurrentSkipListSet<>();

        crdSpi.blockMessages((node, m) -> {
            if (m instanceof GridDhtPartitionSupplyMessage) {
                GridDhtPartitionSupplyMessage msg = (GridDhtPartitionSupplyMessage) m;

                // Allow full rebalance for cache 1 and system cache.
                if (msg.groupId() != CU.cacheId(CACHE2))
                    return false;

                // Allow only first batch for cache 2.
                if (msg.topologyVersion().equals(joinVer)) {
                    if (full.get() == null) {
                        Map<Integer, Long> last = U.field(msg, "last");

                        full.set(last.keySet());

                        return false;
                    }

                    return true;
                }

                if (msg.topologyVersion().equals(leftVer)) {
                    Map<Integer, Long> last = U.field(msg, "last");

                    leftVerParts.addAll(last.keySet());

                    return true;
                }
            } else if (m instanceof GridDhtPartitionsFullMessage) {
                GridDhtPartitionsFullMessage msg = (GridDhtPartitionsFullMessage) m;

                // Delay full message for ideal topology switch.
                GridDhtPartitionExchangeId exchId = msg.exchangeId();

                if (exchId != null && exchId.topologyVersion().equals(idealVer))
                    return true;
            }

            return false;
        });

        TestRecordingCommunicationSpi g1Spi = TestRecordingCommunicationSpi.spi(g1);

        // The second supplier must not send anything for cache 2 at all.
        g1Spi.blockMessages((node, msg) -> {
            if (msg instanceof GridDhtPartitionSupplyMessage) {
                GridDhtPartitionSupplyMessage m = (GridDhtPartitionSupplyMessage) msg;

                return m.groupId() == CU.cacheId(CACHE2);
            }

            return false;
        });

        startGrid(2);

        crdSpi.waitForBlocked();
        g1Spi.waitForBlocked();

        // Wait partial owning.
        assertTrue("Timed out while waiting for rebalance", GridTestUtils.waitForCondition(() -> {
            // Await full rebalance for cache 1.
            GridDhtPartitionTopology top0 = grid(2).cachex(CACHE1).context().topology();

            for (int p = 0; p < PARTS_CNT; p++) {
                if (top0.localPartition(p).state() != OWNING)
                    return false;
            }

            // Await partial rebalance for cache 2.
            GridDhtPartitionTopology top1 = grid(2).cachex(CACHE2).context().topology();

            for (Integer part : full.get()) {
                if (top1.localPartition(part).state() != OWNING)
                    return false;
            }

            return true;
        }, 10_000));

        // At this point cache 1 is fully rebalanced and cache 2 is partially rebalanced.
        // Stop supplier in the middle of rebalance.
        g1.close();

        // Wait for topologies and calculate required partitions.
        grid(0).cachex(CACHE1).context().affinity().affinityReadyFuture(leftVer).get();
        grid(2).cachex(CACHE1).context().affinity().affinityReadyFuture(leftVer).get();
        grid(0).cachex(CACHE2).context().affinity().affinityReadyFuture(leftVer).get();
        grid(2).cachex(CACHE2).context().affinity().affinityReadyFuture(leftVer).get();

        AffinityAssignment assignment0 = grid(0).cachex(CACHE1).context().affinity().assignment(leftVer);
        AffinityAssignment assignment = grid(0).cachex(CACHE2).context().affinity().assignment(leftVer);

        // Search for a partition with incompatible assignment.

        // Partition for cache1 which is mapped for both late and ideal topologies to the same primary.
        int stablePart = -1;

        // Partition for cache2 which is mapped for both late and ideal topologies on different primaries.
        int movingPart = -1;

        // The stable partition is looked up in the assignment of cache 1, because that is the cache
        // the transaction writes the stable key to.
        for (int p = 0; p < assignment0.assignment().size(); p++) {
            List<ClusterNode> curr = assignment0.assignment().get(p);
            List<ClusterNode> ideal = assignment0.idealAssignment().get(p);

            if (curr.equals(ideal) && curr.get(0).order() == 1) {
                stablePart = p;

                break;
            }
        }

        assertFalse("Failed to find a stable partition for " + CACHE1, stablePart == -1);

        for (int p = 0; p < assignment.assignment().size(); p++) {
            List<ClusterNode> curr = assignment.assignment().get(p);
            List<ClusterNode> ideal = assignment.idealAssignment().get(p);

            if (!curr.equals(ideal) && curr.get(0).order() == 1) {
                movingPart = p;

                break;
            }
        }

        assertFalse("Failed to find a moving partition for " + CACHE2, movingPart == -1);

        // Hold the first map request of the transaction on the client.
        TestRecordingCommunicationSpi.spi(client).blockMessages(new IgniteBiPredicate<ClusterNode, Message>() {
            @Override
            public boolean apply(ClusterNode node, Message msg) {
                if (concurrency == PESSIMISTIC)
                    return msg instanceof GridNearLockRequest;
                else
                    return msg instanceof GridNearTxPrepareRequest;
            }
        });

        final int finalStablePart = stablePart;
        final int finalMovingPart = movingPart;

        IgniteInternalFuture<?> txFut = multithreadedAsync(() -> {
            try (Transaction tx = client.transactions().txStart(concurrency, isolation)) {
                // Will map on crd(order=1).
                client.cache(CACHE1).put(finalStablePart, 0);

                // Next request will remap to ideal topology, but it's not ready on other node except crd.
                client.cache(CACHE2).put(finalMovingPart, 0);

                tx.commit();
            }
        }, 1, "tx-thread");

        // Wait until all missing supply messages are blocked.
        assertTrue(GridTestUtils.waitForCondition(() -> leftVerParts.size() == PARTS_CNT - full.get().size(), 5_000));

        // Delay first lock request on late topology.
        TestRecordingCommunicationSpi.spi(client).waitForBlocked();

        // At this point only supply messages should be blocked.
        // Unblock to continue rebalance and trigger ideal topology switch.
        crdSpi.stopBlock(true, null, false, true);

        // Wait until ideal topology is ready on crd.
        crd.context().cache().context().exchange().affinityReadyFuture(idealVer).get(10_000);

        // Other node must wait for full message.
        assertFalse(GridTestUtils.waitForCondition(() -> grid(2).context().cache().context().exchange().affinityReadyFuture(idealVer).isDone(), 1_000));

        // Map on unstable topology (PME is in progress on other node).
        TestRecordingCommunicationSpi.spi(client).stopBlock();

        // Capture local transaction.
        IgniteInternalTx tx0 = client.context().cache().context().tm().activeTransactions().iterator().next();

        // Expected behavior: tx must hang (both pessimistic and optimistic) because topology is not ready.
        try {
            txFut.get(3_000);

            fail("TX must not complete");
        } catch (IgniteFutureTimeoutCheckedException e) {
            // Expected.
        }

        crdSpi.stopBlock();

        txFut.get();

        // Check transaction map version. Should be mapped on ideal topology.
        assertEquals(idealVer, tx0.topologyVersionSnapshot());

        awaitPartitionMapExchange();

        checkFutures();
    } finally {
        stopAllGrids();
    }
}
Use of org.apache.ignite.internal.processors.affinity.AffinityAssignment in the Apache Ignite project.
Class IgnitePdsSpuriousRebalancingOnNodeJoinTest, method testNoSpuriousRebalancing.
/**
 * Forges the update counter of a partition on the coordinator while the third node is stopped and checks
 * that the coordinator receives no {@link GridDhtPartitionDemandMessage} after that node is started again.
 *
 * @throws Exception If failed.
 */
@SuppressWarnings("ConstantConditions")
@Test
public void testNoSpuriousRebalancing() throws Exception {
    try {
        IgniteEx coordinator = startGrids(2);

        coordinator.cluster().active(true);
        coordinator.cluster().baselineAutoAdjustEnabled(false);

        List<Integer> movingParts = movingKeysAfterJoin(coordinator, DEFAULT_CACHE_NAME, 10);

        int[] crdPrimaryParts = coordinator.affinity(DEFAULT_CACHE_NAME).primaryPartitions(coordinator.localNode());

        // Sorted so that binary search can be used for the membership check below.
        Arrays.sort(crdPrimaryParts);

        // First moving partition the coordinator is currently primary for:
        // its primary will change to the joining node.
        int switchingPartId = -1;

        for (int candidate : movingParts) {
            if (Arrays.binarySearch(crdPrimaryParts, candidate) < 0)
                continue;

            switchingPartId = candidate;

            break;
        }

        assertTrue(switchingPartId != -1);

        startGrid(2);

        // Trigger partition movement.
        resetBaselineTopology();

        awaitPartitionMapExchange();

        GridCacheContext<Object, Object> cacheCtx = coordinator.cachex(DEFAULT_CACHE_NAME).context();

        AffinityTopologyVersion lateAffVer = new AffinityTopologyVersion(3, 1);

        AffinityAssignment lateAssignment = cacheCtx.affinity().assignment(lateAffVer);

        List<ClusterNode> owners = lateAssignment.get(switchingPartId);

        // The coordinator is still listed first among three nodes of the chosen partition.
        assertEquals(3, owners.size());
        assertEquals(coordinator.configuration().getConsistentId(), owners.get(0).consistentId());

        awaitPartitionMapExchange();

        for (int key = 0; key < PARTS * 2; key++) {
            coordinator.cache(DEFAULT_CACHE_NAME).put(key, key);
        }

        forceCheckpoint();

        stopGrid(2);

        // Forge the counter on coordinator for switching partition.
        GridDhtLocalPartition locPart = cacheCtx.topology().localPartition(switchingPartId);

        assertNotNull(locPart);

        PartitionUpdateCounter wrappedCntr = locPart.dataStore().partUpdateCounter();

        assertTrue(wrappedCntr instanceof PartitionUpdateCounterErrorWrapper);

        PartitionUpdateCounterTrackingImpl trackingCntr = U.field(wrappedCntr, "delegate");

        AtomicLong rawCntr = U.field(trackingCntr, "cntr");

        long forgedVal = rawCntr.get() - 1;

        rawCntr.set(forgedVal);

        TestRecordingCommunicationSpi crdSpi = TestRecordingCommunicationSpi.spi(coordinator);

        crdSpi.record((node, msg) -> msg instanceof GridDhtPartitionDemandMessage);

        startGrid(2);

        awaitPartitionMapExchange();

        // Expecting no rebalancing.
        List<Object> demandMsgs = crdSpi.recordedMessages(true);

        assertTrue("Rebalancing is not expected " + demandMsgs, demandMsgs.isEmpty());
    } finally {
        stopAllGrids();
    }
}
Use of org.apache.ignite.internal.processors.affinity.AffinityAssignment in the Apache Ignite project.
Class CachePartitionStateTest, method partitionState1.
/**
 * Creates the default cache on a three-node cluster with supply messages blocked, then starts a client node and
 * two more server nodes, checking partition states and rebalance status after each step.
 *
 * @param backups Number of backups.
 * @param crdAffNode If {@code false} cache is not created on coordinator.
 * @throws Exception If failed.
 */
private void partitionState1(int backups, boolean crdAffNode) throws Exception {
    startGrids(3);

    awaitPartitionMapExchange();

    blockSupplySend(DEFAULT_CACHE_NAME);

    CacheConfiguration cacheCfg = cacheConfiguration(DEFAULT_CACHE_NAME, backups);

    if (!crdAffNode) {
        // Exclude the first node from the set of cache nodes.
        cacheCfg.setNodeFilter(new TestCacheNodeExcludingFilter(getTestIgniteInstanceName(0)));
    }

    ignite(1).createCache(cacheCfg);

    // Assignment right after the cache start.
    AffinityAssignment afterCreate = grid(1).context().cache()
        .internalCache(DEFAULT_CACHE_NAME)
        .context()
        .affinity()
        .assignment(new AffinityTopologyVersion(3, 2));

    awaitPartitionMapExchange();

    checkPartitionsState(afterCreate, DEFAULT_CACHE_NAME, OWNING);
    checkRebalance(DEFAULT_CACHE_NAME, true);

    Ignite clientNode = startClientGrid(4);

    checkPartitionsState(afterCreate, DEFAULT_CACHE_NAME, OWNING);

    // Obtain the cache on the client and re-check the states.
    clientNode.cache(DEFAULT_CACHE_NAME);

    checkPartitionsState(afterCreate, DEFAULT_CACHE_NAME, OWNING);
    checkRebalance(DEFAULT_CACHE_NAME, true);

    startGrid(5);

    // Supply messages are still blocked, so rebalance is expected to be incomplete.
    checkRebalance(DEFAULT_CACHE_NAME, false);

    for (int nodeIdx = 0; nodeIdx < 3; nodeIdx++) {
        checkNodePartitions(afterCreate, ignite(nodeIdx).cluster().localNode(), DEFAULT_CACHE_NAME, OWNING);
    }

    AffinityAssignment afterJoin5 = grid(1).context().cache()
        .internalCache(DEFAULT_CACHE_NAME)
        .context()
        .affinity()
        .assignment(new AffinityTopologyVersion(5, 0));

    checkNodePartitions(afterJoin5, ignite(5).cluster().localNode(), DEFAULT_CACHE_NAME, MOVING);

    stopBlock();

    awaitPartitionMapExchange();

    AffinityAssignment afterSwitch5 = grid(1).context().cache()
        .internalCache(DEFAULT_CACHE_NAME)
        .context()
        .affinity()
        .assignment(new AffinityTopologyVersion(5, 1));

    awaitPartitionMapExchange(true, true, null, false);

    checkPartitionsState(afterSwitch5, DEFAULT_CACHE_NAME, OWNING);
    checkRebalance(DEFAULT_CACHE_NAME, true);

    if (!crdAffNode) {
        ignite(0).cache(DEFAULT_CACHE_NAME);
    }

    checkPartitionsState(afterSwitch5, DEFAULT_CACHE_NAME, OWNING);
    checkRebalance(DEFAULT_CACHE_NAME, true);

    startGrid(6);

    awaitPartitionMapExchange();

    AffinityAssignment afterSwitch6 = grid(1).context().cache()
        .internalCache(DEFAULT_CACHE_NAME)
        .context()
        .affinity()
        .assignment(new AffinityTopologyVersion(6, 1));

    awaitPartitionMapExchange(true, true, null, false);

    checkPartitionsState(afterSwitch6, DEFAULT_CACHE_NAME, OWNING);
    checkRebalance(DEFAULT_CACHE_NAME, true);
}
Use of org.apache.ignite.internal.processors.affinity.AffinityAssignment in the Apache Ignite project.
Class GridDhtAtomicCache, method updatePartialBatch.
/**
 * Applies a batch of either puts or removes to already locked entries: the batch is first written to the
 * cache store ({@code putAll} or {@code removeAll}), then every entry is updated through {@code innerUpdate},
 * registered in the DHT update future (if any) and reflected in the near response.
 * <p>
 * Exactly one of {@code putMap} and {@code rmvKeys} must be non-null. Keys reported as failed by the store
 * are skipped during the per-entry update and added to the response as failed keys.
 *
 * @param hasNear {@code True} if originating node has near cache.
 * @param firstEntryIdx Index of the first entry in the request keys collection; added to the position in
 *      {@code entries} to get the index reported in the response.
 * @param entries Entries to update.
 * @param ver Version to set.
 * @param nearNode Originating node.
 * @param writeVals Write values, read by the same index as {@code entries} for update operations.
 * @param putMap Values to put.
 * @param rmvKeys Keys to remove.
 * @param entryProcessorMap Entry processors.
 * @param dhtFut DHT update future if has backups.
 * @param req Request.
 * @param res Response.
 * @param replicate Whether replication is enabled.
 * @param batchRes Batch update result.
 * @param taskName Task name.
 * @param expiry Expiry policy.
 * @param sndPrevVal If {@code true} sends previous value to backups.
 * @return The {@code dhtFut} instance that was passed in (possibly {@code null}).
 */
@SuppressWarnings("ForLoopReplaceableByForEach")
@Nullable
private GridDhtAtomicAbstractUpdateFuture updatePartialBatch(final boolean hasNear, final int firstEntryIdx, final List<GridDhtCacheEntry> entries, final GridCacheVersion ver, final ClusterNode nearNode, @Nullable final List<CacheObject> writeVals, @Nullable final Map<KeyCacheObject, CacheObject> putMap, @Nullable final Collection<KeyCacheObject> rmvKeys, @Nullable final Map<KeyCacheObject, EntryProcessor<Object, Object, Object>> entryProcessorMap, @Nullable GridDhtAtomicAbstractUpdateFuture dhtFut, final GridNearAtomicAbstractUpdateRequest req, final GridNearAtomicUpdateResponse res, final boolean replicate, final UpdateBatchResult batchRes, final String taskName, @Nullable final IgniteCacheExpiryPolicy expiry, final boolean sndPrevVal) {
// The batch is either a put batch or a remove batch, never both and never neither.
assert putMap == null ^ rmvKeys == null;
assert req.conflictVersions() == null : "Cannot be called when there are conflict entries in the batch.";
AffinityTopologyVersion topVer = req.topologyVersion();
// Readers are inspected if the originating node has a near cache or discovery reports a near cache on this topology version.
boolean checkReaders = hasNear || ctx.discovery().hasNearCache(ctx.cacheId(), topVer);
// Partial store failure is remembered here: failed keys are skipped below and reported at the end.
CacheStorePartialUpdateException storeErr = null;
try {
GridCacheOperation op;
if (putMap != null) {
try {
// Read-only view pairing every value to put with the batch version.
Map<? extends KeyCacheObject, IgniteBiTuple<? extends CacheObject, GridCacheVersion>> view = F.viewReadOnly(putMap, new C1<CacheObject, IgniteBiTuple<? extends CacheObject, GridCacheVersion>>() {
@Override
public IgniteBiTuple<? extends CacheObject, GridCacheVersion> apply(CacheObject val) {
return F.t(val, ver);
}
});
ctx.store().putAll(null, view);
} catch (CacheStorePartialUpdateException e) {
storeErr = e;
}
op = UPDATE;
} else {
try {
ctx.store().removeAll(null, rmvKeys);
} catch (CacheStorePartialUpdateException e) {
storeErr = e;
}
op = DELETE;
}
boolean intercept = ctx.config().getInterceptor() != null;
// Affinity assignment is resolved once for the whole batch.
AffinityAssignment affAssignment = ctx.affinity().assignment(topVer);
// Avoid iterator creation.
for (int i = 0; i < entries.size(); i++) {
GridDhtCacheEntry entry = entries.get(i);
assert Thread.holdsLock(entry);
if (entry.obsolete()) {
assert req.operation() == DELETE : "Entry can become obsolete only after remove: " + entry;
continue;
}
// Skip entries whose keys the store failed to update.
if (storeErr != null && storeErr.failedKeys().contains(entry.key().value(ctx.cacheObjectContext(), false)))
continue;
try {
// We are holding java-level locks on entries at this point.
// NOTE(review): writeVals is declared @Nullable but is dereferenced for UPDATE; presumably callers always pass it for put batches - confirm.
CacheObject writeVal = op == UPDATE ? writeVals.get(i) : null;
assert writeVal != null || op == DELETE : "null write value found.";
Collection<UUID> readers = null;
Collection<UUID> filteredReaders = null;
if (checkReaders) {
readers = entry.readers();
// Readers except the originating node itself.
filteredReaders = F.view(entry.readers(), F.notEqualTo(nearNode.id()));
}
// Store write-through/read-through are disabled here: the store was already updated for the whole batch above.
GridCacheUpdateAtomicResult updRes = entry.innerUpdate(ver, nearNode.id(), locNodeId, op, writeVal, null, /*write-through*/
false, /*read-through*/
false, /*retval*/
sndPrevVal, req.keepBinary(), expiry, /*event*/
true, /*metrics*/
true, /*primary*/
true, /*verCheck*/
false, topVer, null, replicate ? DR_PRIMARY : DR_NONE, CU.TTL_NOT_CHANGED, CU.EXPIRE_TIME_CALCULATE, null, /*conflict resolve*/
false, /*intercept*/
false, req.subjectId(), taskName, null, null, dhtFut);
assert !updRes.success() || updRes.newTtl() == CU.TTL_NOT_CHANGED || expiry != null : "success=" + updRes.success() + ", newTtl=" + updRes.newTtl() + ", expiry=" + expiry;
// The interceptor is notified here because interception was switched off in the innerUpdate call.
if (intercept) {
if (op == UPDATE) {
ctx.config().getInterceptor().onAfterPut(new CacheLazyEntry(ctx, entry.key(), updRes.newValue(), req.keepBinary()));
} else {
assert op == DELETE : op;
// Old value should be already loaded for 'CacheInterceptor.onBeforeRemove'.
ctx.config().getInterceptor().onAfterRemove(new CacheLazyEntry(ctx, entry.key(), updRes.oldValue(), req.keepBinary()));
}
}
batchRes.addDeleted(entry, updRes, entries);
if (dhtFut != null) {
EntryProcessor<Object, Object, Object> entryProcessor = entryProcessorMap == null ? null : entryProcessorMap.get(entry.key());
dhtFut.addWriteEntry(affAssignment, entry, writeVal, entryProcessor, updRes.newTtl(), CU.EXPIRE_TIME_CALCULATE, null, sndPrevVal, updRes.oldValue(), updRes.updateCounter());
if (!F.isEmpty(filteredReaders))
dhtFut.addNearWriteEntries(filteredReaders, entry, writeVal, entryProcessor, updRes.newTtl(), CU.EXPIRE_TIME_CALCULATE);
}
if (hasNear) {
// The partition does not belong to the originating node on this topology version: fill the near part of the response.
if (!ctx.affinity().partitionBelongs(nearNode, entry.partition(), topVer)) {
int idx = firstEntryIdx + i;
if (req.operation() == TRANSFORM) {
res.addNearValue(idx, writeVal, updRes.newTtl(), CU.EXPIRE_TIME_CALCULATE);
} else
res.addNearTtl(idx, updRes.newTtl(), CU.EXPIRE_TIME_CALCULATE);
if (writeVal != null || entry.hasValue()) {
IgniteInternalFuture<Boolean> f = entry.addReader(nearNode.id(), req.messageId(), topVer);
assert f == null : f;
}
} else if (// Reader became primary or backup.
readers.contains(nearNode.id()))
entry.removeReader(nearNode.id(), req.messageId());
else
res.addSkippedIndex(firstEntryIdx + i);
}
} catch (GridCacheEntryRemovedException e) {
assert false : "Entry cannot become obsolete while holding lock.";
// NOTE(review): with assertions disabled the exception is only printed to stderr and the loop goes on; consider logging it instead.
e.printStackTrace();
}
}
} catch (IgniteCheckedException e) {
// Any checked failure marks every key of the batch as failed.
res.addFailedKeys(putMap != null ? putMap.keySet() : rmvKeys, e);
}
if (storeErr != null) {
// Convert the keys reported by the store to cache key objects and report them with the store error cause.
ArrayList<KeyCacheObject> failed = new ArrayList<>(storeErr.failedKeys().size());
for (Object failedKey : storeErr.failedKeys()) failed.add(ctx.toCacheKeyObject(failedKey));
res.addFailedKeys(failed, storeErr.getCause());
}
return dhtFut;
}
Use of org.apache.ignite.internal.processors.affinity.AffinityAssignment in the Apache Ignite project.
Class CacheGroupMetricsMXBeanImpl, method getAffinityPartitionsAssignmentMap.
/**
 * {@inheritDoc}
 */
@Override
public Map<Integer, List<String>> getAffinityPartitionsAssignmentMap() {
    AffinityAssignment cachedAff = ctx.affinity().cachedAffinity(AffinityTopologyVersion.NONE);

    List<List<ClusterNode>> nodesByPart = cachedAff.assignment();

    // Insertion-ordered map: entries are added in the order of partition indexes.
    Map<Integer, List<String>> res = new LinkedHashMap<>();

    for (int partId = 0; partId < nodesByPart.size(); partId++) {
        List<ClusterNode> partNodes = nodesByPart.get(partId);

        List<String> nodeIds = new ArrayList<>(partNodes.size());

        for (ClusterNode partNode : partNodes) {
            nodeIds.add(partNode.id().toString());
        }

        res.put(partId, nodeIds);
    }

    return res;
}
Aggregations