use of org.apache.ignite.testframework.junits.WithSystemProperty in project ignite by apache.
the class FullHistRebalanceOnClientStopTest method testFullRebalanceNotTriggeredWhenClientNodeStopsDuringPme.
/**
* @throws Exception if failed.
*/
@Test
@WithSystemProperty(key = IGNITE_PDS_WAL_REBALANCE_THRESHOLD, value = "0")
public void testFullRebalanceNotTriggeredWhenClientNodeStopsDuringPme() throws Exception {
startGrids(2);
IgniteEx ig0 = grid(0);
ig0.cluster().active(true);
IgniteCache<Object, Object> cache = ig0.cache(CACHE_NAME);
startClientGrid(5);
final int entryCnt = PARTS_CNT * 1000;
final int preloadEntryCnt = PARTS_CNT * 1001;
for (int i = 0; i < preloadEntryCnt; i++) cache.put(i, i);
forceCheckpoint();
stopGrid(1);
for (int i = 0; i < entryCnt; i++) cache.put(i, i + 100);
forceCheckpoint();
final CountDownLatch exchangeLatch = new CountDownLatch(1);
final CountDownLatch hangingPmeStartedLatch = new CountDownLatch(1);
ig0.context().cache().context().exchange().registerExchangeAwareComponent(new PartitionsExchangeAware() {
@Override
public void onInitAfterTopologyLock(GridDhtPartitionsExchangeFuture fut) {
try {
hangingPmeStartedLatch.countDown();
exchangeLatch.await();
} catch (InterruptedException e) {
e.printStackTrace();
}
}
});
IgniteInternalFuture fut = GridTestUtils.runAsync(() -> {
try {
startGrid(1);
awaitPartitionMapExchange();
} catch (Exception e) {
e.printStackTrace();
}
});
IgniteInternalFuture clientStopFut = GridTestUtils.runAsync(() -> {
try {
hangingPmeStartedLatch.await();
stopGrid(5);
} catch (Exception e) {
e.printStackTrace();
}
}, "client-starter");
assertFalse(fut.isDone());
exchangeLatch.countDown();
clientStopFut.get();
fut.get();
awaitPartitionMapExchange();
boolean histRebalanceInvoked = RebalanceCheckingCommunicationSpi.histRebalances();
boolean fullRebalanceInvoked = RebalanceCheckingCommunicationSpi.fullRebalances();
RebalanceCheckingCommunicationSpi.cleanup();
assertTrue("Historical rebalance hasn't been invoked.", histRebalanceInvoked);
assertFalse("Full rebalance has been invoked.", fullRebalanceInvoked);
}
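The snippet references RebalanceCheckingCommunicationSpi but does not show it. Below is a minimal sketch of how such a SPI could record historical vs. full rebalance by inspecting outgoing demand messages; everything except histRebalances(), fullRebalances() and cleanup() (which the test calls) is an assumption, and imports from org.apache.ignite.internal.* are omitted, as in the snippet above.
private static class RebalanceCheckingCommunicationSpi extends TcpCommunicationSpi {
    /** Set when a demand message requesting historical partitions is observed. */
    private static volatile boolean histRebalance;

    /** Set when a demand message requesting full partitions is observed. */
    private static volatile boolean fullRebalance;

    /** {@inheritDoc} */
    @Override public void sendMessage(ClusterNode node, Message msg, IgniteInClosure<IgniteException> ackC) throws IgniteSpiException {
        // Communication messages are wrapped into GridIoMessage; unwrap to get the cache message.
        Object unwrapped = ((GridIoMessage)msg).message();

        if (unwrapped instanceof GridDhtPartitionDemandMessage) {
            GridDhtPartitionDemandMessage demandMsg = (GridDhtPartitionDemandMessage)unwrapped;

            if (demandMsg.partitions().hasHistorical())
                histRebalance = true;

            if (demandMsg.partitions().hasFull())
                fullRebalance = true;
        }

        super.sendMessage(node, msg, ackC);
    }

    /** @return {@code True} if at least one historical demand message was sent. */
    static boolean histRebalances() {
        return histRebalance;
    }

    /** @return {@code True} if at least one full demand message was sent. */
    static boolean fullRebalances() {
        return fullRebalance;
    }

    /** Resets the recorded flags between tests. */
    static void cleanup() {
        histRebalance = false;
        fullRebalance = false;
    }
}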
use of org.apache.ignite.testframework.junits.WithSystemProperty in project ignite by apache.
the class DeadLockOnNodeLeftExchangeTest method test.
/**
*/
@Test
@WithSystemProperty(key = ExchangeContext.IGNITE_EXCHANGE_COMPATIBILITY_VER_1, value = "true")
public void test() throws Exception {
startGrids(4);
IgniteClusterEx cluster = grid(0).cluster();
cluster.baselineAutoAdjustEnabled(false);
cluster.active(true);
TestRecordingCommunicationSpi spi = TestRecordingCommunicationSpi.spi(grid(3));
spi.blockMessages(GridDhtPartitionsSingleMessage.class, getTestIgniteInstanceName(0));
stopGrid(1);
spi.waitForBlocked();
IgniteInternalFuture setBaselineTopFut = GridTestUtils.runAsync(() -> {
cluster.setBaselineTopology(cluster.topologyVersion());
});
assertTrue(GridTestUtils.waitForCondition(() -> grid(0).context().state().clusterState().transition(), 5_000L));
stopGrid(2);
spi.stopBlock();
setBaselineTopFut.get(30_000L);
}
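TestRecordingCommunicationSpi.spi(grid(3)) only works if that SPI was installed in the node configuration. A minimal sketch of the configuration override such a test typically relies on, assuming persistence is needed for the baseline-topology calls (the actual test class may configure more than this):
/** {@inheritDoc} */
@Override protected IgniteConfiguration getConfiguration(String igniteInstanceName) throws Exception {
    IgniteConfiguration cfg = super.getConfiguration(igniteInstanceName);

    // Allows blocking and recording messages sent from the node, as done with GridDhtPartitionsSingleMessage above.
    cfg.setCommunicationSpi(new TestRecordingCommunicationSpi());

    // Persistence is assumed here because the test manipulates baseline topology.
    cfg.setDataStorageConfiguration(new DataStorageConfiguration()
        .setDefaultDataRegionConfiguration(new DataRegionConfiguration().setPersistenceEnabled(true)));

    return cfg;
}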
use of org.apache.ignite.testframework.junits.WithSystemProperty in project ignite by apache.
the class PartitionsExchangeCoordinatorFailoverTest method testChangeCoordinatorToLocallyJoiningNode.
/**
* Test checks that changing the coordinator to a node that is joining the cluster at that moment works correctly
* in the case of exchange merges and completed exchanges on other joining nodes.
*/
@Test
@WithSystemProperty(key = IgniteSystemProperties.IGNITE_SKIP_CONFIGURATION_CONSISTENCY_CHECK, value = "true")
public void testChangeCoordinatorToLocallyJoiningNode() throws Exception {
newCaches = false;
spiFactory = TestRecordingCommunicationSpi::new;
IgniteEx crd = startGrid(CRD_NONE);
final int newCrdNodeIdx = 1;
// A full message shouldn't be sent to the new coordinator.
blockSendingFullMessage(crd, node -> node.consistentId().equals(getTestIgniteInstanceName(newCrdNodeIdx)));
CountDownLatch joiningNodeSentSingleMsg = new CountDownLatch(1);
// For the next joining node, delay sending the single message to emulate an exchange merge.
spiFactory = () -> new DynamicDelayingCommunicationSpi(msg -> {
final int delay = 5_000;
if (msg instanceof GridDhtPartitionsSingleMessage) {
GridDhtPartitionsSingleMessage singleMsg = (GridDhtPartitionsSingleMessage) msg;
if (singleMsg.exchangeId() != null) {
joiningNodeSentSingleMsg.countDown();
return delay;
}
}
return 0;
});
IgniteInternalFuture<?> newCrdJoinFut = GridTestUtils.runAsync(() -> startGrid(newCrdNodeIdx));
// Wait until the new coordinator node has sent its single message.
joiningNodeSentSingleMsg.await();
spiFactory = TcpCommunicationSpi::new;
// Additionally start 2 new nodes. Their exchanges should be merged with the exchange for the joining new coordinator node.
startGridsMultiThreaded(2, 2);
Assert.assertFalse("New coordinator join shouldn't be happened before stopping old coordinator.", newCrdJoinFut.isDone());
// Stop coordinator.
stopGrid(CRD_NONE);
// New coordinator join process should succeed after that.
newCrdJoinFut.get();
awaitPartitionMapExchange();
// Check that affinity assignments are equal on all nodes.
AffinityTopologyVersion affVer = ((IgniteEx) ignite(1)).cachex(CACHE_NAME).context().shared().exchange().readyAffinityVersion();
List<List<ClusterNode>> expAssignment = null;
IgniteEx expAssignmentNode = null;
for (Ignite node : G.allGrids()) {
IgniteEx nodeEx = (IgniteEx) node;
List<List<ClusterNode>> assignment = nodeEx.cachex(CACHE_NAME).context().affinity().assignments(affVer);
if (expAssignment == null) {
expAssignment = assignment;
expAssignmentNode = nodeEx;
} else
Assert.assertEquals("Affinity assignments are different " + "[expectedNode=" + expAssignmentNode + ", actualNode=" + nodeEx + "]", expAssignment, assignment);
}
}
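DynamicDelayingCommunicationSpi is not shown in the snippet; judging by its usage it delays each outgoing message by the number of milliseconds returned by the supplied closure. A hedged sketch under that assumption (the constructor argument type and the unwrapping via GridIoMessage are guesses):
static class DynamicDelayingCommunicationSpi extends TcpCommunicationSpi {
    /** Returns the delay, in milliseconds, to apply before sending a given message. */
    private final IgniteClosure<Message, Integer> delayC;

    DynamicDelayingCommunicationSpi(IgniteClosure<Message, Integer> delayC) {
        this.delayC = delayC;
    }

    /** {@inheritDoc} */
    @Override public void sendMessage(ClusterNode node, Message msg, IgniteInClosure<IgniteException> ackC) throws IgniteSpiException {
        try {
            // Ask the closure how long to delay this particular (unwrapped) message.
            int delay = delayC.apply(((GridIoMessage)msg).message());

            if (delay > 0)
                U.sleep(delay);
        }
        catch (IgniteInterruptedCheckedException e) {
            throw new IgniteSpiException(e);
        }

        super.sendMessage(node, msg, ackC);
    }
}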
use of org.apache.ignite.testframework.junits.WithSystemProperty in project ignite by apache.
the class IgniteWalRebalanceTest method testMultipleNodesFailHistoricalRebalance.
/**
* Tests that the demander switches to full rebalance if two of the three previously chosen suppliers
* for a group fail to perform historical rebalance due to an unexpected error.
*
* @throws Exception If failed.
*/
@Test
@WithSystemProperty(key = "IGNITE_DISABLE_WAL_DURING_REBALANCING", value = "true")
public void testMultipleNodesFailHistoricalRebalance() throws Exception {
backups = 1;
int node_cnt = 4;
int demanderId = node_cnt - 1;
// Start a new cluster with 3 suppliers.
startGrids(node_cnt - 1);
// Start demander node.
userAttrs.put("TEST_ATTR", "TEST_ATTR");
startGrid(node_cnt - 1);
grid(0).cluster().state(ACTIVE);
// Create a new cache that places a full set of partitions on demander node.
RendezvousAffinityFunction aff = new RendezvousAffinityFunction(false, PARTS_CNT);
aff.setAffinityBackupFilter(new ClusterNodeAttributeAffinityBackupFilter("TEST_ATTR"));
String cacheName = "test-cache-1";
IgniteCache<Integer, IndexedObject> cache0 = grid(0).getOrCreateCache(
    new CacheConfiguration<Integer, IndexedObject>(cacheName)
        .setBackups(backups)
        .setAffinity(aff)
        .setWriteSynchronizationMode(CacheWriteSynchronizationMode.FULL_SYNC));
// Fill initial data and force checkpoint.
final int entryCnt = PARTS_CNT * 200;
final int preloadEntryCnt = PARTS_CNT * 201;
int val = 0;
for (int k = 0; k < preloadEntryCnt; k++) cache0.put(k, new IndexedObject(val++));
forceCheckpoint();
// Stop demander node.
stopGrid(demanderId);
// Rewrite data to trigger further rebalance.
for (int k = 0; k < entryCnt; k++) {
// Skip updates for one partition: its content should survive even though the corresponding RebalanceFuture will be cancelled.
if (grid(0).affinity(cacheName).partition(k) != 12)
cache0.put(k, new IndexedObject(val++));
}
// Upload additional data to a particular partition (for instance, a primary partition that belongs to the coordinator)
// in order to trigger full rebalance for that partition instead of a historical one.
int[] primaries0 = grid(0).affinity(cacheName).primaryPartitions(grid(0).localNode());
for (int i = 0; i < preloadEntryCnt; ++i) cache0.put(primaries0[0], new IndexedObject(val++));
forceCheckpoint();
// Delay rebalance process for specified group.
blockMsgPred = (node, msg) -> {
if (msg instanceof GridDhtPartitionDemandMessage) {
GridDhtPartitionDemandMessage msg0 = (GridDhtPartitionDemandMessage) msg;
return msg0.groupId() == CU.cacheId(cacheName);
}
return false;
};
Queue<RecordedDemandMessage> recorderedMsgs = new ConcurrentLinkedQueue<>();
// Record demand messages for specified group.
recordMsgPred = (node, msg) -> {
if (msg instanceof GridDhtPartitionDemandMessage) {
GridDhtPartitionDemandMessage msg0 = (GridDhtPartitionDemandMessage) msg;
if (msg0.groupId() == CU.cacheId(cacheName)) {
recorderedMsgs.add(new RecordedDemandMessage(node.id(), msg0.groupId(), msg0.partitions().hasFull(), msg0.partitions().hasHistorical()));
}
}
return false;
};
// Corrupt the WAL on all suppliers except one.
injectFailingIOFactory(grid(0));
injectFailingIOFactory(grid(1));
// Trigger rebalance process from suppliers.
IgniteEx restartedDemander = startGrid(node_cnt - 1);
TestRecordingCommunicationSpi demanderSpi = TestRecordingCommunicationSpi.spi(restartedDemander);
// Wait until the demander starts historical rebalancing.
demanderSpi.waitForBlocked();
final IgniteInternalFuture<Boolean> preloadFut = restartedDemander.cachex(cacheName).context().group().preloader().rebalanceFuture();
// Unblock messages and start tracking demand and supply messages.
demanderSpi.stopBlock();
// Wait until rebalancing is cancelled for both failed suppliers.
assertTrue("Rebalance future was not cancelled [fut=" + preloadFut + ']', GridTestUtils.waitForCondition(preloadFut::isDone, getTestTimeout()));
Assert.assertEquals("Rebalance should be cancelled on demander node: " + preloadFut, false, preloadFut.get());
awaitPartitionMapExchange(true, true, null);
// Check data consistency.
assertPartitionsSame(idleVerify(restartedDemander, cacheName));
// Check that historical rebalance switched to full for suppliers 1 and 2 and remained historical for supplier 3.
IgnitePredicate<RecordedDemandMessage> histPred = msg -> msg.hasHistorical() && !msg.hasFull();
IgnitePredicate<RecordedDemandMessage> fullPred = msg -> !msg.hasHistorical() && msg.hasFull();
IgnitePredicate<RecordedDemandMessage> mixedPred = msg -> msg.hasHistorical() && msg.hasFull();
IgniteBiInClosure<UUID, Boolean> supplierChecker = (supplierId, mixed) -> {
    List<RecordedDemandMessage> demandMsgsForSupplier = recorderedMsgs.stream()
        .filter(msg -> msg.supplierId().equals(supplierId))
        .filter(msg -> msg.groupId() == CU.cacheId(cacheName))
        .filter(msg -> msg.hasFull() || msg.hasHistorical())
        .collect(toList());

    assertEquals("There should be only two demand messages [supplierId=" + supplierId + ']', 2, demandMsgsForSupplier.size());
    assertTrue("The first message should require " + (mixed ? "mixed" : "historical") + " rebalance [msg=" + demandMsgsForSupplier.get(0) + ']',
        mixed ? mixedPred.apply(demandMsgsForSupplier.get(0)) : histPred.apply(demandMsgsForSupplier.get(0)));
    assertTrue("The second message should require full rebalance [msg=" + demandMsgsForSupplier.get(1) + ']',
        fullPred.apply(demandMsgsForSupplier.get(1)));
};
supplierChecker.apply(grid(0).cluster().localNode().id(), true);
supplierChecker.apply(grid(1).cluster().localNode().id(), false);
// Check supplier 3.
List<RecordedDemandMessage> demandMsgsForSupplier = recorderedMsgs.stream()
    .filter(msg -> msg.supplierId().equals(grid(2).cluster().localNode().id()))
    .filter(msg -> msg.groupId() == CU.cacheId(cacheName))
    .filter(msg -> msg.hasFull() || msg.hasHistorical())
    .collect(toList());

assertEquals("There should be only one demand message.", 1, demandMsgsForSupplier.size());
assertTrue("The first message should require historical rebalance [msg=" + demandMsgsForSupplier.get(0) + ']',
    histPred.apply(demandMsgsForSupplier.get(0)));
}
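RecordedDemandMessage is referenced by the recording predicate but not shown. Judging by the constructor call and the stream filters above, it is a plain value holder; the following sketch is consistent with that usage (field names are assumptions):
static class RecordedDemandMessage {
    private final UUID supplierId;
    private final int grpId;
    private final boolean full;
    private final boolean historical;

    RecordedDemandMessage(UUID supplierId, int grpId, boolean full, boolean historical) {
        this.supplierId = supplierId;
        this.grpId = grpId;
        this.full = full;
        this.historical = historical;
    }

    /** @return Id of the supplier node the demand message was sent to. */
    UUID supplierId() { return supplierId; }

    /** @return Cache group id. */
    int groupId() { return grpId; }

    /** @return {@code True} if the message requested full partitions. */
    boolean hasFull() { return full; }

    /** @return {@code True} if the message requested historical partitions. */
    boolean hasHistorical() { return historical; }

    /** {@inheritDoc} */
    @Override public String toString() {
        return "RecordedDemandMessage [supplierId=" + supplierId + ", grpId=" + grpId +
            ", full=" + full + ", historical=" + historical + ']';
    }
}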
use of org.apache.ignite.testframework.junits.WithSystemProperty in project ignite by apache.
the class CheckpointFreeListTest method testRestoreFreeListCorrectlyAfterRandomStop.
/**
* Note: the test assumes that the PDS size doesn't change between the first checkpoint and after several node stops.
* That is no longer true with free-list caching, since only the final free-list state is persisted on checkpoint:
* some changed but currently empty buckets are not persisted, so the PDS size is smaller after the first checkpoint.
* The test therefore makes sense only with on-heap caching disabled.
*
* @throws Exception if failed.
*/
@Test
@WithSystemProperty(key = IgniteSystemProperties.IGNITE_PAGES_LIST_DISABLE_ONHEAP_CACHING, value = "true")
public void testRestoreFreeListCorrectlyAfterRandomStop() throws Exception {
IgniteEx ignite0 = startGrid(0);
ignite0.cluster().active(true);
Random random = new Random();
List<T2<Integer, byte[]>> cachedEntry = new ArrayList<>();
IgniteCache<Integer, Object> cache = ignite0.cache(CACHE_NAME);
for (int j = 0; j < CACHE_SIZE; j++) {
byte[] val = new byte[random.nextInt(SF.apply(3072))];
cache.put(j, val);
cachedEntry.add(new T2<>(j, val));
}
Collections.shuffle(cachedEntry);
// Remove half of entries.
Collection<T2<Integer, byte[]>> entriesToRemove = cachedEntry.stream().limit(cachedEntry.size() / 2).collect(Collectors.toCollection(ConcurrentLinkedQueue::new));
entriesToRemove.forEach(t2 -> cache.remove(t2.get1()));
// While entries are removed, the free list grabs a lot of free pages for itself,
// so do put/remove again to stabilize the number of free pages.
entriesToRemove.forEach(t2 -> cache.put(t2.get1(), t2.get2()));
entriesToRemove.forEach(t2 -> cache.remove(t2.get1()));
forceCheckpoint();
Path cacheFolder = Paths.get(U.defaultWorkDirectory(), DFLT_STORE_DIR, ignite0.name().replaceAll("\\.", "_"), CACHE_DIR_PREFIX + CACHE_NAME);
Optional<Long> totalPartSizeBeforeStop = totalPartitionsSize(cacheFolder);
CyclicBarrier nodeStartBarrier = new CyclicBarrier(2);
int approximateIterationCount = SF.applyLB(10, 6);
// Approximate number of entries to put per iteration.
int iterationDataCount = entriesToRemove.size() / approximateIterationCount;
startAsyncPutThread(entriesToRemove, nodeStartBarrier);
// Stop the node several times while data is being put.
while (true) {
stopGrid(0, true);
ignite0 = startGrid(0);
ignite0.cluster().active(true);
if (entriesToRemove.isEmpty())
break;
// Notify put thread that node successfully started.
nodeStartBarrier.await();
nodeStartBarrier.reset();
int awaitSize = entriesToRemove.size() - iterationDataCount;
waitForCondition(() -> entriesToRemove.size() < awaitSize || entriesToRemove.size() == 0, 20000);
}
forceCheckpoint();
Optional<Long> totalPartSizeAfterRestore = totalPartitionsSize(cacheFolder);
// Allow the size after repeated put operations to be at most 15% (heuristic value) greater than before the operations.
// In fact, it should not double every time.
long correctedRestoreSize = totalPartSizeAfterRestore.get() - (long) (totalPartSizeBeforeStop.get() * 0.15);
assertTrue("Size after repeated put operations should be no more than 15% greater. " +
    "Size before = " + totalPartSizeBeforeStop.get() + ", Size after = " + totalPartSizeAfterRestore.get(),
    totalPartSizeBeforeStop.get() > correctedRestoreSize);
}
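totalPartitionsSize(cacheFolder) is not shown in the snippet. Judging by the Optional<Long> it returns, such a helper could simply sum the sizes of the partition files in the cache work directory; the "part-" file-name filter below is an assumption.
private Optional<Long> totalPartitionsSize(Path cacheFolder) throws IOException {
    // Sum the on-disk sizes of all partition files (e.g. part-0.bin, part-1.bin, ...).
    try (Stream<Path> files = Files.list(cacheFolder)) {
        return files
            .filter(f -> f.getFileName().toString().startsWith("part-"))
            .map(f -> f.toFile().length())
            .reduce(Long::sum);
    }
}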