use of org.apache.ignite.cluster.ClusterState.ACTIVE in project ignite by apache.
the class ClusterStateChangeEventTest method test.
/**
*/
@Test
public void test() throws Exception {
IgniteEx crd = grid(0);
crd.cluster().state(ACTIVE);
Map<Integer, Integer> data = IntStream.range(0, 1000).boxed().collect(Collectors.toMap(i -> i, i -> i));
crd.cache(DEFAULT_CACHE_NAME).putAll(data);
for (Ignite node : G.allGrids()) assertEquals(node.name(), ACTIVE, node.cluster().state());
// ACTIVE -> READ_ONLY
changeStateAndCheckEvents(ACTIVE_READ_ONLY);
// READ_ONLY -> ACTIVE
changeStateAndCheckEvents(ACTIVE);
// ACTIVE -> INACTIVE
changeStateAndCheckEvents(INACTIVE);
// INACTIVE -> READ_ONLY
changeStateAndCheckEvents(ACTIVE_READ_ONLY);
// READ_ONLY -> INACTIVE
changeStateAndCheckEvents(INACTIVE);
// INACTIVE -> ACTIVE
changeStateAndCheckEvents(ACTIVE);
}
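The changeStateAndCheckEvents helper called above is not part of this snippet. The following is a minimal sketch of what such a helper could look like, not the actual test code: it assumes EVT_CLUSTER_STATE_CHANGED is included in the nodes' event types and that the usual java.util.concurrent and org.apache.ignite.events imports are present.

/** Hypothetical sketch: switch the cluster state and verify the state-change event on every node. */
private void changeStateAndCheckEvents(ClusterState targetState) throws Exception {
    IgniteEx crd = grid(0);
    ClusterState prevState = crd.cluster().state();

    // One event is expected per node in the topology.
    CountDownLatch evtLatch = new CountDownLatch(G.allGrids().size());

    for (Ignite node : G.allGrids()) {
        node.events().localListen((IgnitePredicate<Event>)evt -> {
            ClusterStateChangeEvent stateEvt = (ClusterStateChangeEvent)evt;

            // Count the event only if it describes the expected transition.
            if (stateEvt.previousState() == prevState && stateEvt.state() == targetState)
                evtLatch.countDown();

            return false; // Unsubscribe after the first event.
        }, EventType.EVT_CLUSTER_STATE_CHANGED);
    }

    crd.cluster().state(targetState);

    assertTrue("EVT_CLUSTER_STATE_CHANGED was not observed on every node.", evtLatch.await(10, TimeUnit.SECONDS));
}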
use of org.apache.ignite.cluster.ClusterState.ACTIVE in project ignite by apache.
the class IgniteWalRebalanceTest method testSwitchHistoricalRebalanceToFull.
/**
* Tests that demander switches to full rebalance if the previously chosen supplier for a group has failed
* to perform historical rebalance due to an unexpected error.
*
* @param corruptWalClo Closure that corrupts wal iterating on supplier node.
* @param clientClo Closure that is called between the demand message sent and the supply message received.
* Returns {@code true} if it is assumed that the rebalancing from the second supplier should be reassigned.
* @throws Exception If failed
*/
public void testSwitchHistoricalRebalanceToFull(IgniteInClosure<IgniteEx> corruptWalClo, IgniteCallable<Boolean> clientClo) throws Exception {
backups = 3;
IgniteEx supplier1 = startGrid(0);
IgniteEx supplier2 = startGrid(1);
IgniteEx demander = startGrid(2);
supplier1.cluster().state(ACTIVE);
String supplier1Name = supplier1.localNode().consistentId().toString();
String supplier2Name = supplier2.localNode().consistentId().toString();
String demanderName = demander.localNode().consistentId().toString();
String cacheName1 = "test-cache-1";
String cacheName2 = "test-cache-2";
// Cache resides on supplier1 and demander nodes.
IgniteCache<Integer, IndexedObject> c1 = supplier1.getOrCreateCache(
    new CacheConfiguration<Integer, IndexedObject>(cacheName1)
        .setBackups(backups)
        .setAffinity(new RendezvousAffinityFunction(false, PARTS_CNT))
        .setWriteSynchronizationMode(CacheWriteSynchronizationMode.FULL_SYNC)
        .setRebalanceOrder(10)
        .setNodeFilter(n -> n.consistentId().equals(supplier1Name) || n.consistentId().equals(demanderName)));
// Cache resides on supplier2 and demander nodes.
IgniteCache<Integer, IndexedObject> c2 = supplier1.getOrCreateCache(
    new CacheConfiguration<Integer, IndexedObject>(cacheName2)
        .setBackups(backups)
        .setAffinity(new RendezvousAffinityFunction(false, PARTS_CNT))
        .setWriteSynchronizationMode(CacheWriteSynchronizationMode.FULL_SYNC)
        .setRebalanceOrder(20)
        .setNodeFilter(n -> n.consistentId().equals(supplier2Name) || n.consistentId().equals(demanderName)));
// Fill initial data.
final int entryCnt = PARTS_CNT * 200;
final int preloadEntryCnt = PARTS_CNT * 400;
int val = 0;
for (int k = 0; k < preloadEntryCnt; k++) {
c1.put(k, new IndexedObject(val++));
c2.put(k, new IndexedObject(val++));
}
forceCheckpoint();
stopGrid(2);
// Rewrite data to trigger further rebalance.
for (int i = 0; i < entryCnt; i++) {
c1.put(i, new IndexedObject(val++));
c2.put(i, new IndexedObject(val++));
}
// Delay rebalance process for specified groups.
blockMsgPred = (node, msg) -> {
if (msg instanceof GridDhtPartitionDemandMessage) {
GridDhtPartitionDemandMessage msg0 = (GridDhtPartitionDemandMessage) msg;
return msg0.groupId() == CU.cacheId(cacheName1) || msg0.groupId() == CU.cacheId(cacheName2);
}
return false;
};
Queue<RecordedDemandMessage> recorderedMsgs = new ConcurrentLinkedQueue<>();
// Record demand messages for specified groups.
recordMsgPred = (node, msg) -> {
if (msg instanceof GridDhtPartitionDemandMessage) {
GridDhtPartitionDemandMessage msg0 = (GridDhtPartitionDemandMessage) msg;
if (msg0.groupId() == CU.cacheId(cacheName1) || msg0.groupId() == CU.cacheId(cacheName2)) {
recorderedMsgs.add(new RecordedDemandMessage(node.id(), msg0.groupId(), msg0.partitions().hasFull(), msg0.partitions().hasHistorical()));
}
}
return false;
};
// Delay rebalance process for specified group from supplier2.
TestRecordingCommunicationSpi supplierSpi2 = TestRecordingCommunicationSpi.spi(supplier2);
supplierSpi2.blockMessages((node, msg) -> {
if (msg instanceof GridDhtPartitionSupplyMessage) {
GridDhtPartitionSupplyMessage msg0 = (GridDhtPartitionSupplyMessage) msg;
return node.consistentId().equals(demanderName) && msg0.groupId() == CU.cacheId(cacheName2);
}
return false;
});
// Corrupt WAL on supplier1
corruptWalClo.apply(supplier1);
// Trigger rebalance process from suppliers.
IgniteEx restartedDemander = startGrid(2);
recordMsgPred = null;
blockMsgPred = null;
TestRecordingCommunicationSpi demanderSpi = TestRecordingCommunicationSpi.spi(grid(2));
// Wait until the demander starts historical rebalancing.
demanderSpi.waitForBlocked();
final IgniteInternalFuture<Boolean> preloadFut1 = restartedDemander.cachex(cacheName1).context().group().preloader().rebalanceFuture();
final IgniteInternalFuture<Boolean> preloadFut2 = restartedDemander.cachex(cacheName2).context().group().preloader().rebalanceFuture();
boolean rebalanceReassigned = clientClo.call();
// Unblock messages and start tracking demand and supply messages.
demanderSpi.stopBlock();
// Wait until rebalancing is cancelled.
GridTestUtils.waitForCondition(() -> preloadFut1.isDone() && (!rebalanceReassigned || (rebalanceReassigned && preloadFut2.isDone())), getTestTimeout());
Assert.assertEquals("Rebalance should be cancelled on demander node: " + preloadFut1, false, preloadFut1.get());
Assert.assertEquals("Rebalance should be cancelled on demander node: " + preloadFut2, false, rebalanceReassigned && preloadFut2.get());
// Unblock supply messages from supplier2
supplierSpi2.stopBlock();
awaitPartitionMapExchange(true, true, null);
// Check data consistency.
assertPartitionsSame(idleVerify(restartedDemander, cacheName2, cacheName1));
// Check that historical rebalance switched to full for supplier1 and remained historical for supplier2.
IgnitePredicate<RecordedDemandMessage> histPred = (msg) -> msg.hasHistorical() && !msg.hasFull();
IgnitePredicate<RecordedDemandMessage> fullPred = (msg) -> !msg.hasHistorical() && msg.hasFull();
// Supplier1
List<RecordedDemandMessage> demandMsgsForSupplier1 = recorderedMsgs.stream()
    .filter(msg -> msg.groupId() == CU.cacheId(cacheName1))
    .filter(msg -> msg.hasFull() || msg.hasHistorical())
    .collect(toList());
assertEquals("There should only two demand messages.", 2, demandMsgsForSupplier1.size());
assertTrue("The first message should require historical rebalance [msg=" + demandMsgsForSupplier1.get(0) + ']', histPred.apply(demandMsgsForSupplier1.get(0)));
assertTrue("The second message should require full rebalance [msg=" + demandMsgsForSupplier1.get(0) + ']', fullPred.apply(demandMsgsForSupplier1.get(1)));
// Supplier2
List<RecordedDemandMessage> demandMsgsForSupplier2 = recorderedMsgs.stream()
    .filter(msg -> msg.groupId() == CU.cacheId(cacheName2))
    .filter(msg -> msg.hasFull() || msg.hasHistorical())
    .collect(toList());
if (rebalanceReassigned) {
assertEquals("There should be only two demand messages.", 2, demandMsgsForSupplier2.size());
assertTrue("Both messages should require historical rebalance [" + "msg=" + demandMsgsForSupplier2.get(0) + ", msg=" + demandMsgsForSupplier2.get(1) + ']', histPred.apply(demandMsgsForSupplier2.get(0)) && histPred.apply(demandMsgsForSupplier2.get(1)));
} else {
assertEquals("There should be only one demand message.", 1, demandMsgsForSupplier2.size());
assertTrue("Message should require historical rebalance [" + "msg=" + demandMsgsForSupplier2.get(0) + ']', histPred.apply(demandMsgsForSupplier2.get(0)));
}
}
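Note that testSwitchHistoricalRebalanceToFull is a parameterized helper rather than a test by itself. A hedged sketch of how a concrete test might drive it is shown below; the test name is hypothetical, while injectFailingIOFactory is the WAL-corrupting helper that also appears in the next example.

@Test
public void testSwitchToFullRebalanceWhenWalIsCorrupted() throws Exception {
    testSwitchHistoricalRebalanceToFull(
        // Make WAL reads fail on the supplier so its historical iterator breaks.
        supplier -> injectFailingIOFactory(supplier),
        // No topology change between demand and supply, so rebalancing from supplier2 is not reassigned.
        () -> false);
}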
use of org.apache.ignite.cluster.ClusterState.ACTIVE in project ignite by apache.
the class IgniteWalRebalanceTest method testMultipleNodesFailHistoricalRebalance.
/**
* Tests that demander switches to full rebalance if the previously chosen two of three of suppliers
* for a group have failed to perform historical rebalance due to an unexpected error.
*
* @throws Exception If failed
*/
@Test
@WithSystemProperty(key = "IGNITE_DISABLE_WAL_DURING_REBALANCING", value = "true")
public void testMultipleNodesFailHistoricalRebalance() throws Exception {
backups = 1;
int node_cnt = 4;
int demanderId = node_cnt - 1;
// Start a new cluster with 3 suppliers.
startGrids(node_cnt - 1);
// Start demander node.
userAttrs.put("TEST_ATTR", "TEST_ATTR");
startGrid(node_cnt - 1);
grid(0).cluster().state(ACTIVE);
// Create a new cache that places a full set of partitions on demander node.
RendezvousAffinityFunction aff = new RendezvousAffinityFunction(false, PARTS_CNT);
aff.setAffinityBackupFilter(new ClusterNodeAttributeAffinityBackupFilter("TEST_ATTR"));
String cacheName = "test-cache-1";
IgniteCache<Integer, IndexedObject> cache0 = grid(0).getOrCreateCache(
    new CacheConfiguration<Integer, IndexedObject>(cacheName)
        .setBackups(backups)
        .setAffinity(aff)
        .setWriteSynchronizationMode(CacheWriteSynchronizationMode.FULL_SYNC));
// Fill initial data and force checkpoint.
final int entryCnt = PARTS_CNT * 200;
final int preloadEntryCnt = PARTS_CNT * 201;
int val = 0;
for (int k = 0; k < preloadEntryCnt; k++) cache0.put(k, new IndexedObject(val++));
forceCheckpoint();
// Stop demander node.
stopGrid(demanderId);
// Rewrite data to trigger further rebalance.
for (int k = 0; k < entryCnt; k++) {
// Skip keys mapped to partition 12 so that partition is not updated, even though the corresponding RebalanceFuture will be cancelled.
if (grid(0).affinity(cacheName).partition(k) != 12)
cache0.put(k, new IndexedObject(val++));
}
// Upload additional data to a particular partition (primary partition belongs to coordinator, for instance)
// in order to trigger full rebalance for that partition instead of historical one.
int[] primaries0 = grid(0).affinity(cacheName).primaryPartitions(grid(0).localNode());
for (int i = 0; i < preloadEntryCnt; ++i) cache0.put(primaries0[0], new IndexedObject(val++));
forceCheckpoint();
// Delay rebalance process for specified group.
blockMsgPred = (node, msg) -> {
if (msg instanceof GridDhtPartitionDemandMessage) {
GridDhtPartitionDemandMessage msg0 = (GridDhtPartitionDemandMessage) msg;
return msg0.groupId() == CU.cacheId(cacheName);
}
return false;
};
Queue<RecordedDemandMessage> recorderedMsgs = new ConcurrentLinkedQueue<>();
// Record demand messages for specified group.
recordMsgPred = (node, msg) -> {
if (msg instanceof GridDhtPartitionDemandMessage) {
GridDhtPartitionDemandMessage msg0 = (GridDhtPartitionDemandMessage) msg;
if (msg0.groupId() == CU.cacheId(cacheName)) {
recorderedMsgs.add(new RecordedDemandMessage(node.id(), msg0.groupId(), msg0.partitions().hasFull(), msg0.partitions().hasHistorical()));
}
}
return false;
};
// Corrupt WAL on suppliers, except the one.
injectFailingIOFactory(grid(0));
injectFailingIOFactory(grid(1));
// Trigger rebalance process from suppliers.
IgniteEx restartedDemander = startGrid(node_cnt - 1);
TestRecordingCommunicationSpi demanderSpi = TestRecordingCommunicationSpi.spi(restartedDemander);
// Wait until the demander starts historical rebalancing.
demanderSpi.waitForBlocked();
final IgniteInternalFuture<Boolean> preloadFut = restartedDemander.cachex(cacheName).context().group().preloader().rebalanceFuture();
// Unblock messages and start tracking demand and supply messages.
demanderSpi.stopBlock();
// Wait until rebalancing is cancelled for both suppliers.
assertTrue("Rebalance future was not cancelled [fut=" + preloadFut + ']', GridTestUtils.waitForCondition(preloadFut::isDone, getTestTimeout()));
Assert.assertEquals("Rebalance should be cancelled on demander node: " + preloadFut, false, preloadFut.get());
awaitPartitionMapExchange(true, true, null);
// Check data consistency.
assertPartitionsSame(idleVerify(restartedDemander, cacheName));
// Check that historical rebalance switched to full for supplier 1 & 2 and it was historical for supplier3.
IgnitePredicate<RecordedDemandMessage> histPred = msg -> msg.hasHistorical() && !msg.hasFull();
IgnitePredicate<RecordedDemandMessage> fullPred = msg -> !msg.hasHistorical() && msg.hasFull();
IgnitePredicate<RecordedDemandMessage> mixedPred = msg -> msg.hasHistorical() && msg.hasFull();
IgniteBiInClosure<UUID, Boolean> supplierChecker = (supplierId, mixed) -> {
List<RecordedDemandMessage> demandMsgsForSupplier = recorderedMsgs.stream()
    .filter(msg -> msg.supplierId().equals(supplierId))
    .filter(msg -> msg.groupId() == CU.cacheId(cacheName))
    .filter(msg -> msg.hasFull() || msg.hasHistorical())
    .collect(toList());
assertEquals("There should only two demand messages [supplierId=" + supplierId + ']', 2, demandMsgsForSupplier.size());
assertTrue("The first message should require " + (mixed ? "mixed" : "historical") + " rebalance [msg=" + demandMsgsForSupplier.get(0) + ']', (mixed ? mixedPred.apply(demandMsgsForSupplier.get(0)) : histPred.apply(demandMsgsForSupplier.get(0))));
assertTrue("The second message should require full rebalance [msg=" + demandMsgsForSupplier.get(0) + ']', fullPred.apply(demandMsgsForSupplier.get(1)));
};
supplierChecker.apply(grid(0).cluster().localNode().id(), true);
supplierChecker.apply(grid(1).cluster().localNode().id(), false);
// Check supplier3
List<RecordedDemandMessage> demandMsgsForSupplier = recorderedMsgs.stream()
    .filter(msg -> msg.supplierId().equals(grid(2).cluster().localNode().id()))
    .filter(msg -> msg.groupId() == CU.cacheId(cacheName))
    .filter(msg -> msg.hasFull() || msg.hasHistorical())
    .collect(toList());
assertEquals("There should only one demand message.", 1, demandMsgsForSupplier.size());
assertTrue("The first message should require historical rebalance [msg=" + demandMsgsForSupplier.get(0) + ']', histPred.apply(demandMsgsForSupplier.get(0)));
}
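The RecordedDemandMessage holder used by both rebalance tests is not included in these snippets. A minimal sketch, inferred only from how it is constructed and read above, might look like this:

/** Hypothetical holder for the interesting fields of a recorded demand message. */
private static class RecordedDemandMessage {
    /** Id of the supplier node the demand message was addressed to. */
    private final UUID supplierId;

    /** Cache group id. */
    private final int grpId;

    /** {@code True} if the message demands full rebalance for at least one partition. */
    private final boolean full;

    /** {@code True} if the message demands historical rebalance for at least one partition. */
    private final boolean historical;

    /** */
    RecordedDemandMessage(UUID supplierId, int grpId, boolean full, boolean historical) {
        this.supplierId = supplierId;
        this.grpId = grpId;
        this.full = full;
        this.historical = historical;
    }

    /** @return Supplier node id. */
    UUID supplierId() { return supplierId; }

    /** @return Cache group id. */
    int groupId() { return grpId; }

    /** @return {@code True} if full rebalance was demanded. */
    boolean hasFull() { return full; }

    /** @return {@code True} if historical rebalance was demanded. */
    boolean hasHistorical() { return historical; }

    /** {@inheritDoc} */
    @Override public String toString() {
        return "RecordedDemandMessage [supplierId=" + supplierId + ", grpId=" + grpId + ", full=" + full + ", historical=" + historical + ']';
    }
}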
use of org.apache.ignite.cluster.ClusterState.ACTIVE in project ignite by apache.
the class TxRecoveryWithConcurrentRollbackTest method testTxDoesntBecomePreparedAfterError.
/**
* Start 3 servers,
* start 2 clients,
* start two OPTIMISTIC transactions with the same key from different client nodes,
* trying to transfer both to PREPARED state,
* stop one client node.
*/
@Test
public void testTxDoesntBecomePreparedAfterError() throws Exception {
backups = 2;
persistence = true;
syncMode = FULL_ASYNC;
final IgniteEx node0 = startGrids(3);
node0.cluster().state(ACTIVE);
final IgniteEx client1 = startGrid("client1");
final IgniteEx client2 = startGrid("client2");
awaitPartitionMapExchange();
final IgniteCache<Object, Object> cache = client1.cache(DEFAULT_CACHE_NAME);
final IgniteCache<Object, Object> cache2 = client2.cache(DEFAULT_CACHE_NAME);
final Integer pk = primaryKey(node0.cache(DEFAULT_CACHE_NAME));
CountDownLatch txPrepareLatch = new CountDownLatch(1);
GridTestUtils.runMultiThreadedAsync(() -> {
try (final Transaction tx = client1.transactions().withLabel("tx1").txStart(OPTIMISTIC, READ_COMMITTED, 5000, 1)) {
cache.put(pk, Boolean.TRUE);
TransactionProxyImpl p = (TransactionProxyImpl) tx;
// Block finish requests to prevent tx rollback on exit from the try-with-resources block; this should cause the other tx to fail by timeout.
spi(client1).blockMessages((node, msg) -> msg instanceof GridNearTxFinishRequest);
log.info("Test, preparing tx: xid=" + tx.xid() + ", tx=" + tx);
// Only prepare is performed to try to lock the key; commit is not needed here.
p.tx().prepareNearTxLocal();
p.tx().currentPrepareFuture().listen(fut -> txPrepareLatch.countDown());
} catch (Exception e) {
// No-op.
}
}, 1, "tx1-thread");
try (final Transaction tx = client2.transactions().withLabel("tx2").txStart(OPTIMISTIC, READ_COMMITTED, 5000, 1)) {
cache2.put(pk, Boolean.TRUE);
TransactionProxyImpl p = (TransactionProxyImpl) tx;
log.info("Test, preparing tx: xid=" + tx.xid() + ", tx=" + tx);
p.tx().prepareNearTxLocal();
p.tx().currentPrepareFuture().listen(fut -> txPrepareLatch.countDown());
txPrepareLatch.await(6, TimeUnit.SECONDS);
if (txPrepareLatch.getCount() > 0)
fail("Failed to await for tx prepare.");
AtomicReference<GridDhtTxLocal> dhtTxLocRef = new AtomicReference<>();
assertTrue(waitForCondition(() -> {
dhtTxLocRef.set((GridDhtTxLocal) txs(node0).stream().filter(t -> t.state() == TransactionState.PREPARING).findFirst().orElse(null));
return dhtTxLocRef.get() != null;
}, 6_000));
assertNotNull(dhtTxLocRef.get());
UUID clientNodeToFail = dhtTxLocRef.get().eventNodeId();
GridDhtTxPrepareFuture prep = GridTestUtils.getFieldValue(dhtTxLocRef.get(), "prepFut");
prep.get();
List<IgniteInternalTx> txs = txs(node0);
String txsStr = txs.stream().map(Object::toString).collect(Collectors.joining(", "));
log.info("Transactions check point [count=" + txs.size() + ", txs=" + txsStr + "]");
if (clientNodeToFail.equals(client1.localNode().id()))
client1.close();
else if (clientNodeToFail.equals(client2.localNode().id()))
client2.close();
} catch (Exception e) {
log.error(e.getMessage(), e);
}
U.sleep(500);
assertEquals(3, grid(1).context().discovery().aliveServerNodes().size());
assertEquals(txs(client1).toString() + ", " + txs(client2).toString(), 1, txs(client1).size() + txs(client2).size());
}
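The txs(node) helper is not shown in this snippet. A minimal sketch, assuming it simply snapshots the active internal transactions from the node's transaction manager, could be:

/** @return Snapshot of the active internal transactions on the given node (assumed helper). */
private static List<IgniteInternalTx> txs(IgniteEx node) {
    return new ArrayList<>(node.context().cache().context().tm().activeTransactions());
}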
use of org.apache.ignite.cluster.ClusterState.ACTIVE in project ignite by apache.
the class IgniteClusterActivateDeactivateTestWithPersistence method testDeactivateClusterWithPersistentCachesAndDifferentDataRegions.
/**
* Tests "soft" deactivation (without using the --force flag)
* when the cluster contains persistent caches and cluster nodes "support" different lists of data regions.
*
* Expected behavior: the cluster should be deactivated successfully (there is no data loss).
*
* @throws Exception If failed.
*/
@Test
public void testDeactivateClusterWithPersistentCachesAndDifferentDataRegions() throws Exception {
IgniteEx srv = startGrid(0);
addAdditionalDataRegion = true;
IgniteEx srv1 = startGrid(1);
IgniteEx clientNode = startClientGrid(2);
clientNode.cluster().state(ACTIVE);
DataStorageConfiguration dsCfg = srv1.configuration().getDataStorageConfiguration();
DataRegionConfiguration persistentRegion = Arrays.stream(dsCfg.getDataRegionConfigurations()).filter(region -> ADDITIONAL_PERSISTENT_DATA_REGION.equals(region.getName())).findFirst().orElse(null);
assertTrue("It is assumed that the '" + ADDITIONAL_PERSISTENT_DATA_REGION + "' data storage region exists and persistent.", persistentRegion != null && persistentRegion.isPersistenceEnabled());
final UUID srv1NodeId = srv1.localNode().id();
// Create a new cache that is placed into persistent data region.
srv.getOrCreateCache(new CacheConfiguration<>("test-client-cache").setDataRegionName(persistentRegion.getName()).setAffinity(new RendezvousAffinityFunction(false, 1)).setNodeFilter(node -> node.id().equals(srv1NodeId)));
// Try to deactivate the cluster without the `force` flag.
IgniteInternalFuture<?> deactivateFut = srv.context().state().changeGlobalState(INACTIVE, false, Collections.emptyList(), false);
try {
deactivateFut.get(10, SECONDS);
} catch (IgniteCheckedException e) {
log.error("Failed to deactivate the cluster.", e);
fail("Failed to deactivate the cluster. [err=" + e.getMessage() + ']');
}
awaitPartitionMapExchange();
// Let's check that all nodes in the cluster have the same state.
for (Ignite node : G.allGrids()) {
IgniteEx n = (IgniteEx) node;
ClusterState state = n.context().state().clusterState().state();
assertTrue("Node must be in inactive state. " + "[node=" + n.configuration().getIgniteInstanceName() + ", actual=" + state + ']', INACTIVE == state);
}
}
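The addAdditionalDataRegion flag and the ADDITIONAL_PERSISTENT_DATA_REGION name refer to the test's node configuration, which is not shown here. A hedged sketch of how getConfiguration() might wire in the extra persistent data region:

/** {@inheritDoc} */
@Override protected IgniteConfiguration getConfiguration(String igniteInstanceName) throws Exception {
    IgniteConfiguration cfg = super.getConfiguration(igniteInstanceName);

    DataStorageConfiguration dsCfg = new DataStorageConfiguration()
        .setDefaultDataRegionConfiguration(new DataRegionConfiguration().setPersistenceEnabled(true));

    // Only some nodes get the additional persistent region, so region lists differ across the cluster.
    if (addAdditionalDataRegion) {
        dsCfg.setDataRegionConfigurations(new DataRegionConfiguration()
            .setName(ADDITIONAL_PERSISTENT_DATA_REGION)
            .setPersistenceEnabled(true));
    }

    return cfg.setDataStorageConfiguration(dsCfg);
}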