use of org.apache.helix.tools.ClusterStateVerifier.BestPossAndExtViewZkVerifier in project helix by apache.
the class TestRestartParticipant method testRestartParticipant.
@Test()
public void testRestartParticipant() throws Exception {
// Logger.getRootLogger().setLevel(Level.INFO);
System.out.println("START testRestartParticipant at " + new Date(System.currentTimeMillis()));
String clusterName = getShortClassName();
MockParticipantManager[] participants = new MockParticipantManager[5];
// participant port
TestHelper.setupCluster(// participant port
clusterName, // participant port
ZK_ADDR, // participant port
12918, // participant name prefix
"localhost", // resource name prefix
"TestDB", // resources
1, // partitions per resource
10, // number of nodes
5, // replicas
3, "MasterSlave", // do rebalance
true);
ClusterControllerManager controller = new ClusterControllerManager(ZK_ADDR, clusterName, "controller_0");
controller.syncStart();
// start participants
for (int i = 0; i < 5; i++) {
String instanceName = "localhost_" + (12918 + i);
if (i == 4) {
participants[i] = new MockParticipantManager(ZK_ADDR, clusterName, instanceName);
participants[i].setTransition(new KillOtherTransition(participants[0]));
} else {
participants[i] = new MockParticipantManager(ZK_ADDR, clusterName, instanceName);
}
participants[i].syncStart();
}
boolean result = ClusterStateVerifier.verifyByZkCallback(new BestPossAndExtViewZkVerifier(ZK_ADDR, clusterName));
Assert.assertTrue(result);
// restart
Thread.sleep(500);
MockParticipantManager participant = new MockParticipantManager(ZK_ADDR, participants[0].getClusterName(), participants[0].getInstanceName());
System.err.println("Restart " + participant.getInstanceName());
participant.syncStart();
result = ClusterStateVerifier.verifyByZkCallback(new BestPossAndExtViewZkVerifier(ZK_ADDR, clusterName));
Assert.assertTrue(result);
// clean up
controller.syncStop();
for (int i = 0; i < 5; i++) {
participants[i].syncStop();
}
participant.syncStop();
System.out.println("START testRestartParticipant at " + new Date(System.currentTimeMillis()));
}
use of org.apache.helix.tools.ClusterStateVerifier.BestPossAndExtViewZkVerifier in project helix by apache.
the class TestZkReconnect method testZKReconnect.
@Test(enabled = false)
public void testZKReconnect() throws Exception {
final AtomicReference<ZkServer> zkServerRef = new AtomicReference<ZkServer>();
final int zkPort = TestHelper.getRandomPort();
final String zkAddr = String.format("localhost:%d", zkPort);
ZkServer zkServer = TestHelper.startZkServer(zkAddr);
zkServerRef.set(zkServer);
String className = TestHelper.getTestClassName();
String methodName = TestHelper.getTestMethodName();
String clusterName = className + "_" + methodName;
// Setup cluster
LOG.info("Setup clusters");
ClusterSetup clusterSetup = new ClusterSetup(zkAddr);
clusterSetup.addCluster(clusterName, true);
// Registers and starts controller
LOG.info("Starts controller");
HelixManager controller = HelixManagerFactory.getZKHelixManager(clusterName, null, InstanceType.CONTROLLER, zkAddr);
controller.connect();
// Registers and starts participant
LOG.info("Starts participant");
String hostname = "localhost";
String instanceId = String.format("%s_%d", hostname, 1);
clusterSetup.addInstanceToCluster(clusterName, instanceId);
HelixManager participant = HelixManagerFactory.getZKHelixManager(clusterName, instanceId, InstanceType.PARTICIPANT, zkAddr);
participant.connect();
LOG.info("Register state machine");
final CountDownLatch latch = new CountDownLatch(1);
participant.getStateMachineEngine().registerStateModelFactory("OnlineOffline", new StateModelFactory<StateModel>() {
@Override
public StateModel createNewStateModel(String resource, String stateUnitKey) {
return new SimpleStateModel(latch);
}
}, "test");
String resourceName = "test-resource";
LOG.info("Ideal state assignment");
HelixAdmin helixAdmin = participant.getClusterManagmentTool();
helixAdmin.addResource(clusterName, resourceName, 1, "OnlineOffline", IdealState.RebalanceMode.CUSTOMIZED.toString());
IdealState idealState = helixAdmin.getResourceIdealState(clusterName, resourceName);
idealState.setReplicas("1");
idealState.setStateModelFactoryName("test");
idealState.setPartitionState(resourceName + "_0", instanceId, "ONLINE");
LOG.info("Shutdown ZK server");
TestHelper.stopZkServer(zkServerRef.get());
Executors.newSingleThreadScheduledExecutor().schedule(new Runnable() {
@Override
public void run() {
try {
LOG.info("Restart ZK server");
// zkServer.set(TestUtils.startZookeeper(zkDir, zkPort));
zkServerRef.set(TestHelper.startZkServer(zkAddr, null, false));
} catch (Exception e) {
LOG.error(e.getMessage(), e);
}
}
}, 2L, TimeUnit.SECONDS);
// future.get();
LOG.info("Before update ideal state");
helixAdmin.setResourceIdealState(clusterName, resourceName, idealState);
LOG.info("After update ideal state");
LOG.info("Wait for OFFLINE->ONLINE state transition");
try {
Assert.assertTrue(latch.await(15, TimeUnit.SECONDS));
// wait until stable state
boolean result = ClusterStateVerifier.verifyByZkCallback(new BestPossAndExtViewZkVerifier(zkAddr, clusterName));
Assert.assertTrue(result);
} finally {
participant.disconnect();
zkServerRef.get().shutdown();
}
}
use of org.apache.helix.tools.ClusterStateVerifier.BestPossAndExtViewZkVerifier in project helix by apache.
the class TestFullAutoNodeTagging method testSafeAssignment.
/**
* Basic test for tagging behavior. 10 participants, of which 4 are tagged. Launch all 10,
* checking external view every time a tagged node is started. Then shut down all 10, checking
* external view every time a tagged node is killed.
*/
@Test
public void testSafeAssignment() throws Exception {
final int NUM_PARTICIPANTS = 10;
final int NUM_PARTITIONS = 4;
final int NUM_REPLICAS = 2;
final String RESOURCE_NAME = "TestDB0";
final String TAG = "ASSIGNABLE";
final String[] TAGGED_NODES = { "localhost_12920", "localhost_12922", "localhost_12924", "localhost_12925" };
Set<String> taggedNodes = Sets.newHashSet(TAGGED_NODES);
String className = TestHelper.getTestClassName();
String methodName = TestHelper.getTestMethodName();
String clusterName = className + "_" + methodName;
System.out.println("START " + clusterName + " at " + new Date(System.currentTimeMillis()));
// Set up cluster
// participant port
TestHelper.setupCluster(// participant port
clusterName, // participant port
ZK_ADDR, // participant port
12918, // participant name prefix
"localhost", // resource name prefix
"TestDB", // resources
1, // partitions per resource
NUM_PARTITIONS, // number of nodes
NUM_PARTICIPANTS, // replicas
NUM_REPLICAS, // use FULL_AUTO mode to test node tagging
"MasterSlave", // use FULL_AUTO mode to test node tagging
RebalanceMode.FULL_AUTO, // do rebalance
true);
// tag the resource and participants
HelixAdmin helixAdmin = new ZKHelixAdmin(ZK_ADDR);
for (String taggedNode : TAGGED_NODES) {
helixAdmin.addInstanceTag(clusterName, taggedNode, TAG);
}
IdealState idealState = helixAdmin.getResourceIdealState(clusterName, RESOURCE_NAME);
idealState.setInstanceGroupTag(TAG);
helixAdmin.setResourceIdealState(clusterName, RESOURCE_NAME, idealState);
// start controller
ClusterControllerManager controller = new ClusterControllerManager(ZK_ADDR, clusterName, "controller");
controller.syncStart();
// start participants
MockParticipantManager[] participants = new MockParticipantManager[NUM_PARTICIPANTS];
for (int i = 0; i < NUM_PARTICIPANTS; i++) {
final String instanceName = "localhost_" + (12918 + i);
participants[i] = new MockParticipantManager(ZK_ADDR, clusterName, instanceName);
participants[i].syncStart();
// ensure that everything is valid if this is a tagged node that is starting
if (taggedNodes.contains(instanceName)) {
// make sure that the best possible matches the external view
Thread.sleep(500);
boolean result = ClusterStateVerifier.verifyByZkCallback(new BestPossAndExtViewZkVerifier(ZK_ADDR, clusterName));
Assert.assertTrue(result);
// make sure that the tagged state of the nodes is still balanced
result = ClusterStateVerifier.verifyByZkCallback(new TaggedZkVerifier(clusterName, RESOURCE_NAME, TAGGED_NODES, false));
Assert.assertTrue(result, "initial assignment with all tagged nodes live is invalid");
}
}
// cleanup
for (int i = 0; i < NUM_PARTICIPANTS; i++) {
String participantName = participants[i].getInstanceName();
participants[i].syncStop();
if (taggedNodes.contains(participantName)) {
// check that the external view is still correct even after removing tagged nodes
taggedNodes.remove(participantName);
Thread.sleep(500);
boolean result = ClusterStateVerifier.verifyByZkCallback(new TaggedZkVerifier(clusterName, RESOURCE_NAME, TAGGED_NODES, taggedNodes.isEmpty()));
Assert.assertTrue(result, "incorrect state after removing " + participantName + ", " + taggedNodes + " remain");
}
}
controller.syncStop();
System.out.println("END " + clusterName + " at " + new Date(System.currentTimeMillis()));
}
use of org.apache.helix.tools.ClusterStateVerifier.BestPossAndExtViewZkVerifier in project helix by apache.
the class TestNonOfflineInitState method testNonOfflineInitState.
@Test
public void testNonOfflineInitState() throws Exception {
System.out.println("START testNonOfflineInitState at " + new Date(System.currentTimeMillis()));
String clusterName = getShortClassName();
setupCluster(// participant port
clusterName, // participant port
ZK_ADDR, // participant port
12918, // participant name prefix
"localhost", // resource name prefix
"TestDB", // resources
1, // partitions per resource
10, // number of nodes
5, // replicas
1, "Bootstrap", // do rebalance
true);
ClusterControllerManager controller = new ClusterControllerManager(ZK_ADDR, clusterName, "controller_0");
controller.syncStart();
// start participants
MockParticipantManager[] participants = new MockParticipantManager[5];
for (int i = 0; i < 5; i++) {
String instanceName = "localhost_" + (12918 + i);
participants[i] = new MockParticipantManager(ZK_ADDR, clusterName, instanceName);
// add a state model with non-OFFLINE initial state
StateMachineEngine stateMach = participants[i].getStateMachineEngine();
MockBootstrapModelFactory bootstrapFactory = new MockBootstrapModelFactory();
stateMach.registerStateModelFactory("Bootstrap", bootstrapFactory);
participants[i].syncStart();
}
boolean result = ClusterStateVerifier.verifyByZkCallback(new BestPossAndExtViewZkVerifier(ZK_ADDR, clusterName));
Assert.assertTrue(result);
// clean up
controller.syncStop();
for (int i = 0; i < 5; i++) {
participants[i].syncStop();
}
System.out.println("END testNonOfflineInitState at " + new Date(System.currentTimeMillis()));
}
use of org.apache.helix.tools.ClusterStateVerifier.BestPossAndExtViewZkVerifier in project helix by apache.
the class TestConsecutiveZkSessionExpiry method testDistributedController.
@Test
public void testDistributedController() throws Exception {
// Logger.getRootLogger().setLevel(Level.INFO);
String className = TestHelper.getTestClassName();
String methodName = TestHelper.getTestMethodName();
String clusterName = className + "_" + methodName;
int n = 2;
System.out.println("START " + clusterName + " at " + new Date(System.currentTimeMillis()));
// participant port
TestHelper.setupCluster(// participant port
clusterName, // participant port
ZK_ADDR, // participant port
12918, // participant name prefix
"localhost", // resource name prefix
"TestDB", // resources
1, // partitions per resource
4, // number of nodes
n, // replicas
2, "MasterSlave", // do rebalance
true);
ClusterDistributedController[] distributedControllers = new ClusterDistributedController[n];
CountDownLatch startCountdown = new CountDownLatch(1);
CountDownLatch endCountdown = new CountDownLatch(1);
for (int i = 0; i < n; i++) {
String contrllerName = "localhost_" + (12918 + i);
distributedControllers[i] = new ClusterDistributedController(ZK_ADDR, clusterName, contrllerName);
distributedControllers[i].getStateMachineEngine().registerStateModelFactory("MasterSlave", new MockMSModelFactory());
if (i == 0) {
distributedControllers[i].addPreConnectCallback(new PreConnectTestCallback(contrllerName, startCountdown, endCountdown));
}
distributedControllers[i].connect();
}
boolean result = ClusterStateVerifier.verifyByZkCallback(new BestPossAndExtViewZkVerifier(ZK_ADDR, clusterName));
Assert.assertTrue(result);
// expire the session of distributedController
LOG.info("1st Expiring distributedController session...");
String oldSessionId = distributedControllers[0].getSessionId();
ZkTestHelper.asyncExpireSession(distributedControllers[0].getZkClient());
String newSessionId = distributedControllers[0].getSessionId();
LOG.info("Expried distributedController session. oldSessionId: " + oldSessionId + ", newSessionId: " + newSessionId);
// expire zk session again during HelixManager#handleNewSession()
startCountdown.await();
LOG.info("2nd Expiring distributedController session...");
oldSessionId = distributedControllers[0].getSessionId();
ZkTestHelper.asyncExpireSession(distributedControllers[0].getZkClient());
newSessionId = distributedControllers[0].getSessionId();
LOG.info("Expried distributedController session. oldSessionId: " + oldSessionId + ", newSessionId: " + newSessionId);
endCountdown.countDown();
result = ClusterStateVerifier.verifyByPolling(new ClusterStateVerifier.BestPossAndExtViewZkVerifier(ZK_ADDR, clusterName));
Assert.assertTrue(result);
// verify leader changes to localhost_12919
HelixDataAccessor accessor = new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<ZNRecord>(_gZkClient));
PropertyKey.Builder keyBuilder = accessor.keyBuilder();
Assert.assertNotNull(pollForProperty(LiveInstance.class, accessor, keyBuilder.liveInstance("localhost_12918"), true));
LiveInstance leader = pollForProperty(LiveInstance.class, accessor, keyBuilder.controllerLeader(), true);
Assert.assertNotNull(leader);
Assert.assertEquals(leader.getId(), "localhost_12919");
// check localhost_12918 has 2 handlers: message and data-accessor
LOG.debug("handlers: " + TestHelper.printHandlers(distributedControllers[0]));
List<CallbackHandler> handlers = distributedControllers[0].getHandlers();
Assert.assertEquals(handlers.size(), 2, "Distributed controller should have 2 handler (message) after lose leadership, but was " + handlers.size());
// clean up
distributedControllers[0].disconnect();
distributedControllers[1].disconnect();
Assert.assertNull(pollForProperty(LiveInstance.class, accessor, keyBuilder.liveInstance("localhost_12918"), false));
Assert.assertNull(pollForProperty(LiveInstance.class, accessor, keyBuilder.liveInstance("localhost_12919"), false));
Assert.assertNull(pollForProperty(LiveInstance.class, accessor, keyBuilder.controllerLeader(), false));
System.out.println("END " + clusterName + " at " + new Date(System.currentTimeMillis()));
}
Aggregations