Use of org.apache.helix.HelixDataAccessor in project helix by apache.
Class TestReelectedPipelineCorrectness, method testReelection.
@Test
public void testReelection() throws Exception {
final int NUM_CONTROLLERS = 2;
final int NUM_PARTICIPANTS = 4;
final int NUM_PARTITIONS = 8;
final int NUM_REPLICAS = 2;
String className = TestHelper.getTestClassName();
String methodName = TestHelper.getTestMethodName();
String clusterName = className + "_" + methodName;
System.out.println("START " + clusterName + " at " + new Date(System.currentTimeMillis()));
ClusterSetup setupTool = new ClusterSetup(ZK_ADDR);
// Set up cluster
TestHelper.setupCluster(clusterName, ZK_ADDR,
12918, // participant port
"localhost", // participant name prefix
"TestDB", // resource name prefix
1, // resources
NUM_PARTITIONS, // partitions per resource
NUM_PARTICIPANTS, // number of nodes
NUM_REPLICAS, // replicas
"MasterSlave", RebalanceMode.FULL_AUTO,
true); // do rebalance
// configure distributed controllers
String controllerCluster = clusterName + "_controllers";
setupTool.addCluster(controllerCluster, true);
for (int i = 0; i < NUM_CONTROLLERS; i++) {
setupTool.addInstanceToCluster(controllerCluster, "controller_" + i);
}
setupTool.activateCluster(clusterName, controllerCluster, true);
// start participants
MockParticipantManager[] participants = new MockParticipantManager[NUM_PARTICIPANTS];
for (int i = 0; i < NUM_PARTICIPANTS; i++) {
final String instanceName = "localhost_" + (12918 + i);
participants[i] = new MockParticipantManager(ZK_ADDR, clusterName, instanceName);
participants[i].syncStart();
}
// start controllers
ClusterDistributedController[] controllers = new ClusterDistributedController[NUM_CONTROLLERS];
for (int i = 0; i < NUM_CONTROLLERS; i++) {
controllers[i] = new ClusterDistributedController(ZK_ADDR, controllerCluster, "controller_" + i);
controllers[i].syncStart();
}
Thread.sleep(1000);
// Ensure a balanced cluster
boolean result = ClusterStateVerifier.verifyByZkCallback(new BestPossAndExtViewZkVerifier(ZK_ADDR, clusterName));
Assert.assertTrue(result);
// Disable the leader, resulting in a leader election
HelixDataAccessor accessor = participants[0].getHelixDataAccessor();
LiveInstance leader = accessor.getProperty(accessor.keyBuilder().controllerLeader());
int totalWait = 0;
while (leader == null && totalWait < CHECK_TIMEOUT) {
Thread.sleep(CHECK_INTERVAL);
totalWait += CHECK_INTERVAL;
leader = accessor.getProperty(accessor.keyBuilder().controllerLeader());
}
if (totalWait >= CHECK_TIMEOUT) {
Assert.fail("No leader was ever elected!");
}
String leaderId = leader.getId();
String standbyId = (leaderId.equals("controller_0")) ? "controller_1" : "controller_0";
HelixAdmin admin = setupTool.getClusterManagementTool();
admin.enableInstance(controllerCluster, leaderId, false);
// Stop a participant to make sure that the leader election worked
Thread.sleep(500);
participants[0].syncStop();
Thread.sleep(500);
result = ClusterStateVerifier.verifyByZkCallback(new BestPossAndExtViewZkVerifier(ZK_ADDR, clusterName));
Assert.assertTrue(result);
// Disable the original standby (leaving 0 active controllers) and kill another participant
admin.enableInstance(controllerCluster, standbyId, false);
Thread.sleep(500);
participants[1].syncStop();
// Also change the ideal state
IdealState idealState = admin.getResourceIdealState(clusterName, "TestDB0");
idealState.setMaxPartitionsPerInstance(1);
admin.setResourceIdealState(clusterName, "TestDB0", idealState);
Thread.sleep(500);
// Also disable an instance in the main cluster
admin.enableInstance(clusterName, "localhost_12920", false);
// Re-enable the original leader
admin.enableInstance(controllerCluster, leaderId, true);
// Now check that both the ideal state and the live instances are adhered to by the rebalance
Thread.sleep(500);
result = ClusterStateVerifier.verifyByZkCallback(new BestPossAndExtViewZkVerifier(ZK_ADDR, clusterName));
Assert.assertTrue(result);
// cleanup
for (int i = 0; i < NUM_CONTROLLERS; i++) {
controllers[i].syncStop();
}
for (int i = 2; i < NUM_PARTICIPANTS; i++) {
participants[i].syncStop();
}
System.out.println("STOP " + clusterName + " at " + new Date(System.currentTimeMillis()));
}
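The accessor call this test revolves around is the controller-leader lookup through the key builder. A minimal sketch of just that pattern, assuming a connected HelixManager named manager:
HelixDataAccessor accessor = manager.getHelixDataAccessor();
PropertyKey.Builder keyBuilder = accessor.keyBuilder();
// getProperty returns the leader's LiveInstance, or null if no controller has been elected yet
LiveInstance leader = accessor.getProperty(keyBuilder.controllerLeader());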
Use of org.apache.helix.HelixDataAccessor in project helix by apache.
Class TestHelixInstanceTag, method testInstanceTag.
@Test
public void testInstanceTag() throws Exception {
HelixManager manager = _controller;
HelixDataAccessor accessor = manager.getHelixDataAccessor();
String DB2 = "TestDB2";
int partitions = 100;
String DB2tag = "TestDB2_tag";
int replica = 2;
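// Tag the first two instances, then create TestDB2 and rebalance it against that tag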
for (int i = 0; i < 2; i++) {
String instanceName = "localhost_" + (12918 + i);
_setupTool.getClusterManagementTool().addInstanceTag(CLUSTER_NAME, instanceName, DB2tag);
}
_setupTool.addResourceToCluster(CLUSTER_NAME, DB2, partitions, STATE_MODEL);
_setupTool.rebalanceStorageCluster(CLUSTER_NAME, DB2, DB2tag, replica);
boolean result = ClusterStateVerifier.verifyByZkCallback((new ClusterStateVerifier.BestPossAndExtViewZkVerifier(ZK_ADDR, CLUSTER_NAME)));
Assert.assertTrue(result, "Cluster verification fails");
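// Walk the external view and verify that every replica of TestDB2 landed on a tagged instance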
ExternalView ev = accessor.getProperty(accessor.keyBuilder().externalView(DB2));
Set<String> hosts = new HashSet<String>();
for (String p : ev.getPartitionSet()) {
for (String hostName : ev.getStateMap(p).keySet()) {
InstanceConfig config = accessor.getProperty(accessor.keyBuilder().instanceConfig(hostName));
Assert.assertTrue(config.containsTag(DB2tag));
hosts.add(hostName);
}
}
Assert.assertEquals(hosts.size(), 2);
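// Repeat with TestDB3 and a different tag that spans four instances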
String DB3 = "TestDB3";
String DB3Tag = "TestDB3_tag";
partitions = 10;
replica = 3;
for (int i = 1; i < 5; i++) {
String instanceName = "localhost_" + (12918 + i);
_setupTool.getClusterManagementTool().addInstanceTag(CLUSTER_NAME, instanceName, DB3Tag);
}
_setupTool.addResourceToCluster(CLUSTER_NAME, DB3, partitions, STATE_MODEL);
_setupTool.rebalanceStorageCluster(CLUSTER_NAME, DB3, DB3Tag, replica);
result = ClusterStateVerifier.verifyByZkCallback((new ClusterStateVerifier.BestPossAndExtViewZkVerifier(ZK_ADDR, CLUSTER_NAME)));
Assert.assertTrue(result, "Cluster verification fails");
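// Verify that TestDB3 placement is likewise restricted to its tagged hosts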
ev = accessor.getProperty(accessor.keyBuilder().externalView(DB3));
hosts = new HashSet<String>();
for (String p : ev.getPartitionSet()) {
for (String hostName : ev.getStateMap(p).keySet()) {
InstanceConfig config = accessor.getProperty(accessor.keyBuilder().instanceConfig(hostName));
Assert.assertTrue(config.containsTag(DB3Tag));
hosts.add(hostName);
}
}
Assert.assertEquals(hosts.size(), 4);
}
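As a side note, tag-based placement can also be checked without walking the external view: HelixAdmin can list the instances that carry a given tag. A minimal sketch, assuming the same cluster and tag strings as above:
// Lists the instances in CLUSTER_NAME carrying the "TestDB2_tag" tag
List<String> taggedHosts = _setupTool.getClusterManagementTool().getInstancesInClusterWithTag(CLUSTER_NAME, "TestDB2_tag");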
Use of org.apache.helix.HelixDataAccessor in project helix by apache.
Class TestMessagePartitionStateMismatch, method testStateMismatch.
@Test
public void testStateMismatch() throws InterruptedException {
// String controllerName = CONTROLLER_PREFIX + "_0";
// _startCMResultMap.get(controllerName)._manager;
HelixManager manager = _controller;
HelixDataAccessor accessor = manager.getHelixDataAccessor();
Builder kb = accessor.keyBuilder();
ExternalView ev = accessor.getProperty(kb.externalView(TEST_DB));
Map<String, LiveInstance> liveinstanceMap = accessor.getChildValuesMap(accessor.keyBuilder().liveInstances());
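// For every live instance hosting a replica, fabricate a state-transition message whose fromState does not match the replica's actual state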
for (String instanceName : liveinstanceMap.keySet()) {
String sessionid = liveinstanceMap.get(instanceName).getSessionId();
for (String partition : ev.getPartitionSet()) {
if (ev.getStateMap(partition).containsKey(instanceName)) {
String uuid = UUID.randomUUID().toString();
Message message = new Message(MessageType.STATE_TRANSITION, uuid);
boolean rand = new Random().nextInt(10) > 5;
if (ev.getStateMap(partition).get(instanceName).equals("MASTER")) {
message.setSrcName(manager.getInstanceName());
message.setTgtName(instanceName);
message.setMsgState(MessageState.NEW);
message.setPartitionName(partition);
message.setResourceName(TEST_DB);
message.setFromState(rand ? "SLAVE" : "OFFLINE");
message.setToState(rand ? "MASTER" : "SLAVE");
message.setTgtSessionId(sessionid);
message.setSrcSessionId(manager.getSessionId());
message.setStateModelDef("MasterSlave");
message.setStateModelFactoryName("DEFAULT");
} else if (ev.getStateMap(partition).get(instanceName).equals("SLAVE")) {
message.setSrcName(manager.getInstanceName());
message.setTgtName(instanceName);
message.setMsgState(MessageState.NEW);
message.setPartitionName(partition);
message.setResourceName(TEST_DB);
message.setFromState(rand ? "MASTER" : "OFFLINE");
message.setToState(rand ? "SLAVE" : "SLAVE");
message.setTgtSessionId(sessionid);
message.setSrcSessionId(manager.getSessionId());
message.setStateModelDef("MasterSlave");
message.setStateModelFactoryName("DEFAULT");
}
accessor.setProperty(accessor.keyBuilder().message(instanceName, message.getMsgId()), message);
}
}
}
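// Give the participants time to see the bogus messages, then confirm the external view is unchanged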
Thread.sleep(3000);
ExternalView ev2 = accessor.getProperty(kb.externalView(TEST_DB));
Assert.assertTrue(ev.equals(ev2));
}
Use of org.apache.helix.HelixDataAccessor in project helix by apache.
Class TestPauseSignal, method testPauseSignal.
@Test()
public void testPauseSignal() throws Exception {
// Logger.getRootLogger().setLevel(Level.INFO);
String className = TestHelper.getTestClassName();
String methodName = TestHelper.getTestMethodName();
final String clusterName = className + "_" + methodName;
System.out.println("START " + clusterName + " at " + new Date(System.currentTimeMillis()));
MockParticipantManager[] participants = new MockParticipantManager[5];
TestHelper.setupCluster(clusterName, ZK_ADDR,
12918, // participant port
"localhost", // participant name prefix
"TestDB", // resource name prefix
1, // resources
10, // partitions per resource
5, // number of nodes
3, // replicas
"MasterSlave",
true); // do rebalance
// start controller
ClusterControllerManager controller = new ClusterControllerManager(ZK_ADDR, clusterName, "controller_0");
controller.syncStart();
// start participants
for (int i = 0; i < 5; i++) {
String instanceName = "localhost_" + (12918 + i);
participants[i] = new MockParticipantManager(ZK_ADDR, clusterName, instanceName);
participants[i].syncStart();
}
boolean result = ClusterStateVerifier.verifyByZkCallback(new BestPossAndExtViewZkVerifier(ZK_ADDR, clusterName));
Assert.assertTrue(result);
// pause the cluster and make sure pause is persistent
ZkClient zkClient = new ZkClient(ZK_ADDR);
zkClient.setZkSerializer(new ZNRecordSerializer());
final HelixDataAccessor tmpAccessor = new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<ZNRecord>(zkClient));
String cmd = "-zkSvr " + ZK_ADDR + " -enableCluster " + clusterName + " false";
ClusterSetup.processCommandLineArgs(cmd.split(" "));
tmpAccessor.setProperty(tmpAccessor.keyBuilder().pause(), new PauseSignal("pause"));
zkClient.close();
// wait for controller to be signaled by pause
Thread.sleep(1000);
// add a new resource group
ClusterSetup setupTool = new ClusterSetup(ZK_ADDR);
setupTool.addResourceToCluster(clusterName, "TestDB1", 10, "MasterSlave");
setupTool.rebalanceStorageCluster(clusterName, "TestDB1", 3);
// make sure TestDB1 external view is empty
TestHelper.verifyWithTimeout("verifyEmptyCurStateAndExtView", 1000, clusterName, "TestDB1", TestHelper.<String>setOf("localhost_12918", "localhost_12919", "localhost_12920", "localhost_12921", "localhost_12922"), ZK_ADDR);
// resume controller
final HelixDataAccessor accessor = new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<ZNRecord>(_gZkClient));
cmd = "-zkSvr " + ZK_ADDR + " -enableCluster " + clusterName + " true";
ClusterSetup.processCommandLineArgs(cmd.split(" "));
result = ClusterStateVerifier.verifyByZkCallback(new BestPossAndExtViewZkVerifier(ZK_ADDR, clusterName));
Assert.assertTrue(result);
// clean up
controller.syncStop();
for (int i = 0; i < 5; i++) {
participants[i].syncStop();
}
System.out.println("END " + clusterName + " at " + new Date(System.currentTimeMillis()));
}
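For completeness, the pause signal written directly through the accessor can also be cleared the same way (this test resumes via the -enableCluster command instead). A minimal sketch, assuming an accessor built like tmpAccessor above:
// Pause the controller pipeline for the cluster ...
accessor.setProperty(accessor.keyBuilder().pause(), new PauseSignal("pause"));
// ... and later resume it by removing the pause signal
accessor.removeProperty(accessor.keyBuilder().pause());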
Use of org.apache.helix.HelixDataAccessor in project helix by apache.
Class TestRebalancePipeline, method testMsgTriggeredRebalance.
@Test
public void testMsgTriggeredRebalance() throws Exception {
String clusterName = "CLUSTER_" + _className + "_msgTrigger";
System.out.println("START " + clusterName + " at " + new Date(System.currentTimeMillis()));
HelixDataAccessor accessor = new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<ZNRecord>(_gZkClient));
HelixManager manager = new DummyClusterManager(clusterName, accessor);
ClusterEvent event = new ClusterEvent(ClusterEventType.Unknown);
refreshClusterConfig(clusterName, accessor);
final String resourceName = "testResource_dup";
String[] resourceGroups = new String[] { resourceName };
TestHelper.setupEmptyCluster(_gZkClient, clusterName);
// ideal state: node0 is MASTER, node1 is SLAVE
// replica=2 means 1 master and 1 slave
setupIdealState(clusterName, new int[] { 0, 1 }, resourceGroups, 1, 2);
setupStateModel(clusterName);
setupInstances(clusterName, new int[] { 0, 1 });
setupLiveInstances(clusterName, new int[] { 0, 1 });
ClusterControllerManager controller = new ClusterControllerManager(ZK_ADDR, clusterName, "controller_0");
controller.syncStart();
// round1: controller sends O->S to both node0 and node1
Thread.sleep(1000);
Builder keyBuilder = accessor.keyBuilder();
List<String> messages = accessor.getChildNames(keyBuilder.messages("localhost_0"));
Assert.assertEquals(messages.size(), 1);
messages = accessor.getChildNames(keyBuilder.messages("localhost_1"));
Assert.assertEquals(messages.size(), 1);
// round2: node0 and node1 update their current states but do not remove the messages.
// Since the controller's rebalancer pipeline GCs pending messages after a timeout, and both hosts
// report SLAVE in their current states, the controller will then send a rebalance message to
// promote one host to MASTER
setCurrentState(clusterName, "localhost_0", resourceName, resourceName + "_0", "session_0", "SLAVE", true);
setCurrentState(clusterName, "localhost_1", resourceName, resourceName + "_0", "session_1", "SLAVE", true);
// The controller has a 3s timeout, so after 1s it should not have GCed the messages yet
Thread.sleep(1000);
Assert.assertEquals(accessor.getChildValues(keyBuilder.messages("localhost_0")).size(), 1);
Assert.assertEquals(accessor.getChildValues(keyBuilder.messages("localhost_1")).size(), 1);
// After another couple of seconds, the controller should clean up the messages and continue to rebalance
Thread.sleep(3000);
// Manually trigger another rebalance by touching current state
setCurrentState(clusterName, "localhost_0", resourceName, resourceName + "_0", "session_0", "SLAVE");
Thread.sleep(1000);
List<Message> host0Msg = accessor.getChildValues(keyBuilder.messages("localhost_0"));
List<Message> host1Msg = accessor.getChildValues(keyBuilder.messages("localhost_1"));
List<Message> allMsgs = new ArrayList<>(host0Msg);
allMsgs.addAll(host1Msg);
Assert.assertEquals(allMsgs.size(), 1);
Assert.assertEquals(allMsgs.get(0).getToState(), "MASTER");
Assert.assertEquals(allMsgs.get(0).getFromState(), "SLAVE");
// round3: node0 changes state to MASTER but fails to delete the message;
// the controller will clean it up
setCurrentState(clusterName, "localhost_0", resourceName, resourceName + "_0", "session_0", "MASTER", true);
Thread.sleep(3500);
// touch current state to trigger rebalance
setCurrentState(clusterName, "localhost_0", resourceName, resourceName + "_0", "session_0", "MASTER", false);
Thread.sleep(1000);
Assert.assertTrue(accessor.getChildNames(keyBuilder.messages("localhost_0")).isEmpty());
// round4: node0 has a duplicated but valid message, i.e. a P2P message was sent to it
// due to an error in the triggered pipeline; the controller should remove the duplicated message
// immediately, since the partition became MASTER 3 seconds ago (already past the timeout)
Message sourceMsg = allMsgs.get(0);
Message dupMsg = new Message(sourceMsg.getMsgType(), UUID.randomUUID().toString());
dupMsg.getRecord().setSimpleFields(sourceMsg.getRecord().getSimpleFields());
dupMsg.getRecord().setListFields(sourceMsg.getRecord().getListFields());
dupMsg.getRecord().setMapFields(sourceMsg.getRecord().getMapFields());
accessor.setProperty(dupMsg.getKey(accessor.keyBuilder(), dupMsg.getTgtName()), dupMsg);
Thread.sleep(1000);
messages = accessor.getChildNames(keyBuilder.messages("localhost_0"));
Assert.assertTrue(messages.isEmpty());
// round5: node0 has a completely invalid message; the controller should delete it immediately
dupMsg.setFromState("SLAVE");
dupMsg.setToState("OFFLINE");
accessor.setProperty(dupMsg.getKey(accessor.keyBuilder(), dupMsg.getTgtName()), dupMsg);
Thread.sleep(1000);
messages = accessor.getChildNames(keyBuilder.messages("localhost_0"));
Assert.assertTrue(messages.isEmpty());
System.out.println("END " + clusterName + " at " + new Date(System.currentTimeMillis()));
}
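All of the message checks above use the same child-listing calls on the accessor; a minimal sketch of that pattern, assuming the accessor and keyBuilder from this test:
// Names (message ids) of the messages pending for an instance
List<String> pendingIds = accessor.getChildNames(keyBuilder.messages("localhost_0"));
// Full Message objects, when the payloads themselves are needed
List<Message> pendingMsgs = accessor.getChildValues(keyBuilder.messages("localhost_0"));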