use of org.apache.helix.model.Message in project helix by apache.
the class TestMsgSelectionStage method testMasterXferAfterMasterResume.
/**
 * Checks that the selection stage returns no message when localhost_0 is asked to go
 * SLAVE->MASTER while localhost_1 already has a pending SLAVE->MASTER transition and the
 * MASTER state is constrained by {@code new Bounds(0, 1)}.
 */
@Test
public void testMasterXferAfterMasterResume() {
  System.out.println("START testMasterXferAfterMasterResume at " + new Date(System.currentTimeMillis()));

  // Two live instances, both currently reporting SLAVE.
  Map<String, LiveInstance> liveInstances = new HashMap<>();
  Map<String, String> currentStates = new HashMap<>();
  for (String instance : new String[] { "localhost_0", "localhost_1" }) {
    liveInstances.put(instance, new LiveInstance(instance));
    currentStates.put(instance, "SLAVE");
  }

  // localhost_1 already has a SLAVE->MASTER transition in flight.
  Message pendingPromotion = newMessage("TestDB", "TestDB_0", "localhost_1", "SLAVE", "MASTER");
  Map<String, Message> pendingMessages = new HashMap<>();
  pendingMessages.put("localhost_1", pendingPromotion);

  // Candidate message under test: promote localhost_0 for the same partition.
  Message candidate =
      TestHelper.createMessage("msgId_0", "SLAVE", "MASTER", "localhost_0", "TestDB", "TestDB_0");
  List<Message> messages = new ArrayList<>();
  messages.add(candidate);

  Map<String, Bounds> stateConstraints = new HashMap<>();
  stateConstraints.put("MASTER", new Bounds(0, 1));
  stateConstraints.put("SLAVE", new Bounds(0, 2));

  Map<String, Integer> stateTransitionPriorities = new HashMap<>();
  stateTransitionPriorities.put("MASTER-SLAVE", 0);
  stateTransitionPriorities.put("SLAVE-MASTER", 1);

  MessageSelectionStage selectionStage = new MessageSelectionStage();
  List<Message> selectedMsg = selectionStage.selectMessages(
      liveInstances,
      currentStates,
      pendingMessages,
      messages,
      stateConstraints,
      stateTransitionPriorities,
      BuiltInStateModelDefinitions.MasterSlave.getStateModelDefinition(),
      false);

  // Nothing may be selected while the other promotion is still pending.
  Assert.assertEquals(selectedMsg.size(), 0);
  System.out.println("END testMasterXferAfterMasterResume at " + new Date(System.currentTimeMillis()));
}
use of org.apache.helix.model.Message in project helix by apache.
the class TestRebalancePipeline method testMsgTriggeredRebalance.
/**
 * Runs a real controller against a two-node cluster and checks, round by round, how it reacts
 * to messages that participants leave behind or that are injected as duplicates/invalid
 * transitions. The test is timing based: it relies on fixed sleeps relative to the controller's
 * pending-message timeout mentioned in the round comments below.
 *
 * <p>The controller is always stopped in a {@code finally} block so that a failed assertion
 * does not leave it running.
 *
 * @throws Exception if a sleep is interrupted or cluster setup/teardown fails
 */
@Test
public void testMsgTriggeredRebalance() throws Exception {
  String clusterName = "CLUSTER_" + _className + "_msgTrigger";
  System.out.println("START " + clusterName + " at " + new Date(System.currentTimeMillis()));
  HelixDataAccessor accessor = new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<ZNRecord>(_gZkClient));
  // NOTE(review): manager and event are never read in this test; they are kept only in case
  // their construction matters - confirm and remove.
  HelixManager manager = new DummyClusterManager(clusterName, accessor);
  ClusterEvent event = new ClusterEvent(ClusterEventType.Unknown);
  refreshClusterConfig(clusterName, accessor);
  final String resourceName = "testResource_dup";
  String[] resourceGroups = new String[] { resourceName };
  TestHelper.setupEmptyCluster(_gZkClient, clusterName);
  // ideal state: node0 is MASTER, node1 is SLAVE
  // replica=2 means 1 master and 1 slave
  setupIdealState(clusterName, new int[] { 0, 1 }, resourceGroups, 1, 2);
  setupStateModel(clusterName);
  setupInstances(clusterName, new int[] { 0, 1 });
  setupLiveInstances(clusterName, new int[] { 0, 1 });
  ClusterControllerManager controller = new ClusterControllerManager(ZK_ADDR, clusterName, "controller_0");
  controller.syncStart();
  try {
    // round1: controller sends O->S to both node0 and node1
    Thread.sleep(1000);
    Builder keyBuilder = accessor.keyBuilder();
    List<String> messages = accessor.getChildNames(keyBuilder.messages("localhost_0"));
    Assert.assertEquals(messages.size(), 1);
    messages = accessor.getChildNames(keyBuilder.messages("localhost_1"));
    Assert.assertEquals(messages.size(), 1);

    // round2: node0 and node1 update current states but not removing messages
    // Since controller's rebalancer pipeline will GC pending messages after timeout, and both
    // hosts update current states to SLAVE, controller will send out rebalance message to
    // have one host to become master
    setCurrentState(clusterName, "localhost_0", resourceName, resourceName + "_0", "session_0", "SLAVE", true);
    setCurrentState(clusterName, "localhost_1", resourceName, resourceName + "_0", "session_1", "SLAVE", true);
    // Controller has 3s timeout, so after 1s, controller should not have GCed message
    Thread.sleep(1000);
    Assert.assertEquals(accessor.getChildValues(keyBuilder.messages("localhost_0")).size(), 1);
    Assert.assertEquals(accessor.getChildValues(keyBuilder.messages("localhost_1")).size(), 1);
    // Wait 3 more seconds (4s total since the state update, i.e. past the 3s timeout) so the
    // controller should have cleaned up the messages and be ready to continue rebalancing
    Thread.sleep(3000);
    // Manually trigger another rebalance by touching current state
    setCurrentState(clusterName, "localhost_0", resourceName, resourceName + "_0", "session_0", "SLAVE");
    Thread.sleep(1000);
    List<Message> host0Msg = accessor.getChildValues(keyBuilder.messages("localhost_0"));
    List<Message> host1Msg = accessor.getChildValues(keyBuilder.messages("localhost_1"));
    List<Message> allMsgs = new ArrayList<>(host0Msg);
    allMsgs.addAll(host1Msg);
    // Exactly one SLAVE->MASTER message is expected across both hosts
    Assert.assertEquals(allMsgs.size(), 1);
    Assert.assertEquals(allMsgs.get(0).getToState(), "MASTER");
    Assert.assertEquals(allMsgs.get(0).getFromState(), "SLAVE");

    // round3: node0 changes state to master, but failed to delete message,
    // controller will clean it up
    setCurrentState(clusterName, "localhost_0", resourceName, resourceName + "_0", "session_0", "MASTER", true);
    Thread.sleep(3500);
    // touch current state to trigger rebalance
    setCurrentState(clusterName, "localhost_0", resourceName, resourceName + "_0", "session_0", "MASTER", false);
    Thread.sleep(1000);
    Assert.assertTrue(accessor.getChildNames(keyBuilder.messages("localhost_0")).isEmpty());

    // round4: node0 has duplicated but valid message, i.e. there is a P2P message sent to it
    // due to error in the triggered pipeline, controller should remove duplicated message
    // immediately as the partition has became master 3 sec ago (there is already a timeout)
    Message sourceMsg = allMsgs.get(0);
    // Clone the earlier SLAVE->MASTER message under a fresh message id
    Message dupMsg = new Message(sourceMsg.getMsgType(), UUID.randomUUID().toString());
    dupMsg.getRecord().setSimpleFields(sourceMsg.getRecord().getSimpleFields());
    dupMsg.getRecord().setListFields(sourceMsg.getRecord().getListFields());
    dupMsg.getRecord().setMapFields(sourceMsg.getRecord().getMapFields());
    accessor.setProperty(dupMsg.getKey(accessor.keyBuilder(), dupMsg.getTgtName()), dupMsg);
    Thread.sleep(1000);
    messages = accessor.getChildNames(keyBuilder.messages("localhost_0"));
    Assert.assertTrue(messages.isEmpty());

    // round5: node0 has completely invalid message, controller should immediately delete it
    dupMsg.setFromState("SLAVE");
    dupMsg.setToState("OFFLINE");
    accessor.setProperty(dupMsg.getKey(accessor.keyBuilder(), dupMsg.getTgtName()), dupMsg);
    Thread.sleep(1000);
    messages = accessor.getChildNames(keyBuilder.messages("localhost_0"));
    Assert.assertTrue(messages.isEmpty());
  } finally {
    // Stop the controller even when an assertion above fails, so it does not keep running
    // after this test.
    controller.syncStop();
  }
  System.out.println("END " + clusterName + " at " + new Date(System.currentTimeMillis()));
}
use of org.apache.helix.model.Message in project helix by apache.
the class TestRebalancePipeline method testDuplicateMsg.
/**
 * Drives the rebalance pipeline by hand for a single-node cluster and checks that, while the
 * OFFLINE->SLAVE message from round 1 has not been removed, round 2 selects no further message
 * for the partition.
 */
@Test
public void testDuplicateMsg() {
  String clusterName = "CLUSTER_" + _className + "_dup";
  System.out.println("START " + clusterName + " at " + new Date(System.currentTimeMillis()));
  HelixDataAccessor accessor = new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<ZNRecord>(_gZkClient));
  refreshClusterConfig(clusterName, accessor);
  HelixManager manager = new DummyClusterManager(clusterName, accessor);
  ClusterEvent event = new ClusterEvent(ClusterEventType.Unknown);
  event.addAttribute(AttributeName.helixmanager.name(), manager);
  final String resourceName = "testResource_dup";
  String[] resourceGroups = new String[] { resourceName };
  // ideal state: only node0 takes part in this test.
  // The trailing arguments are 1 and 1; presumably 1 partition and replica=1 - TODO confirm
  // against setupIdealState.
  setupIdealState(clusterName, new int[] { 0 }, resourceGroups, 1, 1);
  setupLiveInstances(clusterName, new int[] { 0 });
  setupStateModel(clusterName);

  // cluster data cache refresh pipeline
  Pipeline dataRefresh = new Pipeline();
  dataRefresh.addStage(new ReadClusterDataStage());

  // rebalance pipeline
  Pipeline rebalancePipeline = new Pipeline();
  rebalancePipeline.addStage(new ResourceComputationStage());
  rebalancePipeline.addStage(new CurrentStateComputationStage());
  rebalancePipeline.addStage(new BestPossibleStateCalcStage());
  rebalancePipeline.addStage(new IntermediateStateCalcStage());
  rebalancePipeline.addStage(new MessageGenerationPhase());
  rebalancePipeline.addStage(new MessageSelectionStage());
  rebalancePipeline.addStage(new MessageThrottleStage());
  rebalancePipeline.addStage(new TaskAssignmentStage());

  // round1: set node0 currentState to OFFLINE
  setCurrentState(clusterName, "localhost_0", resourceName, resourceName + "_0", "session_0", "OFFLINE");
  runPipeline(event, dataRefresh);
  runPipeline(event, rebalancePipeline);
  MessageSelectionStageOutput msgSelOutput = event.getAttribute(AttributeName.MESSAGES_SELECTED.name());
  List<Message> messages = msgSelOutput.getMessages(resourceName, new Partition(resourceName + "_0"));
  Assert.assertEquals(messages.size(), 1, "Should output 1 message: OFFLINE-SLAVE for node0");
  Message message = messages.get(0);
  Assert.assertEquals(message.getFromState(), "OFFLINE");
  Assert.assertEquals(message.getToState(), "SLAVE");
  Assert.assertEquals(message.getTgtName(), "localhost_0");

  // round2: updates node0 currentState to SLAVE but keep the
  // message, make sure controller should not send S->M until removal is done
  // NOTE(review): round1 wrote the current state under "session_0" but this call uses
  // "session_1" - confirm this is intentional.
  setCurrentState(clusterName, "localhost_0", resourceName, resourceName + "_0", "session_1", "SLAVE");
  runPipeline(event, dataRefresh);
  refreshClusterConfig(clusterName, accessor);
  runPipeline(event, rebalancePipeline);
  msgSelOutput = event.getAttribute(AttributeName.MESSAGES_SELECTED.name());
  messages = msgSelOutput.getMessages(resourceName, new Partition(resourceName + "_0"));
  // node0 is the only instance in this cluster, so the failure message must refer to node0.
  Assert.assertEquals(messages.size(), 0, "Should NOT output 1 message: SLAVE-MASTER for node0");
  System.out.println("END " + clusterName + " at " + new Date(System.currentTimeMillis()));
}
use of org.apache.helix.model.Message in project helix by apache.
the class TestAddStateModelFactoryAfterConnect method testBasic.
/**
 * Adds a resource whose ideal state names a state-model factory that no participant has
 * registered yet, asserts that the resulting messages stay unprocessed on the participants,
 * then registers the factory on already-connected participants and asserts that the
 * {@code BestPossAndExtViewZkVerifier} check passes.
 *
 * <p>The controller and every participant that was started are stopped in a {@code finally}
 * block so a failed assertion does not leave them running.
 *
 * @throws Exception if a sleep is interrupted or cluster setup/teardown fails
 */
@Test
public void testBasic() throws Exception {
  String className = TestHelper.getTestClassName();
  String methodName = TestHelper.getTestMethodName();
  String clusterName = className + "_" + methodName;
  final int n = 5;
  // TestDB1 is added below with 16 partitions and rebalanced with 3 replicas, so one message
  // per partition per replica gives 16 x 3 = 48 messages.
  final int newResourcePartitions = 16;
  final int newResourceReplicas = 3;
  final int expectedMsgs = newResourcePartitions * newResourceReplicas;

  System.out.println("START " + clusterName + " at " + new Date(System.currentTimeMillis()));
  MockParticipantManager[] participants = new MockParticipantManager[n];
  TestHelper.setupCluster(
      clusterName,
      ZK_ADDR,
      12918, // participant port
      "localhost", // participant name prefix
      "TestDB", // resource name prefix
      1, // resources
      10, // partitions per resource
      n, // number of nodes
      3, // replicas
      "MasterSlave",
      true); // do rebalance

  ClusterControllerManager controller = new ClusterControllerManager(ZK_ADDR, clusterName, "controller_0");
  controller.syncStart();
  try {
    // start participants; only record a participant once it has started so that cleanup never
    // tries to stop one that failed to start
    for (int i = 0; i < n; i++) {
      String instanceName = "localhost_" + (12918 + i);
      MockParticipantManager participant = new MockParticipantManager(ZK_ADDR, clusterName, instanceName);
      participant.syncStart();
      participants[i] = participant;
    }
    boolean result = ClusterStateVerifier.verifyByZkCallback(new BestPossAndExtViewZkVerifier(ZK_ADDR, clusterName));
    Assert.assertTrue(result);

    // add a new idealState without registering message handling factory
    ClusterSetup setupTool = new ClusterSetup(ZK_ADDR);
    setupTool.addResourceToCluster(clusterName, "TestDB1", newResourcePartitions, "MasterSlave");
    ZkBaseDataAccessor<ZNRecord> baseAccessor = new ZkBaseDataAccessor<ZNRecord>(_gZkClient);
    ZKHelixDataAccessor accessor = new ZKHelixDataAccessor(clusterName, baseAccessor);
    Builder keyBuilder = accessor.keyBuilder();
    IdealState idealState = accessor.getProperty(keyBuilder.idealStates("TestDB1"));
    idealState.setStateModelFactoryName("TestDB1_Factory");
    accessor.setProperty(keyBuilder.idealStates("TestDB1"), idealState);
    setupTool.rebalanceStorageCluster(clusterName, "TestDB1", newResourceReplicas);

    // assert that we have received OFFLINE->SLAVE messages for all partitions;
    // poll up to 5 times, 100 ms apart, and stop as soon as the expected count is seen
    int totalMsgs = 0;
    for (int retry = 0; retry < 5; retry++) {
      Thread.sleep(100);
      totalMsgs = 0;
      for (int i = 0; i < n; i++) {
        List<Message> msgs = accessor.getChildValues(keyBuilder.messages(participants[i].getInstanceName()));
        totalMsgs += msgs.size();
      }
      if (totalMsgs == expectedMsgs) {
        break;
      }
    }
    Assert.assertEquals(totalMsgs, expectedMsgs, "Should accumulated " + expectedMsgs
        + " unprocessed messages (1 O->S per partition per replica) because TestDB1 is added without state-model-factory but was "
        + totalMsgs);

    // register the missing factory on the already-connected participants
    for (int i = 0; i < n; i++) {
      participants[i].getStateMachineEngine().registerStateModelFactory("MasterSlave", new MockMSModelFactory(), "TestDB1_Factory");
    }
    result = ClusterStateVerifier.verifyByZkCallback(new BestPossAndExtViewZkVerifier(ZK_ADDR, clusterName));
    Assert.assertTrue(result);
  } finally {
    // clean up: controller first, then every participant that was actually started
    controller.syncStop();
    for (MockParticipantManager participant : participants) {
      if (participant != null) {
        participant.syncStop();
      }
    }
  }
  System.out.println("END " + clusterName + " at " + new Date(System.currentTimeMillis()));
}
use of org.apache.helix.model.Message in project helix by apache.
the class TestMessageThrottle method testMessageThrottle.
/**
 * Installs a message constraint and then watches every participant's message path to check
 * that no participant ever holds more than one STATE_TRANSITION message at a time while the
 * cluster is brought up.
 *
 * @throws Exception if cluster setup, verification or teardown fails
 */
@Test()
public void testMessageThrottle() throws Exception {
  String clusterName = getShortClassName();
  MockParticipantManager[] participants = new MockParticipantManager[5];
  System.out.println("START " + clusterName + " at " + new Date(System.currentTimeMillis()));

  TestHelper.setupCluster(
      clusterName,
      ZK_ADDR,
      12918, // participant start port
      "localhost", // participant name prefix
      "TestDB", // resource name prefix
      1, // resources
      10, // partitions per resource
      5, // number of nodes
      3, // replicas
      "MasterSlave",
      true); // do rebalance

  // Message constraint: MESSAGE_TYPE=STATE_TRANSITION, INSTANCE=.*, CONSTRAINT_VALUE=1.
  // No TRANSITION attribute is set on the constraint.
  HelixAdmin admin = new ZKHelixAdmin(_gZkClient);
  ConstraintItemBuilder builder = new ConstraintItemBuilder();
  builder
      .addConstraintAttribute("MESSAGE_TYPE", "STATE_TRANSITION")
      .addConstraintAttribute("INSTANCE", ".*")
      .addConstraintAttribute("CONSTRAINT_VALUE", "1");
  admin.setConstraint(clusterName, ConstraintType.MESSAGE_CONSTRAINT, "constraint1", builder.build());

  final ZKHelixDataAccessor accessor =
      new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<ZNRecord>(_gZkClient));

  // Flipped to false by a listener if any participant ever has more than one
  // state-transition message queued; checked at the end of the test.
  final AtomicBoolean success = new AtomicBoolean(true);
  for (int idx = 0; idx < participants.length; idx++) {
    String watchedInstance = "localhost_" + (12918 + idx);
    String watchedPath = PropertyPathBuilder.instanceMessage(clusterName, watchedInstance);
    _gZkClient.subscribeChildChanges(watchedPath, new IZkChildListener() {
      @Override
      public void handleChildChange(String parentPath, List<String> currentChilds) throws Exception {
        // Zero or one child can never violate the constraint; nothing to inspect.
        if (currentChilds == null || currentChilds.size() <= 1) {
          return;
        }
        List<ZNRecord> records = accessor.getBaseDataAccessor().getChildren(parentPath, null, 0);
        int transitionMsgCount = 0;
        for (ZNRecord record : records) {
          Message msg = new Message(record);
          if (msg.getMsgType().equals(Message.MessageType.STATE_TRANSITION.name())) {
            transitionMsgCount++;
          }
        }
        if (transitionMsgCount > 1) {
          success.set(false);
          Assert.fail("Should not see more than 1 message");
        }
      }
    });
  }

  ClusterControllerManager controller = new ClusterControllerManager(ZK_ADDR, clusterName, "controller_0");
  controller.syncStart();

  // start participants
  for (int idx = 0; idx < participants.length; idx++) {
    String instanceName = "localhost_" + (12918 + idx);
    participants[idx] = new MockParticipantManager(ZK_ADDR, clusterName, instanceName);
    participants[idx].syncStart();
  }

  Assert.assertTrue(
      ClusterStateVerifier.verifyByZkCallback(new MasterNbInExtViewVerifier(ZK_ADDR, clusterName)));
  Assert.assertTrue(
      ClusterStateVerifier.verifyByZkCallback(new BestPossAndExtViewZkVerifier(ZK_ADDR, clusterName)));
  Assert.assertTrue(success.get());

  // clean up
  controller.syncStop();
  for (MockParticipantManager participant : participants) {
    participant.syncStop();
  }
  System.out.println("END " + clusterName + " at " + new Date(System.currentTimeMillis()));
}
Aggregations