Search in sources :

Example 46 with Message

use of org.apache.helix.model.Message in project helix by apache.

the class TestMsgSelectionStage method testMasterXferAfterMasterResume.

/**
 * Checks message selection when both replicas are SLAVE, localhost_1 already has a pending
 * SLAVE to MASTER message, and a new SLAVE to MASTER message is proposed for localhost_0.
 * With the MASTER constraint set to {@code Bounds(0, 1)}, the stage is expected to select
 * no message at all.
 */
@Test
public void testMasterXferAfterMasterResume() {
    final String testName = "testMasterXferAfterMasterResume";
    System.out.println("START " + testName + " at " + new Date());

    // Both instances are live and currently hold the partition as SLAVE.
    Map<String, LiveInstance> liveInstances = new HashMap<>();
    Map<String, String> currentStates = new HashMap<>();
    for (String instance : new String[] { "localhost_0", "localhost_1" }) {
        liveInstances.put(instance, new LiveInstance(instance));
        currentStates.put(instance, "SLAVE");
    }

    // localhost_1 still has an in-flight SLAVE->MASTER transition.
    Map<String, Message> pendingMessages = new HashMap<>();
    pendingMessages.put("localhost_1", newMessage("TestDB", "TestDB_0", "localhost_1", "SLAVE", "MASTER"));

    // Candidate message: promote localhost_0 as well.
    List<Message> candidates = new ArrayList<>();
    candidates.add(TestHelper.createMessage("msgId_0", "SLAVE", "MASTER", "localhost_0", "TestDB", "TestDB_0"));

    Map<String, Bounds> stateConstraints = new HashMap<>();
    stateConstraints.put("MASTER", new Bounds(0, 1));
    stateConstraints.put("SLAVE", new Bounds(0, 2));

    Map<String, Integer> transitionPriorities = new HashMap<>();
    transitionPriorities.put("MASTER-SLAVE", 0);
    transitionPriorities.put("SLAVE-MASTER", 1);

    MessageSelectionStage stage = new MessageSelectionStage();
    List<Message> selected = stage.selectMessages(
        liveInstances,
        currentStates,
        pendingMessages,
        candidates,
        stateConstraints,
        transitionPriorities,
        BuiltInStateModelDefinitions.MasterSlave.getStateModelDefinition(),
        false);

    // Nothing may be selected while the pending promotion on localhost_1 is outstanding.
    Assert.assertEquals(selected.size(), 0);
    System.out.println("END " + testName + " at " + new Date());
}
Also used : Message(org.apache.helix.model.Message) HashMap(java.util.HashMap) Bounds(org.apache.helix.controller.stages.MessageSelectionStage.Bounds) ArrayList(java.util.ArrayList) Date(java.util.Date) LiveInstance(org.apache.helix.model.LiveInstance) Test(org.testng.annotations.Test)

Example 47 with Message

use of org.apache.helix.model.Message in project helix by apache.

the class TestRebalancePipeline method testMsgTriggeredRebalance.

/**
 * Runs a real controller against a two-node cluster and checks, round by round, how it
 * handles messages that participants never delete: stale messages are cleaned up after the
 * controller's timeout, and duplicated or invalid messages written afterwards are removed.
 *
 * <p>The controller is always stopped in a {@code finally} block so that a failed assertion
 * does not leave it running for subsequent tests.
 *
 * @throws Exception if a sleep is interrupted or cluster setup fails
 */
@Test
public void testMsgTriggeredRebalance() throws Exception {
    String clusterName = "CLUSTER_" + _className + "_msgTrigger";
    System.out.println("START " + clusterName + " at " + new Date(System.currentTimeMillis()));
    HelixDataAccessor accessor = new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<ZNRecord>(_gZkClient));
    // NOTE(review): manager and event are never read in this test; they look like leftovers
    // from a pipeline-driven variant. Confirm the constructors have no side effects before
    // removing them.
    HelixManager manager = new DummyClusterManager(clusterName, accessor);
    ClusterEvent event = new ClusterEvent(ClusterEventType.Unknown);
    refreshClusterConfig(clusterName, accessor);
    final String resourceName = "testResource_dup";
    String[] resourceGroups = new String[] { resourceName };
    TestHelper.setupEmptyCluster(_gZkClient, clusterName);
    // ideal state: node0 is MASTER, node1 is SLAVE
    // replica=2 means 1 master and 1 slave
    setupIdealState(clusterName, new int[] { 0, 1 }, resourceGroups, 1, 2);
    setupStateModel(clusterName);
    setupInstances(clusterName, new int[] { 0, 1 });
    setupLiveInstances(clusterName, new int[] { 0, 1 });
    ClusterControllerManager controller = new ClusterControllerManager(ZK_ADDR, clusterName, "controller_0");
    controller.syncStart();
    try {
        // round1: controller sends O->S to both node0 and node1
        Thread.sleep(1000);
        Builder keyBuilder = accessor.keyBuilder();
        List<String> messages = accessor.getChildNames(keyBuilder.messages("localhost_0"));
        Assert.assertEquals(messages.size(), 1);
        messages = accessor.getChildNames(keyBuilder.messages("localhost_1"));
        Assert.assertEquals(messages.size(), 1);
        // round2: node0 and node1 update current states but not removing messages
        // Since controller's rebalancer pipeline will GC pending messages after timeout, and both hosts
        // update current states to SLAVE, controller will send out rebalance message to
        // have one host to become master
        setCurrentState(clusterName, "localhost_0", resourceName, resourceName + "_0", "session_0", "SLAVE", true);
        setCurrentState(clusterName, "localhost_1", resourceName, resourceName + "_0", "session_1", "SLAVE", true);
        // Controller has 3s timeout, so after 1s, controller should not have GCed message
        Thread.sleep(1000);
        Assert.assertEquals(accessor.getChildValues(keyBuilder.messages("localhost_0")).size(), 1);
        Assert.assertEquals(accessor.getChildValues(keyBuilder.messages("localhost_1")).size(), 1);
        // After another 3 seconds (4s in total, past the 3s timeout), controller should have
        // cleaned up the messages and be able to continue to rebalance
        Thread.sleep(3000);
        // Manually trigger another rebalance by touching current state
        setCurrentState(clusterName, "localhost_0", resourceName, resourceName + "_0", "session_0", "SLAVE");
        Thread.sleep(1000);
        List<Message> host0Msg = accessor.getChildValues(keyBuilder.messages("localhost_0"));
        List<Message> host1Msg = accessor.getChildValues(keyBuilder.messages("localhost_1"));
        List<Message> allMsgs = new ArrayList<>(host0Msg);
        allMsgs.addAll(host1Msg);
        // Exactly one host should have been asked to become MASTER
        Assert.assertEquals(allMsgs.size(), 1);
        Assert.assertEquals(allMsgs.get(0).getToState(), "MASTER");
        Assert.assertEquals(allMsgs.get(0).getFromState(), "SLAVE");
        // round3: node0 changes state to master, but failed to delete message,
        // controller will clean it up
        setCurrentState(clusterName, "localhost_0", resourceName, resourceName + "_0", "session_0", "MASTER", true);
        Thread.sleep(3500);
        // touch current state to trigger rebalance
        setCurrentState(clusterName, "localhost_0", resourceName, resourceName + "_0", "session_0", "MASTER", false);
        Thread.sleep(1000);
        Assert.assertTrue(accessor.getChildNames(keyBuilder.messages("localhost_0")).isEmpty());
        // round4: node0 has duplicated but valid message, i.e. there is a P2P message sent to it
        // due to error in the triggered pipeline, controller should remove duplicated message
        // immediately as the partition has became master 3 sec ago (there is already a timeout)
        Message sourceMsg = allMsgs.get(0);
        // Copy every field of the S->M message under a fresh message id
        Message dupMsg = new Message(sourceMsg.getMsgType(), UUID.randomUUID().toString());
        dupMsg.getRecord().setSimpleFields(sourceMsg.getRecord().getSimpleFields());
        dupMsg.getRecord().setListFields(sourceMsg.getRecord().getListFields());
        dupMsg.getRecord().setMapFields(sourceMsg.getRecord().getMapFields());
        accessor.setProperty(dupMsg.getKey(accessor.keyBuilder(), dupMsg.getTgtName()), dupMsg);
        Thread.sleep(1000);
        messages = accessor.getChildNames(keyBuilder.messages("localhost_0"));
        Assert.assertTrue(messages.isEmpty());
        // round5: node0 has completely invalid message, controller should immediately delete it
        dupMsg.setFromState("SLAVE");
        dupMsg.setToState("OFFLINE");
        accessor.setProperty(dupMsg.getKey(accessor.keyBuilder(), dupMsg.getTgtName()), dupMsg);
        Thread.sleep(1000);
        messages = accessor.getChildNames(keyBuilder.messages("localhost_0"));
        Assert.assertTrue(messages.isEmpty());
    } finally {
        // Always stop the controller, even when an assertion above fails, so it does not
        // keep running against this cluster after the test.
        controller.syncStop();
    }
    System.out.println("END " + clusterName + " at " + new Date(System.currentTimeMillis()));
}
Also used: HelixManager(org.apache.helix.HelixManager) Message(org.apache.helix.model.Message) Builder(org.apache.helix.PropertyKey.Builder) ArrayList(java.util.ArrayList) Date(java.util.Date) ClusterControllerManager(org.apache.helix.integration.manager.ClusterControllerManager) ZKHelixDataAccessor(org.apache.helix.manager.zk.ZKHelixDataAccessor) HelixDataAccessor(org.apache.helix.HelixDataAccessor) ZNRecord(org.apache.helix.ZNRecord) Test(org.testng.annotations.Test)

Example 48 with Message

use of org.apache.helix.model.Message in project helix by apache.

the class TestRebalancePipeline method testDuplicateMsg.

/**
 * Drives the rebalance pipeline by hand for a single-node cluster and checks that the
 * controller does not emit a follow-up SLAVE to MASTER message for node0 in the second
 * pipeline run, after node0's current state has been updated to SLAVE.
 */
@Test
public void testDuplicateMsg() {
    String clusterName = "CLUSTER_" + _className + "_dup";
    System.out.println("START " + clusterName + " at " + new Date(System.currentTimeMillis()));
    HelixDataAccessor accessor = new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<ZNRecord>(_gZkClient));
    refreshClusterConfig(clusterName, accessor);
    HelixManager manager = new DummyClusterManager(clusterName, accessor);
    ClusterEvent event = new ClusterEvent(ClusterEventType.Unknown);
    event.addAttribute(AttributeName.helixmanager.name(), manager);
    final String resourceName = "testResource_dup";
    String[] resourceGroups = new String[] { resourceName };
    // ideal state: a single node (node0) hosts the only partition
    // NOTE(review): the trailing "1, 1" is presumably (partitions, replicas) - confirm
    // against setupIdealState
    setupIdealState(clusterName, new int[] { 0 }, resourceGroups, 1, 1);
    setupLiveInstances(clusterName, new int[] { 0 });
    setupStateModel(clusterName);
    // cluster data cache refresh pipeline
    Pipeline dataRefresh = new Pipeline();
    dataRefresh.addStage(new ReadClusterDataStage());
    // rebalance pipeline
    Pipeline rebalancePipeline = new Pipeline();
    rebalancePipeline.addStage(new ResourceComputationStage());
    rebalancePipeline.addStage(new CurrentStateComputationStage());
    rebalancePipeline.addStage(new BestPossibleStateCalcStage());
    rebalancePipeline.addStage(new IntermediateStateCalcStage());
    rebalancePipeline.addStage(new MessageGenerationPhase());
    rebalancePipeline.addStage(new MessageSelectionStage());
    rebalancePipeline.addStage(new MessageThrottleStage());
    rebalancePipeline.addStage(new TaskAssignmentStage());
    // round1: set node0 currentState to OFFLINE
    setCurrentState(clusterName, "localhost_0", resourceName, resourceName + "_0", "session_0", "OFFLINE");
    runPipeline(event, dataRefresh);
    runPipeline(event, rebalancePipeline);
    MessageSelectionStageOutput msgSelOutput = event.getAttribute(AttributeName.MESSAGES_SELECTED.name());
    List<Message> messages = msgSelOutput.getMessages(resourceName, new Partition(resourceName + "_0"));
    Assert.assertEquals(messages.size(), 1, "Should output 1 message: OFFLINE-SLAVE for node0");
    Message message = messages.get(0);
    Assert.assertEquals(message.getFromState(), "OFFLINE");
    Assert.assertEquals(message.getToState(), "SLAVE");
    Assert.assertEquals(message.getTgtName(), "localhost_0");
    // round2: updates node0 currentState to SLAVE but keep the
    // message, make sure controller should not send S->M until removal is done
    setCurrentState(clusterName, "localhost_0", resourceName, resourceName + "_0", "session_1", "SLAVE");
    runPipeline(event, dataRefresh);
    refreshClusterConfig(clusterName, accessor);
    runPipeline(event, rebalancePipeline);
    msgSelOutput = event.getAttribute(AttributeName.MESSAGES_SELECTED.name());
    messages = msgSelOutput.getMessages(resourceName, new Partition(resourceName + "_0"));
    // node0 is the only instance in this cluster, so the failure message must name node0
    Assert.assertEquals(messages.size(), 0, "Should NOT output 1 message: SLAVE-MASTER for node0");
    System.out.println("END " + clusterName + " at " + new Date(System.currentTimeMillis()));
}
Also used: Partition(org.apache.helix.model.Partition) HelixManager(org.apache.helix.HelixManager) Message(org.apache.helix.model.Message) Date(java.util.Date) Pipeline(org.apache.helix.controller.pipeline.Pipeline) ZKHelixDataAccessor(org.apache.helix.manager.zk.ZKHelixDataAccessor) HelixDataAccessor(org.apache.helix.HelixDataAccessor) ZNRecord(org.apache.helix.ZNRecord) Test(org.testng.annotations.Test)

Example 49 with Message

use of org.apache.helix.model.Message in project helix by apache.

the class TestAddStateModelFactoryAfterConnect method testBasic.

/**
 * Adds a resource whose state-model factory is not yet registered on any participant,
 * checks that the OFFLINE to SLAVE messages for it pile up unprocessed, then registers the
 * factory and verifies that the cluster converges.
 *
 * @throws Exception if a sleep is interrupted or cluster setup fails
 */
@Test
public void testBasic() throws Exception {
    // Logger.getRootLogger().setLevel(Level.INFO);
    String className = TestHelper.getTestClassName();
    String methodName = TestHelper.getTestMethodName();
    String clusterName = className + "_" + methodName;
    final int n = 5;
    // Shape of the resource added after the participants have connected; the expected
    // number of pending messages is derived from it (partition# x replicas).
    final int newResourcePartitions = 16;
    final int newResourceReplicas = 3;
    final int expectedMsgs = newResourcePartitions * newResourceReplicas;
    System.out.println("START " + clusterName + " at " + new Date(System.currentTimeMillis()));
    MockParticipantManager[] participants = new MockParticipantManager[n];
    TestHelper.setupCluster(clusterName, ZK_ADDR,
        12918, // participant port
        "localhost", // participant name prefix
        "TestDB", // resource name prefix
        1, // resources
        10, // partitions per resource
        n, // number of nodes
        3, // replicas
        "MasterSlave",
        true); // do rebalance
    ClusterControllerManager controller = new ClusterControllerManager(ZK_ADDR, clusterName, "controller_0");
    controller.syncStart();
    // start participants
    for (int i = 0; i < n; i++) {
        String instanceName = "localhost_" + (12918 + i);
        participants[i] = new MockParticipantManager(ZK_ADDR, clusterName, instanceName);
        participants[i].syncStart();
    }
    boolean result = ClusterStateVerifier.verifyByZkCallback(new BestPossAndExtViewZkVerifier(ZK_ADDR, clusterName));
    Assert.assertTrue(result);
    // add a new idealState without registering message handling factory
    ClusterSetup setupTool = new ClusterSetup(ZK_ADDR);
    setupTool.addResourceToCluster(clusterName, "TestDB1", newResourcePartitions, "MasterSlave");
    ZkBaseDataAccessor<ZNRecord> baseAccessor = new ZkBaseDataAccessor<ZNRecord>(_gZkClient);
    ZKHelixDataAccessor accessor = new ZKHelixDataAccessor(clusterName, baseAccessor);
    Builder keyBuilder = accessor.keyBuilder();
    IdealState idealState = accessor.getProperty(keyBuilder.idealStates("TestDB1"));
    idealState.setStateModelFactoryName("TestDB1_Factory");
    accessor.setProperty(keyBuilder.idealStates("TestDB1"), idealState);
    setupTool.rebalanceStorageCluster(clusterName, "TestDB1", newResourceReplicas);
    // assert that we have received OFFLINE->SLAVE messages for all partitions;
    // poll up to 5 times, 100 ms apart, until the expected count is reached
    int totalMsgs = 0;
    for (int retry = 0; retry < 5; retry++) {
        Thread.sleep(100);
        totalMsgs = 0;
        for (int i = 0; i < n; i++) {
            List<Message> msgs = accessor.getChildValues(keyBuilder.messages(participants[i].getInstanceName()));
            totalMsgs += msgs.size();
        }
        if (totalMsgs == expectedMsgs) {
            break;
        }
    }
    Assert.assertEquals(totalMsgs, expectedMsgs, "Should accumulated " + expectedMsgs + " unprocessed messages (1 O->S per partition per replica) because TestDB1 is added without state-model-factory but was " + totalMsgs);
    // Logger.getRootLogger().setLevel(Level.INFO);
    // register the missing factory on every participant so the pending messages can be handled
    for (int i = 0; i < n; i++) {
        participants[i].getStateMachineEngine().registerStateModelFactory("MasterSlave", new MockMSModelFactory(), "TestDB1_Factory");
    }
    result = ClusterStateVerifier.verifyByZkCallback(new BestPossAndExtViewZkVerifier(ZK_ADDR, clusterName));
    Assert.assertTrue(result);
    // clean up
    // wait for all zk callbacks done
    controller.syncStop();
    // stop every started participant (bounded by n, not a repeated literal)
    for (int i = 0; i < n; i++) {
        participants[i].syncStop();
    }
    System.out.println("END " + clusterName + " at " + new Date(System.currentTimeMillis()));
}
Also used : ZkBaseDataAccessor(org.apache.helix.manager.zk.ZkBaseDataAccessor) MockParticipantManager(org.apache.helix.integration.manager.MockParticipantManager) Message(org.apache.helix.model.Message) Builder(org.apache.helix.PropertyKey.Builder) ClusterSetup(org.apache.helix.tools.ClusterSetup) BestPossAndExtViewZkVerifier(org.apache.helix.tools.ClusterStateVerifier.BestPossAndExtViewZkVerifier) Date(java.util.Date) IdealState(org.apache.helix.model.IdealState) ClusterControllerManager(org.apache.helix.integration.manager.ClusterControllerManager) MockMSModelFactory(org.apache.helix.mock.participant.MockMSModelFactory) ZNRecord(org.apache.helix.ZNRecord) ZKHelixDataAccessor(org.apache.helix.manager.zk.ZKHelixDataAccessor) Test(org.testng.annotations.Test)

Example 50 with Message

use of org.apache.helix.model.Message in project helix by apache.

the class TestMessageThrottle method testMessageThrottle.

/**
 * Installs a message constraint (MESSAGE_TYPE=STATE_TRANSITION, INSTANCE=.*,
 * CONSTRAINT_VALUE=1) and watches every participant's message path to make sure that no
 * participant ever holds more than one state-transition message while the cluster converges.
 *
 * @throws Exception if cluster setup or verification fails
 */
@Test()
public void testMessageThrottle() throws Exception {
    // Logger.getRootLogger().setLevel(Level.INFO);
    String clusterName = getShortClassName();
    MockParticipantManager[] participants = new MockParticipantManager[5];
    System.out.println("START " + clusterName + " at " + new Date());

    TestHelper.setupCluster(clusterName, ZK_ADDR,
        12918, // participant start
        "localhost", // participant name prefix
        "TestDB", // resource name prefix
        1, // resources
        10, // partitions per resource
        5, // number of nodes
        3, // replicas
        "MasterSlave",
        true); // do rebalance

    // setup message constraint: one STATE_TRANSITION message at a time for any instance
    HelixAdmin admin = new ZKHelixAdmin(_gZkClient);
    ConstraintItemBuilder builder = new ConstraintItemBuilder();
    builder.addConstraintAttribute("MESSAGE_TYPE", "STATE_TRANSITION")
        .addConstraintAttribute("INSTANCE", ".*")
        .addConstraintAttribute("CONSTRAINT_VALUE", "1");
    admin.setConstraint(clusterName, ConstraintType.MESSAGE_CONSTRAINT, "constraint1", builder.build());

    final ZKHelixDataAccessor accessor = new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<ZNRecord>(_gZkClient));

    // make sure we never see more than 1 state transition message for each participant;
    // the flag is flipped from the listener callback and checked at the end of the test
    final AtomicBoolean throttleRespected = new AtomicBoolean(true);
    for (int idx = 0; idx < 5; idx++) {
        String participantName = "localhost_" + (12918 + idx);
        String messagePath = PropertyPathBuilder.instanceMessage(clusterName, participantName);
        _gZkClient.subscribeChildChanges(messagePath, new IZkChildListener() {

            @Override
            public void handleChildChange(String parentPath, List<String> currentChilds) throws Exception {
                // Zero or one child can never violate the constraint.
                if (currentChilds == null || currentChilds.size() <= 1) {
                    return;
                }
                // Several children: count only those that are state transitions.
                List<ZNRecord> children = accessor.getBaseDataAccessor().getChildren(parentPath, null, 0);
                int stateTransitionCount = 0;
                for (ZNRecord child : children) {
                    Message candidate = new Message(child);
                    if (candidate.getMsgType().equals(Message.MessageType.STATE_TRANSITION.name())) {
                        stateTransitionCount++;
                    }
                }
                if (stateTransitionCount > 1) {
                    throttleRespected.set(false);
                    Assert.fail("Should not see more than 1 message");
                }
            }
        });
    }

    ClusterControllerManager controller = new ClusterControllerManager(ZK_ADDR, clusterName, "controller_0");
    controller.syncStart();

    // start participants
    for (int idx = 0; idx < 5; idx++) {
        String participantName = "localhost_" + (12918 + idx);
        participants[idx] = new MockParticipantManager(ZK_ADDR, clusterName, participantName);
        participants[idx].syncStart();
    }

    boolean mastersVerified = ClusterStateVerifier.verifyByZkCallback(new MasterNbInExtViewVerifier(ZK_ADDR, clusterName));
    Assert.assertTrue(mastersVerified);
    boolean converged = ClusterStateVerifier.verifyByZkCallback(new BestPossAndExtViewZkVerifier(ZK_ADDR, clusterName));
    Assert.assertTrue(converged);
    Assert.assertTrue(throttleRespected.get());

    // clean up
    controller.syncStop();
    for (MockParticipantManager participant : participants) {
        participant.syncStop();
    }
    System.out.println("END " + clusterName + " at " + new Date());
}
Also used: MasterNbInExtViewVerifier(org.apache.helix.tools.ClusterStateVerifier.MasterNbInExtViewVerifier) MockParticipantManager(org.apache.helix.integration.manager.MockParticipantManager) Message(org.apache.helix.model.Message) ConstraintItemBuilder(org.apache.helix.model.builder.ConstraintItemBuilder) HelixAdmin(org.apache.helix.HelixAdmin) ZKHelixAdmin(org.apache.helix.manager.zk.ZKHelixAdmin) BestPossAndExtViewZkVerifier(org.apache.helix.tools.ClusterStateVerifier.BestPossAndExtViewZkVerifier) Date(java.util.Date) ClusterControllerManager(org.apache.helix.integration.manager.ClusterControllerManager) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) IZkChildListener(org.I0Itec.zkclient.IZkChildListener) List(java.util.List) ZNRecord(org.apache.helix.ZNRecord) ZKHelixDataAccessor(org.apache.helix.manager.zk.ZKHelixDataAccessor) Test(org.testng.annotations.Test)

Aggregations

Message (org.apache.helix.model.Message): 116, Test (org.testng.annotations.Test): 53, ArrayList (java.util.ArrayList): 36, HelixDataAccessor (org.apache.helix.HelixDataAccessor): 30, Builder (org.apache.helix.PropertyKey.Builder): 28, HelixManager (org.apache.helix.HelixManager): 22, ZNRecord (org.apache.helix.ZNRecord): 22, Criteria (org.apache.helix.Criteria): 21, Date (java.util.Date): 19, HashMap (java.util.HashMap): 18, Partition (org.apache.helix.model.Partition): 18, PropertyKey (org.apache.helix.PropertyKey): 17, LiveInstance (org.apache.helix.model.LiveInstance): 13, ZKHelixDataAccessor (org.apache.helix.manager.zk.ZKHelixDataAccessor): 12, NotificationContext (org.apache.helix.NotificationContext): 11, CurrentState (org.apache.helix.model.CurrentState): 10, HelixException (org.apache.helix.HelixException): 9, Resource (org.apache.helix.model.Resource): 9, StringWriter (java.io.StringWriter): 8, List (java.util.List): 8