Search in sources :

Example 36 with HelixManager

use of org.apache.helix.HelixManager in project helix by apache.

the class TestHelixInstanceTag method testInstanceTag.

/**
 * Checks that a resource rebalanced against an instance tag only ends up on instances that
 * carry that tag.
 *
 * <p>Round 1 tags two instances for {@code TestDB2}; round 2 tags four instances for
 * {@code TestDB3}. After each rebalance the external view is inspected and the set of hosting
 * instances is compared with the number of tagged instances.
 *
 * @throws Exception if cluster setup or verification fails
 */
@Test
public void testInstanceTag() throws Exception {
    HelixManager manager = _controller;
    HelixDataAccessor accessor = manager.getHelixDataAccessor();

    // Round 1: localhost_12918 and localhost_12919 get the TestDB2 tag.
    String db2 = "TestDB2";
    String db2Tag = "TestDB2_tag";
    int partitions = 100;
    int replica = 2;
    for (int i = 0; i < 2; i++) {
        String instanceName = "localhost_" + (12918 + i);
        _setupTool.getClusterManagementTool().addInstanceTag(CLUSTER_NAME, instanceName, db2Tag);
    }
    _setupTool.addResourceToCluster(CLUSTER_NAME, db2, partitions, STATE_MODEL);
    _setupTool.rebalanceStorageCluster(CLUSTER_NAME, db2, db2Tag, replica);
    assertHostedOnlyOnTaggedInstances(accessor, db2, db2Tag, 2);

    // Round 2: localhost_12919 .. localhost_12922 get the TestDB3 tag
    // (localhost_12919 therefore carries both tags).
    String db3 = "TestDB3";
    String db3Tag = "TestDB3_tag";
    partitions = 10;
    replica = 3;
    for (int i = 1; i < 5; i++) {
        String instanceName = "localhost_" + (12918 + i);
        _setupTool.getClusterManagementTool().addInstanceTag(CLUSTER_NAME, instanceName, db3Tag);
    }
    _setupTool.addResourceToCluster(CLUSTER_NAME, db3, partitions, STATE_MODEL);
    _setupTool.rebalanceStorageCluster(CLUSTER_NAME, db3, db3Tag, replica);
    assertHostedOnlyOnTaggedInstances(accessor, db3, db3Tag, 4);
}

/**
 * Waits for the cluster verifier to succeed, then asserts that every instance listed in the
 * external view of {@code resourceName} carries {@code tag} and that exactly
 * {@code expectedHostCount} distinct instances host the resource.
 */
private void assertHostedOnlyOnTaggedInstances(HelixDataAccessor accessor, String resourceName,
        String tag, int expectedHostCount) {
    boolean result = ClusterStateVerifier.verifyByZkCallback(
            new ClusterStateVerifier.BestPossAndExtViewZkVerifier(ZK_ADDR, CLUSTER_NAME));
    Assert.assertTrue(result, "Cluster verification fails");

    ExternalView ev = accessor.getProperty(accessor.keyBuilder().externalView(resourceName));
    Assert.assertNotNull(ev, "No external view found for " + resourceName);

    Set<String> hosts = new HashSet<String>();
    for (String partition : ev.getPartitionSet()) {
        for (String hostName : ev.getStateMap(partition).keySet()) {
            InstanceConfig config =
                    accessor.getProperty(accessor.keyBuilder().instanceConfig(hostName));
            Assert.assertNotNull(config, "No instance config found for " + hostName);
            Assert.assertTrue(config.containsTag(tag),
                    hostName + " hosts " + partition + " but is not tagged with " + tag);
            hosts.add(hostName);
        }
    }
    Assert.assertEquals(hosts.size(), expectedHostCount);
}
Also used : ExternalView(org.apache.helix.model.ExternalView) HelixManager(org.apache.helix.HelixManager) ClusterStateVerifier(org.apache.helix.tools.ClusterStateVerifier) HelixDataAccessor(org.apache.helix.HelixDataAccessor) InstanceConfig(org.apache.helix.model.InstanceConfig) HashSet(java.util.HashSet) Test(org.testng.annotations.Test)

Example 37 with HelixManager

use of org.apache.helix.HelixManager in project helix by apache.

the class TestMessagePartitionStateMismatch method testStateMismatch.

/**
 * Sends each live instance, for every partition it hosts, a state-transition message whose
 * from-state differs from the replica's state in the external view, then checks that the
 * external view of {@code TEST_DB} is unchanged after a three second wait.
 *
 * <p>Only MASTER and SLAVE replicas receive a message; replicas in any other state are
 * skipped so that no half-initialised message is written to an instance's message queue.
 *
 * @throws InterruptedException if the wait is interrupted
 */
@Test
public void testStateMismatch() throws InterruptedException {
    HelixManager manager = _controller;
    HelixDataAccessor accessor = manager.getHelixDataAccessor();
    Builder kb = accessor.keyBuilder();
    ExternalView ev = accessor.getProperty(kb.externalView(TEST_DB));
    Map<String, LiveInstance> liveInstances = accessor.getChildValuesMap(kb.liveInstances());
    Random random = new Random();

    for (Map.Entry<String, LiveInstance> entry : liveInstances.entrySet()) {
        String instanceName = entry.getKey();
        String sessionId = entry.getValue().getSessionId();
        for (String partition : ev.getPartitionSet()) {
            String currentState = ev.getStateMap(partition).get(instanceName);
            if (currentState == null) {
                // This instance does not host the partition.
                continue;
            }

            // Pick a transition whose from-state never equals the replica's current state.
            boolean rand = random.nextInt(10) > 5;
            String fromState;
            String toState;
            if ("MASTER".equals(currentState)) {
                fromState = rand ? "SLAVE" : "OFFLINE";
                toState = rand ? "MASTER" : "SLAVE";
            } else if ("SLAVE".equals(currentState)) {
                fromState = rand ? "MASTER" : "OFFLINE";
                toState = "SLAVE";
            } else {
                // No mismatching transition is defined for other states; send nothing.
                continue;
            }

            Message message = new Message(MessageType.STATE_TRANSITION, UUID.randomUUID().toString());
            message.setSrcName(manager.getInstanceName());
            message.setTgtName(instanceName);
            message.setMsgState(MessageState.NEW);
            message.setPartitionName(partition);
            message.setResourceName(TEST_DB);
            message.setFromState(fromState);
            message.setToState(toState);
            message.setTgtSessionId(sessionId);
            message.setSrcSessionId(manager.getSessionId());
            message.setStateModelDef("MasterSlave");
            message.setStateModelFactoryName("DEFAULT");
            accessor.setProperty(kb.message(instanceName, message.getMsgId()), message);
        }
    }

    // Give the participants time to pick up the messages before re-reading the view.
    Thread.sleep(3000);
    ExternalView ev2 = accessor.getProperty(kb.externalView(TEST_DB));
    Assert.assertTrue(ev.equals(ev2), "External view changed after mismatched messages were sent");
}
Also used : ExternalView(org.apache.helix.model.ExternalView) HelixDataAccessor(org.apache.helix.HelixDataAccessor) HelixManager(org.apache.helix.HelixManager) LiveInstance(org.apache.helix.model.LiveInstance) Message(org.apache.helix.model.Message) Random(java.util.Random) Builder(org.apache.helix.PropertyKey.Builder) Test(org.testng.annotations.Test)

Example 38 with HelixManager

use of org.apache.helix.HelixManager in project helix by apache.

the class TestRebalancePipeline method testMsgTriggeredRebalance.

/**
 * Drives a live controller through five rounds of current-state and message changes on a
 * two-node cluster and checks, after fixed waits, which messages remain in each node's queue.
 *
 * <p>The controller started here is always stopped in a {@code finally} block so it does not
 * outlive the test, whether the assertions pass or fail.
 *
 * @throws Exception if cluster setup fails or a wait is interrupted
 */
@Test
public void testMsgTriggeredRebalance() throws Exception {
    String clusterName = "CLUSTER_" + _className + "_msgTrigger";
    System.out.println("START " + clusterName + " at " + new Date(System.currentTimeMillis()));
    HelixDataAccessor accessor = new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<ZNRecord>(_gZkClient));
    // NOTE(review): manager and event are not read again in this method; kept as-is in case
    // their construction has side effects - confirm before removing.
    HelixManager manager = new DummyClusterManager(clusterName, accessor);
    ClusterEvent event = new ClusterEvent(ClusterEventType.Unknown);
    refreshClusterConfig(clusterName, accessor);
    final String resourceName = "testResource_dup";
    String[] resourceGroups = new String[] { resourceName };
    TestHelper.setupEmptyCluster(_gZkClient, clusterName);
    // ideal state: node0 is MASTER, node1 is SLAVE
    // replica=2 means 1 master and 1 slave
    setupIdealState(clusterName, new int[] { 0, 1 }, resourceGroups, 1, 2);
    setupStateModel(clusterName);
    setupInstances(clusterName, new int[] { 0, 1 });
    setupLiveInstances(clusterName, new int[] { 0, 1 });
    ClusterControllerManager controller = new ClusterControllerManager(ZK_ADDR, clusterName, "controller_0");
    controller.syncStart();
    try {
        // round1: controller sends O->S to both node0 and node1
        Thread.sleep(1000);
        Builder keyBuilder = accessor.keyBuilder();
        List<String> messages = accessor.getChildNames(keyBuilder.messages("localhost_0"));
        Assert.assertEquals(messages.size(), 1);
        messages = accessor.getChildNames(keyBuilder.messages("localhost_1"));
        Assert.assertEquals(messages.size(), 1);
        // round2: node0 and node1 update current states but not removing messages
        // Since controller's rebalancer pipeline will GC pending messages after timeout, and both hosts
        // update current states to SLAVE, controller will send out rebalance message to
        // have one host to become master
        setCurrentState(clusterName, "localhost_0", resourceName, resourceName + "_0", "session_0", "SLAVE", true);
        setCurrentState(clusterName, "localhost_1", resourceName, resourceName + "_0", "session_1", "SLAVE", true);
        // Controller has 3s timeout, so after 1s, controller should not have GCed message
        Thread.sleep(1000);
        Assert.assertEquals(accessor.getChildValues(keyBuilder.messages("localhost_0")).size(), 1);
        Assert.assertEquals(accessor.getChildValues(keyBuilder.messages("localhost_1")).size(), 1);
        // After another 3 seconds the timeout has passed, so the controller should clean up
        // the messages and continue to rebalance
        Thread.sleep(3000);
        // Manually trigger another rebalance by touching current state
        setCurrentState(clusterName, "localhost_0", resourceName, resourceName + "_0", "session_0", "SLAVE");
        Thread.sleep(1000);
        List<Message> host0Msg = accessor.getChildValues(keyBuilder.messages("localhost_0"));
        List<Message> host1Msg = accessor.getChildValues(keyBuilder.messages("localhost_1"));
        List<Message> allMsgs = new ArrayList<>(host0Msg);
        allMsgs.addAll(host1Msg);
        Assert.assertEquals(allMsgs.size(), 1);
        Assert.assertEquals(allMsgs.get(0).getToState(), "MASTER");
        Assert.assertEquals(allMsgs.get(0).getFromState(), "SLAVE");
        // round3: node0 changes state to master, but failed to delete message,
        // controller will clean it up
        setCurrentState(clusterName, "localhost_0", resourceName, resourceName + "_0", "session_0", "MASTER", true);
        Thread.sleep(3500);
        // touch current state to trigger rebalance
        setCurrentState(clusterName, "localhost_0", resourceName, resourceName + "_0", "session_0", "MASTER", false);
        Thread.sleep(1000);
        Assert.assertTrue(accessor.getChildNames(keyBuilder.messages("localhost_0")).isEmpty());
        // round4: node0 has duplicated but valid message, i.e. there is a P2P message sent to it
        // due to error in the triggered pipeline, controller should remove duplicated message
        // immediately as the partition has became master 3 sec ago (there is already a timeout)
        Message sourceMsg = allMsgs.get(0);
        Message dupMsg = new Message(sourceMsg.getMsgType(), UUID.randomUUID().toString());
        dupMsg.getRecord().setSimpleFields(sourceMsg.getRecord().getSimpleFields());
        dupMsg.getRecord().setListFields(sourceMsg.getRecord().getListFields());
        dupMsg.getRecord().setMapFields(sourceMsg.getRecord().getMapFields());
        accessor.setProperty(dupMsg.getKey(accessor.keyBuilder(), dupMsg.getTgtName()), dupMsg);
        Thread.sleep(1000);
        messages = accessor.getChildNames(keyBuilder.messages("localhost_0"));
        Assert.assertTrue(messages.isEmpty());
        // round5: node0 has completely invalid message, controller should immediately delete it
        dupMsg.setFromState("SLAVE");
        dupMsg.setToState("OFFLINE");
        accessor.setProperty(dupMsg.getKey(accessor.keyBuilder(), dupMsg.getTgtName()), dupMsg);
        Thread.sleep(1000);
        messages = accessor.getChildNames(keyBuilder.messages("localhost_0"));
        Assert.assertTrue(messages.isEmpty());
    } finally {
        // Stop the controller even when an assertion above fails.
        controller.syncStop();
    }
    System.out.println("END " + clusterName + " at " + new Date(System.currentTimeMillis()));
}
Also used : HelixManager(org.apache.helix.HelixManager) Message(org.apache.helix.model.Message) Builder(org.apache.helix.PropertyKey.Builder) ArrayList(java.util.ArrayList) Date(java.util.Date) ClusterControllerManager(org.apache.helix.integration.manager.ClusterControllerManager) ZKHelixDataAccessor(org.apache.helix.manager.zk.ZKHelixDataAccessor) HelixDataAccessor(org.apache.helix.HelixDataAccessor) ZNRecord(org.apache.helix.ZNRecord) ZKHelixDataAccessor(org.apache.helix.manager.zk.ZKHelixDataAccessor) Test(org.testng.annotations.Test)

Example 39 with HelixManager

use of org.apache.helix.HelixManager in project helix by apache.

the class TestRebalancePipeline method testDuplicateMsg.

/**
 * Runs the data-refresh and rebalance pipelines by hand against a single-node cluster and
 * checks the messages selected for partition {@code testResource_dup_0}.
 *
 * <p>Round 1 expects exactly one OFFLINE-&gt;SLAVE message for {@code localhost_0}. Round 2
 * writes a SLAVE current state while the round-1 message is still present and expects no
 * further message to be selected.
 */
@Test
public void testDuplicateMsg() {
    String clusterName = "CLUSTER_" + _className + "_dup";
    System.out.println("START " + clusterName + " at " + new Date(System.currentTimeMillis()));
    HelixDataAccessor accessor = new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<ZNRecord>(_gZkClient));
    refreshClusterConfig(clusterName, accessor);
    HelixManager manager = new DummyClusterManager(clusterName, accessor);
    ClusterEvent event = new ClusterEvent(ClusterEventType.Unknown);
    event.addAttribute(AttributeName.helixmanager.name(), manager);
    final String resourceName = "testResource_dup";
    String[] resourceGroups = new String[] { resourceName };
    // Only node0 takes part in this test.
    // NOTE(review): the trailing "1, 1" is presumably 1 partition with replica=1 - confirm
    // against setupIdealState.
    setupIdealState(clusterName, new int[] { 0 }, resourceGroups, 1, 1);
    setupLiveInstances(clusterName, new int[] { 0 });
    setupStateModel(clusterName);
    // cluster data cache refresh pipeline
    Pipeline dataRefresh = new Pipeline();
    dataRefresh.addStage(new ReadClusterDataStage());
    // rebalance pipeline
    Pipeline rebalancePipeline = new Pipeline();
    rebalancePipeline.addStage(new ResourceComputationStage());
    rebalancePipeline.addStage(new CurrentStateComputationStage());
    rebalancePipeline.addStage(new BestPossibleStateCalcStage());
    rebalancePipeline.addStage(new IntermediateStateCalcStage());
    rebalancePipeline.addStage(new MessageGenerationPhase());
    rebalancePipeline.addStage(new MessageSelectionStage());
    rebalancePipeline.addStage(new MessageThrottleStage());
    rebalancePipeline.addStage(new TaskAssignmentStage());
    // round1: set node0 currentState to OFFLINE
    setCurrentState(clusterName, "localhost_0", resourceName, resourceName + "_0", "session_0", "OFFLINE");
    runPipeline(event, dataRefresh);
    runPipeline(event, rebalancePipeline);
    MessageSelectionStageOutput msgSelOutput = event.getAttribute(AttributeName.MESSAGES_SELECTED.name());
    List<Message> messages = msgSelOutput.getMessages(resourceName, new Partition(resourceName + "_0"));
    Assert.assertEquals(messages.size(), 1, "Should output 1 message: OFFLINE-SLAVE for node0");
    Message message = messages.get(0);
    Assert.assertEquals(message.getFromState(), "OFFLINE");
    Assert.assertEquals(message.getToState(), "SLAVE");
    Assert.assertEquals(message.getTgtName(), "localhost_0");
    // round2: updates node0 currentState to SLAVE but keep the
    // message, make sure controller should not send S->M until removal is done
    // NOTE(review): round 1 wrote under "session_0" but this write uses "session_1" -
    // confirm that the different session id is intentional.
    setCurrentState(clusterName, "localhost_0", resourceName, resourceName + "_0", "session_1", "SLAVE");
    runPipeline(event, dataRefresh);
    refreshClusterConfig(clusterName, accessor);
    runPipeline(event, rebalancePipeline);
    msgSelOutput = event.getAttribute(AttributeName.MESSAGES_SELECTED.name());
    messages = msgSelOutput.getMessages(resourceName, new Partition(resourceName + "_0"));
    Assert.assertEquals(messages.size(), 0, "Should NOT output 1 message: SLAVE-MASTER for node0");
    System.out.println("END " + clusterName + " at " + new Date(System.currentTimeMillis()));
}
Also used : Partition(org.apache.helix.model.Partition) HelixManager(org.apache.helix.HelixManager) Message(org.apache.helix.model.Message) Date(java.util.Date) Pipeline(org.apache.helix.controller.pipeline.Pipeline) ZKHelixDataAccessor(org.apache.helix.manager.zk.ZKHelixDataAccessor) HelixDataAccessor(org.apache.helix.HelixDataAccessor) ZNRecord(org.apache.helix.ZNRecord) ZKHelixDataAccessor(org.apache.helix.manager.zk.ZKHelixDataAccessor) Test(org.testng.annotations.Test)

Example 40 with HelixManager

use of org.apache.helix.HelixManager in project helix by apache.

the class TestBasicSpectator method TestSpectator.

/**
 * Connects a spectator-type manager, registers this test as an external-view change listener,
 * adds and rebalances the {@code NextDB} resource, and checks that external-view changes were
 * recorded for both {@code NextDB} and {@code TEST_DB}.
 *
 * <p>The spectator connection is closed in a {@code finally} block so it does not leak when
 * an assertion fails.
 *
 * @throws Exception if connecting or cluster setup fails
 */
@Test
public void TestSpectator() throws Exception {
    HelixManager relayHelixManager = HelixManagerFactory.getZKHelixManager(CLUSTER_NAME, null, InstanceType.SPECTATOR, ZK_ADDR);
    relayHelixManager.connect();
    try {
        relayHelixManager.addExternalViewChangeListener(this);
        _setupTool.addResourceToCluster(CLUSTER_NAME, "NextDB", 64, STATE_MODEL);
        _setupTool.rebalanceStorageCluster(CLUSTER_NAME, "NextDB", 3);
        boolean result = ClusterStateVerifier.verifyByPolling(new ClusterStateVerifier.BestPossAndExtViewZkVerifier(ZK_ADDR, CLUSTER_NAME));
        Assert.assertTrue(result, "Cluster verification fails");
        // _externalViewChanges is presumably filled by this class's listener callback - confirm.
        Assert.assertTrue(_externalViewChanges.containsKey("NextDB"), "No external view change seen for NextDB");
        Assert.assertTrue(_externalViewChanges.containsKey(TEST_DB), "No external view change seen for " + TEST_DB);
    } finally {
        relayHelixManager.disconnect();
    }
}
Also used : HelixManager(org.apache.helix.HelixManager) ClusterStateVerifier(org.apache.helix.tools.ClusterStateVerifier) Test(org.testng.annotations.Test)

Aggregations

HelixManager (org.apache.helix.HelixManager)115 Test (org.testng.annotations.Test)49 HelixDataAccessor (org.apache.helix.HelixDataAccessor)35 ZNRecord (org.apache.helix.ZNRecord)28 Message (org.apache.helix.model.Message)23 PropertyKey (org.apache.helix.PropertyKey)20 Date (java.util.Date)18 ZKHelixDataAccessor (org.apache.helix.manager.zk.ZKHelixDataAccessor)17 Builder (org.apache.helix.PropertyKey.Builder)16 ArrayList (java.util.ArrayList)14 HashMap (java.util.HashMap)12 HelixException (org.apache.helix.HelixException)11 ExternalView (org.apache.helix.model.ExternalView)11 NotificationContext (org.apache.helix.NotificationContext)10 LiveInstance (org.apache.helix.model.LiveInstance)10 IdealState (org.apache.helix.model.IdealState)9 Criteria (org.apache.helix.Criteria)8 HelixAdmin (org.apache.helix.HelixAdmin)8 ZKHelixManager (org.apache.helix.manager.zk.ZKHelixManager)8 StringWriter (java.io.StringWriter)7