Search in sources :

Example 26 with BestPossAndExtViewZkVerifier

use of org.apache.helix.tools.ClusterStateVerifier.BestPossAndExtViewZkVerifier in project helix by apache.

the class TestRestartParticipant method testRestartParticipant.

@Test()
public void testRestartParticipant() throws Exception {
    // Logger.getRootLogger().setLevel(Level.INFO);
    System.out.println("START testRestartParticipant at " + new Date(System.currentTimeMillis()));
    String clusterName = getShortClassName();
    MockParticipantManager[] participants = new MockParticipantManager[5];
    // participant port
    TestHelper.setupCluster(// participant port
    clusterName, // participant port
    ZK_ADDR, // participant port
    12918, // participant name prefix
    "localhost", // resource name prefix
    "TestDB", // resources
    1, // partitions per resource
    10, // number of nodes
    5, // replicas
    3, "MasterSlave", // do rebalance
    true);
    ClusterControllerManager controller = new ClusterControllerManager(ZK_ADDR, clusterName, "controller_0");
    controller.syncStart();
    // start participants
    for (int i = 0; i < 5; i++) {
        String instanceName = "localhost_" + (12918 + i);
        if (i == 4) {
            participants[i] = new MockParticipantManager(ZK_ADDR, clusterName, instanceName);
            participants[i].setTransition(new KillOtherTransition(participants[0]));
        } else {
            participants[i] = new MockParticipantManager(ZK_ADDR, clusterName, instanceName);
        }
        participants[i].syncStart();
    }
    boolean result = ClusterStateVerifier.verifyByZkCallback(new BestPossAndExtViewZkVerifier(ZK_ADDR, clusterName));
    Assert.assertTrue(result);
    // restart
    Thread.sleep(500);
    MockParticipantManager participant = new MockParticipantManager(ZK_ADDR, participants[0].getClusterName(), participants[0].getInstanceName());
    System.err.println("Restart " + participant.getInstanceName());
    participant.syncStart();
    result = ClusterStateVerifier.verifyByZkCallback(new BestPossAndExtViewZkVerifier(ZK_ADDR, clusterName));
    Assert.assertTrue(result);
    // clean up
    controller.syncStop();
    for (int i = 0; i < 5; i++) {
        participants[i].syncStop();
    }
    participant.syncStop();
    System.out.println("START testRestartParticipant at " + new Date(System.currentTimeMillis()));
}
Also used : ClusterControllerManager(org.apache.helix.integration.manager.ClusterControllerManager) MockParticipantManager(org.apache.helix.integration.manager.MockParticipantManager) BestPossAndExtViewZkVerifier(org.apache.helix.tools.ClusterStateVerifier.BestPossAndExtViewZkVerifier) Date(java.util.Date) Test(org.testng.annotations.Test)

Example 27 with BestPossAndExtViewZkVerifier

use of org.apache.helix.tools.ClusterStateVerifier.BestPossAndExtViewZkVerifier in project helix by apache.

the class TestZkReconnect method testZKReconnect.

@Test(enabled = false)
public void testZKReconnect() throws Exception {
    final AtomicReference<ZkServer> zkServerRef = new AtomicReference<ZkServer>();
    final int zkPort = TestHelper.getRandomPort();
    final String zkAddr = String.format("localhost:%d", zkPort);
    ZkServer zkServer = TestHelper.startZkServer(zkAddr);
    zkServerRef.set(zkServer);
    String className = TestHelper.getTestClassName();
    String methodName = TestHelper.getTestMethodName();
    String clusterName = className + "_" + methodName;
    // Setup cluster
    LOG.info("Setup clusters");
    ClusterSetup clusterSetup = new ClusterSetup(zkAddr);
    clusterSetup.addCluster(clusterName, true);
    // Registers and starts controller
    LOG.info("Starts controller");
    HelixManager controller = HelixManagerFactory.getZKHelixManager(clusterName, null, InstanceType.CONTROLLER, zkAddr);
    controller.connect();
    // Registers and starts participant
    LOG.info("Starts participant");
    String hostname = "localhost";
    String instanceId = String.format("%s_%d", hostname, 1);
    clusterSetup.addInstanceToCluster(clusterName, instanceId);
    HelixManager participant = HelixManagerFactory.getZKHelixManager(clusterName, instanceId, InstanceType.PARTICIPANT, zkAddr);
    participant.connect();
    LOG.info("Register state machine");
    final CountDownLatch latch = new CountDownLatch(1);
    participant.getStateMachineEngine().registerStateModelFactory("OnlineOffline", new StateModelFactory<StateModel>() {

        @Override
        public StateModel createNewStateModel(String resource, String stateUnitKey) {
            return new SimpleStateModel(latch);
        }
    }, "test");
    String resourceName = "test-resource";
    LOG.info("Ideal state assignment");
    HelixAdmin helixAdmin = participant.getClusterManagmentTool();
    helixAdmin.addResource(clusterName, resourceName, 1, "OnlineOffline", IdealState.RebalanceMode.CUSTOMIZED.toString());
    IdealState idealState = helixAdmin.getResourceIdealState(clusterName, resourceName);
    idealState.setReplicas("1");
    idealState.setStateModelFactoryName("test");
    idealState.setPartitionState(resourceName + "_0", instanceId, "ONLINE");
    LOG.info("Shutdown ZK server");
    TestHelper.stopZkServer(zkServerRef.get());
    Executors.newSingleThreadScheduledExecutor().schedule(new Runnable() {

        @Override
        public void run() {
            try {
                LOG.info("Restart ZK server");
                // zkServer.set(TestUtils.startZookeeper(zkDir, zkPort));
                zkServerRef.set(TestHelper.startZkServer(zkAddr, null, false));
            } catch (Exception e) {
                LOG.error(e.getMessage(), e);
            }
        }
    }, 2L, TimeUnit.SECONDS);
    // future.get();
    LOG.info("Before update ideal state");
    helixAdmin.setResourceIdealState(clusterName, resourceName, idealState);
    LOG.info("After update ideal state");
    LOG.info("Wait for OFFLINE->ONLINE state transition");
    try {
        Assert.assertTrue(latch.await(15, TimeUnit.SECONDS));
        // wait until stable state
        boolean result = ClusterStateVerifier.verifyByZkCallback(new BestPossAndExtViewZkVerifier(zkAddr, clusterName));
        Assert.assertTrue(result);
    } finally {
        participant.disconnect();
        zkServerRef.get().shutdown();
    }
}
Also used : ZKHelixManager(org.apache.helix.manager.zk.ZKHelixManager) HelixManager(org.apache.helix.HelixManager) AtomicReference(java.util.concurrent.atomic.AtomicReference) ClusterSetup(org.apache.helix.tools.ClusterSetup) CountDownLatch(java.util.concurrent.CountDownLatch) HelixAdmin(org.apache.helix.HelixAdmin) BestPossAndExtViewZkVerifier(org.apache.helix.tools.ClusterStateVerifier.BestPossAndExtViewZkVerifier) IdealState(org.apache.helix.model.IdealState) StateModel(org.apache.helix.participant.statemachine.StateModel) ZkServer(org.I0Itec.zkclient.ZkServer) Test(org.testng.annotations.Test)

Example 28 with BestPossAndExtViewZkVerifier

use of org.apache.helix.tools.ClusterStateVerifier.BestPossAndExtViewZkVerifier in project helix by apache.

the class TestFullAutoNodeTagging method testSafeAssignment.

/**
 * Basic test for tagging behavior. 10 participants, of which 4 are tagged. Launch all 10,
 * checking external view every time a tagged node is started. Then shut down all 10, checking
 * external view every time a tagged node is killed.
 */
@Test
public void testSafeAssignment() throws Exception {
    final int NUM_PARTICIPANTS = 10;
    final int NUM_PARTITIONS = 4;
    final int NUM_REPLICAS = 2;
    final String RESOURCE_NAME = "TestDB0";
    final String TAG = "ASSIGNABLE";
    final String[] TAGGED_NODES = { "localhost_12920", "localhost_12922", "localhost_12924", "localhost_12925" };
    Set<String> taggedNodes = Sets.newHashSet(TAGGED_NODES);
    String className = TestHelper.getTestClassName();
    String methodName = TestHelper.getTestMethodName();
    String clusterName = className + "_" + methodName;
    System.out.println("START " + clusterName + " at " + new Date(System.currentTimeMillis()));
    // Set up cluster
    // participant port
    TestHelper.setupCluster(// participant port
    clusterName, // participant port
    ZK_ADDR, // participant port
    12918, // participant name prefix
    "localhost", // resource name prefix
    "TestDB", // resources
    1, // partitions per resource
    NUM_PARTITIONS, // number of nodes
    NUM_PARTICIPANTS, // replicas
    NUM_REPLICAS, // use FULL_AUTO mode to test node tagging
    "MasterSlave", // use FULL_AUTO mode to test node tagging
    RebalanceMode.FULL_AUTO, // do rebalance
    true);
    // tag the resource and participants
    HelixAdmin helixAdmin = new ZKHelixAdmin(ZK_ADDR);
    for (String taggedNode : TAGGED_NODES) {
        helixAdmin.addInstanceTag(clusterName, taggedNode, TAG);
    }
    IdealState idealState = helixAdmin.getResourceIdealState(clusterName, RESOURCE_NAME);
    idealState.setInstanceGroupTag(TAG);
    helixAdmin.setResourceIdealState(clusterName, RESOURCE_NAME, idealState);
    // start controller
    ClusterControllerManager controller = new ClusterControllerManager(ZK_ADDR, clusterName, "controller");
    controller.syncStart();
    // start participants
    MockParticipantManager[] participants = new MockParticipantManager[NUM_PARTICIPANTS];
    for (int i = 0; i < NUM_PARTICIPANTS; i++) {
        final String instanceName = "localhost_" + (12918 + i);
        participants[i] = new MockParticipantManager(ZK_ADDR, clusterName, instanceName);
        participants[i].syncStart();
        // ensure that everything is valid if this is a tagged node that is starting
        if (taggedNodes.contains(instanceName)) {
            // make sure that the best possible matches the external view
            Thread.sleep(500);
            boolean result = ClusterStateVerifier.verifyByZkCallback(new BestPossAndExtViewZkVerifier(ZK_ADDR, clusterName));
            Assert.assertTrue(result);
            // make sure that the tagged state of the nodes is still balanced
            result = ClusterStateVerifier.verifyByZkCallback(new TaggedZkVerifier(clusterName, RESOURCE_NAME, TAGGED_NODES, false));
            Assert.assertTrue(result, "initial assignment with all tagged nodes live is invalid");
        }
    }
    // cleanup
    for (int i = 0; i < NUM_PARTICIPANTS; i++) {
        String participantName = participants[i].getInstanceName();
        participants[i].syncStop();
        if (taggedNodes.contains(participantName)) {
            // check that the external view is still correct even after removing tagged nodes
            taggedNodes.remove(participantName);
            Thread.sleep(500);
            boolean result = ClusterStateVerifier.verifyByZkCallback(new TaggedZkVerifier(clusterName, RESOURCE_NAME, TAGGED_NODES, taggedNodes.isEmpty()));
            Assert.assertTrue(result, "incorrect state after removing " + participantName + ", " + taggedNodes + " remain");
        }
    }
    controller.syncStop();
    System.out.println("END " + clusterName + " at " + new Date(System.currentTimeMillis()));
}
Also used : MockParticipantManager(org.apache.helix.integration.manager.MockParticipantManager) HelixAdmin(org.apache.helix.HelixAdmin) ZKHelixAdmin(org.apache.helix.manager.zk.ZKHelixAdmin) BestPossAndExtViewZkVerifier(org.apache.helix.tools.ClusterStateVerifier.BestPossAndExtViewZkVerifier) Date(java.util.Date) IdealState(org.apache.helix.model.IdealState) ClusterControllerManager(org.apache.helix.integration.manager.ClusterControllerManager) ZKHelixAdmin(org.apache.helix.manager.zk.ZKHelixAdmin) Test(org.testng.annotations.Test)

Example 29 with BestPossAndExtViewZkVerifier

use of org.apache.helix.tools.ClusterStateVerifier.BestPossAndExtViewZkVerifier in project helix by apache.

the class TestNonOfflineInitState method testNonOfflineInitState.

@Test
public void testNonOfflineInitState() throws Exception {
    System.out.println("START testNonOfflineInitState at " + new Date(System.currentTimeMillis()));
    String clusterName = getShortClassName();
    setupCluster(// participant port
    clusterName, // participant port
    ZK_ADDR, // participant port
    12918, // participant name prefix
    "localhost", // resource name prefix
    "TestDB", // resources
    1, // partitions per resource
    10, // number of nodes
    5, // replicas
    1, "Bootstrap", // do rebalance
    true);
    ClusterControllerManager controller = new ClusterControllerManager(ZK_ADDR, clusterName, "controller_0");
    controller.syncStart();
    // start participants
    MockParticipantManager[] participants = new MockParticipantManager[5];
    for (int i = 0; i < 5; i++) {
        String instanceName = "localhost_" + (12918 + i);
        participants[i] = new MockParticipantManager(ZK_ADDR, clusterName, instanceName);
        // add a state model with non-OFFLINE initial state
        StateMachineEngine stateMach = participants[i].getStateMachineEngine();
        MockBootstrapModelFactory bootstrapFactory = new MockBootstrapModelFactory();
        stateMach.registerStateModelFactory("Bootstrap", bootstrapFactory);
        participants[i].syncStart();
    }
    boolean result = ClusterStateVerifier.verifyByZkCallback(new BestPossAndExtViewZkVerifier(ZK_ADDR, clusterName));
    Assert.assertTrue(result);
    // clean up
    controller.syncStop();
    for (int i = 0; i < 5; i++) {
        participants[i].syncStop();
    }
    System.out.println("END testNonOfflineInitState at " + new Date(System.currentTimeMillis()));
}
Also used : ClusterControllerManager(org.apache.helix.integration.manager.ClusterControllerManager) StateMachineEngine(org.apache.helix.participant.StateMachineEngine) MockParticipantManager(org.apache.helix.integration.manager.MockParticipantManager) MockBootstrapModelFactory(org.apache.helix.mock.participant.MockBootstrapModelFactory) BestPossAndExtViewZkVerifier(org.apache.helix.tools.ClusterStateVerifier.BestPossAndExtViewZkVerifier) Date(java.util.Date) Test(org.testng.annotations.Test)

Example 30 with BestPossAndExtViewZkVerifier

use of org.apache.helix.tools.ClusterStateVerifier.BestPossAndExtViewZkVerifier in project helix by apache.

the class TestConsecutiveZkSessionExpiry method testDistributedController.

@Test
public void testDistributedController() throws Exception {
    // Logger.getRootLogger().setLevel(Level.INFO);
    String className = TestHelper.getTestClassName();
    String methodName = TestHelper.getTestMethodName();
    String clusterName = className + "_" + methodName;
    int n = 2;
    System.out.println("START " + clusterName + " at " + new Date(System.currentTimeMillis()));
    // participant port
    TestHelper.setupCluster(// participant port
    clusterName, // participant port
    ZK_ADDR, // participant port
    12918, // participant name prefix
    "localhost", // resource name prefix
    "TestDB", // resources
    1, // partitions per resource
    4, // number of nodes
    n, // replicas
    2, "MasterSlave", // do rebalance
    true);
    ClusterDistributedController[] distributedControllers = new ClusterDistributedController[n];
    CountDownLatch startCountdown = new CountDownLatch(1);
    CountDownLatch endCountdown = new CountDownLatch(1);
    for (int i = 0; i < n; i++) {
        String contrllerName = "localhost_" + (12918 + i);
        distributedControllers[i] = new ClusterDistributedController(ZK_ADDR, clusterName, contrllerName);
        distributedControllers[i].getStateMachineEngine().registerStateModelFactory("MasterSlave", new MockMSModelFactory());
        if (i == 0) {
            distributedControllers[i].addPreConnectCallback(new PreConnectTestCallback(contrllerName, startCountdown, endCountdown));
        }
        distributedControllers[i].connect();
    }
    boolean result = ClusterStateVerifier.verifyByZkCallback(new BestPossAndExtViewZkVerifier(ZK_ADDR, clusterName));
    Assert.assertTrue(result);
    // expire the session of distributedController
    LOG.info("1st Expiring distributedController session...");
    String oldSessionId = distributedControllers[0].getSessionId();
    ZkTestHelper.asyncExpireSession(distributedControllers[0].getZkClient());
    String newSessionId = distributedControllers[0].getSessionId();
    LOG.info("Expried distributedController session. oldSessionId: " + oldSessionId + ", newSessionId: " + newSessionId);
    // expire zk session again during HelixManager#handleNewSession()
    startCountdown.await();
    LOG.info("2nd Expiring distributedController session...");
    oldSessionId = distributedControllers[0].getSessionId();
    ZkTestHelper.asyncExpireSession(distributedControllers[0].getZkClient());
    newSessionId = distributedControllers[0].getSessionId();
    LOG.info("Expried distributedController session. oldSessionId: " + oldSessionId + ", newSessionId: " + newSessionId);
    endCountdown.countDown();
    result = ClusterStateVerifier.verifyByPolling(new ClusterStateVerifier.BestPossAndExtViewZkVerifier(ZK_ADDR, clusterName));
    Assert.assertTrue(result);
    // verify leader changes to localhost_12919
    HelixDataAccessor accessor = new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<ZNRecord>(_gZkClient));
    PropertyKey.Builder keyBuilder = accessor.keyBuilder();
    Assert.assertNotNull(pollForProperty(LiveInstance.class, accessor, keyBuilder.liveInstance("localhost_12918"), true));
    LiveInstance leader = pollForProperty(LiveInstance.class, accessor, keyBuilder.controllerLeader(), true);
    Assert.assertNotNull(leader);
    Assert.assertEquals(leader.getId(), "localhost_12919");
    // check localhost_12918 has 2 handlers: message and data-accessor
    LOG.debug("handlers: " + TestHelper.printHandlers(distributedControllers[0]));
    List<CallbackHandler> handlers = distributedControllers[0].getHandlers();
    Assert.assertEquals(handlers.size(), 2, "Distributed controller should have 2 handler (message) after lose leadership, but was " + handlers.size());
    // clean up
    distributedControllers[0].disconnect();
    distributedControllers[1].disconnect();
    Assert.assertNull(pollForProperty(LiveInstance.class, accessor, keyBuilder.liveInstance("localhost_12918"), false));
    Assert.assertNull(pollForProperty(LiveInstance.class, accessor, keyBuilder.liveInstance("localhost_12919"), false));
    Assert.assertNull(pollForProperty(LiveInstance.class, accessor, keyBuilder.controllerLeader(), false));
    System.out.println("END " + clusterName + " at " + new Date(System.currentTimeMillis()));
}
Also used : CallbackHandler(org.apache.helix.manager.zk.CallbackHandler) CountDownLatch(java.util.concurrent.CountDownLatch) BestPossAndExtViewZkVerifier(org.apache.helix.tools.ClusterStateVerifier.BestPossAndExtViewZkVerifier) Date(java.util.Date) ZKHelixDataAccessor(org.apache.helix.manager.zk.ZKHelixDataAccessor) HelixDataAccessor(org.apache.helix.HelixDataAccessor) MockMSModelFactory(org.apache.helix.mock.participant.MockMSModelFactory) LiveInstance(org.apache.helix.model.LiveInstance) ZNRecord(org.apache.helix.ZNRecord) PropertyKey(org.apache.helix.PropertyKey) ZKHelixDataAccessor(org.apache.helix.manager.zk.ZKHelixDataAccessor) Test(org.testng.annotations.Test)

Aggregations

BestPossAndExtViewZkVerifier (org.apache.helix.tools.ClusterStateVerifier.BestPossAndExtViewZkVerifier)50 Test (org.testng.annotations.Test)47 Date (java.util.Date)40 MockParticipantManager (org.apache.helix.integration.manager.MockParticipantManager)33 ClusterControllerManager (org.apache.helix.integration.manager.ClusterControllerManager)26 ZNRecord (org.apache.helix.ZNRecord)24 ZKHelixDataAccessor (org.apache.helix.manager.zk.ZKHelixDataAccessor)18 HashMap (java.util.HashMap)11 HelixDataAccessor (org.apache.helix.HelixDataAccessor)11 Builder (org.apache.helix.PropertyKey.Builder)11 IdealState (org.apache.helix.model.IdealState)11 ClusterDistributedController (org.apache.helix.integration.manager.ClusterDistributedController)9 HelixAdmin (org.apache.helix.HelixAdmin)8 ZkBaseDataAccessor (org.apache.helix.manager.zk.ZkBaseDataAccessor)8 LiveInstance (org.apache.helix.model.LiveInstance)8 MasterNbInExtViewVerifier (org.apache.helix.tools.ClusterStateVerifier.MasterNbInExtViewVerifier)8 ClusterSetup (org.apache.helix.tools.ClusterSetup)7 PropertyKey (org.apache.helix.PropertyKey)6 ZKHelixAdmin (org.apache.helix.manager.zk.ZKHelixAdmin)6 ExternalView (org.apache.helix.model.ExternalView)6