Search in sources:

Example 1 with ControllerHistory

use of org.apache.helix.model.ControllerHistory in project helix by apache.

the class ZKHelixAdmin method processMaintenanceMode.

/**
 * Helper method for enabling/disabling maintenance mode.
 *
 * <p>When disabling, the maintenance property is removed. When enabling, a new
 * {@link MaintenanceSignal} is built and created through the data accessor. In both cases an
 * entry is then appended to the controller history record; a failure to write that history
 * entry is only logged and does not fail the operation.
 *
 * @param clusterName name of the cluster whose maintenance mode is changed
 * @param enabled {@code true} to enter maintenance mode, {@code false} to exit it
 * @param reason optional reason; may be {@code null}, in which case no reason is set on the
 *          signal
 * @param internalReason auto-trigger reason; set on the signal only when the triggering entity
 *          is {@code CONTROLLER}
 * @param customFields optional extra key/value pairs; copied into the signal's simple fields
 *          only for {@code USER}/{@code UNKNOWN} triggers, and never overwriting a key that is
 *          already present
 * @param triggeringEntity who triggered the change
 * @throws HelixException if the maintenance signal could not be created
 */
private void processMaintenanceMode(String clusterName, final boolean enabled, final String reason, final MaintenanceSignal.AutoTriggerReason internalReason, final Map<String, String> customFields, final MaintenanceSignal.TriggeringEntity triggeringEntity) {
    HelixDataAccessor accessor = new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<ZNRecord>(_zkClient));
    PropertyKey.Builder keyBuilder = accessor.keyBuilder();
    logger.info("Cluster {} {} {} maintenance mode for reason {}.", clusterName, triggeringEntity == MaintenanceSignal.TriggeringEntity.CONTROLLER ? "automatically" : "manually", enabled ? "enters" : "exits", reason == null ? "NULL" : reason);
    // Single timestamp shared by the signal and the history entry so the two agree.
    final long currentTime = System.currentTimeMillis();
    if (!enabled) {
        // Exit maintenance mode
        accessor.removeProperty(keyBuilder.maintenance());
    } else {
        // Enter maintenance mode
        MaintenanceSignal maintenanceSignal = new MaintenanceSignal(MAINTENANCE_ZNODE_ID);
        if (reason != null) {
            maintenanceSignal.setReason(reason);
        }
        maintenanceSignal.setTimestamp(currentTime);
        maintenanceSignal.setTriggeringEntity(triggeringEntity);
        switch(triggeringEntity) {
            case CONTROLLER:
                // autoEnable
                maintenanceSignal.setAutoTriggerReason(internalReason);
                break;
            case USER:
            case UNKNOWN:
                // manuallyEnable
                if (customFields != null && !customFields.isEmpty()) {
                    // Enter all custom fields provided by the user, but never let them
                    // overwrite a field that is already set on the signal.
                    Map<String, String> simpleFields = maintenanceSignal.getRecord().getSimpleFields();
                    for (Map.Entry<String, String> entry : customFields.entrySet()) {
                        if (!simpleFields.containsKey(entry.getKey())) {
                            simpleFields.put(entry.getKey(), entry.getValue());
                        }
                    }
                }
                break;
            default:
                // No extra signal fields for any other triggering entity.
                break;
        }
        if (!accessor.createMaintenance(maintenanceSignal)) {
            throw new HelixException("Failed to create maintenance signal!");
        }
    }
    // Record a MaintenanceSignal history
    if (!accessor.getBaseDataAccessor().update(keyBuilder.controllerLeaderHistory().getPath(), (DataUpdater<ZNRecord>) oldRecord -> {
        try {
            if (oldRecord == null) {
                oldRecord = new ZNRecord(PropertyType.HISTORY.toString());
            }
            return new ControllerHistory(oldRecord).updateMaintenanceHistory(enabled, reason, currentTime, internalReason, customFields, triggeringEntity);
        } catch (IOException e) {
            // Pass the exception as the throwable argument so its stack trace is logged
            // and the message is not left with an unfilled "{}" placeholder.
            logger.error("Failed to update maintenance history!", e);
            // Keep the existing record unchanged when the new entry cannot be built.
            return oldRecord;
        }
    }, AccessOption.PERSISTENT)) {
        logger.error("Failed to write maintenance history to ZK!");
    }
}
Also used : ControllerHistory(org.apache.helix.model.ControllerHistory) IOException(java.io.IOException) HelixException(org.apache.helix.HelixException) HelixDataAccessor(org.apache.helix.HelixDataAccessor) DataUpdater(org.apache.helix.zookeeper.zkclient.DataUpdater) MaintenanceSignal(org.apache.helix.model.MaintenanceSignal) Map(java.util.Map) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) ZNRecord(org.apache.helix.zookeeper.datamodel.ZNRecord) PropertyKey(org.apache.helix.PropertyKey)

Example 2 with ControllerHistory

use of org.apache.helix.model.ControllerHistory in project helix by apache.

the class ClusterAccessor method getControllerHistory.

/**
 * Reads the controller leader history property for a cluster and packs the requested slice of
 * it into a map.
 *
 * <p>The returned map always contains the cluster id. For {@code CONTROLLER_LEADERSHIP} it
 * additionally contains the leadership history list, and for {@code MAINTENANCE} the
 * maintenance history list. When no history record could be read, an empty list is stored
 * in place of the history.
 *
 * @param clusterId id of the cluster whose history is read
 * @param historyType selects which history list is placed into the result
 * @return a map with the cluster id and, for the handled history types, the selected list
 */
private Map<String, Object> getControllerHistory(String clusterId, ControllerHistory.HistoryType historyType) {
    HelixDataAccessor accessor = getDataAccssor(clusterId);
    Map<String, Object> result = new HashMap<>();
    result.put(Properties.id.name(), clusterId);
    ControllerHistory record = accessor.getProperty(accessor.keyBuilder().controllerLeaderHistory());
    switch(historyType) {
        case CONTROLLER_LEADERSHIP: {
            Object leadershipEntries;
            if (record == null) {
                leadershipEntries = Collections.emptyList();
            } else {
                leadershipEntries = record.getHistoryList();
            }
            result.put(Properties.history.name(), leadershipEntries);
            break;
        }
        case MAINTENANCE: {
            Object maintenanceEntries;
            if (record == null) {
                maintenanceEntries = Collections.emptyList();
            } else {
                maintenanceEntries = record.getMaintenanceHistoryList();
            }
            result.put(ClusterProperties.maintenanceHistory.name(), maintenanceEntries);
            break;
        }
    }
    return result;
}
Also used : HelixDataAccessor(org.apache.helix.HelixDataAccessor) HashMap(java.util.HashMap) ControllerHistory(org.apache.helix.model.ControllerHistory)

Example 3 with ControllerHistory

use of org.apache.helix.model.ControllerHistory in project helix by apache.

the class TestClusterFreezeMode method testEnableFreezeMode.

/*
   * Scenarios covered:
   * 1. while a state transition message is still pending, the cluster is only "in progress"
   *    towards freeze mode;
   * 2. once that state transition finishes, freeze mode is reported as completed.
   *
   * Cluster status and the management mode history recording are checked along the way.
   */
@Test
public void testEnableFreezeMode() throws Exception {
    String methodName = TestHelper.getTestMethodName();
    // Precondition: no pause signal, i.e. the cluster is not frozen yet
    PropertyKey.Builder keyBuilder = _accessor.keyBuilder();
    PauseSignal existingPause = _accessor.getProperty(keyBuilder.pause());
    Assert.assertNull(existingPause);
    // Make participants[1] block inside its state transition until the latch is released
    CountDownLatch transitionGate = new CountDownLatch(1);
    _participants[1].setTransition(new BlockingTransition(transitionGate));
    // Build and post a state transition message targeted at participants[1]
    Resource testDb = new Resource("TestDB0");
    testDb.setStateModelFactoryName(HelixConstants.DEFAULT_STATE_MODEL_FACTORY);
    Message stMessage = MessageUtil.createStateTransitionMessage(
        _manager.getInstanceName(),
        _manager.getSessionId(),
        testDb,
        "TestDB0_1",
        _participants[1].getInstanceName(),
        "SLAVE",
        "OFFLINE",
        _participants[1].getSessionId(),
        "MasterSlave");
    Assert.assertTrue(_accessor.updateProperty(keyBuilder.message(stMessage.getTgtName(), stMessage.getMsgId()), stMessage));
    // Request cluster freeze
    ClusterManagementModeRequest freezeRequest = ClusterManagementModeRequest.newBuilder()
        .withClusterName(_clusterName)
        .withMode(ClusterManagementMode.Type.CLUSTER_FREEZE)
        .withReason(methodName)
        .build();
    _gSetupTool.getClusterManagementTool().setClusterManagementMode(freezeRequest);
    // All live instances should end up marked as frozen
    verifyLiveInstanceStatus(_participants, LiveInstance.LiveInstanceStatus.FROZEN);
    // The pending ST message must still be there
    Assert.assertTrue(_gZkClient.exists(keyBuilder.message(stMessage.getTgtName(), stMessage.getMsgId()).getPath()));
    // Live instances are frozen, but with a pending state transition message the cluster
    // as a whole is still only in progress towards cluster freeze
    ClusterStatus inProgressStatus = new ClusterStatus();
    inProgressStatus.setManagementMode(ClusterManagementMode.Type.CLUSTER_FREEZE);
    inProgressStatus.setManagementModeStatus(ClusterManagementMode.Status.IN_PROGRESS);
    verifyClusterStatus(inProgressStatus);
    // No management mode history should have been recorded yet
    ControllerHistory historyBeforeUnblock = _accessor.getProperty(_accessor.keyBuilder().controllerLeaderHistory());
    List<String> entriesBeforeUnblock = historyBeforeUnblock.getManagementModeHistory();
    Assert.assertTrue(entriesBeforeUnblock.isEmpty());
    // Release the blocked state transition so the ST message gets deleted
    transitionGate.countDown();
    // Live instances stay frozen and the cluster status moves to completed
    verifyLiveInstanceStatus(_participants, LiveInstance.LiveInstanceStatus.FROZEN);
    ClusterStatus completedStatus = new ClusterStatus();
    completedStatus.setManagementMode(ClusterManagementMode.Type.CLUSTER_FREEZE);
    completedStatus.setManagementModeStatus(ClusterManagementMode.Status.COMPLETED);
    verifyClusterStatus(completedStatus);
    // Management mode history must eventually hold exactly one matching entry
    Assert.assertTrue(TestHelper.verify(() -> {
        ControllerHistory currentHistory = _accessor.getProperty(keyBuilder.controllerLeaderHistory());
        List<String> entries = currentHistory.getManagementModeHistory();
        // Missing/empty history is not ready yet; more than one entry means duplicates
        if (entries == null || entries.size() != 1) {
            return false;
        }
        String entry = entries.get(0);
        if (!entry.contains("MODE=" + ClusterManagementMode.Type.CLUSTER_FREEZE)) {
            return false;
        }
        if (!entry.contains("STATUS=" + ClusterManagementMode.Status.COMPLETED)) {
            return false;
        }
        return entry.contains("REASON=" + methodName);
    }, TestHelper.WAIT_DURATION));
}
Also used : Message(org.apache.helix.model.Message) ClusterManagementModeRequest(org.apache.helix.api.status.ClusterManagementModeRequest) Resource(org.apache.helix.model.Resource) ControllerHistory(org.apache.helix.model.ControllerHistory) CountDownLatch(java.util.concurrent.CountDownLatch) PauseSignal(org.apache.helix.model.PauseSignal) List(java.util.List) PropertyKey(org.apache.helix.PropertyKey) ClusterStatus(org.apache.helix.model.ClusterStatus) Test(org.testng.annotations.Test)

Example 4 with ControllerHistory

use of org.apache.helix.model.ControllerHistory in project helix by apache.

the class TestClusterMaintenanceMode method testMaintenanceHistory.

/**
 * Test that the Controller correctly records maintenance history in various situations:
 * an automatic ENTER by the controller, the matching automatic EXIT once the partition limit
 * config is removed, and a manual ENTER by a user with custom fields.
 * @throws Exception if reading or updating cluster state fails
 */
@Test(dependsOnMethods = "testMaxPartitionLimit")
public void testMaintenanceHistory() throws Exception {
    String methodName = TestHelper.getTestMethodName();
    // In maintenance mode, by controller, for MAX_PARTITION_PER_INSTANCE_EXCEEDED
    ControllerHistory history = _dataAccessor.getProperty(_keyBuilder.controllerLeaderHistory());
    Map<String, String> lastHistoryEntry = convertStringToMap(history.getMaintenanceHistoryList().get(history.getMaintenanceHistoryList().size() - 1));
    // **The KV pairs are hard-coded in here for the ease of reading!**
    Assert.assertEquals(lastHistoryEntry.get("OPERATION_TYPE"), "ENTER");
    Assert.assertEquals(lastHistoryEntry.get("TRIGGERED_BY"), "CONTROLLER");
    Assert.assertEquals(lastHistoryEntry.get("AUTO_TRIGGER_REASON"), "MAX_PARTITION_PER_INSTANCE_EXCEEDED");
    // Remove the maxPartitionPerInstance config
    ClusterConfig clusterConfig = _manager.getConfigAccessor().getClusterConfig(CLUSTER_NAME);
    clusterConfig.setMaxPartitionsPerInstance(-1);
    _manager.getConfigAccessor().setClusterConfig(CLUSTER_NAME, clusterConfig);
    // Fail here, with a clear cause, if the maintenance signal is not removed in time,
    // instead of ignoring the wait result and failing later on a history assertion.
    Assert.assertTrue(TestHelper.verify(() -> _dataAccessor.getProperty(_keyBuilder.maintenance()) == null, 2000L));
    // Now check that the cluster exited maintenance
    // EXIT, CONTROLLER, for MAX_PARTITION_PER_INSTANCE_EXCEEDED
    history = _dataAccessor.getProperty(_keyBuilder.controllerLeaderHistory());
    lastHistoryEntry = convertStringToMap(history.getMaintenanceHistoryList().get(history.getMaintenanceHistoryList().size() - 1));
    Assert.assertEquals(lastHistoryEntry.get("OPERATION_TYPE"), "EXIT");
    Assert.assertEquals(lastHistoryEntry.get("TRIGGERED_BY"), "CONTROLLER");
    Assert.assertEquals(lastHistoryEntry.get("AUTO_TRIGGER_REASON"), "MAX_PARTITION_PER_INSTANCE_EXCEEDED");
    // Manually put the cluster in maintenance with a custom field
    Map<String, String> customFieldMap = ImmutableMap.of("k1", "v1", "k2", "v2");
    _gSetupTool.getClusterManagementTool().manuallyEnableMaintenanceMode(CLUSTER_NAME, true, methodName, customFieldMap);
    // Same as above: the maintenance signal must show up before the history is inspected.
    Assert.assertTrue(TestHelper.verify(() -> _dataAccessor.getProperty(_keyBuilder.maintenance()) != null, 2000L));
    // ENTER, USER, with the test method name as reason, no internalReason
    history = _dataAccessor.getProperty(_keyBuilder.controllerLeaderHistory());
    lastHistoryEntry = convertStringToMap(history.getMaintenanceHistoryList().get(history.getMaintenanceHistoryList().size() - 1));
    Assert.assertEquals(lastHistoryEntry.get("OPERATION_TYPE"), "ENTER");
    Assert.assertEquals(lastHistoryEntry.get("TRIGGERED_BY"), "USER");
    Assert.assertEquals(lastHistoryEntry.get("REASON"), methodName);
    Assert.assertNull(lastHistoryEntry.get("AUTO_TRIGGER_REASON"));
}
Also used : ControllerHistory(org.apache.helix.model.ControllerHistory) ClusterConfig(org.apache.helix.model.ClusterConfig) Test(org.testng.annotations.Test)

Example 5 with ControllerHistory

use of org.apache.helix.model.ControllerHistory in project helix by apache.

the class TestClusterFreezeMode method testUnfreezeCluster.

/*
 * Switches the cluster back to NORMAL management mode and checks live instance status,
 * cluster status, the latest management mode history entry, and that the cluster state
 * verifier passes afterwards.
 */
@Test(dependsOnMethods = "testCreateResourceWhenFrozen")
public void testUnfreezeCluster() throws Exception {
    String methodName = TestHelper.getTestMethodName();
    // Request the cluster to go back to normal mode
    ClusterManagementModeRequest unfreezeRequest = ClusterManagementModeRequest.newBuilder()
        .withClusterName(_clusterName)
        .withMode(ClusterManagementMode.Type.NORMAL)
        .withReason(methodName)
        .build();
    _gSetupTool.getClusterManagementTool().setClusterManagementMode(unfreezeRequest);
    verifyLiveInstanceStatus(_participants, LiveInstance.LiveInstanceStatus.NORMAL);
    ClusterStatus normalCompleted = new ClusterStatus();
    normalCompleted.setManagementMode(ClusterManagementMode.Type.NORMAL);
    normalCompleted.setManagementModeStatus(ClusterManagementMode.Status.COMPLETED);
    verifyClusterStatus(normalCompleted);
    // The most recent management mode history entry must be NORMAL + COMPLETED
    Assert.assertTrue(TestHelper.verify(() -> {
        ControllerHistory currentHistory = _accessor.getProperty(_accessor.keyBuilder().controllerLeaderHistory());
        List<String> entries = currentHistory.getManagementModeHistory();
        if (entries == null || entries.isEmpty()) {
            return false;
        }
        String newestEntry = entries.get(entries.size() - 1);
        if (!newestEntry.contains("MODE=" + ClusterManagementMode.Type.NORMAL)) {
            return false;
        }
        return newestEntry.contains("STATUS=" + ClusterManagementMode.Status.COMPLETED);
    }, TestHelper.WAIT_DURATION));
    // After unfreezing, the cluster state verifier must succeed again.
    ClusterStateVerifier.BestPossAndExtViewZkVerifier stateVerifier = new ClusterStateVerifier.BestPossAndExtViewZkVerifier(ZK_ADDR, _clusterName);
    Assert.assertTrue(ClusterStateVerifier.verifyByZkCallback(stateVerifier));
}
Also used : ClusterManagementModeRequest(org.apache.helix.api.status.ClusterManagementModeRequest) ControllerHistory(org.apache.helix.model.ControllerHistory) List(java.util.List) ClusterStatus(org.apache.helix.model.ClusterStatus) Test(org.testng.annotations.Test)

Aggregations

ControllerHistory (org.apache.helix.model.ControllerHistory): 8; PropertyKey (org.apache.helix.PropertyKey): 5; Test (org.testng.annotations.Test): 5; List (java.util.List): 3; HelixDataAccessor (org.apache.helix.HelixDataAccessor): 3; HelixException (org.apache.helix.HelixException): 3; ClusterManagementModeRequest (org.apache.helix.api.status.ClusterManagementModeRequest): 3; ClusterStatus (org.apache.helix.model.ClusterStatus): 3; HashMap (java.util.HashMap): 2; HelixManager (org.apache.helix.HelixManager): 2; LiveInstance (org.apache.helix.model.LiveInstance): 2; ZNRecord (org.apache.helix.zookeeper.datamodel.ZNRecord): 2; IOException (java.io.IOException): 1; ManagementFactory (java.lang.management.ManagementFactory): 1; Map (java.util.Map): 1; TreeMap (java.util.TreeMap): 1; CountDownLatch (java.util.concurrent.CountDownLatch): 1; AccessOption (org.apache.helix.AccessOption): 1; HelixTimerTask (org.apache.helix.HelixTimerTask): 1; InstanceType (org.apache.helix.InstanceType): 1