Search in sources :

Example 86 with HelixDataAccessor

use of org.apache.helix.HelixDataAccessor in project helix by apache.

the class TestConsecutiveZkSessionExpiry method testDistributedController.

/**
 * Expires the ZooKeeper session of the first of two distributed controllers twice in a row
 * (the second expiry is triggered while the first one is still being handled), then checks that
 * leadership ends up on the second controller, that the first controller keeps exactly two
 * callback handlers, and that all live-instance / leader nodes disappear after disconnect.
 *
 * @throws Exception if any step of the cluster setup, session expiry or verification fails
 */
@Test
public void testDistributedController() throws Exception {
    // Logger.getRootLogger().setLevel(Level.INFO);
    String className = TestHelper.getTestClassName();
    String methodName = TestHelper.getTestMethodName();
    String clusterName = className + "_" + methodName;
    int n = 2;
    System.out.println("START " + clusterName + " at " + new Date(System.currentTimeMillis()));
    // Create the cluster. The per-argument labels below were realigned from the original comments,
    // which were shifted by one position. NOTE(review): confirm against TestHelper.setupCluster.
    TestHelper.setupCluster(// arguments follow, one per line
    clusterName, // cluster name
    ZK_ADDR, // ZooKeeper address
    12918, // participant port
    "localhost", // participant name prefix
    "TestDB", // resource name prefix
    1, // resources
    4, // partitions per resource
    n, // number of nodes
    2, "MasterSlave", // replicas, then state model name
    true); // do rebalance
    ClusterDistributedController[] distributedControllers = new ClusterDistributedController[n];
    // Latches shared with the pre-connect callback of controller 0.
    // NOTE(review): presumably the callback counts down startCountdown when it is invoked and then
    // blocks on endCountdown; confirm against PreConnectTestCallback.
    CountDownLatch startCountdown = new CountDownLatch(1);
    CountDownLatch endCountdown = new CountDownLatch(1);
    for (int i = 0; i < n; i++) {
        // Controller names are localhost_12918 and localhost_12919.
        String contrllerName = "localhost_" + (12918 + i);
        distributedControllers[i] = new ClusterDistributedController(ZK_ADDR, clusterName, contrllerName);
        distributedControllers[i].getStateMachineEngine().registerStateModelFactory("MasterSlave", new MockMSModelFactory());
        // Only the first controller gets the pre-connect callback; it is the one whose session is expired below.
        if (i == 0) {
            distributedControllers[i].addPreConnectCallback(new PreConnectTestCallback(contrllerName, startCountdown, endCountdown));
        }
        distributedControllers[i].connect();
    }
    // Wait for the cluster to reach a verified state before disturbing it.
    boolean result = ClusterStateVerifier.verifyByZkCallback(new BestPossAndExtViewZkVerifier(ZK_ADDR, clusterName));
    Assert.assertTrue(result);
    // expire the session of distributedController
    LOG.info("1st Expiring distributedController session...");
    String oldSessionId = distributedControllers[0].getSessionId();
    ZkTestHelper.asyncExpireSession(distributedControllers[0].getZkClient());
    String newSessionId = distributedControllers[0].getSessionId();
    LOG.info("Expried distributedController session. oldSessionId: " + oldSessionId + ", newSessionId: " + newSessionId);
    // expire zk session again during HelixManager#handleNewSession()
    // Block until startCountdown is released before triggering the second expiry.
    startCountdown.await();
    LOG.info("2nd Expiring distributedController session...");
    oldSessionId = distributedControllers[0].getSessionId();
    ZkTestHelper.asyncExpireSession(distributedControllers[0].getZkClient());
    newSessionId = distributedControllers[0].getSessionId();
    LOG.info("Expried distributedController session. oldSessionId: " + oldSessionId + ", newSessionId: " + newSessionId);
    // Release endCountdown so whatever is waiting on it can proceed.
    endCountdown.countDown();
    result = ClusterStateVerifier.verifyByPolling(new ClusterStateVerifier.BestPossAndExtViewZkVerifier(ZK_ADDR, clusterName));
    Assert.assertTrue(result);
    // verify leader changes to localhost_12919
    HelixDataAccessor accessor = new ZKHelixDataAccessor(clusterName, new ZkBaseDataAccessor<ZNRecord>(_gZkClient));
    PropertyKey.Builder keyBuilder = accessor.keyBuilder();
    // NOTE(review): the last pollForProperty argument looks like "expect the property to exist" (true)
    // versus "expect it to be gone" (false); confirm against pollForProperty.
    Assert.assertNotNull(pollForProperty(LiveInstance.class, accessor, keyBuilder.liveInstance("localhost_12918"), true));
    LiveInstance leader = pollForProperty(LiveInstance.class, accessor, keyBuilder.controllerLeader(), true);
    Assert.assertNotNull(leader);
    Assert.assertEquals(leader.getId(), "localhost_12919");
    // check localhost_12918 has 2 handlers: message and data-accessor
    LOG.debug("handlers: " + TestHelper.printHandlers(distributedControllers[0]));
    List<CallbackHandler> handlers = distributedControllers[0].getHandlers();
    Assert.assertEquals(handlers.size(), 2, "Distributed controller should have 2 handler (message) after lose leadership, but was " + handlers.size());
    // clean up
    distributedControllers[0].disconnect();
    distributedControllers[1].disconnect();
    // After disconnect, both live-instance nodes and the leader node are expected to be absent.
    Assert.assertNull(pollForProperty(LiveInstance.class, accessor, keyBuilder.liveInstance("localhost_12918"), false));
    Assert.assertNull(pollForProperty(LiveInstance.class, accessor, keyBuilder.liveInstance("localhost_12919"), false));
    Assert.assertNull(pollForProperty(LiveInstance.class, accessor, keyBuilder.controllerLeader(), false));
    System.out.println("END " + clusterName + " at " + new Date(System.currentTimeMillis()));
}
Also used : CallbackHandler(org.apache.helix.manager.zk.CallbackHandler) CountDownLatch(java.util.concurrent.CountDownLatch) BestPossAndExtViewZkVerifier(org.apache.helix.tools.ClusterStateVerifier.BestPossAndExtViewZkVerifier) Date(java.util.Date) ZKHelixDataAccessor(org.apache.helix.manager.zk.ZKHelixDataAccessor) HelixDataAccessor(org.apache.helix.HelixDataAccessor) MockMSModelFactory(org.apache.helix.mock.participant.MockMSModelFactory) LiveInstance(org.apache.helix.model.LiveInstance) ZNRecord(org.apache.helix.ZNRecord) PropertyKey(org.apache.helix.PropertyKey) ZKHelixDataAccessor(org.apache.helix.manager.zk.ZKHelixDataAccessor) Test(org.testng.annotations.Test)

Example 87 with HelixDataAccessor

use of org.apache.helix.HelixDataAccessor in project ambry by linkedin.

the class HelixHealthReportAggregationTaskTest method initializeNodeReports.

/**
 * Generates a single node-level stats report and stores it as the health report of
 * {@code numNode} localhost instances, adding each instance through the mock helix admin first.
 * Every instance receives the same serialized report.
 * @param type The type of reports to create; selects the health report name and the stats field name.
 * @param numNode The number of instances to create reports for.
 * @param startingPort The port of the first instance; each subsequent instance uses the next port.
 * @throws IOException propagated from the calls made here (e.g. serializing the node stats to JSON).
 */
private void initializeNodeReports(StatsReportType type, int numNode, int startingPort) throws IOException {
    final boolean accountReport = type == StatsReportType.ACCOUNT_REPORT;
    final String reportName = accountReport ? HEALTH_REPORT_NAME_ACCOUNT : HEALTH_REPORT_NAME_PARTITION;
    final String fieldName = accountReport ? STATS_FIELD_NAME_ACCOUNT : STATS_FIELD_NAME_PARTITION;

    // Three store snapshots (ids 3, 4, 5) feed the node-level report.
    List<StatsSnapshot> perStoreStats = new ArrayList<>();
    Random rng = new Random();
    int storeId = 3;
    while (storeId < 6) {
        perStoreStats.add(TestUtils.generateStoreStats(storeId, 3, rng, type));
        storeId++;
    }
    final String serializedNodeStats = mapper.writeValueAsString(TestUtils.generateNodeStats(perStoreStats, 1000, type));

    HelixDataAccessor accessor = mockHelixManager.getHelixDataAccessor();
    for (int offset = 0; offset < numNode; offset++) {
        // Consecutive ports distinguish the otherwise identical localhost instances.
        final int port = startingPort + offset;
        final String instance = ClusterMapUtils.getInstanceName("localhost", port);

        InstanceConfig config = new InstanceConfig(instance);
        config.setHostName("localhost");
        config.setPort(Integer.toString(port));
        mockHelixAdmin.addInstance(CLUSTER_NAME, config);

        PropertyKey reportKey = accessor.keyBuilder().healthReport(instance, reportName);
        // Set the same reports for all instances
        ZNRecord reportRecord = new ZNRecord(instance);
        reportRecord.setSimpleField(fieldName, serializedNodeStats);
        accessor.setProperty(reportKey, new HelixProperty(reportRecord));
    }
}
Also used : HelixDataAccessor(org.apache.helix.HelixDataAccessor) Random(java.util.Random) InstanceConfig(org.apache.helix.model.InstanceConfig) HelixProperty(org.apache.helix.HelixProperty) ArrayList(java.util.ArrayList) StatsWrapper(com.github.ambry.server.StatsWrapper) PropertyKey(org.apache.helix.PropertyKey) ZNRecord(org.apache.helix.zookeeper.datamodel.ZNRecord) StatsSnapshot(com.github.ambry.server.StatsSnapshot)

Example 88 with HelixDataAccessor

use of org.apache.helix.HelixDataAccessor in project pinot by linkedin.

the class HelixExternalViewBasedRouting method processExternalViewChange.

/**
 * Detects which served tables have a changed external view and rebuilds their routing tables.
 *
 * <p>The method takes a snapshot of the tables in {@code _lastKnownExternalViewVersionMap}, reads
 * the znode stats of their external views in one batch, and compares each znode version with the
 * last known version. For every table whose version differs it fetches the current external view
 * and calls {@code buildRoutingTable} with it and the instance configs (fetched once). Timings of
 * each phase are logged at the end.
 *
 * <p>Tables whose external view stat is missing, or that are no longer present in the version map
 * by the time they are checked, are skipped.
 */
public void processExternalViewChange() {
    long startTime = System.currentTimeMillis();
    // Get list of tables that we're serving
    List<String> tablesServed = new ArrayList<>(_lastKnownExternalViewVersionMap.keySet());
    if (tablesServed.isEmpty()) {
        return;
    }
    // Build list of external views to fetch
    HelixDataAccessor helixDataAccessor = _helixManager.getHelixDataAccessor();
    PropertyKey.Builder propertyKeyBuilder = helixDataAccessor.keyBuilder();
    List<String> externalViewPaths = new ArrayList<>(tablesServed.size());
    for (String tableName : tablesServed) {
        PropertyKey propertyKey = propertyKeyBuilder.externalView(tableName);
        externalViewPaths.add(propertyKey.getPath());
    }
    // Get znode stats for all tables that we're serving
    long statStartTime = System.currentTimeMillis();
    Stat[] externalViewStats = helixDataAccessor.getBaseDataAccessor().getStats(externalViewPaths, AccessOption.PERSISTENT);
    long statEndTime = System.currentTimeMillis();
    // Make a list of external views that changed
    List<String> tablesThatChanged = new ArrayList<>();
    long evCheckStartTime = System.currentTimeMillis();
    for (int i = 0; i < externalViewStats.length; i++) {
        Stat externalViewStat = externalViewStats[i];
        if (externalViewStat == null) {
            continue;
        }
        String currentTableName = tablesServed.get(i);
        // tablesServed is a snapshot of the key set, so the entry may have been removed since it
        // was taken. Read the boxed value and skip such tables instead of unboxing a null.
        Integer lastKnownExternalViewVersion = _lastKnownExternalViewVersionMap.get(currentTableName);
        if (lastKnownExternalViewVersion == null) {
            continue;
        }
        int currentExternalViewVersion = externalViewStat.getVersion();
        if (lastKnownExternalViewVersion.intValue() != currentExternalViewVersion) {
            tablesThatChanged.add(currentTableName);
        }
    }
    long evCheckEndTime = System.currentTimeMillis();
    // Fetch the instance configs and update the routing tables for the tables that changed
    long icFetchTime = 0;
    long rebuildStartTime = System.currentTimeMillis();
    if (!tablesThatChanged.isEmpty()) {
        // Fetch instance configs
        long icFetchStart = System.currentTimeMillis();
        List<InstanceConfig> instanceConfigs = helixDataAccessor.getChildValues(propertyKeyBuilder.instanceConfigs());
        long icFetchEnd = System.currentTimeMillis();
        icFetchTime = icFetchEnd - icFetchStart;
        for (String tableThatChanged : tablesThatChanged) {
            // We ignore the external views given by Helix on external view change and fetch the latest version as our
            // version of Helix (0.6.5) does not batch external view change messages.
            ExternalView externalView = helixDataAccessor.getProperty(propertyKeyBuilder.externalView(tableThatChanged));
            buildRoutingTable(tableThatChanged, externalView, instanceConfigs);
        }
    }
    // Note: the "rebuild" interval measured here includes the instance config fetch above.
    long rebuildEndTime = System.currentTimeMillis();
    long endTime = System.currentTimeMillis();
    LOGGER.info("Processed external view change in {} ms (stat {} ms, EV check {} ms, IC fetch {} ms, rebuild {} ms), routing tables rebuilt for tables {}, {} / {} routing tables rebuilt", (endTime - startTime), (statEndTime - statStartTime), (evCheckEndTime - evCheckStartTime), icFetchTime, (rebuildEndTime - rebuildStartTime), tablesThatChanged, tablesThatChanged.size(), tablesServed.size());
}
Also used : ExternalView(org.apache.helix.model.ExternalView) ArrayList(java.util.ArrayList) HelixDataAccessor(org.apache.helix.HelixDataAccessor) Stat(org.apache.zookeeper.data.Stat) InstanceConfig(org.apache.helix.model.InstanceConfig) PropertyKey(org.apache.helix.PropertyKey)

Example 89 with HelixDataAccessor

use of org.apache.helix.HelixDataAccessor in project pinot by linkedin.

the class PinotHelixResourceManager method toggleSegmentState.

/**
   * Toggle the status of segment between ONLINE (enable = true) and OFFLINE (enable = FALSE).
   *
   * <p>The ideal state of the table is rewritten so that every instance hosting each listed segment
   * is set to the target status; the update is retried until it succeeds or the deadline passes.
   * The ideal state is then re-read to verify the change, and the external view is awaited for
   * each segment.
   *
   * @param tableName: Name of table to which the segment belongs.
   * @param segments: List of segment for which to toggle the status.
   * @param enable: True for ONLINE, False for OFFLINE.
   * @param timeoutInSeconds Time out for toggling segment state.
   * @return a response flagged successful only when the ideal state holds the target status for all
   *         segments and the external view reflected it in time; otherwise a failed response whose
   *         message describes the problem (ideal state or segment not found, ideal state not
   *         updated, or external view timeout).
   */
public PinotResourceManagerResponse toggleSegmentState(String tableName, List<String> segments, boolean enable, long timeoutInSeconds) {
    String status = (enable) ? "ONLINE" : "OFFLINE";
    HelixDataAccessor helixDataAccessor = _helixZkManager.getHelixDataAccessor();
    PropertyKey idealStatePropertyKey = _keyBuilder.idealStates(tableName);
    boolean updateSuccessful;
    boolean externalViewUpdateSuccessful = true;
    long deadline = System.currentTimeMillis() + 1000 * timeoutInSeconds;
    // Set all partitions of the given segments to the target status in the ideal state
    do {
        final IdealState idealState = _helixAdmin.getResourceIdealState(_helixClusterName, tableName);
        // No ideal state means there is nothing to toggle; report it instead of dereferencing null.
        if (idealState == null) {
            return new PinotResourceManagerResponse("Error: Ideal state not found for table " + tableName, false);
        }
        for (String segmentName : segments) {
            final Set<String> instanceSet = idealState.getInstanceSet(segmentName);
            if (instanceSet == null || instanceSet.isEmpty()) {
                return new PinotResourceManagerResponse("Segment " + segmentName + " not found.", false);
            }
            for (final String instance : instanceSet) {
                idealState.setPartitionState(segmentName, instance, status);
            }
        }
        updateSuccessful = helixDataAccessor.updateProperty(idealStatePropertyKey, idealState);
    } while (!updateSuccessful && (System.currentTimeMillis() <= deadline));
    // Only claim success when the update actually went through; the check below decides the outcome either way.
    if (updateSuccessful) {
        LOGGER.info("Ideal state successfully updated, waiting to update external view");
    } else {
        LOGGER.warn("Ideal state update did not succeed before the deadline, verifying current ideal state");
    }
    // Check that the ideal state has been updated.
    IdealState updatedIdealState = _helixAdmin.getResourceIdealState(_helixClusterName, tableName);
    if (updatedIdealState == null) {
        return new PinotResourceManagerResponse("Error: Ideal state not found for table " + tableName, false);
    }
    for (String segmentName : segments) {
        Map<String, String> instanceStateMap = updatedIdealState.getInstanceStateMap(segmentName);
        // A segment that vanished from the ideal state cannot be in the target status.
        if (instanceStateMap == null) {
            return new PinotResourceManagerResponse("Error: Failed to update Ideal state when setting status " + status + " for segment " + segmentName, false);
        }
        for (String state : instanceStateMap.values()) {
            if (!status.equals(state)) {
                return new PinotResourceManagerResponse("Error: Failed to update Ideal state when setting status " + status + " for segment " + segmentName, false);
            }
        }
        // Wait until the segment reaches the target status in the external view
        if (!ifExternalViewChangeReflectedForState(tableName, segmentName, status, (timeoutInSeconds * 1000), true)) {
            externalViewUpdateSuccessful = false;
        }
    }
    return (externalViewUpdateSuccessful) ? new PinotResourceManagerResponse(("Success: Segment(s) " + " now " + status), true) : new PinotResourceManagerResponse("Error: Timed out. External view not completely updated", false);
}
Also used : HelixDataAccessor(org.apache.helix.HelixDataAccessor) PropertyKey(org.apache.helix.PropertyKey) IdealState(org.apache.helix.model.IdealState)

Example 90 with HelixDataAccessor

use of org.apache.helix.HelixDataAccessor in project ambry by linkedin.

the class HelixHealthReportAggregatorTask method run.

/**
 * Collects the health report of every instance in the cluster, aggregates them through
 * {@code clusterAggregator}, and writes the aggregated result (raw valid size, valid size,
 * timestamp and the instances on which errors occurred) to the helix property store.
 *
 * @return a COMPLETED {@link TaskResult} when aggregation and persistence succeed, or a FAILED one
 *         when any exception is thrown along the way (the exception is logged, not rethrown).
 */
@Override
public TaskResult run() {
    Exception failure = null;
    Pair<StatsSnapshot, StatsSnapshot> aggregated = null;
    try {
        HelixDataAccessor accessor = manager.getHelixDataAccessor();
        List<String> instances = manager.getClusterManagmentTool().getInstancesInCluster(manager.getClusterName());

        // Gather the serialized stats of each instance that has published a report.
        Map<String, String> reportJsonByInstance = new HashMap<>();
        for (String instance : instances) {
            HelixProperty report = accessor.getProperty(accessor.keyBuilder().healthReport(instance, healthReportName));
            if (report == null || report.getRecord() == null) {
                continue;
            }
            reportJsonByInstance.put(instance, report.getRecord().getSimpleField(statsFieldName));
        }

        ObjectMapper serializer = new ObjectMapper();
        aggregated = clusterAggregator.doWork(reportJsonByInstance, statsReportType);

        // Persist the aggregated report under "/<prefix><report name>".
        String aggregateId = String.format("%s%s", AGGREGATED_REPORT_PREFIX, healthReportName);
        ZNRecord aggregateRecord = new ZNRecord(aggregateId);
        aggregateRecord.setSimpleField(RAW_VALID_SIZE_FIELD_NAME, serializer.writeValueAsString(aggregated.getFirst()));
        aggregateRecord.setSimpleField(VALID_SIZE_FIELD_NAME, serializer.writeValueAsString(aggregated.getSecond()));
        aggregateRecord.setSimpleField(TIMESTAMP_FIELD_NAME, String.valueOf(time.milliseconds()));
        aggregateRecord.setListField(ERROR_OCCURRED_INSTANCES_FIELD_NAME, clusterAggregator.getExceptionOccurredInstances(statsReportType));
        String storePath = String.format("/%s", aggregateId);
        manager.getHelixPropertyStore().set(storePath, aggregateRecord, AccessOption.PERSISTENT);
        return new TaskResult(TaskResult.Status.COMPLETED, "Aggregation success");
    } catch (Exception e) {
        logger.error("Exception thrown while aggregating stats from health reports across all nodes ", e);
        failure = e;
        return new TaskResult(TaskResult.Status.FAILED, "Exception thrown");
    } finally {
        // The callback only fires for account reports, and only once aggregation produced a result.
        if (clusterMapConfig.clustermapEnableContainerDeletionAggregation && callback != null) {
            if (aggregated != null && statsReportType.equals(StatsReportType.ACCOUNT_REPORT)) {
                callback.onCompletion(aggregated.getFirst(), failure);
            }
        }
    }
}
Also used : HashMap(java.util.HashMap) HelixDataAccessor(org.apache.helix.HelixDataAccessor) HelixProperty(org.apache.helix.HelixProperty) TaskResult(org.apache.helix.task.TaskResult) PropertyKey(org.apache.helix.PropertyKey) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) ZNRecord(org.apache.helix.zookeeper.datamodel.ZNRecord) StatsSnapshot(com.github.ambry.server.StatsSnapshot)

Aggregations

HelixDataAccessor (org.apache.helix.HelixDataAccessor)173 ZNRecord (org.apache.helix.ZNRecord)91 PropertyKey (org.apache.helix.PropertyKey)69 Test (org.testng.annotations.Test)67 Builder (org.apache.helix.PropertyKey.Builder)59 ZKHelixDataAccessor (org.apache.helix.manager.zk.ZKHelixDataAccessor)40 Date (java.util.Date)39 HelixManager (org.apache.helix.HelixManager)35 IdealState (org.apache.helix.model.IdealState)33 LiveInstance (org.apache.helix.model.LiveInstance)31 HashMap (java.util.HashMap)30 MockParticipantManager (org.apache.helix.integration.manager.MockParticipantManager)30 Message (org.apache.helix.model.Message)30 ArrayList (java.util.ArrayList)28 ExternalView (org.apache.helix.model.ExternalView)26 PropertyPathBuilder (org.apache.helix.PropertyPathBuilder)25 Map (java.util.Map)19 HelixException (org.apache.helix.HelixException)19 ClusterControllerManager (org.apache.helix.integration.manager.ClusterControllerManager)19 InstanceConfig (org.apache.helix.model.InstanceConfig)17