Search in sources :

Example 71 with InstanceConfig

use of org.apache.helix.model.InstanceConfig in project ambry by linkedin.

the class HelixClusterManagerTest method verifyInitialClusterChanges.

// Helpers
/**
 * Checks that the given {@link HelixClusterManager} has absorbed the initial cluster notifications (InstanceConfig and
 * IdealState changes) and that its in-memory clustermap agrees with what the mock Helix cluster reports.
 * @param clusterManager the {@link HelixClusterManager} whose in-memory state is verified.
 * @param helixCluster the {@link MockHelixCluster} used as the source of truth.
 * @param dcs the names of the datacenters to verify.
 */
private void verifyInitialClusterChanges(HelixClusterManager clusterManager, MockHelixCluster helixCluster, String[] dcs) {
    // Snapshot of the structures the cluster manager built from the initial notification.
    Map<String, Map<String, String>> resourceLookupByDc = clusterManager.getPartitionToResourceMap();
    Map<String, Set<AmbryDataNode>> nodesByDc = clusterManager.getDcToDataNodesMap();
    for (String dc : dcs) {
        // Step 1: every instance known to Helix must show up as a data node in the cluster manager.
        List<InstanceConfig> helixInstances = helixCluster.getInstanceConfigsFromDcs(new String[] { dc });
        Set<AmbryDataNode> managerNodes = nodesByDc.get(dc);
        assertEquals("Mismatch in number of instances", helixInstances.size(), managerNodes.size());
        Set<String> managerHosts = managerNodes.stream().map(AmbryDataNode::getHostname).collect(Collectors.toSet());
        Set<String> helixHosts = helixInstances.stream().map(InstanceConfig::getHostName).collect(Collectors.toSet());
        assertEquals("Mismatch in hosts set", helixHosts, managerHosts);
        // Step 2: every partition listed in a Helix ideal state must map to that resource in the cluster manager.
        Map<String, String> resourceLookup = resourceLookupByDc.get(dc);
        for (IdealState idealState : helixCluster.getHelixAdminFromDc(dc).getIdealStates()) {
            String expectedResource = idealState.getResourceName();
            for (String partitionName : idealState.getPartitionSet()) {
                assertEquals("Mismatch in resource name", expectedResource, resourceLookup.get(partitionName));
            }
        }
    }
    if (cloudDc == null) {
        return;
    }
    // With a cloud DC present, each partition is expected to carry exactly one CloudServiceReplica.
    for (PartitionId partitionId : clusterManager.getAllPartitionIds(null)) {
        List<? extends ReplicaId> replicaIds = partitionId.getReplicaIds();
        long cloudReplicaCount = replicaIds.stream().filter(CloudServiceReplica.class::isInstance).count();
        assertEquals("Unexpected number of CloudServiceReplicas in partition: " + replicaIds, 1, cloudReplicaCount);
    }
}
Also used : HashSet(java.util.HashSet) Set(java.util.Set) IdealState(org.apache.helix.model.IdealState) InstanceConfig(org.apache.helix.model.InstanceConfig) HashMap(java.util.HashMap) Map(java.util.Map)

Example 72 with InstanceConfig

use of org.apache.helix.model.InstanceConfig in project ambry by linkedin.

the class HelixClusterManagerTest method routingTableProviderChangeTest.

/**
 * Test that routing table change reflects correct state of each replica and {@link HelixClusterManager} is able to get
 * replica in required state. Three kinds of change are exercised against the local DC: an instance going down, the
 * same instance coming back up, and a leadership change for one randomly chosen partition.
 * @throws Exception on any unexpected failure while setting up or polling the cluster manager.
 */
@Test
public void routingTableProviderChangeTest() throws Exception {
    assumeTrue(!useComposite && !overrideEnabled && !listenCrossColo);
    // Change zk connect strings to ensure HelixClusterManager sees local DC only
    JSONObject zkJson = constructZkLayoutJSON(Collections.singletonList(dcsToZkInfo.get(localDc)));
    Properties props = new Properties();
    props.setProperty("clustermap.host.name", hostname);
    props.setProperty("clustermap.cluster.name", clusterNamePrefixInHelix + clusterNameStatic);
    props.setProperty("clustermap.datacenter.name", localDc);
    props.setProperty("clustermap.port", Integer.toString(portNum));
    props.setProperty("clustermap.dcs.zk.connect.strings", zkJson.toString(2));
    props.setProperty("clustermap.current.xid", Long.toString(CURRENT_XID));
    ClusterMapConfig clusterMapConfig = new ClusterMapConfig(new VerifiableProperties(props));
    // Use a fresh metric registry for the cluster manager created by this test
    metricRegistry = new MetricRegistry();
    HelixClusterManager helixClusterManager = new HelixClusterManager(clusterMapConfig, selfInstanceName, new MockHelixManagerFactory(helixCluster, null, null), metricRegistry);
    // Everything below runs in try/finally so the cluster manager is closed even when an assertion fails or a
    // polling loop times out; otherwise the manager would leak into subsequent tests.
    try {
        Map<String, RoutingTableSnapshot> snapshotsByDc = helixClusterManager.getRoutingTableSnapshots();
        RoutingTableSnapshot localDcSnapshot = snapshotsByDc.get(localDc);
        Set<InstanceConfig> instanceConfigsInSnapshot = new HashSet<>(localDcSnapshot.getInstanceConfigs());
        Set<InstanceConfig> instanceConfigsInCluster = new HashSet<>(helixCluster.getInstanceConfigsFromDcs(new String[] { localDc }));
        assertEquals("Mismatch in instance configs", instanceConfigsInCluster, instanceConfigsInSnapshot);
        // verify leader replica of each partition is correct
        verifyLeaderReplicasInDc(helixClusterManager, localDc);
        // test live instance triggered routing table change
        // we purposely bring down one instance and wait for expected number of live instance unless times out.
        int initialLiveCnt = localDcSnapshot.getLiveInstances().size();
        MockHelixAdmin mockHelixAdmin = helixCluster.getHelixAdminFromDc(localDc);
        // Pick any instance other than the one this cluster manager is running as; fail with a clear message if the
        // cluster has no such instance instead of surfacing a bare NoSuchElementException.
        String instance = instanceConfigsInCluster.stream()
            .filter(insConfig -> !insConfig.getInstanceName().equals(selfInstanceName))
            .findFirst()
            .orElseThrow(() -> new AssertionError("No instance other than " + selfInstanceName + " found in " + localDc))
            .getInstanceName();
        mockHelixAdmin.bringInstanceDown(instance);
        mockHelixAdmin.triggerRoutingTableNotification();
        // Poll up to 5 times at 200 ms intervals (1 sec total) for the snapshot to reflect the change.
        int sleepCnt = 0;
        while (helixClusterManager.getRoutingTableSnapshots().get(localDc).getLiveInstances().size() != initialLiveCnt - 1) {
            assertTrue("Routing table change (triggered by bringing down node) didn't come within 1 sec", sleepCnt < 5);
            Thread.sleep(200);
            sleepCnt++;
        }
        // then bring up the same instance, the number of live instances should equal to initial count
        mockHelixAdmin.bringInstanceUp(instance);
        mockHelixAdmin.triggerRoutingTableNotification();
        sleepCnt = 0;
        while (helixClusterManager.getRoutingTableSnapshots().get(localDc).getLiveInstances().size() != initialLiveCnt) {
            assertTrue("Routing table change (triggered by bringing up node) didn't come within 1 sec", sleepCnt < 5);
            Thread.sleep(200);
            sleepCnt++;
        }
        // randomly choose a partition and change the leader replica of it in cluster
        List<? extends PartitionId> defaultPartitionIds = helixClusterManager.getAllPartitionIds(DEFAULT_PARTITION_CLASS);
        PartitionId partitionToChange = defaultPartitionIds.get((new Random()).nextInt(defaultPartitionIds.size()));
        String partitionPath = partitionToChange.toPathString();
        String currentLeaderInstance = mockHelixAdmin.getPartitionToLeaderReplica().get(partitionPath);
        // The port is taken from the second "_"-separated token of the instance name.
        int currentLeaderPort = Integer.parseInt(currentLeaderInstance.split("_")[1]);
        String newLeaderInstance = mockHelixAdmin.getInstancesForPartition(partitionPath).stream()
            .filter(k -> !k.equals(currentLeaderInstance))
            .findFirst()
            .orElseThrow(() -> new AssertionError("No alternative leader candidate for partition " + partitionPath));
        mockHelixAdmin.changeLeaderReplicaForPartition(partitionPath, newLeaderInstance);
        mockHelixAdmin.triggerRoutingTableNotification();
        sleepCnt = 0;
        while (partitionToChange.getReplicaIdsByState(ReplicaState.LEADER, localDc).get(0).getDataNodeId().getPort() == currentLeaderPort) {
            assertTrue("Routing table change (triggered by leadership change) didn't come within 1 sec", sleepCnt < 5);
            Thread.sleep(200);
            sleepCnt++;
        }
        verifyLeaderReplicasInDc(helixClusterManager, localDc);
    } finally {
        helixClusterManager.close();
    }
}
Also used : CoreMatchers(org.hamcrest.CoreMatchers) Arrays(java.util.Arrays) IdealState(org.apache.helix.model.IdealState) ClusterMapUtils(com.github.ambry.clustermap.ClusterMapUtils) ServerErrorCode(com.github.ambry.server.ServerErrorCode) RunWith(org.junit.runner.RunWith) HashMap(java.util.HashMap) Random(java.util.Random) RoutingTableSnapshot(org.apache.helix.spectator.RoutingTableSnapshot) ByteBuffer(java.nio.ByteBuffer) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) MockitoAnnotations(org.mockito.MockitoAnnotations) TestUtils(com.github.ambry.clustermap.TestUtils) JSONException(org.json.JSONException) JSONObject(org.json.JSONObject) Map(java.util.Map) After(org.junit.After) Counter(com.codahale.metrics.Counter) Assume(org.junit.Assume) Parameterized(org.junit.runners.Parameterized) MetricRegistry(com.codahale.metrics.MetricRegistry) Properties(java.util.Properties) Pair(com.github.ambry.utils.Pair) Files(java.nio.file.Files) VerifiableProperties(com.github.ambry.config.VerifiableProperties) Set(java.util.Set) HelixManager(org.apache.helix.HelixManager) Utils(com.github.ambry.utils.Utils) IOException(java.io.IOException) Test(org.junit.Test) Collectors(java.util.stream.Collectors) InstanceConfig(org.apache.helix.model.InstanceConfig) File(java.io.File) ZNRecord(org.apache.helix.zookeeper.datamodel.ZNRecord) Mockito(org.mockito.Mockito) List(java.util.List) ByteBufferInputStream(com.github.ambry.utils.ByteBufferInputStream) InstanceType(org.apache.helix.InstanceType) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) Gauge(com.codahale.metrics.Gauge) Assert(org.junit.Assert) Collections(java.util.Collections) ResponseHandler(com.github.ambry.commons.ResponseHandler) InputStream(java.io.InputStream) VerifiableProperties(com.github.ambry.config.VerifiableProperties) MetricRegistry(com.codahale.metrics.MetricRegistry) Properties(java.util.Properties) VerifiableProperties(com.github.ambry.config.VerifiableProperties) 
ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) RoutingTableSnapshot(org.apache.helix.spectator.RoutingTableSnapshot) JSONObject(org.json.JSONObject) InstanceConfig(org.apache.helix.model.InstanceConfig) Random(java.util.Random) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 73 with InstanceConfig

use of org.apache.helix.model.InstanceConfig in project ambry by linkedin.

the class AmbryStateModelFactoryTest method testAmbryPartitionStateModel.

/**
 * Test that {@link HelixParticipantMetrics} keeps track of partition during state transition.
 * The test walks a single partition through OFFLINE -> BOOTSTRAP -> STANDBY -> LEADER -> STANDBY -> INACTIVE ->
 * OFFLINE -> DROPPED, then the ERROR -> DROPPED and ERROR -> OFFLINE transitions and repeated resets, asserting the
 * per-state partition counts after each step.
 */
@Test
public void testAmbryPartitionStateModel() {
    assumeTrue(stateModelDef.equals(ClusterMapConfig.AMBRY_STATE_MODEL_DEF));
    MetricRegistry metricRegistry = new MetricRegistry();
    MockHelixParticipant.metricRegistry = metricRegistry;
    DataNodeConfig mockDataNodeConfig = Mockito.mock(DataNodeConfig.class);
    Set<String> disabledPartitionSet = new HashSet<>();
    Set<String> enabledPartitionSet = new HashSet<>();
    when(mockDataNodeConfig.getDisabledReplicas()).thenReturn(disabledPartitionSet);
    DataNodeConfigSource mockConfigSource = Mockito.mock(DataNodeConfigSource.class);
    when(mockConfigSource.get(anyString())).thenReturn(mockDataNodeConfig);
    HelixAdmin mockHelixAdmin = Mockito.mock(HelixAdmin.class);
    InstanceConfig mockInstanceConfig = Mockito.mock(InstanceConfig.class);
    // Record every partition that gets re-enabled on the mocked InstanceConfig so it can be asserted later.
    doAnswer(invocation -> {
        String partitionName = invocation.getArgument(1);
        boolean enable = invocation.getArgument(2);
        if (enable) {
            enabledPartitionSet.add(partitionName);
        }
        return null;
    }).when(mockInstanceConfig).setInstanceEnabledForPartition(any(), any(), anyBoolean());
    when(mockHelixAdmin.getInstanceConfig(anyString(), anyString())).thenReturn(mockInstanceConfig);
    when(mockHelixAdmin.setInstanceConfig(anyString(), anyString(), any())).thenReturn(true);
    HelixManager mockHelixManager = Mockito.mock(HelixManager.class);
    when(mockHelixManager.getClusterManagmentTool()).thenReturn(mockHelixAdmin);
    MockHelixManagerFactory.overrideGetHelixManager = true;
    // The flag above is static; restore it in a finally block so a failing assertion in this test cannot leave it
    // set for tests that run afterwards.
    try {
        MockHelixParticipant.mockHelixFactory = new MockHelixManagerFactory(mockConfigSource, mockHelixManager);
        MockHelixParticipant mockHelixParticipant = new MockHelixParticipant(config);
        HelixParticipantMetrics participantMetrics = mockHelixParticipant.getHelixParticipantMetrics();
        String resourceName = "0";
        String partitionName = "1";
        Message mockMessage = Mockito.mock(Message.class);
        when(mockMessage.getPartitionName()).thenReturn(partitionName);
        when(mockMessage.getResourceName()).thenReturn(resourceName);
        AmbryPartitionStateModel stateModel = new AmbryPartitionStateModel(resourceName, partitionName, mockHelixParticipant, config);
        mockHelixParticipant.setInitialLocalPartitions(new HashSet<>(Collections.singletonList(partitionName)));
        assertEquals("Offline count is not expected", 1, getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "offlinePartitionCount"));
        // OFFLINE -> BOOTSTRAP
        stateModel.onBecomeBootstrapFromOffline(mockMessage, null);
        assertEquals("Offline count should be 0", 0, getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "offlinePartitionCount"));
        assertEquals("Bootstrap count should be 1", 1, getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "bootstrapPartitionCount"));
        // BOOTSTRAP -> STANDBY
        stateModel.onBecomeStandbyFromBootstrap(mockMessage, null);
        assertEquals("Offline count should be 0", 0, getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "offlinePartitionCount"));
        assertEquals("Standby count should be 1", 1, getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "standbyPartitionCount"));
        assertEquals("Bootstrap count should be 0", 0, getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "bootstrapPartitionCount"));
        // STANDBY -> LEADER
        stateModel.onBecomeLeaderFromStandby(mockMessage, null);
        assertEquals("Offline count should be 0", 0, getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "offlinePartitionCount"));
        assertEquals("Leader count should be 1", 1, getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "leaderPartitionCount"));
        assertEquals("Standby count should be 0", 0, getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "standbyPartitionCount"));
        // LEADER -> STANDBY
        stateModel.onBecomeStandbyFromLeader(mockMessage, null);
        assertEquals("Offline count should be 0", 0, getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "offlinePartitionCount"));
        assertEquals("Standby count should be 1", 1, getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "standbyPartitionCount"));
        assertEquals("Leader count should be 0", 0, getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "leaderPartitionCount"));
        // STANDBY -> INACTIVE
        stateModel.onBecomeInactiveFromStandby(mockMessage, null);
        assertEquals("Offline count should be 0", 0, getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "offlinePartitionCount"));
        assertEquals("Inactive count should be 1", 1, getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "inactivePartitionCount"));
        assertEquals("Standby count should be 0", 0, getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "standbyPartitionCount"));
        // INACTIVE -> OFFLINE
        stateModel.onBecomeOfflineFromInactive(mockMessage, null);
        assertEquals("Offline count should be 1", 1, getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "offlinePartitionCount"));
        assertEquals("Inactive count should be 0", 0, getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "inactivePartitionCount"));
        // OFFLINE -> DROPPED
        disabledPartitionSet.add(partitionName);
        stateModel.onBecomeDroppedFromOffline(mockMessage, null);
        assertEquals("Offline count should be 0", 0, getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "offlinePartitionCount"));
        assertEquals("Dropped count should be updated", 1, participantMetrics.partitionDroppedCount.getCount());
        assertTrue("Partition should be removed from disabled partition set", disabledPartitionSet.isEmpty());
        assertEquals("Mismatch in enabled partition", partitionName, enabledPartitionSet.iterator().next());
        // ERROR -> DROPPED
        stateModel.onBecomeDroppedFromError(mockMessage, null);
        assertEquals("Dropped count should be updated", 2, participantMetrics.partitionDroppedCount.getCount());
        // ERROR -> OFFLINE (this occurs when we use Helix API to reset certain partition in ERROR state)
        stateModel.onBecomeOfflineFromError(mockMessage, null);
        assertEquals("Offline count should be 1", 1, getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "offlinePartitionCount"));
        // reset method
        stateModel.reset();
        assertEquals("Offline count should be 1 after reset", 1, getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "offlinePartitionCount"));
        // call reset method again to mock the case where same partition is reset multiple times during zk disconnection or shutdown
        stateModel.reset();
        assertEquals("Offline count should still be 1 after reset twice", 1, getHelixParticipantMetricValue(metricRegistry, HelixParticipant.class.getName(), "offlinePartitionCount"));
    } finally {
        MockHelixManagerFactory.overrideGetHelixManager = false;
    }
}
Also used : HelixManager(org.apache.helix.HelixManager) Message(org.apache.helix.model.Message) MetricRegistry(com.codahale.metrics.MetricRegistry) HelixAdmin(org.apache.helix.HelixAdmin) InstanceConfig(org.apache.helix.model.InstanceConfig) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 74 with InstanceConfig

use of org.apache.helix.model.InstanceConfig in project pinot by linkedin.

the class Instance method toInstanceConfig.

/**
 * Builds a Helix {@link InstanceConfig} for this instance.
 * The config is keyed by {@code toInstanceId()}, carries this instance's host and port, is marked enabled, and is
 * tagged with the value of {@code getTagOrDefaultTag()}.
 * @return a newly created {@link InstanceConfig} describing this instance.
 */
public InstanceConfig toInstanceConfig() {
    String instanceId = toInstanceId();
    InstanceConfig instanceConfig = new InstanceConfig(instanceId);
    instanceConfig.setHostName(_host);
    instanceConfig.setPort(_port);
    instanceConfig.setInstanceEnabled(true);
    String tag = getTagOrDefaultTag();
    instanceConfig.addTag(tag);
    return instanceConfig;
}
Also used : InstanceConfig(org.apache.helix.model.InstanceConfig)

Example 75 with InstanceConfig

use of org.apache.helix.model.InstanceConfig in project pinot by linkedin.

the class PinotInstanceRestletResource method getInstanceInformation.

/**
 * Looks up a single instance and renders its Helix configuration as JSON.
 * Responds with a not-found status and a plain-text message when the instance does not exist, and with an internal
 * server error status and an empty JSON object when any exception is raised during the lookup.
 *
 * @param instanceName The instance name
 * @return the representation to send back to the client
 */
@HttpVerb("get")
@Summary("Gets information for an instance")
@Tags({ "instance" })
@Paths({ "/instances/{instanceName}", "/instances/{instanceName}/" })
@Responses({ @Response(statusCode = "200", description = "Information about the specified instance"), @Response(statusCode = "404", description = "The specified instance does not exist"), @Response(statusCode = "500", description = "There was an error while fetching information for the given instance") })
private Representation getInstanceInformation(@Parameter(name = "instanceName", description = "The name of the instance (eg. Server_1.2.3.4_1234 or Broker_someHost.example.com_2345)", in = "path", required = true) String instanceName) {
    try {
        if (_pinotHelixResourceManager.instanceExists(instanceName)) {
            // Known instance: copy the relevant fields of its Helix config into the JSON payload.
            InstanceConfig helixConfig = _pinotHelixResourceManager.getHelixInstanceConfig(instanceName);
            JSONObject instanceJson = new JSONObject();
            instanceJson.put("instanceName", helixConfig.getInstanceName());
            instanceJson.put("hostName", helixConfig.getHostName());
            instanceJson.put("enabled", helixConfig.getInstanceEnabled());
            instanceJson.put("port", helixConfig.getPort());
            instanceJson.put("tags", new JSONArray(helixConfig.getTags()));
            String payload = instanceJson.toString();
            return new StringRepresentation(payload);
        }
        // Unknown instance: report it as a client error.
        setStatus(Status.CLIENT_ERROR_NOT_FOUND);
        return new StringRepresentation("Error: Instance " + instanceName + " not found.");
    } catch (Exception ex) {
        LOGGER.warn("Caught exception while fetching information for instance {}", instanceName, ex);
        setStatus(Status.SERVER_ERROR_INTERNAL);
        return new StringRepresentation("{}");
    }
}
Also used : InstanceConfig(org.apache.helix.model.InstanceConfig) JSONObject(org.json.JSONObject) StringRepresentation(org.restlet.representation.StringRepresentation) JSONArray(org.json.JSONArray) JSONException(org.json.JSONException) Summary(com.linkedin.pinot.common.restlet.swagger.Summary) HttpVerb(com.linkedin.pinot.common.restlet.swagger.HttpVerb) Paths(com.linkedin.pinot.common.restlet.swagger.Paths) Tags(com.linkedin.pinot.common.restlet.swagger.Tags) Responses(com.linkedin.pinot.common.restlet.swagger.Responses)

Aggregations

InstanceConfig (org.apache.helix.model.InstanceConfig)149 ArrayList (java.util.ArrayList)40 Test (org.testng.annotations.Test)35 HashMap (java.util.HashMap)32 HashSet (java.util.HashSet)28 ZNRecord (org.apache.helix.ZNRecord)26 IdealState (org.apache.helix.model.IdealState)24 ExternalView (org.apache.helix.model.ExternalView)23 Map (java.util.Map)21 HelixException (org.apache.helix.HelixException)21 HelixAdmin (org.apache.helix.HelixAdmin)20 List (java.util.List)19 ZKHelixAdmin (org.apache.helix.manager.zk.ZKHelixAdmin)19 HelixDataAccessor (org.apache.helix.HelixDataAccessor)17 ClusterMapConfig (com.github.ambry.config.ClusterMapConfig)15 Test (org.junit.Test)15 Set (java.util.Set)13 VerifiableProperties (com.github.ambry.config.VerifiableProperties)12 IOException (java.io.IOException)12 ZNRecord (org.apache.helix.zookeeper.datamodel.ZNRecord)12