use of org.apache.helix.model.InstanceConfig in project ambry by linkedin.
the class HelixClusterManagerTest method verifyInitialClusterChanges.
// Helpers
/**
 * Verifies that the given {@link HelixClusterManager} has picked up the initial cluster changes (i.e. InstanceConfig
 * and IdealState changes) and that its in-memory clustermap agrees with what the mock Helix cluster reports.
 * <p>
 * For every datacenter in {@code dcs} the hosts known to the cluster manager must equal the hosts of the Helix
 * instance configs, and every partition listed in a Helix ideal state must map to that ideal state's resource name.
 * When the test's {@code cloudDc} is non-null, every partition must additionally contain exactly one
 * {@link CloudServiceReplica}.
 * @param clusterManager the {@link HelixClusterManager} to use for verification.
 * @param helixCluster the {@link MockHelixCluster} to provide cluster infos.
 * @param dcs the names of the datacenters to verify.
 */
private void verifyInitialClusterChanges(HelixClusterManager clusterManager, MockHelixCluster helixCluster,
    String[] dcs) {
  // snapshot of the in-mem data structures populated by the initial notification
  Map<String, Map<String, String>> resourceLookupByDc = clusterManager.getPartitionToResourceMap();
  Map<String, Set<AmbryDataNode>> nodesByDc = clusterManager.getDcToDataNodesMap();
  for (String dc : dcs) {
    // 1. every instanceConfig known to Helix must show up as a data node in the cluster manager
    List<InstanceConfig> helixInstanceConfigs = helixCluster.getInstanceConfigsFromDcs(new String[]{dc});
    assertEquals("Mismatch in number of instances", helixInstanceConfigs.size(), nodesByDc.get(dc).size());
    Set<String> managerHosts =
        nodesByDc.get(dc).stream().map(AmbryDataNode::getHostname).collect(Collectors.toSet());
    Set<String> helixHosts =
        helixInstanceConfigs.stream().map(InstanceConfig::getHostName).collect(Collectors.toSet());
    assertEquals("Mismatch in hosts set", helixHosts, managerHosts);

    // 2. every partition of every Helix resource must map back to that resource in the cluster manager
    Map<String, String> resourceByPartition = resourceLookupByDc.get(dc);
    MockHelixAdmin adminForDc = helixCluster.getHelixAdminFromDc(dc);
    for (IdealState idealState : adminForDc.getIdealStates()) {
      String expectedResource = idealState.getResourceName();
      for (String partitionName : idealState.getPartitionSet()) {
        assertEquals("Mismatch in resource name", expectedResource, resourceByPartition.get(partitionName));
      }
    }
  }
  if (cloudDc == null) {
    // no cloud DC configured, so there are no virtual replicas to check
    return;
  }
  // If one cloud DC is present, there should be exactly one virtual replica for every partition.
  for (PartitionId partitionId : clusterManager.getAllPartitionIds(null)) {
    List<? extends ReplicaId> replicaIds = partitionId.getReplicaIds();
    long cloudReplicaCount = replicaIds.stream().filter(CloudServiceReplica.class::isInstance).count();
    assertEquals("Unexpected number of CloudServiceReplicas in partition: " + replicaIds, 1, cloudReplicaCount);
  }
}
use of org.apache.helix.model.InstanceConfig in project ambry by linkedin.
the class HelixClusterManagerTest method routingTableProviderChangeTest.
/**
 * Test that routing table change reflects correct state of each replica and {@link HelixClusterManager} is able to get
 * replica in required state.
 * <p>
 * The {@link HelixClusterManager} built by this test is closed in a {@code finally} block, so it is released even
 * when an assertion or one of the timeout checks fails part-way through.
 * @throws Exception if any call made by the test throws; this includes {@link Thread#sleep(long)} being interrupted.
 */
@Test
public void routingTableProviderChangeTest() throws Exception {
  assumeTrue(!useComposite && !overrideEnabled && !listenCrossColo);
  // Change zk connect strings to ensure HelixClusterManager sees local DC only
  JSONObject zkJson = constructZkLayoutJSON(Collections.singletonList(dcsToZkInfo.get(localDc)));
  Properties props = new Properties();
  props.setProperty("clustermap.host.name", hostname);
  props.setProperty("clustermap.cluster.name", clusterNamePrefixInHelix + clusterNameStatic);
  props.setProperty("clustermap.datacenter.name", localDc);
  props.setProperty("clustermap.port", Integer.toString(portNum));
  props.setProperty("clustermap.dcs.zk.connect.strings", zkJson.toString(2));
  props.setProperty("clustermap.current.xid", Long.toString(CURRENT_XID));
  ClusterMapConfig clusterMapConfig = new ClusterMapConfig(new VerifiableProperties(props));
  // replace the test's metricRegistry with a fresh instance for the cluster manager created below
  metricRegistry = new MetricRegistry();
  HelixClusterManager helixClusterManager =
      new HelixClusterManager(clusterMapConfig, selfInstanceName, new MockHelixManagerFactory(helixCluster, null, null),
          metricRegistry);
  try {
    Map<String, RoutingTableSnapshot> snapshotsByDc = helixClusterManager.getRoutingTableSnapshots();
    RoutingTableSnapshot localDcSnapshot = snapshotsByDc.get(localDc);
    Set<InstanceConfig> instanceConfigsInSnapshot = new HashSet<>(localDcSnapshot.getInstanceConfigs());
    Set<InstanceConfig> instanceConfigsInCluster =
        new HashSet<>(helixCluster.getInstanceConfigsFromDcs(new String[]{localDc}));
    assertEquals("Mismatch in instance configs", instanceConfigsInCluster, instanceConfigsInSnapshot);
    // verify leader replica of each partition is correct
    verifyLeaderReplicasInDc(helixClusterManager, localDc);

    // test live instance triggered routing table change
    // we purposely bring down one instance and wait for expected number of live instance unless times out.
    int initialLiveCnt = localDcSnapshot.getLiveInstances().size();
    MockHelixAdmin mockHelixAdmin = helixCluster.getHelixAdminFromDc(localDc);
    // pick any instance other than the one this cluster manager is running as
    String instance = instanceConfigsInCluster.stream()
        .filter(insConfig -> !insConfig.getInstanceName().equals(selfInstanceName))
        .findFirst()
        .get()
        .getInstanceName();
    mockHelixAdmin.bringInstanceDown(instance);
    mockHelixAdmin.triggerRoutingTableNotification();
    // poll in 200 ms steps; the assertion fails once 5 sleeps have elapsed without the expected change
    int sleepCnt = 0;
    while (helixClusterManager.getRoutingTableSnapshots().get(localDc).getLiveInstances().size()
        != initialLiveCnt - 1) {
      assertTrue("Routing table change (triggered by bringing down node) didn't come within 1 sec", sleepCnt < 5);
      Thread.sleep(200);
      sleepCnt++;
    }

    // then bring up the same instance, the number of live instances should equal to initial count
    mockHelixAdmin.bringInstanceUp(instance);
    mockHelixAdmin.triggerRoutingTableNotification();
    sleepCnt = 0;
    while (helixClusterManager.getRoutingTableSnapshots().get(localDc).getLiveInstances().size() != initialLiveCnt) {
      assertTrue("Routing table change (triggered by bringing up node) didn't come within 1 sec", sleepCnt < 5);
      Thread.sleep(200);
      sleepCnt++;
    }

    // randomly choose a partition and change the leader replica of it in cluster
    List<? extends PartitionId> defaultPartitionIds = helixClusterManager.getAllPartitionIds(DEFAULT_PARTITION_CLASS);
    PartitionId partitionToChange = defaultPartitionIds.get((new Random()).nextInt(defaultPartitionIds.size()));
    String currentLeaderInstance = mockHelixAdmin.getPartitionToLeaderReplica().get(partitionToChange.toPathString());
    // the port is taken from the second "_"-separated token of the instance name
    int currentLeaderPort = Integer.parseInt(currentLeaderInstance.split("_")[1]);
    String newLeaderInstance = mockHelixAdmin.getInstancesForPartition(partitionToChange.toPathString())
        .stream()
        .filter(k -> !k.equals(currentLeaderInstance))
        .findFirst()
        .get();
    mockHelixAdmin.changeLeaderReplicaForPartition(partitionToChange.toPathString(), newLeaderInstance);
    mockHelixAdmin.triggerRoutingTableNotification();
    sleepCnt = 0;
    // wait until the leader replica reported for the partition no longer sits on the old leader's port
    while (partitionToChange.getReplicaIdsByState(ReplicaState.LEADER, localDc).get(0).getDataNodeId().getPort()
        == currentLeaderPort) {
      assertTrue("Routing table change (triggered by leadership change) didn't come within 1 sec", sleepCnt < 5);
      Thread.sleep(200);
      sleepCnt++;
    }
    verifyLeaderReplicasInDc(helixClusterManager, localDc);
  } finally {
    // always release the cluster manager, including when an assertion above has failed
    helixClusterManager.close();
  }
}
use of org.apache.helix.model.InstanceConfig in project ambry by linkedin.
the class AmbryStateModelFactoryTest method testAmbryPartitionStateModel.
/**
 * Test that {@link HelixParticipantMetrics} keeps track of partition during state transition
 * <p>
 * The test switches on the static {@code MockHelixManagerFactory.overrideGetHelixManager} flag; the flag is reset in
 * a {@code finally} block so that a failing assertion does not leave it set.
 */
@Test
public void testAmbryPartitionStateModel() {
  assumeTrue(stateModelDef.equals(ClusterMapConfig.AMBRY_STATE_MODEL_DEF));
  MetricRegistry metricRegistry = new MetricRegistry();
  MockHelixParticipant.metricRegistry = metricRegistry;
  DataNodeConfig mockDataNodeConfig = Mockito.mock(DataNodeConfig.class);
  Set<String> disabledPartitionSet = new HashSet<>();
  Set<String> enabledPartitionSet = new HashSet<>();
  when(mockDataNodeConfig.getDisabledReplicas()).thenReturn(disabledPartitionSet);
  DataNodeConfigSource mockConfigSource = Mockito.mock(DataNodeConfigSource.class);
  when(mockConfigSource.get(anyString())).thenReturn(mockDataNodeConfig);
  HelixAdmin mockHelixAdmin = Mockito.mock(HelixAdmin.class);
  InstanceConfig mockInstanceConfig = Mockito.mock(InstanceConfig.class);
  // record every partition that gets enabled through the mocked InstanceConfig
  doAnswer(invocation -> {
    String enabledPartition = invocation.getArgument(1);
    boolean enable = invocation.getArgument(2);
    if (enable) {
      enabledPartitionSet.add(enabledPartition);
    }
    return null;
  }).when(mockInstanceConfig).setInstanceEnabledForPartition(any(), any(), anyBoolean());
  when(mockHelixAdmin.getInstanceConfig(anyString(), anyString())).thenReturn(mockInstanceConfig);
  when(mockHelixAdmin.setInstanceConfig(anyString(), anyString(), any())).thenReturn(true);
  HelixManager mockHelixManager = Mockito.mock(HelixManager.class);
  when(mockHelixManager.getClusterManagmentTool()).thenReturn(mockHelixAdmin);
  // name under which the partition count metrics are looked up in the registry
  final String metricsOwner = HelixParticipant.class.getName();
  MockHelixManagerFactory.overrideGetHelixManager = true;
  try {
    MockHelixParticipant.mockHelixFactory = new MockHelixManagerFactory(mockConfigSource, mockHelixManager);
    MockHelixParticipant mockHelixParticipant = new MockHelixParticipant(config);
    HelixParticipantMetrics participantMetrics = mockHelixParticipant.getHelixParticipantMetrics();
    String resourceName = "0";
    String partitionName = "1";
    Message mockMessage = Mockito.mock(Message.class);
    when(mockMessage.getPartitionName()).thenReturn(partitionName);
    when(mockMessage.getResourceName()).thenReturn(resourceName);
    AmbryPartitionStateModel stateModel =
        new AmbryPartitionStateModel(resourceName, partitionName, mockHelixParticipant, config);
    mockHelixParticipant.setInitialLocalPartitions(new HashSet<>(Collections.singletonList(partitionName)));
    assertEquals("Offline count is not expected", 1,
        getHelixParticipantMetricValue(metricRegistry, metricsOwner, "offlinePartitionCount"));
    // OFFLINE -> BOOTSTRAP
    stateModel.onBecomeBootstrapFromOffline(mockMessage, null);
    assertEquals("Offline count should be 0", 0,
        getHelixParticipantMetricValue(metricRegistry, metricsOwner, "offlinePartitionCount"));
    assertEquals("Bootstrap count should be 1", 1,
        getHelixParticipantMetricValue(metricRegistry, metricsOwner, "bootstrapPartitionCount"));
    // BOOTSTRAP -> STANDBY
    stateModel.onBecomeStandbyFromBootstrap(mockMessage, null);
    assertEquals("Offline count should be 0", 0,
        getHelixParticipantMetricValue(metricRegistry, metricsOwner, "offlinePartitionCount"));
    assertEquals("Standby count should be 1", 1,
        getHelixParticipantMetricValue(metricRegistry, metricsOwner, "standbyPartitionCount"));
    assertEquals("Bootstrap count should be 0", 0,
        getHelixParticipantMetricValue(metricRegistry, metricsOwner, "bootstrapPartitionCount"));
    // STANDBY -> LEADER
    stateModel.onBecomeLeaderFromStandby(mockMessage, null);
    assertEquals("Offline count should be 0", 0,
        getHelixParticipantMetricValue(metricRegistry, metricsOwner, "offlinePartitionCount"));
    assertEquals("Leader count should be 1", 1,
        getHelixParticipantMetricValue(metricRegistry, metricsOwner, "leaderPartitionCount"));
    assertEquals("Standby count should be 0", 0,
        getHelixParticipantMetricValue(metricRegistry, metricsOwner, "standbyPartitionCount"));
    // LEADER -> STANDBY
    stateModel.onBecomeStandbyFromLeader(mockMessage, null);
    assertEquals("Offline count should be 0", 0,
        getHelixParticipantMetricValue(metricRegistry, metricsOwner, "offlinePartitionCount"));
    assertEquals("Standby count should be 1", 1,
        getHelixParticipantMetricValue(metricRegistry, metricsOwner, "standbyPartitionCount"));
    assertEquals("Leader count should be 0", 0,
        getHelixParticipantMetricValue(metricRegistry, metricsOwner, "leaderPartitionCount"));
    // STANDBY -> INACTIVE
    stateModel.onBecomeInactiveFromStandby(mockMessage, null);
    assertEquals("Offline count should be 0", 0,
        getHelixParticipantMetricValue(metricRegistry, metricsOwner, "offlinePartitionCount"));
    assertEquals("Inactive count should be 1", 1,
        getHelixParticipantMetricValue(metricRegistry, metricsOwner, "inactivePartitionCount"));
    assertEquals("Standby count should be 0", 0,
        getHelixParticipantMetricValue(metricRegistry, metricsOwner, "standbyPartitionCount"));
    // INACTIVE -> OFFLINE
    stateModel.onBecomeOfflineFromInactive(mockMessage, null);
    assertEquals("Offline count should be 1", 1,
        getHelixParticipantMetricValue(metricRegistry, metricsOwner, "offlinePartitionCount"));
    assertEquals("Inactive count should be 0", 0,
        getHelixParticipantMetricValue(metricRegistry, metricsOwner, "inactivePartitionCount"));
    // OFFLINE -> DROPPED
    disabledPartitionSet.add(partitionName);
    stateModel.onBecomeDroppedFromOffline(mockMessage, null);
    assertEquals("Offline count should be 0", 0,
        getHelixParticipantMetricValue(metricRegistry, metricsOwner, "offlinePartitionCount"));
    assertEquals("Dropped count should be updated", 1, participantMetrics.partitionDroppedCount.getCount());
    assertTrue("Partition should be removed from disabled partition set", disabledPartitionSet.isEmpty());
    assertEquals("Mismatch in enabled partition", partitionName, enabledPartitionSet.iterator().next());
    // ERROR -> DROPPED
    stateModel.onBecomeDroppedFromError(mockMessage, null);
    assertEquals("Dropped count should be updated", 2, participantMetrics.partitionDroppedCount.getCount());
    // ERROR -> OFFLINE (this occurs when we use Helix API to reset certain partition in ERROR state)
    stateModel.onBecomeOfflineFromError(mockMessage, null);
    assertEquals("Offline count should be 1", 1,
        getHelixParticipantMetricValue(metricRegistry, metricsOwner, "offlinePartitionCount"));
    // reset method
    stateModel.reset();
    assertEquals("Offline count should be 1 after reset", 1,
        getHelixParticipantMetricValue(metricRegistry, metricsOwner, "offlinePartitionCount"));
    // call reset method again to mock the case where same partition is reset multiple times during zk disconnection or shutdown
    stateModel.reset();
    assertEquals("Offline count should still be 1 after reset twice", 1,
        getHelixParticipantMetricValue(metricRegistry, metricsOwner, "offlinePartitionCount"));
  } finally {
    // restore the static flag even when an assertion above fails
    MockHelixManagerFactory.overrideGetHelixManager = false;
  }
}
use of org.apache.helix.model.InstanceConfig in project pinot by linkedin.
the class Instance method toInstanceConfig.
/**
 * Builds a Helix {@link InstanceConfig} for this instance.
 * <p>
 * The config is keyed by {@code toInstanceId()}, carries this instance's host and port, is marked as enabled and is
 * tagged with the value of {@code getTagOrDefaultTag()}.
 * @return a newly created {@link InstanceConfig} describing this instance.
 */
public InstanceConfig toInstanceConfig() {
  final InstanceConfig instanceConfig = new InstanceConfig(toInstanceId());
  // network location
  instanceConfig.setHostName(_host);
  instanceConfig.setPort(_port);
  // the returned config is always marked enabled
  instanceConfig.setInstanceEnabled(true);
  instanceConfig.addTag(getTagOrDefaultTag());
  return instanceConfig;
}
use of org.apache.helix.model.InstanceConfig in project pinot by linkedin.
the class PinotInstanceRestletResource method getInstanceInformation.
/**
 * Gets the information for an instance.
 * <p>
 * Responds with a JSON object holding the instance name, host name, enabled flag, port and tags of the instance.
 * If the instance does not exist the status is set to 404 and an error message is returned; if anything throws while
 * building the response the status is set to 500 and an empty JSON object is returned.
 *
 * @param instanceName The instance name
 * @return the {@link Representation} to send back to the client
 */
@HttpVerb("get")
@Summary("Gets information for an instance")
@Tags({ "instance" })
@Paths({ "/instances/{instanceName}", "/instances/{instanceName}/" })
@Responses({ @Response(statusCode = "200", description = "Information about the specified instance"), @Response(statusCode = "404", description = "The specified instance does not exist"), @Response(statusCode = "500", description = "There was an error while fetching information for the given instance") })
private Representation getInstanceInformation(
    @Parameter(name = "instanceName", description = "The name of the instance (eg. Server_1.2.3.4_1234 or Broker_someHost.example.com_2345)", in = "path", required = true) String instanceName) {
  try {
    if (_pinotHelixResourceManager.instanceExists(instanceName)) {
      // known instance: copy the relevant fields of its Helix config into the JSON payload
      InstanceConfig config = _pinotHelixResourceManager.getHelixInstanceConfig(instanceName);
      JSONObject instanceJson = new JSONObject();
      instanceJson.put("instanceName", config.getInstanceName());
      instanceJson.put("hostName", config.getHostName());
      instanceJson.put("enabled", config.getInstanceEnabled());
      instanceJson.put("port", config.getPort());
      instanceJson.put("tags", new JSONArray(config.getTags()));
      return new StringRepresentation(instanceJson.toString());
    }
    // unknown instance: report 404 with a plain-text message
    setStatus(Status.CLIENT_ERROR_NOT_FOUND);
    return new StringRepresentation("Error: Instance " + instanceName + " not found.");
  } catch (Exception e) {
    LOGGER.warn("Caught exception while fetching information for instance {}", instanceName, e);
    setStatus(Status.SERVER_ERROR_INTERNAL);
    return new StringRepresentation("{}");
  }
}
Aggregations