use of org.apache.helix.model.InstanceConfig in project ambry by linkedin.
the class HelixBootstrapUpgradeUtil method controlPartitionState.
/**
* Control the state of a partition on a certain node (i.e. DisablePartition, EnablePartition).
*/
private void controlPartitionState() {
  // this operation is performed on a single datacenter at a time, so there should be exactly one admin
  // in adminForDc map.
  if (adminForDc.size() != 1) {
    throw new IllegalStateException("The dc count is not 1 for partition state control operation");
  }
  HelixAdmin helixAdmin = adminForDc.values().iterator().next();
  String instanceName;
  if (portNum == null) {
    Optional<DataNodeId> optionalDataNode = staticClusterMap.getDataNodeIds()
        .stream()
        .filter(node -> node.getHostname().equals(hostName))
        .findFirst();
    if (!optionalDataNode.isPresent()) {
      throw new IllegalStateException("Host " + hostName + " is not found in static clustermap");
    }
    DataNode dataNode = (DataNode) optionalDataNode.get();
    instanceName = getInstanceName(dataNode);
  } else {
    instanceName = ClusterMapUtils.getInstanceName(hostName, portNum);
  }
  InstanceConfig instanceConfig = helixAdmin.getInstanceConfig(clusterName, instanceName);
  String resourceNameForPartition = getResourceNameOfPartition(helixAdmin, clusterName, partitionName);
  info("{} partition {} under resource {} on node {}",
      helixAdminOperation == HelixAdminOperation.EnablePartition ? "Enabling" : "Disabling", partitionName,
      resourceNameForPartition, instanceName);
  instanceConfig.setInstanceEnabledForPartition(resourceNameForPartition, partitionName,
      helixAdminOperation == HelixAdminOperation.EnablePartition);
  // clean up the disabled partition entry if it exists and is empty.
  Map<String, String> disabledPartitions =
      instanceConfig.getRecord().getMapFields().get(HELIX_DISABLED_PARTITION_STR);
  if (disabledPartitions != null && disabledPartitions.isEmpty()) {
    instanceConfig.getRecord().getMapFields().remove(HELIX_DISABLED_PARTITION_STR);
  }
  helixAdmin.setInstanceConfig(clusterName, instanceName, instanceConfig);
  instancesUpdated.getAndIncrement();
  if (helixAdminOperation == HelixAdminOperation.EnablePartition) {
    partitionsEnabled.getAndIncrement();
  } else {
    partitionsDisabled.getAndIncrement();
  }
}
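For readers unfamiliar with the Helix side of this, here is a minimal, self-contained sketch of the three InstanceConfig calls the method above relies on: read the config, flip the per-partition enabled flag, and write the config back. All names (ZooKeeper address, cluster, instance, resource, partition) are placeholders, not Ambry code.

import org.apache.helix.manager.zk.ZKHelixAdmin;
import org.apache.helix.model.InstanceConfig;

public class PartitionStateSketch {
  public static void main(String[] args) {
    // Placeholder names; the Ambry tool derives them from the static clustermap and CLI options.
    String zkAddress = "localhost:2181";
    String clusterName = "AmbryCluster";
    String instanceName = "host1_15088";
    String resourceName = "0";
    String partitionName = "partition1";
    ZKHelixAdmin admin = new ZKHelixAdmin(zkAddress);
    try {
      InstanceConfig instanceConfig = admin.getInstanceConfig(clusterName, instanceName);
      // false disables the partition on this instance; true re-enables it.
      instanceConfig.setInstanceEnabledForPartition(resourceName, partitionName, false);
      admin.setInstanceConfig(clusterName, instanceName, instanceConfig);
    } finally {
      admin.close();
    }
  }
}

The map-field cleanup at the end of the Ambry method removes the HELIX_DISABLED_PARTITION entry only when it is present and empty, presumably so that no empty entry is left behind once the last disabled partition on the instance is re-enabled.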
use of org.apache.helix.model.InstanceConfig in project ambry by linkedin.
the class HelixParticipant method setPartitionDisabledState.
/**
* Disable/enable partition on local node. This method will update both InstanceConfig and DataNodeConfig in PropertyStore.
* @param partitionName name of partition on local node
* @param disable if {@code true}, disable given partition on current node. {@code false} otherwise.
*/
protected void setPartitionDisabledState(String partitionName, boolean disable) {
  synchronized (helixAdministrationLock) {
    // 1. update disabled replica list in DataNodeConfig. This modifies ListFields only.
    boolean dataNodeConfigChanged = false;
    DataNodeConfig dataNodeConfig = getDataNodeConfig();
    if (!disable && dataNodeConfig.getDisabledReplicas().remove(partitionName)) {
      logger.info("Removing the partition {} from disabledReplicas list", partitionName);
      dataNodeConfigChanged = true;
    } else if (disable && dataNodeConfig.getDisabledReplicas().add(partitionName)) {
      logger.info("Adding the partition {} to disabledReplicas list", partitionName);
      dataNodeConfigChanged = true;
    }
    if (dataNodeConfigChanged) {
      logger.info("Setting config with list of disabled replicas: {}", dataNodeConfig.getDisabledReplicas());
      if (!dataNodeConfigSource.set(dataNodeConfig)) {
        participantMetrics.setReplicaDisabledStateErrorCount.inc();
        logger.warn("setReplicaDisabledState() failed DataNodeConfig update");
      }
      // 2. If the DataNodeConfig was changed, invoke Helix native method to enable/disable partition on local node,
      // this will trigger subsequent state transition on given replica. This method modifies MapFields in
      // InstanceConfig.
      InstanceConfig instanceConfig = getInstanceConfig();
      String resourceNameForPartition = getResourceNameOfPartition(helixAdmin, clusterName, partitionName);
      logger.info("{} replica {} on current node", disable ? "Disabling" : "Enabling", partitionName);
      instanceConfig.setInstanceEnabledForPartition(resourceNameForPartition, partitionName, !disable);
      if (!helixAdmin.setInstanceConfig(clusterName, instanceName, instanceConfig)) {
        participantMetrics.setReplicaDisabledStateErrorCount.inc();
        logger.warn("setReplicaDisabledState() failed InstanceConfig update");
      }
    }
    logger.info("Disabled state of partition {} is updated", partitionName);
  }
}
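One detail worth calling out: the dataNodeConfigChanged guard above works because the disabled-replicas collection's add and remove calls report whether the collection actually changed, so repeated disable (or enable) calls for the same partition skip both config writes. A tiny standalone sketch of that behavior with a plain HashSet and a placeholder partition name:

import java.util.HashSet;
import java.util.Set;

public class DisabledReplicaGuardSketch {
  public static void main(String[] args) {
    Set<String> disabledReplicas = new HashSet<>();
    // First disable: the set changes, so the config update path would be taken.
    System.out.println(disabledReplicas.add("partition1"));    // prints true
    // Repeating the same disable: no change, so the update would be skipped.
    System.out.println(disabledReplicas.add("partition1"));    // prints false
    // Re-enable: removing an existing entry changes the set again.
    System.out.println(disabledReplicas.remove("partition1")); // prints true
  }
}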
use of org.apache.helix.model.InstanceConfig in project ambry by linkedin.
the class HelixBootstrapUpgradeUtil method migrateToPropertyStore.
/**
* Convert instance configs to the new DataNodeConfig format and persist them in the property store.
*/
private void migrateToPropertyStore() throws InterruptedException {
  CountDownLatch migrationComplete = new CountDownLatch(adminForDc.size());
  // different DCs can be migrated in parallel
  adminForDc.forEach((dcName, helixAdmin) -> Utils.newThread(() -> {
    try {
      logger.info("Starting property store migration in {}", dcName);
      ClusterMapConfig config = getClusterMapConfig(clusterName, dcName, null);
      InstanceConfigToDataNodeConfigAdapter.Converter instanceConfigConverter =
          new InstanceConfigToDataNodeConfigAdapter.Converter(config);
      String zkConnectStr = dataCenterToZkAddress.get(dcName).getZkConnectStrs().get(0);
      try (DataNodeConfigSource source = new PropertyStoreToDataNodeConfigAdapter(zkConnectStr, config)) {
        List<String> instanceNames = helixAdmin.getInstancesInCluster(clusterName);
        logger.info("Found {} instances in cluster", instanceNames.size());
        instanceNames.forEach(instanceName -> {
          logger.info("Copying config for node {}", instanceName);
          InstanceConfig instanceConfig = helixAdmin.getInstanceConfig(clusterName, instanceName);
          DataNodeConfig dataNodeConfig = instanceConfigConverter.convert(instanceConfig);
          logger.debug("Writing {} to property store in {}", dataNodeConfig, dcName);
          if (!source.set(dataNodeConfig)) {
            logger.error("Failed to persist config for node {} in the property store.",
                dataNodeConfig.getInstanceName());
          }
        });
      }
      logger.info("Successfully migrated to property store in {}", dcName);
    } catch (Throwable t) {
      logger.error("Error while migrating to property store in {}", dcName, t);
    } finally {
      migrationComplete.countDown();
    }
  }, false).start());
  migrationComplete.await();
}
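The concurrency structure here (one worker thread per datacenter, joined with a CountDownLatch) is independent of Helix and Ambry. A minimal generic sketch of the same fan-out pattern in plain Java, with placeholder datacenter names and a stand-in for the per-DC work:

import java.util.Arrays;
import java.util.List;
import java.util.concurrent.CountDownLatch;

public class PerDcFanOutSketch {
  public static void main(String[] args) throws InterruptedException {
    List<String> dcNames = Arrays.asList("DC1", "DC2", "DC3"); // placeholder datacenter names
    CountDownLatch complete = new CountDownLatch(dcNames.size());
    for (String dcName : dcNames) {
      new Thread(() -> {
        try {
          // stand-in for the per-DC migration work
          System.out.println("Migrating " + dcName);
        } catch (Throwable t) {
          t.printStackTrace();
        } finally {
          // always count down, even on failure, so await() cannot hang
          complete.countDown();
        }
      }).start();
    }
    complete.await();
    System.out.println("All datacenters processed");
  }
}

The finally block mirrors the original: an error in one datacenter is logged but never prevents the latch from reaching zero.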
use of org.apache.helix.model.InstanceConfig in project ambry by linkedin.
the class HelixVcrClusterParticipant method participate.
@Override
public void participate() throws Exception {
  manager = HelixManagerFactory.getZKHelixManager(vcrClusterName, vcrInstanceName, InstanceType.PARTICIPANT,
      cloudConfig.vcrClusterZkConnectString);
  VcrStateModelFactory stateModelFactory = Utils.getObj(cloudConfig.vcrHelixStateModelFactoryClass, this);
  manager.getStateMachineEngine().registerStateModelFactory(stateModelFactory.getStateModelName(), stateModelFactory);
  if (cloudConfig.cloudContainerCompactionEnabled) {
    registerContainerDeletionSyncTask(manager.getStateMachineEngine());
  }
  manager.connect();
  helixAdmin = manager.getClusterManagmentTool();
  InstanceConfig instanceConfig = helixAdmin.getInstanceConfig(vcrClusterName, vcrInstanceName);
  if (!instanceConfig.getRecord().getBooleanField(VCR_HELIX_CONFIG_READY, false)) {
    if (cloudConfig.vcrSslPort != null) {
      instanceConfig.getRecord().setSimpleField(SSL_PORT_STR, Integer.toString(cloudConfig.vcrSslPort));
    }
    if (cloudConfig.vcrHttp2Port != null) {
      instanceConfig.getRecord().setSimpleField(HTTP2_PORT_STR, Integer.toString(cloudConfig.vcrHttp2Port));
    }
    // Set VCR_HELIX_CONFIG_READY to true. Listeners take action only when this value is true.
    instanceConfig.getRecord().setBooleanField(VCR_HELIX_CONFIG_READY, true);
    logger.info("Set VCR_HELIX_CONFIG_READY to true.");
  } else {
    logger.info("VCR_HELIX_CONFIG_READY is true.");
  }
  helixAdmin.setInstanceConfig(vcrClusterName, vcrInstanceName, instanceConfig);
  logger.info("Participated in HelixVcrCluster successfully.");
}
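The VCR_HELIX_CONFIG_READY flag above is an ordinary boolean field on the InstanceConfig's underlying ZNRecord. A hedged sketch of the read-default/set pattern; the ZooKeeper address, cluster/instance names, field keys, and port value are placeholders (the real keys come from the SSL_PORT_STR and VCR_HELIX_CONFIG_READY constants referenced in the snippet):

import org.apache.helix.manager.zk.ZKHelixAdmin;
import org.apache.helix.model.InstanceConfig;

public class VcrConfigReadySketch {
  public static void main(String[] args) {
    // Placeholder names and keys, for illustration only.
    String configReadyKey = "VCR_HELIX_CONFIG_READY";
    String sslPortKey = "sslPort";
    ZKHelixAdmin admin = new ZKHelixAdmin("localhost:2181");
    try {
      InstanceConfig instanceConfig = admin.getInstanceConfig("VcrCluster", "vcrHost_15088");
      // getBooleanField returns the supplied default when the field is absent, which is why the
      // first participation pass takes the "not ready" branch in the method above.
      if (!instanceConfig.getRecord().getBooleanField(configReadyKey, false)) {
        instanceConfig.getRecord().setSimpleField(sslPortKey, Integer.toString(15089));
        instanceConfig.getRecord().setBooleanField(configReadyKey, true);
      }
      admin.setInstanceConfig("VcrCluster", "vcrHost_15088", instanceConfig);
    } finally {
      admin.close();
    }
  }
}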
use of org.apache.helix.model.InstanceConfig in project ambry by linkedin.
the class ClusterChangeHandlerTest method moveReplicaTest.
/**
* Test the case where a current replica is moved between existing nodes.
*/
@Test
public void moveReplicaTest() throws Exception {
  // create a HelixClusterManager with DynamicClusterChangeHandler
  Properties properties = new Properties();
  properties.putAll(props);
  properties.setProperty("clustermap.cluster.change.handler.type", "DynamicClusterChangeHandler");
  ClusterMapConfig clusterMapConfig = new ClusterMapConfig(new VerifiableProperties(properties));
  HelixClusterManager helixClusterManager =
      new HelixClusterManager(clusterMapConfig, selfInstanceName, helixManagerFactory, new MetricRegistry());
  // pick a partition and move one of its replicas
  Partition testPartition =
      (Partition) testPartitionLayout.getPartitionLayout().getRandomWritablePartition(null, null);
  int previousReplicaCnt = testPartition.getReplicaIds().size();
  // 1. find out nodes in local dc that host this partition
  List<DataNodeId> localDcNodes = getPartitionDataNodesFromDc(testPartition, localDc);
  // 2. then find a node in local dc that doesn't host this partition (this is the node we will add a replica to)
  Datacenter localDatacenter = testHardwareLayout.getHardwareLayout()
      .getDatacenters()
      .stream()
      .filter(dc -> dc.getName().equals(localDc))
      .findFirst()
      .get();
  // since we didn't populate the bootstrap replica map, we have to avoid adding the replica to currentNode
  DataNode nodeToAddReplica = localDatacenter.getDataNodes()
      .stream()
      .filter(node -> !localDcNodes.contains(node) && node != currentNode)
      .findFirst()
      .get();
  testPartitionLayout.addReplicaToPartition(nodeToAddReplica, testPartition);
  Utils.writeJsonObjectToFile(testPartitionLayout.getPartitionLayout().toJSONObject(), partitionLayoutPath);
  // 3. upgrade Helix by adding the new replica to the chosen node in the local dc. This mocks "replica addition" on
  // the chosen node, after which that node updates its instanceConfig in Helix. There should be 7 (= 6+1) replicas
  // in the intermediate state.
  helixCluster.upgradeWithNewPartitionLayout(partitionLayoutPath,
      HelixBootstrapUpgradeUtil.HelixAdminOperation.BootstrapCluster);
  PartitionId partitionInManager = helixClusterManager.getAllPartitionIds(null)
      .stream()
      .filter(p -> p.toPathString().equals(testPartition.toPathString()))
      .findFirst()
      .get();
  assertEquals("Replica count of testing partition is not correct", previousReplicaCnt + 1,
      partitionInManager.getReplicaIds().size());
  // 4. find a replica (from the same partition) in the local dc that is not the one just added
  Replica oldReplica = (Replica) testPartition.getReplicaIds()
      .stream()
      .filter(r -> r.getDataNodeId().getDatacenterName().equals(localDc) && r.getDataNodeId() != nodeToAddReplica)
      .findFirst()
      .get();
  testPartitionLayout.removeReplicaFromPartition(oldReplica);
  Utils.writeJsonObjectToFile(testPartitionLayout.getPartitionLayout().toJSONObject(), partitionLayoutPath);
  // 5. disable the replica in Helix to mock replica decommission, which removes it from InstanceConfig but keeps it
  // in IdealState temporarily.
  helixCluster.upgradeWithNewPartitionLayout(partitionLayoutPath,
      HelixBootstrapUpgradeUtil.HelixAdminOperation.DisablePartition);
  Set<ReplicaId> replicasInDifferentStates = new HashSet<>();
  for (ReplicaState state : EnumSet.of(ReplicaState.STANDBY, ReplicaState.LEADER, ReplicaState.OFFLINE,
      ReplicaState.INACTIVE, ReplicaState.BOOTSTRAP)) {
    replicasInDifferentStates.addAll(partitionInManager.getReplicaIdsByState(state, null));
  }
  // verify there is no null replica
  replicasInDifferentStates.forEach(r -> assertNotNull("found null replica", r));
  // 6. update the IdealState in Helix. The number of replicas should become 6 again.
  helixCluster.upgradeWithNewPartitionLayout(partitionLayoutPath,
      HelixBootstrapUpgradeUtil.HelixAdminOperation.UpdateIdealState);
  assertEquals("Replica count of testing partition is not correct", previousReplicaCnt,
      partitionInManager.getReplicaIds().size());
  helixClusterManager.close();
}