use of org.apache.helix.HelixAdmin in project ambry by linkedin.
the class HelixBootstrapUpgradeUtil method updateClusterMapInHelix.
/**
 * Push the contents of the static layout files into Helix. Re-running this method is a no-op as long as the static
 * files have not changed, which makes the tool safe to use for upgrades.
 *
 * Partitions are neither placed under one single resource nor given a resource each; instead, new partitions are
 * batched under numbered resources, with at most {@code maxPartitionsInOneResource} partitions per resource.
 * Partitions that are already known to Helix are only checked for changes.
 *
 * @param helixAdminFactory the {@link HelixAdminFactory} to use to instantiate {@link HelixAdmin}
 */
private void updateClusterMapInHelix(HelixAdminFactory helixAdminFactory) {
  initializeAdminsAndAddCluster(helixAdminFactory);

  // Use the local datacenter's admin as the reference when one is configured, otherwise whichever admin comes first.
  HelixAdmin refAdmin;
  if (localDc == null) {
    refAdmin = adminForDc.values().iterator().next();
  } else {
    refAdmin = adminForDc.get(localDc);
  }
  populateResourcesAndPartitionsSet(refAdmin);
  addNewDataNodes();

  // New resources continue numbering after the highest existing resource id (or start at 1).
  long resourceId = 1;
  if (!existingResources.isEmpty()) {
    resourceId = existingResources.last() + 1;
  }

  List<Partition> pendingBatch = new ArrayList<>();
  for (PartitionId partitionId : staticClusterMap.partitionLayout.getPartitions()) {
    Partition partition = (Partition) partitionId;
    if (existingPartitions.contains(partition.getId())) {
      updatePartitionInfoIfChanged(partition);
      continue;
    }
    pendingBatch.add(partition);
    if (pendingBatch.size() == maxPartitionsInOneResource) {
      // The batch is full: flush it as one resource and start collecting the next one.
      addNewAmbryPartitions(pendingBatch, Long.toString(resourceId));
      pendingBatch.clear();
      resourceId++;
    }
  }

  // Flush the trailing, partially filled batch (if any).
  if (!pendingBatch.isEmpty()) {
    addNewAmbryPartitions(pendingBatch, Long.toString(resourceId));
  }
}
use of org.apache.helix.HelixAdmin in project ambry by linkedin.
the class HelixBootstrapUpgradeUtil method initializeAdminsAndAddCluster.
/**
 * Create one {@link HelixAdmin} per datacenter from the configured zk connect strings and record it in the
 * datacenter-to-admin map. For every datacenter whose Helix does not list the cluster yet, the cluster is added
 * together with the {@link LeaderStandbySMD} state model definition.
 * @param helixAdminFactory the {@link HelixAdminFactory} to use to instantiate {@link HelixAdmin}
 */
private void initializeAdminsAndAddCluster(HelixAdminFactory helixAdminFactory) {
  for (Map.Entry<String, ClusterMapUtils.DcZkInfo> dcAndZkInfo : dataCenterToZkAddress.entrySet()) {
    String zkConnectStr = dcAndZkInfo.getValue().getZkConnectStr();
    HelixAdmin dcAdmin = helixAdminFactory.getHelixAdmin(zkConnectStr);
    adminForDc.put(dcAndZkInfo.getKey(), dcAdmin);

    // Every DC needs a cluster entry; nothing more to do when it is already there.
    if (dcAdmin.getClusters().contains(clusterName)) {
      continue;
    }
    dcAdmin.addCluster(clusterName);
    dcAdmin.addStateModelDef(clusterName, LeaderStandbySMD.name, LeaderStandbySMD.build());
  }
}
use of org.apache.helix.HelixAdmin in project ambry by linkedin.
the class HelixBootstrapUpgradeUtil method addNewAmbryPartitions.
/**
 * Creates the given resource in every datacenter and places all the given partitions under it, with replicas on the
 * nodes specified in the static clustermap.
 *
 * The static layout is assumed to contain every partition in every colo. That assumption keeps partitions grouped
 * under the same resource in all colos (the static cluster map has no notion of a resource id today). It is not a
 * strict requirement, but it keeps the logic simple.
 *
 * Note: 1. Partition names are unique in the Ambry cluster, even across resources.
 * 2. New Ambry partitions are never added to Helix resources that already exist before this method is called.
 *
 * @param partitions the new partitions to group under the resource; must not be empty.
 * @param resourceName the name of the resource to create in each datacenter.
 * @throws IllegalArgumentException if {@code partitions} is empty.
 */
private void addNewAmbryPartitions(List<Partition> partitions, String resourceName) {
  // Resources are created and partitions are grouped under them up to a maximum threshold (enforced by the caller).
  if (partitions.isEmpty()) {
    throw new IllegalArgumentException("Cannot add resource with zero partitions");
  }
  for (Map.Entry<String, HelixAdmin> dcAndAdmin : adminForDc.entrySet()) {
    String dcName = dcAndAdmin.getKey();
    HelixAdmin dcAdmin = dcAndAdmin.getValue();

    AutoModeISBuilder idealStateBuilder = new AutoModeISBuilder(resourceName);
    idealStateBuilder.setStateModel(LeaderStandbySMD.name);

    // Holds the replica count of the most recently processed partition; the last one wins.
    int replicaCount = 0;
    for (Partition partition : partitions) {
      String partitionName = Long.toString(partition.getId());
      boolean sealed = partition.getPartitionState().equals(PartitionState.READ_ONLY);
      List<ReplicaId> replicasInDc = getReplicasInDc(partition, dcName);
      replicaCount = replicasInDc.size();
      String[] instances = updateInstancesAndGetInstanceNames(dcAdmin, partitionName, replicasInDc, sealed);
      // Arrays.asList is a view over the array, so shuffling the view randomizes the array itself.
      List<String> instancesView = Arrays.asList(instances);
      Collections.shuffle(instancesView);
      idealStateBuilder.assignPreferenceList(partitionName, instances);
    }

    idealStateBuilder.setNumPartitions(partitions.size());
    idealStateBuilder.setNumReplica(replicaCount);
    IdealState idealState = idealStateBuilder.build();
    dcAdmin.addResource(clusterName, resourceName, idealState);

    String summary =
        "Added " + partitions.size() + " new partitions under resource " + resourceName + " in datacenter " + dcName;
    System.out.println(summary);
  }
}
use of org.apache.helix.HelixAdmin in project ambry by linkedin.
the class HelixBootstrapUpgradeUtil method getSealedPartitionsInHelixCluster.
/**
 * Get sealed partitions from Helix cluster.
 *
 * One worker thread is started per datacenter of the static hardware layout that has a {@link HelixAdmin}
 * registered; datacenters without an admin are skipped. The per-datacenter results are merged once all workers have
 * finished. The method fails (through {@code ensureOrThrow}) rather than returning a partial result if the cluster
 * is missing in a datacenter, if any worker reported an error, or if the workers do not all finish within 10 minutes.
 *
 * @return a set of sealed partitions across all DCs.
 * @throws Exception if the calling thread is interrupted while waiting for the workers, or if one of the
 *                   {@code ensureOrThrow} checks fails.
 */
private Set<String> getSealedPartitionsInHelixCluster() throws Exception {
  info("Aggregating sealed partitions from cluster {} in Helix", clusterName);

  // Size the latch by the number of workers that will actually be started. Sizing it by adminForDc.size() would
  // leave the latch unreleasable whenever an admin exists for a datacenter that is not in the hardware layout.
  int dcsToQuery = 0;
  for (Datacenter dc : staticClusterMap.hardwareLayout.getDatacenters()) {
    if (adminForDc.get(dc.getName()) != null) {
      dcsToQuery++;
    }
  }
  CountDownLatch sealedPartitionLatch = new CountDownLatch(dcsToQuery);
  AtomicInteger errorCount = new AtomicInteger();
  Map<String, Set<String>> dcToSealedPartitions = new ConcurrentHashMap<>();
  Map<String, Set<String>> nodeToNonExistentReplicas = new ConcurrentHashMap<>();

  for (Datacenter dc : staticClusterMap.hardwareLayout.getDatacenters()) {
    if (adminForDc.get(dc.getName()) == null) {
      info("Skipping {}", dc.getName());
      continue;
    }
    ensureOrThrow(isClusterPresent(dc.getName()), "Cluster not found in ZK " + dataCenterToZkAddress.get(dc.getName()));
    Utils.newThread(() -> {
      try {
        getSealedPartitionsInDc(dc, dcToSealedPartitions, nodeToNonExistentReplicas);
      } catch (Throwable t) {
        // The throwable is passed as the trailing argument so that the stack trace is logged, not just the message
        // (assumes an SLF4J-style logger - TODO confirm).
        logger.error("[{}] error message: {}", dc.getName().toUpperCase(), t.getMessage(), t);
        errorCount.getAndIncrement();
      } finally {
        // Always release the latch, even on failure, so the aggregating thread is not left waiting.
        sealedPartitionLatch.countDown();
      }
    }, false).start();
  }

  // await() returns false on timeout; in that case the collected results may be incomplete and must not be used.
  boolean allDcsFinished = sealedPartitionLatch.await(10, TimeUnit.MINUTES);
  ensureOrThrow(allDcsFinished,
      "Timed out waiting for sealed partitions to be aggregated in cluster " + clusterName);
  ensureOrThrow(errorCount.get() == 0, "Error occurred when aggregating sealed partitions in cluster " + clusterName);

  Set<String> sealedPartitionsInCluster = new HashSet<>();
  info("========================= Summary =========================");
  for (Map.Entry<String, Set<String>> entry : dcToSealedPartitions.entrySet()) {
    info("Dc {} has {} sealed partitions.", entry.getKey(), entry.getValue().size());
    sealedPartitionsInCluster.addAll(entry.getValue());
  }
  info("========================= Sealed Partitions across All DCs =========================");
  info("Total number of sealed partitions in cluster = {}", sealedPartitionsInCluster.size());
  info("Sealed partitions are {}", sealedPartitionsInCluster.toString());
  if (!nodeToNonExistentReplicas.isEmpty()) {
    info("Following {} nodes have sealed replica that are not actually present", nodeToNonExistentReplicas.size());
    for (Map.Entry<String, Set<String>> entry : nodeToNonExistentReplicas.entrySet()) {
      info("{} has non-existent replicas: {}", entry.getKey(), entry.getValue().toString());
    }
  }
  info("Successfully aggregate sealed from cluster {} in Helix", clusterName);
  return sealedPartitionsInCluster;
}
use of org.apache.helix.HelixAdmin in project ambry by linkedin.
the class HelixBootstrapUpgradeUtil method verifyResourcesAndPartitionEquivalencyInDc.
/**
 * Check that the partition layout of the static clustermap and the resources/partitions stored in Helix for one
 * datacenter agree with each other. Only resources with purely numeric names are considered; any other resource is
 * ignored. Every mismatch is reported through {@code ensureOrThrow}.
 * @param dc the datacenter whose information is to be verified.
 * @param clusterName the cluster to be verified.
 * @param partitionLayout the {@link PartitionLayout} of the static clustermap.
 */
private void verifyResourcesAndPartitionEquivalencyInDc(Datacenter dc, String clusterName, PartitionLayout partitionLayout) {
  String dcName = dc.getName();
  HelixAdmin admin = adminForDc.get(dcName);
  String dcTag = dcName.toUpperCase();

  // Step 1: collect partition -> instances from the ideal state of every numeric resource in Helix.
  Map<String, Set<String>> helixPartitionToInstances = new HashMap<>();
  for (String resourceName : admin.getResourcesInCluster(clusterName)) {
    if (resourceName.matches("\\d+")) {
      IdealState resourceIS = admin.getResourceIdealState(clusterName, resourceName);
      ensureOrThrow(resourceIS.getStateModelDefRef().equals(stateModelDef),
          "[" + dcTag + "] StateModel name mismatch for resource " + resourceName);
      for (String resourcePartition : resourceIS.getPartitionSet()) {
        Set<String> instancesOfPartition = resourceIS.getInstanceSet(resourcePartition);
        // A non-null previous mapping means the partition was already seen under another resource.
        Set<String> previousMapping = helixPartitionToInstances.put(resourcePartition, instancesOfPartition);
        ensureOrThrow(previousMapping == null,
            "[" + dcTag + "] Partition " + resourcePartition + " already found under a different resource.");
      }
    } else {
      info("[{}] Ignoring resource {} as it is not part of the cluster map", dcTag, resourceName);
    }
  }

  // Step 2: walk the static layout and tick off what was found in Helix. Entries are removed from the collected
  // map and from the per-partition instance sets as they are matched, so whatever remains is extra in Helix.
  for (PartitionId partitionId : partitionLayout.getPartitions(null)) {
    Partition partition = (Partition) partitionId;
    String partitionName = Long.toString(partition.getId());
    Set<String> replicaHostsInHelix = helixPartitionToInstances.remove(partitionName);
    Set<String> expectedInHelix = new HashSet<>();
    List<ReplicaId> replicasInStatic = partition.getReplicas()
        .stream()
        .filter(replica -> replica.getDataNodeId().getDatacenterName().equals(dcName))
        .collect(Collectors.toList());
    ensureOrThrow(replicasInStatic.isEmpty() || replicaHostsInHelix != null,
        "[" + dcTag + "] No replicas found for partition " + partitionName + " in Helix");
    for (ReplicaId replica : replicasInStatic) {
      String instanceName = getInstanceName(replica.getDataNodeId());
      expectedInHelix.add(instanceName);
      boolean presentInHelix = replicaHostsInHelix.remove(instanceName);
      ensureOrThrow(presentInHelix,
          "[" + dcTag + "] Instance " + instanceName + " for the given replica in the clustermap not found in Helix");
    }
    if (!expectMoreInHelixDuringValidate) {
      boolean nothingLeftInHelix = replicaHostsInHelix == null || replicaHostsInHelix.isEmpty();
      ensureOrThrow(nothingLeftInHelix,
          "[" + dcTag + "] More instances in Helix than in clustermap for partition: " + partitionName + ", expected: "
              + expectedInHelix + ", found additional instances: " + replicaHostsInHelix);
    }
  }

  // Step 3: partitions still left in the map exist in Helix only.
  if (!expectMoreInHelixDuringValidate) {
    ensureOrThrow(helixPartitionToInstances.isEmpty(),
        "[" + dcTag + "] More partitions in Helix than in clustermap, additional partitions: "
            + helixPartitionToInstances.keySet());
  } else {
    ensureOrThrow(
        helixPartitionToInstances.keySet().equals(partitionsNotForceRemovedByDc.getOrDefault(dcName, new HashSet<>())),
        "[" + dcTag + "] Additional partitions in Helix: " + helixPartitionToInstances.keySet()
            + " not what is expected " + partitionsNotForceRemovedByDc.get(dcName));
    info("[{}] *** Helix may have more partitions or replicas than in the given clustermap as removals were not forced.", dcTag);
  }
  info("[{}] Successfully verified resources and partitions equivalency in dc {}", dcTag, dcName);
}
Aggregations