Search in sources :

Example 61 with HelixAdmin

use of org.apache.helix.HelixAdmin in project ambry by linkedin.

the class HelixBootstrapUpgradeUtil method updateClusterMapInHelix.

/**
 * Pushes the information from the static layout files into Helix. Re-running this method when the static files
 * have not changed has no effect, which makes the tool safe to use for upgrades.
 *
 * The cluster is neither placed under one single resource nor split into one resource per partition. New
 * partitions are instead batched together, and each batch of at most {@code maxPartitionsInOneResource}
 * partitions is added under its own resource.
 *
 * @param helixAdminFactory the {@link HelixAdminFactory} to use to instantiate {@link HelixAdmin}
 */
private void updateClusterMapInHelix(HelixAdminFactory helixAdminFactory) {
    initializeAdminsAndAddCluster(helixAdminFactory);

    // Use the local datacenter's admin as the reference when a local DC is set; otherwise fall back to the
    // first admin in the map.
    HelixAdmin referenceAdmin;
    if (localDc == null) {
        referenceAdmin = adminForDc.values().iterator().next();
    } else {
        referenceAdmin = adminForDc.get(localDc);
    }
    populateResourcesAndPartitionsSet(referenceAdmin);
    addNewDataNodes();

    // New resources are numbered right after the highest existing resource, starting at 1 when there is none.
    long resourceId = 1;
    if (!existingResources.isEmpty()) {
        resourceId = existingResources.last() + 1;
    }

    List<Partition> pendingBatch = new ArrayList<>();
    for (PartitionId id : staticClusterMap.partitionLayout.getPartitions()) {
        Partition candidate = (Partition) id;
        if (existingPartitions.contains(candidate.getId())) {
            // Already in Helix: only refresh its information.
            updatePartitionInfoIfChanged(candidate);
            continue;
        }
        pendingBatch.add(candidate);
        if (pendingBatch.size() == maxPartitionsInOneResource) {
            // The batch is full: flush it under the current resource and start the next one.
            addNewAmbryPartitions(pendingBatch, Long.toString(resourceId));
            pendingBatch.clear();
            resourceId++;
        }
    }

    // Flush the trailing, partially filled batch (if any).
    if (!pendingBatch.isEmpty()) {
        addNewAmbryPartitions(pendingBatch, Long.toString(resourceId));
    }
}
Also used : ArrayList(java.util.ArrayList) HelixAdmin(org.apache.helix.HelixAdmin)

Example 62 with HelixAdmin

use of org.apache.helix.HelixAdmin in project ambry by linkedin.

the class HelixBootstrapUpgradeUtil method initializeAdminsAndAddCluster.

/**
 * Populates the dataCenter to {@link HelixAdmin} map from the configured zk connect strings and, in every
 * datacenter where the cluster is not yet listed, adds the cluster together with the LeaderStandby state model
 * definition.
 * @param helixAdminFactory the {@link HelixAdminFactory} to use to instantiate {@link HelixAdmin}
 */
private void initializeAdminsAndAddCluster(HelixAdminFactory helixAdminFactory) {
    dataCenterToZkAddress.forEach((datacenter, zkInfo) -> {
        HelixAdmin dcAdmin = helixAdminFactory.getHelixAdmin(zkInfo.getZkConnectStr());
        adminForDc.put(datacenter, dcAdmin);
        // Every DC gets a cluster entry; nothing more to do when it is already there.
        if (dcAdmin.getClusters().contains(clusterName)) {
            return;
        }
        dcAdmin.addCluster(clusterName);
        dcAdmin.addStateModelDef(clusterName, LeaderStandbySMD.name, LeaderStandbySMD.build());
    });
}
Also used : HelixAdmin(org.apache.helix.HelixAdmin) HashMap(java.util.HashMap) Map(java.util.Map)

Example 63 with HelixAdmin

use of org.apache.helix.HelixAdmin in project ambry by linkedin.

the class HelixBootstrapUpgradeUtil method addNewAmbryPartitions.

/**
 * Adds the given partitions, grouped under one resource, to every datacenter, with replicas placed on the nodes
 * specified in the static clustermap.
 *
 * The static layout is assumed to contain every partition in every colo. That assumption keeps partitions
 * grouped under the same resource in all colos (the resource id is not something the static cluster map carries
 * today). It is not a strict requirement, but it keeps the logic simple.
 *
 * Note: 1. Partition names are kept unique in the Ambry cluster even across resources.
 *       2. New Ambry partitions are never added to Helix resources that already exist before this method is
 *          called.
 *
 * @param partitions the new partitions to add; must not be empty.
 * @param resourceName the name of the resource the partitions are grouped under.
 * @throws IllegalArgumentException if {@code partitions} is empty.
 */
private void addNewAmbryPartitions(List<Partition> partitions, String resourceName) {
    // Resources are created and partitions are grouped under them up to a maximum threshold, so an empty
    // group is never valid here.
    if (partitions.isEmpty()) {
        throw new IllegalArgumentException("Cannot add resource with zero partitions");
    }
    for (Map.Entry<String, HelixAdmin> dcAndAdmin : adminForDc.entrySet()) {
        HelixAdmin admin = dcAndAdmin.getValue();
        String datacenter = dcAndAdmin.getKey();
        AutoModeISBuilder idealStateBuilder = new AutoModeISBuilder(resourceName);
        idealStateBuilder.setStateModel(LeaderStandbySMD.name);
        // Ends up holding the replica count of the last partition processed for this datacenter.
        int replicaCount = 0;
        for (Partition current : partitions) {
            String name = Long.toString(current.getId());
            boolean isSealed = current.getPartitionState().equals(PartitionState.READ_ONLY);
            List<ReplicaId> replicasInDc = getReplicasInDc(current, datacenter);
            replicaCount = replicasInDc.size();
            String[] instanceNames = updateInstancesAndGetInstanceNames(admin, name, replicasInDc, isSealed);
            // Arrays.asList is a view over the array, so shuffling the view reorders the array itself.
            List<String> instanceView = Arrays.asList(instanceNames);
            Collections.shuffle(instanceView);
            idealStateBuilder.assignPreferenceList(name, instanceNames);
        }
        idealStateBuilder.setNumPartitions(partitions.size());
        idealStateBuilder.setNumReplica(replicaCount);
        admin.addResource(clusterName, resourceName, idealStateBuilder.build());
        System.out.println("Added " + partitions.size() + " new partitions under resource " + resourceName + " in datacenter " + datacenter);
    }
}
Also used : HelixAdmin(org.apache.helix.HelixAdmin) IdealState(org.apache.helix.model.IdealState) AutoModeISBuilder(org.apache.helix.model.builder.AutoModeISBuilder) HashMap(java.util.HashMap) Map(java.util.Map)

Example 64 with HelixAdmin

use of org.apache.helix.HelixAdmin in project ambry by linkedin.

the class HelixBootstrapUpgradeUtil method getSealedPartitionsInHelixCluster.

/**
 * Get sealed partitions from Helix cluster. Each datacenter that has a {@link HelixAdmin} is queried on its own
 * thread, and the per-datacenter results are merged once all threads have finished.
 * @return a set of sealed partitions across all DCs.
 * @throws Exception if the wait for the per-datacenter threads is interrupted, or if one of the
 *         {@code ensureOrThrow} checks fails (cluster missing in ZK, timeout, or an error in any datacenter);
 *         the exact exception type is decided by {@code ensureOrThrow}, which is not visible here.
 */
private Set<String> getSealedPartitionsInHelixCluster() throws Exception {
    info("Aggregating sealed partitions from cluster {} in Helix", clusterName);
    // Size the latch by the number of threads that will actually be started (datacenters of the static layout
    // that have an admin) rather than by adminForDc.size(). If the two ever differ, the latch could otherwise
    // never reach zero and the wait below would always run into the timeout.
    int dcsToQuery = 0;
    for (Datacenter dc : staticClusterMap.hardwareLayout.getDatacenters()) {
        if (adminForDc.get(dc.getName()) != null) {
            dcsToQuery++;
        }
    }
    CountDownLatch sealedPartitionLatch = new CountDownLatch(dcsToQuery);
    AtomicInteger errorCount = new AtomicInteger();
    // Concurrent maps because they are filled by the per-datacenter threads started below.
    Map<String, Set<String>> dcToSealedPartitions = new ConcurrentHashMap<>();
    Map<String, Set<String>> nodeToNonExistentReplicas = new ConcurrentHashMap<>();
    for (Datacenter dc : staticClusterMap.hardwareLayout.getDatacenters()) {
        HelixAdmin admin = adminForDc.get(dc.getName());
        if (admin == null) {
            info("Skipping {}", dc.getName());
            continue;
        }
        ensureOrThrow(isClusterPresent(dc.getName()), "Cluster not found in ZK " + dataCenterToZkAddress.get(dc.getName()));
        Utils.newThread(() -> {
            try {
                getSealedPartitionsInDc(dc, dcToSealedPartitions, nodeToNonExistentReplicas);
            } catch (Throwable t) {
                // Pass the throwable as the last argument so the stack trace is logged, not just the message.
                logger.error("[{}] error message: {}", dc.getName().toUpperCase(), t.getMessage(), t);
                errorCount.getAndIncrement();
            } finally {
                // Always count down so a failing datacenter cannot block the waiting thread.
                sealedPartitionLatch.countDown();
            }
        }, false).start();
    }
    // await() returns false on timeout. Fail in that case instead of returning a partial set of sealed
    // partitions built from only the datacenters that happened to finish in time.
    boolean allDcsFinished = sealedPartitionLatch.await(10, TimeUnit.MINUTES);
    ensureOrThrow(allDcsFinished, "Timed out waiting for sealed partitions to be aggregated in cluster " + clusterName);
    ensureOrThrow(errorCount.get() == 0, "Error occurred when aggregating sealed partitions in cluster " + clusterName);
    Set<String> sealedPartitionsInCluster = new HashSet<>();
    info("========================= Summary =========================");
    for (Map.Entry<String, Set<String>> entry : dcToSealedPartitions.entrySet()) {
        info("Dc {} has {} sealed partitions.", entry.getKey(), entry.getValue().size());
        sealedPartitionsInCluster.addAll(entry.getValue());
    }
    info("========================= Sealed Partitions across All DCs =========================");
    info("Total number of sealed partitions in cluster = {}", sealedPartitionsInCluster.size());
    info("Sealed partitions are {}", sealedPartitionsInCluster.toString());
    if (!nodeToNonExistentReplicas.isEmpty()) {
        info("Following {} nodes have sealed replica that are not actually present", nodeToNonExistentReplicas.size());
        for (Map.Entry<String, Set<String>> entry : nodeToNonExistentReplicas.entrySet()) {
            info("{} has non-existent replicas: {}", entry.getKey(), entry.getValue().toString());
        }
    }
    info("Successfully aggregate sealed from cluster {} in Helix", clusterName);
    return sealedPartitionsInCluster;
}
Also used : SortedSet(java.util.SortedSet) EnumSet(java.util.EnumSet) Set(java.util.Set) TreeSet(java.util.TreeSet) HashSet(java.util.HashSet) CountDownLatch(java.util.concurrent.CountDownLatch) HelixAdmin(org.apache.helix.HelixAdmin) ZKHelixAdmin(org.apache.helix.manager.zk.ZKHelixAdmin) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) HashSet(java.util.HashSet)

Example 65 with HelixAdmin

use of org.apache.helix.HelixAdmin in project ambry by linkedin.

the class HelixBootstrapUpgradeUtil method verifyResourcesAndPartitionEquivalencyInDc.

/**
 * Verify that the partition layout information is in sync between the static clustermap and the Helix ideal
 * states of one datacenter.
 *
 * The check runs in three steps:
 * 1. Build a map of partition name to instance set from the ideal state of every resource with a purely numeric
 *    name, checking the state model name and that no partition shows up under two resources.
 * 2. For every partition of the static layout, take its entry out of that map and match the replicas located in
 *    this datacenter against the Helix instances, removing each matched instance from the Helix set.
 * 3. Check whatever is left in the map, i.e. partitions known to Helix but absent from the static layout.
 *
 * Each check goes through {@code ensureOrThrow} (not visible here; presumably it throws when the condition is
 * false).
 *
 * @param dc the datacenter whose information is to be verified.
 * @param clusterName the cluster to be verified.
 * @param partitionLayout the {@link PartitionLayout} of the static clustermap.
 */
private void verifyResourcesAndPartitionEquivalencyInDc(Datacenter dc, String clusterName, PartitionLayout partitionLayout) {
    String dcName = dc.getName();
    HelixAdmin admin = adminForDc.get(dc.getName());
    // Step 1: partition name -> instances, collected from the ideal states of all cluster map resources.
    Map<String, Set<String>> allPartitionsToInstancesInHelix = new HashMap<>();
    for (String resourceName : admin.getResourcesInCluster(clusterName)) {
        // Only resources whose name consists solely of digits are considered; anything else is skipped.
        if (!resourceName.matches("\\d+")) {
            info("[{}] Ignoring resource {} as it is not part of the cluster map", dcName.toUpperCase(), resourceName);
            continue;
        }
        IdealState resourceIS = admin.getResourceIdealState(clusterName, resourceName);
        ensureOrThrow(resourceIS.getStateModelDefRef().equals(stateModelDef), "[" + dcName.toUpperCase() + "] StateModel name mismatch for resource " + resourceName);
        Set<String> resourcePartitions = resourceIS.getPartitionSet();
        for (String resourcePartition : resourcePartitions) {
            Set<String> partitionInstanceSet = resourceIS.getInstanceSet(resourcePartition);
            // put() returns the previous value, so a non-null result means the partition was already recorded
            // for another resource.
            ensureOrThrow(allPartitionsToInstancesInHelix.put(resourcePartition, partitionInstanceSet) == null, "[" + dcName.toUpperCase() + "] Partition " + resourcePartition + " already found under a different resource.");
        }
    }
    // Step 2: walk the static layout and consume the matching entries of the Helix map.
    // NOTE(review): the meaning of the null argument to getPartitions is not visible here - confirm.
    for (PartitionId partitionId : partitionLayout.getPartitions(null)) {
        Partition partition = (Partition) partitionId;
        String partitionName = Long.toString(partition.getId());
        // remove() rather than get(): entries still in the map after this loop are partitions that exist only
        // in Helix.
        Set<String> replicaHostsInHelix = allPartitionsToInstancesInHelix.remove(partitionName);
        Set<String> expectedInHelix = new HashSet<>();
        // Only the replicas hosted in the datacenter under verification are relevant.
        List<ReplicaId> replicasInStatic = partition.getReplicas().stream().filter(replica -> replica.getDataNodeId().getDatacenterName().equals(dcName)).collect(Collectors.toList());
        // A partition without replicas in this datacenter is allowed to be missing from Helix.
        ensureOrThrow(replicasInStatic.size() == 0 || replicaHostsInHelix != null, "[" + dcName.toUpperCase() + "] No replicas found for partition " + partitionName + " in Helix");
        for (ReplicaId replica : replicasInStatic) {
            String instanceName = getInstanceName(replica.getDataNodeId());
            expectedInHelix.add(instanceName);
            // Each matched instance is removed from the Helix set, so only unexpected instances remain in it.
            ensureOrThrow(replicaHostsInHelix.remove(instanceName), "[" + dcName.toUpperCase() + "] Instance " + instanceName + " for the given replica in the clustermap not found in Helix");
        }
        if (!expectMoreInHelixDuringValidate) {
            // Strict mode: Helix must not list any instance beyond the ones in the static clustermap.
            ensureOrThrow(replicaHostsInHelix == null || replicaHostsInHelix.isEmpty(), "[" + dcName.toUpperCase() + "] More instances in Helix than in clustermap for partition: " + partitionName + ", expected: " + expectedInHelix + ", found additional instances: " + replicaHostsInHelix);
        }
    }
    // Step 3: the map now holds only partitions that Helix has and the static layout does not.
    if (expectMoreInHelixDuringValidate) {
        // The leftovers must be exactly the partitions recorded for this datacenter in
        // partitionsNotForceRemovedByDc (an empty set when there is no entry).
        // NOTE(review): the message prints partitionsNotForceRemovedByDc.get(dcName), which is null when there
        // is no entry, whereas the comparison itself uses an empty set.
        ensureOrThrow(allPartitionsToInstancesInHelix.keySet().equals(partitionsNotForceRemovedByDc.getOrDefault(dcName, new HashSet<>())), "[" + dcName.toUpperCase() + "] Additional partitions in Helix: " + allPartitionsToInstancesInHelix.keySet() + " not what is expected " + partitionsNotForceRemovedByDc.get(dcName));
        info("[{}] *** Helix may have more partitions or replicas than in the given clustermap as removals were not forced.", dcName.toUpperCase());
    } else {
        // Strict mode: no partition may exist in Helix only.
        ensureOrThrow(allPartitionsToInstancesInHelix.isEmpty(), "[" + dcName.toUpperCase() + "] More partitions in Helix than in clustermap, additional partitions: " + allPartitionsToInstancesInHelix.keySet());
    }
    info("[{}] Successfully verified resources and partitions equivalency in dc {}", dcName.toUpperCase(), dcName);
}
Also used : Arrays(java.util.Arrays) ClusterMapUtils(com.github.ambry.clustermap.ClusterMapUtils) SortedSet(java.util.SortedSet) LoggerFactory(org.slf4j.LoggerFactory) JSONException(org.json.JSONException) JSONObject(org.json.JSONObject) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) SharedZkClientFactory(org.apache.helix.zookeeper.impl.factory.SharedZkClientFactory) AccessOption(org.apache.helix.AccessOption) EnumSet(java.util.EnumSet) LeaderStandbySMD(org.apache.helix.model.LeaderStandbySMD) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ZKUtil(org.apache.helix.manager.zk.ZKUtil) Set(java.util.Set) Utils(com.github.ambry.utils.Utils) HelixPropertyStoreConfig(com.github.ambry.config.HelixPropertyStoreConfig) Collectors(java.util.stream.Collectors) Objects(java.util.Objects) ZNRecord(org.apache.helix.zookeeper.datamodel.ZNRecord) CountDownLatch(java.util.concurrent.CountDownLatch) List(java.util.List) Optional(java.util.Optional) IdealState(org.apache.helix.model.IdealState) CommonUtils(com.github.ambry.commons.CommonUtils) HashMap(java.util.HashMap) HelixZkClient(org.apache.helix.zookeeper.api.client.HelixZkClient) ResourceConfig(org.apache.helix.model.ResourceConfig) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) HelixPropertyStore(org.apache.helix.store.HelixPropertyStore) RealmAwareZkClient(org.apache.helix.zookeeper.api.client.RealmAwareZkClient) DataNodeConfigSourceType(com.github.ambry.clustermap.DataNodeConfigSourceType) StateModelDefinition(org.apache.helix.model.StateModelDefinition) Properties(java.util.Properties) Logger(org.slf4j.Logger) VerifiableProperties(com.github.ambry.config.VerifiableProperties) IOException(java.io.IOException) InstanceConfig(org.apache.helix.model.InstanceConfig) File(java.io.File) TimeUnit(java.util.concurrent.TimeUnit) HelixAdmin(org.apache.helix.HelixAdmin) TreeMap(java.util.TreeMap) 
ZNRecordSerializer(org.apache.helix.manager.zk.ZNRecordSerializer) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) ZKHelixAdmin(org.apache.helix.manager.zk.ZKHelixAdmin) Comparator(java.util.Comparator) Collections(java.util.Collections) SortedSet(java.util.SortedSet) EnumSet(java.util.EnumSet) Set(java.util.Set) TreeSet(java.util.TreeSet) HashSet(java.util.HashSet) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) HelixAdmin(org.apache.helix.HelixAdmin) ZKHelixAdmin(org.apache.helix.manager.zk.ZKHelixAdmin) IdealState(org.apache.helix.model.IdealState) HashSet(java.util.HashSet)

Aggregations

HelixAdmin (org.apache.helix.HelixAdmin)104 IdealState (org.apache.helix.model.IdealState)44 ZKHelixAdmin (org.apache.helix.manager.zk.ZKHelixAdmin)43 Test (org.testng.annotations.Test)40 HashMap (java.util.HashMap)30 ZNRecord (org.apache.helix.ZNRecord)28 Date (java.util.Date)22 InstanceConfig (org.apache.helix.model.InstanceConfig)22 ArrayList (java.util.ArrayList)18 Map (java.util.Map)17 HashSet (java.util.HashSet)16 ExternalView (org.apache.helix.model.ExternalView)16 StateModelDefinition (org.apache.helix.model.StateModelDefinition)16 IOException (java.io.IOException)13 ClusterControllerManager (org.apache.helix.integration.manager.ClusterControllerManager)13 HelixDataAccessor (org.apache.helix.HelixDataAccessor)12 MockParticipantManager (org.apache.helix.integration.manager.MockParticipantManager)12 TreeMap (java.util.TreeMap)11 PropertyKey (org.apache.helix.PropertyKey)11 HelixConfigScopeBuilder (org.apache.helix.model.builder.HelixConfigScopeBuilder)11