Search in sources :

Example 96 with InstanceConfig

use of org.apache.helix.model.InstanceConfig in project ambry by linkedin.

the class HelixBootstrapUpgradeUtil method controlPartitionState.

/**
 * Enable or disable a single partition on one node (i.e. DisablePartition, EnablePartition). The direction is taken
 * from {@code helixAdminOperation}; the updated InstanceConfig is written back through the HelixAdmin of the only dc
 * present in {@code adminForDc}.
 * @throws IllegalStateException if {@code adminForDc} does not hold exactly one dc, or if no port number was given and
 *                               the host is not present in the static clustermap.
 */
private void controlPartitionState() {
    // This operation works against exactly one dc, so adminForDc must hold a single entry.
    if (adminForDc.size() != 1) {
        throw new IllegalStateException("The dc count is not 1 for partition state control operation");
    }
    HelixAdmin helixAdmin = adminForDc.values().iterator().next();
    boolean enablePartition = helixAdminOperation == HelixAdminOperation.EnablePartition;

    // Resolve the instance name: directly from host + port when a port is known, otherwise by looking the host up in
    // the static clustermap.
    String instanceName;
    if (portNum != null) {
        instanceName = ClusterMapUtils.getInstanceName(hostName, portNum);
    } else {
        DataNodeId matchingNode = staticClusterMap.getDataNodeIds()
            .stream()
            .filter(node -> node.getHostname().equals(hostName))
            .findFirst()
            .orElseThrow(() -> new IllegalStateException("Host " + hostName + " is not found in static clustermap"));
        instanceName = getInstanceName((DataNode) matchingNode);
    }

    InstanceConfig instanceConfig = helixAdmin.getInstanceConfig(clusterName, instanceName);
    String resourceNameForPartition = getResourceNameOfPartition(helixAdmin, clusterName, partitionName);
    info("{} partition {} under resource {} on node {}", enablePartition ? "Enabling" : "Disabling", partitionName, resourceNameForPartition, instanceName);
    instanceConfig.setInstanceEnabledForPartition(resourceNameForPartition, partitionName, enablePartition);

    // Drop the disabled-partition map field altogether when it is present but no longer has any entries.
    Map<String, Map<String, String>> mapFields = instanceConfig.getRecord().getMapFields();
    Map<String, String> disabledPartitions = mapFields.get(HELIX_DISABLED_PARTITION_STR);
    if (disabledPartitions != null && disabledPartitions.isEmpty()) {
        mapFields.remove(HELIX_DISABLED_PARTITION_STR);
    }

    helixAdmin.setInstanceConfig(clusterName, instanceName, instanceConfig);

    // bookkeeping counters for this run
    instancesUpdated.getAndIncrement();
    if (enablePartition) {
        partitionsEnabled.getAndIncrement();
    } else {
        partitionsDisabled.getAndIncrement();
    }
}
Also used : Arrays(java.util.Arrays) ClusterMapUtils(com.github.ambry.clustermap.ClusterMapUtils) SortedSet(java.util.SortedSet) LoggerFactory(org.slf4j.LoggerFactory) JSONException(org.json.JSONException) JSONObject(org.json.JSONObject) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) SharedZkClientFactory(org.apache.helix.zookeeper.impl.factory.SharedZkClientFactory) AccessOption(org.apache.helix.AccessOption) EnumSet(java.util.EnumSet) LeaderStandbySMD(org.apache.helix.model.LeaderStandbySMD) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ZKUtil(org.apache.helix.manager.zk.ZKUtil) Set(java.util.Set) Utils(com.github.ambry.utils.Utils) HelixPropertyStoreConfig(com.github.ambry.config.HelixPropertyStoreConfig) Collectors(java.util.stream.Collectors) Objects(java.util.Objects) ZNRecord(org.apache.helix.zookeeper.datamodel.ZNRecord) CountDownLatch(java.util.concurrent.CountDownLatch) List(java.util.List) Optional(java.util.Optional) IdealState(org.apache.helix.model.IdealState) CommonUtils(com.github.ambry.commons.CommonUtils) HashMap(java.util.HashMap) HelixZkClient(org.apache.helix.zookeeper.api.client.HelixZkClient) ResourceConfig(org.apache.helix.model.ResourceConfig) TreeSet(java.util.TreeSet) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) HelixPropertyStore(org.apache.helix.store.HelixPropertyStore) RealmAwareZkClient(org.apache.helix.zookeeper.api.client.RealmAwareZkClient) DataNodeConfigSourceType(com.github.ambry.clustermap.DataNodeConfigSourceType) StateModelDefinition(org.apache.helix.model.StateModelDefinition) Properties(java.util.Properties) Logger(org.slf4j.Logger) VerifiableProperties(com.github.ambry.config.VerifiableProperties) IOException(java.io.IOException) InstanceConfig(org.apache.helix.model.InstanceConfig) File(java.io.File) TimeUnit(java.util.concurrent.TimeUnit) HelixAdmin(org.apache.helix.HelixAdmin) TreeMap(java.util.TreeMap) 
ZNRecordSerializer(org.apache.helix.manager.zk.ZNRecordSerializer) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) ZKHelixAdmin(org.apache.helix.manager.zk.ZKHelixAdmin) Comparator(java.util.Comparator) Collections(java.util.Collections) InstanceConfig(org.apache.helix.model.InstanceConfig) HelixAdmin(org.apache.helix.HelixAdmin) ZKHelixAdmin(org.apache.helix.manager.zk.ZKHelixAdmin)

Example 97 with InstanceConfig

use of org.apache.helix.model.InstanceConfig in project ambry by linkedin.

the class HelixParticipant method setPartitionDisabledState.

/**
 * Disable/enable partition on local node. The disabled replica list in DataNodeConfig is updated first; only when that
 * list actually changed are the DataNodeConfig persisted and the Helix InstanceConfig updated as well. Failures of
 * either write are counted in {@code participantMetrics.setReplicaDisabledStateErrorCount} and logged, not thrown.
 * The whole operation runs while holding {@code helixAdministrationLock}.
 * @param partitionName name of partition on local node
 * @param disable {@code true} to disable the given partition on current node, {@code false} to enable it.
 */
protected void setPartitionDisabledState(String partitionName, boolean disable) {
    synchronized (helixAdministrationLock) {
        // Step 1: bring the disabled replica list in DataNodeConfig in line with the request (ListFields only).
        DataNodeConfig dataNodeConfig = getDataNodeConfig();
        boolean disabledReplicasChanged;
        if (disable) {
            disabledReplicasChanged = dataNodeConfig.getDisabledReplicas().add(partitionName);
            if (disabledReplicasChanged) {
                logger.info("Adding the partition {} to disabledReplicas list", partitionName);
            }
        } else {
            disabledReplicasChanged = dataNodeConfig.getDisabledReplicas().remove(partitionName);
            if (disabledReplicasChanged) {
                logger.info("Removing the partition {} from disabledReplicas list", partitionName);
            }
        }

        if (disabledReplicasChanged) {
            logger.info("Setting config with list of disabled replicas: {}", dataNodeConfig.getDisabledReplicas());
            boolean dataNodeConfigPersisted = dataNodeConfigSource.set(dataNodeConfig);
            if (!dataNodeConfigPersisted) {
                participantMetrics.setReplicaDisabledStateErrorCount.inc();
                logger.warn("setReplicaDisabledState() failed DataNodeConfig update");
            }

            // Step 2: the list changed, so use the Helix native call to enable/disable the partition on this node.
            // This modifies MapFields in InstanceConfig and is attempted even if the DataNodeConfig write above failed.
            InstanceConfig instanceConfig = getInstanceConfig();
            String resourceNameForPartition = getResourceNameOfPartition(helixAdmin, clusterName, partitionName);
            logger.info("{} replica {} on current node", disable ? "Disabling" : "Enabling", partitionName);
            boolean enabled = !disable;
            instanceConfig.setInstanceEnabledForPartition(resourceNameForPartition, partitionName, enabled);
            boolean instanceConfigPersisted = helixAdmin.setInstanceConfig(clusterName, instanceName, instanceConfig);
            if (!instanceConfigPersisted) {
                participantMetrics.setReplicaDisabledStateErrorCount.inc();
                logger.warn("setReplicaDisabledState() failed InstanceConfig update");
            }
        }
        logger.info("Disabled state of partition {} is updated", partitionName);
    }
}
Also used : InstanceConfig(org.apache.helix.model.InstanceConfig)

Example 98 with InstanceConfig

use of org.apache.helix.model.InstanceConfig in project ambry by linkedin.

the class HelixBootstrapUpgradeUtil method migrateToPropertyStore.

/**
 * Convert instance configs to the new DataNodeConfig format and persist them in the property store. One worker thread
 * is started per dc in {@code adminForDc}; this method blocks until every worker has counted down the latch.
 * Errors inside a worker are logged and do not propagate to the caller.
 * @throws InterruptedException if the calling thread is interrupted while waiting for the workers to finish.
 */
private void migrateToPropertyStore() throws InterruptedException {
    CountDownLatch migrationComplete = new CountDownLatch(adminForDc.size());
    // each dc gets its own thread so that the dcs are migrated in parallel
    adminForDc.forEach((dcName, helixAdmin) -> {
        Runnable migrationTask = () -> {
            try {
                logger.info("Starting property store migration in {}", dcName);
                ClusterMapConfig config = getClusterMapConfig(clusterName, dcName, null);
                InstanceConfigToDataNodeConfigAdapter.Converter instanceConfigConverter = new InstanceConfigToDataNodeConfigAdapter.Converter(config);
                String zkConnectStr = dataCenterToZkAddress.get(dcName).getZkConnectStrs().get(0);
                try (DataNodeConfigSource source = new PropertyStoreToDataNodeConfigAdapter(zkConnectStr, config)) {
                    List<String> instanceNames = helixAdmin.getInstancesInCluster(clusterName);
                    logger.info("Found {} instances in cluster", instanceNames.size());
                    for (String instanceName : instanceNames) {
                        logger.info("Copying config for node {}", instanceName);
                        InstanceConfig instanceConfig = helixAdmin.getInstanceConfig(clusterName, instanceName);
                        DataNodeConfig dataNodeConfig = instanceConfigConverter.convert(instanceConfig);
                        logger.debug("Writing {} to property store in {}", dataNodeConfig, dcName);
                        boolean persisted = source.set(dataNodeConfig);
                        if (!persisted) {
                            // a failed write for one node is logged; the remaining nodes are still processed
                            logger.error("Failed to persist config for node {} in the property store.", dataNodeConfig.getInstanceName());
                        }
                    }
                }
                logger.info("Successfully migrated to property store in {}", dcName);
            } catch (Throwable t) {
                logger.error("Error while migrating to property store in {}", dcName, t);
            } finally {
                // always release the waiter, whether this dc succeeded or failed
                migrationComplete.countDown();
            }
        };
        Utils.newThread(migrationTask, false).start();
    });
    migrationComplete.await();
}
Also used : InstanceConfig(org.apache.helix.model.InstanceConfig) CountDownLatch(java.util.concurrent.CountDownLatch) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig)

Example 99 with InstanceConfig

use of org.apache.helix.model.InstanceConfig in project ambry by linkedin.

the class HelixVcrClusterParticipant method participate.

/**
 * Connect to the VCR Helix cluster as a participant: create the Helix manager, register the state model factory
 * (and the container deletion sync task when {@code cloudConfig.cloudContainerCompactionEnabled} is set), connect,
 * and make sure the instance config carries the {@code VCR_HELIX_CONFIG_READY} flag. The instance config is written
 * back through HelixAdmin in both the "already ready" and the "just marked ready" cases.
 * @throws Exception if any of the underlying Helix or factory calls fails.
 */
@Override
public void participate() throws Exception {
    manager = HelixManagerFactory.getZKHelixManager(vcrClusterName, vcrInstanceName, InstanceType.PARTICIPANT, cloudConfig.vcrClusterZkConnectString);
    VcrStateModelFactory stateModelFactory = Utils.getObj(cloudConfig.vcrHelixStateModelFactoryClass, this);
    manager.getStateMachineEngine().registerStateModelFactory(stateModelFactory.getStateModelName(), stateModelFactory);
    if (cloudConfig.cloudContainerCompactionEnabled) {
        registerContainerDeletionSyncTask(manager.getStateMachineEngine());
    }
    manager.connect();
    helixAdmin = manager.getClusterManagmentTool();

    InstanceConfig instanceConfig = helixAdmin.getInstanceConfig(vcrClusterName, vcrInstanceName);
    boolean configAlreadyReady = instanceConfig.getRecord().getBooleanField(VCR_HELIX_CONFIG_READY, false);
    if (configAlreadyReady) {
        logger.info("VCR_HELIX_CONFIG_READY is true.");
    } else {
        // Publish the optional ports first, then flip the ready flag.
        if (cloudConfig.vcrSslPort != null) {
            instanceConfig.getRecord().setSimpleField(SSL_PORT_STR, Integer.toString(cloudConfig.vcrSslPort));
        }
        if (cloudConfig.vcrHttp2Port != null) {
            instanceConfig.getRecord().setSimpleField(HTTP2_PORT_STR, Integer.toString(cloudConfig.vcrHttp2Port));
        }
        // Set VCR_HELIX_CONFIG_READY to be true. Listeners take action only when this value is True.
        instanceConfig.getRecord().setBooleanField(VCR_HELIX_CONFIG_READY, true);
        logger.info("Set VCR_HELIX_CONFIG_READY to true.");
    }
    helixAdmin.setInstanceConfig(vcrClusterName, vcrInstanceName, instanceConfig);
    logger.info("Participated in HelixVcrCluster successfully.");
}
Also used : InstanceConfig(org.apache.helix.model.InstanceConfig)

Example 100 with InstanceConfig

use of org.apache.helix.model.InstanceConfig in project ambry by linkedin.

the class ClusterChangeHandlerTest method moveReplicaTest.

/**
 * Test the case where a current replica is moved between existing nodes. A replica of a randomly chosen writable
 * partition is first added to a local-dc node that does not host it, then an old local-dc replica is removed, disabled
 * in Helix and finally dropped from the IdealState. The replica count seen by the HelixClusterManager is verified
 * after the addition and after the IdealState update.
 * @throws Exception if cluster setup, layout persistence or Helix upgrade fails.
 */
@Test
public void moveReplicaTest() throws Exception {
    // create a HelixClusterManager with DynamicClusterChangeHandler
    Properties properties = new Properties();
    properties.putAll(props);
    properties.setProperty("clustermap.cluster.change.handler.type", "DynamicClusterChangeHandler");
    ClusterMapConfig clusterMapConfig = new ClusterMapConfig(new VerifiableProperties(properties));
    HelixClusterManager helixClusterManager = new HelixClusterManager(clusterMapConfig, selfInstanceName, helixManagerFactory, new MetricRegistry());
    // The manager is closed in a finally block so that a failing assertion or an exception part-way through the test
    // does not leave it open.
    try {
        // pick a partition and move one of its replicas
        Partition testPartition = (Partition) testPartitionLayout.getPartitionLayout().getRandomWritablePartition(null, null);
        int previousReplicaCnt = testPartition.getReplicaIds().size();
        // 1. find out nodes in local dc that host this partition
        List<DataNodeId> localDcNodes = getPartitionDataNodesFromDc(testPartition, localDc);
        // 2. then find a node in local dc that doesn't host this partition (this is the node we will add replica to)
        Datacenter localDatacenter = testHardwareLayout.getHardwareLayout().getDatacenters().stream().filter(dc -> dc.getName().equals(localDc)).findFirst().get();
        // since we didn't populate bootstrap replica map, we have to avoid adding replica to currentNode
        DataNode nodeToAddReplica = localDatacenter.getDataNodes().stream().filter(node -> !localDcNodes.contains(node) && node != currentNode).findFirst().get();
        testPartitionLayout.addReplicaToPartition(nodeToAddReplica, testPartition);
        Utils.writeJsonObjectToFile(testPartitionLayout.getPartitionLayout().toJSONObject(), partitionLayoutPath);
        // 3. We upgrade helix by adding new replica to the chosen node in local dc. This is to mock "replica addition" on
        // chosen node and chosen node updates its instanceConfig in Helix. There should be 7 (= 6+1) replicas in the
        // intermediate state.
        helixCluster.upgradeWithNewPartitionLayout(partitionLayoutPath, HelixBootstrapUpgradeUtil.HelixAdminOperation.BootstrapCluster);
        PartitionId partitionInManager = helixClusterManager.getAllPartitionIds(null).stream().filter(p -> p.toPathString().equals(testPartition.toPathString())).findFirst().get();
        assertEquals("Replica count of testing partition is not correct", previousReplicaCnt + 1, partitionInManager.getReplicaIds().size());
        // 4. find a replica (from same partition) in local dc that is not just added one
        Replica oldReplica = (Replica) testPartition.getReplicaIds().stream().filter(r -> r.getDataNodeId().getDatacenterName().equals(localDc) && r.getDataNodeId() != nodeToAddReplica).findFirst().get();
        testPartitionLayout.removeReplicaFromPartition(oldReplica);
        Utils.writeJsonObjectToFile(testPartitionLayout.getPartitionLayout().toJSONObject(), partitionLayoutPath);
        // 5. disable the replica in Helix to mock replica decommission which will remove it from InstanceConfig but keep it
        // in IdealState temporarily.
        helixCluster.upgradeWithNewPartitionLayout(partitionLayoutPath, HelixBootstrapUpgradeUtil.HelixAdminOperation.DisablePartition);
        Set<ReplicaId> replicasInDifferentStates = new HashSet<>();
        for (ReplicaState state : EnumSet.of(ReplicaState.STANDBY, ReplicaState.LEADER, ReplicaState.OFFLINE, ReplicaState.INACTIVE, ReplicaState.BOOTSTRAP)) {
            replicasInDifferentStates.addAll(partitionInManager.getReplicaIdsByState(state, null));
        }
        // verify there is no null replica
        replicasInDifferentStates.forEach(r -> assertNotNull("found null replica", r));
        // 6. updates the IdealState in Helix. The number of replicas should become 6 again.
        helixCluster.upgradeWithNewPartitionLayout(partitionLayoutPath, HelixBootstrapUpgradeUtil.HelixAdminOperation.UpdateIdealState);
        assertEquals("Replica count of testing partition is not correct", previousReplicaCnt, partitionInManager.getReplicaIds().size());
    } finally {
        helixClusterManager.close();
    }
}
Also used : Arrays(java.util.Arrays) ClusterMapUtils(com.github.ambry.clustermap.ClusterMapUtils) RunWith(org.junit.runner.RunWith) HashMap(java.util.HashMap) Random(java.util.Random) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) TestUtils(com.github.ambry.clustermap.TestUtils) JSONObject(org.json.JSONObject) Map(java.util.Map) After(org.junit.After) Counter(com.codahale.metrics.Counter) Assume(org.junit.Assume) EnumSet(java.util.EnumSet) Parameterized(org.junit.runners.Parameterized) MetricRegistry(com.codahale.metrics.MetricRegistry) Properties(java.util.Properties) Files(java.nio.file.Files) VerifiableProperties(com.github.ambry.config.VerifiableProperties) Set(java.util.Set) Utils(com.github.ambry.utils.Utils) IOException(java.io.IOException) Test(org.junit.Test) Collectors(java.util.stream.Collectors) InstanceConfig(org.apache.helix.model.InstanceConfig) File(java.io.File) AdditionalAnswers(org.mockito.AdditionalAnswers) ZNRecord(org.apache.helix.zookeeper.datamodel.ZNRecord) AtomicLong(java.util.concurrent.atomic.AtomicLong) Mockito(org.mockito.Mockito) List(java.util.List) HelixClusterManagerTest(com.github.ambry.clustermap.HelixClusterManagerTest) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) Assert(org.junit.Assert) Collections(java.util.Collections) VerifiableProperties(com.github.ambry.config.VerifiableProperties) MetricRegistry(com.codahale.metrics.MetricRegistry) Properties(java.util.Properties) VerifiableProperties(com.github.ambry.config.VerifiableProperties) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) HashSet(java.util.HashSet) Test(org.junit.Test) HelixClusterManagerTest(com.github.ambry.clustermap.HelixClusterManagerTest)

Aggregations

InstanceConfig (org.apache.helix.model.InstanceConfig)149 ArrayList (java.util.ArrayList)40 Test (org.testng.annotations.Test)35 HashMap (java.util.HashMap)32 HashSet (java.util.HashSet)28 ZNRecord (org.apache.helix.ZNRecord)26 IdealState (org.apache.helix.model.IdealState)24 ExternalView (org.apache.helix.model.ExternalView)23 Map (java.util.Map)21 HelixException (org.apache.helix.HelixException)21 HelixAdmin (org.apache.helix.HelixAdmin)20 List (java.util.List)19 ZKHelixAdmin (org.apache.helix.manager.zk.ZKHelixAdmin)19 HelixDataAccessor (org.apache.helix.HelixDataAccessor)17 ClusterMapConfig (com.github.ambry.config.ClusterMapConfig)15 Test (org.junit.Test)15 Set (java.util.Set)13 VerifiableProperties (com.github.ambry.config.VerifiableProperties)12 IOException (java.io.IOException)12 ZNRecord (org.apache.helix.zookeeper.datamodel.ZNRecord)12