Search in sources :

Example 41 with ZKHelixAdmin

use of org.apache.helix.manager.zk.ZKHelixAdmin in project ambry by linkedin.

the class HelixBootstrapUpgradeToolTest method testUpdateIdealStateAdminOp.

/**
 * Test that when AdminOperation is specified to UpdateIdealState, Helix bootstrap tool updates IdealState only without
 * changing InstanceConfig.
 * @throws Exception
 */
@Test
public void testUpdateIdealStateAdminOp() throws Exception {
    String clusterName = CLUSTER_NAME_PREFIX + CLUSTER_NAME_IN_STATIC_CLUSTER_MAP;
    // Test regular bootstrap. This is to ensure InstanceConfig and IdealState are there before testing changing
    // IdealState (to trigger replica movement)
    long expectedResourceCount = (testPartitionLayout.getPartitionLayout().getPartitionCount() - 1) / DEFAULT_MAX_PARTITIONS_PER_RESOURCE + 1;
    writeBootstrapOrUpgrade(expectedResourceCount, false);
    // Now, change the replica count for two partitions: two adjacent partitions (wrapping around) are picked at random.
    int totalPartitionCount = testPartitionLayout.getPartitionCount();
    int firstPartitionIndex = RANDOM.nextInt(totalPartitionCount);
    int secondPartitionIndex = (firstPartitionIndex + 1) % totalPartitionCount;
    List<PartitionId> allPartitions = testPartitionLayout.getPartitionLayout().getPartitions(null);
    Partition partition1 = (Partition) allPartitions.get(firstPartitionIndex);
    Partition partition2 = (Partition) allPartitions.get(secondPartitionIndex);
    // Add a new replica for partition1. Find a disk on a data node that does not already have a replica for partition1.
    HashSet<DataNodeId> partition1Nodes = new HashSet<>();
    for (ReplicaId replica : partition1.getReplicas()) {
        partition1Nodes.add(replica.getDataNodeId());
    }
    // Keep drawing random disks until one is found in "DC1" on a node that does not host partition1 yet.
    Disk diskForNewReplica;
    do {
        diskForNewReplica = testHardwareLayout.getRandomDisk();
    } while (partition1Nodes.contains(diskForNewReplica.getDataNode()) || !diskForNewReplica.getDataNode().getDatacenterName().equals("DC1"));
    // Add new replica into partition1
    ReplicaId replicaToAdd = new Replica(partition1, diskForNewReplica, testHardwareLayout.clusterMapConfig);
    partition1.addReplica(replicaToAdd);
    // Remove a replica from partition2.
    ReplicaId removedReplica = partition2.getReplicas().remove(0);
    String dcName = replicaToAdd.getDataNodeId().getDatacenterName();
    ZkInfo zkInfo = dcsToZkInfo.get(dcName);
    String instanceName = getInstanceName(replicaToAdd.getDataNodeId());
    ZKHelixAdmin admin = new ZKHelixAdmin("localhost:" + zkInfo.getPort());
    try {
        InstanceConfig instanceConfig = admin.getInstanceConfig(clusterName, instanceName);
        // snapshot of the InstanceConfig taken before the upgrade, for subsequent verification
        InstanceConfig previousInstanceConfig = new InstanceConfig(instanceConfig.getRecord());
        // Persist the modified layouts so that the bootstrap tool picks them up from the static files.
        Utils.writeJsonObjectToFile(zkJson, zkLayoutPath);
        Utils.writeJsonObjectToFile(testHardwareLayout.getHardwareLayout().toJSONObject(), hardwareLayoutPath);
        Utils.writeJsonObjectToFile(testPartitionLayout.getPartitionLayout().toJSONObject(), partitionLayoutPath);
        // upgrade Helix by updating IdealState: AdminOperation = UpdateIdealState
        HelixBootstrapUpgradeUtil.bootstrapOrUpgrade(hardwareLayoutPath, partitionLayoutPath, zkLayoutPath, CLUSTER_NAME_PREFIX, dcStr, DEFAULT_MAX_PARTITIONS_PER_RESOURCE, false, false, new HelixAdminFactory(), false, ClusterMapConfig.DEFAULT_STATE_MODEL_DEF, UpdateIdealState, dataNodeConfigSourceType, false);
        verifyResourceCount(testHardwareLayout.getHardwareLayout(), expectedResourceCount);
        // verify IdealState has been updated
        // 1. new added replica is indeed present in the IdealState
        verifyIdealStateForPartition(replicaToAdd, true, 4, expectedResourceCount);
        // 2. removed old replica is no longer present in the IdealState
        verifyIdealStateForPartition(removedReplica, false, 2, expectedResourceCount);
        // verify the InstanceConfig stays unchanged
        InstanceConfig currentInstanceConfig = admin.getInstanceConfig(clusterName, instanceName);
        assertEquals("InstanceConfig should stay unchanged", previousInstanceConfig.getRecord(), currentInstanceConfig.getRecord());
    } finally {
        // Always release the admin, even when an assertion above fails, so the test does not leak it.
        admin.close();
    }
}
Also used : ZKHelixAdmin(org.apache.helix.manager.zk.ZKHelixAdmin) InstanceConfig(org.apache.helix.model.InstanceConfig) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 42 with ZKHelixAdmin

use of org.apache.helix.manager.zk.ZKHelixAdmin in project ambry by linkedin.

the class HelixBootstrapUpgradeToolTest method testDisablePartitionAdminOp.

/**
 * Test when AdminOperation is specified to DisablePartition, Helix bootstrap tool is able to disable certain partition
 * only without changing IdealState and InstanceConfig. (In practice, this is first step to decommission a replica)
 * @throws Exception
 */
@Test
public void testDisablePartitionAdminOp() throws Exception {
    String clusterName = CLUSTER_NAME_PREFIX + CLUSTER_NAME_IN_STATIC_CLUSTER_MAP;
    // Test regular bootstrap. This is to ensure DataNodeConfig and IdealState are there before testing disabling
    // certain replica on specific node.
    long expectedResourceCount = (testPartitionLayout.getPartitionLayout().getPartitionCount() - 1) / DEFAULT_MAX_PARTITIONS_PER_RESOURCE + 1;
    writeBootstrapOrUpgrade(expectedResourceCount, false);
    int totalPartitionCount = testPartitionLayout.getPartitionCount();
    // Randomly pick a partition to remove one of its replicas
    Partition testPartition = (Partition) testPartitionLayout.getPartitionLayout().getPartitions(null).get(RANDOM.nextInt(totalPartitionCount));
    ReplicaId removedReplica = testPartition.getReplicaIds().stream().filter(r -> r.getDataNodeId().getDatacenterName().equals("DC1")).findFirst().get();
    testPartition.getReplicas().remove(removedReplica);
    ZkInfo zkInfo = dcsToZkInfo.get(removedReplica.getDataNodeId().getDatacenterName());
    String zkConnectStr = "localhost:" + zkInfo.getPort();
    String instanceName = getInstanceName(removedReplica.getDataNodeId());
    String removedPartitionName = removedReplica.getPartitionId().toPathString();
    // create a participant that hosts this removed replica
    Properties props = new Properties();
    props.setProperty("clustermap.host.name", "localhost");
    props.setProperty("clustermap.port", String.valueOf(removedReplica.getDataNodeId().getPort()));
    props.setProperty("clustermap.cluster.name", clusterName);
    props.setProperty("clustermap.datacenter.name", "DC1");
    props.setProperty("clustermap.update.datanode.info", Boolean.toString(true));
    props.setProperty("clustermap.dcs.zk.connect.strings", zkJson.toString(2));
    props.setProperty("clustermap.retry.disable.partition.completion.backoff.ms", Integer.toString(100));
    props.setProperty("clustermap.data.node.config.source.type", dataNodeConfigSourceType.name());
    ClusterMapConfig clusterMapConfig = new ClusterMapConfig(new VerifiableProperties(props));
    HelixParticipant helixParticipant = new HelixParticipant(clusterMapConfig, new HelixFactory(), new MetricRegistry(), zkConnectStr, true);
    // The property store adapter is only needed when DataNodeConfigs are not sourced from InstanceConfig.
    PropertyStoreToDataNodeConfigAdapter propertyStoreAdapter = dataNodeConfigSourceType == DataNodeConfigSourceType.INSTANCE_CONFIG ? null : new PropertyStoreToDataNodeConfigAdapter(zkConnectStr, clusterMapConfig);
    ZKHelixAdmin admin = null;
    try {
        InstanceConfigToDataNodeConfigAdapter.Converter instanceConfigConverter = new InstanceConfigToDataNodeConfigAdapter.Converter(clusterMapConfig);
        // create HelixAdmin
        admin = new ZKHelixAdmin(zkConnectStr);
        // Write changes to static files
        Utils.writeJsonObjectToFile(zkJson, zkLayoutPath);
        Utils.writeJsonObjectToFile(testHardwareLayout.getHardwareLayout().toJSONObject(), hardwareLayoutPath);
        Utils.writeJsonObjectToFile(testPartitionLayout.getPartitionLayout().toJSONObject(), partitionLayoutPath);
        // make bootstrap tool blocked by count down latch before removing znodes for disabling partitions
        blockRemovingNodeLatch = new CountDownLatch(1);
        disablePartitionLatch = new CountDownLatch(activeDcSet.size());
        CountDownLatch bootstrapCompletionLatch = new CountDownLatch(1);
        Utils.newThread(() -> {
            try {
                // Upgrade Helix by updating IdealState: AdminOperation = DisablePartition
                HelixBootstrapUpgradeUtil.bootstrapOrUpgrade(hardwareLayoutPath, partitionLayoutPath, zkLayoutPath, CLUSTER_NAME_PREFIX, dcStr, DEFAULT_MAX_PARTITIONS_PER_RESOURCE, false, false, new HelixAdminFactory(), false, ClusterMapConfig.DEFAULT_STATE_MODEL_DEF, DisablePartition, dataNodeConfigSourceType, false);
                bootstrapCompletionLatch.countDown();
            } catch (Exception ignored) {
                // Deliberately ignored: the completion latch is never counted down in this case, so the
                // await on bootstrapCompletionLatch below fails the test.
            }
        }, false).start();
        assertTrue("Disable partition latch didn't come down within 5 seconds", disablePartitionLatch.await(5, TimeUnit.SECONDS));
        // Let's attempt to update InstanceConfig/DataNodeConfig via HelixParticipant, which should be blocked
        CountDownLatch updateCompletionLatch = new CountDownLatch(1);
        Utils.newThread(() -> {
            helixParticipant.updateDataNodeInfoInCluster(removedReplica, false);
            updateCompletionLatch.countDown();
        }, false).start();
        // sleep 100 ms to ensure updateDataNodeInfoInCluster is blocked due to disabling partition hasn't completed yet
        Thread.sleep(100);
        // Ensure the DataNodeConfig still has the replica
        DataNodeConfig currentDataNodeConfig = dataNodeConfigSourceType == DataNodeConfigSourceType.INSTANCE_CONFIG ? instanceConfigConverter.convert(admin.getInstanceConfig(clusterName, instanceName)) : propertyStoreAdapter.get(instanceName);
        verifyReplicaInfoInDataNodeConfig(currentDataNodeConfig, removedReplica, true);
        // verify the znode is created for the node on which partition has been disabled.
        Properties properties = new Properties();
        properties.setProperty("helix.property.store.root.path", "/" + clusterName + "/" + PROPERTYSTORE_STR);
        HelixPropertyStoreConfig propertyStoreConfig = new HelixPropertyStoreConfig(new VerifiableProperties(properties));
        HelixPropertyStore<ZNRecord> helixPropertyStore = CommonUtils.createHelixPropertyStore(zkConnectStr, propertyStoreConfig, null);
        try {
            String path = PARTITION_DISABLED_ZNODE_PATH + instanceName;
            assertTrue("ZNode is not found for disabled partition node.", helixPropertyStore.exists(path, AccessOption.PERSISTENT));
        } finally {
            // stop the store even if the znode assertion fails
            helixPropertyStore.stop();
        }
        // unblock HelixBootstrapTool
        blockRemovingNodeLatch.countDown();
        // waiting for bootstrap tool to complete
        assertTrue("Helix tool didn't complete within 5 seconds", bootstrapCompletionLatch.await(5, TimeUnit.SECONDS));
        verifyResourceCount(testHardwareLayout.getHardwareLayout(), expectedResourceCount);
        assertTrue("Helix participant didn't complete update within 5 seconds", updateCompletionLatch.await(5, TimeUnit.SECONDS));
        InstanceConfig currentInstanceConfig = admin.getInstanceConfig(clusterName, instanceName);
        // Verify that replica has been disabled: first locate the resource whose IdealState holds the partition
        String resourceName = null;
        for (String rs : admin.getResourcesInCluster(clusterName)) {
            IdealState is = admin.getResourceIdealState(clusterName, rs);
            if (is.getPartitionSet().contains(removedPartitionName)) {
                resourceName = rs;
                break;
            }
        }
        // Fail with a clear message instead of passing a null resource name to the lookups below.
        assertTrue("No resource found containing partition " + removedPartitionName, resourceName != null);
        List<String> disabledPartition = currentInstanceConfig.getDisabledPartitions(resourceName);
        assertEquals("Disabled partition not as expected", Collections.singletonList(removedPartitionName), disabledPartition);
        // Verify that IdealState has no change
        verifyIdealStateForPartition(removedReplica, true, 3, expectedResourceCount);
        // Verify the InstanceConfig is changed in MapFields (Disabled partitions are added to this field, also the replica entry has been removed)
        // The first map-field key that does not start with "/mnt" is taken to be the disabled-partition entry.
        String disabledPartitionStr = currentInstanceConfig.getRecord().getMapFields().keySet().stream().filter(k -> !k.startsWith("/mnt")).findFirst().get();
        // Verify the disabled partition string contains correct partition
        Map<String, String> expectedDisabledPartitionMap = new HashMap<>();
        expectedDisabledPartitionMap.put(resourceName, removedPartitionName);
        assertEquals("Mismatch in disabled partition string in InstanceConfig", expectedDisabledPartitionMap, currentInstanceConfig.getRecord().getMapField(disabledPartitionStr));
        // verify the removed replica is no longer in InstanceConfig
        currentDataNodeConfig = dataNodeConfigSourceType == DataNodeConfigSourceType.INSTANCE_CONFIG ? instanceConfigConverter.convert(admin.getInstanceConfig(clusterName, instanceName)) : propertyStoreAdapter.get(instanceName);
        verifyReplicaInfoInDataNodeConfig(currentDataNodeConfig, removedReplica, false);
    } finally {
        // Release the admin and the adapter regardless of the test outcome; nested so that a failure
        // while closing the admin does not prevent the adapter from being closed.
        try {
            if (admin != null) {
                admin.close();
            }
        } finally {
            if (propertyStoreAdapter != null) {
                propertyStoreAdapter.close();
            }
        }
    }
}
Also used : HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HelixPropertyStoreConfig(com.github.ambry.config.HelixPropertyStoreConfig) Properties(java.util.Properties) VerifiableProperties(com.github.ambry.config.VerifiableProperties) IdealState(org.apache.helix.model.IdealState) ZKHelixAdmin(org.apache.helix.manager.zk.ZKHelixAdmin) InstanceConfig(org.apache.helix.model.InstanceConfig) ZNRecord(org.apache.helix.zookeeper.datamodel.ZNRecord) VerifiableProperties(com.github.ambry.config.VerifiableProperties) MetricRegistry(com.codahale.metrics.MetricRegistry) CountDownLatch(java.util.concurrent.CountDownLatch) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) HelixException(org.apache.helix.HelixException) JSONException(org.json.JSONException) IOException(java.io.IOException) Test(org.junit.Test)

Example 43 with ZKHelixAdmin

use of org.apache.helix.manager.zk.ZKHelixAdmin in project ambry by linkedin.

the class TestUtils method isSrcDestSync.

/**
 * A method to verify resources and partitions in src cluster and dest cluster are same.
 * <p>
 * Every resource of the src cluster, except those whose name contains one of
 * {@code HelixVcrUtil.ignoreResourceKeyWords}, must exist in the dest cluster, and every partition in the src
 * IdealState of such a resource must also be in the dest IdealState. Extra resources or partitions that exist
 * only in the dest cluster are not treated as a mismatch.
 * @param srcZkString ZooKeeper connect string of the src cluster.
 * @param srcClusterName name of the src cluster.
 * @param destZkString ZooKeeper connect string of the dest cluster.
 * @param destClusterName name of the dest cluster.
 * @return {@code true} if the dest cluster covers all non-ignored resources and partitions of the src cluster,
 *         {@code false} otherwise.
 */
public static boolean isSrcDestSync(String srcZkString, String srcClusterName, String destZkString, String destClusterName) {
    HelixAdmin srcAdmin = new ZKHelixAdmin(srcZkString);
    try {
        Set<String> srcResources = new HashSet<>(srcAdmin.getResourcesInCluster(srcClusterName));
        HelixAdmin destAdmin = new ZKHelixAdmin(destZkString);
        try {
            Set<String> destResources = new HashSet<>(destAdmin.getResourcesInCluster(destClusterName));
            for (String resource : srcResources) {
                if (HelixVcrUtil.ignoreResourceKeyWords.stream().anyMatch(resource::contains)) {
                    System.out.println("Resource " + resource + " from src cluster is ignored");
                    continue;
                }
                if (!destResources.contains(resource)) {
                    return false;
                }
                // check if every partition exist.
                Set<String> srcPartitions = srcAdmin.getResourceIdealState(srcClusterName, resource).getPartitionSet();
                Set<String> destPartitions = destAdmin.getResourceIdealState(destClusterName, resource).getPartitionSet();
                if (!destPartitions.containsAll(srcPartitions)) {
                    return false;
                }
            }
            return true;
        } finally {
            // release the dest admin on every exit path (early return or exception)
            destAdmin.close();
        }
    } finally {
        // release the src admin on every exit path (early return or exception)
        srcAdmin.close();
    }
}
Also used : ZKHelixAdmin(org.apache.helix.manager.zk.ZKHelixAdmin) HelixAdmin(org.apache.helix.HelixAdmin) ZKHelixAdmin(org.apache.helix.manager.zk.ZKHelixAdmin) HashSet(java.util.HashSet)

Example 44 with ZKHelixAdmin

use of org.apache.helix.manager.zk.ZKHelixAdmin in project ambry by linkedin.

the class HelixBootstrapUpgradeToolTest method testAddStateModelDef.

/**
 * Test {@link HelixBootstrapUpgradeUtil} addStateModelDef() method.
 * <p>
 * Adding a state model def before the cluster exists is expected to throw {@link IllegalStateException}. After
 * bootstrapping with the old state model def, the new def is added and the old one is added again; every
 * active datacenter is then expected to report exactly two state model defs, including the ambry one.
 * @throws Exception
 */
@Test
public void testAddStateModelDef() throws Exception {
    Utils.writeJsonObjectToFile(zkJson, zkLayoutPath);
    Utils.writeJsonObjectToFile(testHardwareLayout.getHardwareLayout().toJSONObject(), hardwareLayoutPath);
    Utils.writeJsonObjectToFile(testPartitionLayout.getPartitionLayout().toJSONObject(), partitionLayoutPath);
    // test add state model to non-exist cluster, which should fail
    try {
        HelixBootstrapUpgradeUtil.addStateModelDef(hardwareLayoutPath, partitionLayoutPath, zkLayoutPath, CLUSTER_NAME_PREFIX, dcStr, ClusterMapConfig.AMBRY_STATE_MODEL_DEF);
        fail("should fail due to non-exist cluster");
    } catch (IllegalStateException e) {
        // expected
    }
    // bootstrap a cluster
    HelixBootstrapUpgradeUtil.bootstrapOrUpgrade(hardwareLayoutPath, partitionLayoutPath, zkLayoutPath, CLUSTER_NAME_PREFIX, dcStr, DEFAULT_MAX_PARTITIONS_PER_RESOURCE, false, false, new HelixAdminFactory(), false, ClusterMapConfig.OLD_STATE_MODEL_DEF, BootstrapCluster, dataNodeConfigSourceType, false);
    // add new state model def
    HelixBootstrapUpgradeUtil.addStateModelDef(hardwareLayoutPath, partitionLayoutPath, zkLayoutPath, CLUSTER_NAME_PREFIX, dcStr, ClusterMapConfig.AMBRY_STATE_MODEL_DEF);
    // add existing state model def should be no-op
    HelixBootstrapUpgradeUtil.addStateModelDef(hardwareLayoutPath, partitionLayoutPath, zkLayoutPath, CLUSTER_NAME_PREFIX, dcStr, ClusterMapConfig.OLD_STATE_MODEL_DEF);
    // ensure that active dcs have new state model def
    String clusterName = CLUSTER_NAME_PREFIX + CLUSTER_NAME_IN_STATIC_CLUSTER_MAP;
    for (Datacenter dc : testHardwareLayout.getHardwareLayout().getDatacenters()) {
        ZkInfo zkInfo = dcsToZkInfo.get(dc.getName());
        ZKHelixAdmin admin = new ZKHelixAdmin("localhost:" + zkInfo.getPort());
        try {
            if (!activeDcSet.contains(dc.getName())) {
                Assert.assertFalse("Cluster should not be present, as dc " + dc.getName() + " is not enabled", admin.getClusters().contains(clusterName));
            } else {
                assertEquals("Mismatch in number of state model defs in cluster", 2, admin.getStateModelDefs(clusterName).size());
                assertTrue("Missing ambry state model in cluster", admin.getStateModelDefs(clusterName).contains(ClusterMapConfig.AMBRY_STATE_MODEL_DEF));
            }
        } finally {
            // one admin is created per datacenter; release it before moving to the next one
            admin.close();
        }
    }
}
Also used : ZKHelixAdmin(org.apache.helix.manager.zk.ZKHelixAdmin) Test(org.junit.Test)

Example 45 with ZKHelixAdmin

use of org.apache.helix.manager.zk.ZKHelixAdmin in project helix by apache.

the class TestClusterInMaintenanceModeWhenReachingOfflineInstancesLimit method testWithDisabledInstancesLimit.

/**
 * Disables participants one by one (starting at index 2) and checks the maintenance signal: it must stay
 * absent while {@code _maxOfflineInstancesAllowed} instances are disabled, and must be present with a reason
 * once one more instance is disabled. Afterwards all touched instances are re-enabled and maintenance mode
 * is switched off.
 */
@Test
public void testWithDisabledInstancesLimit() throws Exception {
    MaintenanceSignal signal = _dataAccessor.getProperty(_dataAccessor.keyBuilder().maintenance());
    Assert.assertNull(signal);
    HelixAdmin helixAdmin = new ZKHelixAdmin(_gZkClient);

    // disable instances up to the allowed limit
    int index = 2;
    while (index < 2 + _maxOfflineInstancesAllowed) {
        helixAdmin.enableInstance(CLUSTER_NAME, _participants.get(index).getInstanceName(), false);
        index++;
    }
    Thread.sleep(500);
    signal = _dataAccessor.getProperty(_dataAccessor.keyBuilder().maintenance());
    Assert.assertNull(signal);

    // disable one more instance, the one right after the last disabled index
    helixAdmin.enableInstance(CLUSTER_NAME, _participants.get(index).getInstanceName(), false);
    Thread.sleep(500);
    signal = _dataAccessor.getProperty(_dataAccessor.keyBuilder().maintenance());
    Assert.assertNotNull(signal);
    Assert.assertNotNull(signal.getReason());

    // re-enable every instance disabled above, then turn maintenance mode off
    for (int j = 2; j < 2 + _maxOfflineInstancesAllowed + 1; j++) {
        helixAdmin.enableInstance(CLUSTER_NAME, _participants.get(j).getInstanceName(), true);
    }
    helixAdmin.enableMaintenanceMode(CLUSTER_NAME, false);
}
Also used : ZKHelixAdmin(org.apache.helix.manager.zk.ZKHelixAdmin) MaintenanceSignal(org.apache.helix.model.MaintenanceSignal) HelixAdmin(org.apache.helix.HelixAdmin) ZKHelixAdmin(org.apache.helix.manager.zk.ZKHelixAdmin) Test(org.testng.annotations.Test)

Aggregations

ZKHelixAdmin (org.apache.helix.manager.zk.ZKHelixAdmin)70 HelixAdmin (org.apache.helix.HelixAdmin)31 IdealState (org.apache.helix.model.IdealState)25 Test (org.testng.annotations.Test)23 Date (java.util.Date)21 InstanceConfig (org.apache.helix.model.InstanceConfig)16 ClusterControllerManager (org.apache.helix.integration.manager.ClusterControllerManager)14 ZNRecord (org.apache.helix.ZNRecord)13 MockParticipantManager (org.apache.helix.integration.manager.MockParticipantManager)13 ZNRecordSerializer (org.apache.helix.manager.zk.ZNRecordSerializer)12 ZkClient (org.apache.helix.manager.zk.ZkClient)12 ClusterStateVerifier (org.apache.helix.tools.ClusterStateVerifier)12 ZKHelixDataAccessor (org.apache.helix.manager.zk.ZKHelixDataAccessor)11 StateModelDefinition (org.apache.helix.model.StateModelDefinition)11 HashMap (java.util.HashMap)10 HashSet (java.util.HashSet)10 HelixDataAccessor (org.apache.helix.HelixDataAccessor)8 ExternalView (org.apache.helix.model.ExternalView)8 Test (org.junit.Test)8 BestPossAndExtViewZkVerifier (org.apache.helix.tools.ClusterStateVerifier.BestPossAndExtViewZkVerifier)7