Search in sources :

Example 11 with HelixPropertyStoreConfig

use of com.github.ambry.config.HelixPropertyStoreConfig in project ambry by linkedin.

the class HelixBootstrapUpgradeToolTest method testDisablePartitionAdminOp.

/**
 * Test that when AdminOperation is set to DisablePartition, the Helix bootstrap tool disables only the chosen
 * partition on the target node and leaves the IdealState unchanged. (In practice, this is the first step to
 * decommission a replica.)
 * <p>
 * The test bootstraps the cluster, removes one DC1 replica from the static layout, runs the tool on a background
 * thread while holding it on {@code blockRemovingNodeLatch}, and checks that the DataNodeConfig still contains the
 * replica and that the "partition disabled" znode exists. After releasing the tool it verifies the disabled
 * partition list, the IdealState and the final DataNodeConfig.
 * <p>
 * All ZooKeeper-backed helpers created here are released in {@code finally} blocks, and the tool is always
 * unblocked, so a failing assertion does not leave clients open or the background thread waiting on the latch.
 * @throws Exception
 */
@Test
public void testDisablePartitionAdminOp() throws Exception {
    String clusterName = CLUSTER_NAME_PREFIX + CLUSTER_NAME_IN_STATIC_CLUSTER_MAP;
    // Test regular bootstrap. This is to ensure DataNodeConfig and IdealState are there before testing disabling
    // certain replica on specific node.
    long expectedResourceCount = (testPartitionLayout.getPartitionLayout().getPartitionCount() - 1) / DEFAULT_MAX_PARTITIONS_PER_RESOURCE + 1;
    writeBootstrapOrUpgrade(expectedResourceCount, false);
    int totalPartitionCount = testPartitionLayout.getPartitionCount();
    // Randomly pick a partition to remove one of its replicas
    Partition testPartition = (Partition) testPartitionLayout.getPartitionLayout().getPartitions(null).get(RANDOM.nextInt(totalPartitionCount));
    ReplicaId removedReplica = testPartition.getReplicaIds().stream().filter(r -> r.getDataNodeId().getDatacenterName().equals("DC1")).findFirst().get();
    testPartition.getReplicas().remove(removedReplica);
    ZkInfo zkInfo = dcsToZkInfo.get(removedReplica.getDataNodeId().getDatacenterName());
    // create a participant that hosts this removed replica
    Properties props = new Properties();
    props.setProperty("clustermap.host.name", "localhost");
    props.setProperty("clustermap.port", String.valueOf(removedReplica.getDataNodeId().getPort()));
    props.setProperty("clustermap.cluster.name", clusterName);
    props.setProperty("clustermap.datacenter.name", "DC1");
    props.setProperty("clustermap.update.datanode.info", Boolean.toString(true));
    props.setProperty("clustermap.dcs.zk.connect.strings", zkJson.toString(2));
    props.setProperty("clustermap.retry.disable.partition.completion.backoff.ms", Integer.toString(100));
    props.setProperty("clustermap.data.node.config.source.type", dataNodeConfigSourceType.name());
    ClusterMapConfig clusterMapConfig = new ClusterMapConfig(new VerifiableProperties(props));
    HelixParticipant helixParticipant = new HelixParticipant(clusterMapConfig, new HelixFactory(), new MetricRegistry(), "localhost:" + zkInfo.getPort(), true);
    // Local handle on the latch that holds the bootstrap tool back; it is counted down in the finally block so the
    // tool is released even when an assertion fails before the regular unblock step.
    CountDownLatch unblockToolLatch = new CountDownLatch(1);
    // Declared outside the try block so that the finally block can release whatever was actually created.
    PropertyStoreToDataNodeConfigAdapter propertyStoreAdapter = null;
    ZKHelixAdmin admin = null;
    try {
        // The adapter is only needed (and only created) when DataNodeConfigs do not live in InstanceConfig.
        propertyStoreAdapter = dataNodeConfigSourceType == DataNodeConfigSourceType.INSTANCE_CONFIG ? null : new PropertyStoreToDataNodeConfigAdapter("localhost:" + zkInfo.getPort(), clusterMapConfig);
        InstanceConfigToDataNodeConfigAdapter.Converter instanceConfigConverter = new InstanceConfigToDataNodeConfigAdapter.Converter(clusterMapConfig);
        // create HelixAdmin
        admin = new ZKHelixAdmin("localhost:" + zkInfo.getPort());
        // Write changes to static files
        Utils.writeJsonObjectToFile(zkJson, zkLayoutPath);
        Utils.writeJsonObjectToFile(testHardwareLayout.getHardwareLayout().toJSONObject(), hardwareLayoutPath);
        Utils.writeJsonObjectToFile(testPartitionLayout.getPartitionLayout().toJSONObject(), partitionLayoutPath);
        // make bootstrap tool blocked by count down latch before removing znodes for disabling partitions
        blockRemovingNodeLatch = unblockToolLatch;
        disablePartitionLatch = new CountDownLatch(activeDcSet.size());
        CountDownLatch bootstrapCompletionLatch = new CountDownLatch(1);
        Utils.newThread(() -> {
            try {
                // Upgrade Helix by updating IdealState: AdminOperation = DisablePartition
                HelixBootstrapUpgradeUtil.bootstrapOrUpgrade(hardwareLayoutPath, partitionLayoutPath, zkLayoutPath, CLUSTER_NAME_PREFIX, dcStr, DEFAULT_MAX_PARTITIONS_PER_RESOURCE, false, false, new HelixAdminFactory(), false, ClusterMapConfig.DEFAULT_STATE_MODEL_DEF, DisablePartition, dataNodeConfigSourceType, false);
                bootstrapCompletionLatch.countDown();
            } catch (Exception ignored) {
                // Deliberately ignored: the completion latch is not counted down in this case, so the
                // "Helix tool didn't complete" assertion below fails the test.
            }
        }, false).start();
        assertTrue("Disable partition latch didn't come down within 5 seconds", disablePartitionLatch.await(5, TimeUnit.SECONDS));
        // Let's attempt to update InstanceConfig/DataNodeConfig via HelixParticipant, which should be blocked
        CountDownLatch updateCompletionLatch = new CountDownLatch(1);
        Utils.newThread(() -> {
            helixParticipant.updateDataNodeInfoInCluster(removedReplica, false);
            updateCompletionLatch.countDown();
        }, false).start();
        // sleep 100 ms to ensure updateDataNodeInfoInCluster is blocked due to disabling partition hasn't completed yet
        Thread.sleep(100);
        // Ensure the DataNodeConfig still has the replica
        String instanceName = getInstanceName(removedReplica.getDataNodeId());
        DataNodeConfig currentDataNodeConfig = dataNodeConfigSourceType == DataNodeConfigSourceType.INSTANCE_CONFIG ? instanceConfigConverter.convert(admin.getInstanceConfig(clusterName, instanceName)) : propertyStoreAdapter.get(instanceName);
        verifyReplicaInfoInDataNodeConfig(currentDataNodeConfig, removedReplica, true);
        // verify the znode is created for the node on which partition has been disabled.
        Properties properties = new Properties();
        properties.setProperty("helix.property.store.root.path", "/" + clusterName + "/" + PROPERTYSTORE_STR);
        HelixPropertyStoreConfig propertyStoreConfig = new HelixPropertyStoreConfig(new VerifiableProperties(properties));
        HelixPropertyStore<ZNRecord> helixPropertyStore = CommonUtils.createHelixPropertyStore("localhost:" + zkInfo.getPort(), propertyStoreConfig, null);
        try {
            String path = PARTITION_DISABLED_ZNODE_PATH + instanceName;
            assertTrue("ZNode is not found for disabled partition node.", helixPropertyStore.exists(path, AccessOption.PERSISTENT));
        } finally {
            // Stop the store even if the znode assertion fails.
            helixPropertyStore.stop();
        }
        // unblock HelixBootstrapTool
        unblockToolLatch.countDown();
        // waiting for bootstrap tool to complete
        assertTrue("Helix tool didn't complete within 5 seconds", bootstrapCompletionLatch.await(5, TimeUnit.SECONDS));
        verifyResourceCount(testHardwareLayout.getHardwareLayout(), expectedResourceCount);
        assertTrue("Helix participant didn't complete update within 5 seconds", updateCompletionLatch.await(5, TimeUnit.SECONDS));
        InstanceConfig currentInstanceConfig = admin.getInstanceConfig(clusterName, instanceName);
        // Verify that replica has been disabled
        String removedPartitionName = removedReplica.getPartitionId().toPathString();
        String resourceName = null;
        for (String rs : admin.getResourcesInCluster(clusterName)) {
            IdealState is = admin.getResourceIdealState(clusterName, rs);
            if (is.getPartitionSet().contains(removedPartitionName)) {
                resourceName = rs;
                break;
            }
        }
        // Fail with a clear message instead of passing a null resource name to the lookups below.
        assertTrue("No resource contains partition " + removedPartitionName, resourceName != null);
        List<String> disabledPartition = currentInstanceConfig.getDisabledPartitions(resourceName);
        assertEquals("Disabled partition not as expected", Collections.singletonList(removedPartitionName), disabledPartition);
        // Verify that IdealState has no change
        verifyIdealStateForPartition(removedReplica, true, 3, expectedResourceCount);
        // Verify the InstanceConfig is changed in MapFields (Disabled partitions are added to this field, also the replica entry has been removed)
        String disabledPartitionStr = currentInstanceConfig.getRecord().getMapFields().keySet().stream().filter(k -> !k.startsWith("/mnt")).findFirst().get();
        // Verify the disabled partition string contains correct partition
        Map<String, String> expectedDisabledPartitionMap = new HashMap<>();
        expectedDisabledPartitionMap.put(resourceName, removedPartitionName);
        assertEquals("Mismatch in disabled partition string in InstanceConfig", expectedDisabledPartitionMap, currentInstanceConfig.getRecord().getMapField(disabledPartitionStr));
        // verify the removed replica is no longer in InstanceConfig
        currentDataNodeConfig = dataNodeConfigSourceType == DataNodeConfigSourceType.INSTANCE_CONFIG ? instanceConfigConverter.convert(admin.getInstanceConfig(clusterName, instanceName)) : propertyStoreAdapter.get(instanceName);
        verifyReplicaInfoInDataNodeConfig(currentDataNodeConfig, removedReplica, false);
    } finally {
        // countDown() does nothing once the count is zero, so this is safe on the success path as well.
        unblockToolLatch.countDown();
        try {
            if (propertyStoreAdapter != null) {
                propertyStoreAdapter.close();
            }
        } finally {
            if (admin != null) {
                admin.close();
            }
        }
    }
}
Also used : HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HelixPropertyStoreConfig(com.github.ambry.config.HelixPropertyStoreConfig) Properties(java.util.Properties) VerifiableProperties(com.github.ambry.config.VerifiableProperties) IdealState(org.apache.helix.model.IdealState) ZKHelixAdmin(org.apache.helix.manager.zk.ZKHelixAdmin) InstanceConfig(org.apache.helix.model.InstanceConfig) ZNRecord(org.apache.helix.zookeeper.datamodel.ZNRecord) VerifiableProperties(com.github.ambry.config.VerifiableProperties) MetricRegistry(com.codahale.metrics.MetricRegistry) CountDownLatch(java.util.concurrent.CountDownLatch) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) HelixException(org.apache.helix.HelixException) JSONException(org.json.JSONException) IOException(java.io.IOException) Test(org.junit.Test)

Example 12 with HelixPropertyStoreConfig

use of com.github.ambry.config.HelixPropertyStoreConfig in project ambry by linkedin.

the class HelixBootstrapUpgradeToolTest method initialize.

/**
 * One-time fixture setup: creates the temporary working directory, builds the {@link HelixPropertyStoreConfig}
 * rooted at {@code ROOT_PATH}, and registers a {@code ZkInfo} for every datacenter in {@code dcs}, assigning
 * consecutive ports starting at 2200.
 * @throws IOException
 */
@BeforeClass
public static void initialize() throws IOException {
    tempDirPath = getTempDir("helixBootstrapUpgrade-");

    Properties helixStoreProperties = new Properties();
    helixStoreProperties.setProperty("helix.property.store.root.path", ROOT_PATH);
    VerifiableProperties verifiableStoreProperties = new VerifiableProperties(helixStoreProperties);
    propertyStoreConfig = new HelixPropertyStoreConfig(verifiableStoreProperties);

    // The n-th datacenter gets port (2200 + n).
    final int firstZkPort = 2200;
    for (int dcIndex = 0; dcIndex < dcs.length; dcIndex++) {
        int zkPort = firstZkPort + dcIndex;
        dcsToZkInfo.put(dcs[dcIndex], new ZkInfo(tempDirPath, dcs[dcIndex], ids[dcIndex], zkPort, true));
    }
}
Also used : VerifiableProperties(com.github.ambry.config.VerifiableProperties) HelixPropertyStoreConfig(com.github.ambry.config.HelixPropertyStoreConfig) Properties(java.util.Properties) VerifiableProperties(com.github.ambry.config.VerifiableProperties) BeforeClass(org.junit.BeforeClass)

Aggregations

HelixPropertyStoreConfig (com.github.ambry.config.HelixPropertyStoreConfig)12 VerifiableProperties (com.github.ambry.config.VerifiableProperties)12 MetricRegistry (com.codahale.metrics.MetricRegistry)7 Test (org.junit.Test)7 Properties (java.util.Properties)6 ZNRecord (org.apache.helix.zookeeper.datamodel.ZNRecord)3 CountDownLatch (java.util.concurrent.CountDownLatch)2 Account (com.github.ambry.account.Account)1 ClusterMapConfig (com.github.ambry.config.ClusterMapConfig)1 File (java.io.File)1 IOException (java.io.IOException)1 HashMap (java.util.HashMap)1 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)1 HelixException (org.apache.helix.HelixException)1 ZKHelixAdmin (org.apache.helix.manager.zk.ZKHelixAdmin)1 IdealState (org.apache.helix.model.IdealState)1 InstanceConfig (org.apache.helix.model.InstanceConfig)1 JSONException (org.json.JSONException)1 Before (org.junit.Before)1 BeforeClass (org.junit.BeforeClass)1