Use of org.apache.helix.zookeeper.datamodel.ZNRecord in project ambry by LinkedIn.
Class HelixBootstrapUpgradeToolTest, method testDisablePartitionAdminOp.
/**
 * Test that when AdminOperation is specified as DisablePartition, the Helix bootstrap tool disables the given
 * partition only, without changing the IdealState and InstanceConfig. (In practice, this is the first step in
 * decommissioning a replica.)
 * @throws Exception
 */
@Test
public void testDisablePartitionAdminOp() throws Exception {
  String clusterName = CLUSTER_NAME_PREFIX + CLUSTER_NAME_IN_STATIC_CLUSTER_MAP;
  // Test regular bootstrap. This ensures the DataNodeConfig and IdealState are present before testing disabling
  // a certain replica on a specific node.
  long expectedResourceCount =
      (testPartitionLayout.getPartitionLayout().getPartitionCount() - 1) / DEFAULT_MAX_PARTITIONS_PER_RESOURCE + 1;
  writeBootstrapOrUpgrade(expectedResourceCount, false);
  int totalPartitionCount = testPartitionLayout.getPartitionCount();
  // Randomly pick a partition and remove one of its replicas
  Partition testPartition = (Partition) testPartitionLayout.getPartitionLayout()
      .getPartitions(null)
      .get(RANDOM.nextInt(totalPartitionCount));
  ReplicaId removedReplica = testPartition.getReplicaIds()
      .stream()
      .filter(r -> r.getDataNodeId().getDatacenterName().equals("DC1"))
      .findFirst()
      .get();
  testPartition.getReplicas().remove(removedReplica);
  ZkInfo zkInfo = dcsToZkInfo.get(removedReplica.getDataNodeId().getDatacenterName());
  // Create a participant that hosts this removed replica
  Properties props = new Properties();
  props.setProperty("clustermap.host.name", "localhost");
  props.setProperty("clustermap.port", String.valueOf(removedReplica.getDataNodeId().getPort()));
  props.setProperty("clustermap.cluster.name", clusterName);
  props.setProperty("clustermap.datacenter.name", "DC1");
  props.setProperty("clustermap.update.datanode.info", Boolean.toString(true));
  props.setProperty("clustermap.dcs.zk.connect.strings", zkJson.toString(2));
  props.setProperty("clustermap.retry.disable.partition.completion.backoff.ms", Integer.toString(100));
  props.setProperty("clustermap.data.node.config.source.type", dataNodeConfigSourceType.name());
  ClusterMapConfig clusterMapConfig = new ClusterMapConfig(new VerifiableProperties(props));
  HelixParticipant helixParticipant = new HelixParticipant(clusterMapConfig, new HelixFactory(), new MetricRegistry(),
      "localhost:" + zkInfo.getPort(), true);
  PropertyStoreToDataNodeConfigAdapter propertyStoreAdapter =
      dataNodeConfigSourceType == DataNodeConfigSourceType.INSTANCE_CONFIG ? null
          : new PropertyStoreToDataNodeConfigAdapter("localhost:" + zkInfo.getPort(), clusterMapConfig);
  InstanceConfigToDataNodeConfigAdapter.Converter instanceConfigConverter =
      new InstanceConfigToDataNodeConfigAdapter.Converter(clusterMapConfig);
  // Create a HelixAdmin
  ZKHelixAdmin admin = new ZKHelixAdmin("localhost:" + zkInfo.getPort());
  // Write the changes out to the static layout files
  Utils.writeJsonObjectToFile(zkJson, zkLayoutPath);
  Utils.writeJsonObjectToFile(testHardwareLayout.getHardwareLayout().toJSONObject(), hardwareLayoutPath);
  Utils.writeJsonObjectToFile(testPartitionLayout.getPartitionLayout().toJSONObject(), partitionLayoutPath);
  // Make the bootstrap tool block on a count-down latch before removing the ZNodes used for disabling partitions
  blockRemovingNodeLatch = new CountDownLatch(1);
  disablePartitionLatch = new CountDownLatch(activeDcSet.size());
  CountDownLatch bootstrapCompletionLatch = new CountDownLatch(1);
  Utils.newThread(() -> {
    try {
      // Upgrade Helix by updating the IdealState: AdminOperation = DisablePartition
      HelixBootstrapUpgradeUtil.bootstrapOrUpgrade(hardwareLayoutPath, partitionLayoutPath, zkLayoutPath,
          CLUSTER_NAME_PREFIX, dcStr, DEFAULT_MAX_PARTITIONS_PER_RESOURCE, false, false, new HelixAdminFactory(),
          false, ClusterMapConfig.DEFAULT_STATE_MODEL_DEF, DisablePartition, dataNodeConfigSourceType, false);
      bootstrapCompletionLatch.countDown();
    } catch (Exception e) {
      // Do nothing; if an exception occurs here, the subsequent assertions will fail.
    }
  }, false).start();
assertTrue("Disable partition latch didn't come down within 5 seconds", disablePartitionLatch.await(5, TimeUnit.SECONDS));
// Let's attempt to update InstanceConfig/DataNodeConfig via HelixParticipant, which should be blocked
CountDownLatch updateCompletionLatch = new CountDownLatch(1);
Utils.newThread(() -> {
helixParticipant.updateDataNodeInfoInCluster(removedReplica, false);
updateCompletionLatch.countDown();
}, false).start();
// sleep 100 ms to ensure updateDataNodeInfoInCluster is blocked due to disabling partition hasn't completed yet
Thread.sleep(100);
// Ensure the DataNodeConfig still has the replica
String instanceName = getInstanceName(removedReplica.getDataNodeId());
DataNodeConfig currentDataNodeConfig = dataNodeConfigSourceType == DataNodeConfigSourceType.INSTANCE_CONFIG ? instanceConfigConverter.convert(admin.getInstanceConfig(clusterName, instanceName)) : propertyStoreAdapter.get(instanceName);
verifyReplicaInfoInDataNodeConfig(currentDataNodeConfig, removedReplica, true);
  // Verify the ZNode is created for the node on which the partition has been disabled
  Properties properties = new Properties();
  properties.setProperty("helix.property.store.root.path", "/" + clusterName + "/" + PROPERTYSTORE_STR);
  HelixPropertyStoreConfig propertyStoreConfig = new HelixPropertyStoreConfig(new VerifiableProperties(properties));
  HelixPropertyStore<ZNRecord> helixPropertyStore =
      CommonUtils.createHelixPropertyStore("localhost:" + zkInfo.getPort(), propertyStoreConfig, null);
  String path = PARTITION_DISABLED_ZNODE_PATH + getInstanceName(removedReplica.getDataNodeId());
  assertTrue("ZNode is not found for disabled partition node.",
      helixPropertyStore.exists(path, AccessOption.PERSISTENT));
  helixPropertyStore.stop();
  // Unblock the bootstrap tool
  blockRemovingNodeLatch.countDown();
  // Wait for the bootstrap tool to complete
  assertTrue("Helix tool didn't complete within 5 seconds", bootstrapCompletionLatch.await(5, TimeUnit.SECONDS));
  verifyResourceCount(testHardwareLayout.getHardwareLayout(), expectedResourceCount);
  assertTrue("Helix participant didn't complete update within 5 seconds",
      updateCompletionLatch.await(5, TimeUnit.SECONDS));
  InstanceConfig currentInstanceConfig =
      admin.getInstanceConfig(clusterName, getInstanceName(removedReplica.getDataNodeId()));
  // Verify that the replica has been disabled
  String resourceName = null;
  for (String rs : admin.getResourcesInCluster(clusterName)) {
    IdealState is = admin.getResourceIdealState(clusterName, rs);
    if (is.getPartitionSet().contains(removedReplica.getPartitionId().toPathString())) {
      resourceName = rs;
      break;
    }
  }
  List<String> disabledPartition = currentInstanceConfig.getDisabledPartitions(resourceName);
  assertEquals("Disabled partition not as expected",
      Collections.singletonList(removedReplica.getPartitionId().toPathString()), disabledPartition);
  // Verify that the IdealState is unchanged
  verifyIdealStateForPartition(removedReplica, true, 3, expectedResourceCount);
  // Verify the InstanceConfig has changed in its map fields: the disabled partitions are added there and the replica
  // entry has been removed. The disabled-partition entry is the only map-field key that is not a mount path.
  String disabledPartitionStr = currentInstanceConfig.getRecord()
      .getMapFields()
      .keySet()
      .stream()
      .filter(k -> !k.startsWith("/mnt"))
      .findFirst()
      .get();
  // Verify the disabled partition string contains the correct partition
  Map<String, String> expectedDisabledPartitionMap = new HashMap<>();
  expectedDisabledPartitionMap.put(resourceName, removedReplica.getPartitionId().toPathString());
  assertEquals("Mismatch in disabled partition string in InstanceConfig", expectedDisabledPartitionMap,
      currentInstanceConfig.getRecord().getMapField(disabledPartitionStr));
  // Verify the removed replica is no longer in the InstanceConfig
  currentDataNodeConfig =
      dataNodeConfigSourceType == DataNodeConfigSourceType.INSTANCE_CONFIG ? instanceConfigConverter.convert(
          admin.getInstanceConfig(clusterName, instanceName)) : propertyStoreAdapter.get(instanceName);
  verifyReplicaInfoInDataNodeConfig(currentDataNodeConfig, removedReplica, false);
  if (propertyStoreAdapter != null) {
    propertyStoreAdapter.close();
  }
}
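
The core of the test above is a two-latch handshake: the bootstrap thread counts down disablePartitionLatch once it has disabled the partition and then blocks on blockRemovingNodeLatch; the main thread verifies the intermediate state and counts blockRemovingNodeLatch down to let the tool finish, with bootstrapCompletionLatch signaling overall completion. Below is a minimal, self-contained sketch of that pattern; it assumes nothing from ambry, and all class and variable names are illustrative.

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

public class LatchHandshakeSketch {
  public static void main(String[] args) throws InterruptedException {
    // Counted down by the worker when it reaches the state the checker wants to observe.
    CountDownLatch stateReachedLatch = new CountDownLatch(1);
    // Counted down by the checker to let the worker resume.
    CountDownLatch resumeLatch = new CountDownLatch(1);
    // Counted down by the worker when it has finished all of its work.
    CountDownLatch completionLatch = new CountDownLatch(1);
    new Thread(() -> {
      stateReachedLatch.countDown(); // e.g. "partitions are now disabled"
      try {
        resumeLatch.await();         // block until the checker has inspected the intermediate state
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        return;
      }
      completionLatch.countDown();   // e.g. "bootstrap finished"
    }).start();
    // Checker side: wait for the intermediate state, assert on it, then unblock the worker.
    if (!stateReachedLatch.await(5, TimeUnit.SECONDS)) {
      throw new AssertionError("worker never reached the intermediate state");
    }
    // ... assertions against the intermediate state go here ...
    resumeLatch.countDown();
    if (!completionLatch.await(5, TimeUnit.SECONDS)) {
      throw new AssertionError("worker did not complete");
    }
    System.out.println("handshake completed");
  }
}

Using await with a timeout on every latch, as the test does, turns a potential deadlock into a test failure rather than a hang.
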
Use of org.apache.helix.zookeeper.datamodel.ZNRecord in project ambry by LinkedIn.
Class HelixBootstrapUpgradeToolTest, method uploadClusterConfigsAndVerify.
/**
 * Write the layout files out from the constructed in-memory hardware and partition layouts; use the upload cluster
 * config tool to upload the partition seal states to ZooKeeper; verify that the writable partitions are consistent
 * between the two. After verification is done, the partition override config is safely deleted.
 * @throws IOException if a file read error is encountered.
 * @throws JSONException if a JSON parse error is encountered.
 */
private void uploadClusterConfigsAndVerify() throws Exception {
  List<PartitionId> writablePartitions = testPartitionLayout.getPartitionLayout().getWritablePartitions(null);
  Set<String> writableInPartitionLayout = new HashSet<>();
  writablePartitions.forEach(k -> writableInPartitionLayout.add(k.toPathString()));
  Utils.writeJsonObjectToFile(zkJson, zkLayoutPath);
  Utils.writeJsonObjectToFile(testHardwareLayout.getHardwareLayout().toJSONObject(), hardwareLayoutPath);
  Utils.writeJsonObjectToFile(testPartitionLayout.getPartitionLayout().toJSONObject(), partitionLayoutPath);
  HelixBootstrapUpgradeUtil.uploadOrDeleteAdminConfigs(hardwareLayoutPath, partitionLayoutPath, zkLayoutPath,
      CLUSTER_NAME_PREFIX, dcStr, false, new String[]{ClusterMapUtils.PARTITION_OVERRIDE_STR}, null);
  // Check writable partitions in each datacenter
  for (ZkInfo zkInfo : dcsToZkInfo.values()) {
    HelixPropertyStore<ZNRecord> propertyStore =
        CommonUtils.createHelixPropertyStore("localhost:" + zkInfo.getPort(), propertyStoreConfig,
            Collections.singletonList(propertyStoreConfig.rootPath));
    ZNRecord zNRecord = propertyStore.get(ClusterMapUtils.PARTITION_OVERRIDE_ZNODE_PATH, null, AccessOption.PERSISTENT);
    if (!activeDcSet.contains(zkInfo.getDcName())) {
      assertNull(zNRecord);
    } else {
      assertNotNull(zNRecord);
      Map<String, Map<String, String>> overridePartition = zNRecord.getMapFields();
      Set<String> writableInDC = new HashSet<>();
      for (Map.Entry<String, Map<String, String>> entry : overridePartition.entrySet()) {
        if (entry.getValue().get(ClusterMapUtils.PARTITION_STATE).equals(ClusterMapUtils.READ_WRITE_STR)) {
          writableInDC.add(entry.getKey());
        }
      }
      // Verify writable partitions in this DC match the writable partitions in the partition layout
      assertEquals("Mismatch in writable partitions for partitionLayout and propertyStore", writableInPartitionLayout,
          writableInDC);
    }
  }
  // Delete the partition override config
  HelixBootstrapUpgradeUtil.uploadOrDeleteAdminConfigs(hardwareLayoutPath, partitionLayoutPath, zkLayoutPath,
      CLUSTER_NAME_PREFIX, dcStr, true, new String[]{ClusterMapUtils.PARTITION_OVERRIDE_STR}, null);
  // Verify that the config is cleaned up
  for (ZkInfo zkInfo : dcsToZkInfo.values()) {
    HelixPropertyStore<ZNRecord> propertyStore =
        CommonUtils.createHelixPropertyStore("localhost:" + zkInfo.getPort(), propertyStoreConfig,
            Collections.singletonList(propertyStoreConfig.rootPath));
    ZNRecord zNRecord = propertyStore.get(ClusterMapUtils.PARTITION_OVERRIDE_ZNODE_PATH, null, AccessOption.PERSISTENT);
    assertNull("Partition override config should no longer exist", zNRecord);
  }
}
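
Since this page is about ZNRecord, the shape of the override record read above is the key detail: each map field maps a partition name to a small state map, and getMapFields() returns the whole structure. Here is a hedged, self-contained sketch of writing and reading such a record in memory; the literal keys "partitionState" and values "RW"/"RO" stand in for ClusterMapUtils.PARTITION_STATE, READ_WRITE_STR, and the like, and may not match ambry's actual constant values.

import java.util.Collections;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.helix.zookeeper.datamodel.ZNRecord;

public class PartitionOverrideSketch {
  public static void main(String[] args) {
    // Build a record shaped like the partition override config: one map field per partition.
    ZNRecord record = new ZNRecord("PartitionOverride");
    record.setMapField("1", Collections.singletonMap("partitionState", "RW"));
    record.setMapField("2", Collections.singletonMap("partitionState", "RO"));
    // Read it back the way uploadClusterConfigsAndVerify does: collect the writable partitions.
    Set<String> writable = new HashSet<>();
    for (Map.Entry<String, Map<String, String>> entry : record.getMapFields().entrySet()) {
      if ("RW".equals(entry.getValue().get("partitionState"))) {
        writable.add(entry.getKey());
      }
    }
    System.out.println("writable partitions: " + writable); // prints [1]
  }
}

In the real test the record is fetched from ZooKeeper through a HelixPropertyStore<ZNRecord> rather than built in memory, but the map-field access is the same.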