use of com.github.ambry.config.ClusterMapConfig in project ambry by linkedin.
the class HelixClusterManagerTest method inconsistentReplicaCapacityTest.
/**
* Test the case where replicas of the same partition have different capacities (which should block startup)
* @throws Exception
*/
@Test
public void inconsistentReplicaCapacityTest() throws Exception {
assumeTrue(listenCrossColo);
clusterManager.close();
metricRegistry = new MetricRegistry();
String staticClusterName = "TestOnly";
File tempDir = Files.createTempDirectory("helixClusterManagerTest").toFile();
tempDir.deleteOnExit();
String tempDirPath = tempDir.getAbsolutePath();
String testHardwareLayoutPath = tempDirPath + File.separator + "hardwareLayoutTest.json";
String testPartitionLayoutPath = tempDirPath + File.separator + "partitionLayoutTest.json";
String testZkLayoutPath = tempDirPath + File.separator + "zkLayoutPath.json";
// initialize test hardware layout and partition layout, create mock helix cluster for testing.
TestHardwareLayout testHardwareLayout1 = constructInitialHardwareLayoutJSON(staticClusterName);
TestPartitionLayout testPartitionLayout1 = constructInitialPartitionLayoutJSON(testHardwareLayout1, 3, localDc);
JSONObject zkJson = constructZkLayoutJSON(dcsToZkInfo.values());
Utils.writeJsonObjectToFile(zkJson, testZkLayoutPath);
Utils.writeJsonObjectToFile(testHardwareLayout1.getHardwareLayout().toJSONObject(), testHardwareLayoutPath);
Utils.writeJsonObjectToFile(testPartitionLayout1.getPartitionLayout().toJSONObject(), testPartitionLayoutPath);
MockHelixCluster testCluster = new MockHelixCluster("AmbryTest-", testHardwareLayoutPath, testPartitionLayoutPath, testZkLayoutPath);
List<DataNode> initialNodes = testHardwareLayout1.getAllExistingDataNodes();
Partition partitionToTest = (Partition) testPartitionLayout1.getPartitionLayout().getPartitions(null).get(0);
// add a new node into cluster
testHardwareLayout1.addNewDataNodes(1);
Utils.writeJsonObjectToFile(testHardwareLayout1.getHardwareLayout().toJSONObject(), testHardwareLayoutPath);
DataNode newAddedNode = testHardwareLayout1.getAllExistingDataNodes().stream().filter(n -> !initialNodes.contains(n)).findAny().get();
// add a new replica on new node for partitionToTest
Disk diskOnNewNode = newAddedNode.getDisks().get(0);
// deliberately change the partition's capacity so that the new replica picks up the new capacity
partitionToTest.replicaCapacityInBytes += 1;
partitionToTest.addReplica(new Replica(partitionToTest, diskOnNewNode, testHardwareLayout1.clusterMapConfig));
Utils.writeJsonObjectToFile(testPartitionLayout1.getPartitionLayout().toJSONObject(), testPartitionLayoutPath);
testCluster.upgradeWithNewHardwareLayout(testHardwareLayoutPath);
testCluster.upgradeWithNewPartitionLayout(testPartitionLayoutPath, HelixBootstrapUpgradeUtil.HelixAdminOperation.BootstrapCluster);
// reset hardware/partition layout, which also resets the replica capacity of partitionToTest. However, it won't touch
// the instanceConfig of the newly added node because that node is not in the hardware layout, so the replica on the newly
// added node still has the larger capacity. We use this particular replica to mock the inconsistent replica capacity case.
// Note that the instanceConfig of the new node is still kept in the cluster because upgrading the cluster didn't force-remove
// instanceConfigs that are not present in the static clustermap.
testHardwareLayout1 = constructInitialHardwareLayoutJSON(staticClusterName);
testPartitionLayout1 = constructInitialPartitionLayoutJSON(testHardwareLayout1, 3, localDc);
Utils.writeJsonObjectToFile(testHardwareLayout1.getHardwareLayout().toJSONObject(), testHardwareLayoutPath);
Utils.writeJsonObjectToFile(testPartitionLayout1.getPartitionLayout().toJSONObject(), testPartitionLayoutPath);
testCluster.upgradeWithNewHardwareLayout(testHardwareLayoutPath);
testCluster.upgradeWithNewPartitionLayout(testPartitionLayoutPath, HelixBootstrapUpgradeUtil.HelixAdminOperation.BootstrapCluster);
Properties props = new Properties();
props.setProperty("clustermap.host.name", hostname);
props.setProperty("clustermap.cluster.name", "AmbryTest-" + staticClusterName);
props.setProperty("clustermap.datacenter.name", localDc);
props.setProperty("clustermap.port", Integer.toString(portNum));
props.setProperty("clustermap.dcs.zk.connect.strings", zkJson.toString(2));
props.setProperty("clustermap.current.xid", Long.toString(CURRENT_XID));
ClusterMapConfig clusterMapConfig = new ClusterMapConfig(new VerifiableProperties(props));
// Instantiation should fail because the aforementioned replica has a larger capacity than its peers.
try {
new HelixClusterManager(clusterMapConfig, selfInstanceName, new MockHelixManagerFactory(testCluster, null, null), metricRegistry);
fail("Initialization should fail due to inconsistent replica capacity");
} catch (IOException e) {
// expected
}
}
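The try/catch/fail pattern above can also be expressed with JUnit 4.13+'s assertThrows. A minimal sketch, assuming org.junit.Assert.assertThrows is statically imported and the same fixtures (clusterMapConfig, selfInstanceName, testCluster, metricRegistry) are in scope:
// Sketch only: equivalent failure assertion using assertThrows instead of try/catch/fail.
assertThrows(IOException.class,
    () -> new HelixClusterManager(clusterMapConfig, selfInstanceName,
        new MockHelixManagerFactory(testCluster, null, null), metricRegistry));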
use of com.github.ambry.config.ClusterMapConfig in project ambry by linkedin.
the class HelixClusterManagerTest method instantiationTest.
/**
* Test instantiations.
* @throws Exception
*/
@Test
public void instantiationTest() throws Exception {
assumeTrue(!overrideEnabled);
// Several good instantiations happen in the constructor itself.
assertEquals(0L, metricRegistry.getGauges().get(HelixClusterManager.class.getName() + ".instantiationFailed").getValue());
if (clusterManager instanceof HelixClusterManager) {
verifyInitialClusterChanges((HelixClusterManager) clusterManager, helixCluster, helixDcs);
}
int savedPort = dcsToZkInfo.get(remoteDc).getPort();
// Connectivity failure to the remote datacenter should not prevent instantiation.
dcsToZkInfo.get(remoteDc).setPort(0);
Set<com.github.ambry.utils.TestUtils.ZkInfo> zkInfos = new HashSet<>(dcsToZkInfo.values());
JSONObject invalidZkJson = constructZkLayoutJSON(zkInfos);
if (cloudDc != null) {
addCloudDc(invalidZkJson, (byte) zkInfos.size(), cloudDc);
}
Properties props = new Properties();
props.setProperty("clustermap.host.name", hostname);
props.setProperty("clustermap.port", Integer.toString(portNum));
props.setProperty("clustermap.cluster.name", clusterNamePrefixInHelix + clusterNameStatic);
props.setProperty("clustermap.datacenter.name", localDc);
props.setProperty("clustermap.dcs.zk.connect.strings", invalidZkJson.toString(2));
ClusterMapConfig invalidClusterMapConfig = new ClusterMapConfig(new VerifiableProperties(props));
metricRegistry = new MetricRegistry();
HelixClusterManager clusterManager = new HelixClusterManager(invalidClusterMapConfig, selfInstanceName, new MockHelixManagerFactory(helixCluster, null, null), metricRegistry);
assertEquals(0L, metricRegistry.getGauges().get(HelixClusterManager.class.getName() + ".instantiationFailed").getValue());
assertEquals(1L, metricRegistry.getGauges().get(HelixClusterManager.class.getName() + ".instantiationExceptionCount").getValue());
verifyInitialClusterChanges(clusterManager, helixCluster, new String[] { localDc });
// Local dc connectivity failure should fail instantiation.
dcsToZkInfo.get(remoteDc).setPort(savedPort);
dcsToZkInfo.get(localDc).setPort(0);
zkInfos = new HashSet<>(dcsToZkInfo.values());
invalidZkJson = constructZkLayoutJSON(zkInfos);
props.setProperty("clustermap.dcs.zk.connect.strings", invalidZkJson.toString(2));
invalidClusterMapConfig = new ClusterMapConfig(new VerifiableProperties(props));
metricRegistry = new MetricRegistry();
try {
new HelixClusterManager(invalidClusterMapConfig, selfInstanceName, new MockHelixManagerFactory(helixCluster, null, null), metricRegistry);
fail("Instantiation should have failed with invalid zk addresses");
} catch (IOException e) {
assertEquals(1L, metricRegistry.getGauges().get(HelixClusterManager.class.getName() + ".instantiationFailed").getValue());
assertEquals(1L, metricRegistry.getGauges().get(HelixClusterManager.class.getName() + ".instantiationExceptionCount").getValue());
}
metricRegistry = new MetricRegistry();
try {
new HelixClusterManager(clusterMapConfig, selfInstanceName, new MockHelixManagerFactory(helixCluster, null, new Exception("beBad")), metricRegistry);
fail("Instantiation should fail with a HelixManager factory that throws exception on listener registrations");
} catch (Exception e) {
assertEquals(1L, metricRegistry.getGauges().get(HelixClusterManager.class.getName() + ".instantiationFailed").getValue());
assertEquals("beBad", e.getCause().getMessage());
}
}
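All of these tests build a ClusterMapConfig the same way. A hypothetical helper (a sketch, not part of the test class) that captures the property keys used above; whether every key is strictly required by ClusterMapConfig is an assumption here:
// Hypothetical helper: builds a ClusterMapConfig from the property keys used in these tests.
private static ClusterMapConfig buildClusterMapConfig(String hostname, int port, String clusterName,
    String dcName, String zkConnectJson) {
  Properties props = new Properties();
  props.setProperty("clustermap.host.name", hostname);
  props.setProperty("clustermap.port", Integer.toString(port));
  props.setProperty("clustermap.cluster.name", clusterName);
  props.setProperty("clustermap.datacenter.name", dcName);
  props.setProperty("clustermap.dcs.zk.connect.strings", zkConnectJson);
  return new ClusterMapConfig(new VerifiableProperties(props));
}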
use of com.github.ambry.config.ClusterMapConfig in project ambry by linkedin.
the class HelixClusterManagerTest method testCloudDcAsLocal.
/**
* Test startup when setting the cloud datacenter as the local datacenter.
* @throws Exception
*/
@Test
public void testCloudDcAsLocal() throws Exception {
assumeTrue(cloudDc != null);
assumeTrue(!overrideEnabled);
String hostname = "localhost";
String selfInstanceName = getInstanceName(hostname, null);
Properties props = new Properties();
props.setProperty("clustermap.host.name", hostname);
props.setProperty("clustermap.cluster.name", clusterNamePrefixInHelix + clusterNameStatic);
props.setProperty("clustermap.datacenter.name", cloudDc);
props.setProperty("clustermap.dcs.zk.connect.strings", zkJson.toString(2));
props.setProperty("clustermap.current.xid", Long.toString(CURRENT_XID));
props.setProperty("clustermap.enable.partition.override", Boolean.toString(overrideEnabled));
props.setProperty("clustermap.listen.cross.colo", Boolean.toString(listenCrossColo));
ClusterMapConfig clusterMapConfig = new ClusterMapConfig(new VerifiableProperties(props));
MockHelixManagerFactory helixManagerFactory = new MockHelixManagerFactory(helixCluster, new HashMap<>(), null);
metricRegistry = new MetricRegistry();
clusterManager = new HelixClusterManager(clusterMapConfig, selfInstanceName, helixManagerFactory, metricRegistry);
verifyInitialClusterChanges((HelixClusterManager) clusterManager, helixCluster, helixDcs);
}
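A follow-up assertion that could be appended to the test body above (a sketch; it reuses the instantiationFailed gauge checked in instantiationTest):
// Sketch: confirm the successful startup through the instantiationFailed gauge.
assertEquals(0L, metricRegistry.getGauges()
    .get(HelixClusterManager.class.getName() + ".instantiationFailed").getValue());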
use of com.github.ambry.config.ClusterMapConfig in project ambry by linkedin.
the class ClusterChangeHandlerTest method initializationSuccessTest.
/**
* 1. test static initialization success case
* 2. verify live instance change is able to make node HardwareState.AVAILABLE
* 3. verify partition override behaves correctly (if enabled)
* 4. verify equivalence between {@link SimpleClusterChangeHandler} and {@link DynamicClusterChangeHandler} in terms of
* in-memory cluster info.
* @throws Exception
*/
@Test
public void initializationSuccessTest() throws Exception {
// After the Helix bootstrap tool adds instances to the cluster, MockHelixAdmin brings them up by default. Let's test a more
// realistic case where all instances are added but no node has participated yet. For the dynamic cluster change handler,
// all instances in this case should be initialized to UNAVAILABLE; subsequent live instance changes will bring them up
// once they have participated in the cluster.
helixCluster.bringAllInstancesDown();
ClusterMapConfig clusterMapConfig1 = new ClusterMapConfig(new VerifiableProperties(props));
HelixClusterManager managerWithSimpleHandler = new HelixClusterManager(clusterMapConfig1, selfInstanceName, helixManagerFactory, new MetricRegistry());
Properties properties = new Properties();
properties.putAll(props);
properties.setProperty("clustermap.cluster.change.handler.type", "DynamicClusterChangeHandler");
ClusterMapConfig clusterMapConfig2 = new ClusterMapConfig(new VerifiableProperties(properties));
HelixClusterManager managerWithDynamicHandler = new HelixClusterManager(clusterMapConfig2, selfInstanceName, helixManagerFactory, new MetricRegistry());
Set<String> partitionsInStaticMap = new HashSet<>(testPartitionLayout.getPartitionLayout().getAllPartitionNames());
Set<String> partitionsInSimpleHandler = managerWithSimpleHandler.getAllPartitionIds(null).stream().map(PartitionId::toPathString).collect(Collectors.toSet());
Set<String> partitionsInDynamicHandler = managerWithDynamicHandler.getAllPartitionIds(null).stream().map(PartitionId::toPathString).collect(Collectors.toSet());
assertEquals("Partitions from dynamic change handler don't match those in static layout", partitionsInStaticMap, partitionsInDynamicHandler);
assertEquals("Partitions from two HelixClusterManagers don't match", partitionsInSimpleHandler, partitionsInDynamicHandler);
// verify that the metrics in the managers with the simple/dynamic handler are the same
HelixClusterManager.HelixClusterManagerCallback dynamicHandlerCallback = managerWithDynamicHandler.getManagerCallback();
HelixClusterManager.HelixClusterManagerCallback simpleHandlerCallback = managerWithSimpleHandler.getManagerCallback();
assertEquals("Datacenter count doesn't match", simpleHandlerCallback.getDatacenterCount(), dynamicHandlerCallback.getDatacenterCount());
assertEquals("Node count doesn't match", simpleHandlerCallback.getDatanodeCount(), dynamicHandlerCallback.getDatanodeCount());
assertEquals("Disk count doesn't match", simpleHandlerCallback.getDiskCount(), dynamicHandlerCallback.getDiskCount());
assertEquals("Sealed count doesn't match", simpleHandlerCallback.getPartitionSealedCount(), dynamicHandlerCallback.getPartitionSealedCount());
assertEquals("Raw capacity doesn't match", simpleHandlerCallback.getRawCapacity(), dynamicHandlerCallback.getRawCapacity());
assertEquals("Allocated raw capacity doesn't match", simpleHandlerCallback.getAllocatedRawCapacity(), dynamicHandlerCallback.getAllocatedRawCapacity());
assertEquals("Allocated usable capacity doesn't match", simpleHandlerCallback.getAllocatedUsableCapacity(), dynamicHandlerCallback.getAllocatedUsableCapacity());
// verify that all nodes (except for current node) are down in HelixClusterManager with dynamic cluster change handler
assertEquals("All nodes (except for self node) should be down", helixCluster.getDownInstances().size() - 1, dynamicHandlerCallback.getDownDatanodesCount());
// then we bring all instances up and trigger live instance change again
helixCluster.bringAllInstancesUp();
// verify all nodes are up now
assertEquals("All nodes should be up now", 0, dynamicHandlerCallback.getDownDatanodesCount());
// verify partition override: for now we have 3 partitions and one of them is overridden to Read_Only (if enabled)
int partitionCnt = testPartitionLayout.getPartitionCount();
assertEquals("Number of writable partitions is not correct", overrideEnabled ? partitionCnt - 1 : partitionCnt, dynamicHandlerCallback.getPartitionReadWriteCount());
// close helix cluster managers
managerWithDynamicHandler.close();
managerWithSimpleHandler.close();
}
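Switching between the two handlers is purely a configuration change. A hypothetical helper (sketch) that mirrors the setup above; the names props, selfInstanceName and helixManagerFactory are the test fixtures assumed to be in scope:
// Hypothetical helper: creates a HelixClusterManager with the given cluster change handler type.
private HelixClusterManager createManagerWithHandler(String handlerType) throws Exception {
  Properties handlerProps = new Properties();
  handlerProps.putAll(props);
  handlerProps.setProperty("clustermap.cluster.change.handler.type", handlerType);
  ClusterMapConfig config = new ClusterMapConfig(new VerifiableProperties(handlerProps));
  return new HelixClusterManager(config, selfInstanceName, helixManagerFactory, new MetricRegistry());
}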
use of com.github.ambry.config.ClusterMapConfig in project ambry by linkedin.
the class ClusterChangeHandlerTest method partitionSelectionOnReplicaAddedOrRemovedTest.
/**
* Test the case where a replica is added or removed and {@link PartitionSelectionHelper} is able to incorporate the
* cluster map changes.
* Test setup: (1) remove one replica of Partition1 from local dc;
* (2) add another replica of Partition2 to local dc;
* (3) add one replica of Partition1 to remote dc;
* @throws Exception
*/
@Test
public void partitionSelectionOnReplicaAddedOrRemovedTest() throws Exception {
assumeTrue(!overrideEnabled);
// create a HelixClusterManager with DynamicClusterChangeHandler
Properties properties = new Properties();
properties.putAll(props);
properties.setProperty("clustermap.cluster.change.handler.type", "DynamicClusterChangeHandler");
ClusterMapConfig clusterMapConfig = new ClusterMapConfig(new VerifiableProperties(properties));
HelixClusterManager helixClusterManager = new HelixClusterManager(clusterMapConfig, selfInstanceName, helixManagerFactory, new MetricRegistry());
List<PartitionId> partitions = testPartitionLayout.getPartitionLayout().getPartitions(null);
// partition1 has a replica to remove in the local dc; partition2 has a replica to add in the local dc;
// also add an extra replica of partition1 in the remote dc
PartitionId partition1 = partitions.get(0);
PartitionId partition2 = partitions.get(partitions.size() - 1);
int replicaCountForPart1 = partition1.getReplicaIds().size();
int replicaCountForPart2 = partition2.getReplicaIds().size();
List<DataNodeId> remoteNodesForPartition1 = getPartitionDataNodesFromDc(partition1, remoteDc);
List<DataNodeId> localNodesForPartition2 = getPartitionDataNodesFromDc(partition2, localDc);
// remove one replica from partition1 in local dc
Replica replicaToRemove = (Replica) partition1.getReplicaIds().stream().filter(r -> r.getDataNodeId().getDatacenterName().equals(localDc)).findFirst().get();
testPartitionLayout.removeReplicaFromPartition(replicaToRemove);
// add one replica of partition1 to remote dc
Datacenter localDataCenter = null, remoteDataCenter = null;
for (Datacenter datacenter : testHardwareLayout.getHardwareLayout().getDatacenters()) {
if (datacenter.getName().equals(localDc)) {
localDataCenter = datacenter;
} else {
remoteDataCenter = datacenter;
}
}
assertNotNull("Remote data center is null", remoteDataCenter);
assertNotNull("Local data center is null", localDataCenter);
DataNode nodeToAddPartition1 = remoteDataCenter.getDataNodes().stream().filter(n -> !remoteNodesForPartition1.contains(n)).findFirst().get();
testPartitionLayout.addReplicaToPartition(nodeToAddPartition1, (Partition) partition1);
// add one replica of partition2 to a node in the local dc (the node should be different from currentNode since we didn't
// populate the bootstrap replica map for this test)
DataNode nodeToAddPartition2 = localDataCenter.getDataNodes().stream().filter(n -> !localNodesForPartition2.contains(n) && n != currentNode).findFirst().get();
testPartitionLayout.addReplicaToPartition(nodeToAddPartition2, (Partition) partition2);
Utils.writeJsonObjectToFile(testPartitionLayout.getPartitionLayout().toJSONObject(), partitionLayoutPath);
helixCluster.upgradeWithNewPartitionLayout(partitionLayoutPath, HelixBootstrapUpgradeUtil.HelixAdminOperation.BootstrapCluster);
List<PartitionId> partitionsInManager = helixClusterManager.getAllPartitionIds(null);
PartitionId ambryPartition1 = partitionsInManager.stream().filter(p -> p.toPathString().equals(partition1.toPathString())).findFirst().get();
PartitionId ambryPartition2 = partitionsInManager.stream().filter(p -> p.toPathString().equals(partition2.toPathString())).findFirst().get();
assertEquals("Replica count of partition1 is not expected", replicaCountForPart1, ambryPartition1.getReplicaIds().size());
assertEquals("Replica count of partition2 is not expected", replicaCountForPart2 + 1, ambryPartition2.getReplicaIds().size());
// Note that there are 3 partitions in total in the cluster: partition[0] has 2 replicas, partition[1] has 3 replicas
// and partition[2] has 4 replicas in the local dc. Also, minimumLocalReplicaCount = 3, so this tests whether the partition
// selection helper is able to pick the right partitions with local replica count >= 3.
List<PartitionId> writablePartitions = helixClusterManager.getWritablePartitionIds(DEFAULT_PARTITION_CLASS);
assertEquals("Number of writable partition is not expected", 2, writablePartitions.size());
helixClusterManager.close();
}
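The expected writable count can be cross-checked by counting local-dc replicas per partition. A sketch of lines that could precede the close() call above; the threshold 3 comes from the minimumLocalReplicaCount mentioned in the comment, and sealed state is ignored here, which is an assumption:
// Sketch: count partitions whose local-dc replica count meets the assumed minimum of 3.
long expectedWritable = helixClusterManager.getAllPartitionIds(null).stream()
    .filter(p -> p.getReplicaIds().stream()
        .filter(r -> r.getDataNodeId().getDatacenterName().equals(localDc))
        .count() >= 3)
    .count();
assertEquals("Writable partition count mismatch", expectedWritable, writablePartitions.size());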