Use of com.github.ambry.config.ClusterMapConfig in project ambry by linkedin.
The class HelixParticipantTest, method testBadCases.
/**
* Test bad instantiation and initialization scenarios of the {@link HelixParticipant}
*/
@Test
public void testBadCases() {
// Invalid state model def
props.setProperty("clustermap.state.model.definition", "InvalidStateModelDef");
try {
new ClusterMapConfig(new VerifiableProperties(props));
fail("should fail due to invalid state model definition");
} catch (IllegalArgumentException e) {
// expected; restore previous props
props.setProperty("clustermap.state.model.definition", stateModelDef);
}
// Connect failure.
ClusterMapConfig clusterMapConfig = new ClusterMapConfig(new VerifiableProperties(props));
helixManagerFactory.getHelixManager(InstanceType.PARTICIPANT).beBad = true;
HelixParticipant helixParticipant = new HelixParticipant(clusterMapConfig, helixManagerFactory, new MetricRegistry(), getDefaultZkConnectStr(clusterMapConfig), true);
try {
helixParticipant.participate(Collections.emptyList(), null, null);
fail("Participation should have failed");
} catch (IOException e) {
// OK
}
// Bad param during instantiation.
props.setProperty("clustermap.cluster.name", "");
clusterMapConfig = new ClusterMapConfig(new VerifiableProperties(props));
try {
new HelixParticipant(clusterMapConfig, helixManagerFactory, new MetricRegistry(), getDefaultZkConnectStr(clusterMapConfig), true);
fail("Instantiation should have failed");
} catch (IllegalStateException e) {
// OK
}
props.setProperty("clustermap.cluster.name", "HelixParticipantTestCluster");
props.setProperty("clustermap.dcs.zk.connect.strings", "");
clusterMapConfig = new ClusterMapConfig(new VerifiableProperties(props));
try {
new HelixClusterAgentsFactory(clusterMapConfig, new MetricRegistry()).getClusterParticipants();
fail("Instantiation should have failed");
} catch (IOException e) {
// OK
}
}
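testBadCases relies on a props object, a stateModelDef string, and a helixManagerFactory that the test class initializes elsewhere and that are not shown in this excerpt. The following is only a sketch of the kind of property setup the method assumes; the host, port, and datacenter values are illustrative, and the helper itself is hypothetical rather than part of the original test class.

// Hypothetical setup helper (not in the original test): the kind of ClusterMapConfig
// properties testBadCases() presumes are already populated in `props`.
private static Properties basePropsSketch(String zkConnectJson, String stateModelDef) {
  Properties props = new Properties();
  props.setProperty("clustermap.host.name", "localhost");                // illustrative value
  props.setProperty("clustermap.port", "2200");                          // illustrative value
  props.setProperty("clustermap.cluster.name", "HelixParticipantTestCluster");
  props.setProperty("clustermap.datacenter.name", "DC0");                // illustrative value
  // JSON describing each datacenter's ZooKeeper endpoints, produced by the test's ZK helpers.
  props.setProperty("clustermap.dcs.zk.connect.strings", zkConnectJson);
  // The same key that testBadCases() temporarily overwrites with "InvalidStateModelDef".
  props.setProperty("clustermap.state.model.definition", stateModelDef);
  return props;
}

A ClusterMapConfig built from these properties via new ClusterMapConfig(new VerifiableProperties(props)) is what the successful constructions in the test expect to work.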
Use of com.github.ambry.config.ClusterMapConfig in project ambry by linkedin.
The class HelixParticipantTest, method testGetAndSetReplicaStoppedState.
/**
* Tests setReplicaStoppedState method for {@link HelixParticipant}
* @throws Exception
*/
@Test
public void testGetAndSetReplicaStoppedState() throws Exception {
// set up HelixParticipant and its dependencies
ClusterMapConfig clusterMapConfig = new ClusterMapConfig(new VerifiableProperties(props));
String instanceName = ClusterMapUtils.getInstanceName("localhost", clusterMapConfig.clusterMapPort);
HelixParticipant helixParticipant = new HelixParticipant(clusterMapConfig, new HelixFactory(), new MetricRegistry(), getDefaultZkConnectStr(clusterMapConfig), true);
ZKHelixAdmin helixAdmin = new ZKHelixAdmin("localhost:" + zkInfo.getPort());
DataNodeConfig dataNodeConfig = getDataNodeConfigInHelix(helixAdmin, instanceName);
// Make sure the current stoppedReplicas list is empty
List<String> stoppedReplicas = helixParticipant.getStoppedReplicas();
assertEquals("stoppedReplicas list should be empty", Collections.emptyList(), stoppedReplicas);
String listName = "stoppedReplicas list";
Set<String> localPartitionNames = new HashSet<>();
dataNodeConfig.getDiskConfigs().values().forEach(diskConfig -> localPartitionNames.addAll(diskConfig.getReplicaConfigs().keySet()));
String[] partitionIds = new String[3];
for (int i = 0; i < partitionIds.length; ++i) {
partitionIds[i] = localPartitionNames.iterator().next();
localPartitionNames.remove(partitionIds[i]);
}
ReplicaId replicaId1 = createMockAmbryReplica(partitionIds[0]);
ReplicaId replicaId2 = createMockAmbryReplica(partitionIds[1]);
ReplicaId replicaId3 = createMockAmbryReplica(partitionIds[2]);
// Check that invoking setReplicaStoppedState with a non-AmbryReplica ReplicaId throws an IllegalArgumentException
ReplicaId nonAmbryReplica = createMockNotAmbryReplica(partitionIds[1]);
try {
helixParticipant.setReplicaStoppedState(Collections.singletonList(nonAmbryReplica), true);
fail("Expected an IllegalArgumentException here");
} catch (IllegalArgumentException e) {
// expected. Nothing to do.
}
// Check that invoking setReplicaStoppedState adds replicaId1 and replicaId2 to the list of stopped replicas
helixParticipant.setReplicaStoppedState(Arrays.asList(replicaId1, replicaId2), true);
stoppedReplicas = helixParticipant.getStoppedReplicas();
listIsExpectedSize(stoppedReplicas, 2, listName);
assertTrue(stoppedReplicas.contains(replicaId1.getPartitionId().toPathString()));
assertTrue(stoppedReplicas.contains(replicaId2.getPartitionId().toPathString()));
// Invoke setReplicaStoppedState to add replicaId1 and replicaId2 again; this should be a no-op
helixParticipant.setReplicaStoppedState(Arrays.asList(replicaId1, replicaId2), true);
listIsExpectedSize(helixParticipant.getStoppedReplicas(), 2, listName);
// Add replicaId1 again along with replicaId3 to ensure the new replicaId is added correctly and no duplicates end up in the stopped list
helixParticipant.setReplicaStoppedState(Arrays.asList(replicaId1, replicaId3), true);
stoppedReplicas = helixParticipant.getStoppedReplicas();
listIsExpectedSize(stoppedReplicas, 3, listName);
assertTrue(stoppedReplicas.contains(replicaId1.getPartitionId().toPathString()));
assertTrue(stoppedReplicas.contains(replicaId2.getPartitionId().toPathString()));
assertTrue(stoppedReplicas.contains(replicaId3.getPartitionId().toPathString()));
// Check that invoking setReplicaStoppedState with markStop == false removes replicaId1 and replicaId2 from the stopped list
helixParticipant.setReplicaStoppedState(Arrays.asList(replicaId1, replicaId2), false);
stoppedReplicas = helixParticipant.getStoppedReplicas();
listIsExpectedSize(stoppedReplicas, 1, listName);
assertTrue(stoppedReplicas.contains(replicaId3.getPartitionId().toPathString()));
assertFalse(stoppedReplicas.contains(replicaId2.getPartitionId().toPathString()));
assertFalse(stoppedReplicas.contains(replicaId1.getPartitionId().toPathString()));
// Removing replicaIds that have already been removed doesn't hurt anything and will not update the InstanceConfig in Helix
helixParticipant.setReplicaStoppedState(Arrays.asList(replicaId1, replicaId2), false);
stoppedReplicas = helixParticipant.getStoppedReplicas();
listIsExpectedSize(stoppedReplicas, 1, listName);
assertTrue(stoppedReplicas.contains(replicaId3.getPartitionId().toPathString()));
// Removing all replicas (including a replica not in the list) yields the expected behavior
helixParticipant.setReplicaStoppedState(Arrays.asList(replicaId2, replicaId3), false);
stoppedReplicas = helixParticipant.getStoppedReplicas();
listIsExpectedSize(stoppedReplicas, 0, listName);
helixAdmin.close();
}
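createMockAmbryReplica and createMockNotAmbryReplica are helpers defined elsewhere in the test class and not shown in this excerpt. A plausible sketch, assuming Mockito is on the test classpath, is below; the essential behavior is that setReplicaStoppedState accepts only AmbryReplica instances and reads each replica's partition path string, as the assertions above rely on.

// Sketch of the mock helpers used above (not the original implementations); assumes Mockito.
private ReplicaId createMockAmbryReplica(String partitionName) {
  AmbryPartition partition = Mockito.mock(AmbryPartition.class);
  Mockito.when(partition.toPathString()).thenReturn(partitionName);
  AmbryReplica replica = Mockito.mock(AmbryReplica.class);
  Mockito.when(replica.getPartitionId()).thenReturn(partition);
  return replica;
}

private ReplicaId createMockNotAmbryReplica(String partitionName) {
  // Any ReplicaId that is not an AmbryReplica should make setReplicaStoppedState throw
  // the IllegalArgumentException asserted above.
  return Mockito.mock(ReplicaId.class);
}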
Use of com.github.ambry.config.ClusterMapConfig in project ambry by linkedin.
The class ClusterChangeHandlerTest, method addNewInstancesAndPartitionsTest.
/**
* Test that new instances/partitions are added to the cluster dynamically. {@link HelixClusterManager} with
* {@link DynamicClusterChangeHandler} should absorb the change and update the in-memory cluster map.
* 1. add new instance
* 2. add new partition onto new instance
* 3. add new partition onto existing instance
*/
@Test
public void addNewInstancesAndPartitionsTest() throws Exception {
// create a HelixClusterManager with DynamicClusterChangeHandler
Properties properties = new Properties();
properties.putAll(props);
properties.setProperty("clustermap.cluster.change.handler.type", "DynamicClusterChangeHandler");
ClusterMapConfig clusterMapConfig = new ClusterMapConfig(new VerifiableProperties(properties));
HelixClusterManager helixClusterManager = new HelixClusterManager(clusterMapConfig, selfInstanceName, helixManagerFactory, new MetricRegistry());
// before adding new instances, first ensure the current number of nodes is correct.
List<DataNode> dataNodesInLayout = new ArrayList<>();
testHardwareLayout.getHardwareLayout().getDatacenters().forEach(dc -> dataNodesInLayout.addAll(dc.getDataNodes()));
assertEquals("Number of data nodes is not expected", dataNodesInLayout.size(), helixClusterManager.getDataNodeIds().size());
// pick 2 existing nodes from each dc (also the nodes from local dc should be different from currentNode)
List<DataNode> nodesToHostNewPartition = new ArrayList<>();
List<DataNode> localDcNodes = testHardwareLayout.getAllDataNodesFromDc(localDc);
localDcNodes.remove(currentNode);
DataNode localDcNode1 = localDcNodes.get(0);
DataNode localDcNode2 = localDcNodes.get(localDcNodes.size() - 1);
List<DataNode> remoteDcNodes = testHardwareLayout.getAllDataNodesFromDc(remoteDc);
DataNode remoteDcNode1 = remoteDcNodes.get(0);
DataNode remoteDcNode2 = remoteDcNodes.get(remoteDcNodes.size() - 1);
// add a new node into static layout
testHardwareLayout.addNewDataNodes(1);
// add a new partition to the static layout and place its replicas on both existing nodes and the new node
List<DataNode> newAddedNodes = new ArrayList<>();
testHardwareLayout.getHardwareLayout().getDatacenters().forEach(dc -> newAddedNodes.addAll(dc.getDataNodes()));
newAddedNodes.removeAll(dataNodesInLayout);
// pick 2 existing nodes and 1 new node from each dc to host a replica of the new partition
nodesToHostNewPartition.addAll(Arrays.asList(localDcNode1, localDcNode2));
nodesToHostNewPartition.addAll(Arrays.asList(remoteDcNode1, remoteDcNode2));
nodesToHostNewPartition.addAll(newAddedNodes);
testPartitionLayout.addNewPartition(testHardwareLayout, nodesToHostNewPartition, DEFAULT_PARTITION_CLASS);
// write new HardwareLayout and PartitionLayout into files
Utils.writeJsonObjectToFile(testHardwareLayout.getHardwareLayout().toJSONObject(), hardwareLayoutPath);
Utils.writeJsonObjectToFile(testPartitionLayout.getPartitionLayout().toJSONObject(), partitionLayoutPath);
// this triggers an InstanceConfig change notification.
// In each dc, 2 existing instance configs are updated, 1 new instance is added, and 1 new partition is introduced
helixCluster.upgradeWithNewHardwareLayout(hardwareLayoutPath);
// verify that after the InstanceConfig change, HelixClusterManager contains one more node per dc.
assertEquals("Number of data nodes after instance addition is not correct", testHardwareLayout.getAllExistingDataNodes().size(), helixClusterManager.getDataNodeIds().size());
// verify number of partitions in cluster manager has increased by 1
assertEquals("Number of partitions after partition addition is not correct", testPartitionLayout.getPartitionCount(), helixClusterManager.getAllPartitionIds(null).size());
// we need to bring the newly added instances up because the writable-partition check below requires all local replicas to be up
for (DataNode newNode : newAddedNodes) {
helixCluster.bringInstanceUp(getInstanceName(newNode.getHostname(), newNode.getPort()));
}
assertEquals("Number of writable partitions after partition addition is not correct", overrideEnabled ? testPartitionLayout.getPartitionCount() - 1 : testPartitionLayout.getPartitionCount(), helixClusterManager.getWritablePartitionIds(null).size());
// verify capacity stats are updated
HelixClusterManager.HelixClusterManagerCallback clusterManagerCallback = helixClusterManager.getManagerCallback();
// note that we added one node to each dc, so the raw capacity = (# of nodes) * (# of disks per node) * (disk capacity)
long rawCapacityInStaticLayout = testHardwareLayout.getAllExistingDataNodes().size() * testHardwareLayout.getDiskCount() * testHardwareLayout.getDiskCapacityInBytes();
assertEquals("Raw capacity of entire cluster is not expected", rawCapacityInStaticLayout, clusterManagerCallback.getRawCapacity());
// we have added one more partition, so the allocated raw capacity in the cluster is now 4 (partition count) * 6 (replicas per partition) * ReplicaCapacity
assertEquals("Allocated raw capacity of entire cluster is not correct", testPartitionLayout.getAllocatedRawCapacityInBytes(), clusterManagerCallback.getAllocatedRawCapacity());
// verify usable capacity
assertEquals("Allocated usable capacity of entire cluster is not correct", testPartitionLayout.getAllocatedUsableCapacityInBytes(), clusterManagerCallback.getAllocatedUsableCapacity());
// additional tests to verify that getting replicas, disks, resources, etc. returns correct results.
for (DataNode newNode : newAddedNodes) {
AmbryDataNode ambryNode = helixClusterManager.getDataNodeId(newNode.getHostname(), newNode.getPort());
assertNotNull("New added node should exist in HelixClusterManager", ambryNode);
List<AmbryReplica> ambryReplicas = helixClusterManager.getReplicaIds(ambryNode);
assertEquals("There should be one replica on the new node", 1, ambryReplicas.size());
Set<AmbryDisk> ambryDisks = new HashSet<>(clusterManagerCallback.getDisks(ambryNode));
assertEquals("Disk count on the new node is not correct", localDcNode1.getDisks().size(), ambryDisks.size());
// verify that getting a non-existent replica on the new node returns null
assertNull("Should return null when getting a non-existent replica on new node", helixClusterManager.getReplicaForPartitionOnNode(ambryNode, "0"));
}
// trigger an IdealState change and refresh the partition-to-resource mapping (to bring the new partition into the resource map)
helixCluster.refreshIdealState();
Map<String, String> partitionNameToResource = helixClusterManager.getPartitionToResourceMap().get(localDc);
List<PartitionId> partitionIds = testPartitionLayout.getPartitionLayout().getPartitions(null);
// verify all partitions (including the newly added one) are present in the partition-to-resource map
Set<String> partitionNames = partitionIds.stream().map(PartitionId::toPathString).collect(Collectors.toSet());
assertEquals("Some partitions are not present in partition-to-resource map", partitionNames, partitionNameToResource.keySet());
// verify all partitions are able to get their resource name
helixClusterManager.getAllPartitionIds(DEFAULT_PARTITION_CLASS).forEach(partitionId -> assertEquals("Resource name is not expected", partitionNameToResource.get(partitionId.toPathString()), partitionId.getResourceName()));
helixClusterManager.close();
}
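The two capacity comments above can be restated as a short worked computation. The sketch below reuses only getters that already appear in this excerpt; the replicas-per-partition value is written as a literal (6, i.e. 3 replicas in each of the 2 datacenters, per the comment) because the corresponding layout accessor is not shown here.

// Worked restatement of the capacity assertions above (a sketch, not original test code).
long nodeCount = testHardwareLayout.getAllExistingDataNodes().size();
long expectedRawCapacity =
    nodeCount * testHardwareLayout.getDiskCount() * testHardwareLayout.getDiskCapacityInBytes();
int replicasPerPartition = 6; // 3 replicas per dc * 2 dcs, per the comment above
long expectedAllocatedRawCapacity =
    (long) testPartitionLayout.getPartitionCount() * replicasPerPartition
        * TestPartitionLayout.defaultReplicaCapacityInBytes;
assertEquals(expectedRawCapacity, clusterManagerCallback.getRawCapacity());
assertEquals(expectedAllocatedRawCapacity, clusterManagerCallback.getAllocatedRawCapacity());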
Use of com.github.ambry.config.ClusterMapConfig in project ambry by linkedin.
The class ClusterChangeHandlerTest, method diskCapacityUpdateTest.
/**
* Test the case where the disk capacity is updated dynamically.
* @throws Exception
*/
@Test
public void diskCapacityUpdateTest() throws Exception {
// create a HelixClusterManager with DynamicClusterChangeHandler
Properties properties = new Properties();
properties.putAll(props);
properties.setProperty("clustermap.cluster.change.handler.type", "DynamicClusterChangeHandler");
ClusterMapConfig clusterMapConfig = new ClusterMapConfig(new VerifiableProperties(properties));
HelixClusterManager helixClusterManager = new HelixClusterManager(clusterMapConfig, selfInstanceName, helixManagerFactory, new MetricRegistry());
// the disk capacity after initialization should equal the original value (100L * 1024 * 1024 * 1024)
PartitionId partitionId = helixClusterManager.getAllPartitionIds(null).get(0);
assertEquals("Mismatch in disk capacity", 100L * 1024 * 1024 * 1024, partitionId.getReplicaIds().get(0).getDiskId().getRawCapacityInBytes());
// update disk capacity
testHardwareLayout.updateDiskCapacity(500L * 1024 * 1024 * 1024);
Utils.writeJsonObjectToFile(testHardwareLayout.getHardwareLayout().toJSONObject(), hardwareLayoutPath);
helixCluster.upgradeWithNewHardwareLayout(hardwareLayoutPath);
partitionId = helixClusterManager.getAllPartitionIds(null).get(0);
assertEquals("Mismatch in disk capacity", 500L * 1024 * 1024 * 1024, partitionId.getReplicaIds().get(0).getDiskId().getRawCapacityInBytes());
helixClusterManager.close();
}
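The assertion above checks only the disk behind the first replica of the first partition. An illustrative, slightly broader check (not part of the original test, but built only from methods that appear in these excerpts) would walk every replica in the cluster map:

// Illustrative extension: after the hardware layout upgrade, every disk reported by the
// cluster map should carry the new capacity.
long expectedDiskCapacity = 500L * 1024 * 1024 * 1024;
for (DataNodeId node : helixClusterManager.getDataNodeIds()) {
  for (ReplicaId replica : helixClusterManager.getReplicaIds(node)) {
    assertEquals("Mismatch in disk capacity", expectedDiskCapacity, replica.getDiskId().getRawCapacityInBytes());
  }
}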
Use of com.github.ambry.config.ClusterMapConfig in project ambry by linkedin.
The class ClusterChangeHandlerTest, method replicaAdditionOnCurrentNodeTest.
/**
* Test the case where the current node receives an InstanceConfig change triggered by itself due to replica addition. We need
* to verify that {@link DynamicClusterChangeHandler} checks whether the new replica from the InstanceConfig exists in the bootstrap
* replica map. The intention is to avoid creating a second replica instance on the current node.
*/
@Test
public void replicaAdditionOnCurrentNodeTest() throws Exception {
// create a HelixClusterManager with DynamicClusterChangeHandler
Properties properties = new Properties();
properties.putAll(props);
properties.setProperty("clustermap.cluster.change.handler.type", "DynamicClusterChangeHandler");
ClusterMapConfig clusterMapConfig = new ClusterMapConfig(new VerifiableProperties(properties));
HelixClusterManager helixClusterManager = new HelixClusterManager(clusterMapConfig, selfInstanceName, helixManagerFactory, new MetricRegistry());
// test setup: create 2 new partitions and place their replicas onto nodes that exclude currentNode. This avoids the
// edge case where currentNode already has all partitions in the cluster
Set<PartitionId> initialPartitionSet = new HashSet<>(testPartitionLayout.getPartitionLayout().getPartitions(null));
List<DataNode> nodesToHostNewPartition = new ArrayList<>();
List<DataNode> localDcNodes = testHardwareLayout.getAllDataNodesFromDc(localDc).stream().filter(node -> node != currentNode).collect(Collectors.toList());
List<DataNode> remoteDcNodes = testHardwareLayout.getAllDataNodesFromDc(remoteDc);
nodesToHostNewPartition.addAll(localDcNodes.subList(0, 3));
nodesToHostNewPartition.addAll(remoteDcNodes.subList(0, 3));
testPartitionLayout.addNewPartition(testHardwareLayout, nodesToHostNewPartition, DEFAULT_PARTITION_CLASS);
// add one more new partition
testPartitionLayout.addNewPartition(testHardwareLayout, nodesToHostNewPartition, DEFAULT_PARTITION_CLASS);
// write new HardwareLayout and PartitionLayout into files
Utils.writeJsonObjectToFile(testHardwareLayout.getHardwareLayout().toJSONObject(), hardwareLayoutPath);
Utils.writeJsonObjectToFile(testPartitionLayout.getPartitionLayout().toJSONObject(), partitionLayoutPath);
// this triggers an InstanceConfig change notification.
helixCluster.upgradeWithNewHardwareLayout(hardwareLayoutPath);
Set<PartitionId> updatedPartitionSet = new HashSet<>(testPartitionLayout.getPartitionLayout().getPartitions(null));
updatedPartitionSet.removeAll(initialPartitionSet);
List<PartitionId> addedPartitions = new ArrayList<>(updatedPartitionSet);
assertEquals("There should be 2 added partitions", 2, addedPartitions.size());
Partition addedPartition1 = (Partition) addedPartitions.get(0);
Partition addedPartition2 = (Partition) addedPartitions.get(1);
// add one replica of this newly added partition1 to currentNode
testPartitionLayout.addReplicaToPartition(currentNode, addedPartition1);
// before upgrading Helix, save the replica count of the test partition in a variable
PartitionId partitionInManager = helixClusterManager.getAllPartitionIds(null).stream().filter(p -> p.toPathString().equals(addedPartition1.toPathString())).findFirst().get();
int previousReplicaCnt = partitionInManager.getReplicaIds().size();
// test case 1: without populating the bootstrap replica map, a new replica in the InstanceConfig will trigger an exception on the current
// node (this shouldn't happen in practice, but we mock the situation to perform exhaustive testing)
Utils.writeJsonObjectToFile(testPartitionLayout.getPartitionLayout().toJSONObject(), partitionLayoutPath);
helixCluster.upgradeWithNewPartitionLayout(partitionLayoutPath, HelixBootstrapUpgradeUtil.HelixAdminOperation.BootstrapCluster);
assertEquals("Replica count of testing partition shouldn't change", previousReplicaCnt, partitionInManager.getReplicaIds().size());
// verify there is an exception when handling the instance config change because the replica is not found in the bootstrap replica map
assertEquals("Instance config change error count should be 1", 1, helixClusterManager.helixClusterManagerMetrics.instanceConfigChangeErrorCount.getCount());
helixClusterManager.close();
// test case 2: call getBootstrapReplica in HelixClusterManager to populate bootstrap replica map and then upgrade
// Helix again.
Map<String, Map<String, String>> partitionToReplicaInfosMap = new HashMap<>();
Map<String, String> newReplicaInfos = new HashMap<>();
newReplicaInfos.put(PARTITION_CLASS_STR, DEFAULT_PARTITION_CLASS);
newReplicaInfos.put(REPLICAS_CAPACITY_STR, String.valueOf(TestPartitionLayout.defaultReplicaCapacityInBytes));
newReplicaInfos.put(currentNode.getHostname() + "_" + currentNode.getPort(), currentNode.getDisks().get(0).getMountPath());
partitionToReplicaInfosMap.put(addedPartition2.toPathString(), newReplicaInfos);
// set ZNRecord
ZNRecord replicaInfosZNRecord = new ZNRecord(REPLICA_ADDITION_STR);
replicaInfosZNRecord.setMapFields(partitionToReplicaInfosMap);
znRecordMap.put(REPLICA_ADDITION_ZNODE_PATH, replicaInfosZNRecord);
// create a new HelixClusterManager with replica addition info in Helix
helixClusterManager = new HelixClusterManager(clusterMapConfig, selfInstanceName, new HelixClusterManagerTest.MockHelixManagerFactory(helixCluster, znRecordMap, null), new MetricRegistry());
ReplicaId bootstrapReplica = helixClusterManager.getBootstrapReplica(addedPartition2.toPathString(), currentNode);
assertNotNull("Getting bootstrap replica should succeed", bootstrapReplica);
// add a replica of the new partition2 to currentNode
testPartitionLayout.addReplicaToPartition(currentNode, addedPartition2);
Utils.writeJsonObjectToFile(testPartitionLayout.getPartitionLayout().toJSONObject(), partitionLayoutPath);
helixCluster.upgradeWithNewPartitionLayout(partitionLayoutPath, HelixBootstrapUpgradeUtil.HelixAdminOperation.BootstrapCluster);
partitionInManager = helixClusterManager.getAllPartitionIds(null).stream().filter(p -> p.toPathString().equals(addedPartition2.toPathString())).findFirst().get();
// this time the new replica should be present in the bootstrap replica map, and therefore the replica count should increase
assertEquals("Replica count of testing partition should increase by 1", previousReplicaCnt + 1, partitionInManager.getReplicaIds().size());
// verify that the replica instance in HelixClusterManager is the same as the bootstrap replica instance
ReplicaId replicaInManager = helixClusterManager.getReplicaIds(helixClusterManager.getDataNodeId(currentNode.getHostname(), currentNode.getPort())).stream().filter(r -> r.getPartitionId().toPathString().equals(addedPartition2.toPathString())).findFirst().get();
assertSame("There should be exactly one instance for added replica", replicaInManager, bootstrapReplica);
helixClusterManager.close();
}
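The ZNRecord built in test case 2 follows a simple shape: one map field keyed by partition name, whose inner map carries the partition class, the replica capacity, and a "hostname_port" -> mountPath entry for each node that should bootstrap a replica. A hypothetical helper capturing that shape (the constant names are the same static imports used above) could look like this:

// Sketch of a builder for the replica-addition ZNRecord used above; not part of the original test.
private static ZNRecord buildReplicaAdditionRecord(String partitionName, String partitionClass,
    long replicaCapacityInBytes, DataNode node, String mountPath) {
  Map<String, String> replicaInfos = new HashMap<>();
  replicaInfos.put(PARTITION_CLASS_STR, partitionClass);
  replicaInfos.put(REPLICAS_CAPACITY_STR, String.valueOf(replicaCapacityInBytes));
  replicaInfos.put(node.getHostname() + "_" + node.getPort(), mountPath);
  ZNRecord record = new ZNRecord(REPLICA_ADDITION_STR);
  Map<String, Map<String, String>> mapFields = new HashMap<>();
  mapFields.put(partitionName, replicaInfos);
  record.setMapFields(mapFields);
  return record;
}

The record is then registered under REPLICA_ADDITION_ZNODE_PATH in znRecordMap, exactly as in test case 2, before constructing the HelixClusterManager that is expected to hand out the bootstrap replica.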