Use of org.apache.helix.model.InstanceConfig in project ambry by LinkedIn.
Class StorageManagerTest, method updateInstanceConfigSuccessTest.
/**
 * Verifies the success path of updating InstanceConfig in Helix after a new replica is added in storage manager:
 * once the participant is notified of the Offline-to-Bootstrap transition for a newly created partition, the
 * InstanceConfig read back from Helix must list that partition under the mount path of its local replica.
 */
@Test
public void updateInstanceConfigSuccessTest() throws Exception {
  generateConfigs(true, true);
  MockDataNodeId localNode = clusterMap.getDataNodes().get(0);
  List<ReplicaId> localReplicas = clusterMap.getReplicaIds(localNode);
  MockClusterParticipant participant = new MockClusterParticipant();
  StorageManager storageManager =
      createStorageManager(localNode, metricRegistry, Collections.singletonList(participant));
  storageManager.start();

  // a brand-new partition whose first replica is the one placed on the local node
  PartitionId addedPartition = clusterMap.createNewPartition(Collections.singletonList(localNode));
  ReplicaId addedReplica = addedPartition.getReplicaIds().get(0);

  // seed Helix with an InstanceConfig for the current node that carries no disk info at all
  // (the non-empty case is covered in HelixParticipantTest)
  String instanceName =
      ClusterMapUtils.getInstanceName(clusterMapConfig.clusterMapHostName, clusterMapConfig.clusterMapPort);
  InstanceConfig seededConfig = new InstanceConfig(instanceName);
  seededConfig.setHostName(localNode.getHostname());
  seededConfig.setPort(Integer.toString(localNode.getPort()));
  seededConfig.getRecord().setMapFields(new HashMap<>());
  HelixAdmin helixAdmin = participant.getHelixAdmin();
  helixAdmin.addCluster(CLUSTER_NAME);
  helixAdmin.addInstance(CLUSTER_NAME, seededConfig);

  // drive the state transition, then read the config back from Helix
  participant.onPartitionBecomeBootstrapFromOffline(addedPartition.toPathString());
  InstanceConfig updatedConfig = helixAdmin.getInstanceConfig(CLUSTER_NAME, instanceName);

  // each entry of the "Replicas" string is colon-separated and starts with the partition name
  Map<String, String> diskInfo = updatedConfig.getRecord().getMapFields().get(addedReplica.getMountPath());
  Set<String> listedPartitions = new HashSet<>();
  for (String replicaInfo : diskInfo.get("Replicas").split(",")) {
    listedPartitions.add(replicaInfo.split(":")[0]);
  }
  assertTrue("New replica info is not found in InstanceConfig",
      listedPartitions.contains(addedPartition.toPathString()));
  shutdownAndAssertStoresInaccessible(storageManager, localReplicas);
}
Use of org.apache.helix.model.InstanceConfig in project ambry by LinkedIn.
Class BlobStoreTest, method storeErrorTriggerDisableReplicaTest.
/**
* Test that replica is correctly disabled when store is shut down due to disk I/O error.
* <p>
* The test wires a {@link BlobStore} to a {@link ReplicaStatusDelegate} backed by a mocked {@link HelixAdmin} and
* runs three phases:
* <ol>
*   <li>the admin returns a {@code null} InstanceConfig, a failing put is issued, and no disabled partition is
*   expected in the tracked InstanceConfig;</li>
*   <li>the admin returns the tracked InstanceConfig again, a failing put is issued, and {@code storeId} is expected
*   to be listed as disabled;</li>
*   <li>the store is started once more and the disabled entries are expected to be cleared.</li>
* </ol>
* @throws Exception if setup or any store operation fails in a way the test does not expect.
*/
@Test
public void storeErrorTriggerDisableReplicaTest() throws Exception {
final String RESOURCE_NAME = "0";
final String CLUSTER_NAME = "BlobStoreTest";
// setup testing environment
// the shared store is shut down here; reloadStore() at the end of the test brings it back
store.shutdown();
List<TestUtils.ZkInfo> zkInfoList = new ArrayList<>();
zkInfoList.add(new TestUtils.ZkInfo(null, "DC1", (byte) 0, 2199, false));
JSONObject zkJson = constructZkLayoutJSON(zkInfoList);
properties.setProperty("clustermap.cluster.name", CLUSTER_NAME);
properties.setProperty("clustermap.datacenter.name", "DC1");
properties.setProperty("clustermap.host.name", "localhost");
properties.setProperty("clustermap.dcs.zk.connect.strings", zkJson.toString(2));
// NOTE(review): presumably a single I/O error is then enough to shut the store down - confirm against StoreConfig
properties.setProperty("store.io.error.count.to.trigger.shutdown", "1");
properties.setProperty("store.replica.status.delegate.enable", "true");
properties.setProperty("store.set.local.partition.state.enabled", "true");
ClusterMapConfig clusterMapConfig = new ClusterMapConfig(new VerifiableProperties(properties));
// held in an AtomicReference so the setInstanceConfig stub below can swap in whatever config is written, and the
// getInstanceConfig stub (and the assertions) always observe the latest one
AtomicReference<InstanceConfig> instanceConfig = new AtomicReference<>(new InstanceConfig("localhost"));
instanceConfig.get().setPort("2222");
// IdealState whose list fields contain storeId as a key (with a null list)
Map<String, List<String>> listMap = new HashMap<>();
listMap.put(storeId, null);
ZNRecord znRecord = new ZNRecord("localhost");
znRecord.setListFields(listMap);
IdealState idealState = new IdealState(znRecord);
idealState.setRebalanceMode(IdealState.RebalanceMode.SEMI_AUTO);
// mock helix related components
HelixAdmin mockHelixAdmin = mock(HelixAdmin.class);
// reads return the currently tracked InstanceConfig
when(mockHelixAdmin.getInstanceConfig(eq(CLUSTER_NAME), anyString())).then(invocation -> instanceConfig.get());
when(mockHelixAdmin.getResourcesInCluster(eq(CLUSTER_NAME))).thenReturn(Collections.singletonList(RESOURCE_NAME));
when(mockHelixAdmin.getResourceIdealState(eq(CLUSTER_NAME), eq(RESOURCE_NAME))).thenReturn(idealState);
// writes replace the tracked InstanceConfig with the third argument of the call and report success
when(mockHelixAdmin.setInstanceConfig(any(), any(), any())).then(invocation -> {
instanceConfig.set(invocation.getArgument(2));
return true;
});
HelixManager mockHelixManager = mock(HelixManager.class);
when(mockHelixManager.getClusterManagmentTool()).thenReturn(mockHelixAdmin);
// factory that always hands out the mocked manager, whatever cluster/instance/zk address is requested
HelixFactory mockHelixFactory = new HelixFactory() {
@Override
public HelixManager getZKHelixManager(String clusterName, String instanceName, InstanceType instanceType, String zkAddr) {
return mockHelixManager;
}
};
MockHelixParticipant.metricRegistry = new MetricRegistry();
MockHelixParticipant mockParticipant = new MockHelixParticipant(clusterMapConfig, mockHelixFactory);
// NOTE(review): presumably makes the mock participant run the real disable-replica logic - confirm in MockHelixParticipant
mockParticipant.overrideDisableReplicaMethod = false;
ReplicaStatusDelegate replicaStatusDelegate = new ReplicaStatusDelegate(mockParticipant);
BlobStore testStore = createBlobStore(getMockAmbryReplica(clusterMapConfig, tempDirStr), new StoreConfig(new VerifiableProperties(properties)), Collections.singletonList(replicaStatusDelegate));
testStore.start();
assertTrue("Store should start successfully", testStore.isStarted());
// create corrupted write set
// the write set is constructed with a StoreException carrying StoreErrorCodes.IOError; both puts below expect that code
MessageInfo corruptedInfo = new MessageInfo(getUniqueId(), PUT_RECORD_SIZE, Utils.getRandomShort(TestUtils.RANDOM), Utils.getRandomShort(TestUtils.RANDOM), Utils.Infinite_Time);
MessageWriteSet corruptedWriteSet = new MockMessageWriteSet(Collections.singletonList(corruptedInfo), Collections.singletonList(ByteBuffer.allocate(PUT_RECORD_SIZE)), new StoreException(StoreException.IO_ERROR_STR, StoreErrorCodes.IOError));
// 1. mock failure case
// re-stub getInstanceConfig so that the admin hands back null instead of the tracked config
when(mockHelixAdmin.getInstanceConfig(eq(CLUSTER_NAME), anyString())).thenReturn(null);
// trigger store exception when calling store.put()
try {
testStore.put(corruptedWriteSet);
fail("should throw exception");
} catch (StoreException e) {
assertEquals("Mismatch in error code", StoreErrorCodes.IOError, e.getErrorCode());
}
assertNull("Disabled partition list should be null as disabling replica didn't succeed", instanceConfig.get().getDisabledPartitions(RESOURCE_NAME));
// 2. mock success case
// restore the original stub so reads see the tracked InstanceConfig again
when(mockHelixAdmin.getInstanceConfig(eq(CLUSTER_NAME), anyString())).then(invocation -> instanceConfig.get());
testStore.start();
assertTrue("Store should start successfully", testStore.isStarted());
try {
testStore.put(corruptedWriteSet);
fail("should throw exception");
} catch (StoreException e) {
assertEquals("Mismatch in error code", StoreErrorCodes.IOError, e.getErrorCode());
}
assertEquals("Disabled partition name is not expected", storeId, instanceConfig.get().getDisabledPartitions(RESOURCE_NAME).get(0));
// verify "DISABLED" list in InstanceConfig has correct partition id.
assertEquals("Disabled replica list is not expected", Collections.singletonList(storeId), getDisabledReplicas(instanceConfig.get()));
// 3. mock disk is replaced case, restart should succeed
testStore.start();
assertNull("Disabled partition list should be null as restart will enable same replica", instanceConfig.get().getDisabledPartitions(RESOURCE_NAME));
assertTrue("Disabled replica list should be empty", getDisabledReplicas(instanceConfig.get()).isEmpty());
testStore.shutdown();
// bring the shared store back after the store.shutdown() at the top of this test
reloadStore();
}
Use of org.apache.helix.model.InstanceConfig in project ambry by LinkedIn.
Class HelixParticipantTest, method deepCopyDataNodeConfig.
/**
 * Deep copy a {@link DataNodeConfig} by rebuilding its contents as a Helix {@link InstanceConfig} and converting that
 * back through {@code instanceConfigConverter}.
 * @param dataNodeConfig {@link DataNodeConfig} to copy
 * @return the {@link DataNodeConfig} produced by converting the rebuilt {@link InstanceConfig}.
 */
private DataNodeConfig deepCopyDataNodeConfig(DataNodeConfig dataNodeConfig) {
  InstanceConfig copy =
      new InstanceConfig(ClusterMapUtils.getInstanceName(dataNodeConfig.getHostName(), dataNodeConfig.getPort()));
  copy.setHostName(dataNodeConfig.getHostName());
  copy.setPort(Integer.toString(dataNodeConfig.getPort()));

  // node-level simple fields
  copy.getRecord().setSimpleField(DATACENTER_STR, dataNodeConfig.getDatacenterName());
  copy.getRecord().setSimpleField(RACKID_STR, dataNodeConfig.getRackId());
  copy.getRecord().setSimpleField(SCHEMA_VERSION_STR, Integer.toString(CURRENT_SCHEMA_VERSION));
  copy.getRecord().setSimpleField(SSL_PORT_STR, Integer.toString(dataNodeConfig.getSslPort()));
  copy.getRecord().setSimpleField(HTTP2_PORT_STR, Integer.toString(dataNodeConfig.getHttp2Port()));
  copy.getRecord().setSimpleField(XID_STR, Long.toString(dataNodeConfig.getXid()));

  // one map field per mount path, kept sorted by mount path
  Map<String, Map<String, String>> diskInfosByMountPath = new TreeMap<>();
  for (Map.Entry<String, DataNodeConfig.DiskConfig> diskEntry : dataNodeConfig.getDiskConfigs().entrySet()) {
    diskInfosByMountPath.put(diskEntry.getKey(), buildDiskInfo(diskEntry.getValue()));
  }
  copy.getRecord().setMapFields(diskInfosByMountPath);

  // replica state lists are copied into fresh lists so the copy does not share them with the source
  copy.getRecord().setListField(SEALED_STR, new ArrayList<>(dataNodeConfig.getSealedReplicas()));
  copy.getRecord().setListField(STOPPED_REPLICAS_STR, new ArrayList<>(dataNodeConfig.getStoppedReplicas()));
  copy.getRecord().setListField(DISABLED_REPLICAS_STR, new ArrayList<>(dataNodeConfig.getDisabledReplicas()));
  return instanceConfigConverter.convert(copy);
}

/**
 * Build the per-disk map field for one disk: its replicas string, its capacity, and a disk state that is always
 * written as {@code AVAILABLE_STR}.
 * @param diskConfig the {@link DataNodeConfig.DiskConfig} to describe.
 * @return the map of disk info fields for the given disk.
 */
private Map<String, String> buildDiskInfo(DataNodeConfig.DiskConfig diskConfig) {
  // every replica contributes "<name><sep><capacity><sep><partition class><delim>", including a trailing delimiter
  StringBuilder replicas = new StringBuilder();
  for (Map.Entry<String, DataNodeConfig.ReplicaConfig> replicaEntry : diskConfig.getReplicaConfigs().entrySet()) {
    DataNodeConfig.ReplicaConfig replica = replicaEntry.getValue();
    replicas.append(replicaEntry.getKey())
        .append(REPLICAS_STR_SEPARATOR)
        .append(replica.getReplicaCapacityInBytes())
        .append(REPLICAS_STR_SEPARATOR)
        .append(replica.getPartitionClass())
        .append(REPLICAS_DELIM_STR);
  }
  Map<String, String> diskInfo = new HashMap<>();
  diskInfo.put(REPLICAS_STR, replicas.toString());
  diskInfo.put(DISK_CAPACITY_STR, String.valueOf(diskConfig.getDiskCapacityInBytes()));
  diskInfo.put(DISK_STATE, AVAILABLE_STR);
  return diskInfo;
}
Use of org.apache.helix.model.InstanceConfig in project ambry by LinkedIn.
Class ClusterChangeHandlerTest, method replicaAdditionOnCurrentNodeTest.
/**
 * Test the case where current node receives InstanceConfig change triggered by itself due to replica addition. We need
 * to verify {@link DynamicClusterChangeHandler} will check if new replica from InstanceConfig exists in bootstrap
 * replica map. The intention here is to avoid creating a second instance of replica on current node.
 * <p>
 * Two cases are exercised:
 * <ol>
 *   <li>a replica is added to the current node without the bootstrap replica map being populated; the replica count
 *   must stay unchanged and the instance config change error count must be 1;</li>
 *   <li>the bootstrap replica is fetched through {@code getBootstrapReplica} first; the replica count must grow by one
 *   and the replica held by the manager must be the very same instance as the bootstrap replica.</li>
 * </ol>
 * @throws Exception if cluster setup or any Helix upgrade step fails unexpectedly.
 */
@Test
public void replicaAdditionOnCurrentNodeTest() throws Exception {
  // create a HelixClusterManager with DynamicClusterChangeHandler
  Properties properties = new Properties();
  properties.putAll(props);
  properties.setProperty("clustermap.cluster.change.handler.type", "DynamicClusterChangeHandler");
  ClusterMapConfig clusterMapConfig = new ClusterMapConfig(new VerifiableProperties(properties));
  HelixClusterManager helixClusterManager =
      new HelixClusterManager(clusterMapConfig, selfInstanceName, helixManagerFactory, new MetricRegistry());
  // test setup: create 2 new partitions and place their replicas onto nodes that exclude currentNode. This is to avoid
  // edge case where currentNode already has all partitions in cluster
  Set<PartitionId> initialPartitionSet = new HashSet<>(testPartitionLayout.getPartitionLayout().getPartitions(null));
  List<DataNode> nodesToHostNewPartition = new ArrayList<>();
  List<DataNode> localDcNodes = testHardwareLayout.getAllDataNodesFromDc(localDc)
      .stream()
      .filter(node -> node != currentNode)
      .collect(Collectors.toList());
  List<DataNode> remoteDcNodes = testHardwareLayout.getAllDataNodesFromDc(remoteDc);
  nodesToHostNewPartition.addAll(localDcNodes.subList(0, 3));
  nodesToHostNewPartition.addAll(remoteDcNodes.subList(0, 3));
  testPartitionLayout.addNewPartition(testHardwareLayout, nodesToHostNewPartition, DEFAULT_PARTITION_CLASS);
  // add one more new partition
  testPartitionLayout.addNewPartition(testHardwareLayout, nodesToHostNewPartition, DEFAULT_PARTITION_CLASS);
  // write new HardwareLayout and PartitionLayout into files
  Utils.writeJsonObjectToFile(testHardwareLayout.getHardwareLayout().toJSONObject(), hardwareLayoutPath);
  Utils.writeJsonObjectToFile(testPartitionLayout.getPartitionLayout().toJSONObject(), partitionLayoutPath);
  // this triggers a InstanceConfig change notification.
  helixCluster.upgradeWithNewHardwareLayout(hardwareLayoutPath);
  // the partitions that exist now but did not exist before are exactly the two that were just added
  Set<PartitionId> updatedPartitionSet = new HashSet<>(testPartitionLayout.getPartitionLayout().getPartitions(null));
  updatedPartitionSet.removeAll(initialPartitionSet);
  List<PartitionId> addedPartitions = new ArrayList<>(updatedPartitionSet);
  assertEquals("There should be 2 added partitions", 2, addedPartitions.size());
  Partition addedPartition1 = (Partition) addedPartitions.get(0);
  Partition addedPartition2 = (Partition) addedPartitions.get(1);
  // add one replica of this newly added partition1 to currentNode
  testPartitionLayout.addReplicaToPartition(currentNode, addedPartition1);
  // before upgrading Helix, let's save the replica count of test partition to a variable
  PartitionId partitionInManager = helixClusterManager.getAllPartitionIds(null)
      .stream()
      .filter(p -> p.toPathString().equals(addedPartition1.toPathString()))
      .findFirst()
      .get();
  int previousReplicaCnt = partitionInManager.getReplicaIds().size();
  // test case 1: without populating bootstrap replica, new replica in InstanceConfig will trigger exception on current
  // node (this shouldn't happen in practice but we still mock this situation to perform exhaustive testing)
  Utils.writeJsonObjectToFile(testPartitionLayout.getPartitionLayout().toJSONObject(), partitionLayoutPath);
  helixCluster.upgradeWithNewPartitionLayout(partitionLayoutPath,
      HelixBootstrapUpgradeUtil.HelixAdminOperation.BootstrapCluster);
  assertEquals("Replica count of testing partition shouldn't change", previousReplicaCnt,
      partitionInManager.getReplicaIds().size());
  // verify there is an exception when handling instance config change due to replica not found in bootstrap replica map
  assertEquals("Instance config change error count should be 1", 1,
      helixClusterManager.helixClusterManagerMetrics.instanceConfigChangeErrorCount.getCount());
  helixClusterManager.close();
  // test case 2: call getBootstrapReplica in HelixClusterManager to populate bootstrap replica map and then upgrade
  // Helix again.
  Map<String, Map<String, String>> partitionToReplicaInfosMap = new HashMap<>();
  Map<String, String> newReplicaInfos = new HashMap<>();
  newReplicaInfos.put(PARTITION_CLASS_STR, DEFAULT_PARTITION_CLASS);
  newReplicaInfos.put(REPLICAS_CAPACITY_STR, String.valueOf(TestPartitionLayout.defaultReplicaCapacityInBytes));
  // entry keyed by "<hostname>_<port>" of the current node, pointing at the mount path of its first disk
  newReplicaInfos.put(currentNode.getHostname() + "_" + currentNode.getPort(),
      currentNode.getDisks().get(0).getMountPath());
  partitionToReplicaInfosMap.put(addedPartition2.toPathString(), newReplicaInfos);
  // set ZNRecord
  ZNRecord replicaInfosZNRecord = new ZNRecord(REPLICA_ADDITION_STR);
  replicaInfosZNRecord.setMapFields(partitionToReplicaInfosMap);
  znRecordMap.put(REPLICA_ADDITION_ZNODE_PATH, replicaInfosZNRecord);
  // create a new HelixClusterManager with replica addition info in Helix
  helixClusterManager = new HelixClusterManager(clusterMapConfig, selfInstanceName,
      new HelixClusterManagerTest.MockHelixManagerFactory(helixCluster, znRecordMap, null), new MetricRegistry());
  ReplicaId bootstrapReplica = helixClusterManager.getBootstrapReplica(addedPartition2.toPathString(), currentNode);
  assertNotNull("Getting bootstrap replica should succeed", bootstrapReplica);
  // add replica of new partition2 to currentNode
  testPartitionLayout.addReplicaToPartition(currentNode, addedPartition2);
  Utils.writeJsonObjectToFile(testPartitionLayout.getPartitionLayout().toJSONObject(), partitionLayoutPath);
  helixCluster.upgradeWithNewPartitionLayout(partitionLayoutPath,
      HelixBootstrapUpgradeUtil.HelixAdminOperation.BootstrapCluster);
  partitionInManager = helixClusterManager.getAllPartitionIds(null)
      .stream()
      .filter(p -> p.toPathString().equals(addedPartition2.toPathString()))
      .findFirst()
      .get();
  // this time the new replica should be present in bootstrap replica map and therefore replica count should increase.
  // previousReplicaCnt was captured from partition1, but both new partitions were created on the same
  // nodesToHostNewPartition list, so it is also used as the baseline for partition2.
  assertEquals("Replica count of testing partition should increase by 1", previousReplicaCnt + 1,
      partitionInManager.getReplicaIds().size());
  // verify that the replica instance in HelixClusterManager is same with bootstrap replica instance
  ReplicaId replicaInManager = helixClusterManager.getReplicaIds(
      helixClusterManager.getDataNodeId(currentNode.getHostname(), currentNode.getPort()))
      .stream()
      .filter(r -> r.getPartitionId().toPathString().equals(addedPartition2.toPathString()))
      .findFirst()
      .get();
  assertSame("There should be exactly one instance for added replica", replicaInManager, bootstrapReplica);
  helixClusterManager.close();
}
Use of org.apache.helix.model.InstanceConfig in project ambry by LinkedIn.
Class HelixHealthReportAggregationTaskTest, method initializeNodeReports.
/**
 * Generate one node-level stats report and publish it as the health report of {@code numNode} instances, adding an
 * instance to Helix for each of them. Every instance is named from "localhost" and a port, and all instances receive
 * the same report payload.
 * @param type The type of reports to create
 * @param numNode The number of nodes to initiate.
 * @param startingPort The port of the first node; each following node uses the next port number.
 * @throws IOException
 */
private void initializeNodeReports(StatsReportType type, int numNode, int startingPort) throws IOException {
  boolean accountReport = type == StatsReportType.ACCOUNT_REPORT;
  String healthReportName = accountReport ? HEALTH_REPORT_NAME_ACCOUNT : HEALTH_REPORT_NAME_PARTITION;
  String statsFieldName = accountReport ? STATS_FIELD_NAME_ACCOUNT : STATS_FIELD_NAME_PARTITION;

  // build store snapshots with the first argument running from 3 to 5, then wrap them into a single node report
  Random random = new Random();
  List<StatsSnapshot> storeSnapshots = new ArrayList<>();
  for (int storeIndex = 3; storeIndex <= 5; storeIndex++) {
    storeSnapshots.add(TestUtils.generateStoreStats(storeIndex, 3, random, type));
  }
  StatsWrapper nodeStats = TestUtils.generateNodeStats(storeSnapshots, 1000, type);
  String nodeStatsJSON = mapper.writeValueAsString(nodeStats);

  HelixDataAccessor dataAccessor = mockHelixManager.getHelixDataAccessor();
  for (int nodeIndex = 0; nodeIndex < numNode; nodeIndex++) {
    int port = startingPort + nodeIndex;
    String instanceName = ClusterMapUtils.getInstanceName("localhost", port);

    InstanceConfig instanceConfig = new InstanceConfig(instanceName);
    instanceConfig.setHostName("localhost");
    instanceConfig.setPort(Integer.toString(port));
    mockHelixAdmin.addInstance(CLUSTER_NAME, instanceConfig);

    // Set the same reports for all instances
    PropertyKey reportKey = dataAccessor.keyBuilder().healthReport(instanceName, healthReportName);
    ZNRecord reportRecord = new ZNRecord(instanceName);
    reportRecord.setSimpleField(statsFieldName, nodeStatsJSON);
    dataAccessor.setProperty(reportKey, new HelixProperty(reportRecord));
  }
}
Aggregations