Use of com.github.ambry.clustermap.PartitionId in project ambry by linkedin.
Class StorageManagerTest, method setBlobStoreStoppedStateSuccessTest.
/**
 * Test successfully set stopped state of blobstore with given list of {@link PartitionId}.
 * <p>
 * All stores on the first data node are first added to and then removed from the STOPPED list. After each step
 * the test checks the list of partitions that failed to update, the stopped-replica list exposed by the
 * participant and that the participant was invoked once per disk with that disk's replicas.
 * @throws Exception if any step of the test setup or teardown fails.
 */
@Test
public void setBlobStoreStoppedStateSuccessTest() throws Exception {
  MockDataNodeId dataNode = clusterMap.getDataNodes().get(0);
  List<ReplicaId> replicas = clusterMap.getReplicaIds(dataNode);
  List<PartitionId> partitionIds = new ArrayList<>();
  Map<DiskId, List<ReplicaId>> diskToReplicas = new HashMap<>();
  // test setting the state of store via instantiated MockClusterParticipant
  ClusterParticipant participant = new MockClusterParticipant();
  ClusterParticipant participantSpy = Mockito.spy(participant);
  StorageManager storageManager =
      createStorageManager(dataNode, metricRegistry, Collections.singletonList(participantSpy));
  storageManager.start();
  assertEquals("There should be no unexpected partitions reported", 0, getNumUnrecognizedPartitionsReported());
  // collect all partitions on the node and group the replicas by the disk they reside on
  for (ReplicaId replica : replicas) {
    partitionIds.add(replica.getPartitionId());
    diskToReplicas.computeIfAbsent(replica.getDiskId(), disk -> new ArrayList<>()).add(replica);
  }
  // add a list of stores to STOPPED list. Note that the stores are residing on 3 disks.
  List<PartitionId> failToUpdateList = storageManager.setBlobStoreStoppedState(partitionIds, true);
  // make sure the update operation succeeds
  assertTrue("Add stores to stopped list should succeed, failToUpdateList should be empty",
      failToUpdateList.isEmpty());
  // make sure the stopped list contains all the added stores
  Set<String> stoppedReplicasCopy = new HashSet<>(participantSpy.getStoppedReplicas());
  for (ReplicaId replica : replicas) {
    assertTrue("The stopped list should contain the replica: " + replica.getPartitionId().toPathString(),
        stoppedReplicasCopy.contains(replica.getPartitionId().toPathString()));
  }
  // make sure replicaStatusDelegate is invoked 3 times and each time the input replica list conforms with stores on particular disk
  for (List<ReplicaId> replicasPerDisk : diskToReplicas.values()) {
    verify(participantSpy, times(1)).setReplicaStoppedState(replicasPerDisk, true);
  }
  // remove a list of stores from STOPPED list. Note that the stores are residing on 3 disks.
  // The result must be captured here; otherwise the assertion below would re-check the list from the "add" step.
  failToUpdateList = storageManager.setBlobStoreStoppedState(partitionIds, false);
  // make sure the update operation succeeds
  assertTrue("Remove stores from stopped list should succeed, failToUpdateList should be empty",
      failToUpdateList.isEmpty());
  // make sure the stopped list is empty because all the stores are successfully removed.
  assertTrue("The stopped list should be empty after removing all stores",
      participantSpy.getStoppedReplicas().isEmpty());
  // make sure replicaStatusDelegate is invoked 3 times and each time the input replica list conforms with stores on particular disk
  for (List<ReplicaId> replicasPerDisk : diskToReplicas.values()) {
    verify(participantSpy, times(1)).setReplicaStoppedState(replicasPerDisk, false);
  }
  shutdownAndAssertStoresInaccessible(storageManager, replicas);
}
Use of com.github.ambry.clustermap.PartitionId in project ambry by linkedin.
Class ServerAdminTool, method main.
/**
 * Runs the server admin tool.
 * <p>
 * Builds the configs and the {@link ClusterMap} from {@code args}, resolves the target data node from
 * {@code config.hostname} and {@code config.port} and then executes the operation selected by
 * {@code config.typeOfOperation}. The tool, the output file stream and the cluster map are closed even if the
 * operation throws, so a failure does not leave them open.
 * @param args associated arguments.
 * @throws Exception if any step fails, including when the output file cannot be created, no data node matches
 *                   the configured host and port, or the operation is recognized but unsupported.
 */
public static void main(String[] args) throws Exception {
  VerifiableProperties verifiableProperties = ToolUtils.getVerifiableProperties(args);
  ServerAdminToolConfig config = new ServerAdminToolConfig(verifiableProperties);
  ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
  ClusterMap clusterMap =
      ((ClusterAgentsFactory) Utils.getObj(clusterMapConfig.clusterMapClusterAgentsFactory, clusterMapConfig,
          config.hardwareLayoutFilePath, config.partitionLayoutFilePath)).getClusterMap();
  try {
    // SSL is only set up when at least one datacenter is configured as SSL enabled
    SSLFactory sslFactory = !clusterMapConfig.clusterMapSslEnabledDatacenters.isEmpty() ? SSLFactory.getNewInstance(
        new SSLConfig(verifiableProperties)) : null;
    ServerAdminTool serverAdminTool = new ServerAdminTool(clusterMap, sslFactory, verifiableProperties);
    try {
      File file = new File(config.dataOutputFilePath);
      if (!file.exists() && !file.createNewFile()) {
        throw new IllegalStateException("Could not create " + file);
      }
      try (FileOutputStream outputFileStream = new FileOutputStream(config.dataOutputFilePath)) {
        DataNodeId dataNodeId = clusterMap.getDataNodeId(config.hostname, config.port);
        if (dataNodeId == null) {
          throw new IllegalArgumentException(
              "Could not find a data node corresponding to " + config.hostname + ":" + config.port);
        }
        switch (config.typeOfOperation) {
          case GetBlobProperties:
            BlobId blobId = new BlobId(config.blobId, clusterMap);
            Pair<ServerErrorCode, BlobProperties> bpResponse =
                serverAdminTool.getBlobProperties(dataNodeId, blobId, config.getOption, clusterMap);
            if (bpResponse.getFirst() == ServerErrorCode.No_Error) {
              LOGGER.info("Blob properties for {} from {}: {}", blobId, dataNodeId, bpResponse.getSecond());
            } else {
              LOGGER.error("Failed to get blob properties for {} from {} with option {}. Error code is {}", blobId,
                  dataNodeId, config.getOption, bpResponse.getFirst());
            }
            break;
          case GetUserMetadata:
            blobId = new BlobId(config.blobId, clusterMap);
            Pair<ServerErrorCode, ByteBuffer> umResponse =
                serverAdminTool.getUserMetadata(dataNodeId, blobId, config.getOption, clusterMap);
            if (umResponse.getFirst() == ServerErrorCode.No_Error) {
              writeBufferToFile(umResponse.getSecond(), outputFileStream);
              LOGGER.info("User metadata for {} from {} written to {}", blobId, dataNodeId,
                  config.dataOutputFilePath);
            } else {
              LOGGER.error("Failed to get user metadata for {} from {} with option {}. Error code is {}", blobId,
                  dataNodeId, config.getOption, umResponse.getFirst());
            }
            break;
          case GetBlob:
            blobId = new BlobId(config.blobId, clusterMap);
            Pair<ServerErrorCode, BlobData> bResponse =
                serverAdminTool.getBlob(dataNodeId, blobId, config.getOption, clusterMap);
            if (bResponse.getFirst() == ServerErrorCode.No_Error) {
              LOGGER.info("Blob type of {} from {} is {}", blobId, dataNodeId, bResponse.getSecond().getBlobType());
              ByteBuf buffer = bResponse.getSecond().content();
              try {
                writeByteBufToFile(buffer, outputFileStream);
              } finally {
                // release the buffer whether or not the write succeeded
                buffer.release();
              }
              LOGGER.info("Blob data for {} from {} written to {}", blobId, dataNodeId, config.dataOutputFilePath);
            } else {
              LOGGER.error("Failed to get blob data for {} from {} with option {}. Error code is {}", blobId,
                  dataNodeId, config.getOption, bResponse.getFirst());
            }
            break;
          case TriggerCompaction:
            if (config.partitionIds.length > 0 && !config.partitionIds[0].isEmpty()) {
              for (String partitionIdStr : config.partitionIds) {
                PartitionId partitionId = getPartitionIdFromStr(partitionIdStr, clusterMap);
                ServerErrorCode errorCode = serverAdminTool.triggerCompaction(dataNodeId, partitionId);
                if (errorCode == ServerErrorCode.No_Error) {
                  LOGGER.info("Compaction has been triggered for {} on {}", partitionId, dataNodeId);
                } else {
                  LOGGER.error("From {}, received server error code {} for trigger compaction request on {}",
                      dataNodeId, errorCode, partitionId);
                }
              }
            } else {
              LOGGER.error("There were no partitions provided to trigger compaction on");
            }
            break;
          case RequestControl:
            if (config.partitionIds.length > 0 && !config.partitionIds[0].isEmpty()) {
              for (String partitionIdStr : config.partitionIds) {
                PartitionId partitionId = getPartitionIdFromStr(partitionIdStr, clusterMap);
                sendRequestControlRequest(serverAdminTool, dataNodeId, partitionId, config.requestTypeToControl,
                    config.enableState);
              }
            } else {
              // a null partition is passed when the request is meant for all partitions
              LOGGER.info("No partition list provided. Requesting enable status of {} to be set to {} on all partitions",
                  config.requestTypeToControl, config.enableState);
              sendRequestControlRequest(serverAdminTool, dataNodeId, null, config.requestTypeToControl,
                  config.enableState);
            }
            break;
          case ReplicationControl:
            List<String> origins = Collections.emptyList();
            if (config.origins.length > 0 && !config.origins[0].isEmpty()) {
              origins = Arrays.asList(config.origins);
            }
            if (config.partitionIds.length > 0 && !config.partitionIds[0].isEmpty()) {
              for (String partitionIdStr : config.partitionIds) {
                PartitionId partitionId = getPartitionIdFromStr(partitionIdStr, clusterMap);
                sendReplicationControlRequest(serverAdminTool, dataNodeId, partitionId, origins, config.enableState);
              }
            } else {
              LOGGER.info("No partition list provided. Requesting enable status for replication from {} to be set to {} on " + "all partitions",
                  origins.isEmpty() ? "all DCs" : origins, config.enableState);
              sendReplicationControlRequest(serverAdminTool, dataNodeId, null, origins, config.enableState);
            }
            break;
          case CatchupStatus:
            if (config.partitionIds.length > 0 && !config.partitionIds[0].isEmpty()) {
              for (String partitionIdStr : config.partitionIds) {
                PartitionId partitionId = getPartitionIdFromStr(partitionIdStr, clusterMap);
                Pair<ServerErrorCode, Boolean> response =
                    serverAdminTool.isCaughtUp(dataNodeId, partitionId, config.acceptableLagInBytes,
                        config.numReplicasCaughtUpPerPartition);
                if (response.getFirst() == ServerErrorCode.No_Error) {
                  LOGGER.info("Replicas are {} within {} bytes for {}", response.getSecond() ? "" : "NOT",
                      config.acceptableLagInBytes, partitionId);
                } else {
                  LOGGER.error("From {}, received server error code {} for request for catchup status of {}",
                      dataNodeId, response.getFirst(), partitionId);
                }
              }
            } else {
              Pair<ServerErrorCode, Boolean> response =
                  serverAdminTool.isCaughtUp(dataNodeId, null, config.acceptableLagInBytes,
                      config.numReplicasCaughtUpPerPartition);
              if (response.getFirst() == ServerErrorCode.No_Error) {
                LOGGER.info("Replicas are {} within {} bytes for all partitions", response.getSecond() ? "" : "NOT",
                    config.acceptableLagInBytes);
              } else {
                LOGGER.error("From {}, received server error code {} for request for catchup status of all partitions",
                    dataNodeId, response.getFirst());
              }
            }
            break;
          case BlobStoreControl:
            if (config.partitionIds.length > 0 && !config.partitionIds[0].isEmpty()) {
              for (String partitionIdStr : config.partitionIds) {
                PartitionId partitionId = getPartitionIdFromStr(partitionIdStr, clusterMap);
                sendBlobStoreControlRequest(serverAdminTool, dataNodeId, partitionId,
                    config.numReplicasCaughtUpPerPartition, config.storeControlRequestType);
              }
            } else {
              LOGGER.error("There were no partitions provided to be controlled (Start/Stop)");
            }
            break;
          default:
            throw new IllegalStateException("Recognized but unsupported operation: " + config.typeOfOperation);
        }
      }
    } finally {
      serverAdminTool.close();
    }
  } finally {
    clusterMap.close();
  }
  System.out.println("Server admin tool is safely closed");
  System.exit(0);
}
Use of com.github.ambry.clustermap.PartitionId in project ambry by linkedin.
Class StorageManagerTest, method updateInstanceConfigFailureTest.
/**
 * Verifies that both the Offline-To-Bootstrap and the Inactive-To-Offline transitions fail with
 * {@code HelixUpdateFailure} when the InstanceConfig in Helix cannot be updated.
 * <p>
 * Two failure modes are exercised: the participant's update call is forced to return {@code false}, and the
 * override is then cleared so that the InstanceConfig lookup itself fails.
 */
@Test
public void updateInstanceConfigFailureTest() throws Exception {
  generateConfigs(true, true);
  MockDataNodeId node = clusterMap.getDataNodes().get(0);
  List<ReplicaId> replicasOnNode = clusterMap.getReplicaIds(node);
  MockClusterParticipant participant = new MockClusterParticipant();
  StorageManager manager = createStorageManager(node, metricRegistry, Collections.singletonList(participant));
  manager.start();
  StateTransitionException.TransitionErrorCode expectedCode =
      StateTransitionException.TransitionErrorCode.HelixUpdateFailure;

  // Case 1: updateDataNodeInfoInCluster() is overridden to report failure.
  // A brand new partition placed on the local node is used for the bootstrap transition.
  PartitionId freshPartition = clusterMap.createNewPartition(Collections.singletonList(node));
  participant.updateNodeInfoReturnVal = false;
  try {
    String bootstrapPartitionName = freshPartition.toPathString();
    participant.onPartitionBecomeBootstrapFromOffline(bootstrapPartitionName);
    fail("should fail because updating InstanceConfig didn't succeed during Offline-To-Bootstrap");
  } catch (StateTransitionException transitionFailure) {
    assertEquals("Error code doesn't match", expectedCode, transitionFailure.getErrorCode());
  }
  try {
    String offlinePartitionName = replicasOnNode.get(0).getPartitionId().toPathString();
    participant.onPartitionBecomeOfflineFromInactive(offlinePartitionName);
    fail("should fail because updating InstanceConfig didn't succeed during Inactive-To-Offline");
  } catch (StateTransitionException transitionFailure) {
    assertEquals("Error code doesn't match", expectedCode, transitionFailure.getErrorCode());
  }

  // Case 2: clear the override to mock the InstanceConfig not found error
  // (note that MockHelixAdmin is empty by default, so no InstanceConfig is present)
  participant.updateNodeInfoReturnVal = null;
  freshPartition = clusterMap.createNewPartition(Collections.singletonList(node));
  try {
    String bootstrapPartitionName = freshPartition.toPathString();
    participant.onPartitionBecomeBootstrapFromOffline(bootstrapPartitionName);
    fail("should fail because InstanceConfig is not found during Offline-To-Bootstrap");
  } catch (StateTransitionException transitionFailure) {
    assertEquals("Error code doesn't match", expectedCode, transitionFailure.getErrorCode());
  }
  try {
    String offlinePartitionName = replicasOnNode.get(1).getPartitionId().toPathString();
    participant.onPartitionBecomeOfflineFromInactive(offlinePartitionName);
    fail("should fail because InstanceConfig is not found during Inactive-To-Offline");
  } catch (StateTransitionException transitionFailure) {
    assertEquals("Error code doesn't match", expectedCode, transitionFailure.getErrorCode());
  }
  shutdownAndAssertStoresInaccessible(manager, replicasOnNode);
}
Use of com.github.ambry.clustermap.PartitionId in project ambry by linkedin.
Class StorageManagerTest, method addBlobStoreTest.
/**
 * Test add new BlobStore with given {@link ReplicaId}.
 * <p>
 * Covers: adding a store that already exists, adding a store onto a newly added mount path, adding a store
 * while its DiskManager is shut down, adding a store whose directory already exists, and adding a store that
 * fails to start.
 * @throws Exception if any step of the test setup or teardown fails.
 */
@Test
public void addBlobStoreTest() throws Exception {
  generateConfigs(true, false);
  MockDataNodeId localNode = clusterMap.getDataNodes().get(0);
  List<ReplicaId> localReplicas = clusterMap.getReplicaIds(localNode);
  int newMountPathIndex = 3;
  // add new MountPath to local node
  File f = File.createTempFile("ambry", ".tmp");
  // the temp file is only used to locate the temp directory; make sure it does not outlive the test run
  f.deleteOnExit();
  File mountFile =
      new File(f.getParent(), "mountpathfile" + MockClusterMap.PLAIN_TEXT_PORT_START_NUMBER + newMountPathIndex);
  MockClusterMap.deleteFileOrDirectory(mountFile);
  assertTrue("Couldn't create mount path directory", mountFile.mkdir());
  localNode.addMountPaths(Collections.singletonList(mountFile.getAbsolutePath()));
  PartitionId newPartition1 =
      new MockPartitionId(10L, MockClusterMap.DEFAULT_PARTITION_CLASS, clusterMap.getDataNodes(),
          newMountPathIndex);
  StorageManager storageManager = createStorageManager(localNode, metricRegistry, null);
  storageManager.start();
  // test add store that already exists, which should fail
  assertFalse("Add store which is already existing should fail", storageManager.addBlobStore(localReplicas.get(0)));
  // test add store onto a new disk, which should succeed
  assertTrue("Add new store should succeed", storageManager.addBlobStore(newPartition1.getReplicaIds().get(0)));
  assertNotNull("The store shouldn't be null because new store is successfully added",
      storageManager.getStore(newPartition1, false));
  // test add store whose diskManager is not running, which should fail
  PartitionId newPartition2 =
      new MockPartitionId(11L, MockClusterMap.DEFAULT_PARTITION_CLASS, clusterMap.getDataNodes(), 0);
  storageManager.getDiskManager(localReplicas.get(0).getPartitionId()).shutdown();
  assertFalse("Add store onto the DiskManager which is not running should fail",
      storageManager.addBlobStore(newPartition2.getReplicaIds().get(0)));
  storageManager.getDiskManager(localReplicas.get(0).getPartitionId()).start();
  // test replica addition can correctly handle existing dir (should delete it and create a new one)
  // To verify the directory has been recreated, we purposely put a test file in previous dir.
  PartitionId newPartition3 =
      new MockPartitionId(12L, MockClusterMap.DEFAULT_PARTITION_CLASS, clusterMap.getDataNodes(), 0);
  ReplicaId replicaToAdd = newPartition3.getReplicaIds().get(0);
  File previousDir = new File(replicaToAdd.getReplicaPath());
  File testFile = new File(previousDir, "testFile");
  MockClusterMap.deleteFileOrDirectory(previousDir);
  assertTrue("Cannot create dir for " + replicaToAdd.getReplicaPath(), previousDir.mkdir());
  assertTrue("Cannot create test file within previous dir", testFile.createNewFile());
  assertTrue("Adding new store should succeed", storageManager.addBlobStore(replicaToAdd));
  assertFalse("Test file should not exist", testFile.exists());
  assertNotNull("Store associated new added replica should not be null",
      storageManager.getStore(newPartition3, false));
  shutdownAndAssertStoresInaccessible(storageManager, localReplicas);
  // test add store but fail to add segment requirements to DiskSpaceAllocator. (This is simulated by inducing
  // addRequiredSegments failure to make store inaccessible)
  List<String> mountPaths = localNode.getMountPaths();
  String diskToFail = mountPaths.get(0);
  File reservePoolDir = new File(diskToFail, diskManagerConfig.diskManagerReserveFileDirName);
  File storeReserveDir = new File(reservePoolDir, DiskSpaceAllocator.STORE_DIR_PREFIX + newPartition2.toPathString());
  StorageManager storageManager2 = createStorageManager(localNode, new MetricRegistry(), null);
  storageManager2.start();
  // put a regular file at the path of the store's reserve directory
  Utils.deleteFileOrDirectory(storeReserveDir);
  assertTrue("File creation should succeed", storeReserveDir.createNewFile());
  assertFalse("Add store should fail if store couldn't start due to initializePool failure",
      storageManager2.addBlobStore(newPartition2.getReplicaIds().get(0)));
  assertNull("New store shouldn't be in in-memory data structure", storageManager2.getStore(newPartition2, false));
  shutdownAndAssertStoresInaccessible(storageManager2, localReplicas);
}
Use of com.github.ambry.clustermap.PartitionId in project ambry by linkedin.
Class VcrTestUtil, method populateZkInfoAndStartController.
/**
 * Sets up a VCR cluster on the given ZooKeeper server and starts a {@link HelixControllerManager} for it.
 * <p>
 * The cluster is created, participant auto join and persisting of the best possible assignment are switched on,
 * and a full-auto resource holding every partition of {@code clusterMap} is added and rebalanced. The ZooKeeper
 * client used for the setup is closed before returning.
 * @param zkConnectString zk connect string to zk server.
 * @param vcrClusterName the vcr cluster name.
 * @param clusterMap the {@link ClusterMap} to use.
 * @param vcrHelixStateModel the state model to use for helix cluster events.
 * @return the created {@link HelixControllerManager}.
 */
public static HelixControllerManager populateZkInfoAndStartController(String zkConnectString, String vcrClusterName,
    ClusterMap clusterMap, String vcrHelixStateModel) {
  HelixZkClient.ZkConnectionConfig connectionConfig = new HelixZkClient.ZkConnectionConfig(zkConnectString);
  HelixZkClient.ZkClientConfig clientConfig = new HelixZkClient.ZkClientConfig();
  HelixZkClient client = DedicatedZkClientFactory.getInstance().buildZkClient(connectionConfig, clientConfig);
  try {
    client.setZkSerializer(new ZNRecordSerializer());
    ClusterSetup setup = new ClusterSetup(client);
    setup.addCluster(vcrClusterName, true);
    HelixAdmin helixAdmin = new ZKHelixAdmin(client);

    // set ALLOW_PARTICIPANT_AUTO_JOIN
    HelixConfigScope clusterScope =
        new HelixConfigScopeBuilder(HelixConfigScope.ConfigScopeProperty.CLUSTER).forCluster(vcrClusterName).build();
    Map<String, String> clusterProperties = new HashMap<>();
    clusterProperties.put(ZKHelixManager.ALLOW_PARTICIPANT_AUTO_JOIN, String.valueOf(true));
    helixAdmin.setConfig(clusterScope, clusterProperties);

    // set PersistBestPossibleAssignment
    ConfigAccessor accessor = new ConfigAccessor(client);
    ClusterConfig vcrClusterConfig = accessor.getClusterConfig(vcrClusterName);
    vcrClusterConfig.setPersistBestPossibleAssignment(true);
    accessor.setClusterConfig(vcrClusterName, vcrClusterConfig);

    // describe the resource: one entry per partition in the cluster map
    FullAutoModeISBuilder idealStateBuilder = new FullAutoModeISBuilder(helixResource);
    idealStateBuilder.setStateModel(vcrHelixStateModel);
    for (PartitionId partition : clusterMap.getAllPartitionIds(null)) {
      String partitionName = partition.toPathString();
      idealStateBuilder.add(partitionName);
    }
    idealStateBuilder.setMinActiveReplica(MIN_ACTIVE_REPLICAS);
    idealStateBuilder.setRebalanceDelay((int) REBALANCE_DELAY_MS);
    idealStateBuilder.setRebalancerClass(DelayedAutoRebalancer.class.getName());
    idealStateBuilder.setRebalanceStrategy(CrushEdRebalanceStrategy.class.getName());
    IdealState resourceIdealState = idealStateBuilder.build();

    // register the resource and assign its replicas
    helixAdmin.addResource(vcrClusterName, helixResource, resourceIdealState);
    helixAdmin.rebalance(vcrClusterName, helixResource, NUM_REPLICAS, "", "");

    HelixControllerManager controller = new HelixControllerManager(zkConnectString, vcrClusterName);
    controller.syncStart();
    return controller;
  } finally {
    client.close();
  }
}
Aggregations