use of com.github.ambry.clustermap.PartitionId in project ambry by linkedin.
the class AmbryServerRequestsTest method stopBlobStoreFailureTest.
/**
* Tests for the stopBlobStore response received on a {@link BlobStoreControlAdminRequest} for different failure cases
* @throws InterruptedException
* @throws IOException
*/
@Test
public void stopBlobStoreFailureTest() throws InterruptedException, IOException {
List<? extends PartitionId> partitionIds = clusterMap.getAllPartitionIds(null);
PartitionId id = partitionIds.get(0);
short numReplicasCaughtUpPerPartition = 3;
// test partition unknown
sendAndVerifyStoreControlRequest(null, BlobStoreControlAction.StopStore, numReplicasCaughtUpPerPartition, ServerErrorCode.Bad_Request);
// test validate request failure - Replica_Unavailable
storageManager.returnNullStore = true;
sendAndVerifyStoreControlRequest(id, BlobStoreControlAction.StopStore, numReplicasCaughtUpPerPartition, ServerErrorCode.Replica_Unavailable);
storageManager.returnNullStore = false;
// test validate request failure - Disk_Unavailable
storageManager.shutdown();
storageManager.returnNullStore = true;
sendAndVerifyStoreControlRequest(id, BlobStoreControlAction.StopStore, numReplicasCaughtUpPerPartition, ServerErrorCode.Disk_Unavailable);
storageManager.returnNullStore = false;
storageManager.start();
// test invalid numReplicasCaughtUpPerPartition
numReplicasCaughtUpPerPartition = -1;
sendAndVerifyStoreControlRequest(id, BlobStoreControlAction.StopStore, numReplicasCaughtUpPerPartition, ServerErrorCode.Bad_Request);
numReplicasCaughtUpPerPartition = 3;
// test disable compaction failure
storageManager.returnValueOfControllingCompaction = false;
sendAndVerifyStoreControlRequest(id, BlobStoreControlAction.StopStore, numReplicasCaughtUpPerPartition, ServerErrorCode.Unknown_Error);
storageManager.returnValueOfControllingCompaction = true;
// test disable compaction with runtime exception
storageManager.exceptionToThrowOnControllingCompaction = new IllegalStateException();
sendAndVerifyStoreControlRequest(id, BlobStoreControlAction.StopStore, numReplicasCaughtUpPerPartition, ServerErrorCode.Unknown_Error);
storageManager.exceptionToThrowOnControllingCompaction = null;
// test disable replication failure
replicationManager.reset();
replicationManager.controlReplicationReturnVal = false;
sendAndVerifyStoreControlRequest(id, BlobStoreControlAction.StopStore, numReplicasCaughtUpPerPartition, ServerErrorCode.Unknown_Error);
// test peers catchup failure
replicationManager.reset();
replicationManager.controlReplicationReturnVal = true;
// all replicas of this partition > acceptableLag
generateLagOverrides(1, 1);
sendAndVerifyStoreControlRequest(id, BlobStoreControlAction.StopStore, numReplicasCaughtUpPerPartition, ServerErrorCode.Retry_After_Backoff);
// test shutdown BlobStore failure
replicationManager.reset();
replicationManager.controlReplicationReturnVal = true;
storageManager.returnValueOfShutdownBlobStore = false;
generateLagOverrides(0, 0);
sendAndVerifyStoreControlRequest(id, BlobStoreControlAction.StopStore, numReplicasCaughtUpPerPartition, ServerErrorCode.Unknown_Error);
// test shutdown BlobStore with runtime exception
storageManager.exceptionToThrowOnShuttingDownBlobStore = new IllegalStateException();
sendAndVerifyStoreControlRequest(id, BlobStoreControlAction.StopStore, numReplicasCaughtUpPerPartition, ServerErrorCode.Unknown_Error);
storageManager.exceptionToThrowOnShuttingDownBlobStore = null;
}
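For reference, a minimal sketch of what the sendAndVerifyStoreControlRequest helper used throughout this test might look like: an AdminRequest wrapped in a BlobStoreControlAdminRequest, sent through the same sendRequestGetResponse helper seen in the next snippet. The constructor signatures and the assumption that sendRequestGetResponse asserts the expected error code are assumptions here, not the verified helper from AmbryServerRequestsTest.
private void sendAndVerifyStoreControlRequest(PartitionId id, BlobStoreControlAction action, short numReplicasCaughtUpPerPartition, ServerErrorCode expectedErrorCode) throws InterruptedException, IOException {
  int correlationId = TestUtils.RANDOM.nextInt();
  String clientId = TestUtils.getRandomString(10);
  // Wrap a plain AdminRequest in a BlobStoreControlAdminRequest carrying the requested action (assumed constructors).
  AdminRequest adminRequest = new AdminRequest(AdminRequestOrResponseType.BlobStoreControl, id, correlationId, clientId);
  BlobStoreControlAdminRequest controlRequest = new BlobStoreControlAdminRequest(numReplicasCaughtUpPerPartition, action, adminRequest);
  // sendRequestGetResponse is assumed to verify that the response carries expectedErrorCode, as in the GetRequest helper below.
  sendRequestGetResponse(controlRequest, expectedErrorCode).release();
}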
use of com.github.ambry.clustermap.PartitionId in project ambry by linkedin.
the class AmbryServerRequestsTest method sendAndVerifyGetOriginalStoreKeys.
/**
* Sends and verifies that GetRequest with a list of original blobIds works correctly.
* @param blobIds List of blobIds for GetRequest.
* @param expectedErrorCode the {@link ServerErrorCode} expected in the response.
* @throws InterruptedException
* @throws IOException
*/
private void sendAndVerifyGetOriginalStoreKeys(List<BlobId> blobIds, ServerErrorCode expectedErrorCode) throws InterruptedException, IOException {
PartitionId partitionId = blobIds.get(0).getPartition();
int correlationId = blobIds.get(0).getContainerId();
String clientId = TestUtils.getRandomString(10);
PartitionRequestInfo pRequestInfo = new PartitionRequestInfo(partitionId, blobIds);
RequestOrResponse request = new GetRequest(correlationId, clientId, MessageFormatFlags.All, Collections.singletonList(pRequestInfo), GetOption.Include_All);
storageManager.resetStore();
if (!expectedErrorCode.equals(ServerErrorCode.Unknown_Error)) {
  // A known error is filled into each PartitionResponseInfo while the overall response carries ServerErrorCode.No_Error.
  Response response = sendRequestGetResponse(request, ServerErrorCode.No_Error);
  assertEquals("Operation received at the store not as expected", RequestOrResponseType.GetRequest, MockStorageManager.operationReceived);
  for (PartitionResponseInfo info : ((GetResponse) response).getPartitionResponseInfoList()) {
    assertEquals("Error code does not match expected", expectedErrorCode, info.getErrorCode());
  }
  response.release();
} else {
  sendRequestGetResponse(request, ServerErrorCode.Unknown_Error).release();
}
}
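A hypothetical caller of this helper, sketched under the assumption that the test's clusterMap field and random account/container ids are available as in the other snippets on this page. The BlobId constructor mirrors the one used in ServerHardDeleteTest below, and Blob_Not_Found is only an illustrative per-partition expectation.
PartitionId partitionId = clusterMap.getAllPartitionIds(null).get(0);
List<BlobId> blobIds = new ArrayList<>();
for (int i = 0; i < 2; i++) {
  // Build ids on the same partition; the helper derives the partition from the first id in the list.
  blobIds.add(new BlobId(CommonTestUtils.getCurrentBlobIdVersion(), BlobId.BlobIdType.NATIVE, clusterMap.getLocalDatacenterId(), Utils.getRandomShort(TestUtils.RANDOM), Utils.getRandomShort(TestUtils.RANDOM), partitionId, false, BlobId.BlobDataType.DATACHUNK));
}
sendAndVerifyGetOriginalStoreKeys(blobIds, ServerErrorCode.Blob_Not_Found);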
use of com.github.ambry.clustermap.PartitionId in project ambry by linkedin.
the class ServerHardDeleteTest method endToEndTestHardDeletes.
/**
* Tests the hard delete functionality.
* <p>
* This test does the following:
* 1. Makes 6 puts, waits for notification.
* 2. Makes 2 deletes, waits for notification.
* 3. Waits for hard deletes to catch up to the expected token value.
* 4. Verifies that the two records that are deleted are zeroed out by hard deletes.
* 5. Makes 3 more puts, waits for notification.
* 6. Makes 3 deletes - 2 of records from the initial set of puts, and 1 from the new set.
* 7. Waits for hard deletes to catch up again to the expected token value.
* 8. Verifies that the three records that are deleted are zeroed out by hard deletes.
*
* @throws Exception
*/
@Test
public void endToEndTestHardDeletes() throws Exception {
DataNodeId dataNodeId = mockClusterMap.getDataNodeIds().get(0);
encryptionKey = new ArrayList<>(9);
usermetadata = new ArrayList<>(9);
data = new ArrayList<>(9);
Random random = new Random();
for (int i = 0; i < 9; i++) {
  if (i % 2 == 0) {
    encryptionKey.add(new byte[100]);
    random.nextBytes(encryptionKey.get(i));
  } else {
    encryptionKey.add(null);
  }
  usermetadata.add(new byte[1000 + i]);
  data.add(new byte[31870 + i]);
  random.nextBytes(usermetadata.get(i));
  random.nextBytes(data.get(i));
}
properties = new ArrayList<>(9);
properties.add(new BlobProperties(31870, "serviceid1", Utils.getRandomShort(TestUtils.RANDOM), Utils.getRandomShort(TestUtils.RANDOM), true));
properties.add(new BlobProperties(31871, "serviceid1", Utils.getRandomShort(TestUtils.RANDOM), Utils.getRandomShort(TestUtils.RANDOM), false));
properties.add(new BlobProperties(31872, "serviceid1", Utils.getRandomShort(TestUtils.RANDOM), Utils.getRandomShort(TestUtils.RANDOM), true));
properties.add(new BlobProperties(31873, "serviceid1", "ownerid", "jpeg", false, 0, Utils.getRandomShort(TestUtils.RANDOM), Utils.getRandomShort(TestUtils.RANDOM), false, null, null, null));
properties.add(new BlobProperties(31874, "serviceid1", Utils.getRandomShort(TestUtils.RANDOM), Utils.getRandomShort(TestUtils.RANDOM), true));
properties.add(new BlobProperties(31875, "serviceid1", "ownerid", "jpeg", false, 0, Utils.getRandomShort(TestUtils.RANDOM), Utils.getRandomShort(TestUtils.RANDOM), false, null, null, null));
properties.add(new BlobProperties(31876, "serviceid1", Utils.getRandomShort(TestUtils.RANDOM), Utils.getRandomShort(TestUtils.RANDOM), true));
properties.add(new BlobProperties(31877, "serviceid1", Utils.getRandomShort(TestUtils.RANDOM), Utils.getRandomShort(TestUtils.RANDOM), false));
properties.add(new BlobProperties(31878, "serviceid1", Utils.getRandomShort(TestUtils.RANDOM), Utils.getRandomShort(TestUtils.RANDOM), true));
List<PartitionId> partitionIds = mockClusterMap.getWritablePartitionIds(MockClusterMap.DEFAULT_PARTITION_CLASS);
PartitionId chosenPartition = partitionIds.get(0);
blobIdList = new ArrayList<>(9);
for (int i = 0; i < 9; i++) {
  blobIdList.add(new BlobId(CommonTestUtils.getCurrentBlobIdVersion(), BlobId.BlobIdType.NATIVE, mockClusterMap.getLocalDatacenterId(), properties.get(i).getAccountId(), properties.get(i).getContainerId(), chosenPartition, false, BlobId.BlobDataType.DATACHUNK));
}
ConnectedChannel channel = ServerTestUtil.getBlockingChannelBasedOnPortType(new Port(dataNodeId.getPort(), PortType.PLAINTEXT), "localhost", null, null);
channel.connect();
for (int i = 0; i < 6; i++) {
  // blobs 3 and 5 among these are expired
  putBlob(blobIdList.get(i), properties.get(i), encryptionKey.get(i), usermetadata.get(i), data.get(i), channel);
}
notificationSystem.awaitBlobCreations(blobIdList.get(0).getID());
notificationSystem.awaitBlobCreations(blobIdList.get(1).getID());
notificationSystem.awaitBlobCreations(blobIdList.get(2).getID());
notificationSystem.awaitBlobCreations(blobIdList.get(4).getID());
// delete blob 1
deleteBlob(blobIdList.get(1), channel);
zeroOutBlobContent(1);
// delete blob 4
deleteBlob(blobIdList.get(4), channel);
zeroOutBlobContent(4);
notificationSystem.awaitBlobDeletions(blobIdList.get(1).getID());
notificationSystem.awaitBlobDeletions(blobIdList.get(4).getID());
time.sleep(TimeUnit.DAYS.toMillis(7));
// For each future change to this offset, add to this variable and write an explanation of why the number changed.
// Old value: 198728. Increased by 4 to 198732 because the delete record format went from version 2 to 3, which adds
// 4 extra bytes (two shorts) per delete record. The last record is a delete record, so its extra 4 bytes are not (yet) counted.
//
// Increased by 14 when the message header version went from 2 to 3, which adds a short (2 bytes) to every record;
// that covers 6 puts and 1 delete (the last delete is not counted).
// Old value: 198732 + 14. Increased by 48 after two fields (a 4-byte CRC each) were added to the blob properties
// written by putBlob: 6 * (4 + 4), where 6 is the number of putBlob calls and each of the two new fields adds 4 bytes.
int expectedTokenValueT1 = 198732 + 14 + 48;
ensureCleanupTokenCatchesUp(chosenPartition.getReplicaIds().get(0).getReplicaPath(), mockClusterMap, expectedTokenValueT1);
getAndVerify(channel, 6);
// put blob 6
putBlob(blobIdList.get(6), properties.get(6), encryptionKey.get(6), usermetadata.get(6), data.get(6), channel);
// put blob 7
putBlob(blobIdList.get(7), properties.get(7), encryptionKey.get(7), usermetadata.get(7), data.get(7), channel);
// put blob 8
putBlob(blobIdList.get(8), properties.get(8), encryptionKey.get(8), usermetadata.get(8), data.get(8), channel);
notificationSystem.awaitBlobCreations(blobIdList.get(6).getID());
notificationSystem.awaitBlobCreations(blobIdList.get(7).getID());
notificationSystem.awaitBlobCreations(blobIdList.get(8).getID());
// Do more deletes
// delete blob 3 that is expired.
deleteBlob(blobIdList.get(3), channel);
zeroOutBlobContent(3);
// delete blob 0, will undelete it later, so don't zero out the content
deleteBlob(blobIdList.get(0), channel);
// delete blob 6.
deleteBlob(blobIdList.get(6), channel);
zeroOutBlobContent(6);
notificationSystem.awaitBlobDeletions(blobIdList.get(0).getID());
notificationSystem.awaitBlobDeletions(blobIdList.get(6).getID());
undeleteBlob(blobIdList.get(0), channel);
notificationSystem.awaitBlobUndeletes(blobIdList.get(0).getID());
time.sleep(TimeUnit.DAYS.toMillis(1));
// For each future change to this offset, add to this variable and write an explanation of why the number changed.
// Old value: 298400. Increased by 16 (4 * 4) to 298416 because the delete record format went from version 2 to 3,
// which adds 4 extra bytes (two shorts) per delete record. The last record is a delete record, so its extra 4 bytes
// are not counted.
//
// Old value: 298416. Increased by 98 because the end offset is now a journal-based offset, which is not inclusive:
// it points to the last record in the journal. Before the undelete record was added, the last record in the journal
// was the delete record for blob 6; now it is the undelete for blob 0. Since a delete record is 98 bytes, the offset
// increases by 98.
//
// Old value: 298416 + 98. Increased by 28 when the message header version went from 2 to 3, which adds a short
// (2 bytes) to all the records: 9 puts and 5 deletes (the undelete is not counted since it is the last record).
// Old value: 298416 + 98 + 28. Increased by 72 after two fields (a 4-byte CRC each) were added to the blob properties
// written by putBlob: 9 * (4 + 4), where 9 is the number of putBlob calls and each of the two new fields adds 4 bytes.
int expectedTokenValueT2 = 298416 + 98 + 28 + 72;
ensureCleanupTokenCatchesUp(chosenPartition.getReplicaIds().get(0).getReplicaPath(), mockClusterMap, expectedTokenValueT2);
getAndVerify(channel, 9);
}
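The zeroOutBlobContent helper is not shown on this page. A minimal sketch of what it could look like, assuming it simply zeroes the locally held expectations so that getAndVerify matches the zeroed-out records that hard delete leaves on disk; this is an assumption, not the verified implementation from ServerHardDeleteTest.
private void zeroOutBlobContent(int index) {
  // Hard delete replaces user metadata and blob content with zeros, so zero the local copies to match (assumed behaviour).
  usermetadata.set(index, new byte[usermetadata.get(index).length]);
  data.set(index, new byte[data.get(index).length]);
}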
use of com.github.ambry.clustermap.PartitionId in project ambry by linkedin.
the class CloudAndStoreReplicationTest method setup.
/**
* Create a cluster with one vcr node and two ambry server data nodes.
* @throws Exception if setting up the cluster, VCR server, or ambry servers fails
*/
@Before
public void setup() throws Exception {
String vcrMountPath = ClusterMapSnapshotConstants.CLOUD_REPLICA_MOUNT + "/1";
recoveryProperties = new Properties();
recoveryProperties.setProperty("replication.metadata.request.version", "2");
recoveryProperties.setProperty("replication.enabled.with.vcr.cluster", "true");
recoveryProperties.setProperty("clustermap.vcr.datacenter.name", cloudDc);
if (!vcrRecoveryPartitionConfig.isEmpty()) {
  recoveryProperties.setProperty("vcr.recovery.partitions", vcrRecoveryPartitionConfig);
}
TestSSLUtils.addHttp2Properties(recoveryProperties, SSLFactory.Mode.SERVER, true);
// create vcr node
List<Port> vcrPortList = Arrays.asList(new Port(12310, PortType.PLAINTEXT), new Port(12410, PortType.SSL));
MockDataNodeId vcrNode = new MockDataNodeId("localhost", vcrPortList, Collections.singletonList(vcrMountPath), cloudDc);
// create ambry server recovery cluster
MockClusterMap serverClusterMap = new MockClusterMap(false, true, 2, 1, 1, true, false, null);
recoveryCluster = new MockCluster(serverClusterMap, Collections.singletonList(vcrNode), recoveryProperties);
partitionId = recoveryCluster.getClusterMap().getWritablePartitionIds(null).get(0);
allRecoveryNodes = serverClusterMap.getDataNodes();
// record ambry server node which will get partition leadership notification.
partitionLeaderRecoveryNode = allRecoveryNodes.get(0);
MockClusterAgentsFactory leaderMockClusterAgentsFactory = new MockClusterAgentsFactory(serverClusterMap, serverClusterMap.getAllPartitionIds(null).stream().map(PartitionId::toPathString).collect(Collectors.toList()));
// Start Helix Controller and ZK Server.
if (!zkInfo.isZkServerStarted()) {
  zkInfo.startZkServer();
}
helixControllerManager = VcrTestUtil.populateZkInfoAndStartController(zkConnectString, vcrClusterName, recoveryCluster.getClusterMap());
Properties vcrProperties = VcrTestUtil.createVcrProperties(vcrNode.getDatacenterName(), vcrClusterName, zkConnectString, 12310, 12410, 12510, null);
vcrProperties.putAll(recoveryProperties);
MockNotificationSystem notificationSystem = new MockNotificationSystem(recoveryCluster.getClusterMap());
// Create blobs and data for upload to vcr.
int blobCount = 10;
cloudBlobIds = ServerTestUtil.createBlobIds(blobCount, recoveryCluster.getClusterMap(), accountId, containerId, partitionId);
serverBlobIds = ServerTestUtil.createBlobIds(blobCount, recoveryCluster.getClusterMap(), accountId, containerId, partitionId);
// Create cloud destination and start vcr server.
latchBasedInMemoryCloudDestination = new LatchBasedInMemoryCloudDestination(cloudBlobIds, recoveryCluster.getClusterMap());
CloudDestinationFactory cloudDestinationFactory = new LatchBasedInMemoryCloudDestinationFactory(latchBasedInMemoryCloudDestination);
vcrServer = VcrTestUtil.createVcrServer(new VerifiableProperties(vcrProperties), recoveryCluster.getClusterAgentsFactory(), notificationSystem, cloudDestinationFactory);
vcrServer.startup();
// initialize and start ambry servers
for (MockDataNodeId serverNode : allRecoveryNodes) {
  AmbryServer server = recoveryCluster.initializeServer(serverNode, recoveryProperties, false, notificationSystem, SystemTime.getInstance(), serverNode.equals(partitionLeaderRecoveryNode) ? leaderMockClusterAgentsFactory : null);
  recoveryCluster.addServer(server);
}
recoveryCluster.startServers();
}
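As an aside, the vcrRecoveryPartitionConfig string consumed at the top of setup could plausibly be built from the same partition path strings the leader agents factory collects above. The comma-separated format expected by the vcr.recovery.partitions property is an assumption here, and the snippet presumes a cluster map is already in hand.
String vcrRecoveryPartitionConfig = serverClusterMap.getAllPartitionIds(null).stream().map(PartitionId::toPathString).collect(Collectors.joining(","));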
use of com.github.ambry.clustermap.PartitionId in project ambry by linkedin.
the class CloudStorageCompactor method compactPartitions.
/**
* Purge the inactive blobs in all managed partitions.
* @return the total number of blobs purged.
*/
public int compactPartitions() {
if (partitions.isEmpty()) {
  logger.info("Skipping compaction as no partitions are assigned.");
  return 0;
}
logger.info("Starting compaction on {} assigned partitions", partitions.size());
List<PartitionId> partitionSnapshot = new ArrayList<>(partitions);
long compactionStartTime = System.currentTimeMillis();
long timeToQuit = System.currentTimeMillis() + compactionTimeLimitMs;
int compactionInProgress = 0;
doneLatch.set(new CountDownLatch(1));
int totalBlobsPurged = 0;
int compactedPartitionCount = 0;
try {
  while (!partitionSnapshot.isEmpty()) {
    while (compactionInProgress < numThreads) {
      if (partitionSnapshot.isEmpty()) {
        break;
      }
      PartitionId partitionId = partitionSnapshot.remove(0);
      executorCompletionService.submit(() -> compactPartition(partitionId));
      compactionInProgress++;
    }
    try {
      totalBlobsPurged += executorCompletionService.take().get();
      compactedPartitionCount++;
    } catch (ExecutionException ex) {
      vcrMetrics.compactionFailureCount.inc();
    }
    compactionInProgress--;
    if (System.currentTimeMillis() >= timeToQuit) {
      logger.info("Compaction terminated due to time limit exceeded.");
      break;
    }
    if (isShutDown()) {
      logger.info("Compaction terminated due to shut down.");
      break;
    }
  }
  while (compactionInProgress > 0) {
    try {
      totalBlobsPurged += executorCompletionService.take().get();
      compactedPartitionCount++;
    } catch (ExecutionException ex) {
      vcrMetrics.compactionFailureCount.inc();
    }
    compactionInProgress--;
  }
  doneLatch.get().countDown();
} catch (Throwable th) {
  logger.error("Hit exception running compaction task", th);
} finally {
  long compactionTime = (System.currentTimeMillis() - compactionStartTime) / 1000;
  logger.info("Purged {} blobs in {} partitions taking {} seconds", totalBlobsPurged, compactedPartitionCount, compactionTime);
}
return totalBlobsPurged;
}
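The submit/take loop above keeps at most numThreads compactions in flight and then drains the remainder before counting down the latch. A self-contained illustration of the same pattern using only JDK classes (not Ambry code), with a stand-in task in place of compactPartition:
import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Deque;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class ThrottledCompletionExample {
  public static void main(String[] args) throws InterruptedException {
    int numThreads = 2;
    ExecutorService executor = Executors.newFixedThreadPool(numThreads);
    ExecutorCompletionService<Integer> completionService = new ExecutorCompletionService<>(executor);
    Deque<Integer> work = new ArrayDeque<>(Arrays.asList(1, 2, 3, 4, 5));
    int inFlight = 0;
    int total = 0;
    while (!work.isEmpty()) {
      // Top up to the concurrency limit before blocking on a result.
      while (inFlight < numThreads && !work.isEmpty()) {
        int item = work.pop();
        completionService.submit(() -> item * 10); // stand-in for compactPartition(partitionId)
        inFlight++;
      }
      try {
        total += completionService.take().get(); // blocks until exactly one submitted task completes
      } catch (ExecutionException e) {
        // a real caller would record the failure, as compactPartitions does via vcrMetrics
      }
      inFlight--;
    }
    // Drain whatever is still in flight before reporting the total.
    while (inFlight > 0) {
      try {
        total += completionService.take().get();
      } catch (ExecutionException e) {
        // ignored in this sketch
      }
      inFlight--;
    }
    executor.shutdown();
    System.out.println("Total from all tasks: " + total);
  }
}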