use of com.github.ambry.store.Store in project ambry by linkedin.
the class ReplicationTest method replicaFromInactiveToOfflineTest.
/**
 * Exercises the INACTIVE -> OFFLINE transition on an existing replica.
 * Two failure paths are checked first (unknown partition, store not started), then the success path in which the
 * transition runs on a separate thread and is only allowed to finish once both peer replicas have read up to the
 * end offset of the local store.
 */
@Test
public void replicaFromInactiveToOfflineTest() throws Exception {
  MockClusterMap clusterMap = new MockClusterMap();
  ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
  MockHelixParticipant.metricRegistry = new MetricRegistry();
  MockHelixParticipant participant = new MockHelixParticipant(clusterMapConfig);
  Pair<StorageManager, ReplicationManager> managerPair =
      createStorageManagerAndReplicationManager(clusterMap, clusterMapConfig, participant);
  StorageManager storageMgr = managerPair.getFirst();
  MockReplicationManager replicationMgr = (MockReplicationManager) managerPair.getSecond();

  // Case 1: the partition name does not map to any replica.
  try {
    participant.onPartitionBecomeOfflineFromInactive("-1");
    fail("should fail because of invalid partition");
  } catch (StateTransitionException e) {
    assertEquals("Error code doesn't match", ReplicaNotFound, e.getErrorCode());
  }

  // Case 2: the replica exists but its blob store has been shut down.
  PartitionId targetPartition = replicationMgr.partitionToPartitionInfo.keySet().iterator().next();
  storageMgr.shutdownBlobStore(targetPartition);
  try {
    participant.onPartitionBecomeOfflineFromInactive(targetPartition.toPathString());
    fail("should fail because store is not started");
  } catch (StateTransitionException e) {
    assertEquals("Error code doesn't match", StoreNotStarted, e.getErrorCode());
  }
  storageMgr.startBlobStore(targetPartition);

  // Set-up for the success case: PUT one 100-byte blob into the local store, then add a DELETE record for it.
  Store store = storageMgr.getStore(targetPartition);
  MockId blobId = new MockId(TestUtils.getRandomString(10), Utils.getRandomShort(TestUtils.RANDOM),
      Utils.getRandomShort(TestUtils.RANDOM));
  long crc = (new Random()).nextLong();
  long blobSize = 100;
  MessageInfo putInfo =
      new MessageInfo(blobId, blobSize, false, false, Utils.Infinite_Time, crc, blobId.getAccountId(),
          blobId.getContainerId(), Utils.Infinite_Time);
  ByteBuffer putPayload = ByteBuffer.wrap(TestUtils.getRandomBytes((int) blobSize));
  List<MessageInfo> putInfos = new ArrayList<>(Collections.singletonList(putInfo));
  List<ByteBuffer> putPayloads = new ArrayList<>(Collections.singletonList(putPayload));
  store.put(new MockMessageWriteSet(putInfos, putPayloads));

  // The DELETE record's size is taken from a delete stream built for the same blob id.
  DeleteMessageFormatInputStream deleteStream =
      new DeleteMessageFormatInputStream(blobId, (short) 0, (short) 0, 0);
  int deleteRecordSize = (int) deleteStream.getSize();
  MessageInfo deleteInfo =
      new MessageInfo(blobId, deleteRecordSize, blobId.getAccountId(), blobId.getContainerId(),
          time.milliseconds());
  store.delete(Collections.singletonList(deleteInfo));

  // End offset of the PUT record: 100 bytes of blob plus 18 (presumably the per-record header size - confirm).
  int putEndOffset = 100 + 18;
  // End offset of the whole store: the PUT record followed by the DELETE record.
  int storeEndOffset = putEndOffset + deleteRecordSize;

  // Case 3: success path; the transition is triggered on a separate thread further below.
  ReplicaId localReplica = storageMgr.getReplica(targetPartition.toPathString());
  // Drop a decommission-in-progress marker file into the local store directory.
  File decommissionMarker = new File(localReplica.getReplicaPath(), "decommission_in_progress");
  assertTrue("Couldn't create decommission file in local store", decommissionMarker.createNewFile());
  decommissionMarker.deleteOnExit();
  assertNotSame("Before disconnection, the local store state shouldn't be OFFLINE", ReplicaState.OFFLINE,
      store.getCurrentState());
  participant.registerPartitionStateChangeListener(StateModelListenerType.ReplicationManagerListener,
      replicationMgr.replicationListener);

  // transitionDone is released only after the participant call returns on the worker thread.
  CountDownLatch transitionDone = new CountDownLatch(1);
  replicationMgr.listenerExecutionLatch = new CountDownLatch(1);
  Utils.newThread(() -> {
    participant.onPartitionBecomeOfflineFromInactive(targetPartition.toPathString());
    transitionDone.countDown();
  }, false).start();
  assertTrue("Partition state change listener in ReplicationManager didn't get called within 1 sec",
      replicationMgr.listenerExecutionLatch.await(1, TimeUnit.SECONDS));
  // Once the listener has run, the local store must already report OFFLINE.
  assertEquals("Local store state is not expected", ReplicaState.OFFLINE, store.getCurrentState());

  // Simulate replication progress of the two peers against the local store.
  List<RemoteReplicaInfo> peerInfos =
      replicationMgr.partitionToPartitionInfo.get(targetPartition).getRemoteReplicaInfos();
  ReplicaId firstPeer = peerInfos.get(0).getReplicaId();
  ReplicaId secondPeer = peerInfos.get(1).getReplicaId();
  String firstPeerHost = firstPeer.getDataNodeId().getHostname();
  String firstPeerPath = firstPeer.getReplicaPath();
  String secondPeerHost = secondPeer.getDataNodeId().getHostname();
  String secondPeerPath = secondPeer.getReplicaPath();

  // First peer stops at the end of the PUT, second peer reaches the store's end offset: sync-up is not complete.
  replicationMgr.updateTotalBytesReadByRemoteReplica(targetPartition, firstPeerHost, firstPeerPath, putEndOffset);
  replicationMgr.updateTotalBytesReadByRemoteReplica(targetPartition, secondPeerHost, secondPeerPath,
      storeEndOffset);
  assertFalse("Only one peer replica has fully caught up with end offset so sync-up should not complete",
      participant.getReplicaSyncUpManager().isSyncUpComplete(localReplica));

  // Let the first peer reach the end offset as well; the pending transition should then be able to finish.
  replicationMgr.updateTotalBytesReadByRemoteReplica(targetPartition, firstPeerHost, firstPeerPath,
      storeEndOffset);
  assertTrue("Inactive-To-Offline transition didn't complete within 1 sec",
      transitionDone.await(1, TimeUnit.SECONDS));
  assertFalse("Local store should be stopped after transition", store.isStarted());
  storageMgr.shutdown();
}
use of com.github.ambry.store.Store in project ambry by linkedin.
the class StatsManager method collectAndAggregateAccountStorageStats.
/**
 * Reads the container storage stats of the {@link Store} that backs the given partition and records them in the
 * host-level map under the partition's id. The delete tombstone stats are refreshed from the same
 * {@link StoreStats} afterwards.
 * A partition is appended to {@code unreachablePartitions} when no store is returned for it or when a
 * {@link StoreException} is thrown while its stats are being fetched.
 * @param hostStorageStatsMap map from partition id to container storage stats; receives this partition's entry.
 * @param partitionId specifies the {@link Store} to be fetched from
 * @param unreachablePartitions a {@link List} collecting the partition ids whose stats could not be fetched
 */
void collectAndAggregateAccountStorageStats(Map<Long, Map<Short, Map<Short, ContainerStorageStats>>> hostStorageStatsMap, PartitionId partitionId, List<PartitionId> unreachablePartitions) {
  Store partitionStore = storageManager.getStore(partitionId, false);
  if (partitionStore == null) {
    // Nothing to query for this partition.
    unreachablePartitions.add(partitionId);
    return;
  }
  try {
    long startTimeMs = time.milliseconds();
    StoreStats stats = partitionStore.getStoreStats();
    Map<Short, Map<Short, ContainerStorageStats>> containerStats =
        stats.getContainerStorageStats(time.milliseconds(), publishExcludeAccountIds);
    hostStorageStatsMap.put(partitionId.getId(), containerStats);
    long elapsedMs = time.milliseconds() - startTimeMs;
    metrics.fetchAndAggregateTimePerStoreMs.update(elapsedMs);
    // Refresh the delete tombstone stats from the same StoreStats instance.
    updateDeleteTombstoneStats(stats);
  } catch (StoreException e) {
    unreachablePartitions.add(partitionId);
  }
}
use of com.github.ambry.store.Store in project ambry by linkedin.
the class StatsManager method collectAndAggregatePartitionClassStorageStats.
/**
 * Reads the container storage stats of the {@link Store} that backs the given partition and files them under the
 * partition's class name, creating the per-class map on first use.
 * A partition is appended to {@code unreachablePartitions} when no store is returned for it or when a
 * {@link StoreException} is thrown while its stats are being fetched.
 * @param hostPartitionClassStorageStatsMap map from partition class to all partition storage stats.
 * @param partitionId specifies the {@link Store} to be fetched from
 * @param unreachablePartitions a {@link List} collecting the partition ids whose stats could not be fetched
 */
void collectAndAggregatePartitionClassStorageStats(Map<String, Map<Long, Map<Short, Map<Short, ContainerStorageStats>>>> hostPartitionClassStorageStatsMap, PartitionId partitionId, List<PartitionId> unreachablePartitions) {
  Store partitionStore = storageManager.getStore(partitionId, false);
  if (partitionStore == null) {
    // Nothing to query for this partition.
    unreachablePartitions.add(partitionId);
    return;
  }
  try {
    long startTimeMs = time.milliseconds();
    StoreStats stats = partitionStore.getStoreStats();
    Map<Short, Map<Short, ContainerStorageStats>> containerStats =
        stats.getContainerStorageStats(time.milliseconds(), publishExcludeAccountIds);
    // Look up (or lazily create) the bucket for this partition's class, then store the stats by partition id.
    Map<Long, Map<Short, Map<Short, ContainerStorageStats>>> statsOfClass =
        hostPartitionClassStorageStatsMap.computeIfAbsent(partitionId.getPartitionClass(),
            className -> new HashMap<>());
    statsOfClass.put(partitionId.getId(), containerStats);
    long elapsedMs = time.milliseconds() - startTimeMs;
    metrics.fetchAndAggregateTimePerStoreMs.update(elapsedMs);
  } catch (StoreException e) {
    unreachablePartitions.add(partitionId);
  }
}
use of com.github.ambry.store.Store in project ambry by linkedin.
the class AmbryServerRequests method handleRemoveStoreRequest.
/**
 * Handles the admin request that removes a BlobStore from the current node.
 * The replica is first detached from the stats manager and the replication manager, then the store is removed
 * from the store manager; on success the store's files are deleted and the replica is unsealed and unmarked as
 * stopped on every {@link ReplicaStatusDelegate} of the store.
 * @param partitionId the {@link PartitionId} associated with BlobStore
 * @return {@link ServerErrorCode#Partition_Unknown} if the store manager has no replica for the partition,
 *         {@link ServerErrorCode#Unknown_Error} if the store could not be removed or no store was found,
 *         {@link ServerErrorCode#No_Error} otherwise.
 */
private ServerErrorCode handleRemoveStoreRequest(PartitionId partitionId) throws StoreException, IOException {
  ReplicaId replicaId = storeManager.getReplica(partitionId.toPathString());
  if (replicaId == null) {
    logger.error("{} doesn't exist on current node", partitionId);
    return ServerErrorCode.Partition_Unknown;
  }
  // The results of both removals are deliberately ignored: a replica that is unknown to the stats manager or the
  // replication manager does not fail the request.
  statsManager.removeReplica(replicaId);
  ((ReplicationManager) replicationEngine).removeReplica(replicaId);

  // Grab the store before it is removed from the store manager; it is still needed for file cleanup afterwards.
  Store store = ((StorageManager) storeManager).getStore(partitionId, true);
  boolean removedFromManager = storeManager.removeBlobStore(partitionId);
  if (!removedFromManager || store == null) {
    return ServerErrorCode.Unknown_Error;
  }

  BlobStore blobStore = (BlobStore) store;
  blobStore.deleteStoreFiles();
  for (ReplicaStatusDelegate delegate : blobStore.getReplicaStatusDelegates()) {
    // Clear the replica from the sealed and stopped lists (if present).
    logger.info("Removing store from sealed and stopped list(if present)");
    delegate.unseal(replicaId);
    delegate.unmarkStopped(Collections.singletonList(replicaId));
  }
  return ServerErrorCode.No_Error;
}
use of com.github.ambry.store.Store in project ambry by linkedin.
the class AmbryServerRequestsTest method addBlobStoreFailureTest.
/**
 * Verifies the error code returned by an AddStore control request at each point where adding a store can fail:
 * no replica for the current node, StorageManager rejecting the store, ReplicationManager rejecting the replica
 * and StatsManager rejecting the replica.
 */
@Test
public void addBlobStoreFailureTest() throws Exception {
  // Partition without a local replica: built only from nodes that differ from the current node in host or port.
  List<MockDataNodeId> otherNodes = clusterMap.getDataNodes()
      .stream()
      .filter(node -> !(node.getHostname().equals(dataNodeId.getHostname())
          && node.getPort() == dataNodeId.getPort()))
      .collect(Collectors.toList());
  PartitionId partitionWithoutLocalReplica = clusterMap.createNewPartition(otherNodes);
  // Fetching the new replica from the cluster map is expected to fail.
  sendAndVerifyStoreControlRequest(partitionWithoutLocalReplica, BlobStoreControlAction.AddStore, (short) 0,
      ServerErrorCode.Replica_Unavailable);

  // Partition built from every data node of the cluster map, so one replica sits on the current node.
  PartitionId partitionWithLocalReplica = clusterMap.createNewPartition(clusterMap.getDataNodes());

  // StorageManager is told to reject the store.
  storageManager.returnValueOfAddBlobStore = false;
  sendAndVerifyStoreControlRequest(partitionWithLocalReplica, BlobStoreControlAction.AddStore, (short) 0,
      ServerErrorCode.Unknown_Error);
  storageManager.returnValueOfAddBlobStore = true;

  // ReplicationManager rejects the replica because it has been registered there ahead of the request.
  ReplicaId preRegisteredReplica =
      clusterMap.getBootstrapReplica(partitionWithLocalReplica.toPathString(), dataNodeId);
  replicationManager.addReplica(preRegisteredReplica);
  sendAndVerifyStoreControlRequest(partitionWithLocalReplica, BlobStoreControlAction.AddStore, (short) 0,
      ServerErrorCode.Unknown_Error);
  assertTrue("Remove replica from replication manager should succeed.",
      replicationManager.removeReplica(preRegisteredReplica));

  // StatsManager is told to reject the replica.
  statsManager.returnValOfAddReplica = false;
  sendAndVerifyStoreControlRequest(partitionWithLocalReplica, BlobStoreControlAction.AddStore, (short) 0,
      ServerErrorCode.Unknown_Error);
  statsManager.returnValOfAddReplica = true;
}
Aggregations