Use of com.github.ambry.clustermap.PartitionId in project ambry by linkedin.
From class ReplicationTest, method replicaFromInactiveToOfflineTest.
/**
* Test INACTIVE -> OFFLINE transition on existing replica (both success and failure cases)
*/
@Test
public void replicaFromInactiveToOfflineTest() throws Exception {
  MockClusterMap clusterMap = new MockClusterMap();
  ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
  MockHelixParticipant.metricRegistry = new MetricRegistry();
  MockHelixParticipant mockHelixParticipant = new MockHelixParticipant(clusterMapConfig);
  Pair<StorageManager, ReplicationManager> managers =
      createStorageManagerAndReplicationManager(clusterMap, clusterMapConfig, mockHelixParticipant);
  StorageManager storageManager = managers.getFirst();
  MockReplicationManager replicationManager = (MockReplicationManager) managers.getSecond();
  // 1. test replica not found case
  try {
    mockHelixParticipant.onPartitionBecomeOfflineFromInactive("-1");
    fail("should fail because of invalid partition");
  } catch (StateTransitionException e) {
    assertEquals("Error code doesn't match", ReplicaNotFound, e.getErrorCode());
  }
  // 2. test store not started case
  PartitionId existingPartition = replicationManager.partitionToPartitionInfo.keySet().iterator().next();
  storageManager.shutdownBlobStore(existingPartition);
  try {
    mockHelixParticipant.onPartitionBecomeOfflineFromInactive(existingPartition.toPathString());
    fail("should fail because store is not started");
  } catch (StateTransitionException e) {
    assertEquals("Error code doesn't match", StoreNotStarted, e.getErrorCode());
  }
  storageManager.startBlobStore(existingPartition);
  // before testing the success case, write a blob (size = 100) into the local store and add a delete record for it
  Store localStore = storageManager.getStore(existingPartition);
  MockId id = new MockId(TestUtils.getRandomString(10), Utils.getRandomShort(TestUtils.RANDOM),
      Utils.getRandomShort(TestUtils.RANDOM));
  long crc = (new Random()).nextLong();
  long blobSize = 100;
  MessageInfo info = new MessageInfo(id, blobSize, false, false, Utils.Infinite_Time, crc, id.getAccountId(),
      id.getContainerId(), Utils.Infinite_Time);
  List<MessageInfo> infos = new ArrayList<>();
  List<ByteBuffer> buffers = new ArrayList<>();
  ByteBuffer buffer = ByteBuffer.wrap(TestUtils.getRandomBytes((int) blobSize));
  infos.add(info);
  buffers.add(buffer);
  localStore.put(new MockMessageWriteSet(infos, buffers));
  // delete the blob
  int deleteRecordSize = (int) (new DeleteMessageFormatInputStream(id, (short) 0, (short) 0, 0).getSize());
  MessageInfo deleteInfo = new MessageInfo(id, deleteRecordSize, id.getAccountId(), id.getContainerId(),
      time.milliseconds());
  localStore.delete(Collections.singletonList(deleteInfo));
  int sizeOfPutAndHeader = 100 + 18;
  int sizeOfWhole = sizeOfPutAndHeader + deleteRecordSize;
  // note that the end offset of the last PUT = 100 + 18 = 118; the end offset of the store is sizeOfWhole
  // 3. test success case (create a new thread and trigger INACTIVE -> OFFLINE transition)
  ReplicaId localReplica = storageManager.getReplica(existingPartition.toPathString());
  // put a decommission-in-progress file into the local store dir
  File decommissionFile = new File(localReplica.getReplicaPath(), "decommission_in_progress");
  assertTrue("Couldn't create decommission file in local store", decommissionFile.createNewFile());
  decommissionFile.deleteOnExit();
  assertNotSame("Before disconnection, the local store state shouldn't be OFFLINE", ReplicaState.OFFLINE,
      localStore.getCurrentState());
  mockHelixParticipant.registerPartitionStateChangeListener(StateModelListenerType.ReplicationManagerListener,
      replicationManager.replicationListener);
  CountDownLatch participantLatch = new CountDownLatch(1);
  replicationManager.listenerExecutionLatch = new CountDownLatch(1);
  Utils.newThread(() -> {
    mockHelixParticipant.onPartitionBecomeOfflineFromInactive(existingPartition.toPathString());
    participantLatch.countDown();
  }, false).start();
  assertTrue("Partition state change listener in ReplicationManager didn't get called within 1 sec",
      replicationManager.listenerExecutionLatch.await(1, TimeUnit.SECONDS));
  // the state of the local store should be updated to OFFLINE
  assertEquals("Local store state is not expected", ReplicaState.OFFLINE, localStore.getCurrentState());
  // update replication lag between local and peer replicas
  List<RemoteReplicaInfo> remoteReplicaInfos =
      replicationManager.partitionToPartitionInfo.get(existingPartition).getRemoteReplicaInfos();
  ReplicaId peerReplica1 = remoteReplicaInfos.get(0).getReplicaId();
  ReplicaId peerReplica2 = remoteReplicaInfos.get(1).getReplicaId();
  // peer1 catches up with the last PUT, peer2 catches up with the end offset of the local store;
  // in this case, sync-up is not complete
  replicationManager.updateTotalBytesReadByRemoteReplica(existingPartition,
      peerReplica1.getDataNodeId().getHostname(), peerReplica1.getReplicaPath(), sizeOfPutAndHeader);
  replicationManager.updateTotalBytesReadByRemoteReplica(existingPartition,
      peerReplica2.getDataNodeId().getHostname(), peerReplica2.getReplicaPath(), sizeOfWhole);
  assertFalse("Only one peer replica has fully caught up with the end offset, so sync-up should not complete",
      mockHelixParticipant.getReplicaSyncUpManager().isSyncUpComplete(localReplica));
  // make peer1 catch up with the end offset
  replicationManager.updateTotalBytesReadByRemoteReplica(existingPartition,
      peerReplica1.getDataNodeId().getHostname(), peerReplica1.getReplicaPath(), sizeOfWhole);
  // now sync-up should complete and the transition should be able to proceed
  assertTrue("Inactive-To-Offline transition didn't complete within 1 sec", participantLatch.await(1, TimeUnit.SECONDS));
  assertFalse("Local store should be stopped after transition", localStore.isStarted());
  storageManager.shutdown();
}
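The success case above exercises a pattern worth calling out: run the blocking state transition on a helper thread, assert intermediate state while it is still blocked, then unblock it and await completion with a bounded timeout. Below is a minimal, self-contained sketch of that pattern; every name in it is illustrative, not an Ambry API.

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

public final class BlockingTransitionPattern {
  public static void main(String[] args) throws InterruptedException {
    CountDownLatch started = new CountDownLatch(1);
    CountDownLatch unblock = new CountDownLatch(1);
    CountDownLatch done = new CountDownLatch(1);
    Thread transition = new Thread(() -> {
      started.countDown(); // signal: the transition has begun
      try {
        unblock.await(); // block here, standing in for waiting on replica sync-up
      } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        return;
      }
      done.countDown(); // signal: the transition completed
    });
    transition.start();
    if (!started.await(1, TimeUnit.SECONDS)) {
      throw new AssertionError("transition never started");
    }
    // assert intermediate state here, e.g. the store has moved to OFFLINE
    unblock.countDown(); // stands in for the last peer replica catching up
    if (!done.await(1, TimeUnit.SECONDS)) {
      throw new AssertionError("transition never completed");
    }
  }
}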
Use of com.github.ambry.clustermap.PartitionId in project ambry by linkedin.
From class ReplicationTest, method replicaFromStandbyToLeaderTest.
/**
 * Test the state transition in the replication manager from STANDBY to LEADER.
 * Test setup: when creating partitions, make sure that there is exactly one replica in LEADER state in each data center.
 * Test condition: when a partition on the current node moves from STANDBY to LEADER, verify that the in-memory map
 * from partition to peer leader replicas is updated correctly.
 * @throws Exception
 */
@Test
public void replicaFromStandbyToLeaderTest() throws Exception {
  MockClusterMap clusterMap = new MockClusterMap();
  ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
  MockHelixParticipant.metricRegistry = new MetricRegistry();
  MockHelixParticipant mockHelixParticipant = new MockHelixParticipant(clusterMapConfig);
  ReplicationConfig initialReplicationConfig = replicationConfig;
  properties.setProperty("replication.model.across.datacenters", "LEADER_BASED");
  replicationConfig = new ReplicationConfig(new VerifiableProperties(properties));
  Pair<StorageManager, ReplicationManager> managers =
      createStorageManagerAndReplicationManager(clusterMap, clusterMapConfig, mockHelixParticipant);
  StorageManager storageManager = managers.getFirst();
  MockReplicationManager replicationManager = (MockReplicationManager) managers.getSecond();
  List<ReplicaId> replicaIds = clusterMap.getReplicaIds(replicationManager.dataNodeId);
  for (ReplicaId replicaId : replicaIds) {
    MockReplicaId mockReplicaId = (MockReplicaId) replicaId;
    if (mockReplicaId.getReplicaState() == ReplicaState.LEADER) {
      PartitionId existingPartition = mockReplicaId.getPartitionId();
      mockHelixParticipant.onPartitionBecomeLeaderFromStandby(existingPartition.toPathString());
      Set<ReplicaId> peerLeaderReplicasInReplicationManager =
          replicationManager.leaderBasedReplicationAdmin.getLeaderPartitionToPeerLeaderReplicas()
              .get(existingPartition.toPathString());
      Set<ReplicaId> peerLeaderReplicasInClusterMap =
          new HashSet<>(existingPartition.getReplicaIdsByState(ReplicaState.LEADER, null));
      peerLeaderReplicasInClusterMap.remove(mockReplicaId);
      assertThat("Mismatch in list of leader peer replicas stored by partition in replication manager and cluster map",
          peerLeaderReplicasInReplicationManager, is(peerLeaderReplicasInClusterMap));
    }
  }
  storageManager.shutdown();
  replicationConfig = initialReplicationConfig;
}
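One detail in the test above: it overrides the static replicationConfig to switch on LEADER_BASED replication and restores the original on the last line. If an assertion fails midway, that restore is skipped; a try/finally makes it unconditional. A sketch of the safer variant, using the same Ambry classes already shown above:

ReplicationConfig savedConfig = replicationConfig;
try {
  properties.setProperty("replication.model.across.datacenters", "LEADER_BASED");
  replicationConfig = new ReplicationConfig(new VerifiableProperties(properties));
  // ... create the managers and run the assertions under the overridden config ...
} finally {
  replicationConfig = savedConfig; // restore so later tests see the original replication model
}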
Use of com.github.ambry.clustermap.PartitionId in project ambry by linkedin.
From class ReplicationTestHelper, method getLocalAndRemoteHosts.
// static methods to get local and remote hosts, add put/ttlupdate/delete/undelete messages to partitions
/**
* Selects a local and remote host for replication tests that need it.
* @param clusterMap the {@link MockClusterMap} to use.
* @return a {@link Pair} with the first entry being the "local" host and the second, the "remote" host.
*/
public static Pair<MockHost, MockHost> getLocalAndRemoteHosts(MockClusterMap clusterMap) {
  // to make sure we select hosts with the SPECIAL_PARTITION_CLASS, pick hosts from the replicas of that partition
  PartitionId specialPartitionId = clusterMap.getWritablePartitionIds(MockClusterMap.SPECIAL_PARTITION_CLASS).get(0);
  // these hosts have replicas of the "special" partition and all the other partitions.
  MockHost localHost = new MockHost(specialPartitionId.getReplicaIds().get(0).getDataNodeId(), clusterMap);
  MockHost remoteHost = new MockHost(specialPartitionId.getReplicaIds().get(1).getDataNodeId(), clusterMap);
  return new Pair<>(localHost, remoteHost);
}
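A typical call site, sketched for illustration (the variable names are hypothetical, but Pair#getFirst and Pair#getSecond appear throughout the tests above):

MockClusterMap clusterMap = new MockClusterMap();
Pair<MockHost, MockHost> localAndRemoteHosts = getLocalAndRemoteHosts(clusterMap);
MockHost localHost = localAndRemoteHosts.getFirst();   // holds a replica of the special partition
MockHost remoteHost = localAndRemoteHosts.getSecond(); // a different node holding a peer replica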
Use of com.github.ambry.clustermap.PartitionId in project ambry by linkedin.
From class ReplicationTestHelper, method getRemoteReplicasAndReplicaThread.
/**
* Creates and gets the remote replicas that the local host will deal with and the {@link ReplicaThread} to perform
* replication with.
* @param batchSize the number of messages to be returned in each iteration of replication
* @param clusterMap the {@link ClusterMap} to use
* @param localHost the local {@link MockHost} (the one running the replica thread)
* @param remoteHost the remote {@link MockHost} (the target of replication)
* @param storeKeyConverter the {@link StoreKeyConverter} to be used in {@link ReplicaThread}
* @param transformer the {@link Transformer} to be used in {@link ReplicaThread}
* @param listener the {@link StoreEventListener} to use.
* @param replicaSyncUpManager the {@link ReplicaSyncUpManager} to help create replica thread
* @return a {@link Pair} whose first element is the map from remote {@link DataNodeId} to its {@link RemoteReplicaInfo}s
* and whose second element is the {@link ReplicaThread}
*/
protected Pair<Map<DataNodeId, List<RemoteReplicaInfo>>, ReplicaThread> getRemoteReplicasAndReplicaThread(
    int batchSize, ClusterMap clusterMap, MockHost localHost, MockHost remoteHost,
    StoreKeyConverter storeKeyConverter, Transformer transformer, StoreEventListener listener,
    ReplicaSyncUpManager replicaSyncUpManager) throws ReflectiveOperationException {
  ReplicationMetrics replicationMetrics =
      new ReplicationMetrics(new MetricRegistry(), clusterMap.getReplicaIds(localHost.dataNodeId));
  replicationMetrics.populateSingleColoMetrics(remoteHost.dataNodeId.getDatacenterName());
  List<RemoteReplicaInfo> remoteReplicaInfoList = localHost.getRemoteReplicaInfos(remoteHost, listener);
  Map<DataNodeId, List<RemoteReplicaInfo>> replicasToReplicate =
      Collections.singletonMap(remoteHost.dataNodeId, remoteReplicaInfoList);
  StoreKeyFactory storeKeyFactory = Utils.getObj("com.github.ambry.commons.BlobIdFactory", clusterMap);
  Map<DataNodeId, MockHost> hosts = new HashMap<>();
  hosts.put(remoteHost.dataNodeId, remoteHost);
  MockConnectionPool connectionPool = new MockConnectionPool(hosts, clusterMap, batchSize);
  ReplicaThread replicaThread =
      new ReplicaThread("threadtest", new MockFindTokenHelper(storeKeyFactory, replicationConfig), clusterMap,
          new AtomicInteger(0), localHost.dataNodeId, connectionPool, replicationConfig, replicationMetrics, null,
          storeKeyConverter, transformer, clusterMap.getMetricRegistry(), false,
          localHost.dataNodeId.getDatacenterName(), new ResponseHandler(clusterMap), time, replicaSyncUpManager,
          null, null);
  for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfoList) {
    replicaThread.addRemoteReplicaInfo(remoteReplicaInfo);
  }
  for (PartitionId partitionId : clusterMap.getAllPartitionIds(null)) {
    replicationMetrics.addLagMetricForPartition(partitionId, true);
  }
  return new Pair<>(replicasToReplicate, replicaThread);
}
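An illustrative call, assuming a test that needs neither a store event listener nor a sync-up manager (hence the two trailing nulls); the returned thread can then be driven directly by the test rather than scheduled:

Pair<Map<DataNodeId, List<RemoteReplicaInfo>>, ReplicaThread> replicasAndThread =
    getRemoteReplicasAndReplicaThread(batchSize, clusterMap, localHost, remoteHost,
        storeKeyConverter, transformer, null, null);
Map<DataNodeId, List<RemoteReplicaInfo>> replicasToReplicate = replicasAndThread.getFirst();
ReplicaThread replicaThread = replicasAndThread.getSecond();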
Use of com.github.ambry.clustermap.PartitionId in project ambry by linkedin.
From class CryptoJobMetricsTracker, method initializeResourceToHistogramMap.
/**
* Initialize resource-to-latency-histogram maps based on given resource type. Here resource can be {@link PartitionId},
* {@link DataNodeId}, etc. The resource type is defined by {@link RouterConfig#routerOperationTrackerMetricScope}.
* @param clusterMap the {@link ClusterMap} that contains info of all resources.
* @param routerConfig the {@link RouterConfig} that specifies histogram parameters.
*/
private void initializeResourceToHistogramMap(ClusterMap clusterMap, RouterConfig routerConfig) {
  String localDatacenterName = clusterMap.getDatacenterName(clusterMap.getLocalDatacenterId());
  switch (routerConfig.routerOperationTrackerMetricScope) {
    case Partition:
      for (PartitionId partitionId : clusterMap.getAllPartitionIds(null)) {
        getBlobLocalDcResourceToLatency.put(partitionId, createHistogram(routerConfig, false));
        getBlobInfoLocalDcResourceToLatency.put(partitionId, createHistogram(routerConfig, false));
        getBlobCrossDcResourceToLatency.put(partitionId, createHistogram(routerConfig, false));
        getBlobInfoCrossDcResourceToLatency.put(partitionId, createHistogram(routerConfig, false));
        putBlobResourceToLatency.put(partitionId, createHistogram(routerConfig, false));
      }
      break;
    case DataNode:
      List<? extends DataNodeId> dataNodeIds = clusterMap.getDataNodeIds();
      for (DataNodeId dataNodeId : dataNodeIds) {
        if (dataNodeId.getDatacenterName().equals(localDatacenterName)) {
          getBlobLocalDcResourceToLatency.put(dataNodeId, createHistogram(routerConfig, false));
          getBlobInfoLocalDcResourceToLatency.put(dataNodeId, createHistogram(routerConfig, false));
          // PUT blob only cares about data nodes in the local DC
          putBlobResourceToLatency.put(dataNodeId, createHistogram(routerConfig, false));
        } else {
          getBlobCrossDcResourceToLatency.put(dataNodeId, createHistogram(routerConfig, false));
          getBlobInfoCrossDcResourceToLatency.put(dataNodeId, createHistogram(routerConfig, false));
        }
      }
      break;
    case Disk:
      for (PartitionId partitionId : clusterMap.getAllPartitionIds(null)) {
        for (ReplicaId replicaId : partitionId.getReplicaIds()) {
          DiskId diskId = replicaId.getDiskId();
          if (getBlobLocalDcResourceToLatency.containsKey(diskId) || getBlobCrossDcResourceToLatency.containsKey(diskId)) {
            continue;
          }
          if (replicaId.getDataNodeId().getDatacenterName().equals(localDatacenterName)) {
            getBlobLocalDcResourceToLatency.put(diskId, createHistogram(routerConfig, false));
            getBlobInfoLocalDcResourceToLatency.put(diskId, createHistogram(routerConfig, false));
            putBlobResourceToLatency.put(diskId, createHistogram(routerConfig, false));
          } else {
            getBlobCrossDcResourceToLatency.put(diskId, createHistogram(routerConfig, false));
            getBlobInfoCrossDcResourceToLatency.put(diskId, createHistogram(routerConfig, false));
          }
        }
      }
      break;
    default:
      // no histograms needed for other scopes
  }
}
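The method above boils down to one idea: keep one latency histogram per resource, where the resource key type (partition, data node, or disk) is chosen by the configured metric scope. Below is a standalone sketch of that keyed-histogram idea using the Codahale metrics library directly; the resource names and metric name are illustrative, not the router's actual metric names.

import com.codahale.metrics.Histogram;
import com.codahale.metrics.MetricRegistry;
import java.util.HashMap;
import java.util.Map;

public final class ScopedHistograms {
  public static void main(String[] args) {
    MetricRegistry registry = new MetricRegistry();
    Map<String, Histogram> latencyByResource = new HashMap<>();
    for (String resource : new String[]{"partition-1", "partition-2"}) {
      // one histogram per resource, mirroring the map-per-scope structure above
      latencyByResource.computeIfAbsent(resource, r -> registry.histogram("getBlobLatencyMs." + r));
    }
    latencyByResource.get("partition-1").update(42); // record one latency sample (ms)
    System.out.println(latencyByResource.get("partition-1").getSnapshot().getMean());
  }
}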