Use of com.github.ambry.clustermap.ReplicaId in the ambry project by LinkedIn.
From the class ReplicationEngine, method updatePartitionInfoMaps.
/**
 * Registers a freshly built {@link PartitionInfo} for the given local replica in both
 * {@link ReplicationEngine#partitionToPartitionInfo} and {@link ReplicationEngine#mountPathToPartitionInfos}.
 * @param remoteReplicaInfos the {@link RemoteReplicaInfo}(s) of the local {@link ReplicaId}
 * @param replicaId the local replica
 */
protected void updatePartitionInfoMaps(List<RemoteReplicaInfo> remoteReplicaInfos, ReplicaId replicaId) {
  PartitionId localPartition = replicaId.getPartitionId();
  PartitionInfo info =
      new PartitionInfo(remoteReplicaInfos, localPartition, storeManager.getStore(localPartition), replicaId);
  partitionToPartitionInfo.put(localPartition, info);
  // Group partition infos by mount path; the set is created lazily on first use of a mount path.
  mountPathToPartitionInfos.computeIfAbsent(replicaId.getMountPath(), path -> ConcurrentHashMap.newKeySet())
      .add(info);
}
Use of com.github.ambry.clustermap.ReplicaId in the ambry project by LinkedIn.
From the class CloudToStoreReplicationManager, method addCloudReplica.
/**
 * Adds a replica of the given partition, together with its {@link RemoteReplicaInfo}s, to the backup list.
 * @param partitionName name of the partition of the replica to add.
 * @throws ReplicationException if replicas initialization failed.
 */
private void addCloudReplica(String partitionName) throws ReplicationException {
  // This path is taken when a replica transitions from standby to leader, so even a newly added
  // replica must already be registered with the storage manager; bail out otherwise.
  ReplicaId localReplica = storeManager.getReplica(partitionName);
  if (localReplica == null) {
    logger.warn("Got partition leader notification for partition {} that is not present on the node", partitionName);
    return;
  }
  PartitionId partition = localReplica.getPartitionId();
  Store localStore = storeManager.getStore(partition);
  if (localStore == null) {
    logger.warn("Unable to add cloud replica for partition {} as store for the partition is not present or started.", partitionName);
    return;
  }
  DataNodeId cloudNode = getCloudDataNode();
  CloudReplica cloudPeer = new CloudReplica(partition, cloudNode);
  FindTokenFactory tokenFactory = tokenHelper.getFindTokenFactoryFromReplicaType(cloudPeer.getReplicaType());
  RemoteReplicaInfo cloudReplicaInfo =
      new RemoteReplicaInfo(cloudPeer, localReplica, localStore, tokenFactory.getNewFindToken(),
          storeConfig.storeDataFlushIntervalSeconds * SystemTime.MsPerSec * Replication_Delay_Multiplier,
          SystemTime.getInstance(), cloudPeer.getDataNodeId().getPortToConnectTo());
  replicationMetrics.addMetricsForRemoteReplicaInfo(cloudReplicaInfo, trackPerDatacenterLagInMetric);
  // Each replica on an Ambry server node replicates from exactly one cloud replica.
  List<RemoteReplicaInfo> cloudReplicaInfos = Collections.singletonList(cloudReplicaInfo);
  PartitionInfo info = new PartitionInfo(cloudReplicaInfos, partition, localStore, localReplica);
  partitionToPartitionInfo.put(partition, info);
  mountPathToPartitionInfos.computeIfAbsent(localReplica.getMountPath(), path -> ConcurrentHashMap.newKeySet())
      .add(info);
  logger.info("Cloud Partition {} added to {}. CloudNode {} port {}", partitionName, dataNodeId, cloudNode, cloudNode.getPortToConnectTo());
  // Restore any previously persisted replication token for this replica.
  reloadReplicationTokenIfExists(localReplica, cloudReplicaInfos);
  // Hand the remote replica infos off to a {@link ReplicaThread}.
  addRemoteReplicaInfoToReplicaThread(cloudReplicaInfos, true);
  if (replicationConfig.replicationTrackPerPartitionLagFromRemote) {
    replicationMetrics.addLagMetricForPartition(partition, true);
  }
  replicationMetrics.addCatchUpPointMetricForPartition(partition);
}
Use of com.github.ambry.clustermap.ReplicaId in the ambry project by LinkedIn.
From the class ReplicaThread, method containsMissingKeysFromPreviousMetadataExchange.
/**
 * Reports whether the given remote replica still has messages from its previous metadata exchange
 * that are missing locally, i.e. they have not yet arrived from the leader via intra-dc replication.
 * Only non-leader replica pairs are considered; a leader pair always returns false.
 * @param remoteReplicaInfo remote replica information
 * @return true if missing messages in previous metadata exchange are not yet received
 */
boolean containsMissingKeysFromPreviousMetadataExchange(RemoteReplicaInfo remoteReplicaInfo) {
  ReplicaId localReplica = remoteReplicaInfo.getLocalReplicaId();
  ReplicaId remoteReplica = remoteReplicaInfo.getReplicaId();
  ExchangeMetadataResponse lastExchange = remoteReplicaInfo.getExchangeMetadataResponse();
  boolean isNonLeaderPair = !leaderBasedReplicationAdmin.isLeaderPair(localReplica, remoteReplica);
  // A non-empty previous exchange means some keys are still outstanding.
  return isNonLeaderPair && !lastExchange.isEmpty();
}
Use of com.github.ambry.clustermap.ReplicaId in the ambry project by LinkedIn.
From the class ReplicationTest, method replicaFromOfflineToBootstrapTest.
/**
 * Tests the OFFLINE -> BOOTSTRAP state transition handling in the replication manager:
 * unknown partition, successful addition of a new replica, failed addition, and a no-op
 * transition on an already-tracked replica.
 * @throws Exception
 */
@Test
public void replicaFromOfflineToBootstrapTest() throws Exception {
  MockClusterMap mockClusterMap = new MockClusterMap();
  ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
  MockHelixParticipant.metricRegistry = new MetricRegistry();
  MockHelixParticipant helixParticipant = new MockHelixParticipant(clusterMapConfig);
  DataNodeId localNode = mockClusterMap.getDataNodeIds().get(0);
  Pair<StorageManager, ReplicationManager> mgrs =
      createStorageManagerAndReplicationManager(mockClusterMap, clusterMapConfig, helixParticipant);
  StorageManager storageMgr = mgrs.getFirst();
  MockReplicationManager replicationMgr = (MockReplicationManager) mgrs.getSecond();
  assertTrue("State change listener in cluster participant should contain replication manager listener", helixParticipant.getPartitionStateChangeListeners().containsKey(StateModelListenerType.ReplicationManagerListener));
  // 1. a transition for an unknown partition must surface ReplicaNotFound
  try {
    helixParticipant.onPartitionBecomeBootstrapFromOffline("-1");
    fail("should fail because replica is not found");
  } catch (StateTransitionException e) {
    assertEquals("Transition error doesn't match", ReplicaNotFound, e.getErrorCode());
  }
  // 2. a brand-new replica should be added to the replication manager on transition
  ReplicaId replicaToAdd = getNewReplicaToAdd(mockClusterMap);
  PartitionId addedPartition = replicaToAdd.getPartitionId();
  assertTrue("Adding new replica to Storage Manager should succeed", storageMgr.addBlobStore(replicaToAdd));
  assertFalse("partitionToPartitionInfo should not contain new partition", replicationMgr.partitionToPartitionInfo.containsKey(addedPartition));
  helixParticipant.onPartitionBecomeBootstrapFromOffline(addedPartition.toPathString());
  assertTrue("partitionToPartitionInfo should contain new partition", replicationMgr.partitionToPartitionInfo.containsKey(addedPartition));
  // 3. a failed replica addition must surface ReplicaOperationFailure
  replicationMgr.partitionToPartitionInfo.remove(addedPartition);
  replicationMgr.addReplicaReturnVal = false;
  try {
    helixParticipant.onPartitionBecomeBootstrapFromOffline(addedPartition.toPathString());
    fail("should fail due to replica addition failure");
  } catch (StateTransitionException e) {
    assertEquals("Transition error doesn't match", ReplicaOperationFailure, e.getErrorCode());
  }
  replicationMgr.addReplicaReturnVal = null;
  // 4. OFFLINE -> BOOTSTRAP on an already-tracked replica should be a no-op
  ReplicaId existingReplica = mockClusterMap.getReplicaIds(localNode).get(0);
  assertTrue("partitionToPartitionInfo should contain existing partition", replicationMgr.partitionToPartitionInfo.containsKey(existingReplica.getPartitionId()));
  helixParticipant.onPartitionBecomeBootstrapFromOffline(existingReplica.getPartitionId().toPathString());
  storageMgr.shutdown();
}
Use of com.github.ambry.clustermap.ReplicaId in the ambry project by LinkedIn.
From the class ReplicationTest, method dcLevelReplicationLagMetricsTest.
/**
 * Test that metrics that track remote replicas lag behind local replicas in each dc.
 * @throws Exception
 */
@Test
public void dcLevelReplicationLagMetricsTest() throws Exception {
MockClusterMap clusterMap = new MockClusterMap();
ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
Pair<StorageManager, ReplicationManager> managers = createStorageManagerAndReplicationManager(clusterMap, clusterMapConfig, null);
MockReplicationManager replicationManager = (MockReplicationManager) managers.getSecond();
Set<String> remoteDcNames = new HashSet<>(Arrays.asList("DC1", "DC2", "DC3"));
String localDcName = clusterMap.getDataNodeIds().get(0).getDatacenterName();
remoteDcNames.remove(localDcName);
// before updating replication lag, the avg lag gauge in every dc (local and remote) reports the
// initial lag of the mock replicas — 18 bytes, per the assertions below.
// NOTE(review): the value 18 presumably comes from the MockReplicationManager/mock store setup; confirm there.
MetricRegistry metricRegistry = replicationManager.getMetricRegistry();
String prefix = ReplicaThread.class.getName() + ".";
String avgMetricSuffix = "-avgReplicaLagFromLocalInBytes";
assertEquals("Average replication lag in local dc is not expected", 18.0, metricRegistry.getGauges().get(prefix + localDcName + avgMetricSuffix).getValue());
for (String remoteDc : remoteDcNames) {
assertEquals("Average replication lag in remote dc is not expected", 18.0, metricRegistry.getGauges().get(prefix + remoteDc + avgMetricSuffix).getValue());
}
// iterate over all partitions on current node and make sure all their peer replicas in local dc have fully caught up
for (Map.Entry<PartitionId, PartitionInfo> entry : replicationManager.partitionToPartitionInfo.entrySet()) {
PartitionId localPartition = entry.getKey();
PartitionInfo partitionInfo = entry.getValue();
// keep only the remote replica infos whose peer replica lives in the local dc
List<RemoteReplicaInfo> remoteReplicaInfos = partitionInfo.getRemoteReplicaInfos().stream().filter(info -> info.getReplicaId().getDataNodeId().getDatacenterName().equals(localDcName)).collect(Collectors.toList());
for (RemoteReplicaInfo remoteReplicaInfoInLocalDc : remoteReplicaInfos) {
ReplicaId peerReplicaInLocalDc = remoteReplicaInfoInLocalDc.getReplicaId();
// reporting 18 bytes read marks the peer as fully caught up (matching the 18-byte initial lag above)
replicationManager.updateTotalBytesReadByRemoteReplica(localPartition, peerReplicaInLocalDc.getDataNodeId().getHostname(), peerReplicaInLocalDc.getReplicaPath(), 18);
}
}
// verify that after updating replication lag for all peer replicas in local dc, the avg lag in local dc has dropped to 0
assertEquals("Average replication lag in local dc is not expected", 0.0, metricRegistry.getGauges().get(prefix + localDcName + avgMetricSuffix).getValue());
// remote dc peers were not updated, so their avg lag is still 18.0
for (String remoteDc : remoteDcNames) {
assertEquals("Average replication lag in remote dc is not expected", 18.0, metricRegistry.getGauges().get(prefix + remoteDc + avgMetricSuffix).getValue());
}
}
Aggregations