use of com.github.ambry.clustermap.ReplicaId in project ambry by linkedin.
the class MockSelector method testBasicSendAndPoll.
/**
 * Sends two requests through the network client and keeps polling until a response has been counted
 * for each of them, verifying that every response carries no error, has content, and echoes the
 * correlation id of the request it is attached to. A final poll with nothing left to send must
 * come back empty.
 */
@Test
public void testBasicSendAndPoll() {
  DataNodeId targetNode = localPlainTextDataNodes.get(0);
  ReplicaId targetReplica = sslDisabledClusterMap.getReplicaIds(targetNode).get(0);

  // Queue two requests against the same node; the MockSend argument doubles as the correlation id.
  List<RequestInfo> pendingRequests = new ArrayList<>();
  for (int correlationId = 1; correlationId <= 2; correlationId++) {
    pendingRequests.add(new RequestInfo(targetNode.getHostname(), targetNode.getPortToConnectTo(),
        new MockSend(correlationId), targetReplica, null));
  }

  final int expectedResponses = pendingRequests.size();
  int receivedResponses = 0;
  List<ResponseInfo> polledResponses;
  do {
    polledResponses = networkClient.sendAndPoll(pendingRequests, Collections.emptySet(), POLL_TIMEOUT_MS);
    // The requests are handed over on the first pass only; later passes just poll.
    pendingRequests.clear();
    for (ResponseInfo polled : polledResponses) {
      MockSend originalRequest = (MockSend) polled.getRequestInfo().getRequest();
      NetworkClientErrorCode errorCode = polled.getError();
      ByteBuf payload = polled.content();
      Assert.assertNull("Should not have encountered an error", errorCode);
      Assert.assertNotNull("Should receive a valid response", payload);
      Assert.assertEquals("Received response for the wrong request", originalRequest.getCorrelationId(),
          payload.readInt());
      receivedResponses++;
      polled.release();
    }
  } while (receivedResponses < expectedResponses);
  Assert.assertEquals("Should receive only as many responses as there were requests", expectedResponses,
      receivedResponses);

  // One more poll with an empty request list: nothing further should arrive.
  polledResponses = networkClient.sendAndPoll(pendingRequests, Collections.emptySet(), POLL_TIMEOUT_MS);
  for (ResponseInfo leftover : polledResponses) {
    leftover.release();
  }
  pendingRequests.clear();
  Assert.assertEquals("No responses are expected at this time", 0, polledResponses.size());
}
use of com.github.ambry.clustermap.ReplicaId in project ambry by linkedin.
the class ReplicationMetrics method updateLagMetricForRemoteReplica.
/**
 * Records the latest lag between the local replica and the given {@link RemoteReplicaInfo}, i.e. how far
 * the local replica is behind that remote peer. Partitions that are not already present in
 * {@code partitionLags} are left untouched.
 * @param remoteReplicaInfo the remote replica whose lag is being reported
 * @param lag the new lag value to store for the remote replica's data node
 */
public void updateLagMetricForRemoteReplica(RemoteReplicaInfo remoteReplicaInfo, long lag) {
  ReplicaId remoteReplica = remoteReplicaInfo.getReplicaId();
  // computeIfPresent only invokes the remapping function for partitions that already have an entry,
  // so untracked partitions never get a lag map created for them here.
  partitionLags.computeIfPresent(remoteReplica.getPartitionId(), (partition, lagByDataNode) -> {
    lagByDataNode.put(remoteReplica.getDataNodeId(), lag);
    return lagByDataNode;
  });
}
use of com.github.ambry.clustermap.ReplicaId in project ambry by linkedin.
the class ReplicationMetrics method populateInvalidMessageMetricForReplicas.
/**
 * Makes sure every partition referenced by the given replicas has an entry in
 * {@code partitionIdToInvalidMessageStreamErrorCounter}. Partitions that already have an entry are
 * skipped; for the rest a counter named {@code <partitionId>-CorruptionErrorCount} is obtained from
 * the metric registry and stored.
 * @param replicaIds the replicas whose partitions need a corruption error counter
 */
private void populateInvalidMessageMetricForReplicas(List<? extends ReplicaId> replicaIds) {
  for (ReplicaId replica : replicaIds) {
    PartitionId partition = replica.getPartitionId();
    if (partitionIdToInvalidMessageStreamErrorCounter.containsKey(partition)) {
      // Several replicas can share a partition; only the first one seen adds the entry.
      continue;
    }
    String counterName = MetricRegistry.name(ReplicaThread.class, partition + "-CorruptionErrorCount");
    partitionIdToInvalidMessageStreamErrorCounter.put(partition, registry.counter(counterName));
  }
}
use of com.github.ambry.clustermap.ReplicaId in project ambry by linkedin.
the class ReplicaThread method getLeaderReplicaList.
/**
 * Filters the given remote replica infos down to the leader pairs: entries for which
 * {@code leaderBasedReplicationAdmin.isLeaderPair} accepts the local replica and the remote replica.
 * The result is used by leader-based cross colo replication so that missing blobs are exchanged only
 * between leader replicas; for every other pairing (leader/standby, standby/leader, standby/standby)
 * the missing blobs are expected to arrive through the leaders' interactions.
 * The two input lists are parallel: the response at index {@code i} belongs to the replica at index {@code i}.
 * @param remoteReplicaInfos list of all remote replicas
 * @param exchangeMetadataResponseList list of metadata responses received from the remote replicas
 * @param leaderReplicaInfosOutput output list of leader replicas. It will be populated in this method.
 * @param exchangeMetadataResponseListForLeaderReplicaInfosOutput output list of metadata responses received
 *                                                                for the leader replicas. It will be populated
 *                                                                in this method.
 * @throws IllegalArgumentException if the two input lists differ in size
 */
void getLeaderReplicaList(List<RemoteReplicaInfo> remoteReplicaInfos, List<ExchangeMetadataResponse> exchangeMetadataResponseList, List<RemoteReplicaInfo> leaderReplicaInfosOutput, List<ExchangeMetadataResponse> exchangeMetadataResponseListForLeaderReplicaInfosOutput) throws IllegalArgumentException {
  int responseCount = exchangeMetadataResponseList.size();
  int replicaCount = remoteReplicaInfos.size();
  if (responseCount != replicaCount) {
    throw new IllegalArgumentException(
        "ExchangeMetadataResponseList size " + responseCount + " and replicasToReplicatePerNode size "
            + replicaCount + " should be the same");
  }

  for (int index = 0; index < remoteReplicaInfos.size(); index++) {
    RemoteReplicaInfo candidate = remoteReplicaInfos.get(index);
    ReplicaId localSide = candidate.getLocalReplicaId();
    ReplicaId remoteSide = candidate.getReplicaId();
    if (!leaderBasedReplicationAdmin.isLeaderPair(localSide, remoteSide)) {
      // At least one side is not the leader of its partition; leave it out of the outputs.
      continue;
    }
    leaderReplicaInfosOutput.add(candidate);
    exchangeMetadataResponseListForLeaderReplicaInfosOutput.add(exchangeMetadataResponseList.get(index));
  }
}
use of com.github.ambry.clustermap.ReplicaId in project ambry by linkedin.
the class ReplicaThread method getRemoteStandbyReplicasTimedOutOnNoProgress.
/**
 * Collects the remote replica infos that are not leader pairs, still have missing store messages in
 * their metadata response, and have gone longer than
 * replicationConfig.replicationStandbyWaitTimeoutToTriggerCrossColoFetchSeconds since the last missing
 * message was received.
 * @param remoteReplicaInfos list of remote replica infos
 * @return list of remote replica infos which have timed out due to no progress; empty when the timeout
 *         is configured as -1
 */
List<RemoteReplicaInfo> getRemoteStandbyReplicasTimedOutOnNoProgress(List<RemoteReplicaInfo> remoteReplicaInfos) {
  // Why this exists: with leader-based cross colo replication, non-leader replica pairs do not fetch the
  // blobs for missing keys discovered during metadata exchange. They rely on leader<->leader replication
  // plus intra-dc replication to deliver them. If some of those blobs never show up via the local leader,
  // the standby would be stuck, so after a timeout it is allowed to fetch them itself.
  // Example: for a DELETE following a PUT in the remote data center, a standby may see only the PUT in one
  // replication cycle (the DELETE arrives in the next), while the leader sees PUT and DELETE together. The
  // leader then never fetches the deleted blob and never replicates it to the standby, so the PUT stays in
  // the standby's missing set forever.
  // The timeout is replicationStandbyWaitTimeoutToTriggerCrossColoFetchSeconds; a value of -1 switches
  // this safety feature off.
  List<RemoteReplicaInfo> timedOutReplicas = new ArrayList<>();
  if (replicationConfig.replicationStandbyWaitTimeoutToTriggerCrossColoFetchSeconds == -1) {
    return timedOutReplicas;
  }

  for (RemoteReplicaInfo candidate : remoteReplicaInfos) {
    ReplicaId localSide = candidate.getLocalReplicaId();
    ReplicaId remoteSide = candidate.getReplicaId();
    ExchangeMetadataResponse lastResponse = candidate.getExchangeMetadataResponse();
    if (leaderBasedReplicationAdmin.isLeaderPair(localSide, remoteSide)) {
      // Leader pairs are not subject to the standby timeout.
      continue;
    }
    if (!lastResponse.hasMissingStoreMessages()) {
      continue;
    }
    if ((time.seconds() - lastResponse.lastMissingMessageReceivedTimeSec)
        > replicationConfig.replicationStandbyWaitTimeoutToTriggerCrossColoFetchSeconds) {
      timedOutReplicas.add(candidate);
    }
  }
  return timedOutReplicas;
}
Aggregations