use of com.github.ambry.utils.Time in project ambry by linkedin.
the class MockSelector method testConnectionReplenishment.
/**
 * Verifies that {@link SocketNetworkClient#sendAndPoll(List, Set, int)} replenishes lost connections so that the
 * minimum number of active connections is maintained. The number of connect calls seen by the mock selector is
 * checked after every step.
 */
@Test
public void testConnectionReplenishment() {
  AtomicInteger correlationIdSource = new AtomicInteger(1);
  // creates the requested number of requests to sslHost:sslPort, each carrying a fresh correlation id
  Function<Integer, List<RequestInfo>> buildRequests = count -> IntStream.range(0, count).mapToObj(ignored -> {
    MockSend send = new MockSend(correlationIdSource.getAndIncrement());
    return new RequestInfo(sslHost, sslPort, send, replicaOnSslNode, null);
  }).collect(Collectors.toList());
  // 1 host x 1 port x 3 connections x 100%
  int warmUpPercentage = 100;
  AtomicInteger connectCallsExpected = new AtomicInteger(warmUpPercentage * 3 / 100);
  Runnable verifyConnectCalls =
      () -> Assert.assertEquals(connectCallsExpected.get(), selector.connectCallCount());
  // sends the given number of new requests (no requests to drop) and returns whatever the poll produced
  Function<Integer, List<ResponseInfo>> sendRequests =
      count -> networkClient.sendAndPoll(buildRequests.apply(count), Collections.emptySet(), POLL_TIMEOUT_MS);

  networkClient.warmUpConnections(Collections.singletonList(replicaOnSslNode.getDataNodeId()), warmUpPercentage,
      TIME_FOR_WARM_UP_MS, new ArrayList<>());
  verifyConnectCalls.run();

  // 1. all three requests should be served by the pre-warmed connections, i.e. no new connect calls
  selector.setState(MockSelectorState.Good);
  List<ResponseInfo> responses = sendRequests.apply(3);
  verifyConnectCalls.run();
  Assert.assertEquals(3, responses.size());
  for (ResponseInfo response : responses) {
    response.release();
  }

  // 2. this sendAndPoll() should disconnect two of the pre-warmed connections
  selector.setState(MockSelectorState.DisconnectOnSend);
  responses = sendRequests.apply(2);
  verifyConnectCalls.run();
  Assert.assertEquals(2, responses.size());
  for (ResponseInfo response : responses) {
    response.release();
  }

  // 3. the two connections lost in step 2 should not be replenished yet since a second has not yet passed since
  // startup
  selector.setState(MockSelectorState.Good);
  responses = sendRequests.apply(1);
  verifyConnectCalls.run();
  Assert.assertEquals(1, responses.size());
  for (ResponseInfo response : responses) {
    response.release();
  }

  // 4. a full second has passed: the first of the two connections lost in step 2 should be replenished
  time.setCurrentMilliseconds(time.milliseconds() + Time.MsPerSec);
  selector.setState(MockSelectorState.Good);
  responses = sendRequests.apply(0);
  connectCallsExpected.incrementAndGet();
  verifyConnectCalls.run();
  Assert.assertEquals(0, responses.size());

  // 5. no connections replenished this time since only half a second passed.
  time.setCurrentMilliseconds(time.milliseconds() + 500);
  selector.setState(MockSelectorState.Good);
  responses = sendRequests.apply(0);
  verifyConnectCalls.run();
  Assert.assertEquals(0, responses.size());

  // 6. another half second later: the second connection lost in step 2 should be replenished
  time.setCurrentMilliseconds(time.milliseconds() + 500);
  selector.setState(MockSelectorState.Good);
  responses = sendRequests.apply(0);
  connectCallsExpected.incrementAndGet();
  verifyConnectCalls.run();
  Assert.assertEquals(0, responses.size());

  // 7. this call should use the existing connections in the pool
  selector.setState(MockSelectorState.Good);
  responses = sendRequests.apply(3);
  verifyConnectCalls.run();
  Assert.assertEquals(3, responses.size());
  for (ResponseInfo response : responses) {
    response.release();
  }
}
use of com.github.ambry.utils.Time in project ambry by linkedin.
the class CompactionPolicyFactoryTest method testCompactionPolicyFactory.
/**
 * Verifies that every supported {@link CompactionPolicyFactory} can be instantiated from the class name set under
 * the {@code store.compaction.policy.factory} property, and that the factory creates the expected
 * {@link CompactionPolicy} implementation.
 * @throws Exception
 */
@Test
public void testCompactionPolicyFactory() throws Exception {
  // each pair is (factory class name, class name of the policy that the factory is expected to create)
  List<Pair<String, String>> factoryToPolicy = new ArrayList<>();
  factoryToPolicy.add(new Pair<>("com.github.ambry.store.StatsBasedCompactionPolicyFactory",
      "com.github.ambry.store.StatsBasedCompactionPolicy"));
  factoryToPolicy.add(
      new Pair<>("com.github.ambry.store.CompactAllPolicyFactory", "com.github.ambry.store.CompactAllPolicy"));

  for (Pair<String, String> expectation : factoryToPolicy) {
    String expectedFactoryClass = expectation.getFirst();
    String expectedPolicyClass = expectation.getSecond();

    Properties props = new Properties();
    props.setProperty("store.compaction.policy.factory", expectedFactoryClass);
    StoreConfig storeConfig = new StoreConfig(new VerifiableProperties(props));
    Time mockTime = new MockTime();

    CompactionPolicyFactory factory = Utils.getObj(storeConfig.storeCompactionPolicyFactory, storeConfig, mockTime);
    String actualFactoryClass = factory.getClass().getCanonicalName();
    Assert.assertEquals("Did not receive expected CompactionPolicy instance", expectedFactoryClass,
        actualFactoryClass);

    CompactionPolicy policy = factory.getCompactionPolicy();
    String actualPolicyClass = policy.getClass().getCanonicalName();
    Assert.assertEquals("Did not receive expected CompactionPolicy instance", expectedPolicyClass,
        actualPolicyClass);
  }
}
use of com.github.ambry.utils.Time in project ambry by linkedin.
the class ConsistencyCheckerTool method main.
/**
 * Entry point of the consistency checker tool. Builds the tool from the supplied properties, runs the consistency
 * check over every sub-directory of {@code config.pathOfInput} and terminates the JVM with exit code 0 if the check
 * succeeded and 1 otherwise.
 * <p>
 * {@link System#exit(int)} is deliberately invoked only after the try-with-resources statement has completed:
 * {@code System.exit} never returns, so calling it inside the try body would skip {@link ClusterMap#close()}.
 * @param args the command line arguments, handed to {@code ToolUtils.getVerifiableProperties}.
 * @throws IllegalArgumentException if the sub-directories of {@code config.pathOfInput} cannot be listed.
 * @throws Exception if the tool cannot be set up or the consistency check itself fails with an exception.
 */
public static void main(String[] args) throws Exception {
  VerifiableProperties properties = ToolUtils.getVerifiableProperties(args);
  ConsistencyCheckerToolConfig config = new ConsistencyCheckerToolConfig(properties);
  ClusterMapConfig clusterMapConfig = new ClusterMapConfig(properties);
  ServerConfig serverConfig = new ServerConfig(properties);
  boolean success;
  try (ClusterMap clusterMap = new StaticClusterAgentsFactory(clusterMapConfig, config.hardwareLayoutFilePath,
      config.partitionLayoutFilePath).getClusterMap()) {
    StoreToolsMetrics metrics = new StoreToolsMetrics(clusterMap.getMetricRegistry());
    StoreConfig storeConfig = new StoreConfig(properties);
    // this tool supports only blob IDs. It can become generic if StoreKeyFactory provides a deserFromString method.
    BlobIdFactory blobIdFactory = new BlobIdFactory(clusterMap);
    Set<StoreKey> filterKeySet = new HashSet<>();
    for (String key : config.filterSet) {
      filterKeySet.add(new BlobId(key, clusterMap));
    }
    Time time = SystemTime.getInstance();
    Throttler throttler = new Throttler(config.indexEntriesToProcessPerSec, 1000, true, time);
    StoreKeyConverterFactory storeKeyConverterFactory =
        Utils.getObj(serverConfig.serverStoreKeyConverterFactory, properties, clusterMap.getMetricRegistry());
    ConsistencyCheckerTool consistencyCheckerTool =
        new ConsistencyCheckerTool(clusterMap, blobIdFactory, storeConfig, filterKeySet, throttler, metrics, time,
            storeKeyConverterFactory.getStoreKeyConverter());
    // File.listFiles returns null (not an empty array) when the path is not a directory or an I/O error occurs
    File[] replicaDirs = config.pathOfInput.listFiles(File::isDirectory);
    if (replicaDirs == null) {
      throw new IllegalArgumentException(
          "Could not list the sub-directories of " + config.pathOfInput + "; it is not a readable directory");
    }
    success = consistencyCheckerTool.checkConsistency(replicaDirs).getFirst();
  }
  // exit only once the cluster map has been closed by the try-with-resources statement above
  System.exit(success ? 0 : 1);
}
use of com.github.ambry.utils.Time in project ambry by linkedin.
the class DumpCompactionLogTool method main.
/**
 * Entry point of the compaction log dump tool. Loads the {@link CompactionLog} stored at
 * {@code config.compactionLogFilePath} and prints its string representation to standard output.
 * @param args the command line arguments, handed to {@code ToolUtils.getVerifiableProperties}.
 * @throws Exception if the configuration, the cluster map or the compaction log cannot be loaded.
 */
public static void main(String[] args) throws Exception {
  VerifiableProperties toolProperties = ToolUtils.getVerifiableProperties(args);
  DumpCompactionLogConfig dumpConfig = new DumpCompactionLogConfig(toolProperties);
  ClusterMapConfig clusterConfig = new ClusterMapConfig(toolProperties);
  // the cluster agents factory implementation is chosen by configuration and instantiated by class name
  ClusterAgentsFactory agentsFactory =
      (ClusterAgentsFactory) Utils.getObj(clusterConfig.clusterMapClusterAgentsFactory, clusterConfig,
          dumpConfig.hardwareLayoutFilePath, dumpConfig.partitionLayoutFilePath);
  try (ClusterMap clusterMap = agentsFactory.getClusterMap()) {
    File logFile = new File(dumpConfig.compactionLogFilePath);
    BlobIdFactory idFactory = new BlobIdFactory(clusterMap);
    StoreConfig storeConfig = new StoreConfig(toolProperties);
    Time systemTime = SystemTime.getInstance();
    CompactionLog log = new CompactionLog(logFile, idFactory, systemTime, storeConfig);
    System.out.println(log);
  }
}
use of com.github.ambry.utils.Time in project ambry by linkedin.
the class ReplicaThread method replicate.
/**
* Do replication for replicas grouped by {@link DataNodeId}
* A replication cycle between two replicas involves the following steps:
* 1. Exchange metadata : fetch the metadata of blobs added to remote replica since the last synchronization point
* and filter the ones missing in local store.
* 2. Fetch missing blobs: fetch the missing blobs by issuing GET request to remote replica and write them to
* the local store
*
* During cross-colo replication, depending on the {@link ReplicationModelType}, the missing blobs are either fetched
* from all remote replicas (if modelType == ALL_TO_ALL) or only fetched for local leader replicas from their remote
* leader replicas (if modelType == LEADER_BASED). In the latter case, non-leader replica pairs (leader <-> standby,
* standby <-> leader, standby <-> standby) will get their missing blobs from their corresponding leader<->leader
* exchanges and intra-dc replication.
*
* Here is a table listing on what is exchanged between local and remote replicas based on their roles
* (leader/standby) when {@link ReplicationModelType} is LEADER_BASED.
*
* | Local Leader | Local Standby | Remote Leader | Remote Standby
* -------------------------------------------------------------------------------------
* Leader: | --- | metadata and data | metadata and data | metadata only
* Standby: | metadata and data | metadata and data | metadata only | metadata only
*
* Each remote node is handled independently: any {@link Throwable} raised while talking to a node is logged and
* counted as a replication error, the connection to that node is destroyed, and the loop moves on to the next node.
* After all nodes have been visited the thread sleeps for the idle duration (if nothing needed replication in this
* cycle and an idle duration is configured) or otherwise for the throttle duration (if one is configured).
*/
public void replicate() {
// remains true only if no node had any replica to replicate in this cycle; selects the idle sleep at the end
boolean allCaughtUp = true;
Map<DataNodeId, List<RemoteReplicaInfo>> dataNodeToRemoteReplicaInfo = getRemoteReplicaInfos();
// NOTE(review): this logs the size of replicasToReplicateGroupedByNode, not of the map that is iterated below -
// confirm that the two always have the same size
logger.trace("Replicating from {} DataNodes.", replicasToReplicateGroupedByNode.size());
for (Map.Entry<DataNodeId, List<RemoteReplicaInfo>> entry : dataNodeToRemoteReplicaInfo.entrySet()) {
DataNodeId remoteNode = entry.getKey();
// stop visiting further nodes once the running flag has been cleared
if (!running) {
break;
}
List<RemoteReplicaInfo> replicasToReplicatePerNode = entry.getValue();
// latency timers for this node, chosen by inter/intra colo and by SSL/plain text; both are stopped in the
// finally block below
Timer.Context context = null;
Timer.Context portTypeBasedContext = null;
if (replicatingFromRemoteColo) {
context = replicationMetrics.interColoReplicationLatency.get(remoteNode.getDatacenterName()).time();
if (replicatingOverSsl) {
portTypeBasedContext = replicationMetrics.sslInterColoReplicationLatency.get(remoteNode.getDatacenterName()).time();
} else {
portTypeBasedContext = replicationMetrics.plainTextInterColoReplicationLatency.get(remoteNode.getDatacenterName()).time();
}
} else {
context = replicationMetrics.intraColoReplicationLatency.time();
if (replicatingOverSsl) {
portTypeBasedContext = replicationMetrics.sslIntraColoReplicationLatency.time();
} else {
portTypeBasedContext = replicationMetrics.plainTextIntraColoReplicationLatency.time();
}
}
ConnectedChannel connectedChannel = null;
// -1 marks a phase that has not completed yet; the catch block relies on these sentinels to work out which phase
// (connection checkout, metadata exchange, fixing missing keys) was in progress when a throwable was raised
long checkoutConnectionTimeInMs = -1;
long exchangeMetadataTimeInMs = -1;
long fixMissingStoreKeysTimeInMs = -1;
long replicationStartTimeInMs = time.milliseconds();
// start time of the phase currently in progress; reset right before each phase begins
long startTimeInMs = replicationStartTimeInMs;
// Get a list of active replicas that needs be included for this replication cycle
List<RemoteReplicaInfo> activeReplicasPerNode = new ArrayList<>();
List<RemoteReplicaInfo> standbyReplicasWithNoProgress = new ArrayList<>();
for (RemoteReplicaInfo remoteReplicaInfo : replicasToReplicatePerNode) {
ReplicaId replicaId = remoteReplicaInfo.getReplicaId();
// the replica is in backoff until the current time reaches its re-enable time
boolean inBackoff = time.milliseconds() < remoteReplicaInfo.getReEnableReplicationTime();
if (replicaId.isDown() || inBackoff || remoteReplicaInfo.getLocalStore().getCurrentState() == ReplicaState.OFFLINE || replicationDisabledPartitions.contains(replicaId.getPartitionId())) {
logger.debug("Skipping replication on replica {} because one of following conditions is true: remote replica is down " + "= {}; in backoff = {}; local store is offline = {}; replication is disabled = {}.", replicaId.getPartitionId().toPathString(), replicaId.isDown(), inBackoff, remoteReplicaInfo.getLocalStore().getCurrentState() == ReplicaState.OFFLINE, replicationDisabledPartitions.contains(replicaId.getPartitionId()));
continue;
}
if (replicatingFromRemoteColo && leaderBasedReplicationAdmin != null) {
// check if all missing keys for standby replicas from previous replication cycle are now obtained
// via leader replica. If we still have missing keys, don't include them in current replication cycle
// to avoid sending duplicate metadata requests since their token wouldn't have advanced.
processMissingKeysFromPreviousMetadataResponse(remoteReplicaInfo);
if (containsMissingKeysFromPreviousMetadataExchange(remoteReplicaInfo)) {
standbyReplicasWithNoProgress.add(remoteReplicaInfo);
continue;
}
}
activeReplicasPerNode.add(remoteReplicaInfo);
}
logger.trace("Replicating from {} RemoteReplicaInfos.", activeReplicasPerNode.size());
// use a variable to track current replica list to replicate (for logging purpose)
List<RemoteReplicaInfo> currentReplicaList = activeReplicasPerNode;
try {
if (activeReplicasPerNode.size() > 0) {
allCaughtUp = false;
// if maxReplicaCountPerRequest > 0, split remote replicas on same node into multiple lists; otherwise there is
// no limit.
List<List<RemoteReplicaInfo>> activeReplicaSubLists = maxReplicaCountPerRequest > 0 ? Utils.partitionList(activeReplicasPerNode, maxReplicaCountPerRequest) : Collections.singletonList(activeReplicasPerNode);
startTimeInMs = time.milliseconds();
// the port of the first active replica is used for the single connection shared by all replicas on this node
connectedChannel = connectionPool.checkOutConnection(remoteNode.getHostname(), activeReplicasPerNode.get(0).getPort(), replicationConfig.replicationConnectionPoolCheckoutTimeoutMs);
checkoutConnectionTimeInMs = time.milliseconds() - startTimeInMs;
// we checkout ConnectedChannel once and replicate remote replicas in batch via same ConnectedChannel
for (List<RemoteReplicaInfo> replicaSubList : activeReplicaSubLists) {
// reset the phase sentinels so that a failure is attributed to the phase of the current sub list
exchangeMetadataTimeInMs = -1;
fixMissingStoreKeysTimeInMs = -1;
currentReplicaList = replicaSubList;
logger.debug("Exchanging metadata with {} remote replicas on {}", currentReplicaList.size(), remoteNode);
startTimeInMs = time.milliseconds();
List<ExchangeMetadataResponse> exchangeMetadataResponseList = exchangeMetadata(connectedChannel, replicaSubList);
exchangeMetadataTimeInMs = time.milliseconds() - startTimeInMs;
if (replicatingFromRemoteColo && leaderBasedReplicationAdmin != null) {
// If leader based replication is enabled and we are replicating from remote colo, fetch the missing blobs
// only for local leader replicas from their corresponding peer leader replicas (Leader <-> Leader).
// Non-leader replica pairs (standby <-> leaders, leader <-> standby, standby <-> standby) will get their
// missing blobs from their leader pair exchanges and intra-dc replication.
List<RemoteReplicaInfo> leaderReplicaList = new ArrayList<>();
List<ExchangeMetadataResponse> exchangeMetadataResponseListForLeaderReplicas = new ArrayList<>();
getLeaderReplicaList(replicaSubList, exchangeMetadataResponseList, leaderReplicaList, exchangeMetadataResponseListForLeaderReplicas);
// from here on only the leader replicas and their metadata responses are processed for this sub list
replicaSubList = leaderReplicaList;
exchangeMetadataResponseList = exchangeMetadataResponseListForLeaderReplicas;
}
if (replicaSubList.size() > 0) {
startTimeInMs = time.milliseconds();
fixMissingStoreKeys(connectedChannel, replicaSubList, exchangeMetadataResponseList, false);
fixMissingStoreKeysTimeInMs = time.milliseconds() - startTimeInMs;
}
}
}
if (replicatingFromRemoteColo && leaderBasedReplicationAdmin != null) {
// Get a list of blocked standby replicas whose missing keys haven't arrived for long time.
// Use case: In leader-based cross colo replication, standby replicas don't send GET requests for missing keys
// found in metadata exchange and expect them to come via leader <-> leader replication.
// This is a safety condition to ensure that standby replicas are not stuck waiting for the keys to come from leader
// by fetching the missing keys themselves.
// TODO: As an improvement to this, we can first fetch missing blobs from local leader/other replicas in intra-dc first.
// TODO: If the result to fetch a blob from local dc is Blob_Not_Found, then we can fetch it from replicas in remote datacenter.
// This will involve co-ordination between replica threads containing replicas of same partition.
List<RemoteReplicaInfo> standbyReplicasTimedOutOnNoProgress = getRemoteStandbyReplicasTimedOutOnNoProgress(standbyReplicasWithNoProgress);
if (standbyReplicasTimedOutOnNoProgress.size() > 0) {
allCaughtUp = false;
currentReplicaList = standbyReplicasTimedOutOnNoProgress;
// a connection is already checked out if there were active replicas above; otherwise check one out now
if (connectedChannel == null) {
checkoutConnectionTimeInMs = -1;
startTimeInMs = time.milliseconds();
connectedChannel = connectionPool.checkOutConnection(remoteNode.getHostname(), standbyReplicasTimedOutOnNoProgress.get(0).getPort(), replicationConfig.replicationConnectionPoolCheckoutTimeoutMs);
checkoutConnectionTimeInMs = time.milliseconds() - startTimeInMs;
}
// build new ExchangeMetadataResponse objects from the responses stored on the timed out standby replicas
List<ExchangeMetadataResponse> exchangeMetadataResponseListForBlockedReplicas = standbyReplicasTimedOutOnNoProgress.stream().map(remoteReplicaInfo -> new ExchangeMetadataResponse(remoteReplicaInfo.getExchangeMetadataResponse())).collect(Collectors.toList());
// Convert (and cache) the remote keys that are being fetched as the StoreKeyConverter would have cleared
// these keys from its cache while it is replicating with other replicas before time out happened for these standby replicas.
List<StoreKey> storeKeysToConvert = exchangeMetadataResponseListForBlockedReplicas.stream().map(ExchangeMetadataResponse::getMissingStoreKeys).flatMap(Collection::stream).collect(Collectors.toList());
convertStoreKeys(storeKeysToConvert);
// no metadata is exchanged on this path; 0 (rather than -1) makes the catch block attribute a failure here to
// the fix missing store keys phase
exchangeMetadataTimeInMs = 0;
fixMissingStoreKeysTimeInMs = -1;
logger.debug("Sending GET request to fetch missing keys for standby remote replicas {} timed out on no progress", currentReplicaList);
startTimeInMs = time.milliseconds();
fixMissingStoreKeys(connectedChannel, standbyReplicasTimedOutOnNoProgress, exchangeMetadataResponseListForBlockedReplicas, true);
fixMissingStoreKeysTimeInMs = time.milliseconds() - startTimeInMs;
}
}
} catch (Throwable e) {
// the first phase whose time is still -1 is the one that was in progress; record how long it ran before failing
if (checkoutConnectionTimeInMs == -1) {
// throwable happened in checkout connection phase
checkoutConnectionTimeInMs = time.milliseconds() - startTimeInMs;
// only checkout failures are reported to the response handler, against the first replica of the current list
responseHandler.onEvent(currentReplicaList.get(0).getReplicaId(), e);
} else if (exchangeMetadataTimeInMs == -1) {
// throwable happened in exchange metadata phase
exchangeMetadataTimeInMs = time.milliseconds() - startTimeInMs;
} else if (fixMissingStoreKeysTimeInMs == -1) {
// throwable happened in fix missing store phase
fixMissingStoreKeysTimeInMs = time.milliseconds() - startTimeInMs;
}
logger.error("Error while talking to peer: Remote node: {}, Thread name: {}, Remote replicas: {}, Current active " + "remote replica list: {}, Checkout connection time: {}, Exchange metadata time: {}, Fix missing " + "store key time {}", remoteNode, threadName, replicasToReplicatePerNode, currentReplicaList, checkoutConnectionTimeInMs, exchangeMetadataTimeInMs, fixMissingStoreKeysTimeInMs, e);
replicationMetrics.incrementReplicationErrors(replicatingOverSsl);
// after a failure the connection is destroyed instead of being returned to the pool; setting it to null keeps
// the finally block from checking it back in
if (connectedChannel != null) {
connectionPool.destroyConnection(connectedChannel);
connectedChannel = null;
}
} finally {
long totalReplicationTime = time.milliseconds() - replicationStartTimeInMs;
replicationMetrics.updateTotalReplicationTime(totalReplicationTime, replicatingFromRemoteColo, replicatingOverSsl, datacenterName);
// still non-null only if no throwable was caught above, so the connection can be returned to the pool
if (connectedChannel != null) {
connectionPool.checkInConnection(connectedChannel);
}
context.stop();
portTypeBasedContext.stop();
}
}
// sleep for the idle duration when nothing needed replication in this cycle (and an idle duration is configured);
// otherwise sleep for the throttle duration if one is configured
long sleepDurationMs = 0;
if (allCaughtUp && replicationConfig.replicationReplicaThreadIdleSleepDurationMs > 0) {
sleepDurationMs = replicationConfig.replicationReplicaThreadIdleSleepDurationMs;
idleCount.inc();
} else if (threadThrottleDurationMs > 0) {
sleepDurationMs = threadThrottleDurationMs;
throttleCount.inc();
}
if (sleepDurationMs > 0) {
try {
long currentTime = time.milliseconds();
time.sleep(sleepDurationMs);
logger.trace("Replica thread: {} slept for {} ms", threadName, time.milliseconds() - currentTime);
} catch (InterruptedException e) {
// NOTE(review): the interrupt is only logged; the thread's interrupt status is not restored here - confirm
// that this is intentional
logger.error("Received interrupted exception during throttling", e);
}
}
}
Aggregations