use of com.github.ambry.clustermap.CloudDataNode in project ambry by linkedin.
the class CloudToStoreReplicationManager method handleChangeInVcrNodes.
/**
* When the set of VCR nodes changes, update the list of live VCR nodes.
* If any nodes were removed as part of the change, replication from those nodes
* stops and the affected partitions pick new nodes to replicate from.
* Note that this method is not thread safe with respect to concurrently arriving Helix notifications.
* @param newVcrNodes Set of new vcr nodes.
*/
private void handleChangeInVcrNodes(Set<CloudDataNode> newVcrNodes) {
  Set<CloudDataNode> removedNodes = new HashSet<>(vcrNodes.get());
  removedNodes.removeAll(newVcrNodes);
  vcrNodes.set(new ArrayList<>(newVcrNodes));
  logger.info("Handling VCR nodes change. The removed vcr nodes: {}", removedNodes);
  List<PartitionId> partitionsOnRemovedNodes = getPartitionsOnNodes(removedNodes);
  for (PartitionId partitionId : partitionsOnRemovedNodes) {
    try {
      // First remove the replica to stop replication from the removed node, then add the replica back so that it
      // can pick a new cloud node to start replicating from.
      removeCloudReplica(partitionId.toPathString());
      addCloudReplica(partitionId.toPathString());
    } catch (ReplicationException rex) {
      replicationMetrics.addCloudPartitionErrorCount.inc();
      // Pass the exception as the last argument so the logger records the stack trace.
      logger.error("Exception during remove/add replica for partitionId {}", partitionId, rex);
    }
  }
}
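Because the handler is not thread safe, a caller wiring it to Helix must serialize the notifications itself. Below is a minimal sketch of one way to do that with a single-threaded executor; the wrapper class and listener method are illustrative assumptions, not Ambry's actual API.

import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

// Hypothetical wrapper that serializes node-change notifications so that
// handleChangeInVcrNodes never runs concurrently with itself.
class SerializedVcrNodeChangeHandler {
  // Single-threaded executor: notifications are applied strictly in arrival order.
  private final ExecutorService notificationExecutor = Executors.newSingleThreadExecutor();

  void onVcrNodesChanged(Set<CloudDataNode> newVcrNodes) {
    // Each Helix callback is queued instead of racing with an in-flight handler.
    notificationExecutor.submit(() -> handleChangeInVcrNodes(newVcrNodes));
  }

  private void handleChangeInVcrNodes(Set<CloudDataNode> newVcrNodes) {
    // ... same logic as above ...
  }
}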
use of com.github.ambry.clustermap.CloudDataNode in project ambry by linkedin.
the class ReplicaThread method getMessagesForMissingKeys.
/**
* Gets the messages for the keys that are missing from the local store by issuing a {@link GetRequest} to the remote
* node, if there are any missing keys. If there are no missing keys to be fetched, then no request is issued and a
* null response is returned.
* @param connectedChannel The connection channel to the remote node
* @param exchangeMetadataResponseList The list of metadata response from the remote node
* @param replicasToReplicatePerNode The list of remote replicas for the remote node
* @param remoteNode The remote node from which replication needs to happen
* @param remoteColoGetRequestForStandby whether this request fetches missing keys for standby or non-leader
* replica pairs during leader-based replication.
* @return The response that contains the missing messages; or null if no request was issued because there were no
* keys missing.
* @throws ReplicationException if the remote node returns an error for the get request.
* @throws IOException if there is a network error while sending the request or receiving the response.
*/
private GetResponse getMessagesForMissingKeys(ConnectedChannel connectedChannel,
    List<ExchangeMetadataResponse> exchangeMetadataResponseList, List<RemoteReplicaInfo> replicasToReplicatePerNode,
    DataNodeId remoteNode, boolean remoteColoGetRequestForStandby) throws ReplicationException, IOException {
  List<PartitionRequestInfo> partitionRequestInfoList = new ArrayList<>();
  for (int i = 0; i < exchangeMetadataResponseList.size(); i++) {
    ExchangeMetadataResponse exchangeMetadataResponse = exchangeMetadataResponseList.get(i);
    RemoteReplicaInfo remoteReplicaInfo = replicasToReplicatePerNode.get(i);
    if (exchangeMetadataResponse.serverErrorCode == ServerErrorCode.No_Error) {
      Set<StoreKey> missingStoreKeys = exchangeMetadataResponse.getMissingStoreKeys();
      if (missingStoreKeys.size() > 0) {
        if (remoteNode instanceof CloudDataNode) {
          logger.trace("Replicating blobs from CloudDataNode: {}", missingStoreKeys);
        }
        ArrayList<BlobId> keysToFetch = new ArrayList<>();
        for (StoreKey storeKey : missingStoreKeys) {
          keysToFetch.add((BlobId) storeKey);
        }
        PartitionRequestInfo partitionRequestInfo =
            new PartitionRequestInfo(remoteReplicaInfo.getReplicaId().getPartitionId(), keysToFetch);
        partitionRequestInfoList.add(partitionRequestInfo);
      }
    }
  }
  GetResponse getResponse = null;
  if (!partitionRequestInfoList.isEmpty()) {
    GetRequest getRequest = new GetRequest(correlationIdGenerator.incrementAndGet(),
        GetRequest.Replication_Client_Id_Prefix + dataNodeId.getHostname() + "[" + dataNodeId.getDatacenterName()
            + "]", MessageFormatFlags.All, partitionRequestInfoList,
        replicationConfig.replicationIncludeAll ? GetOption.Include_All : GetOption.None);
    long startTime = time.milliseconds();
    try {
      ChannelOutput channelOutput = connectedChannel.sendAndReceive(getRequest);
      getResponse = GetResponse.readFrom(channelOutput.getInputStream(), clusterMap);
      long getRequestTime = time.milliseconds() - startTime;
      replicationMetrics.updateGetRequestTime(getRequestTime, replicatingFromRemoteColo, replicatingOverSsl,
          datacenterName, remoteColoGetRequestForStandby);
      if (getResponse.getError() != ServerErrorCode.No_Error) {
        logger.error("Remote node: {} Thread name: {} Remote replicas: {} GetResponse from replication: {}",
            remoteNode, threadName, replicasToReplicatePerNode, getResponse.getError());
        throw new ReplicationException(
            "Get request returned error when trying to get missing keys " + getResponse.getError());
      }
    } catch (IOException e) {
      responseHandler.onEvent(replicasToReplicatePerNode.get(0).getReplicaId(), e);
      throw e;
    }
  }
  return getResponse;
}
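A caller typically unpacks the returned GetResponse per partition before writing the fetched messages to the local store. The fragment below sketches the general shape of that consumption loop; the accessor names on the response objects are assumptions for illustration and may not match Ambry's exact API.

// Hypothetical consumption of the GetResponse returned above; accessor names are assumed.
GetResponse getResponse = getMessagesForMissingKeys(connectedChannel, exchangeMetadataResponseList,
    replicasToReplicatePerNode, remoteNode, false);
if (getResponse != null) {
  // One PartitionResponseInfo per PartitionRequestInfo that was sent, in the same order.
  for (PartitionResponseInfo partitionResponseInfo : getResponse.getPartitionResponseInfoList()) {
    if (partitionResponseInfo.getErrorCode() == ServerErrorCode.No_Error) {
      // Each message info describes one fetched blob to be written to the local store.
      for (MessageInfo messageInfo : partitionResponseInfo.getMessageInfoList()) {
        logger.trace("Fetched missing key {} for partition {}", messageInfo.getStoreKey(),
            partitionResponseInfo.getPartition());
      }
    }
  }
}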
use of com.github.ambry.clustermap.CloudDataNode in project ambry by linkedin.
the class CloudRouterFactory method getRequestHandlerPool.
/**
* Utility method to build a {@link RequestHandlerPool}.
* @param verifiableProperties the properties to use.
* @param clusterMap the {@link ClusterMap} to use.
* @param cloudDestination the {@link CloudDestination} to use.
* @param cloudConfig the {@link CloudConfig} to use.
* @return the constructed {@link RequestHandlerPool}.
* @throws Exception if the construction fails.
*/
public RequestHandlerPool getRequestHandlerPool(VerifiableProperties verifiableProperties, ClusterMap clusterMap,
    CloudDestination cloudDestination, CloudConfig cloudConfig) throws Exception {
  ClusterMapConfig clusterMapConfig = new ClusterMapConfig(verifiableProperties);
  MetricRegistry registry = clusterMap.getMetricRegistry();
  DataNodeId nodeId = new CloudDataNode(cloudConfig, clusterMapConfig);
  VcrMetrics vcrMetrics = new VcrMetrics(registry);
  StoreManager cloudStorageManager =
      new CloudStorageManager(verifiableProperties, vcrMetrics, cloudDestination, clusterMap);
  LocalRequestResponseChannel channel = new LocalRequestResponseChannel();
  ServerMetrics serverMetrics = new ServerMetrics(registry, AmbryRequests.class);
  StoreKeyFactory storeKeyFactory = new BlobIdFactory(clusterMap);
  StoreKeyConverterFactory storeKeyConverterFactory =
      Utils.getObj(routerConfig.routerStoreKeyConverterFactory, verifiableProperties, registry);
  // A null notification system is passed into AmbryRequests so that replication events are not emitted from a
  // frontend.
  AmbryRequests requests = new AmbryRequests(cloudStorageManager, channel, clusterMap, nodeId, registry,
      serverMetrics, null, null, null, storeKeyFactory, storeKeyConverterFactory);
  return new RequestHandlerPool(routerConfig.routerRequestHandlerNumOfThreads, channel, requests);
}
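A hedged usage sketch of the pool built above: construct it once, serve requests through its channel, and stop it when the router closes. The surrounding variable names and the shutdown method are assumptions for illustration.

// Hypothetical wiring of the handler pool (names and shutdown method are assumed).
RequestHandlerPool handlerPool =
    cloudRouterFactory.getRequestHandlerPool(verifiableProperties, clusterMap, cloudDestination, cloudConfig);
try {
  // Requests placed on the pool's LocalRequestResponseChannel are served by
  // routerConfig.routerRequestHandlerNumOfThreads handler threads.
  // ... operate the router ...
} finally {
  // Stop the handler threads when the router closes (method name assumed).
  handlerPool.shutdown();
}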
use of com.github.ambry.clustermap.CloudDataNode in project ambry by linkedin.
the class CloudBlobStoreTest method testPutWithTtl.
/**
* Test PUT(with TTL) and TtlUpdate record replication.
* Replication may happen after both the PUT and the TtlUpdate, or after the TtlUpdate only.
* The PUT may already be expired, or its expiration time may be below or at/above the upload threshold.
* @throws Exception
*/
@Test
public void testPutWithTtl() throws Exception {
  // Set up remote host
  MockClusterMap clusterMap = new MockClusterMap();
  MockHost remoteHost = getLocalAndRemoteHosts(clusterMap).getSecond();
  List<PartitionId> partitionIds = clusterMap.getWritablePartitionIds(null);
  PartitionId partitionId = partitionIds.get(0);
  StoreKeyFactory storeKeyFactory = new BlobIdFactory(clusterMap);
  MockStoreKeyConverterFactory storeKeyConverterFactory = new MockStoreKeyConverterFactory(null, null);
  storeKeyConverterFactory.setConversionMap(new HashMap<>());
  storeKeyConverterFactory.setReturnInputIfAbsent(true);
  MockStoreKeyConverterFactory.MockStoreKeyConverter storeKeyConverter =
      storeKeyConverterFactory.getStoreKeyConverter();
  Transformer transformer = new BlobIdTransformer(storeKeyFactory, storeKeyConverter);
  Map<DataNodeId, MockHost> hosts = new HashMap<>();
  hosts.put(remoteHost.dataNodeId, remoteHost);
  MockConnectionPool connectionPool = new MockConnectionPool(hosts, clusterMap, 4);
  // Generate BlobIds for the following PUTs.
  short blobIdVersion = CommonTestUtils.getCurrentBlobIdVersion();
  short accountId = Utils.getRandomShort(TestUtils.RANDOM);
  short containerId = Utils.getRandomShort(TestUtils.RANDOM);
  boolean toEncrypt = TestUtils.RANDOM.nextBoolean();
  List<BlobId> blobIdList = new ArrayList<>();
  for (int i = 0; i < 6; i++) {
    blobIdList.add(new BlobId(blobIdVersion, BlobId.BlobIdType.NATIVE, ClusterMap.UNKNOWN_DATACENTER_ID, accountId,
        containerId, partitionId, toEncrypt, BlobId.BlobDataType.DATACHUNK));
  }
  // Set up VCR
  Properties props = new Properties();
  setBasicProperties(props);
  props.setProperty("clustermap.port", "12300");
  props.setProperty("vcr.ssl.port", "12345");
  ReplicationConfig replicationConfig = new ReplicationConfig(new VerifiableProperties(props));
  ClusterMapConfig clusterMapConfig = new ClusterMapConfig(new VerifiableProperties(props));
  CloudConfig cloudConfig = new CloudConfig(new VerifiableProperties(props));
  CloudDataNode cloudDataNode = new CloudDataNode(cloudConfig, clusterMapConfig);
  LatchBasedInMemoryCloudDestination latchBasedInMemoryCloudDestination =
      new LatchBasedInMemoryCloudDestination(blobIdList, clusterMap);
  CloudReplica cloudReplica = new CloudReplica(partitionId, cloudDataNode);
  CloudBlobStore cloudBlobStore = new CloudBlobStore(new VerifiableProperties(props), partitionId,
      latchBasedInMemoryCloudDestination, clusterMap, new VcrMetrics(new MetricRegistry()));
  cloudBlobStore.start();
  // Create ReplicaThread and add RemoteReplicaInfo to it.
  ReplicationMetrics replicationMetrics = new ReplicationMetrics(new MetricRegistry(), Collections.emptyList());
  ReplicaThread replicaThread =
      new ReplicaThread("threadtest", new MockFindTokenHelper(storeKeyFactory, replicationConfig), clusterMap,
          new AtomicInteger(0), cloudDataNode, connectionPool, replicationConfig, replicationMetrics, null,
          storeKeyConverter, transformer, clusterMap.getMetricRegistry(), false, cloudDataNode.getDatacenterName(),
          new ResponseHandler(clusterMap), new MockTime(), null, null, null);
  for (ReplicaId replica : partitionId.getReplicaIds()) {
    if (replica.getDataNodeId() == remoteHost.dataNodeId) {
      RemoteReplicaInfo remoteReplicaInfo =
          new RemoteReplicaInfo(replica, cloudReplica, cloudBlobStore, new MockFindToken(0, 0), Long.MAX_VALUE,
              SystemTime.getInstance(), new Port(remoteHost.dataNodeId.getPort(), PortType.PLAINTEXT));
      replicaThread.addRemoteReplicaInfo(remoteReplicaInfo);
      break;
    }
  }
  long referenceTime = System.currentTimeMillis();
  // Case 1: Put already expired. Replication happens after the Put and after the TtlUpdate.
  // Upload to the cloud only after replicating the ttlUpdate.
  BlobId id = blobIdList.get(0);
  addPutMessagesToReplicasOfPartition(id, accountId, containerId, partitionId,
      Collections.singletonList(remoteHost), referenceTime - 2000, referenceTime - 1000);
  replicaThread.replicate();
  assertFalse("Blob should not exist.", latchBasedInMemoryCloudDestination.doesBlobExist(id));
  addTtlUpdateMessagesToReplicasOfPartition(partitionId, id, Collections.singletonList(remoteHost),
      Utils.Infinite_Time);
  replicaThread.replicate();
  assertTrue("Blob should exist.", latchBasedInMemoryCloudDestination.doesBlobExist(id));
  // Case 2: Put already expired. Replication happens after the TtlUpdate only.
  // Upload to the cloud only after replicating the ttlUpdate.
  id = blobIdList.get(1);
  addPutMessagesToReplicasOfPartition(id, accountId, containerId, partitionId,
      Collections.singletonList(remoteHost), referenceTime - 2000, referenceTime - 1000);
  addTtlUpdateMessagesToReplicasOfPartition(partitionId, id, Collections.singletonList(remoteHost),
      Utils.Infinite_Time);
  replicaThread.replicate();
  assertTrue("Blob should exist.", latchBasedInMemoryCloudDestination.doesBlobExist(id));
  // Case 3: Put TTL less than cloudConfig.vcrMinTtlDays. Replication happens after the Put and after the TtlUpdate.
  // Upload to the cloud only after replicating the ttlUpdate.
  id = blobIdList.get(2);
  addPutMessagesToReplicasOfPartition(id, accountId, containerId, partitionId,
      Collections.singletonList(remoteHost), referenceTime,
      referenceTime + TimeUnit.DAYS.toMillis(cloudConfig.vcrMinTtlDays) - 1);
  replicaThread.replicate();
  if (isVcr) {
    assertFalse("Blob should not exist (vcr).", latchBasedInMemoryCloudDestination.doesBlobExist(id));
  } else {
    assertTrue("Blob should exist (not vcr).", latchBasedInMemoryCloudDestination.doesBlobExist(id));
  }
  addTtlUpdateMessagesToReplicasOfPartition(partitionId, id, Collections.singletonList(remoteHost),
      Utils.Infinite_Time);
  replicaThread.replicate();
  assertTrue("Blob should exist.", latchBasedInMemoryCloudDestination.doesBlobExist(id));
  // Case 4: Put TTL less than cloudConfig.vcrMinTtlDays. Replication happens after the TtlUpdate only.
  // Upload to the cloud only after replicating the ttlUpdate.
  id = blobIdList.get(3);
  addPutMessagesToReplicasOfPartition(id, accountId, containerId, partitionId,
      Collections.singletonList(remoteHost), referenceTime,
      referenceTime + TimeUnit.DAYS.toMillis(cloudConfig.vcrMinTtlDays) - 1);
  addTtlUpdateMessagesToReplicasOfPartition(partitionId, id, Collections.singletonList(remoteHost),
      Utils.Infinite_Time);
  replicaThread.replicate();
  assertTrue("Blob should exist.", latchBasedInMemoryCloudDestination.doesBlobExist(id));
  // Case 5: Put TTL greater than or equal to cloudConfig.vcrMinTtlDays. Replication happens after the Put and
  // after the TtlUpdate. Upload to the cloud after the Put and update the ttl after the TtlUpdate.
  id = blobIdList.get(4);
  addPutMessagesToReplicasOfPartition(id, accountId, containerId, partitionId,
      Collections.singletonList(remoteHost), referenceTime,
      referenceTime + TimeUnit.DAYS.toMillis(cloudConfig.vcrMinTtlDays));
  replicaThread.replicate();
  assertTrue("Blob should exist.", latchBasedInMemoryCloudDestination.doesBlobExist(id));
  addTtlUpdateMessagesToReplicasOfPartition(partitionId, id, Collections.singletonList(remoteHost),
      Utils.Infinite_Time);
  replicaThread.replicate();
  assertTrue("Blob should exist.", latchBasedInMemoryCloudDestination.doesBlobExist(id));
  // Case 6: Put TTL greater than or equal to cloudConfig.vcrMinTtlDays. Replication happens after the TtlUpdate only.
  // Upload to the cloud after the TtlUpdate.
  id = blobIdList.get(5);
  addPutMessagesToReplicasOfPartition(id, accountId, containerId, partitionId,
      Collections.singletonList(remoteHost), referenceTime,
      referenceTime + TimeUnit.DAYS.toMillis(cloudConfig.vcrMinTtlDays));
  addTtlUpdateMessagesToReplicasOfPartition(partitionId, id, Collections.singletonList(remoteHost),
      Utils.Infinite_Time);
  replicaThread.replicate();
  assertTrue("Blob should exist.", latchBasedInMemoryCloudDestination.doesBlobExist(id));
  // Verify the expiration time of all blobs.
  Map<String, CloudBlobMetadata> map = latchBasedInMemoryCloudDestination.getBlobMetadata(blobIdList);
  for (BlobId blobId : blobIdList) {
    assertEquals("Blob ttl should be infinite now.", Utils.Infinite_Time,
        map.get(blobId.toString()).getExpirationTime());
  }
}
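The six cases above all reduce to one decision: on a PUT, the VCR uploads immediately only when the blob is permanent or its remaining TTL clears the vcrMinTtlDays threshold; otherwise it waits for the TtlUpdate. A minimal sketch of that rule follows; the method and parameter names are illustrative, not the actual CloudBlobStore internals.

// Illustrative distillation of the upload rule the test exercises; not the actual CloudBlobStore code.
boolean shouldUploadOnPut(long expirationTimeMs, long putTimeMs, boolean isVcr, int vcrMinTtlDays) {
  if (!isVcr) {
    return true; // Non-VCR stores upload regardless of TTL (the else-branch of Case 3).
  }
  if (expirationTimeMs == Utils.Infinite_Time) {
    return true; // Permanent blobs are always uploaded.
  }
  long ttlMs = expirationTimeMs - putTimeMs;
  // Cases 1-4 (expired or short TTL) defer the upload until the TtlUpdate arrives;
  // Cases 5-6 (TTL >= vcrMinTtlDays) upload right away.
  return ttlMs >= TimeUnit.DAYS.toMillis(vcrMinTtlDays);
}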
use of com.github.ambry.clustermap.CloudDataNode in project ambry by linkedin.
the class CloudTokenPersistorTest method basicTest.
@Test
public void basicTest() throws Exception {
  Properties props =
      VcrTestUtil.createVcrProperties("DC1", "vcrClusterName", "zkConnectString", 12310, 12410, 12510, null);
  props.setProperty("replication.cloud.token.factory", replicationCloudTokenFactory);
  CloudConfig cloudConfig = new CloudConfig(new VerifiableProperties(props));
  ClusterMapConfig clusterMapConfig = new ClusterMapConfig(new VerifiableProperties(props));
  ClusterMap clusterMap = new MockClusterMap();
  DataNodeId dataNodeId = new CloudDataNode(cloudConfig, clusterMapConfig);
  Map<String, Set<PartitionInfo>> mountPathToPartitionInfoList = new HashMap<>();
  BlobIdFactory blobIdFactory = new BlobIdFactory(clusterMap);
  StoreFindTokenFactory factory = new StoreFindTokenFactory(blobIdFactory);
  PartitionId partitionId = clusterMap.getAllPartitionIds(null).get(0);
  ReplicaId cloudReplicaId = new CloudReplica(partitionId, dataNodeId);
  List<? extends ReplicaId> peerReplicas = cloudReplicaId.getPeerReplicaIds();
  List<RemoteReplicaInfo> remoteReplicas = new ArrayList<>();
  List<RemoteReplicaInfo.ReplicaTokenInfo> replicaTokenInfos = new ArrayList<>();
  for (ReplicaId remoteReplica : peerReplicas) {
    RemoteReplicaInfo remoteReplicaInfo =
        new RemoteReplicaInfo(remoteReplica, cloudReplicaId, null, factory.getNewFindToken(), 10,
            SystemTime.getInstance(), remoteReplica.getDataNodeId().getPortToConnectTo());
    remoteReplicas.add(remoteReplicaInfo);
    replicaTokenInfos.add(new RemoteReplicaInfo.ReplicaTokenInfo(remoteReplicaInfo));
  }
  PartitionInfo partitionInfo = new PartitionInfo(remoteReplicas, partitionId, null, cloudReplicaId);
  mountPathToPartitionInfoList.computeIfAbsent(cloudReplicaId.getMountPath(), key -> ConcurrentHashMap.newKeySet())
      .add(partitionInfo);
  LatchBasedInMemoryCloudDestination cloudDestination =
      new LatchBasedInMemoryCloudDestination(Collections.emptyList(),
          AzureCloudDestinationFactory.getReplicationFeedType(new VerifiableProperties(props)), clusterMap);
  ReplicationConfig replicationConfig = new ReplicationConfig(new VerifiableProperties(props));
  CloudTokenPersistor cloudTokenPersistor = new CloudTokenPersistor("replicaTokens", mountPathToPartitionInfoList,
      new ReplicationMetrics(new MetricRegistry(), Collections.emptyList()), clusterMap,
      new FindTokenHelper(blobIdFactory, replicationConfig), cloudDestination);
  cloudTokenPersistor.persist(cloudReplicaId.getMountPath(), replicaTokenInfos);
  List<RemoteReplicaInfo.ReplicaTokenInfo> retrievedReplicaTokenInfos =
      cloudTokenPersistor.retrieve(cloudReplicaId.getMountPath());
  Assert.assertEquals("Number of tokens doesn't match.", replicaTokenInfos.size(),
      retrievedReplicaTokenInfos.size());
  for (int i = 0; i < replicaTokenInfos.size(); i++) {
    Assert.assertArrayEquals("Token is not correct.", replicaTokenInfos.get(i).getReplicaToken().toBytes(),
        retrievedReplicaTokenInfos.get(i).getReplicaToken().toBytes());
  }
}
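In production the persist/retrieve round-trip exercised above would run on a schedule rather than once. Below is a hedged sketch of periodic persistence that assumes only the persist signature shown in the test; the interval and error handling are illustrative choices.

import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

// Hypothetical scheduling wrapper around the persistor from the test above.
ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor();
scheduler.scheduleAtFixedRate(() -> {
  try {
    // Persist the latest replica tokens for the mount path so replication can resume after a restart.
    cloudTokenPersistor.persist(cloudReplicaId.getMountPath(), replicaTokenInfos);
  } catch (Exception e) {
    // A failed persist is retried on the next tick rather than killing the scheduler thread.
    logger.warn("Failed to persist replica tokens", e);
  }
}, 5, 5, TimeUnit.MINUTES);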