
Example 16 with DataNodeId

use of com.github.ambry.clustermap.DataNodeId in project ambry by linkedin.

the class ReplicaThread method applyDelete.

/**
 * Applies a delete to the blob described by {@code messageInfo}.
 * @param messageInfo the {@link MessageInfo} that will be transformed into a delete
 * @param remoteReplicaInfo The remote replica that is being replicated from
 * @throws StoreException
 */
private void applyDelete(MessageInfo messageInfo, RemoteReplicaInfo remoteReplicaInfo) throws StoreException {
    DataNodeId remoteNode = remoteReplicaInfo.getReplicaId().getDataNodeId();
    try {
        messageInfo = new MessageInfo.Builder(messageInfo).isDeleted(true).isUndeleted(false).build();
        remoteReplicaInfo.getLocalStore().delete(Collections.singletonList(messageInfo));
        logger.trace("Remote node: {} Thread name: {} Remote replica: {} Key delete: {}", remoteNode, threadName, remoteReplicaInfo.getReplicaId(), messageInfo.getStoreKey());
    } catch (StoreException e) {
        // The blob may be deleted or updated which is alright
        if (e.getErrorCode() == StoreErrorCodes.ID_Deleted || e.getErrorCode() == StoreErrorCodes.Life_Version_Conflict) {
            logger.trace("Remote node: {} Thread name: {} Remote replica: {} Key {}: {}", remoteNode, threadName, remoteReplicaInfo.getReplicaId(), messageInfo.getStoreKey(), e.getErrorCode().name());
        } else {
            throw e;
        }
    }
    // Notify of the delete regardless of which branch above was taken, as long as the Delete is guaranteed to have taken effect locally.
    if (notification != null) {
        notification.onBlobReplicaDeleted(dataNodeId.getHostname(), dataNodeId.getPort(), messageInfo.getStoreKey().getID(), BlobReplicaSourceType.REPAIRED);
    }
}
Also used : DataNodeId(com.github.ambry.clustermap.DataNodeId) StoreException(com.github.ambry.store.StoreException)
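
The catch block above treats ID_Deleted and Life_Version_Conflict as benign outcomes: if the blob was already deleted locally, or another operation bumped its life version, the delete has still effectively taken hold and replication can continue. A minimal standalone sketch of that idempotent error check, assuming StoreErrorCodes lives in the same com.github.ambry.store package as StoreException (the helper name isExpectedDeleteConflict is hypothetical, not part of Ambry):

import com.github.ambry.store.StoreErrorCodes;
import com.github.ambry.store.StoreException;

final class DeleteErrorHandlingSketch {

    // Returns true for store errors that a replicated delete can safely ignore,
    // because the delete has already taken effect locally in some form.
    static boolean isExpectedDeleteConflict(StoreException e) {
        return e.getErrorCode() == StoreErrorCodes.ID_Deleted
            || e.getErrorCode() == StoreErrorCodes.Life_Version_Conflict;
    }
}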

Example 17 with DataNodeId

use of com.github.ambry.clustermap.DataNodeId in project ambry by linkedin.

the class ReplicationEngine method addRemoteReplicaInfoToReplicaThread.

/**
 * Assigns each {@link RemoteReplicaInfo} to a {@link ReplicaThread} for replication.
 * The assignment is based on {@link DataNodeId}. If no {@link ReplicaThread} is responsible for a {@link DataNodeId} yet,
 * one is selected via {@link ReplicationEngine#getReplicaThreadIndexToUse(String)}.
 * Also creates the thread pool for a datacenter if it does not exist yet.
 * @param remoteReplicaInfos List of {@link RemoteReplicaInfo} to add.
 * @param startThread whether the threads should be started when created.
 */
protected void addRemoteReplicaInfoToReplicaThread(List<RemoteReplicaInfo> remoteReplicaInfos, boolean startThread) {
    for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfos) {
        DataNodeId dataNodeIdToReplicate = remoteReplicaInfo.getReplicaId().getDataNodeId();
        String datacenter = dataNodeIdToReplicate.getDatacenterName();
        List<ReplicaThread> replicaThreads = getOrCreateThreadPoolIfNecessary(datacenter, startThread);
        if (replicaThreads == null) {
            logger.warn("Number of replica threads is smaller or equal to 0, not starting any replica threads for {}.", datacenter);
            continue;
        }
        ReplicaThread replicaThread = dataNodeIdToReplicaThread.computeIfAbsent(dataNodeIdToReplicate, key -> replicaThreads.get(getReplicaThreadIndexToUse(datacenter)));
        replicaThread.addRemoteReplicaInfo(remoteReplicaInfo);
        remoteReplicaInfo.setReplicaThread(replicaThread);
    }
}
Also used : DataNodeId(com.github.ambry.clustermap.DataNodeId)
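
The computeIfAbsent call above pins every replica of a given remote DataNodeId to a single ReplicaThread, so each remote node is only ever replicated from by one thread, while new nodes are spread across the pool. A generic sketch of that per-key, round-robin worker assignment using plain JDK types (the RoundRobinAssigner class is illustrative and stands in for the per-datacenter selection done by getReplicaThreadIndexToUse):

import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;

final class RoundRobinAssigner<K, T> {

    private final Map<K, T> assignment = new ConcurrentHashMap<>();
    private final AtomicInteger nextIndex = new AtomicInteger();

    // Returns the worker already assigned to this key, or pins the key to the next worker round-robin.
    T assign(K key, List<T> workers) {
        return assignment.computeIfAbsent(key,
            k -> workers.get(Math.floorMod(nextIndex.getAndIncrement(), workers.size())));
    }
}

With K standing in for DataNodeId and T for ReplicaThread, assign(dataNodeIdToReplicate, replicaThreads) mirrors the lookup performed in the method above.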

Example 18 with DataNodeId

use of com.github.ambry.clustermap.DataNodeId in project ambry by linkedin.

the class DiskTokenPersistorTest method setup.

/**
 * Performs the one-time setup for the tests.
 * @throws Exception if an exception occurs during setup.
 */
@BeforeClass
public static void setup() throws Exception {
    clusterMap = new MockClusterMap();
    mountPathToPartitionInfoList = new HashMap<>();
    mountPathToReplicaTokenInfos = new HashMap<>();
    BlobIdFactory blobIdFactory = new BlobIdFactory(clusterMap);
    StoreFindTokenFactory factory = new StoreFindTokenFactory(blobIdFactory);
    DataNodeId dataNodeId = clusterMap.getDataNodeIds().get(0);
    List<? extends ReplicaId> localReplicas = clusterMap.getReplicaIds(dataNodeId);
    replicaId = localReplicas.get(0);
    for (ReplicaId replicaId : localReplicas) {
        List<? extends ReplicaId> peerReplicas = replicaId.getPeerReplicaIds();
        List<RemoteReplicaInfo> remoteReplicas = new ArrayList<>();
        for (ReplicaId remoteReplica : peerReplicas) {
            RemoteReplicaInfo remoteReplicaInfo = new RemoteReplicaInfo(remoteReplica, replicaId, null, factory.getNewFindToken(), 10, SystemTime.getInstance(), remoteReplica.getDataNodeId().getPortToConnectTo());
            remoteReplicas.add(remoteReplicaInfo);
            mountPathToReplicaTokenInfos.computeIfAbsent(replicaId.getMountPath(), k -> new ArrayList<>()).add(new RemoteReplicaInfo.ReplicaTokenInfo(remoteReplicaInfo));
        }
        PartitionInfo partitionInfo = new PartitionInfo(remoteReplicas, replicaId.getPartitionId(), null, replicaId);
        mountPathToPartitionInfoList.computeIfAbsent(replicaId.getMountPath(), key -> ConcurrentHashMap.newKeySet()).add(partitionInfo);
    }
    Properties replicationProperties = new Properties();
    replicationProperties.setProperty("replication.cloud.token.factory", MockFindTokenFactory.class.getName());
    ReplicationConfig replicationConfig = new ReplicationConfig(new VerifiableProperties(replicationProperties));
    findTokenHelper = new FindTokenHelper(blobIdFactory, replicationConfig);
    mockStoreManager = Mockito.mock(StoreManager.class);
    Mockito.when(mockStoreManager.checkLocalPartitionStatus(any(), any())).thenReturn(ServerErrorCode.No_Error);
}
Also used : StoreManager(com.github.ambry.server.StoreManager) ArgumentMatchers(org.mockito.ArgumentMatchers) BeforeClass(org.junit.BeforeClass) ServerErrorCode(com.github.ambry.server.ServerErrorCode) CrcOutputStream(com.github.ambry.utils.CrcOutputStream) DataNodeId(com.github.ambry.clustermap.DataNodeId) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) DataOutputStream(java.io.DataOutputStream) Map(java.util.Map) SystemTime(com.github.ambry.utils.SystemTime) ReplicationConfig(com.github.ambry.config.ReplicationConfig) MetricRegistry(com.codahale.metrics.MetricRegistry) Properties(java.util.Properties) Iterator(java.util.Iterator) VerifiableProperties(com.github.ambry.config.VerifiableProperties) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) FileOutputStream(java.io.FileOutputStream) Set(java.util.Set) ClusterMap(com.github.ambry.clustermap.ClusterMap) Utils(com.github.ambry.utils.Utils) IOException(java.io.IOException) Test(org.junit.Test) BlobIdFactory(com.github.ambry.commons.BlobIdFactory) File(java.io.File) Mockito(org.mockito.Mockito) List(java.util.List) ReplicaId(com.github.ambry.clustermap.ReplicaId) StoreFindTokenFactory(com.github.ambry.store.StoreFindTokenFactory) Assert(org.junit.Assert) Collections(java.util.Collections) MockClusterMap(com.github.ambry.clustermap.MockClusterMap)
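
Both maps built in setup() rely on the same grouping idiom: computeIfAbsent creates the collection for a mount path on first use, and later replicas for that mount path are appended to it. The sketch below isolates that idiom using only cluster-map calls that appear in the test; it is illustrative, and it declares throws Exception because the MockClusterMap constructor can throw, as the test's own setup signature suggests.

import com.github.ambry.clustermap.DataNodeId;
import com.github.ambry.clustermap.MockClusterMap;
import com.github.ambry.clustermap.ReplicaId;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

final class MountPathGroupingSketch {

    // Groups the first data node's replicas by mount path, mirroring how
    // mountPathToPartitionInfoList and mountPathToReplicaTokenInfos are keyed in setup().
    static Map<String, List<ReplicaId>> groupByMountPath() throws Exception {
        MockClusterMap clusterMap = new MockClusterMap();
        DataNodeId localNode = clusterMap.getDataNodeIds().get(0);
        Map<String, List<ReplicaId>> replicasByMountPath = new HashMap<>();
        for (ReplicaId replica : clusterMap.getReplicaIds(localNode)) {
            replicasByMountPath.computeIfAbsent(replica.getMountPath(), k -> new ArrayList<>()).add(replica);
        }
        return replicasByMountPath;
    }
}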

Example 19 with DataNodeId

use of com.github.ambry.clustermap.DataNodeId in project ambry by linkedin.

the class LeaderBasedReplicationTest method setUp.

public void setUp() throws IOException {
    properties.setProperty("replication.model.across.datacenters", "LEADER_BASED");
    replicationConfig = new ReplicationConfig(new VerifiableProperties(properties));
    clusterMap = new MockClusterMap();
    clusterMapConfig = new ClusterMapConfig(verifiableProperties);
    MockHelixParticipant.metricRegistry = new MetricRegistry();
    mockHelixParticipant = new MockHelixParticipant(clusterMapConfig);
    /*
      Setup:
      we have 3 nodes that host replicas belonging to the same partitions:
      a) localNode (local node that hosts partitions)
      b) remoteNodeInLocalDC (remote node in local data center that shares the partitions)
      c) remoteNodeInRemoteDC (remote node in remote data center that shares the partitions)

      Each node has a few of its partitions as leaders and the rest as standbys. Leadership is randomly assigned during
      creation of the replicas for the mock partitions.
     */
    DataNodeId localNode = clusterMap.getDataNodeIds().get(0);
    List<DataNodeId> remoteNodes = getRemoteNodesFromLocalAndRemoteDCs(clusterMap, localNode);
    remoteNodeInLocalDC = remoteNodes.get(0);
    remoteNodeInRemoteDC = remoteNodes.get(1);
    // mock hosts for remote nodes
    localHost = new MockHost(localNode, clusterMap);
    remoteHostInLocalDC = new MockHost(remoteNodeInLocalDC, clusterMap);
    remoteHostInRemoteDC = new MockHost(remoteNodeInRemoteDC, clusterMap);
}
Also used : ReplicationConfig(com.github.ambry.config.ReplicationConfig) MockHelixParticipant(com.github.ambry.clustermap.MockHelixParticipant) VerifiableProperties(com.github.ambry.config.VerifiableProperties) MetricRegistry(com.codahale.metrics.MetricRegistry) DataNodeId(com.github.ambry.clustermap.DataNodeId) ClusterMapConfig(com.github.ambry.config.ClusterMapConfig) MockClusterMap(com.github.ambry.clustermap.MockClusterMap)
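
The switch to leader-based cross-datacenter replication comes entirely from the replication.model.across.datacenters property set at the top of setUp(); everything else in the config keeps its defaults. A minimal standalone sketch of just that configuration step (the class and method names below are illustrative):

import com.github.ambry.config.ReplicationConfig;
import com.github.ambry.config.VerifiableProperties;
import java.util.Properties;

final class LeaderBasedConfigSketch {

    // Builds a ReplicationConfig that enables LEADER_BASED inter-datacenter replication.
    static ReplicationConfig leaderBasedConfig() {
        Properties props = new Properties();
        props.setProperty("replication.model.across.datacenters", "LEADER_BASED");
        return new ReplicationConfig(new VerifiableProperties(props));
    }
}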

Example 20 with DataNodeId

use of com.github.ambry.clustermap.DataNodeId in project ambry by linkedin.

the class LeaderBasedReplicationTest method replicaThreadLeaderBasedReplicationTokenCatchUpForStandbyToLeaderTest.

/**
 * Tests leader-based replication to verify that the remote token is caught up for standby replicas and that the
 * updated token is used when their state transitions to leader.
 * @throws Exception
 */
@Test
public void replicaThreadLeaderBasedReplicationTokenCatchUpForStandbyToLeaderTest() throws Exception {
    /*
      Setup:
      we have 3 nodes that host replicas belonging to the same partitions:
      a) localNode (local node that hosts partitions)
      b) remoteNodeInLocalDC (remote node in local data center that shares the partitions)
      c) remoteNodeInRemoteDC (remote node in remote data center that shares the partitions)

      Each node has a few of its partitions as leaders and the rest as standbys. Leadership is randomly assigned during
      creation of the replicas for the mock partitions.
     */
    Map<DataNodeId, MockHost> hosts = new HashMap<>();
    hosts.put(remoteNodeInLocalDC, remoteHostInLocalDC);
    hosts.put(remoteNodeInRemoteDC, remoteHostInRemoteDC);
    int batchSize = 5;
    ConnectionPool mockConnectionPool = new MockConnectionPool(hosts, clusterMap, batchSize);
    Pair<StorageManager, ReplicationManager> managers = createStorageManagerAndReplicationManager(clusterMap, clusterMapConfig, mockHelixParticipant, mockConnectionPool);
    StorageManager storageManager = managers.getFirst();
    MockReplicationManager replicationManager = (MockReplicationManager) managers.getSecond();
    // Set mock local stores on all remoteReplicaInfos, which will be used during replication.
    for (PartitionId partitionId : replicationManager.partitionToPartitionInfo.keySet()) {
        localHost.addStore(partitionId, null);
        Store localStore = localHost.getStore(partitionId);
        localStore.start();
        List<RemoteReplicaInfo> remoteReplicaInfos = replicationManager.partitionToPartitionInfo.get(partitionId).getRemoteReplicaInfos();
        remoteReplicaInfos.forEach(remoteReplicaInfo -> remoteReplicaInfo.setLocalStore(localStore));
    }
    // get remote replicas and replica thread for remote host on local datacenter
    ReplicaThread intraColoReplicaThread = replicationManager.dataNodeIdToReplicaThread.get(remoteNodeInLocalDC);
    List<RemoteReplicaInfo> remoteReplicaInfosForLocalDC = intraColoReplicaThread.getRemoteReplicaInfos().get(remoteNodeInLocalDC);
    // get remote replicas and replica thread for remote host on remote datacenter
    ReplicaThread crossColoReplicaThread = replicationManager.dataNodeIdToReplicaThread.get(remoteNodeInRemoteDC);
    List<RemoteReplicaInfo> remoteReplicaInfosForRemoteDC = crossColoReplicaThread.getRemoteReplicaInfos().get(remoteNodeInRemoteDC);
    // mock helix transition state from standby to leader for local leader partitions
    List<? extends ReplicaId> replicaIds = clusterMap.getReplicaIds(replicationManager.dataNodeId);
    for (ReplicaId replicaId : replicaIds) {
        MockReplicaId mockReplicaId = (MockReplicaId) replicaId;
        if (mockReplicaId.getReplicaState() == ReplicaState.LEADER) {
            MockPartitionId mockPartitionId = (MockPartitionId) replicaId.getPartitionId();
            mockHelixParticipant.onPartitionBecomeLeaderFromStandby(mockPartitionId.toPathString());
        }
    }
    // Add put messages to all partitions on remoteHostInLocalDC and remoteHostInRemoteDC
    List<PartitionId> partitionIds = clusterMap.getWritablePartitionIds(null);
    for (PartitionId partitionId : partitionIds) {
        // add 2 * batchSize messages to remoteHostInLocalDC and remoteHostInRemoteDC, from which the local host will replicate.
        addPutMessagesToReplicasOfPartition(partitionId, Arrays.asList(remoteHostInLocalDC, remoteHostInRemoteDC), batchSize + batchSize);
    }
    // Choose partitions that are leaders on both local and remote nodes
    Set<ReplicaId> leaderReplicasOnLocalAndRemoteNodes = new HashSet<>();
    // Track a standby replica whose partition is a leader on the remote node. We will update the state of this replica to leader after one cycle of replication
    // and verify that replication resumes from remote token.
    MockReplicaId localStandbyReplicaWithLeaderPartitionOnRemoteNode = null;
    List<? extends ReplicaId> localReplicas = clusterMap.getReplicaIds(replicationManager.dataNodeId);
    List<? extends ReplicaId> remoteReplicas = clusterMap.getReplicaIds(remoteNodeInRemoteDC);
    for (int i = 0; i < localReplicas.size(); i++) {
        MockReplicaId localReplica = (MockReplicaId) localReplicas.get(i);
        MockReplicaId remoteReplica = (MockReplicaId) remoteReplicas.get(i);
        if (localReplica.getReplicaState() == ReplicaState.LEADER && remoteReplica.getReplicaState() == ReplicaState.LEADER) {
            leaderReplicasOnLocalAndRemoteNodes.add(remoteReplicas.get(i));
        }
        if (localReplica.getReplicaState() == ReplicaState.STANDBY && remoteReplica.getReplicaState() == ReplicaState.LEADER && localStandbyReplicaWithLeaderPartitionOnRemoteNode == null) {
            localStandbyReplicaWithLeaderPartitionOnRemoteNode = localReplica;
        }
    }
    // replicate with remote node in remote DC
    crossColoReplicaThread.replicate();
    // Verify that the remote token has moved forward for leader replicas but not for standby replicas, whose missing messages are not fetched yet.
    for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
        if (leaderReplicasOnLocalAndRemoteNodes.contains(remoteReplicaInfo.getReplicaId())) {
            assertEquals("remote token mismatch for leader replicas", ((MockFindToken) remoteReplicaInfo.getToken()).getIndex(), batchSize - 1);
        } else {
            assertEquals("remote token should not move forward for standby replicas until missing keys are fetched", ((MockFindToken) remoteReplicaInfo.getToken()).getIndex(), 0);
        }
    }
    // Replicate with remote node in local dc
    intraColoReplicaThread.replicate();
    // verify that remote token will be moved for all intra-colo replicas with token index = batchSize-1
    for (RemoteReplicaInfo replicaInfo : remoteReplicaInfosForLocalDC) {
        assertEquals("mismatch in remote token set for intra colo replicas", ((MockFindToken) replicaInfo.getToken()).getIndex(), batchSize - 1);
    }
    // process missing keys for cross colo replicas from previous metadata exchange
    for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
        crossColoReplicaThread.processMissingKeysFromPreviousMetadataResponse(remoteReplicaInfo);
    }
    // Verify that the remote token has now moved forward for standby replicas as well, as missing keys must now be obtained via intra-dc replication.
    for (RemoteReplicaInfo replicaInfo : remoteReplicaInfosForRemoteDC) {
        assertEquals("mismatch in remote token set for inter colo replicas", ((MockFindToken) replicaInfo.getToken()).getIndex(), batchSize - 1);
    }
    // If we have a local standby replica with leader partition on remote node, change its state to leader
    if (localStandbyReplicaWithLeaderPartitionOnRemoteNode != null) {
        MockPartitionId mockPartitionId = (MockPartitionId) localStandbyReplicaWithLeaderPartitionOnRemoteNode.getPartitionId();
        mockHelixParticipant.onPartitionBecomeLeaderFromStandby(mockPartitionId.toPathString());
    }
    // Trigger replication again with remote node in remote DC
    crossColoReplicaThread.replicate();
    // Verify that the remote token has moved forward for leader replicas and for the newly promoted leader localStandbyReplicaWithLeaderPartitionOnRemoteNode, whose replication should have resumed from remote token index = batchSize - 1.
    for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
        if (leaderReplicasOnLocalAndRemoteNodes.contains(remoteReplicaInfo.getReplicaId()) || (remoteReplicaInfo.getLocalReplicaId().equals(localStandbyReplicaWithLeaderPartitionOnRemoteNode))) {
            assertEquals("remote token mismatch for leader replicas", ((MockFindToken) remoteReplicaInfo.getToken()).getIndex(), batchSize * 2 - 2);
        } else {
            assertEquals("remote token should not move forward for standby replicas until missing keys are fetched", ((MockFindToken) remoteReplicaInfo.getToken()).getIndex(), batchSize - 1);
        }
    }
    // Trigger replication again with remote node in local DC
    intraColoReplicaThread.replicate();
    // verify that remote token is moved forward for all intra-colo replicas.
    for (RemoteReplicaInfo replicaInfo : remoteReplicaInfosForLocalDC) {
        assertEquals("mismatch in remote token set for intra colo replicas", ((MockFindToken) replicaInfo.getToken()).getIndex(), batchSize * 2 - 2);
    }
    // process missing keys for cross colo replicas from previous metadata exchange
    for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
        crossColoReplicaThread.processMissingKeysFromPreviousMetadataResponse(remoteReplicaInfo);
    }
    // Verify that the remote token has now moved forward for the remaining standby replicas as well, since their missing keys were obtained via intra-dc replication.
    for (RemoteReplicaInfo remoteReplicaInfo : remoteReplicaInfosForRemoteDC) {
        assertEquals("mismatch in remote token set for intra colo replicas", ((MockFindToken) remoteReplicaInfo.getToken()).getIndex(), batchSize * 2 - 2);
    }
    storageManager.shutdown();
}
Also used : ConnectionPool(com.github.ambry.network.ConnectionPool) HashMap(java.util.HashMap) MockPartitionId(com.github.ambry.clustermap.MockPartitionId) StorageManager(com.github.ambry.store.StorageManager) Store(com.github.ambry.store.Store) PartitionId(com.github.ambry.clustermap.PartitionId) MockReplicaId(com.github.ambry.clustermap.MockReplicaId) ReplicaId(com.github.ambry.clustermap.ReplicaId) DataNodeId(com.github.ambry.clustermap.DataNodeId) HashSet(java.util.HashSet) Test(org.junit.Test)
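
The test repeats one assertion shape many times: every RemoteReplicaInfo in a group should have advanced its MockFindToken to a given index. A small hypothetical helper capturing that check, written as if it lived in the same package as the test so that RemoteReplicaInfo and MockFindToken resolve without extra imports:

import static org.junit.Assert.assertEquals;

import java.util.List;

final class TokenIndexAssertions {

    // Asserts that every remote replica in the group has its MockFindToken at expectedIndex.
    static void assertTokenIndex(String message, List<RemoteReplicaInfo> replicas, int expectedIndex) {
        for (RemoteReplicaInfo info : replicas) {
            assertEquals(message, expectedIndex, ((MockFindToken) info.getToken()).getIndex());
        }
    }
}

For example, the first cross-colo check above could be written as assertTokenIndex("remote token mismatch for leader replicas", leaderReplicaInfos, batchSize - 1) once the leader group has been filtered out.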

Aggregations

DataNodeId (com.github.ambry.clustermap.DataNodeId): 92 usages
ArrayList (java.util.ArrayList): 45 usages
Test (org.junit.Test): 45 usages
HashMap (java.util.HashMap): 29 usages
PartitionId (com.github.ambry.clustermap.PartitionId): 28 usages
MockDataNodeId (com.github.ambry.clustermap.MockDataNodeId): 27 usages
ReplicaId (com.github.ambry.clustermap.ReplicaId): 25 usages
MockClusterMap (com.github.ambry.clustermap.MockClusterMap): 23 usages
VerifiableProperties (com.github.ambry.config.VerifiableProperties): 23 usages
MetricRegistry (com.codahale.metrics.MetricRegistry): 22 usages
MockPartitionId (com.github.ambry.clustermap.MockPartitionId): 22 usages
List (java.util.List): 22 usages
Map (java.util.Map): 22 usages
Port (com.github.ambry.network.Port): 21 usages
ClusterMap (com.github.ambry.clustermap.ClusterMap): 20 usages
ClusterMapConfig (com.github.ambry.config.ClusterMapConfig): 19 usages
StoreKeyFactory (com.github.ambry.store.StoreKeyFactory): 18 usages
BlobIdFactory (com.github.ambry.commons.BlobIdFactory): 17 usages
HashSet (java.util.HashSet): 16 usages
Properties (java.util.Properties): 16 usages