use of org.apache.asterix.common.replication.Replica in project asterixdb by apache.
the class ReplicationManager method updateReplicaState.
/**
 * Updates the state of a remote replica and adjusts the replication factor to match.
 * <p>
 * The call is a no-op when the replica id is unknown or when the replica is already in
 * {@code newState}. Moving a replica to ACTIVE increments the replication factor; moving
 * it to DEAD decrements the factor, but never below {@code INITIAL_REPLICATION_FACTOR}.
 *
 * @param replicaId
 *            The replica id to update.
 * @param newState
 *            The new state of the replica.
 * @param suspendReplication
 *            a flag indicating whether to suspend replication on state change or not.
 *            When set, replication is suspended before the state change and the
 *            replication threads are started again afterwards.
 * @throws InterruptedException
 *             presumably when the calling thread is interrupted while replication is
 *             being suspended or restarted (propagated from the helpers called here).
 */
public synchronized void updateReplicaState(String replicaId, ReplicaState newState, boolean suspendReplication)
        throws InterruptedException {
    Replica replica = replicas.get(replicaId);
    if (replica == null) {
        // Unknown replica id: there is no state to update. Without this guard the
        // getState() call below would fail with a NullPointerException.
        if (LOGGER.isLoggable(Level.WARNING)) {
            LOGGER.log(Level.WARNING,
                    "Ignoring state update to " + newState + " for unknown replica " + replicaId);
        }
        return;
    }
    if (replica.getState() == newState) {
        // Nothing changes, so the replication factor must not be touched either.
        return;
    }
    if (suspendReplication) {
        //prevent new jobs/logs from coming in
        replicationSuspended.set(true);
        if (newState == ReplicaState.DEAD) {
            //assume the dead replica ACK has been received for all pending jobs
            synchronized (jobCommitAcks) {
                for (Integer jobId : jobCommitAcks.keySet()) {
                    addAckToJob(jobId, replicaId);
                }
            }
        }
        //force replication threads to stop in order to change the replication factor
        suspendReplication(true);
    }
    replica.setState(newState);
    if (newState == ReplicaState.ACTIVE) {
        replicationFactor++;
    } else if (newState == ReplicaState.DEAD && replicationFactor > INITIAL_REPLICATION_FACTOR) {
        // Never drop below the initial factor.
        replicationFactor--;
    }
    LOGGER.log(Level.WARNING, "Replica " + replicaId + " state changed to: " + newState.name() + ". Replication factor changed to: " + replicationFactor);
    if (suspendReplication) {
        // Resume replication now that the new factor is in place.
        startReplicationThreads();
    }
}
use of org.apache.asterix.common.replication.Replica in project asterixdb by apache.
the class ReplicationManager method getActiveRemoteReplicasSockets.
/**
 * Opens a socket to every replica that is currently in the ACTIVE state.
 * <p>
 * A replica whose socket cannot be obtained is logged, reported through
 * {@code reportFailedReplica} and left out of the result.
 *
 * @return a map from replica id to the socket opened for that replica.
 */
private Map<String, SocketChannel> getActiveRemoteReplicasSockets() {
    final Map<String, SocketChannel> socketsByReplicaId = new HashMap<>();
    for (Replica candidate : replicas.values()) {
        if (candidate.getState() != ReplicaState.ACTIVE) {
            continue;
        }
        final String candidateId = candidate.getId();
        try {
            socketsByReplicaId.put(candidateId, getReplicaSocket(candidateId));
        } catch (IOException e) {
            if (LOGGER.isLoggable(Level.WARNING)) {
                LOGGER.log(Level.WARNING, "Could not get replica socket", e);
            }
            // The replica is unreachable: flag it as failed and carry on with the rest.
            reportFailedReplica(candidateId);
        }
    }
    return socketsByReplicaId;
}
use of org.apache.asterix.common.replication.Replica in project asterixdb by apache.
the class MetadataNodeFaultToleranceStrategy method notifyNodeFailure.
/**
 * Reacts to the failure of a node.
 * <p>
 * The node is recorded as failed, removed from the hot-standby metadata replicas and
 * its partitions are updated in the cluster manager. Impacted replicas are notified
 * when the node participates in replication. If the failed node is the metadata node,
 * each of its remote replicas that has not itself failed is asked to replay the logs
 * of the metadata partition.
 *
 * @param nodeId
 *            the id of the node that failed.
 * @throws HyracksDataException
 *             propagated from the cluster-state or notification calls made here.
 */
@Override
public synchronized void notifyNodeFailure(String nodeId) throws HyracksDataException {
    // Book-keeping for the failed node.
    failedNodes.add(nodeId);
    hotStandbyMetadataReplica.remove(nodeId);
    clusterManager.updateNodePartitions(nodeId, false);
    if (nodeId.equals(metadataNodeId)) {
        clusterManager.updateMetadataNode(metadataNodeId, false);
    }
    clusterManager.refreshState();

    // Let the replicas affected by this failure know about it.
    if (replicationStrategy.isParticipant(nodeId)) {
        FaultToleranceUtil.notifyImpactedReplicas(nodeId, ClusterEventType.NODE_FAILURE, clusterManager,
                messageBroker, replicationStrategy);
    }

    // Only a metadata node failure requires asking replicas to replay committed jobs.
    if (!nodeId.equals(metadataNodeId)) {
        return;
    }

    final ICcApplicationContext ccAppCtx = (ICcApplicationContext) serviceCtx.getApplicationContext();
    final int partitionId = ccAppCtx.getMetadataProperties().getMetadataPartition().getPartitionId();
    final Set<Integer> partitionsToReplay = new HashSet<>(Arrays.asList(partitionId));
    final Set<Replica> survivingReplicas = replicationStrategy.getRemoteReplicas(metadataNodeId).stream()
            .filter(candidate -> !failedNodes.contains(candidate.getId()))
            .collect(Collectors.toSet());

    //TODO Do election to identify the node with latest state
    for (Replica target : survivingReplicas) {
        final ReplayPartitionLogsRequestMessage replayRequest =
                new ReplayPartitionLogsRequestMessage(partitionsToReplay);
        try {
            messageBroker.sendApplicationMessageToNC(replayRequest, target.getId());
        } catch (Exception e) {
            // Best effort: a failed send must not stop the remaining replicas from being asked.
            LOGGER.log(Level.WARNING, "Failed sending an application message to an NC", e);
        }
    }
}
use of org.apache.asterix.common.replication.Replica in project asterixdb by apache.
the class ReplicationManager method getReplicaSocket.
/**
 * Establishes a blocking connection with a remote replica.
 * <p>
 * If anything fails after the channel has been opened, the channel is closed before the
 * failure is propagated, so no socket is leaked on an unsuccessful connection attempt.
 *
 * @param replicaId
 *            The replica to connect to.
 * @return The connected socket of the remote replica. This method does not retain the
 *         channel, so the caller is expected to close it.
 * @throws IOException
 *             if the channel cannot be opened, configured or connected.
 */
private SocketChannel getReplicaSocket(String replicaId) throws IOException {
    Replica replica = replicationProperties.getReplicaById(replicaId);
    SocketChannel sc = SocketChannel.open();
    try {
        sc.configureBlocking(true);
        InetSocketAddress address = replica.getAddress(replicationProperties);
        // Build a fresh address from host string and port, as the original code did.
        sc.connect(new InetSocketAddress(address.getHostString(), address.getPort()));
        return sc;
    } catch (IOException | RuntimeException e) {
        // connect() can also fail with unchecked exceptions (e.g. an unresolved address),
        // so both kinds are handled to make sure the channel is always released.
        try {
            sc.close();
        } catch (IOException closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
}
use of org.apache.asterix.common.replication.Replica in project asterixdb by apache.
the class ReplicationManager method reportFailedReplica.
/**
 * Flags job replication for termination and reports a NODE_FAILURE event for the given
 * replica.
 * <p>
 * Nothing happens when the replica id is unknown or the replica is already DEAD.
 *
 * @param replicaId
 *            the failed replica id.
 */
public void reportFailedReplica(String replicaId) {
    final Replica failed = replicas.get(replicaId);
    final boolean nothingToReport = failed == null || failed.getState() == ReplicaState.DEAD;
    if (nothingToReport) {
        return;
    }
    //need to stop processing any new logs or jobs
    terminateJobsReplication.set(true);
    reportReplicaEvent(new ReplicaEvent(failed, ClusterEventType.NODE_FAILURE));
}
Aggregations