Search in sources :

Example 1 with Replica

use of org.apache.asterix.common.replication.Replica in project asterixdb by apache.

the class ReplicationManager method updateReplicaState.

/**
     * Updates the state of a remote replica and adjusts the replication factor accordingly.
     * <p>
     * If the replica id is not present in {@code replicas}, or the replica is already in
     * {@code newState}, the call is a no-op (the former is logged as a warning).
     *
     * @param replicaId
     *            The replica id to update.
     * @param newState
     *            The new state of the replica.
     * @param suspendReplication
     *            a flag indicating whether to suspend replication on state change or not.
     *            When set, replication threads are restarted after the state change.
     * @throws InterruptedException
     *             propagated from the calls made while suspending replication.
     */
public synchronized void updateReplicaState(String replicaId, ReplicaState newState, boolean suspendReplication) throws InterruptedException {
    Replica replica = replicas.get(replicaId);
    if (replica == null) {
        // Unknown replica: nothing to update. Mirrors the null guard in reportFailedReplica
        // instead of failing with a NullPointerException on the state check below.
        LOGGER.log(Level.WARNING, "Ignoring state update to " + newState + " for unknown replica " + replicaId);
        return;
    }
    if (replica.getState() == newState) {
        return;
    }
    if (suspendReplication) {
        //prevent new jobs/logs from coming in
        replicationSuspended.set(true);
        if (newState == ReplicaState.DEAD) {
            //assume the dead replica ACK has been received for all pending jobs
            synchronized (jobCommitAcks) {
                for (Integer jobId : jobCommitAcks.keySet()) {
                    addAckToJob(jobId, replicaId);
                }
            }
        }
        //force replication threads to stop in order to change the replication factor
        suspendReplication(true);
    }
    replica.setState(newState);
    if (newState == ReplicaState.ACTIVE) {
        replicationFactor++;
    } else if (newState == ReplicaState.DEAD && replicationFactor > INITIAL_REPLICATION_FACTOR) {
        // never drop below the initial replication factor
        replicationFactor--;
    }
    LOGGER.log(Level.WARNING, "Replica " + replicaId + " state changed to: " + newState.name() + ". Replication factor changed to: " + replicationFactor);
    if (suspendReplication) {
        startReplicationThreads();
    }
}
Also used : Replica(org.apache.asterix.common.replication.Replica)

Example 2 with Replica

use of org.apache.asterix.common.replication.Replica in project asterixdb by apache.

the class ReplicationManager method getActiveRemoteReplicasSockets.

/**
 * Opens a socket to every replica in {@code replicas} whose state is ACTIVE.
 * A replica whose socket cannot be obtained is logged, reported through
 * {@code reportFailedReplica}, and left out of the result.
 *
 * @return a map from replica id to its newly obtained socket channel
 */
private Map<String, SocketChannel> getActiveRemoteReplicasSockets() {
    Map<String, SocketChannel> socketsByReplicaId = new HashMap<>();
    for (Replica candidate : replicas.values()) {
        // only ACTIVE replicas are contacted
        if (candidate.getState() != ReplicaState.ACTIVE) {
            continue;
        }
        String candidateId = candidate.getId();
        try {
            socketsByReplicaId.put(candidateId, getReplicaSocket(candidateId));
        } catch (IOException e) {
            if (LOGGER.isLoggable(Level.WARNING)) {
                LOGGER.log(Level.WARNING, "Could not get replica socket", e);
            }
            // connection failure is treated as a replica failure
            reportFailedReplica(candidateId);
        }
    }
    return socketsByReplicaId;
}
Also used : SocketChannel(java.nio.channels.SocketChannel) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) IOException(java.io.IOException) Replica(org.apache.asterix.common.replication.Replica)

Example 3 with Replica

use of org.apache.asterix.common.replication.Replica in project asterixdb by apache.

the class MetadataNodeFaultToleranceStrategy method notifyNodeFailure.

/**
 * Handles the failure of a node: records it in {@code failedNodes}, removes it from
 * {@code hotStandbyMetadataReplica}, updates and refreshes the cluster state, notifies
 * impacted replicas when the node participates in replication, and, when the failed
 * node is the metadata node, asks each of its non-failed remote replicas to replay the
 * metadata partition logs.
 *
 * @param nodeId
 *            the id of the failed node.
 * @throws HyracksDataException
 */
@Override
public synchronized void notifyNodeFailure(String nodeId) throws HyracksDataException {
    failedNodes.add(nodeId);
    hotStandbyMetadataReplica.remove(nodeId);

    clusterManager.updateNodePartitions(nodeId, false);
    if (nodeId.equals(metadataNodeId)) {
        clusterManager.updateMetadataNode(metadataNodeId, false);
    }
    clusterManager.refreshState();

    // Notify impacted replicas
    if (replicationStrategy.isParticipant(nodeId)) {
        FaultToleranceUtil.notifyImpactedReplicas(nodeId, ClusterEventType.NODE_FAILURE, clusterManager, messageBroker, replicationStrategy);
    }

    // Only a metadata node failure requires its replicas to replay committed jobs
    if (!nodeId.equals(metadataNodeId)) {
        return;
    }

    ICcApplicationContext ccAppCtx = (ICcApplicationContext) serviceCtx.getApplicationContext();
    int partitionId = ccAppCtx.getMetadataProperties().getMetadataPartition().getPartitionId();
    Set<Integer> partitionsToReplay = new HashSet<>(Arrays.asList(partitionId));
    Set<Replica> survivingReplicas = replicationStrategy.getRemoteReplicas(metadataNodeId)
            .stream()
            .filter(candidate -> !failedNodes.contains(candidate.getId()))
            .collect(Collectors.toSet());

    //TODO Do election to identify the node with latest state
    for (Replica target : survivingReplicas) {
        ReplayPartitionLogsRequestMessage replayRequest = new ReplayPartitionLogsRequestMessage(partitionsToReplay);
        try {
            messageBroker.sendApplicationMessageToNC(replayRequest, target.getId());
        } catch (Exception e) {
            // best effort: log and move on to the next replica
            LOGGER.log(Level.WARNING, "Failed sending an application message to an NC", e);
        }
    }
}
Also used : ReportMaxResourceIdTask(org.apache.asterix.app.nc.task.ReportMaxResourceIdTask) IFaultToleranceStrategy(org.apache.asterix.common.replication.IFaultToleranceStrategy) Arrays(java.util.Arrays) ICCMessageBroker(org.apache.asterix.common.messaging.api.ICCMessageBroker) NCLifecycleTaskReportMessage(org.apache.asterix.app.replication.message.NCLifecycleTaskReportMessage) INCLifecycleTask(org.apache.asterix.common.api.INCLifecycleTask) ClusterPartition(org.apache.asterix.common.cluster.ClusterPartition) RuntimeDataException(org.apache.asterix.common.exceptions.RuntimeDataException) INCLifecycleMessage(org.apache.asterix.common.replication.INCLifecycleMessage) CheckpointTask(org.apache.asterix.app.nc.task.CheckpointTask) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) HashMap(java.util.HashMap) ErrorCode(org.apache.asterix.common.exceptions.ErrorCode) ReplayPartitionLogsResponseMessage(org.apache.asterix.app.replication.message.ReplayPartitionLogsResponseMessage) ICCServiceContext(org.apache.hyracks.api.application.ICCServiceContext) ICcApplicationContext(org.apache.asterix.common.dataflow.ICcApplicationContext) IReplicationStrategy(org.apache.asterix.common.replication.IReplicationStrategy) ArrayList(java.util.ArrayList) Level(java.util.logging.Level) HashSet(java.util.HashSet) Map(java.util.Map) RemoteRecoveryTask(org.apache.asterix.app.nc.task.RemoteRecoveryTask) FaultToleranceUtil(org.apache.asterix.util.FaultToleranceUtil) ExternalLibrarySetupTask(org.apache.asterix.app.nc.task.ExternalLibrarySetupTask) Replica(org.apache.asterix.common.replication.Replica) ClusterEventType(org.apache.hyracks.api.application.IClusterLifecycleListener.ClusterEventType) Set(java.util.Set) StartLifecycleComponentsTask(org.apache.asterix.app.nc.task.StartLifecycleComponentsTask) StartupTaskRequestMessage(org.apache.asterix.app.replication.message.StartupTaskRequestMessage) Logger(java.util.logging.Logger) Collectors(java.util.stream.Collectors) 
LocalRecoveryTask(org.apache.asterix.app.nc.task.LocalRecoveryTask) StartReplicationServiceTask(org.apache.asterix.app.nc.task.StartReplicationServiceTask) List(java.util.List) BindMetadataNodeTask(org.apache.asterix.app.nc.task.BindMetadataNodeTask) MetadataBootstrapTask(org.apache.asterix.app.nc.task.MetadataBootstrapTask) ReplayPartitionLogsRequestMessage(org.apache.asterix.app.replication.message.ReplayPartitionLogsRequestMessage) StartupTaskResponseMessage(org.apache.asterix.app.replication.message.StartupTaskResponseMessage) IClusterStateManager(org.apache.asterix.common.cluster.IClusterStateManager) SystemState(org.apache.asterix.common.transactions.IRecoveryManager.SystemState) ICcApplicationContext(org.apache.asterix.common.dataflow.ICcApplicationContext) ReplayPartitionLogsRequestMessage(org.apache.asterix.app.replication.message.ReplayPartitionLogsRequestMessage) Replica(org.apache.asterix.common.replication.Replica) RuntimeDataException(org.apache.asterix.common.exceptions.RuntimeDataException) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) HashSet(java.util.HashSet)

Example 4 with Replica

use of org.apache.asterix.common.replication.Replica in project asterixdb by apache.

the class ReplicationManager method getReplicaSocket.

/**
     * Establishes a blocking connection with a remote replica.
     * <p>
     * The channel is closed before the exception is propagated if configuring it,
     * resolving the replica address, or connecting fails, so a failed attempt does
     * not leak an open socket.
     *
     * @param replicaId
     *            The replica to connect to.
     * @return The connected socket of the remote replica
     * @throws IOException
     *             if the channel cannot be opened, configured, or connected.
     */
private SocketChannel getReplicaSocket(String replicaId) throws IOException {
    Replica replica = replicationProperties.getReplicaById(replicaId);
    SocketChannel sc = SocketChannel.open();
    try {
        sc.configureBlocking(true);
        InetSocketAddress address = replica.getAddress(replicationProperties);
        sc.connect(new InetSocketAddress(address.getHostString(), address.getPort()));
        return sc;
    } catch (IOException | RuntimeException e) {
        // release the channel on any failure; keep the original exception as primary
        try {
            sc.close();
        } catch (IOException closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
}
Also used : SocketChannel(java.nio.channels.SocketChannel) InetSocketAddress(java.net.InetSocketAddress) Replica(org.apache.asterix.common.replication.Replica)

Example 5 with Replica

use of org.apache.asterix.common.replication.Replica in project asterixdb by apache.

the class ReplicationManager method reportFailedReplica.

/**
     * Flags job replication for termination and reports a NODE_FAILURE event for the
     * given replica. Does nothing when the replica id is unknown or the replica is
     * already marked DEAD.
     *
     * @param replicaId
     *            the failed replica id.
     */
public void reportFailedReplica(String replicaId) {
    Replica failedReplica = replicas.get(replicaId);
    boolean unknownOrAlreadyDead = failedReplica == null || failedReplica.getState() == ReplicaState.DEAD;
    if (unknownOrAlreadyDead) {
        return;
    }
    // stop processing any new logs or jobs before announcing the failure
    terminateJobsReplication.set(true);
    reportReplicaEvent(new ReplicaEvent(failedReplica, ClusterEventType.NODE_FAILURE));
}
Also used : Replica(org.apache.asterix.common.replication.Replica) ReplicaEvent(org.apache.asterix.common.replication.ReplicaEvent)

Aggregations

Replica (org.apache.asterix.common.replication.Replica)9 SocketChannel (java.nio.channels.SocketChannel)3 ReplicaEvent (org.apache.asterix.common.replication.ReplicaEvent)3 HashMap (java.util.HashMap)2 Node (org.apache.asterix.event.schema.cluster.Node)2 IOException (java.io.IOException)1 InetSocketAddress (java.net.InetSocketAddress)1 ByteBuffer (java.nio.ByteBuffer)1 ArrayList (java.util.ArrayList)1 Arrays (java.util.Arrays)1 HashSet (java.util.HashSet)1 List (java.util.List)1 Map (java.util.Map)1 Set (java.util.Set)1 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)1 Level (java.util.logging.Level)1 Logger (java.util.logging.Logger)1 Collectors (java.util.stream.Collectors)1 BindMetadataNodeTask (org.apache.asterix.app.nc.task.BindMetadataNodeTask)1 CheckpointTask (org.apache.asterix.app.nc.task.CheckpointTask)1