Search in sources :

Example 1 with ReplayPartitionLogsRequestMessage

Use of org.apache.asterix.app.replication.message.ReplayPartitionLogsRequestMessage in project asterixdb by apache.

The class MetadataNodeFaultToleranceStrategy, method notifyNodeFailure.

/**
 * Reacts to the failure of the node identified by {@code nodeId}.
 *
 * <p>The node is added to {@code failedNodes} and removed from
 * {@code hotStandbyMetadataReplica}, its partitions (and, when it is the
 * metadata node, the metadata node itself) are flagged with {@code false} on
 * the cluster manager, and the cluster state is refreshed. If the node is a
 * replication participant, its impacted replicas are notified of the
 * {@code NODE_FAILURE} event. Finally, when the failed node is the metadata
 * node, every remote replica of it that is not itself in {@code failedNodes}
 * is sent a {@link ReplayPartitionLogsRequestMessage} for the metadata
 * partition. A failure to deliver that message to one replica is logged at
 * WARNING level and does not stop delivery to the remaining replicas.
 *
 * @param nodeId the id of the node that failed
 * @throws HyracksDataException declared by the overridden method
 */
@Override
public synchronized void notifyNodeFailure(String nodeId) throws HyracksDataException {
    // Bookkeeping: record the failure and update the cluster manager's view of the node
    failedNodes.add(nodeId);
    hotStandbyMetadataReplica.remove(nodeId);
    clusterManager.updateNodePartitions(nodeId, false);
    if (nodeId.equals(metadataNodeId)) {
        clusterManager.updateMetadataNode(metadataNodeId, false);
    }
    clusterManager.refreshState();

    // Notify impacted replicas, but only for nodes taking part in replication
    if (replicationStrategy.isParticipant(nodeId)) {
        FaultToleranceUtil.notifyImpactedReplicas(nodeId, ClusterEventType.NODE_FAILURE, clusterManager,
                messageBroker, replicationStrategy);
    }

    // Everything below applies only when the failed node is the metadata node
    if (!nodeId.equals(metadataNodeId)) {
        return;
    }

    // Ask the surviving replicas of the metadata node to replay any committed jobs
    ICcApplicationContext ccAppCtx = (ICcApplicationContext) serviceCtx.getApplicationContext();
    int partitionId = ccAppCtx.getMetadataProperties().getMetadataPartition().getPartitionId();
    Set<Integer> partitionsToReplay = new HashSet<>();
    partitionsToReplay.add(partitionId);

    Set<Replica> liveReplicas = replicationStrategy.getRemoteReplicas(metadataNodeId)
            .stream()
            .filter(candidate -> !failedNodes.contains(candidate.getId()))
            .collect(Collectors.toSet());

    //TODO Do election to identify the node with latest state
    for (Replica target : liveReplicas) {
        ReplayPartitionLogsRequestMessage replayRequest =
                new ReplayPartitionLogsRequestMessage(partitionsToReplay);
        try {
            messageBroker.sendApplicationMessageToNC(replayRequest, target.getId());
        } catch (Exception e) {
            // Best effort: log and move on to the next replica
            LOGGER.log(Level.WARNING, "Failed sending an application message to an NC", e);
        }
    }
}
Also used : ReportMaxResourceIdTask(org.apache.asterix.app.nc.task.ReportMaxResourceIdTask) IFaultToleranceStrategy(org.apache.asterix.common.replication.IFaultToleranceStrategy) Arrays(java.util.Arrays) ICCMessageBroker(org.apache.asterix.common.messaging.api.ICCMessageBroker) NCLifecycleTaskReportMessage(org.apache.asterix.app.replication.message.NCLifecycleTaskReportMessage) INCLifecycleTask(org.apache.asterix.common.api.INCLifecycleTask) ClusterPartition(org.apache.asterix.common.cluster.ClusterPartition) RuntimeDataException(org.apache.asterix.common.exceptions.RuntimeDataException) INCLifecycleMessage(org.apache.asterix.common.replication.INCLifecycleMessage) CheckpointTask(org.apache.asterix.app.nc.task.CheckpointTask) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) HashMap(java.util.HashMap) ErrorCode(org.apache.asterix.common.exceptions.ErrorCode) ReplayPartitionLogsResponseMessage(org.apache.asterix.app.replication.message.ReplayPartitionLogsResponseMessage) ICCServiceContext(org.apache.hyracks.api.application.ICCServiceContext) ICcApplicationContext(org.apache.asterix.common.dataflow.ICcApplicationContext) IReplicationStrategy(org.apache.asterix.common.replication.IReplicationStrategy) ArrayList(java.util.ArrayList) Level(java.util.logging.Level) HashSet(java.util.HashSet) Map(java.util.Map) RemoteRecoveryTask(org.apache.asterix.app.nc.task.RemoteRecoveryTask) FaultToleranceUtil(org.apache.asterix.util.FaultToleranceUtil) ExternalLibrarySetupTask(org.apache.asterix.app.nc.task.ExternalLibrarySetupTask) Replica(org.apache.asterix.common.replication.Replica) ClusterEventType(org.apache.hyracks.api.application.IClusterLifecycleListener.ClusterEventType) Set(java.util.Set) StartLifecycleComponentsTask(org.apache.asterix.app.nc.task.StartLifecycleComponentsTask) StartupTaskRequestMessage(org.apache.asterix.app.replication.message.StartupTaskRequestMessage) Logger(java.util.logging.Logger) Collectors(java.util.stream.Collectors) 
LocalRecoveryTask(org.apache.asterix.app.nc.task.LocalRecoveryTask) StartReplicationServiceTask(org.apache.asterix.app.nc.task.StartReplicationServiceTask) List(java.util.List) BindMetadataNodeTask(org.apache.asterix.app.nc.task.BindMetadataNodeTask) MetadataBootstrapTask(org.apache.asterix.app.nc.task.MetadataBootstrapTask) ReplayPartitionLogsRequestMessage(org.apache.asterix.app.replication.message.ReplayPartitionLogsRequestMessage) StartupTaskResponseMessage(org.apache.asterix.app.replication.message.StartupTaskResponseMessage) IClusterStateManager(org.apache.asterix.common.cluster.IClusterStateManager) SystemState(org.apache.asterix.common.transactions.IRecoveryManager.SystemState) ICcApplicationContext(org.apache.asterix.common.dataflow.ICcApplicationContext) ReplayPartitionLogsRequestMessage(org.apache.asterix.app.replication.message.ReplayPartitionLogsRequestMessage) Replica(org.apache.asterix.common.replication.Replica) RuntimeDataException(org.apache.asterix.common.exceptions.RuntimeDataException) HyracksDataException(org.apache.hyracks.api.exceptions.HyracksDataException) HashSet(java.util.HashSet)

Aggregations

ArrayList (java.util.ArrayList)1 Arrays (java.util.Arrays)1 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 List (java.util.List)1 Map (java.util.Map)1 Set (java.util.Set)1 Level (java.util.logging.Level)1 Logger (java.util.logging.Logger)1 Collectors (java.util.stream.Collectors)1 BindMetadataNodeTask (org.apache.asterix.app.nc.task.BindMetadataNodeTask)1 CheckpointTask (org.apache.asterix.app.nc.task.CheckpointTask)1 ExternalLibrarySetupTask (org.apache.asterix.app.nc.task.ExternalLibrarySetupTask)1 LocalRecoveryTask (org.apache.asterix.app.nc.task.LocalRecoveryTask)1 MetadataBootstrapTask (org.apache.asterix.app.nc.task.MetadataBootstrapTask)1 RemoteRecoveryTask (org.apache.asterix.app.nc.task.RemoteRecoveryTask)1 ReportMaxResourceIdTask (org.apache.asterix.app.nc.task.ReportMaxResourceIdTask)1 StartLifecycleComponentsTask (org.apache.asterix.app.nc.task.StartLifecycleComponentsTask)1 StartReplicationServiceTask (org.apache.asterix.app.nc.task.StartReplicationServiceTask)1 NCLifecycleTaskReportMessage (org.apache.asterix.app.replication.message.NCLifecycleTaskReportMessage)1