Search in sources :

Example 1 with SharedNothingBackupQuorum

Use of org.apache.activemq.artemis.core.server.cluster.qourum.SharedNothingBackupQuorum in the activemq-artemis project by Apache.

Class SharedNothingBackupActivation, method run.

/**
 * Runs the backup side of a shared-nothing (replicated) activation.
 *
 * <p>Sequence, as implemented below:
 * <ol>
 *   <li>mark the server as STARTED and move any existing server data away;</li>
 *   <li>run {@code initialisePart1} and register a {@link SharedNothingBackupQuorum} plus a
 *       vote handler with the quorum manager;</li>
 *   <li>pick a node locator, wait for the connection to the replication cluster and start
 *       the backup manager;</li>
 *   <li>loop: locate a live node, connect to it, start the endpoint connector and wait for
 *       the quorum to report a status change; the loop repeats while the signal is
 *       {@code ALREADY_REPLICATING};</li>
 *   <li>on {@code FAIL_OVER}, switch the HA policy to the replicated policy and finish
 *       initialisation (or run the scale-down path).</li>
 * </ol>
 *
 * <p>The method returns early whenever {@code closed} is set, the server is no longer
 * started, or the quorum signals {@code STOP}. On {@code FAILURE_REPLICATING} a new thread
 * is spawned that stops and starts the server again.
 *
 * <p>No exception escapes: everything is caught at the bottom and reported through
 * {@code ActiveMQServerLogger.LOGGER.initializationError}, except
 * {@link InterruptedException} / {@link IllegalStateException} raised while the server is
 * not started, which are dropped silently.
 */
@Override
public void run() {
    try {
        logger.trace("SharedNothingBackupActivation..start");
        synchronized (activeMQServer) {
            activeMQServer.setState(ActiveMQServerImpl.SERVER_STATE.STARTED);
        }
        // move all data away:
        // the node manager is stopped around the move and started again afterwards
        activeMQServer.getNodeManager().stop();
        activeMQServer.moveServerData(replicaPolicy.getMaxSavedReplicatedJournalsSize());
        activeMQServer.getNodeManager().start();
        synchronized (this) {
            if (closed) {
                logger.trace("SharedNothingBackupActivation is closed, ignoring activation!");
                return;
            }
        }
        boolean scalingDown = replicaPolicy.getScaleDownPolicy() != null && replicaPolicy.getScaleDownPolicy().isEnabled();
        if (!activeMQServer.initialisePart1(scalingDown)) {
            if (logger.isTraceEnabled()) {
                logger.trace("could not initialize part1 " + scalingDown);
            }
            return;
        }
        logger.trace("Waiting for a synchronize now...");
        // The closed check and the quorum registration happen under the same monitor, so the
        // quorum is never registered once this activation has been observed as closed.
        synchronized (this) {
            logger.trace("Entered a synchronized");
            if (closed)
                return;
            backupQuorum = new SharedNothingBackupQuorum(activeMQServer.getStorageManager(), activeMQServer.getNodeManager(), activeMQServer.getScheduledPool(), networkHealthCheck, replicaPolicy.getQuorumSize(), replicaPolicy.getVoteRetries(), replicaPolicy.getVoteRetryWait());
            activeMQServer.getClusterManager().getQuorumManager().registerQuorum(backupQuorum);
            activeMQServer.getClusterManager().getQuorumManager().registerQuorumHandler(new ServerConnectVoteHandler(activeMQServer));
        }
        // use a Node Locator to connect to the cluster
        // Precedence: explicit REPLICATION_ENDPOINT activation parameter, then group name, then any live node.
        LiveNodeLocator nodeLocator;
        if (activationParams.get(ActivationParams.REPLICATION_ENDPOINT) != null) {
            TopologyMember member = (TopologyMember) activationParams.get(ActivationParams.REPLICATION_ENDPOINT);
            nodeLocator = new NamedNodeIdNodeLocator(member.getNodeId(), new Pair<>(member.getLive(), member.getBackup()));
        } else {
            nodeLocator = replicaPolicy.getGroupName() == null ? new AnyLiveNodeLocatorForReplication(backupQuorum, activeMQServer) : new NamedLiveNodeLocatorForReplication(replicaPolicy.getGroupName(), backupQuorum);
        }
        ClusterController clusterController = activeMQServer.getClusterManager().getClusterController();
        clusterController.addClusterTopologyListenerForReplication(nodeLocator);
        logger.trace("Waiting on cluster connection");
        clusterController.awaitConnectionToReplicationCluster();
        logger.trace("Cluster Connected");
        clusterController.addIncomingInterceptorForReplication(new ReplicationError(nodeLocator));
        // nodeManager.startBackup();
        if (logger.isTraceEnabled()) {
            logger.trace("Starting backup manager");
        }
        activeMQServer.getBackupManager().start();
        if (logger.isTraceEnabled()) {
            logger.trace("Set backup Quorum");
        }
        replicationEndpoint.setBackupQuorum(backupQuorum);
        replicationEndpoint.setExecutor(activeMQServer.getExecutorFactory().getExecutor());
        EndpointConnector endpointConnector = new EndpointConnector();
        if (logger.isTraceEnabled()) {
            logger.trace("Starting Backup Server");
        }
        ActiveMQServerLogger.LOGGER.backupServerStarted(activeMQServer.getVersion().getFullVersion(), activeMQServer.getNodeManager().getNodeId());
        activeMQServer.setState(ActiveMQServerImpl.SERVER_STATE.STARTED);
        if (logger.isTraceEnabled())
            logger.trace("Setting server state as started");
        SharedNothingBackupQuorum.BACKUP_ACTIVATION signal;
        // Replication attempt loop: repeats for as long as the outcome is ALREADY_REPLICATING.
        do {
            if (closed) {
                if (logger.isTraceEnabled()) {
                    logger.trace("Activation is closed, so giving up");
                }
                return;
            }
            if (logger.isTraceEnabled()) {
                logger.trace("looking up the node through nodeLocator.locateNode()");
            }
            // locate the first live server to try to replicate
            nodeLocator.locateNode();
            Pair<TransportConfiguration, TransportConfiguration> possibleLive = nodeLocator.getLiveConfiguration();
            nodeID = nodeLocator.getNodeID();
            if (logger.isTraceEnabled()) {
                logger.trace("nodeID = " + nodeID);
            }
            // in a normal (non failback) scenario if we couldn't find our live server we should fail
            if (!attemptFailBack) {
                if (logger.isTraceEnabled()) {
                    logger.trace("attemptFailback=false, nodeID=" + nodeID);
                }
                // this shouldn't happen
                if (nodeID == null) {
                    logger.debug("Throwing a RuntimeException as nodeID==null ant attemptFailback=false");
                    throw new RuntimeException("Could not establish the connection");
                }
                activeMQServer.getNodeManager().setNodeID(nodeID);
            }
            try {
                if (logger.isTraceEnabled()) {
                    logger.trace("Calling clusterController.connectToNodeInReplicatedCluster(" + possibleLive.getA() + ")");
                }
                clusterControl = clusterController.connectToNodeInReplicatedCluster(possibleLive.getA());
            } catch (Exception e) {
                logger.debug(e.getMessage(), e);
                // clusterControl is not local to this method, so it still references the control
                // that was closed at the end of a previous ALREADY_REPLICATING pass. Clear it so
                // a failed connect with no second transport falls into the retry branch below
                // instead of continuing with a closed control.
                clusterControl = null;
                if (possibleLive.getB() != null) {
                    // fall back to the second transport configuration of the pair
                    try {
                        clusterControl = clusterController.connectToNodeInReplicatedCluster(possibleLive.getB());
                    } catch (Exception e1) {
                        // log instead of swallowing, mirroring the handling of the first attempt
                        logger.debug(e1.getMessage(), e1);
                        clusterControl = null;
                    }
                }
            }
            if (clusterControl == null) {
                if (logger.isTraceEnabled()) {
                    logger.trace("sleeping " + clusterController.getRetryIntervalForReplicatedCluster() + " it should retry");
                }
                // its ok to retry here since we haven't started replication yet
                // it may just be the server has gone since discovery
                Thread.sleep(clusterController.getRetryIntervalForReplicatedCluster());
                // 'continue' in a do/while jumps to the loop condition, so the signal is forced
                // to ALREADY_REPLICATING to make that condition true and run another pass.
                signal = SharedNothingBackupQuorum.BACKUP_ACTIVATION.ALREADY_REPLICATING;
                continue;
            }
            activeMQServer.getThreadPool().execute(endpointConnector);
            /**
             * Wait for a signal from the the quorum manager, at this point if replication has been successful we can
             * fail over or if there is an error trying to replicate (such as already replicating) we try the
             * process again on the next live server.  All the action happens inside {@link BackupQuorum}
             */
            signal = backupQuorum.waitForStatusChange();
            if (logger.isTraceEnabled()) {
                logger.trace("Got a signal " + signal + " through backupQuorum.waitForStatusChange()");
            }
            /**
             * replicationEndpoint will be holding lots of open files. Make sure they get
             * closed/sync'ed.
             */
            ActiveMQServerImpl.stopComponent(replicationEndpoint);
            // time to give up
            if (!activeMQServer.isStarted() || signal == STOP) {
                if (logger.isTraceEnabled()) {
                    logger.trace("giving up on the activation:: activemqServer.isStarted=" + activeMQServer.isStarted() + " while signal = " + signal);
                }
                return;
            } else if (signal == FAIL_OVER) {
                // time to fail over
                if (logger.isTraceEnabled()) {
                    logger.trace("signal == FAIL_OVER, breaking the loop");
                }
                break;
            } else if (signal == SharedNothingBackupQuorum.BACKUP_ACTIVATION.FAILURE_REPLICATING) {
                // something has gone badly run restart from scratch
                if (logger.isTraceEnabled()) {
                    logger.trace("Starting a new thread to stop the server!");
                }
                // The restart runs on its own thread; this method returns right after starting it.
                Thread startThread = new Thread(new Runnable() {

                    @Override
                    public void run() {
                        try {
                            if (logger.isTraceEnabled()) {
                                logger.trace("Calling activeMQServer.stop() and start() to restart the server");
                            }
                            activeMQServer.stop();
                            activeMQServer.start();
                        } catch (Exception e) {
                            ActiveMQServerLogger.LOGGER.errorRestartingBackupServer(e, activeMQServer);
                        }
                    }
                });
                startThread.start();
                return;
            }
            // ok, this live is no good, let's reset and try again
            // close this session factory, we're done with it
            clusterControl.close();
            backupQuorum.reset();
            if (replicationEndpoint.getChannel() != null) {
                replicationEndpoint.getChannel().close();
                replicationEndpoint.setChannel(null);
            }
        } while (signal == SharedNothingBackupQuorum.BACKUP_ACTIVATION.ALREADY_REPLICATING);
        if (logger.isTraceEnabled()) {
            logger.trace("Activation loop finished, current signal = " + signal);
        }
        activeMQServer.getClusterManager().getQuorumManager().unRegisterQuorum(backupQuorum);
        // Refuse to go live on data that is not in sync with the node we were replicating from.
        if (!isRemoteBackupUpToDate()) {
            logger.debug("throwing exception for !isRemoteBackupUptoDate");
            throw ActiveMQMessageBundle.BUNDLE.backupServerNotInSync();
        }
        if (logger.isTraceEnabled()) {
            logger.trace("@@@ setReplicaPolicy::" + replicaPolicy);
        }
        // Swap the server's HA policy to the replicated policy paired with this replica policy.
        replicaPolicy.getReplicatedPolicy().setReplicaPolicy(replicaPolicy);
        activeMQServer.setHAPolicy(replicaPolicy.getReplicatedPolicy());
        synchronized (activeMQServer) {
            if (!activeMQServer.isStarted()) {
                logger.trace("Server is stopped, giving up right before becomingLive");
                return;
            }
            ActiveMQServerLogger.LOGGER.becomingLive(activeMQServer);
            logger.trace("stop backup");
            activeMQServer.getNodeManager().stopBackup();
            logger.trace("start store manager");
            activeMQServer.getStorageManager().start();
            logger.trace("activated");
            activeMQServer.getBackupManager().activated();
            if (scalingDown) {
                logger.trace("Scalling down...");
                activeMQServer.initialisePart2(true);
            } else {
                logger.trace("Setting up new activation");
                activeMQServer.setActivation(new SharedNothingLiveActivation(activeMQServer, replicaPolicy.getReplicatedPolicy()));
                logger.trace("initialize part 2");
                activeMQServer.initialisePart2(false);
                if (activeMQServer.getIdentity() != null) {
                    ActiveMQServerLogger.LOGGER.serverIsLive(activeMQServer.getIdentity());
                } else {
                    ActiveMQServerLogger.LOGGER.serverIsLive();
                }
            }
            logger.trace("completeActivation at the end");
            activeMQServer.completeActivation();
        }
    } catch (Exception e) {
        if (logger.isTraceEnabled()) {
            logger.trace(e.getMessage() + ", serverStarted=" + activeMQServer.isStarted(), e);
        }
        if ((e instanceof InterruptedException || e instanceof IllegalStateException) && !activeMQServer.isStarted())
            // do not log these errors if the server is being stopped.
            return;
        ActiveMQServerLogger.LOGGER.initializationError(e);
    }
}
Also used : TransportConfiguration(org.apache.activemq.artemis.api.core.TransportConfiguration) ActiveMQException(org.apache.activemq.artemis.api.core.ActiveMQException) ActiveMQInternalErrorException(org.apache.activemq.artemis.api.core.ActiveMQInternalErrorException) ClusterController(org.apache.activemq.artemis.core.server.cluster.ClusterController) SharedNothingBackupQuorum(org.apache.activemq.artemis.core.server.cluster.qourum.SharedNothingBackupQuorum) LiveNodeLocator(org.apache.activemq.artemis.core.server.LiveNodeLocator) TopologyMember(org.apache.activemq.artemis.api.core.client.TopologyMember) Pair(org.apache.activemq.artemis.api.core.Pair)

Aggregations

ActiveMQException (org.apache.activemq.artemis.api.core.ActiveMQException)1 ActiveMQInternalErrorException (org.apache.activemq.artemis.api.core.ActiveMQInternalErrorException)1 Pair (org.apache.activemq.artemis.api.core.Pair)1 TransportConfiguration (org.apache.activemq.artemis.api.core.TransportConfiguration)1 TopologyMember (org.apache.activemq.artemis.api.core.client.TopologyMember)1 LiveNodeLocator (org.apache.activemq.artemis.core.server.LiveNodeLocator)1 ClusterController (org.apache.activemq.artemis.core.server.cluster.ClusterController)1 SharedNothingBackupQuorum (org.apache.activemq.artemis.core.server.cluster.qourum.SharedNothingBackupQuorum)1