Search in sources :

Example 1 with LiveNodeLocator

use of org.apache.activemq.artemis.core.server.LiveNodeLocator in project activemq-artemis by apache.

the class SharedNothingBackupActivation method run.

/**
 * Runs the backup side of this activation: prepares the server as a backup, connects to the
 * replication cluster, replicates from a live node, and promotes this server to live on fail-over.
 * <p>
 * Steps visible in this method, in order:
 * <ol>
 * <li>mark the server {@code STARTED}, stop the node manager, move existing server data away and
 * restart the node manager;</li>
 * <li>run {@code initialisePart1} and register a {@link SharedNothingBackupQuorum} and a
 * {@code ServerConnectVoteHandler} with the quorum manager;</li>
 * <li>pick a {@link LiveNodeLocator}, wait for the connection to the replication cluster and start
 * the backup manager;</li>
 * <li>loop: locate a node, connect to it, hand over to the endpoint connector and block on
 * {@code backupQuorum.waitForStatusChange()}; the loop repeats while the signal is
 * {@code ALREADY_REPLICATING};</li>
 * <li>after the loop, verify the backup is up to date, switch to the replicated (live) HA policy
 * and complete the activation as a live server.</li>
 * </ol>
 * The method returns early whenever {@code closed} is set or the server is no longer started.
 * No exception escapes: everything is caught at the bottom and either ignored (interruption or
 * illegal state while the server is not started) or reported through
 * {@code ActiveMQServerLogger.LOGGER.initializationError}.
 */
@Override
public void run() {
    try {
        logger.trace("SharedNothingBackupActivation..start");
        // the state change is made while holding the server's monitor
        synchronized (activeMQServer) {
            activeMQServer.setState(ActiveMQServerImpl.SERVER_STATE.STARTED);
        }
        // move all data away:
        // the node manager is stopped around the move and started again afterwards
        activeMQServer.getNodeManager().stop();
        activeMQServer.moveServerData(replicaPolicy.getMaxSavedReplicatedJournalsSize());
        activeMQServer.getNodeManager().start();
        // bail out if this activation was closed in the meantime
        synchronized (this) {
            if (closed) {
                logger.trace("SharedNothingBackupActivation is closed, ignoring activation!");
                return;
            }
        }
        // scale-down is active only when a scale-down policy exists AND is enabled
        boolean scalingDown = replicaPolicy.getScaleDownPolicy() != null && replicaPolicy.getScaleDownPolicy().isEnabled();
        if (!activeMQServer.initialisePart1(scalingDown)) {
            if (logger.isTraceEnabled()) {
                logger.trace("could not initialize part1 " + scalingDown);
            }
            return;
        }
        logger.trace("Waiting for a synchronize now...");
        // the quorum is created and registered under this activation's monitor, after re-checking closed
        synchronized (this) {
            logger.trace("Entered a synchronized");
            if (closed)
                return;
            backupQuorum = new SharedNothingBackupQuorum(activeMQServer.getStorageManager(), activeMQServer.getNodeManager(), activeMQServer.getScheduledPool(), networkHealthCheck, replicaPolicy.getQuorumSize(), replicaPolicy.getVoteRetries(), replicaPolicy.getVoteRetryWait());
            activeMQServer.getClusterManager().getQuorumManager().registerQuorum(backupQuorum);
            activeMQServer.getClusterManager().getQuorumManager().registerQuorumHandler(new ServerConnectVoteHandler(activeMQServer));
        }
        // use a Node Locator to connect to the cluster
        // - an explicit REPLICATION_ENDPOINT activation parameter pins the locator to that topology member
        // - otherwise the locator is chosen by whether the replica policy has a group name
        LiveNodeLocator nodeLocator;
        if (activationParams.get(ActivationParams.REPLICATION_ENDPOINT) != null) {
            TopologyMember member = (TopologyMember) activationParams.get(ActivationParams.REPLICATION_ENDPOINT);
            nodeLocator = new NamedNodeIdNodeLocator(member.getNodeId(), new Pair<>(member.getLive(), member.getBackup()));
        } else {
            nodeLocator = replicaPolicy.getGroupName() == null ? new AnyLiveNodeLocatorForReplication(backupQuorum, activeMQServer) : new NamedLiveNodeLocatorForReplication(replicaPolicy.getGroupName(), backupQuorum);
        }
        ClusterController clusterController = activeMQServer.getClusterManager().getClusterController();
        // the locator is registered as a topology listener before waiting for the cluster connection
        clusterController.addClusterTopologyListenerForReplication(nodeLocator);
        logger.trace("Waiting on cluster connection");
        clusterController.awaitConnectionToReplicationCluster();
        logger.trace("Cluster Connected");
        clusterController.addIncomingInterceptorForReplication(new ReplicationError(nodeLocator));
        // nodeManager.startBackup();
        if (logger.isTraceEnabled()) {
            logger.trace("Starting backup manager");
        }
        activeMQServer.getBackupManager().start();
        if (logger.isTraceEnabled()) {
            logger.trace("Set backup Quorum");
        }
        replicationEndpoint.setBackupQuorum(backupQuorum);
        replicationEndpoint.setExecutor(activeMQServer.getExecutorFactory().getExecutor());
        // created once here, submitted to the server thread pool on every loop iteration that connects
        EndpointConnector endpointConnector = new EndpointConnector();
        if (logger.isTraceEnabled()) {
            logger.trace("Starting Backup Server");
        }
        ActiveMQServerLogger.LOGGER.backupServerStarted(activeMQServer.getVersion().getFullVersion(), activeMQServer.getNodeManager().getNodeId());
        // NOTE(review): the state was already set to STARTED at the top of this method; this second
        // call is made without holding the server's monitor - confirm that is intended
        activeMQServer.setState(ActiveMQServerImpl.SERVER_STATE.STARTED);
        if (logger.isTraceEnabled())
            logger.trace("Setting server state as started");
        SharedNothingBackupQuorum.BACKUP_ACTIVATION signal;
        // replication loop: one iteration per candidate live node; repeats while the outcome is ALREADY_REPLICATING
        do {
            // NOTE(review): closed is read here without the synchronized (this) used earlier - confirm it is volatile
            if (closed) {
                if (logger.isTraceEnabled()) {
                    logger.trace("Activation is closed, so giving up");
                }
                return;
            }
            if (logger.isTraceEnabled()) {
                logger.trace("looking up the node through nodeLocator.locateNode()");
            }
            // locate the first live server to try to replicate
            nodeLocator.locateNode();
            Pair<TransportConfiguration, TransportConfiguration> possibleLive = nodeLocator.getLiveConfiguration();
            nodeID = nodeLocator.getNodeID();
            if (logger.isTraceEnabled()) {
                logger.trace("nodeID = " + nodeID);
            }
            // in a normal (non failback) scenario if we couldn't find our live server we should fail
            if (!attemptFailBack) {
                if (logger.isTraceEnabled()) {
                    logger.trace("attemptFailback=false, nodeID=" + nodeID);
                }
                // this shouldn't happen
                if (nodeID == null) {
                    logger.debug("Throwing a RuntimeException as nodeID==null ant attemptFailback=false");
                    throw new RuntimeException("Could not establish the connection");
                }
                // adopt the located node's ID as this server's node ID
                activeMQServer.getNodeManager().setNodeID(nodeID);
            }
            // try the first connector of the pair (A); on failure fall back to the second one (B) if present
            try {
                if (logger.isTraceEnabled()) {
                    logger.trace("Calling clusterController.connectToNodeInReplicatedCluster(" + possibleLive.getA() + ")");
                }
                clusterControl = clusterController.connectToNodeInReplicatedCluster(possibleLive.getA());
            } catch (Exception e) {
                logger.debug(e.getMessage(), e);
                // NOTE(review): when getB() is null, clusterControl is not reset here and keeps whatever
                // value it had before this iteration - confirm a stale (already closed) control cannot be reused
                if (possibleLive.getB() != null) {
                    try {
                        clusterControl = clusterController.connectToNodeInReplicatedCluster(possibleLive.getB());
                    } catch (Exception e1) {
                        clusterControl = null;
                    }
                }
            }
            if (clusterControl == null) {
                if (logger.isTraceEnabled()) {
                    logger.trace("sleeping " + clusterController.getRetryIntervalForReplicatedCluster() + " it should retry");
                }
                // its ok to retry here since we haven't started replication yet
                // it may just be the server has gone since discovery
                Thread.sleep(clusterController.getRetryIntervalForReplicatedCluster());
                // setting the signal to ALREADY_REPLICATING makes the loop condition true, so "continue" retries
                signal = SharedNothingBackupQuorum.BACKUP_ACTIVATION.ALREADY_REPLICATING;
                continue;
            }
            activeMQServer.getThreadPool().execute(endpointConnector);
            /**
             * Wait for a signal from the quorum manager, at this point if replication has been successful we can
             * fail over or if there is an error trying to replicate (such as already replicating) we try the
             * process again on the next live server.  All the action happens inside {@link BackupQuorum}
             */
            signal = backupQuorum.waitForStatusChange();
            if (logger.isTraceEnabled()) {
                logger.trace("Got a signal " + signal + " through backupQuorum.waitForStatusChange()");
            }
            /**
             * replicationEndpoint will be holding lots of open files. Make sure they get
             * closed/sync'ed.
             */
            ActiveMQServerImpl.stopComponent(replicationEndpoint);
            // time to give up
            // (STOP and FAIL_OVER are unqualified here - presumably static imports of BACKUP_ACTIVATION constants)
            if (!activeMQServer.isStarted() || signal == STOP) {
                if (logger.isTraceEnabled()) {
                    logger.trace("giving up on the activation:: activemqServer.isStarted=" + activeMQServer.isStarted() + " while signal = " + signal);
                }
                return;
            } else if (signal == FAIL_OVER) {
                // time to fail over
                if (logger.isTraceEnabled()) {
                    logger.trace("signal == FAIL_OVER, breaking the loop");
                }
                break;
            } else if (signal == SharedNothingBackupQuorum.BACKUP_ACTIVATION.FAILURE_REPLICATING) {
                // something has gone badly wrong: restart the server from scratch.
                // The stop()/start() pair runs on a separate thread and this method returns right after starting it.
                if (logger.isTraceEnabled()) {
                    logger.trace("Starting a new thread to stop the server!");
                }
                Thread startThread = new Thread(new Runnable() {

                    @Override
                    public void run() {
                        try {
                            if (logger.isTraceEnabled()) {
                                logger.trace("Calling activeMQServer.stop() and start() to restart the server");
                            }
                            activeMQServer.stop();
                            activeMQServer.start();
                        } catch (Exception e) {
                            ActiveMQServerLogger.LOGGER.errorRestartingBackupServer(e, activeMQServer);
                        }
                    }
                });
                startThread.start();
                return;
            }
            // ok, this live is no good, let's reset and try again
            // close this session factory, we're done with it
            clusterControl.close();
            backupQuorum.reset();
            // drop the replication channel, if any, so the next attempt starts without one
            if (replicationEndpoint.getChannel() != null) {
                replicationEndpoint.getChannel().close();
                replicationEndpoint.setChannel(null);
            }
        } while (signal == SharedNothingBackupQuorum.BACKUP_ACTIVATION.ALREADY_REPLICATING);
        // reached through the FAIL_OVER break, or when the last signal was none of the values handled
        // inside the loop and was not ALREADY_REPLICATING
        if (logger.isTraceEnabled()) {
            logger.trace("Activation loop finished, current signal = " + signal);
        }
        activeMQServer.getClusterManager().getQuorumManager().unRegisterQuorum(backupQuorum);
        // refuse to go live on data that is not in sync; the exception is handled by the catch below
        if (!isRemoteBackupUpToDate()) {
            logger.debug("throwing exception for !isRemoteBackupUptoDate");
            throw ActiveMQMessageBundle.BUNDLE.backupServerNotInSync();
        }
        if (logger.isTraceEnabled()) {
            logger.trace("@@@ setReplicaPolicy::" + replicaPolicy);
        }
        // link the replicated policy back to this replica policy, then make it the server's HA policy
        replicaPolicy.getReplicatedPolicy().setReplicaPolicy(replicaPolicy);
        activeMQServer.setHAPolicy(replicaPolicy.getReplicatedPolicy());
        // the promotion to live happens while holding the server's monitor
        synchronized (activeMQServer) {
            if (!activeMQServer.isStarted()) {
                logger.trace("Server is stopped, giving up right before becomingLive");
                return;
            }
            ActiveMQServerLogger.LOGGER.becomingLive(activeMQServer);
            logger.trace("stop backup");
            activeMQServer.getNodeManager().stopBackup();
            logger.trace("start store manager");
            activeMQServer.getStorageManager().start();
            logger.trace("activated");
            activeMQServer.getBackupManager().activated();
            if (scalingDown) {
                // scale-down path: only initialisePart2(true) is called; no live activation is installed here
                logger.trace("Scalling down...");
                activeMQServer.initialisePart2(true);
            } else {
                // regular path: install a live activation, finish initialisation and announce the server as live
                logger.trace("Setting up new activation");
                activeMQServer.setActivation(new SharedNothingLiveActivation(activeMQServer, replicaPolicy.getReplicatedPolicy()));
                logger.trace("initialize part 2");
                activeMQServer.initialisePart2(false);
                if (activeMQServer.getIdentity() != null) {
                    ActiveMQServerLogger.LOGGER.serverIsLive(activeMQServer.getIdentity());
                } else {
                    ActiveMQServerLogger.LOGGER.serverIsLive();
                }
            }
            logger.trace("completeActivation at the end");
            activeMQServer.completeActivation();
        }
    } catch (Exception e) {
        if (logger.isTraceEnabled()) {
            logger.trace(e.getMessage() + ", serverStarted=" + activeMQServer.isStarted(), e);
        }
        if ((e instanceof InterruptedException || e instanceof IllegalStateException) && !activeMQServer.isStarted())
            // do not log these errors if the server is being stopped.
            return;
        ActiveMQServerLogger.LOGGER.initializationError(e);
    }
}
Also used : TransportConfiguration(org.apache.activemq.artemis.api.core.TransportConfiguration) ActiveMQException(org.apache.activemq.artemis.api.core.ActiveMQException) ActiveMQInternalErrorException(org.apache.activemq.artemis.api.core.ActiveMQInternalErrorException) ClusterController(org.apache.activemq.artemis.core.server.cluster.ClusterController) SharedNothingBackupQuorum(org.apache.activemq.artemis.core.server.cluster.qourum.SharedNothingBackupQuorum) LiveNodeLocator(org.apache.activemq.artemis.core.server.LiveNodeLocator) TopologyMember(org.apache.activemq.artemis.api.core.client.TopologyMember) Pair(org.apache.activemq.artemis.api.core.Pair)

Example 2 with LiveNodeLocator

use of org.apache.activemq.artemis.core.server.LiveNodeLocator in project activemq-artemis by apache.

the class LiveOnlyActivation method connectToScaleDownTarget.

/**
 * Connects this server to a live node that can act as the scale-down target.
 * <p>
 * A server locator is built from the given policy, a {@link LiveNodeLocator} is attached to it
 * (any live node when the policy has no group name, otherwise a node of that group), and each
 * candidate returned by the locator is tried until a session factory can be created. On success
 * the factory is stored in {@code scaleDownClientSessionFactory}.
 * <p>
 * This method never throws: any failure, including running out of candidates, is reported through
 * {@code ActiveMQServerLogger.LOGGER.failedToScaleDown}.
 *
 * @param scaleDownPolicy the policy describing how to reach the scale-down target
 */
public void connectToScaleDownTarget(ScaleDownPolicy scaleDownPolicy) {
    try {
        scaleDownServerLocator = ScaleDownPolicy.getScaleDownConnector(scaleDownPolicy, activeMQServer);
        scaleDownServerLocator.setProtocolManagerFactory(ActiveMQServerSideProtocolManagerFactory.getInstance(scaleDownServerLocator));
        // use a Node Locator to connect to the cluster
        LiveNodeLocator nodeLocator = scaleDownPolicy.getGroupName() == null ? new AnyLiveNodeLocatorForScaleDown(activeMQServer) : new NamedLiveNodeLocatorForScaleDown(scaleDownPolicy.getGroupName(), activeMQServer);
        scaleDownServerLocator.addClusterTopologyListener(nodeLocator);
        nodeLocator.connectToCluster(scaleDownServerLocator);
        // a timeout is necessary here in case we use a NamedLiveNodeLocatorForScaleDown and there's no matching node in the cluster
        // should the timeout be configurable?
        nodeLocator.locateNode(ActiveMQClient.DEFAULT_DISCOVERY_INITIAL_WAIT_TIMEOUT);
        ClientSessionFactoryInternal clientSessionFactory = null;
        while (clientSessionFactory == null) {
            Pair<TransportConfiguration, TransportConfiguration> possibleLive = null;
            try {
                possibleLive = nodeLocator.getLiveConfiguration();
                if (possibleLive == null) {
                    // we've tried every connector
                    break;
                }
                clientSessionFactory = (ClientSessionFactoryInternal) scaleDownServerLocator.createSessionFactory(possibleLive.getA(), 0, false);
            } catch (Exception e) {
                if (possibleLive == null) {
                    // The locator itself failed, so there is no candidate to report or to skip.
                    // Surface the real cause to the outer handler instead of hitting a
                    // NullPointerException on possibleLive.getA() below.
                    throw e;
                }
                logger.trace("Failed to connect to " + possibleLive.getA(), e);
                // tell the locator this candidate failed so a different one can be offered next
                nodeLocator.notifyRegistrationFailed(false);
                // clientSessionFactory is still null here: the assignment above is the last
                // statement of the try block, so nothing needs closing.
                // should I try the backup (i.e. getB()) from possibleLive?
            }
        }
        if (clientSessionFactory == null) {
            throw new ActiveMQException("Unable to connect to server for scale-down");
        }
        scaleDownClientSessionFactory = clientSessionFactory;
    } catch (Exception e) {
        ActiveMQServerLogger.LOGGER.failedToScaleDown(e);
    }
}
Also used : ClientSessionFactoryInternal(org.apache.activemq.artemis.core.client.impl.ClientSessionFactoryInternal) ActiveMQException(org.apache.activemq.artemis.api.core.ActiveMQException) TransportConfiguration(org.apache.activemq.artemis.api.core.TransportConfiguration) LiveNodeLocator(org.apache.activemq.artemis.core.server.LiveNodeLocator) ActiveMQException(org.apache.activemq.artemis.api.core.ActiveMQException)

Aggregations

ActiveMQException (org.apache.activemq.artemis.api.core.ActiveMQException)2 TransportConfiguration (org.apache.activemq.artemis.api.core.TransportConfiguration)2 LiveNodeLocator (org.apache.activemq.artemis.core.server.LiveNodeLocator)2 ActiveMQInternalErrorException (org.apache.activemq.artemis.api.core.ActiveMQInternalErrorException)1 Pair (org.apache.activemq.artemis.api.core.Pair)1 TopologyMember (org.apache.activemq.artemis.api.core.client.TopologyMember)1 ClientSessionFactoryInternal (org.apache.activemq.artemis.core.client.impl.ClientSessionFactoryInternal)1 ClusterController (org.apache.activemq.artemis.core.server.cluster.ClusterController)1 SharedNothingBackupQuorum (org.apache.activemq.artemis.core.server.cluster.qourum.SharedNothingBackupQuorum)1