Use of org.apache.activemq.artemis.core.server.LiveNodeLocator in the project activemq-artemis by Apache.
From the class SharedNothingBackupActivation, method run.
/**
 * Runs the backup side of a shared-nothing (replicating) activation.
 * <p>
 * In order, the method:
 * <ol>
 * <li>marks the server as STARTED, stops the node manager, moves the existing server data away and
 * starts the node manager again;</li>
 * <li>runs {@code initialisePart1} and registers a {@link SharedNothingBackupQuorum} plus a
 * {@code ServerConnectVoteHandler} with the quorum manager;</li>
 * <li>picks a {@link LiveNodeLocator}, waits for the connection to the replication cluster and starts
 * the backup manager;</li>
 * <li>loops: locates a live node, connects to it, submits an {@code EndpointConnector} to the thread pool
 * and waits for the quorum to report a status change; the loop repeats while the signal is
 * {@code ALREADY_REPLICATING};</li>
 * <li>once the loop is left with a fail-over signal, switches the HA policy to the replicated policy and
 * completes the activation as a live server (or runs {@code initialisePart2(true)} when scaling down).</li>
 * </ol>
 * The method returns early, without activating, whenever {@code closed} is set, {@code initialisePart1}
 * fails, the server is no longer started, or the quorum signals {@code STOP} / {@code FAILURE_REPLICATING}.
 * <p>
 * All exceptions are caught at the bottom of the method: they are reported through
 * {@code ActiveMQServerLogger.LOGGER.initializationError}, except {@link InterruptedException} and
 * {@link IllegalStateException} raised while the server is not started, which are only traced.
 */
@Override
public void run() {
try {
logger.trace("SharedNothingBackupActivation..start");
// the state change is done while holding the server's monitor
synchronized (activeMQServer) {
activeMQServer.setState(ActiveMQServerImpl.SERVER_STATE.STARTED);
}
// move all data away:
// the node manager is stopped around the move and started again afterwards
activeMQServer.getNodeManager().stop();
activeMQServer.moveServerData(replicaPolicy.getMaxSavedReplicatedJournalsSize());
activeMQServer.getNodeManager().start();
// 'closed' is read under this activation's own monitor
synchronized (this) {
if (closed) {
logger.trace("SharedNothingBackupActivation is closed, ignoring activation!");
return;
}
}
// scale-down applies only when a scale-down policy is configured AND enabled
boolean scalingDown = replicaPolicy.getScaleDownPolicy() != null && replicaPolicy.getScaleDownPolicy().isEnabled();
if (!activeMQServer.initialisePart1(scalingDown)) {
if (logger.isTraceEnabled()) {
logger.trace("could not initialize part1 " + scalingDown);
}
return;
}
logger.trace("Waiting for a synchronize now...");
// create and register the quorum under this activation's monitor, re-checking 'closed' first
synchronized (this) {
logger.trace("Entered a synchronized");
if (closed)
return;
backupQuorum = new SharedNothingBackupQuorum(activeMQServer.getStorageManager(), activeMQServer.getNodeManager(), activeMQServer.getScheduledPool(), networkHealthCheck, replicaPolicy.getQuorumSize(), replicaPolicy.getVoteRetries(), replicaPolicy.getVoteRetryWait());
activeMQServer.getClusterManager().getQuorumManager().registerQuorum(backupQuorum);
activeMQServer.getClusterManager().getQuorumManager().registerQuorumHandler(new ServerConnectVoteHandler(activeMQServer));
}
// use a Node Locator to connect to the cluster
// three cases: an explicit REPLICATION_ENDPOINT activation parameter, no group name, or a named group
LiveNodeLocator nodeLocator;
if (activationParams.get(ActivationParams.REPLICATION_ENDPOINT) != null) {
TopologyMember member = (TopologyMember) activationParams.get(ActivationParams.REPLICATION_ENDPOINT);
nodeLocator = new NamedNodeIdNodeLocator(member.getNodeId(), new Pair<>(member.getLive(), member.getBackup()));
} else {
nodeLocator = replicaPolicy.getGroupName() == null ? new AnyLiveNodeLocatorForReplication(backupQuorum, activeMQServer) : new NamedLiveNodeLocatorForReplication(replicaPolicy.getGroupName(), backupQuorum);
}
ClusterController clusterController = activeMQServer.getClusterManager().getClusterController();
// the locator is registered as a topology listener before waiting for the cluster connection
clusterController.addClusterTopologyListenerForReplication(nodeLocator);
logger.trace("Waiting on cluster connection");
clusterController.awaitConnectionToReplicationCluster();
logger.trace("Cluster Connected");
clusterController.addIncomingInterceptorForReplication(new ReplicationError(nodeLocator));
// nodeManager.startBackup();
if (logger.isTraceEnabled()) {
logger.trace("Starting backup manager");
}
activeMQServer.getBackupManager().start();
if (logger.isTraceEnabled()) {
logger.trace("Set backup Quorum");
}
// hand the quorum and an executor to the replication endpoint before any connection attempt
replicationEndpoint.setBackupQuorum(backupQuorum);
replicationEndpoint.setExecutor(activeMQServer.getExecutorFactory().getExecutor());
// the connector is created once and re-submitted to the thread pool on every loop iteration below
EndpointConnector endpointConnector = new EndpointConnector();
if (logger.isTraceEnabled()) {
logger.trace("Starting Backup Server");
}
ActiveMQServerLogger.LOGGER.backupServerStarted(activeMQServer.getVersion().getFullVersion(), activeMQServer.getNodeManager().getNodeId());
// NOTE(review): the state was already set to STARTED at the top of this method (there under the
// server's monitor); confirm whether this second, unsynchronized call is still required
activeMQServer.setState(ActiveMQServerImpl.SERVER_STATE.STARTED);
if (logger.isTraceEnabled())
logger.trace("Setting server state as started");
SharedNothingBackupQuorum.BACKUP_ACTIVATION signal;
// retry loop: repeats for as long as the outcome of an iteration is ALREADY_REPLICATING
do {
// NOTE(review): 'closed' is read here without holding the monitor used for the earlier checks
if (closed) {
if (logger.isTraceEnabled()) {
logger.trace("Activation is closed, so giving up");
}
return;
}
if (logger.isTraceEnabled()) {
logger.trace("looking up the node through nodeLocator.locateNode()");
}
// locate the first live server to try to replicate
nodeLocator.locateNode();
Pair<TransportConfiguration, TransportConfiguration> possibleLive = nodeLocator.getLiveConfiguration();
nodeID = nodeLocator.getNodeID();
if (logger.isTraceEnabled()) {
logger.trace("nodeID = " + nodeID);
}
// in a normal (non failback) scenario if we couldn't find our live server we should fail
if (!attemptFailBack) {
if (logger.isTraceEnabled()) {
logger.trace("attemptFailback=false, nodeID=" + nodeID);
}
// this shouldn't happen
if (nodeID == null) {
logger.debug("Throwing a RuntimeException as nodeID==null ant attemptFailback=false");
throw new RuntimeException("Could not establish the connection");
}
// adopt the located node's ID as this server's node ID
activeMQServer.getNodeManager().setNodeID(nodeID);
}
// try the first connector of the pair; on failure fall back to the second one, if present
// NOTE(review): if the first connect throws and getB() is null, clusterControl keeps whatever value
// it had before this iteration (it is closed but not nulled at the end of the loop body) — confirm
// that a stale reference cannot slip past the null check below
try {
if (logger.isTraceEnabled()) {
logger.trace("Calling clusterController.connectToNodeInReplicatedCluster(" + possibleLive.getA() + ")");
}
clusterControl = clusterController.connectToNodeInReplicatedCluster(possibleLive.getA());
} catch (Exception e) {
logger.debug(e.getMessage(), e);
if (possibleLive.getB() != null) {
try {
clusterControl = clusterController.connectToNodeInReplicatedCluster(possibleLive.getB());
} catch (Exception e1) {
// the second connector failed as well; e1 is not logged
clusterControl = null;
}
}
}
if (clusterControl == null) {
if (logger.isTraceEnabled()) {
logger.trace("sleeping " + clusterController.getRetryIntervalForReplicatedCluster() + " it should retry");
}
// it's ok to retry here since we haven't started replication yet
// it may just be the server has gone since discovery
Thread.sleep(clusterController.getRetryIntervalForReplicatedCluster());
// forcing the signal to ALREADY_REPLICATING makes the loop condition true, so 'continue' retries
signal = SharedNothingBackupQuorum.BACKUP_ACTIVATION.ALREADY_REPLICATING;
continue;
}
activeMQServer.getThreadPool().execute(endpointConnector);
/**
 * Wait for a signal from the quorum manager, at this point if replication has been successful we can
 * fail over or if there is an error trying to replicate (such as already replicating) we try the
 * process again on the next live server. All the action happens inside {@link BackupQuorum}
 */
signal = backupQuorum.waitForStatusChange();
if (logger.isTraceEnabled()) {
logger.trace("Got a signal " + signal + " through backupQuorum.waitForStatusChange()");
}
/**
 * replicationEndpoint will be holding lots of open files. Make sure they get
 * closed/sync'ed.
 */
ActiveMQServerImpl.stopComponent(replicationEndpoint);
// time to give up
if (!activeMQServer.isStarted() || signal == STOP) {
if (logger.isTraceEnabled()) {
logger.trace("giving up on the activation:: activemqServer.isStarted=" + activeMQServer.isStarted() + " while signal = " + signal);
}
return;
} else if (signal == FAIL_OVER) {
// time to fail over
if (logger.isTraceEnabled()) {
logger.trace("signal == FAIL_OVER, breaking the loop");
}
break;
} else if (signal == SharedNothingBackupQuorum.BACKUP_ACTIVATION.FAILURE_REPLICATING) {
// something has gone badly wrong: restart the server from scratch
if (logger.isTraceEnabled()) {
logger.trace("Starting a new thread to stop the server!");
}
// the stop()/start() pair runs on a separate thread; this method returns right after starting it
Thread startThread = new Thread(new Runnable() {
@Override
public void run() {
try {
if (logger.isTraceEnabled()) {
logger.trace("Calling activeMQServer.stop() and start() to restart the server");
}
activeMQServer.stop();
activeMQServer.start();
} catch (Exception e) {
ActiveMQServerLogger.LOGGER.errorRestartingBackupServer(e, activeMQServer);
}
}
});
startThread.start();
return;
}
// ok, this live is no good, let's reset and try again
// close this session factory, we're done with it
clusterControl.close();
backupQuorum.reset();
// also drop the endpoint's channel, if it has one, before the next attempt
if (replicationEndpoint.getChannel() != null) {
replicationEndpoint.getChannel().close();
replicationEndpoint.setChannel(null);
}
} while (signal == SharedNothingBackupQuorum.BACKUP_ACTIVATION.ALREADY_REPLICATING);
if (logger.isTraceEnabled()) {
logger.trace("Activation loop finished, current signal = " + signal);
}
// the quorum is no longer needed once the loop has been left
activeMQServer.getClusterManager().getQuorumManager().unRegisterQuorum(backupQuorum);
// refuse to become live when this backup is not in sync; the exception is handled by the catch below
if (!isRemoteBackupUpToDate()) {
logger.debug("throwing exception for !isRemoteBackupUptoDate");
throw ActiveMQMessageBundle.BUNDLE.backupServerNotInSync();
}
if (logger.isTraceEnabled()) {
logger.trace("@@@ setReplicaPolicy::" + replicaPolicy);
}
// switch the server's HA policy to the replicated policy, which keeps a reference back to this replica policy
replicaPolicy.getReplicatedPolicy().setReplicaPolicy(replicaPolicy);
activeMQServer.setHAPolicy(replicaPolicy.getReplicatedPolicy());
// the transition to live is done while holding the server's monitor
synchronized (activeMQServer) {
if (!activeMQServer.isStarted()) {
logger.trace("Server is stopped, giving up right before becomingLive");
return;
}
ActiveMQServerLogger.LOGGER.becomingLive(activeMQServer);
logger.trace("stop backup");
activeMQServer.getNodeManager().stopBackup();
logger.trace("start store manager");
activeMQServer.getStorageManager().start();
logger.trace("activated");
activeMQServer.getBackupManager().activated();
if (scalingDown) {
// scale-down path: only initialisePart2(true) is run; no live activation is installed here
logger.trace("Scalling down...");
activeMQServer.initialisePart2(true);
} else {
// regular path: install a live activation, finish initialisation and announce the server as live
logger.trace("Setting up new activation");
activeMQServer.setActivation(new SharedNothingLiveActivation(activeMQServer, replicaPolicy.getReplicatedPolicy()));
logger.trace("initialize part 2");
activeMQServer.initialisePart2(false);
if (activeMQServer.getIdentity() != null) {
ActiveMQServerLogger.LOGGER.serverIsLive(activeMQServer.getIdentity());
} else {
ActiveMQServerLogger.LOGGER.serverIsLive();
}
}
logger.trace("completeActivation at the end");
activeMQServer.completeActivation();
}
} catch (Exception e) {
if (logger.isTraceEnabled()) {
logger.trace(e.getMessage() + ", serverStarted=" + activeMQServer.isStarted(), e);
}
// NOTE(review): when e is an InterruptedException the thread's interrupt flag is not restored here
if ((e instanceof InterruptedException || e instanceof IllegalStateException) && !activeMQServer.isStarted())
// do not log these errors if the server is being stopped.
return;
ActiveMQServerLogger.LOGGER.initializationError(e);
}
}
Use of org.apache.activemq.artemis.core.server.LiveNodeLocator in the project activemq-artemis by Apache.
From the class LiveOnlyActivation, method connectToScaleDownTarget.
/**
 * Connects to a live node that will act as the scale-down target.
 * <p>
 * A server locator is built from the given policy, and a {@link LiveNodeLocator} (any node, or only nodes
 * of the policy's group when a group name is set) is attached to it. The candidates offered by the node
 * locator are then tried one after another until a session factory can be created or the locator has no
 * candidate left. On success the factory is stored in {@code scaleDownClientSessionFactory}.
 * <p>
 * This method does not throw: every failure, including "no candidate could be connected", is reported
 * through {@code ActiveMQServerLogger.LOGGER.failedToScaleDown}.
 *
 * @param scaleDownPolicy the policy describing how to reach the scale-down target
 */
public void connectToScaleDownTarget(ScaleDownPolicy scaleDownPolicy) {
    try {
        scaleDownServerLocator = ScaleDownPolicy.getScaleDownConnector(scaleDownPolicy, activeMQServer);
        // use a Node Locator to connect to the cluster
        scaleDownServerLocator.setProtocolManagerFactory(ActiveMQServerSideProtocolManagerFactory.getInstance(scaleDownServerLocator));
        LiveNodeLocator nodeLocator = scaleDownPolicy.getGroupName() == null ? new AnyLiveNodeLocatorForScaleDown(activeMQServer) : new NamedLiveNodeLocatorForScaleDown(scaleDownPolicy.getGroupName(), activeMQServer);
        scaleDownServerLocator.addClusterTopologyListener(nodeLocator);
        nodeLocator.connectToCluster(scaleDownServerLocator);
        // a timeout is necessary here in case we use a NamedLiveNodeLocatorForScaleDown and there's no matching node in the cluster
        // should the timeout be configurable?
        nodeLocator.locateNode(ActiveMQClient.DEFAULT_DISCOVERY_INITIAL_WAIT_TIMEOUT);

        ClientSessionFactoryInternal clientSessionFactory = null;
        while (clientSessionFactory == null) {
            Pair<TransportConfiguration, TransportConfiguration> possibleLive = null;
            try {
                possibleLive = nodeLocator.getLiveConfiguration();
                if (possibleLive == null) {
                    // we've tried every connector
                    break;
                }
                clientSessionFactory = (ClientSessionFactoryInternal) scaleDownServerLocator.createSessionFactory(possibleLive.getA(), 0, false);
            } catch (Exception e) {
                if (possibleLive == null) {
                    // The node locator itself failed, so there is no candidate to report or to mark as
                    // failed. Previously the log statement below dereferenced the null pair and replaced
                    // the real failure with a NullPointerException; surface the original cause instead.
                    throw e;
                }
                if (logger.isTraceEnabled()) {
                    logger.trace("Failed to connect to " + possibleLive.getA(), e);
                }
                // tell the locator this candidate did not work so that it can offer another one
                nodeLocator.notifyRegistrationFailed(false);
                // clientSessionFactory is still null here (its assignment is the last statement of the
                // try), so there is nothing to close before the next attempt
                // should I try the backup (i.e. getB()) from possibleLive?
            }
        }

        if (clientSessionFactory == null) {
            throw new ActiveMQException("Unable to connect to server for scale-down");
        }
        scaleDownClientSessionFactory = clientSessionFactory;
    } catch (Exception e) {
        ActiveMQServerLogger.LOGGER.failedToScaleDown(e);
    }
}
Aggregations