Use of org.voltdb.StartAction in project voltdb by VoltDB.
From the class ZKTestBase, method setUpZK.
/**
 * Starts {@code sites} {@link HostMessenger} instances and blocks until the
 * mesh prober of each one has reached its determination.
 *
 * <p>Messenger number {@code n} uses the default internal port of a fresh
 * {@code HostMessenger.Config} plus {@code n}, and a ZooKeeper interface on
 * 127.0.0.1 at the next port handed out by {@code m_ports}. The chosen
 * ZooKeeper port is recorded in {@code m_siteIdToZKPort} under key {@code n}.
 *
 * @param sites number of host messengers to start
 * @throws Exception if creating or starting a messenger fails
 */
protected void setUpZK(int sites) throws Exception {
    m_siteIdToZKPort = new TreeMap<>();
    m_clients = new ArrayList<>();
    m_messengers = new ArrayList<>();

    // one ":port" entry per site; every messenger is given the same list
    final String[] coordinators = IntStream.range(0, sites)
            .mapToObj(site -> ":" + (site + Constants.DEFAULT_INTERNAL_PORT))
            .toArray(String[]::new);

    for (int site = 0; site < sites; site++) {
        final HostMessenger.Config cfg = new HostMessenger.Config();
        cfg.internalPort += site;
        cfg.acceptor = MeshProber.builder()
                .coordinators(coordinators)
                .startAction(StartAction.PROBE)
                .hostCount(sites)
                .build();

        final int zkPort = m_ports.next();
        cfg.zkInterface = "127.0.0.1:" + zkPort;
        m_siteIdToZKPort.put(site, zkPort);
        cfg.networkThreads = 1;

        final HostMessenger messenger = new HostMessenger(cfg, null);
        messenger.start();
        m_messengers.add(messenger);
    }

    // only return once every started messenger has its determination
    for (HostMessenger messenger : m_messengers) {
        MeshProber.prober(messenger).waitForDetermination();
    }
}
Use of org.voltdb.StartAction in project voltdb by VoltDB.
From the class MeshProber, method considerMeshPlea.
/**
 * Decides whether a node that asks to join the mesh may do so.
 *
 * <p>The checks run in this order:
 * <ol>
 * <li>a request without host criteria is rejected as a version mismatch;</li>
 * <li>when neither this node nor any known host is operational, the plea is
 *     accepted if the requester's criteria are compatible with this node's
 *     and rejected with the list of incompatibilities otherwise;</li>
 * <li>otherwise, a requester that was not started with
 *     {@code StartAction.PROBE} is rejected when the operational start action
 *     is {@code PROBE};</li>
 * <li>a failure to list the hosts in ZooKeeper rejects the plea;</li>
 * <li>when the mesh already has at least {@code getHostCount()} hosts the plea
 *     is accepted only if the requester allows add;</li>
 * <li>otherwise the plea is accepted unless another host already holds the
 *     rejoin indicator.</li>
 * </ol>
 *
 * @param zk ZooKeeper handle used to list hosts and to create the rejoin
 *        indicator; must not be null
 * @param hostId id handed to {@code CoreZK.createRejoinNodeIndicator}
 * @param jo JSON request sent by the pleading node; must not be null
 * @return the decision for this plea
 */
@Override
public JoinAcceptor.PleaDecision considerMeshPlea(ZooKeeper zk, int hostId, JSONObject jo) {
    checkArgument(zk != null, "zookeeper is null");
    checkArgument(jo != null, "json object is null");

    if (!HostCriteria.hasCriteria(jo)) {
        return new JoinAcceptor.PleaDecision(
                String.format("Joining node version %s is incompatible with this node version %s",
                        jo.optString(SocketJoiner.VERSION_STRING, "(unknown)"),
                        m_versionChecker.getVersionString()),
                false, false);
    }

    HostCriteria hc = new HostCriteria(jo);
    Map<Integer, HostCriteria> hostCriteria = m_hostCriteria.get();

    // neither this node nor any known host is operational
    // (i.e. when the cluster is forming anew)
    if (!getNodeState().operational()
            && hostCriteria.values().stream().noneMatch(c -> c.getNodeState().operational())) {
        List<String> incompatibilities = asHostCriteria().listIncompatibilities(hc);
        if (!incompatibilities.isEmpty()) {
            Joiner joiner = Joiner.on("\n ").skipNulls();
            String error = "Incompatible joining criteria:\n " + joiner.join(incompatibilities);
            return new JoinAcceptor.PleaDecision(error, false, false);
        }
        return new JoinAcceptor.PleaDecision(null, true, false);
    } else {
        // start action of the first operational host, or this node's own when none is listed
        StartAction operationalStartAction = hostCriteria.values().stream()
                .filter(c -> c.getNodeState().operational())
                .map(c -> c.getStartAction())
                .findFirst()
                .orElse(getStartAction());
        if (operationalStartAction == StartAction.PROBE && hc.getStartAction() != StartAction.PROBE) {
            String msg = "Invalid VoltDB command. Please use init and start to join this cluster";
            return new JoinAcceptor.PleaDecision(msg, false, false);
        }
    }

    // how many hosts are already in the mesh?
    Stat stat = new Stat();
    try {
        zk.getChildren(CoreZK.hosts, false, stat);
    } catch (InterruptedException e) {
        String msg = "Interrupted while considering mesh plea";
        m_networkLog.error(msg, e);
        // restore the interrupt status so callers up the stack can observe it
        Thread.currentThread().interrupt();
        return new JoinAcceptor.PleaDecision(msg, false, false);
    } catch (KeeperException e) {
        EnumSet<KeeperException.Code> closing = EnumSet.of(
                KeeperException.Code.SESSIONEXPIRED,
                KeeperException.Code.CONNECTIONLOSS);
        if (closing.contains(e.code())) {
            return new JoinAcceptor.PleaDecision("Shutting down", false, false);
        } else {
            String msg = "Failed to list hosts while considering a mesh plea";
            m_networkLog.error(msg, e);
            return new JoinAcceptor.PleaDecision(msg, false, false);
        }
    }

    final int meshedHosts = stat.getNumChildren();
    final int hostCount = getHostCount();

    // connecting to already wholly formed cluster
    if (meshedHosts >= hostCount) {
        final boolean addAllowed = hc.isAddAllowed();
        return new JoinAcceptor.PleaDecision(addAllowed ? null : "Cluster is already complete", addAllowed, false);
    }

    // the mesh is short of hosts: check for concurrent rejoins
    final int rejoiningHost = CoreZK.createRejoinNodeIndicator(zk, hostId);
    if (rejoiningHost == -1) {
        return new JoinAcceptor.PleaDecision(null, true, false);
    }
    String msg = "Only one host can rejoin at a time. Host " + rejoiningHost + " is still rejoining.";
    return new JoinAcceptor.PleaDecision(msg, false, true);
}
Use of org.voltdb.StartAction in project voltdb by VoltDB.
From the class CommandLine, method startCommand.
/**
 * Sets the start action of this command line from its textual form.
 *
 * @param command text that {@code StartAction.monickerFor} resolves to a start action
 * @return this instance, so calls can be chained
 * @throws IllegalArgumentException when {@code StartAction.monickerFor} yields
 *         {@code null} for {@code command}; the same message is logged as a warning first
 */
public CommandLine startCommand(String command) {
    final StartAction resolved = StartAction.monickerFor(command);
    if (resolved != null) {
        m_startAction = resolved;
        return this;
    }

    // the text did not map to any start action: log it, then reject the call
    final String complaint = "Unknown action: " + command + ". ";
    hostLog.warn(complaint);
    throw new IllegalArgumentException(complaint);
}
Use of org.voltdb.StartAction in project voltdb by VoltDB.
From the class TestStateMachine, method setUp.
/**
 * Prepares the fixture: starts {@code NUM_AGREEMENT_SITES} messengers via
 * {@code setUpZK}, builds the probing join criteria, makes sure the ZooKeeper
 * nodes used by the test exist, and registers the state machines of each site.
 *
 * @throws Exception if the mesh cannot be started or a ZooKeeper node cannot be created
 */
@Before
public void setUp() throws Exception {
    setUpZK(NUM_AGREEMENT_SITES);

    // one ":port" coordinator entry per agreement site
    coordinators = IntStream.range(0, NUM_AGREEMENT_SITES)
            .mapToObj(site -> ":" + (site + Constants.DEFAULT_INTERNAL_PORT))
            .toArray(String[]::new);
    criteria = MeshProber.builder()
            .coordinators(coordinators)
            .startAction(StartAction.PROBE)
            .hostCount(NUM_AGREEMENT_SITES)
            .build();

    // create the parent nodes first, then the state machine manager root
    final ZooKeeper firstZk = m_messengers.get(0).getZK();
    ZKUtil.addIfMissing(firstZk, "/test", CreateMode.PERSISTENT, null);
    ZKUtil.addIfMissing(firstZk, "/test/db", CreateMode.PERSISTENT, null);
    ZKUtil.addIfMissing(firstZk, stateMachineManagerRoot, CreateMode.PERSISTENT, null);

    for (int site = 0; site < NUM_AGREEMENT_SITES; site++) {
        addStateMachinesFor(site);
    }
}
Use of org.voltdb.StartAction in project voltdb by VoltDB.
From the class MeshProber, method determineStartActionIfNecessary.
/**
* Check to see if we have enough {@link HostCriteria} gathered to make a
* start action {@link Determination}
* <p>
* Returns without doing anything when a determination has already been set,
* or when too few host criteria have been gathered so far. Otherwise it
* tallies the gathered criteria and sets {@code m_probedDetermination}, or
* calls {@code crashLocalVoltDB} for the combinations it rejects.
*
* @param hostCriteria the host criteria gathered so far (keys are presumably
*        host ids; confirm against the caller)
*/
private void determineStartActionIfNecessary(Map<Integer, HostCriteria> hostCriteria) {
// already made a determination
if (m_probedDetermination.isDone()) {
return;
}
// despite its name this holds kfactor + 1, not kfactor
final int ksafety = getkFactor() + 1;
// node has no recoverable artifacts (Command Logs, Snapshots)
int bare = 0;
int unmeshed = 0;
int operational = 0;
// number of hosts that reported a terminus nonce
int haveTerminus = 0;
int hostCount = getHostCount();
int missingHostCount = getmissingHostCount();
// both paused and safemode need to be specified on only one node to
// make them a cluster attribute. These are overridden if there are
// any nodes in operational state
boolean paused = isPaused();
boolean safemode = isSafeMode();
// distinct terminus nonces reported by the hosts, kept sorted
final NavigableSet<String> terminusNonces = new TreeSet<>();
for (HostCriteria c : hostCriteria.values()) {
if (c.getNodeState().operational()) {
operational += 1;
// prefer host count from operational nodes
// (only the first operational node seen supplies paused and hostCount)
if (operational == 1) {
paused = c.isPaused();
hostCount = c.getHostCount();
}
}
unmeshed += c.getNodeState().unmeshed() ? 1 : 0;
bare += c.isBare() ? 1 : 0;
// as long as no operational node has been seen, any paused node
// marks the cluster as paused
if (c.isPaused() && operational == 0) {
paused = c.isPaused();
}
// safemode on any one node is enough to carry it forward
safemode = safemode || c.isSafeMode();
if (c.getTerminusNonce() != null) {
terminusNonces.add(c.getTerminusNonce());
++haveTerminus;
}
}
// number of hosts expected to report: host count minus the missing hosts
int expectedHostCount = hostCount - missingHostCount;
// not enough host criteria to make a determination
if (hostCriteria.size() < expectedHostCount && operational == 0) {
m_networkLog.debug("have yet to receive all the required host criteria");
return;
}
// handle add (i.e. join) cases too
// (the host count taken from an operational node is below this node's own)
if (hostCount < getHostCount() && hostCriteria.size() <= expectedHostCount) {
m_networkLog.debug("have yet to receive all the required host criteria");
return;
}
m_networkLog.debug("Received all the required host criteria");
// safemode only survives when no node is operational and fewer than
// ksafety (kfactor + 1) nodes are bare
safemode = safemode && operational == 0 && bare < ksafety;
if (m_networkLog.isDebugEnabled()) {
m_networkLog.debug("We have " + operational + " operational nodes, " + bare + " bare nodes, and " + unmeshed + " unmeshed nodes");
m_networkLog.debug("Propagated cluster attribute are paused: " + paused + ", and safemode: " + safemode);
}
// more than one distinct terminus nonce was reported
// NOTE(review): unlike the crashLocalVoltDB calls further down, no return
// follows this one, so execution continues here if the call returns - confirm intended
if (terminusNonces.size() > 1) {
org.voltdb.VoltDB.crashLocalVoltDB("Detected multiple startup snapshots, cannot " + "proceed with cluster startup. Snapshot IDs " + terminusNonces);
}
// first (lowest) reported nonce, or null when no host reported one
String terminusNonce = terminusNonces.pollFirst();
// with no operational nodes, drop the nonce unless more than
// (hostCount - ksafety) hosts reported one
if (operational == 0 && haveTerminus <= (hostCount - ksafety)) {
terminusNonce = null;
}
// a start action other than PROBE is used as is, together with this
// node's own getHostCount() rather than the local hostCount variable
if (getStartAction() != StartAction.PROBE) {
m_probedDetermination.set(new Determination(getStartAction(), getHostCount(), paused, terminusNonce));
return;
}
// initial value from this node's own bare state; the chain below replaces
// it whenever one of its branches matches
StartAction determination = isBare() ? StartAction.CREATE : StartAction.RECOVER;
if (operational > 0 && operational < hostCount) {
// rejoin
determination = StartAction.LIVE_REJOIN;
} else if (operational > 0 && operational == hostCount) {
// join
if (isAddAllowed()) {
// the host count grows by ksafety (kfactor + 1)
hostCount = hostCount + ksafety;
determination = StartAction.JOIN;
} else {
// NOTE(review): the message says "rejoin" although this is the join branch - confirm wording
org.voltdb.VoltDB.crashLocalVoltDB("Node is not allowed to rejoin an already complete cluster");
return;
}
} else if (operational == 0 && bare == unmeshed) {
// no operational nodes and every unmeshed node is bare
determination = StartAction.CREATE;
} else if (operational == 0 && bare < ksafety) /* kfactor + 1 */
{
determination = safemode ? StartAction.SAFE_RECOVER : StartAction.RECOVER;
} else if (operational == 0 && bare >= ksafety) /* kfactor + 1 */
{
org.voltdb.VoltDB.crashLocalVoltDB("Cluster has incomplete command logs: " + bare + " nodes have no command logs, while " + (unmeshed - bare) + " nodes have them");
return;
}
final Determination dtrm = new Determination(determination, hostCount, paused, terminusNonce);
if (m_networkLog.isDebugEnabled()) {
m_networkLog.debug("made the following " + dtrm);
}
m_probedDetermination.set(dtrm);
}
Aggregations