use of com.ms.silverking.cloud.meta.ExclusionSet in project SilverKing by Morgan-Stanley.
the class NodeRingMaster2 method getAllCurrentNonExcludedReplicaServers.
/**
 * Returns the replicas of the current resolved replica map that remain after
 * filtering by IP against the latest exclusion set.
 *
 * @return an immutable copy of the filtered replica collection
 */
private Set<IPAndPort> getAllCurrentNonExcludedReplicaServers() {
    final Set<IPAndPort> replicas = curMapState.getResolvedReplicaMap().allReplicas();
    // The latest exclusion set is used here, not the one recorded in curMapState.
    final ExclusionSet exclusions = latestExclusionSet;

    return ImmutableSet.copyOf(exclusions.filterByIP(replicas));
}
use of com.ms.silverking.cloud.meta.ExclusionSet in project SilverKing by Morgan-Stanley.
the class HealthMonitor method updateInstanceExclusionSet.
/**
 * Applies the given changes to the instance exclusion set: hosts of
 * {@code guiltySuspects} are added, hosts of {@code newActiveNodes} are removed,
 * and the result is written only if it differs from the set that was read.
 *
 * Does nothing except log when {@code instanceExclusionZK} is null. Any exception
 * raised along the way is logged and not propagated.
 *
 * @param guiltySuspects nodes whose hosts are to be added to the exclusion set
 * @param newActiveNodes nodes whose hosts are to be removed from the exclusion set
 */
private void updateInstanceExclusionSet(Set<IPAndPort> guiltySuspects, Set<IPAndPort> newActiveNodes) {
    try {
        if (instanceExclusionZK == null) {
            Log.warning("Unable to mark as bad as exclusionZK is null");
            return;
        }

        if (guiltySuspects.isEmpty()) {
            Log.warning("No guilty suspects");
        } else {
            Log.warning("Marking as bad");
            Log.warning(String.format("%s\n", CollectionUtil.toString(hostStringSet(guiltySuspects))));
        }

        if (newActiveNodes.isEmpty()) {
            Log.warning("No newly good nodes");
        } else {
            Log.warning("Marking as good");
            Log.warning(String.format("%s\n", CollectionUtil.toString(hostStringSet(newActiveNodes))));
        }

        Log.warning(String.format("Latest exclusion set path %s", instanceExclusionZK.getLatestZKPath()));
        // Start from an empty set when nothing has been stored yet.
        final ExclusionSet previous = instanceExclusionZK.getLatestZKPath() != null
                ? instanceExclusionZK.readLatestFromZK()
                : ExclusionSet.emptyExclusionSet(0);
        final ExclusionSet updated = previous
                .add(hostStringSet(guiltySuspects))
                .remove(hostStringSet(newActiveNodes));

        // Window of vulnerability between the read above and the write below;
        // for now we ensure this isn't violated externally.
        Log.warning(String.format("Old exclusion set %d %s", previous.size(), previous));
        Log.warning(String.format("New exclusion set %d %s", updated.size(), updated));

        if (updated.equals(previous)) {
            Log.warning("No change in exclusion set");
        } else {
            Log.warning("Writing exclusion set");
            instanceExclusionZK.writeToZK(updated);
            Log.warning(String.format("Latest exclusion set path after write %s", instanceExclusionZK.getLatestZKPath()));
        }
    } catch (Exception e) {
        Log.logErrorWarning(e, "Exception in updateInstanceExclusionSet");
    }
}
use of com.ms.silverking.cloud.meta.ExclusionSet in project SilverKing by Morgan-Stanley.
the class SKAdmin method execClusterCommandGroup.
/**
 * Runs a group of cluster-wide admin commands against the servers selected by
 * the DHT configuration, the host group table and {@code options.targets}.
 *
 * For each command, in order: the per-server command map is built and displayed,
 * then executed unless {@code options.displayOnly} is set. Processing stops at the
 * first command whose execution fails. After a StartNodes command the method
 * additionally waits for the active servers to reach the running state, retrying
 * with the timeouts parsed from {@code options.timeoutSeconds}.
 *
 * @param commands the commands to execute, in order
 * @return true if every executed command succeeded and, for StartNodes, the nodes
 *         were observed running; false otherwise. Also returns true immediately
 *         when the target is the active daemons, no valid active servers exist,
 *         and every command is StopNodes.
 * @throws IOException declared by helpers called here (e.g. replicaSetExcludedByExclusions)
 * @throws KeeperException declared by helpers called here (e.g. replicaSetExcludedByExclusions)
 */
private boolean execClusterCommandGroup(SKAdminCommand[] commands) throws IOException, KeeperException {
/*
 * Each DHT consists of active + passive nodes
 * Filter active&passive by particular host groups
 * any servers that aren't in the included host groups won't be used
 * Fetch all host group tables
 * Fetch all class variables for the host groups
 * For CheckSKFS, fetch the skfs environment
 * Wait for all fetches to complete
 * Create map of servers->commands to run
 * Pass the command map to TwoLevelParallelSSH and run
 * Run/wait until complete
 */
// Map<String,HostGroupTable> hostGroupTables;
// hostGroupTables = getHostGroupTables(hostGroups, dhtMC.getZooKeeper().getZKConfig());
Set<String> activeHostGroupNames;
Map<String, ClassVars> hostGroupToClassVars;
HostGroupTable hostGroupTable;
Set<String> validActiveServers;
Set<String> validPassiveServers;
String hostGroupTableName;
Map<String, String[]> serverCommands;
Set<String> passiveNodeHostGroupNames;
boolean result;
Set<String> targetServers;
Set<String> passiveTargetServers;
// Explicit targets are given as a comma-separated list in options.targets.
targetServers = CollectionUtil.parseSet(options.targets, ",");
activeHostGroupNames = dhtConfig.getHostGroups();
Log.warning("hostGroupNames: ", CollectionUtil.toString(activeHostGroupNames));
hostGroupToClassVars = getHostGroupToClassVarsMap(dhtConfig);
Log.warning("hostGroupToClassVars: ", CollectionUtil.mapToString(hostGroupToClassVars));
Log.warning("ringConfig: ", ringConfig);
hostGroupTableName = ringConfig.getCloudConfiguration().getHostGroupTableName();
Log.warning("hostGroupTableName: ", hostGroupTableName);
hostGroupTable = getHostGroupTable(hostGroupTableName, dhtMC.getZooKeeper().getZKConfig());
// FUTURE - Do more validation of configuration. E.g. prevent a server from being both
// active and passive, the ring from containing servers without class vars, etc.
// Active servers: derived from the active host groups, the host group table and the
// ring tree, then narrowed by retainOnlySpecifiedAndNonExcludedServers using the targets.
validActiveServers = findValidActiveServers(activeHostGroupNames, hostGroupTable, ringTree);
validActiveServers = retainOnlySpecifiedAndNonExcludedServers(validActiveServers, targetServers);
verifyServerEligibility(validActiveServers, commands);
Log.warning("validActiveServers: ", CollectionUtil.toString(validActiveServers));
// Allow StopNodes with empty validActiveServers if the target is activeDaemons
if (options.targetsEqualsActiveDaemonsTarget() && validActiveServers.isEmpty()) {
boolean exitOK;
exitOK = true;
// Only exit early when every requested command is StopNodes.
for (SKAdminCommand command : commands) {
if (command != SKAdminCommand.StopNodes) {
exitOK = false;
}
}
if (exitOK) {
return true;
}
}
// Passive targets are the explicit targets that are not valid active servers.
passiveTargetServers = new HashSet<>();
passiveTargetServers.addAll(targetServers);
passiveTargetServers.removeAll(validActiveServers);
passiveNodeHostGroupNames = dhtConfig.getPassiveNodeHostGroupsAsSet();
Log.warning("passiveNodeHostGroupNames: ", CollectionUtil.toString(passiveNodeHostGroupNames));
// Explicit passive targets take precedence; otherwise passive servers are looked up
// from the passive host groups.
if (passiveTargetServers.size() > 0) {
validPassiveServers = ImmutableSet.copyOf(passiveTargetServers);
} else {
validPassiveServers = findValidPassiveServers(passiveNodeHostGroupNames, hostGroupTable);
}
validPassiveServers = retainOnlySpecifiedAndNonExcludedServers(validPassiveServers, passiveTargetServers);
Log.warning("validPassiveServers: ", CollectionUtil.toString(validPassiveServers));
// Issue the countdown warning once, up front, when ClearData is requested and the
// target is not the exclusions target.
if (Arrays.contains(commands, SKAdminCommand.ClearData) && !options.targetsEqualsExclusionsTarget()) {
Log.countdownWarning("*** Clearing ALL data ***", unsafeWarningCountdown);
}
result = true;
for (SKAdminCommand command : commands) {
boolean _result;
Log.warning("Executing cluster command: ", command);
serverCommands = createServerCommands(command, validActiveServers, validPassiveServers, hostGroupTable, hostGroupToClassVars, activeHostGroupNames, passiveNodeHostGroupNames);
displayCommandMap(serverCommands);
if (!options.displayOnly) {
// The second argument is the active server set when it is non-empty, otherwise the
// passive server set.
_result = execCommandMap(serverCommands, validActiveServers.size() > 0 ? validActiveServers : validPassiveServers, hostGroupTable);
result = result && _result;
// Stop at the first failed command; remaining commands are not attempted.
if (!result) {
break;
}
}
// NOTE(review): this wait is not guarded by options.displayOnly, so it also runs in
// display-only mode — confirm that is intended.
if (command.equals(SKAdminCommand.StartNodes)) {
int[] timeouts;
boolean running;
int attemptIndex;
Log.warning("Waiting for nodes to enter running state...");
// One timeout per attempt; the number of timeouts bounds the number of attempts.
// NOTE(review): timeouts[0] is indexed unconditionally below, so this assumes
// options.timeoutSeconds yields at least one value — confirm.
timeouts = NumUtil.parseIntArray(options.timeoutSeconds, ",");
running = false;
attemptIndex = 0;
do {
Pair<Set<IPAndPort>, Boolean> waitResult;
Set<IPAndPort> failedServers;
Log.warningf("attemptIndex: %d\ttimeout: %d", attemptIndex, timeouts[attemptIndex]);
// Give up as soon as the current exclusion set excludes a replica set.
if (replicaSetExcludedByExclusions(exclusionSet)) {
return false;
}
waitResult = waitUntilRunning(IPAndPort.set(validActiveServers, dhtConfig.getPort()), timeouts[attemptIndex]);
// V1 is used as the set of failed servers, V2 as the "running" flag.
failedServers = waitResult.getV1();
if (waitResult.getV2()) {
running = true;
} else {
++attemptIndex;
// Only when another attempt remains: exclude the failed servers and drop them
// from the set that the next attempt waits on.
if (attemptIndex < timeouts.length) {
Log.warningf("Adding to instance exclusion set: %s", failedServers);
if (options.excludeInstanceExclusions) {
exclusionSet = exclusionSet.addByIPAndPort(failedServers);
}
addToInstanceExclusions(failedServers);
validActiveServers = removeServers(validActiveServers, failedServers);
}
}
} while (!running && attemptIndex < timeouts.length);
// All attempts were used without reaching the running state.
if (!running) {
return false;
}
}
}
return result;
}
use of com.ms.silverking.cloud.meta.ExclusionSet in project SilverKing by Morgan-Stanley.
the class SKAdmin method replicaSetExcludedByExclusions.
/**
 * Checks the given exclusion set against the current ring tree's resolved
 * replica map and reports whether getExcludedReplicaSets returns any replica set.
 * Each replica set returned is logged.
 *
 * @param es the exclusion set to check
 * @return true if at least one replica set is reported as excluded, false otherwise
 * @throws KeeperException declared by this method; thrown by the calls it makes
 * @throws IOException declared by this method; thrown by the calls it makes
 */
private boolean replicaSetExcludedByExclusions(ExclusionSet es) throws KeeperException, IOException {
    final InstantiatedRingTree tree = readCurrentTree();
    final ResolvedReplicaMap resolved =
            tree.getResolvedMap(ringConfig.getRingParentName(), new ReplicaNaiveIPPrioritizer());
    final List<Set<IPAndPort>> excluded = resolved.getExcludedReplicaSets(es.asIPAndPortSet(0));

    if (excluded.isEmpty()) {
        return false;
    }

    Log.warning("Exclusion set excludes at least one replica set:");
    for (Set<IPAndPort> replicaSet : excluded) {
        Log.warningf("%s", replicaSet);
    }
    return true;
}
use of com.ms.silverking.cloud.meta.ExclusionSet in project SilverKing by Morgan-Stanley.
the class SKAdmin method createServerCommands.
/**
 * Builds the map from server to tokenized command line for one admin command.
 *
 * For ClearInstanceExclusionsData the servers are those of the instance exclusion
 * set; for every other command they are the union of the valid active and valid
 * passive servers. Servers for which getServerClassVars returns null are omitted
 * from the result.
 *
 * @param command the admin command to translate
 * @param validActiveServers active servers eligible for the command
 * @param validPassiveServers passive servers eligible for the command
 * @param hostGroupTable host group table passed to getServerClassVars
 * @param hostGroupToClassVars class variables per host group
 * @param activeHostGroupNames names of the active host groups
 * @param passiveNodeHostGroupNames names of the passive node host groups
 * @return map of server to its command split on whitespace
 * @throws RuntimeException if the instance exclusions exclude a replica set, if
 *         fetching or checking them fails, if a destructive StartSKFS is requested,
 *         or if the command is unsupported
 */
private Map<String, String[]> createServerCommands(SKAdminCommand command, Set<String> validActiveServers, Set<String> validPassiveServers, HostGroupTable hostGroupTable, Map<String, ClassVars> hostGroupToClassVars, Set<String> activeHostGroupNames, Set<String> passiveNodeHostGroupNames) {
    final Set<String> targets = new HashSet<>();

    if (command != SKAdminCommand.ClearInstanceExclusionsData) {
        targets.addAll(validActiveServers);
        targets.addAll(validPassiveServers);
    } else {
        try {
            final ExclusionSet instanceExclusions = getInstanceExclusions();

            if (replicaSetExcludedByExclusions(instanceExclusions)) {
                Log.warning("Can't clear instance exclusions data. At least one replica set is entirely excluded.");
                throw new RuntimeException("Entire replica set excluded");
            }
            Log.warning("Servers to clear data from:\n", instanceExclusions);
            Log.countdownWarning("*** Clearing instance exclusions data ***", unsafeWarningCountdown);
            targets.addAll(instanceExclusions.getServers());
        } catch (KeeperException | IOException ex) {
            throw new RuntimeException("Exception calling getInstanceExclusions()", ex);
        }
    }

    final Map<String, String[]> commandsByServer = new HashMap<>();
    for (String server : targets) {
        final ClassVars classVars = getServerClassVars(server, hostGroupTable, activeHostGroupNames, passiveNodeHostGroupNames, hostGroupToClassVars);

        if (classVars == null) {
            // No class vars resolved for this server: it gets no command.
            continue;
        }

        String rawCommand;
        switch (command) {
        case StartNodes:
            rawCommand = createStartCommand(dhtConfig, classVars, options);
            break;
        case StopNodes:
            rawCommand = createStopCommand(dhtConfig, classVars);
            break;
        case ClearInstanceExclusionsData:
        case ClearData:
            rawCommand = createClearDataCommand(dhtConfig, classVars);
            break;
        case StartSKFS:
            if (options.destructive) {
                throw new RuntimeException("Destructive StartSKFS not supported");
            }
            // Falls through: a non-destructive StartSKFS uses the CheckSKFS command.
        case CheckSKFS:
            rawCommand = createCheckSKFSCommand(dhtConfig, classVars);
            break;
        case StopSKFS:
            rawCommand = createStopSKFSCommand(dhtConfig, classVars);
            break;
        default:
            throw new RuntimeException("Unsupported command: " + command);
        }
        commandsByServer.put(server, rawCommand.split("\\s+"));
    }
    return commandsByServer;
}
Aggregations