use of com.yahoo.vdslib.state.NodeState in project vespa by vespa-engine.
the class RpcServer method handleRpcRequests.
/**
 * Makes sure the RPC server is connected and then answers queued RPC requests.
 *
 * <p>At most 10 queued requests are answered per invocation. The {@code getMaster} method is
 * always answered; every other method is refused with an error unless
 * {@code masterHandler.isMaster()} is true. Any exception thrown while answering a request is
 * converted into a {@code METHOD_FAILED} error reply on that request.
 *
 * @param cluster        cluster whose node info is used to answer {@code getNodeList},
 *                       {@code getNodeState} and {@code setNodeState}
 * @param systemState    cluster state returned by {@code getSystemState} and {@code getNodeState}
 * @param changeListener notified when {@code setNodeState} changes a node's wanted state
 * @param addedListener  not used by this method
 * @return true if at least one queued request was taken off the queue during this call
 */
public boolean handleRpcRequests(ContentCluster cluster, ClusterState systemState, NodeStateOrHostInfoChangeHandler changeListener, NodeAddedOrRemovedListener addedListener) {
    boolean handledAnyRequests = false;
    if (!isConnected()) {
        long time = timer.getCurrentTimeInMillis();
        try {
            connect();
        } catch (ListenFailedException e) {
            logConnectFailure("Failed to bind RPC server to port " + port + ": ", e, time);
        } catch (Exception e) {
            logConnectFailure("Failed to initailize RPC server socket: ", e, time);
        }
    }
    for (int j = 0; j < 10; ++j) {
        // Max perform 10 RPC requests per cycle.
        Request req;
        synchronized (monitor) {
            if (rpcRequests.isEmpty()) {
                break;
            }
            Iterator<Request> it = rpcRequests.iterator();
            req = it.next();
            it.remove();
            handledAnyRequests = true;
        }
        try {
            if (req.methodName().equals("getMaster")) {
                log.log(LogLevel.DEBUG, "Resolving RPC getMaster request");
                Integer master = masterHandler.getMaster();
                String masterReason = masterHandler.getMasterReason();
                req.returnValues().add(new Int32Value(master == null ? -1 : master));
                req.returnValues().add(new StringValue(masterReason == null ? "No reason given" : masterReason));
                req.returnRequest();
                continue;
            }
            // Everything except getMaster is only answered by the master.
            if (!masterHandler.isMaster()) {
                throw new IllegalStateException("Refusing to answer RPC calls as we are not the master fleetcontroller.");
            }
            if (req.methodName().equals("getNodeList")) {
                log.log(LogLevel.DEBUG, "Resolving RPC getNodeList request");
                List<String> slobrok = new ArrayList<>();
                List<String> rpc = new ArrayList<>();
                for (NodeInfo node : cluster.getNodeInfo()) {
                    String s1 = node.getSlobrokAddress();
                    String s2 = node.getRpcAddress();
                    assert (s1 != null);
                    slobrok.add(s1);
                    // A missing RPC address is reported as an empty string.
                    rpc.add(s2 == null ? "" : s2);
                }
                req.returnValues().add(new StringArray(slobrok.toArray(new String[0])));
                req.returnValues().add(new StringArray(rpc.toArray(new String[0])));
                req.returnRequest();
            } else if (req.methodName().equals("getSystemState")) {
                log.log(LogLevel.DEBUG, "Resolving RPC getSystemState request");
                req.returnValues().add(new StringValue(""));
                req.returnValues().add(new StringValue(systemState.toString(true)));
                req.returnRequest();
            } else if (req.methodName().equals("getNodeState")) {
                log.log(LogLevel.DEBUG, "Resolving RPC getNodeState request");
                NodeType nodeType = NodeType.get(req.parameters().get(0).asString());
                int nodeIndex = req.parameters().get(1).asInt32();
                Node node = new Node(nodeType, nodeIndex);
                // First return value is current state in system state
                NodeState stateInSystemState = systemState.getNodeState(node);
                req.returnValues().add(new StringValue(stateInSystemState.serialize()));
                // Second return value is state node is reporting
                NodeInfo nodeInfo = cluster.getNodeInfo(node);
                if (nodeInfo == null) {
                    throw new RuntimeException("No node " + node + " exists in cluster " + cluster.getName());
                }
                NodeState fromNode = nodeInfo.getReportedState();
                req.returnValues().add(new StringValue(fromNode == null ? "unknown" : fromNode.serialize()));
                // Third return value is state node has been requested to be in
                req.returnValues().add(new StringValue(nodeInfo.getWantedState().serialize()));
                // Fourth return value is RPC address of node
                req.returnValues().add(new StringValue(nodeInfo.getRpcAddress() == null ? "" : nodeInfo.getRpcAddress()));
                req.returnRequest();
            } else if (req.methodName().equals("setNodeState")) {
                // The node type and index are the last two '/'-separated parts of the slobrok address.
                String slobrokAddress = req.parameters().get(0).asString();
                int lastSlash = slobrokAddress.lastIndexOf('/');
                int nextButLastSlash = slobrokAddress.lastIndexOf('/', lastSlash - 1);
                if (lastSlash == -1 || nextButLastSlash == -1) {
                    throw new IllegalStateException("Invalid slobrok address '" + slobrokAddress + "'.");
                }
                NodeType nodeType = NodeType.get(slobrokAddress.substring(nextButLastSlash + 1, lastSlash));
                int nodeIndex = Integer.parseInt(slobrokAddress.substring(lastSlash + 1));
                NodeInfo node = cluster.getNodeInfo(new Node(nodeType, nodeIndex));
                if (node == null) {
                    throw new IllegalStateException("Cannot set wanted state of node " + new Node(nodeType, nodeIndex) + ". Index does not correspond to a configured node.");
                }
                NodeState nodeState = NodeState.deserialize(nodeType, req.parameters().get(1).asString());
                // States other than UP and RETIRED get a default description when the client gave none.
                if (nodeState.getDescription().equals("") && !nodeState.getState().equals(State.UP) && !nodeState.getState().equals(State.RETIRED)) {
                    nodeState.setDescription("Set by remote RPC client");
                }
                NodeState oldState = node.getUserWantedState();
                String message = (nodeState.getState().equals(State.UP) ? "Clearing wanted nodeState for node " + node : "New wantedstate '" + nodeState.toString() + "' stored for node " + node);
                if (!oldState.equals(nodeState) || !oldState.getDescription().equals(nodeState.getDescription())) {
                    if (!nodeState.getState().validWantedNodeState(nodeType)) {
                        throw new IllegalStateException("State " + nodeState.getState() + " can not be used as wanted state for node of type " + nodeType);
                    }
                    node.setWantedState(nodeState);
                    changeListener.handleNewWantedNodeState(node, nodeState);
                } else {
                    message = "Node " + node + " already had wanted state " + nodeState.toString();
                    log.log(LogLevel.DEBUG, message);
                }
                req.returnValues().add(new StringValue(message));
                req.returnRequest();
                if (nodeState.getState() == State.UP && node.getPrematureCrashCount() > 0) {
                    log.log(LogLevel.INFO, "Clearing premature crash count of " + node.getPrematureCrashCount() + " as wanted state was set to up");
                    node.setPrematureCrashCount(0);
                }
            }
        } catch (Exception e) {
            if (log.isLoggable(LogLevel.DEBUG)) {
                StringWriter sw = new StringWriter();
                e.printStackTrace(new PrintWriter(sw));
                log.log(LogLevel.DEBUG, "Failed RPC Request: " + sw);
            }
            // Fall back to the exception's string form when it carries no message.
            String errorMsg = e.getMessage();
            if (errorMsg == null) {
                errorMsg = e.toString();
            }
            req.setError(ErrorCode.METHOD_FAILED, errorMsg);
            req.returnRequest();
        }
    }
    return handledAnyRequests;
}

/**
 * Logs a connect failure as a warning, unless the same message was already logged within the
 * last 60 seconds. The comparison is null-safe because an exception may carry no message.
 */
private void logConnectFailure(String messagePrefix, Exception e, long time) {
    String message = e.getMessage();
    boolean sameAsLastError = (message == null) ? lastConnectError == null : message.equals(lastConnectError);
    if (!sameAsLastError || time - lastConnectErrorTime > 60 * 1000) {
        lastConnectError = message;
        lastConnectErrorTime = time;
        log.log(LogLevel.WARNING, messagePrefix + message);
    }
}
use of com.yahoo.vdslib.state.NodeState in project vespa by vespa-engine.
the class NodeStateGatherer method sendMessages.
/**
 * Sends a node state request to every node that has no request pending and
 * is due for another attempt.
 *
 * <p>Nodes without a usable RPC address are not queried. Their reported state is
 * re-stored with the current time instead, and is first changed to DOWN (with the
 * listener notified) when the node was stopping or has been out of slobrok for longer
 * than the grace period.
 *
 * @return true if at least one request was handed to the communicator
 */
public boolean sendMessages(ContentCluster cluster, Communicator communicator, NodeStateOrHostInfoChangeHandler listener) {
    final long now = timer.getCurrentTimeInMillis();
    boolean anySent = false;
    for (NodeInfo node : cluster.getNodeInfo()) {
        Long lastRequestAt = node.getLatestNodeStateRequestTime();
        boolean requestPending = lastRequestAt != null && (now - lastRequestAt < nodeStateRequestTimeoutMS);
        // Skip nodes with a pending request, and nodes whose next attempt is not yet due.
        if (requestPending || node.getTimeForNextStateRequestAttempt() > now) {
            continue;
        }

        if (node.getRpcAddress() != null && !node.isRpcAddressOutdated()) {
            communicator.getNodeState(node, waiter);
            anySent = true;
            continue;
        }

        // Cannot query state of node without RPC address
        log.log(LogLevel.DEBUG, "Not sending getNodeState request to node " + node.getNode() + ": Not in slobrok");
        NodeState reported = node.getReportedState().clone();
        boolean graceExpired = !reported.getState().equals(State.DOWN)
                && now - node.getRpcAddressOutdatedTimestamp() > maxSlobrokDisconnectGracePeriod;
        // Don't wait for grace period if we expect node to be stopping
        if (graceExpired || reported.getState().equals(State.STOPPING)) {
            String why = reported.getState().equals(State.STOPPING)
                    ? "as node completed stopping."
                    : "as node has been out of slobrok longer than " + maxSlobrokDisconnectGracePeriod + ".";
            log.log(LogLevel.DEBUG, "Setting reported state to DOWN " + why);
            if (reported.getState().oneOf("iur") || !reported.hasDescription()) {
                reported.setDescription("Set node down as it has been out of slobrok for "
                        + (now - node.getRpcAddressOutdatedTimestamp())
                        + " ms which is more than the max limit of "
                        + maxSlobrokDisconnectGracePeriod
                        + " ms.");
            }
            reported.setState(State.DOWN);
            listener.handleNewNodeState(node, reported.clone());
        }
        // Store the (possibly updated) state together with the current time.
        node.setReportedState(reported, now);
    }
    return anySent;
}
use of com.yahoo.vdslib.state.NodeState in project vespa by vespa-engine.
the class NodeStateGatherer method handleError.
/**
 * Translates a failed get node state request into the node state to use for the node.
 *
 * <p>The returned state starts out as DOWN for the node's type and is given a description
 * (and in one case the STOPPING state) depending on the reply's return code. Each failure is
 * also reported, either as a node event or as a debug log line; see
 * {@link #reportGetNodeStateFailure}.
 *
 * @param req         the failed request; its reply supplies the return code and message
 * @param info        the node the request was sent to
 * @param currentTime timestamp attached to any node event that is added
 * @return the new state for the node, or null when the reply's return code is
 *         {@code Communicator.TRANSIENT_ERROR}
 */
private NodeState handleError(GetNodeStateRequest req, NodeInfo info, long currentTime) {
    NodeState newState = new NodeState(info.getNode().getType(), State.DOWN);
    if (req.getReply().getReturnCode() == ErrorCode.TIMEOUT) {
        String msg = "RPC timeout";
        // The event text is more verbose than the description stored on the state.
        reportGetNodeStateFailure(req, info, "RPC timeout talking to node.", msg, false, currentTime);
        newState.setDescription(msg);
    } else if (req.getReply().getReturnCode() == ErrorCode.CONNECTION) {
        Target target = info.lastRequestInfoConnection;
        Exception reason = (target == null ? null : target.getConnectionLostReason());
        if (reason != null) {
            String msg = reason.getMessage();
            if (msg == null) {
                msg = "(null)";
            }
            // Default description is the raw connection-lost message; some branches below replace it.
            newState.setDescription(msg);
            if (msg.equals("Connection refused")) {
                // NOTE(review): the stored description stays "Connection refused" while the
                // de-duplication check compares against the prefixed text - confirm intended.
                msg = "Connection error: Connection refused";
                reportGetNodeStateFailure(req, info, msg, msg, false, currentTime);
                newState.setState(State.DOWN);
            } else if (msg.equals("jrt: Connection closed by peer") || msg.equals("Connection reset by peer")) {
                msg = "Connection error: Closed at other end. (Node or switch likely shut down)";
                if (info.isRpcAddressOutdated()) {
                    msg += " Node is no longer in slobrok.";
                }
                reportGetNodeStateFailure(req, info, msg, msg, false, currentTime);
                newState.setState(State.DOWN).setDescription(msg);
            } else if (msg.equals("Connection timed out")) {
                // Unlike the other branches, the debug line here is never suppressed and the
                // description remains the raw "Connection timed out" text.
                if (info.getReportedState().getState().oneOf("ui")) {
                    msg = "Connection error: Timeout";
                    addGetNodeStateFailureEvent(info, msg, false, currentTime);
                } else {
                    logGetNodeStateFailure(req, info, msg);
                }
            } else {
                msg = "Connection error: " + reason;
                reportGetNodeStateFailure(req, info, msg, msg, true, currentTime);
                newState.setDescription(msg);
            }
        } else {
            String msg = "Connection error: Unexpected error with no reason set. Assuming it is a network issue: " + req.getReply().getReturnCode() + ": " + req.getReply().getReturnMessage();
            reportGetNodeStateFailure(req, info, msg, msg, true, currentTime);
            newState.setDescription(msg);
        }
    } else if (req.getReply().getReturnCode() == Communicator.TRANSIENT_ERROR) {
        return null;
    } else if (req.getReply().getReturnCode() == ErrorCode.NO_SUCH_METHOD) {
        String msg = "no such RPC method error";
        reportGetNodeStateFailure(req, info, msg, msg, true, currentTime);
        newState.setState(State.DOWN).setDescription(msg + ": get node state");
    } else if (req.getReply().getReturnCode() == 75004) {
        // NOTE(review): 75004 is a magic number; from the message below it appears to be the
        // code a node returns while it refuses requests during shutdown - confirm and name it.
        String msg = "Node refused to answer RPC request and is likely stopping: " + req.getReply().getReturnMessage();
        // The node is shutting down and is not accepting requests from anyone
        if (info.getReportedState().getState().equals(State.STOPPING)) {
            log.log(LogLevel.DEBUG, "Failed to get node state from " + info + " because it is still shutting down.");
        } else {
            reportGetNodeStateFailure(req, info, msg, msg, false, currentTime);
        }
        newState.setState(State.STOPPING).setDescription(msg);
    } else {
        String msg = "Got unexpected error, assumed to be node issue " + req.getReply().getReturnCode() + ": " + req.getReply().getReturnMessage();
        reportGetNodeStateFailure(req, info, msg, msg, true, currentTime);
        newState.setState(State.DOWN).setDescription(msg);
    }
    return newState;
}

/**
 * Reports a failed get node state request: as a node event when the node's reported state
 * matches {@code oneOf("ui")}, otherwise as a debug log line unless the reported state's
 * description already equals {@code msg}.
 *
 * @param eventMsg  text of the node event, without the common prefix
 * @param msg       text used for the debug line and for the duplicate check
 * @param asWarning whether the node event is added at WARNING (true) or INFO (false) level
 */
private void reportGetNodeStateFailure(GetNodeStateRequest req, NodeInfo info, String eventMsg, String msg, boolean asWarning, long currentTime) {
    if (info.getReportedState().getState().oneOf("ui")) {
        addGetNodeStateFailureEvent(info, eventMsg, asWarning, currentTime);
    } else if (!info.getReportedState().hasDescription() || !info.getReportedState().getDescription().equals(msg)) {
        logGetNodeStateFailure(req, info, msg);
    }
}

/** Adds a REPORTED node event for a failed get node state request, at WARNING or INFO level. */
private void addGetNodeStateFailureEvent(NodeInfo info, String eventMsg, boolean asWarning, long currentTime) {
    NodeEvent event = NodeEvent.forBaseline(info, "Failed get node state request: " + eventMsg, NodeEvent.Type.REPORTED, currentTime);
    if (asWarning) {
        eventLog.addNodeOnlyEvent(event, LogLevel.WARNING);
    } else {
        eventLog.addNodeOnlyEvent(event, LogLevel.INFO);
    }
}

/** Writes the debug log line describing a failed get node state request. */
private void logGetNodeStateFailure(GetNodeStateRequest req, NodeInfo info, String msg) {
    log.log(LogLevel.DEBUG, "Failed to talk to node " + info + ": " + req.getReply().getReturnCode() + " " + req.getReply().getReturnMessage() + ": " + msg);
}
use of com.yahoo.vdslib.state.NodeState in project vespa by vespa-engine.
the class PartitionStateRequest method calculateResult.
/**
 * Builds the response for a single partition: optionally the node's host metrics, and
 * always the partition's disk state from the current consolidated cluster state,
 * registered under the key "generated".
 */
@Override
public Response.PartitionResponse calculateResult(RemoteClusterControllerTask.Context context) throws StateRestApiException {
    Response.PartitionResponse response = new Response.PartitionResponse();

    // Metrics are only included when a statistics report was requested.
    if (verboseReports.contains(VerboseReport.STATISTICS)) {
        fillInMetrics(
                context.cluster.getNodeInfo(id.getNode()).getHostInfo().getMetrics(),
                response);
    }

    DiskState partitionState = context.currentConsolidatedState
            .getNodeState(id.getNode())
            .getDiskState(id.getPartitionIndex());
    response.addState("generated", new Response.UnitStateImpl(partitionState));
    return response;
}
use of com.yahoo.vdslib.state.NodeState in project vespa by vespa-engine.
the class SetNodeStateRequest method setDistributorWantedState.
/**
 * Derives the distributor's wanted state from the wanted state being applied to the storage
 * node with the same index, and stores it if it differs from the distributor's current
 * user wanted state.
 *
 * <p>MAINTENANCE on the storage node maps to DOWN on the distributor and RETIRED maps to UP.
 * Any other state is carried over unchanged, provided it is a valid wanted state for a
 * distributor.
 *
 * @throws IllegalStateException if no distributor exists at the index, or the carried-over
 *                               state is not a valid distributor wanted state
 */
private static void setDistributorWantedState(ContentCluster cluster, int index, NodeState newStorageWantedState, NodeStateOrHostInfoChangeHandler stateListener) {
    Node distributor = new Node(NodeType.DISTRIBUTOR, index);
    NodeInfo distributorInfo = cluster.getNodeInfo(distributor);
    if (distributorInfo == null) {
        throw new IllegalStateException("Missing distributor at index " + distributor.getIndex());
    }

    State storageState = newStorageWantedState.getState();
    State distributorState;
    if (storageState == State.MAINTENANCE) {
        distributorState = State.DOWN;
    } else if (storageState == State.RETIRED) {
        distributorState = State.UP;
    } else {
        distributorState = storageState;
        if (!distributorState.validWantedNodeState(distributor.getType())) {
            throw new IllegalStateException("Distributor cannot be set to wanted state " + distributorState);
        }
    }

    NodeState wanted = new NodeState(distributor.getType(), distributorState);
    wanted.setDescription(newStorageWantedState.getDescription());

    // Only store the wanted state when the state or its description actually changes.
    NodeState current = distributorInfo.getUserWantedState();
    boolean unchanged = wanted.getState() == current.getState()
            && Objects.equals(wanted.getDescription(), current.getDescription());
    if (!unchanged) {
        setNewWantedState(distributorInfo, wanted, stateListener);
    }
}
Aggregations