Use of org.onosproject.cluster.NodeId in project onos by opennetworkinglab.
Class DeviceManager, method updateMastershipFor.
/**
 * Update the mastership for this device. Every other ready ONOS instance is
 * asked to probe the device; as soon as one of them reports that it can reach
 * the device, that node is selected. If the local node is the master, the
 * mastership is handed over to the selected node. If the local node is a
 * backup, it demotes itself to the bottom of the candidates list.
 *
 * @param deviceId the device for which we have to update the mastership
 * @return the NodeId of any node that can reach the device, or null if
 * none of the ONOS instances can reach the device
 */
private NodeId updateMastershipFor(DeviceId deviceId) {
    Map<NodeId, CompletableFuture<Boolean>> probes = Maps.newHashMap();
    // Request a probe only if the node is ready; the local node is never probed
    for (ControllerNode onosNode : clusterService.getNodes()) {
        if (!clusterService.getState(onosNode.id()).isReady() || localNodeId.equals(onosNode.id())) {
            continue;
        }
        probes.put(onosNode.id(), communicationService.sendAndReceive(deviceId, PROBE_SUBJECT,
                SERIALIZER::encode, SERIALIZER::decode, onosNode.id()));
    }
    // Returns the first node able to reach the device
    // FIXME [SDFAB-935] optimize by looking at the MastershipInfo
    NodeId nextMaster = null;
    // FIXME Should we expose timeout? Understand if there is need to signal to the caller
    for (Map.Entry<NodeId, CompletableFuture<Boolean>> probe : probes.entrySet()) {
        // Kept boxed on purpose: a null reply is treated as "not reachable" instead of
        // failing on auto-unboxing.
        Boolean isReachable = Tools.futureGetOrElse(probe.getValue(), PROBE_TIMEOUT_MILLIS,
                TimeUnit.MILLISECONDS, Boolean.FALSE);
        if (Boolean.TRUE.equals(isReachable)) {
            nextMaster = probe.getKey();
            // One reachable node is enough. All probes were already sent above, so stopping
            // here only avoids blocking up to PROBE_TIMEOUT_MILLIS on each remaining reply.
            break;
        }
    }
    // FIXME [SDFAB-935] optimize demote by looking at the MastershipInfo;
    if (nextMaster != null) {
        log.info("Device {} is still connected to {}", deviceId, nextMaster);
        MastershipRole myRole = mastershipService.getLocalRole(deviceId);
        if (myRole == MASTER) {
            log.info("Handing over the mastership of {} to next master {}", deviceId, nextMaster);
            mastershipAdminService.setRole(nextMaster, deviceId, MASTER);
            // Do not demote here because setRole can return before the mastership has been passed.
            // Current implementation promotes first the nextMaster as top of candidate list and then
            // transfer the leadership. We can use the BACKUP events to do demote or leverage periodic
            // checks.
        } else if (myRole == STANDBY) {
            log.info("Demote current instance to the bottom of the candidates list for {}", deviceId);
            mastershipAdminService.demote(localNodeId, deviceId);
        } else {
            log.debug("No valid role for {}", deviceId);
        }
    }
    return nextMaster;
}
Use of org.onosproject.cluster.NodeId in project onos by opennetworkinglab.
Class ControllerNodeCodec, method decode.
/**
 * Builds a controller node from its JSON representation.
 * <p>
 * Reads the "ip", "id" and "tcpPort" fields. The "ip" text is passed as the
 * default for "id", and {@code DEFAULT_PORT} as the default for "tcpPort".
 *
 * @param json    JSON object to decode; must not be null
 * @param context codec context (not used by this method)
 * @return the decoded controller node
 */
@Override
public ControllerNode decode(ObjectNode json, CodecContext context) {
    checkNotNull(json, "JSON cannot be null");

    final String address = json.path("ip").asText();
    final NodeId nodeId = new NodeId(json.path("id").asText(address));
    final IpAddress nodeIp = IpAddress.valueOf(address);
    final int tcpPort = json.path("tcpPort").asInt(DEFAULT_PORT);

    return new DefaultControllerNode(nodeId, nodeIp, tcpPort);
}
Use of org.onosproject.cluster.NodeId in project onos by opennetworkinglab.
Class GossipDeviceStore, method handleAdvertisement.
/**
* Responds to an anti-entropy advertisement message from a peer.
* <p>
* Walks every locally known device, provider and port and compares the local
* timestamps with the ones the sender advertised:
* <ul>
* <li>entries the sender did not advertise, or for which the local copy is newer,
* are pushed to the sender through {@code notifyPeer};</li>
* <li>entries whose local timestamp differs from the advertised one without being
* newer are recorded as fragments to request;</li>
* <li>advertised entries with no local counterpart are recorded as fragments to
* request as well.</li>
* </ul>
* Off-line markers are reconciled per device in the same pass. If anything is left
* to request, the local advertisement is sent back to the sender (2-way
* anti-entropy); the collected fragment lists themselves are only logged.
*
* @param advertisement to respond to
*/
private void handleAdvertisement(DeviceAntiEntropyAdvertisement advertisement) {
/*
* NOTE that when an instance rejoins the cluster, it will generate
* device events and send to the local apps through the delegate. This
* approach might be not the best if the apps are not enough robust or
* if there is no proper coordination in the cluster. Also, note that
* any ECMap will act on the same way during the bootstrap process
*/
final NodeId sender = advertisement.sender();
// Work on private copies: entries are removed from these maps as they are
// processed, so whatever remains afterwards is unknown to the local store.
Map<DeviceFragmentId, Timestamp> devAds = new HashMap<>(advertisement.deviceFingerPrints());
Map<PortFragmentId, Timestamp> portAds = new HashMap<>(advertisement.ports());
Map<DeviceId, Timestamp> offlineAds = new HashMap<>(advertisement.offline());
// Fragments to request
Collection<DeviceFragmentId> reqDevices = new ArrayList<>();
Collection<PortFragmentId> reqPorts = new ArrayList<>();
for (Entry<DeviceId, Map<ProviderId, DeviceDescriptions>> de : deviceDescs.entrySet()) {
final DeviceId deviceId = de.getKey();
final Map<ProviderId, DeviceDescriptions> lDevice = de.getValue();
// The per-device description map doubles as the lock for that device.
synchronized (lDevice) {
// latestTimestamp across provider, seeded with the local off-line timestamp
// Note: can be null initially
Timestamp localLatest = offline.get(deviceId);
// handle device Ads
for (Entry<ProviderId, DeviceDescriptions> prov : lDevice.entrySet()) {
final ProviderId provId = prov.getKey();
final DeviceDescriptions lDeviceDescs = prov.getValue();
final DeviceFragmentId devFragId = new DeviceFragmentId(deviceId, provId);
Timestamped<DeviceDescription> lProvDevice = lDeviceDescs.getDeviceDesc();
Timestamp advDevTimestamp = devAds.get(devFragId);
if (advDevTimestamp == null || lProvDevice.isNewerThan(advDevTimestamp)) {
// remote does not have it or outdated, suggest
log.trace("send to {} device update {} for {}", sender, lProvDevice, deviceId);
notifyPeer(sender, new InternalDeviceEvent(provId, deviceId, lProvDevice));
} else if (!lProvDevice.timestamp().equals(advDevTimestamp)) {
// local is outdated, request
log.trace("need update {} < {} for device {} from {}", lProvDevice.timestamp(), advDevTimestamp, deviceId, sender);
reqDevices.add(devFragId);
}
// handle port Ads (same three-way comparison as for the device description)
for (Entry<PortNumber, Timestamped<PortDescription>> pe : lDeviceDescs.getPortDescs().entrySet()) {
final PortNumber num = pe.getKey();
final Timestamped<PortDescription> lPort = pe.getValue();
final PortFragmentId portFragId = new PortFragmentId(deviceId, provId, num);
Timestamp advPortTimestamp = portAds.get(portFragId);
if (advPortTimestamp == null || lPort.isNewerThan(advPortTimestamp)) {
// remote does not have it or outdated, suggest
log.trace("send to {} port update {} for {}/{}", sender, lPort, deviceId, num);
notifyPeer(sender, new InternalPortStatusEvent(provId, deviceId, lPort));
} else if (!lPort.timestamp().equals(advPortTimestamp)) {
// local is outdated, request
log.trace("need update {} < {} for port {} from {}", lPort.timestamp(), advPortTimestamp, num, sender);
reqPorts.add(portFragId);
}
// remove port Ad already processed
portAds.remove(portFragId);
}
// end local port loop
// remove device Ad already processed
devAds.remove(devFragId);
// find latest and update
final Timestamp providerLatest = lDeviceDescs.getLatestTimestamp();
if (localLatest == null || providerLatest.compareTo(localLatest) > 0) {
localLatest = providerLatest;
}
}
// end local provider loop
// checking if remote timestamp is more recent.
Timestamp rOffline = offlineAds.get(deviceId);
// NOTE(review): when localLatest is null this branch is taken even if rOffline
// is null, so markOfflineInternal can receive a null timestamp - confirm that
// markOfflineInternal tolerates it.
if (localLatest == null || (rOffline != null && rOffline.compareTo(localLatest) > 0)) {
// remote offline timestamp suggests that the
// device is off-line
log.trace("remote offline timestamp from {} suggests that the device {} is off-line", sender, deviceId);
markOfflineInternal(deviceId, rOffline);
}
// Re-read the local off-line marker after the possible update above.
Timestamp lOffline = offline.get(deviceId);
if (lOffline != null && rOffline == null) {
// locally offline, but remote is online, suggest offline
log.trace("suggest to {} sthat the device {} is off-line", sender, deviceId);
notifyPeer(sender, new InternalDeviceStatusChangeEvent(deviceId, lOffline, false));
}
// remove device offline Ad already processed
offlineAds.remove(deviceId);
}
// end device lock
}
// end local device loop
// If there is any Ads left, request them
log.trace("Ads left {}, {}", devAds, portAds);
reqDevices.addAll(devAds.keySet());
reqPorts.addAll(portAds.keySet());
if (reqDevices.isEmpty() && reqPorts.isEmpty()) {
log.trace("Nothing to request to remote peer {}", sender);
return;
}
log.debug("Need to sync {} {}", reqDevices, reqPorts);
// 2-way Anti-Entropy for now: reply with our own advertisement so that the
// sender pushes whatever we are missing. A send failure is logged, not rethrown.
try {
unicastMessage(sender, DEVICE_ADVERTISE, createAdvertisement());
} catch (IOException e) {
log.error("Failed to send response advertisement to " + sender, e);
}
// Sketch of 3-way Anti-Entropy
// DeviceAntiEntropyRequest request = new DeviceAntiEntropyRequest(self, reqDevices, reqPorts);
// ClusterMessage message = new ClusterMessage(
// clusterService.getLocalNode().id(),
// GossipDeviceStoreMessageSubjects.DEVICE_REQUEST,
// SERIALIZER.encode(request));
//
// try {
// clusterCommunicator.unicast(message, advertisement.sender());
// } catch (IOException e) {
// log.error("Failed to send advertisement reply to "
// + advertisement.sender(), e);
// }
}
Use of org.onosproject.cluster.NodeId in project onos by opennetworkinglab.
Class GossipDeviceStore, method removeDevice.
/**
 * Removes a device from the store.
 * <p>
 * If no master is known for the device, this node first requests the role for
 * itself. When this node is (or becomes) the master, a fresh timestamp is taken
 * for the removal and peers are notified; otherwise the removal request is
 * forwarded to the master and the local removal runs with a null timestamp.
 *
 * @param deviceId identifier of the device to remove
 * @return the event produced by the local removal, possibly null
 */
@Override
public synchronized DeviceEvent removeDevice(DeviceId deviceId) {
    final NodeId localId = clusterService.getLocalNode().id();
    NodeId masterId = mastershipService.getMasterFor(deviceId);

    // No master known: try to become one so the removal can be processed here.
    boolean releaseRoleWhenDone = false;
    if (masterId == null) {
        // NOTE(review): the role is released at the end only when this node already
        // held a non-NONE role before the request - confirm this is the intended condition.
        releaseRoleWhenDone = mastershipService.getLocalRole(deviceId) != MastershipRole.NONE;
        log.debug("Temporarily requesting role for {} to remove", deviceId);
        final MastershipRole grantedRole = mastershipService.requestRoleFor(deviceId).join();
        if (grantedRole == MastershipRole.MASTER) {
            masterId = localId;
        }
    }

    final boolean isMaster = localId.equals(masterId);
    final Timestamp timestamp;
    if (isMaster) {
        // The master stamps the removal with a new device timestamp.
        timestamp = deviceClockService.getTimestamp(deviceId);
    } else {
        // Not the master: hand the request over and fall back to a null timestamp locally.
        // NOTE(review): masterId can still be null here when the role request above did
        // not yield MASTER - verify how unicast treats a null target.
        log.debug("{} has control of {}, forwarding remove request", masterId, deviceId);
        // TODO check unicast return value
        clusterCommunicator.unicast(deviceId, DEVICE_REMOVE_REQ, SERIALIZER::encode, masterId);
        timestamp = null;
    }

    final DeviceEvent event = removeDeviceInternal(deviceId, timestamp);

    // Only the master propagates an actual removal to the other instances.
    if (event != null && isMaster) {
        log.debug("Notifying peers of a device removed topology event for deviceId: {}", deviceId);
        notifyPeers(new InternalDeviceRemovedEvent(deviceId, timestamp));
    }
    notifyDelegateIfNotNull(event);

    if (releaseRoleWhenDone) {
        log.debug("Relinquishing temporary role acquired for {}", deviceId);
        mastershipService.relinquishMastership(deviceId);
    }
    return event;
}
Use of org.onosproject.cluster.NodeId in project onos by opennetworkinglab.
Class GossipDeviceStore, method createOrUpdateDevice.
/**
 * Creates a device or updates its description for the given provider.
 * <p>
 * When this node is the master for the device, the description is stamped with
 * a timestamp from the device clock service and the merged description is sent
 * to the peers. Otherwise, or when the clock service throws
 * {@link IllegalStateException}, the timestamp comes from {@code removalRequest}
 * with {@code DEFAULT_TIMESTAMP} as fallback and peers are not notified.
 *
 * @param providerId        provider supplying the description
 * @param deviceId          identifier of the device
 * @param deviceDescription description to apply
 * @return the resulting device event, or null if the internal update produced none
 */
@Override
public synchronized DeviceEvent createOrUpdateDevice(ProviderId providerId, DeviceId deviceId, DeviceDescription deviceDescription) {
    final NodeId self = clusterService.getLocalNode().id();
    final NodeId currentMaster = mastershipService.getMasterFor(deviceId);
    boolean actingAsMaster = self.equals(currentMaster);

    // Pick the timestamp for this update; a clock failure demotes us to non-master handling.
    Timestamp stamp;
    try {
        if (actingAsMaster) {
            stamp = deviceClockService.getTimestamp(deviceId);
        } else {
            stamp = removalRequest.getOrDefault(deviceId, DEFAULT_TIMESTAMP);
        }
    } catch (IllegalStateException ignored) {
        stamp = removalRequest.getOrDefault(deviceId, DEFAULT_TIMESTAMP);
        actingAsMaster = false;
    }

    final Timestamped<DeviceDescription> delta = new Timestamped<>(deviceDescription, stamp);
    final Map<ProviderId, DeviceDescriptions> providerDescs = getOrCreateDeviceDescriptionsMap(deviceId);

    final DeviceEvent event;
    final Timestamped<DeviceDescription> merged;
    synchronized (providerDescs) {
        event = createOrUpdateDeviceInternal(providerId, deviceId, delta);
        if (event == null) {
            // Nothing changed locally: no peer or delegate notification.
            return null;
        }
        merged = providerDescs.get(providerId).getDeviceDesc();
    }

    // Only the master replicates the merged description to the other instances.
    if (actingAsMaster) {
        log.debug("Notifying peers of a device update topology event for providerId: {} and deviceId: {}", providerId, deviceId);
        notifyPeers(new InternalDeviceEvent(providerId, deviceId, merged));
    }
    notifyDelegateIfNotNull(event);
    return event;
}
Aggregations