Search in sources :

Example 16 with NodeId

use of org.onosproject.cluster.NodeId in project onos by opennetworkinglab.

the class DeviceManager method updateMastershipFor.

/**
 * Update the mastership for this device. Every other ready ONOS instance
 * is asked to probe the device; as soon as one of them reports that it can
 * reach the device, that node is selected as the next master.
 * <p>
 * If such a node exists and the local node is the master, the mastership
 * is handed over to the selected node. If the local node is a backup, it
 * demotes itself to the bottom of the candidates list.
 *
 * @param deviceId the device for which we have to update the mastership
 * @return the NodeId of a node that can reach the device, or null if
 * none of the probed ONOS instances can reach the device
 */
private NodeId updateMastershipFor(DeviceId deviceId) {
    Map<NodeId, CompletableFuture<Boolean>> probes = Maps.newHashMap();
    // Request a probe only if the node is ready; the local node is never probed
    for (ControllerNode onosNode : clusterService.getNodes()) {
        if (!clusterService.getState(onosNode.id()).isReady() || localNodeId.equals(onosNode.id())) {
            continue;
        }
        probes.put(onosNode.id(), communicationService.sendAndReceive(deviceId, PROBE_SUBJECT, SERIALIZER::encode, SERIALIZER::decode, onosNode.id()));
    }
    // Returns the first node able to reach the device
    // FIXME [SDFAB-935] optimize by looking at the MastershipInfo
    NodeId nextMaster = null;
    // FIXME Should we expose timeout? Understand if there is need to signal to the caller
    for (Map.Entry<NodeId, CompletableFuture<Boolean>> probe : probes.entrySet()) {
        // A probe that does not answer within the timeout counts as "not reachable"
        boolean isReachable = Tools.futureGetOrElse(probe.getValue(), PROBE_TIMEOUT_MILLIS, TimeUnit.MILLISECONDS, Boolean.FALSE);
        if (isReachable) {
            nextMaster = probe.getKey();
            // Stop at the first positive answer: waiting on the remaining probes
            // would only add up to one timeout per node without changing the outcome.
            break;
        }
    }
    // FIXME [SDFAB-935] optimize demote by looking at the MastershipInfo;
    if (nextMaster != null) {
        log.info("Device {} is still connected to {}", deviceId, nextMaster);
        MastershipRole myRole = mastershipService.getLocalRole(deviceId);
        if (myRole == MASTER) {
            log.info("Handing over the mastership of {} to next master {}", deviceId, nextMaster);
            mastershipAdminService.setRole(nextMaster, deviceId, MASTER);
            // Do not demote here because setRole can return before the mastership has been passed.
            // Current implementation promotes first the nextMaster as top of candidate list and then
            // transfer the leadership. We can use the BACKUP events to do demote or leverage periodic
            // checks.
        } else if (myRole == STANDBY) {
            log.info("Demote current instance to the bottom of the candidates list for {}", deviceId);
            mastershipAdminService.demote(localNodeId, deviceId);
        } else {
            log.debug("No valid role for {}", deviceId);
        }
    }
    return nextMaster;
}
Also used : CompletableFuture(java.util.concurrent.CompletableFuture) NodeId(org.onosproject.cluster.NodeId) ControllerNode(org.onosproject.cluster.ControllerNode) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) MastershipRole(org.onosproject.net.MastershipRole)

Example 17 with NodeId

use of org.onosproject.cluster.NodeId in project onos by opennetworkinglab.

the class ControllerNodeCodec method decode.

/**
 * Builds a controller node from its JSON representation.
 * <p>
 * Reads the "ip", "id" and "tcpPort" fields; the text of "ip" is passed as
 * the default for "id", and DEFAULT_PORT as the default for "tcpPort".
 *
 * @param json    JSON to decode; must not be null
 * @param context decoding context (not used by this method)
 * @return the decoded controller node
 */
@Override
public ControllerNode decode(ObjectNode json, CodecContext context) {
    checkNotNull(json, "JSON cannot be null");
    final String address = json.path("ip").asText();
    // Evaluate the constructor arguments in the same left-to-right order
    // as a single nested expression would.
    final NodeId nodeId = new NodeId(json.path("id").asText(address));
    final IpAddress ipAddress = IpAddress.valueOf(address);
    final int tcpPort = json.path("tcpPort").asInt(DEFAULT_PORT);
    return new DefaultControllerNode(nodeId, ipAddress, tcpPort);
}
Also used : NodeId(org.onosproject.cluster.NodeId) DefaultControllerNode(org.onosproject.cluster.DefaultControllerNode)

Example 18 with NodeId

use of org.onosproject.cluster.NodeId in project onos by opennetworkinglab.

the class GossipDeviceStore method handleAdvertisement.

/**
 * Responds to anti-entropy advertisement message.
 * <p>
 * Walks every locally stored device, provider and port and compares the
 * local timestamps with the ones carried by the advertisement:
 * <ul>
 * <li>entries the sender does not advertise, or for which the local copy is
 * newer, are pushed to the sender using the regular replication messages;</li>
 * <li>entries whose local timestamp differs from the advertised one without
 * being newer, and advertised entries that are unknown locally, are
 * collected as fragments to request;</li>
 * <li>offline timestamps are reconciled in both directions.</li>
 * </ul>
 * If anything is left to request, a freshly created local advertisement is
 * sent back to the sender (2-way anti-entropy).
 *
 * @param advertisement to respond to
 */
private void handleAdvertisement(DeviceAntiEntropyAdvertisement advertisement) {
    /*
     * NOTE that when an instance rejoins the cluster, it will generate
     * device events and send to the local apps through the delegate. This
     * approach might be not the best if the apps are not enough robust or
     * if there is no proper coordination in the cluster. Also, note that
     * any ECMap will act on the same way during the bootstrap process
     */
    final NodeId sender = advertisement.sender();
    // Work on private copies: processed entries are removed below so that
    // whatever remains at the end is known to the sender but not locally.
    Map<DeviceFragmentId, Timestamp> devAds = new HashMap<>(advertisement.deviceFingerPrints());
    Map<PortFragmentId, Timestamp> portAds = new HashMap<>(advertisement.ports());
    Map<DeviceId, Timestamp> offlineAds = new HashMap<>(advertisement.offline());
    // Fragments to request
    Collection<DeviceFragmentId> reqDevices = new ArrayList<>();
    Collection<PortFragmentId> reqPorts = new ArrayList<>();
    for (Entry<DeviceId, Map<ProviderId, DeviceDescriptions>> de : deviceDescs.entrySet()) {
        final DeviceId deviceId = de.getKey();
        final Map<ProviderId, DeviceDescriptions> lDevice = de.getValue();
        // device lock: the per-device description map is used as the monitor
        synchronized (lDevice) {
            // latestTimestamp across provider, seeded with the local offline timestamp
            // Note: can be null initially
            Timestamp localLatest = offline.get(deviceId);
            // handle device Ads
            for (Entry<ProviderId, DeviceDescriptions> prov : lDevice.entrySet()) {
                final ProviderId provId = prov.getKey();
                final DeviceDescriptions lDeviceDescs = prov.getValue();
                final DeviceFragmentId devFragId = new DeviceFragmentId(deviceId, provId);
                Timestamped<DeviceDescription> lProvDevice = lDeviceDescs.getDeviceDesc();
                Timestamp advDevTimestamp = devAds.get(devFragId);
                if (advDevTimestamp == null || lProvDevice.isNewerThan(advDevTimestamp)) {
                    // remote does not have it or outdated, suggest
                    log.trace("send to {} device update {} for {}", sender, lProvDevice, deviceId);
                    notifyPeer(sender, new InternalDeviceEvent(provId, deviceId, lProvDevice));
                } else if (!lProvDevice.timestamp().equals(advDevTimestamp)) {
                    // local is outdated, request
                    log.trace("need update {} < {} for device {} from {}", lProvDevice.timestamp(), advDevTimestamp, deviceId, sender);
                    reqDevices.add(devFragId);
                }
                // handle port Ads (same suggest/request decision as for the device itself)
                for (Entry<PortNumber, Timestamped<PortDescription>> pe : lDeviceDescs.getPortDescs().entrySet()) {
                    final PortNumber num = pe.getKey();
                    final Timestamped<PortDescription> lPort = pe.getValue();
                    final PortFragmentId portFragId = new PortFragmentId(deviceId, provId, num);
                    Timestamp advPortTimestamp = portAds.get(portFragId);
                    if (advPortTimestamp == null || lPort.isNewerThan(advPortTimestamp)) {
                        // remote does not have it or outdated, suggest
                        log.trace("send to {} port update {} for {}/{}", sender, lPort, deviceId, num);
                        notifyPeer(sender, new InternalPortStatusEvent(provId, deviceId, lPort));
                    } else if (!lPort.timestamp().equals(advPortTimestamp)) {
                        // local is outdated, request
                        log.trace("need update {} < {} for port {} from {}", lPort.timestamp(), advPortTimestamp, num, sender);
                        reqPorts.add(portFragId);
                    }
                    // remove port Ad already processed
                    portAds.remove(portFragId);
                }
                // end local port loop
                // remove device Ad already processed
                devAds.remove(devFragId);
                // find latest and update
                final Timestamp providerLatest = lDeviceDescs.getLatestTimestamp();
                if (localLatest == null || providerLatest.compareTo(localLatest) > 0) {
                    localLatest = providerLatest;
                }
            }
            // end local provider loop
            // checking if remote timestamp is more recent.
            Timestamp rOffline = offlineAds.get(deviceId);
            // NOTE(review): when localLatest is null this branch is taken even if
            // rOffline is null, so markOfflineInternal may receive a null
            // timestamp - confirm that it tolerates that.
            if (localLatest == null || (rOffline != null && rOffline.compareTo(localLatest) > 0)) {
                // remote offline timestamp suggests that the
                // device is off-line
                log.trace("remote offline timestamp from {} suggests that the device {} is off-line", sender, deviceId);
                markOfflineInternal(deviceId, rOffline);
            }
            // Re-read the local offline timestamp after the possible update above
            Timestamp lOffline = offline.get(deviceId);
            if (lOffline != null && rOffline == null) {
                // locally offline, but remote is online, suggest offline
                // NOTE(review): the log message below contains a typo ("sthat")
                log.trace("suggest to {} sthat the device {} is off-line", sender, deviceId);
                notifyPeer(sender, new InternalDeviceStatusChangeEvent(deviceId, lOffline, false));
            }
            // remove device offline Ad already processed
            offlineAds.remove(deviceId);
        }
        // end of device lock
    }
    // end local device loop
    // If there is any Ads left, request them
    log.trace("Ads left {}, {}", devAds, portAds);
    reqDevices.addAll(devAds.keySet());
    reqPorts.addAll(portAds.keySet());
    if (reqDevices.isEmpty() && reqPorts.isEmpty()) {
        log.trace("Nothing to request to remote peer {}", sender);
        return;
    }
    log.debug("Need to sync {} {}", reqDevices, reqPorts);
    // 2-way Anti-Entropy for now: instead of requesting the individual
    // fragments, reply with a local advertisement to the sender.
    try {
        unicastMessage(sender, DEVICE_ADVERTISE, createAdvertisement());
    } catch (IOException e) {
        log.error("Failed to send response advertisement to " + sender, e);
    }
    // Sketch of 3-way Anti-Entropy (not active; would request exactly the
    // fragments collected in reqDevices/reqPorts):
    // DeviceAntiEntropyRequest request = new DeviceAntiEntropyRequest(self, reqDevices, reqPorts);
    // ClusterMessage message = new ClusterMessage(
    // clusterService.getLocalNode().id(),
    // GossipDeviceStoreMessageSubjects.DEVICE_REQUEST,
    // SERIALIZER.encode(request));
    //
    // try {
    // clusterCommunicator.unicast(message, advertisement.sender());
    // } catch (IOException e) {
    // log.error("Failed to send advertisement reply to "
    // + advertisement.sender(), e);
    // }
}
Also used : ProviderId(org.onosproject.net.provider.ProviderId) DeviceDescription(org.onosproject.net.device.DeviceDescription) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) DeviceId(org.onosproject.net.DeviceId) ArrayList(java.util.ArrayList) WallClockTimestamp(org.onosproject.store.service.WallClockTimestamp) Timestamp(org.onosproject.store.Timestamp) MastershipBasedTimestamp(org.onosproject.store.impl.MastershipBasedTimestamp) MultiValuedTimestamp(org.onosproject.store.service.MultiValuedTimestamp) Timestamped(org.onosproject.store.impl.Timestamped) PortDescription(org.onosproject.net.device.PortDescription) IOException(java.io.IOException) ControllerNodeToNodeId.toNodeId(org.onosproject.cluster.ControllerNodeToNodeId.toNodeId) NodeId(org.onosproject.cluster.NodeId) PortNumber(org.onosproject.net.PortNumber) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap) EventuallyConsistentMap(org.onosproject.store.service.EventuallyConsistentMap)

Example 19 with NodeId

use of org.onosproject.cluster.NodeId in project onos by opennetworkinglab.

the class GossipDeviceStore method removeDevice.

/**
 * Removes the given device from the store.
 * <p>
 * When the device has no master, the local node requests a role for it and
 * treats itself as master if MASTER is granted. A master takes a timestamp
 * from the device clock and notifies the peers of the removal; any other
 * node forwards a remove request to the master and removes the device
 * locally with a null timestamp.
 *
 * @param deviceId identifier of the device to remove
 * @return the event produced by the internal removal, possibly null
 */
@Override
public synchronized DeviceEvent removeDevice(DeviceId deviceId) {
    final NodeId localId = clusterService.getLocalNode().id();
    NodeId owner = mastershipService.getMasterFor(deviceId);
    // if there exist a master, forward
    // if there is no master, try to become one and process
    boolean relinquishAtEnd = false;
    if (owner == null) {
        // Remember whether a role was held before the temporary request below.
        relinquishAtEnd = mastershipService.getLocalRole(deviceId) != MastershipRole.NONE;
        log.debug("Temporarily requesting role for {} to remove", deviceId);
        if (mastershipService.requestRoleFor(deviceId).join() == MastershipRole.MASTER) {
            owner = localId;
        }
    }
    final boolean isMaster = localId.equals(owner);
    final Timestamp timestamp;
    if (isMaster) {
        // The master stamps the removal with a fresh device timestamp.
        timestamp = deviceClockService.getTimestamp(deviceId);
    } else {
        // Not the master: forward the request and fall back to the current device timestamp.
        // NOTE(review): owner can still be null here when no master exists and
        // MASTER was not granted above - confirm unicast copes with a null target.
        log.debug("{} has control of {}, forwarding remove request", owner, deviceId);
        // TODO check unicast return value
        clusterCommunicator.unicast(deviceId, DEVICE_REMOVE_REQ, SERIALIZER::encode, owner);
        /* error log:
           log.error("Failed to forward {} remove request to {}", deviceId, master, e);
         */
        timestamp = null;
    }
    final DeviceEvent event = removeDeviceInternal(deviceId, timestamp);
    // Only the master propagates an effective removal to the peers.
    if (event != null && isMaster) {
        log.debug("Notifying peers of a device removed topology event for deviceId: {}", deviceId);
        notifyPeers(new InternalDeviceRemovedEvent(deviceId, timestamp));
    }
    notifyDelegateIfNotNull(event);
    // Relinquish mastership if acquired to remove the device.
    if (relinquishAtEnd) {
        log.debug("Relinquishing temporary role acquired for {}", deviceId);
        mastershipService.relinquishMastership(deviceId);
    }
    return event;
}
Also used : DeviceEvent(org.onosproject.net.device.DeviceEvent) ControllerNodeToNodeId.toNodeId(org.onosproject.cluster.ControllerNodeToNodeId.toNodeId) NodeId(org.onosproject.cluster.NodeId) MastershipRole(org.onosproject.net.MastershipRole) WallClockTimestamp(org.onosproject.store.service.WallClockTimestamp) Timestamp(org.onosproject.store.Timestamp) MastershipBasedTimestamp(org.onosproject.store.impl.MastershipBasedTimestamp) MultiValuedTimestamp(org.onosproject.store.service.MultiValuedTimestamp)

Example 20 with NodeId

use of org.onosproject.cluster.NodeId in project onos by opennetworkinglab.

the class GossipDeviceStore method createOrUpdateDevice.

/**
 * Creates the device described by the given provider, or updates it if it
 * is already stored.
 * <p>
 * The description is applied locally in every case; peers are notified
 * only when the local node is the master of the device.
 *
 * @param providerId        provider supplying the description
 * @param deviceId          identifier of the device
 * @param deviceDescription description to merge into the store
 * @return the resulting device event, or null if the internal update
 * produced no event
 */
@Override
public synchronized DeviceEvent createOrUpdateDevice(ProviderId providerId, DeviceId deviceId, DeviceDescription deviceDescription) {
    final NodeId self = clusterService.getLocalNode().id();
    final NodeId currentMaster = mastershipService.getMasterFor(deviceId);
    boolean isMaster = self.equals(currentMaster);
    // If this node is the master for the device, acquire a new timestamp. Otherwise,
    // use a 0,0 or tombstone timestamp to create the device if it doesn't already exist.
    Timestamp newTimestamp;
    try {
        if (isMaster) {
            newTimestamp = deviceClockService.getTimestamp(deviceId);
        } else {
            newTimestamp = removalRequest.getOrDefault(deviceId, DEFAULT_TIMESTAMP);
        }
    } catch (IllegalStateException e) {
        // No timestamp could be obtained: use the fallback timestamp and
        // behave as a non-master for the rest of this call.
        newTimestamp = removalRequest.getOrDefault(deviceId, DEFAULT_TIMESTAMP);
        isMaster = false;
    }
    final Timestamped<DeviceDescription> deltaDesc = new Timestamped<>(deviceDescription, newTimestamp);
    final Map<ProviderId, DeviceDescriptions> descsByProvider = getOrCreateDeviceDescriptionsMap(deviceId);
    final DeviceEvent deviceEvent;
    final Timestamped<DeviceDescription> mergedDesc;
    synchronized (descsByProvider) {
        deviceEvent = createOrUpdateDeviceInternal(providerId, deviceId, deltaDesc);
        if (deviceEvent == null) {
            // Nothing changed: neither peers nor the delegate are notified.
            return null;
        }
        // Read the merged description while still holding the device lock.
        mergedDesc = descsByProvider.get(providerId).getDeviceDesc();
    }
    // Process device update only if we're the master,
    // otherwise the peers are not notified from this node.
    if (isMaster) {
        log.debug("Notifying peers of a device update topology event for providerId: {} and deviceId: {}", providerId, deviceId);
        notifyPeers(new InternalDeviceEvent(providerId, deviceId, mergedDesc));
    }
    notifyDelegateIfNotNull(deviceEvent);
    return deviceEvent;
}
Also used : ProviderId(org.onosproject.net.provider.ProviderId) DeviceEvent(org.onosproject.net.device.DeviceEvent) DeviceDescription(org.onosproject.net.device.DeviceDescription) ControllerNodeToNodeId.toNodeId(org.onosproject.cluster.ControllerNodeToNodeId.toNodeId) NodeId(org.onosproject.cluster.NodeId) Timestamped(org.onosproject.store.impl.Timestamped) WallClockTimestamp(org.onosproject.store.service.WallClockTimestamp) Timestamp(org.onosproject.store.Timestamp) MastershipBasedTimestamp(org.onosproject.store.impl.MastershipBasedTimestamp) MultiValuedTimestamp(org.onosproject.store.service.MultiValuedTimestamp)

Aggregations

NodeId (org.onosproject.cluster.NodeId)150 DeviceId (org.onosproject.net.DeviceId)38 ClusterService (org.onosproject.cluster.ClusterService)36 Set (java.util.Set)26 MastershipRole (org.onosproject.net.MastershipRole)23 ControllerNode (org.onosproject.cluster.ControllerNode)22 Test (org.junit.Test)18 Activate (org.osgi.service.component.annotations.Activate)18 List (java.util.List)16 MastershipService (org.onosproject.mastership.MastershipService)15 ObjectNode (com.fasterxml.jackson.databind.node.ObjectNode)13 Map (java.util.Map)12 ImmutableSet (com.google.common.collect.ImmutableSet)11 ArrayList (java.util.ArrayList)11 Collectors (java.util.stream.Collectors)11 HashSet (java.util.HashSet)10 Optional (java.util.Optional)10 ClusterCommunicationService (org.onosproject.store.cluster.messaging.ClusterCommunicationService)10 Component (org.osgi.service.component.annotations.Component)9 Deactivate (org.osgi.service.component.annotations.Deactivate)9