Search in sources:

Example 1 with ClusterMembershipRevision

use of com.netflix.titus.api.clustermembership.model.ClusterMembershipRevision in project titus-control-plane by Netflix.

Class KubeRegistrationActions, method registerLocal.

/**
 * Builds a reactive action that writes the local member's registration record through the
 * {@link KubeMembershipExecutor} and emits a function applying the result to a {@link KubeClusterState}.
 *
 * <p>The new revision is obtained by applying {@code selfUpdate} to the current local member and
 * marking it as registered. If the state says the member is already registered an update is attempted,
 * otherwise a create is attempted. Errors that {@code KubeUtils.is4xx} does not recognize are propagated;
 * recognized ones trigger a recovery path (see inline comments). Any error leaving the pipeline is
 * converted with {@code KubeUtils::toConnectorException}.
 *
 * @param context          source of the {@link KubeMembershipExecutor}
 * @param kubeClusterState state providing the current local member revision and the registration flag
 * @param selfUpdate       produces the next revision from the current local member
 * @return a {@link Mono} emitting a state transition that sets the "must register" flag and stores the
 *         revision returned by the executor
 */
public static Mono<Function<KubeClusterState, KubeClusterState>> registerLocal(KubeContext context, KubeClusterState kubeClusterState, Function<ClusterMember, ClusterMembershipRevision<ClusterMember>> selfUpdate) {
    ClusterMember currentLocal = kubeClusterState.getLocalMemberRevision().getCurrent();
    ClusterMembershipRevision<ClusterMember> newRevision = setRegistrationStatus(selfUpdate.apply(currentLocal), true);
    KubeMembershipExecutor executor = context.getKubeMembershipExecutor();

    Mono<ClusterMembershipRevision<ClusterMember>> registration;
    if (!kubeClusterState.isRegistered()) {
        // Not registered yet: create the record, replacing it if the create is rejected with a 4xx.
        registration = executor.createLocal(newRevision).onErrorResume(error -> {
            if (KubeUtils.is4xx(error)) {
                // Bad or stale data record. Remove it first and then register.
                return removeThenCreateLocal(executor, newRevision);
            }
            return Mono.error(error);
        });
    } else {
        // Already registered: update the record in place, falling back to (re)creating it on a 4xx.
        registration = executor.updateLocal(newRevision).onErrorResume(error -> {
            if (KubeUtils.is4xx(error)) {
                if (KubeUtils.getHttpStatusCode(error) == 404) {
                    // The record does not exist, so there is nothing to remove before creating it.
                    return executor.createLocal(newRevision);
                }
                // Bad or stale data record. Remove it first and then register.
                return removeThenCreateLocal(executor, newRevision);
            }
            return Mono.error(error);
        });
    }

    return registration
            .onErrorMap(KubeUtils::toConnectorException)
            .map(registered -> state -> state.setMustRegister(true).setLocalClusterMemberRevision(registered, true));
}

/**
 * Removes the record of the member described by {@code revision} and then creates it again from that revision.
 */
private static Mono<ClusterMembershipRevision<ClusterMember>> removeThenCreateLocal(KubeMembershipExecutor executor, ClusterMembershipRevision<ClusterMember> revision) {
    return executor.removeMember(revision.getCurrent().getMemberId()).then(executor.createLocal(revision));
}
Also used : KubeUtils(com.netflix.titus.ext.kube.clustermembership.connector.transport.KubeUtils) KubeMembershipExecutor(com.netflix.titus.ext.kube.clustermembership.connector.KubeMembershipExecutor) ClusterMember(com.netflix.titus.api.clustermembership.model.ClusterMember) Mono(reactor.core.publisher.Mono) KubeClusterState(com.netflix.titus.ext.kube.clustermembership.connector.KubeClusterState) Function(java.util.function.Function) ClusterMembershipRevision(com.netflix.titus.api.clustermembership.model.ClusterMembershipRevision) KubeContext(com.netflix.titus.ext.kube.clustermembership.connector.KubeContext) ClusterMember(com.netflix.titus.api.clustermembership.model.ClusterMember) KubeMembershipExecutor(com.netflix.titus.ext.kube.clustermembership.connector.KubeMembershipExecutor) ClusterMembershipRevision(com.netflix.titus.api.clustermembership.model.ClusterMembershipRevision)

Example 2 with ClusterMembershipRevision

use of com.netflix.titus.api.clustermembership.model.ClusterMembershipRevision in project titus-control-plane by Netflix.

Class MultiNodeClusterMemberResolver, method refresh.

/**
 * Reconciles the set of per-IP member resolvers with the most recently built membership snapshot.
 *
 * <p>Seed nodes are added first. If the snapshot contains no members, every non-seed resolver is
 * disconnected and the method returns. Otherwise resolvers whose IP is neither present in the snapshot
 * nor a seed are disconnected, and members with an IP not yet tracked are connected.
 */
private void refresh() {
    // Always first add missing seed nodes.
    addNewSeeds();

    ClusterMembershipSnapshot snapshot = buildSnapshot();
    Map<String, ClusterMembershipRevision<ClusterMember>> revisionsById = snapshot.getMemberRevisions();

    // If no members, we cannot make any progress, so exit.
    if (revisionsById.isEmpty()) {
        Set<String> nonSeedIps = memberResolversByIpAddress.keySet().stream()
                .filter(ip -> !isSeedIp(ip))
                .collect(Collectors.toSet());
        disconnectTerminatedMembers(nonSeedIps);
        logger.debug("Cannot connect to any cluster member. Known members: {}", toResolvedMembersString());
        return;
    }

    // An IP address can be reused, so for each address keep only the revision with the highest timestamp
    // (on equal timestamps the revision seen first is kept).
    Map<String, ClusterMembershipRevision<ClusterMember>> latestByIp = new HashMap<>();
    for (ClusterMembershipRevision<ClusterMember> candidate : revisionsById.values()) {
        String ipAddress = addressSelector.apply(candidate.getCurrent()).getIpAddress();
        ClusterMembershipRevision<ClusterMember> known = latestByIp.get(ipAddress);
        if (known != null && known.getTimestamp() >= candidate.getTimestamp()) {
            continue;
        }
        latestByIp.put(ipAddress, candidate);
    }

    // Terminated members: tracked addresses that are neither resolved any more nor seeds.
    Set<String> terminatedIps = memberResolversByIpAddress.keySet().stream()
            .filter(ip -> !(latestByIp.containsKey(ip) || isSeedIp(ip)))
            .collect(Collectors.toSet());
    disconnectTerminatedMembers(terminatedIps);

    // New members: resolved addresses we are not tracking yet.
    Set<String> newIps = latestByIp.keySet().stream()
            .filter(ip -> !memberResolversByIpAddress.containsKey(ip))
            .collect(Collectors.toSet());
    connectNewMembers(latestByIp, newIps);
}
Also used : Stopwatch(com.google.common.base.Stopwatch) CollectionsExt(com.netflix.titus.common.util.CollectionsExt) LoggerFactory(org.slf4j.LoggerFactory) ClusterMembershipSnapshot(com.netflix.titus.api.clustermembership.model.ClusterMembershipSnapshot) HashMap(java.util.HashMap) ReactorExt(com.netflix.titus.common.util.rx.ReactorExt) Function(java.util.function.Function) Supplier(java.util.function.Supplier) ConcurrentMap(java.util.concurrent.ConcurrentMap) ScheduleReference(com.netflix.titus.common.framework.scheduler.ScheduleReference) ClusterMemberAddress(com.netflix.titus.api.clustermembership.model.ClusterMemberAddress) Duration(java.time.Duration) Map(java.util.Map) ClusterMember(com.netflix.titus.api.clustermembership.model.ClusterMember) ClusterMembershipRevision(com.netflix.titus.api.clustermembership.model.ClusterMembershipRevision) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) Retryers(com.netflix.titus.common.util.retry.Retryers) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ClusterMemberLeadership(com.netflix.titus.api.clustermembership.model.ClusterMemberLeadership) Set(java.util.Set) Collectors(java.util.stream.Collectors) ReplayProcessor(reactor.core.publisher.ReplayProcessor) TimeUnit(java.util.concurrent.TimeUnit) Flux(reactor.core.publisher.Flux) List(java.util.List) ScheduleDescriptor(com.netflix.titus.common.framework.scheduler.model.ScheduleDescriptor) Optional(java.util.Optional) Preconditions(com.google.common.base.Preconditions) TitusRuntime(com.netflix.titus.common.runtime.TitusRuntime) ClusterMembershipSnapshot(com.netflix.titus.api.clustermembership.model.ClusterMembershipSnapshot) ClusterMember(com.netflix.titus.api.clustermembership.model.ClusterMember) HashMap(java.util.HashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ClusterMembershipRevision(com.netflix.titus.api.clustermembership.model.ClusterMembershipRevision)

Example 3 with ClusterMembershipRevision

use of com.netflix.titus.api.clustermembership.model.ClusterMembershipRevision in project titus-control-plane by Netflix.

Class GrpcClusterMembershipLeaderNameResolver, method refresh.

/**
 * Pushes the current leader's address, taken from the given snapshot, to the name resolution listener.
 *
 * <p>The leader is looked up among the snapshot's member revisions by the member id of the leader
 * revision. When it is found, its selected address is reported via {@code listener.onAddresses}; when a
 * different leader than the last one is seen, the leader metrics are replaced. When no leader can be
 * resolved, or any exception is thrown, the listener receives an {@code UNAVAILABLE} error.
 *
 * @param snapshot membership snapshot to resolve the leader from
 */
private void refresh(ClusterMembershipSnapshot snapshot) {
    try {
        // Optional.map yields an empty Optional when the leader id has no entry in the member revisions.
        Optional<ClusterMembershipRevision<ClusterMember>> leaderOpt = snapshot.getLeaderRevision()
                .map(leadership -> snapshot.getMemberRevisions().get(leadership.getCurrent().getMemberId()));

        if (!leaderOpt.isPresent()) {
            // Only reset state (and warn) on the transition from "leader known" to "no leader".
            if (lastLeader != null) {
                leaderMetrics.close();
                lastLeader = null;
                leaderMetrics = null;
                logger.warn("No leader");
            }
            listener.onError(Status.UNAVAILABLE.withDescription("Unable to resolve leader server"));
            return;
        }

        ClusterMembershipRevision<ClusterMember> leader = leaderOpt.get();
        // Resolve the address before touching any state, so a failure here leaves lastLeader untouched.
        ClusterMemberAddress address = addressSelector.apply(leader.getCurrent());

        boolean sameLeader = lastLeader != null
                && lastLeader.getCurrent().getMemberId().equals(leader.getCurrent().getMemberId());
        if (sameLeader) {
            logger.debug("Refreshing: {}", lastLeader);
        } else {
            logger.info("New leader: {}", leader);
            // Close metrics of the previous leader, if there are any, before creating new ones.
            Evaluators.acceptNotNull(leaderMetrics, LeaderMetrics::close);
            lastLeader = leader;
            leaderMetrics = new LeaderMetrics(lastLeader, titusRuntime);
        }

        InetSocketAddress socketAddress = new InetSocketAddress(address.getIpAddress(), address.getPortNumber());
        List<EquivalentAddressGroup> servers = Collections.singletonList(new EquivalentAddressGroup(socketAddress));
        listener.onAddresses(servers, Attributes.EMPTY);
    } catch (Exception e) {
        logger.error("Unable to create server with error: ", e);
        listener.onError(Status.UNAVAILABLE.withCause(e));
    }
}
Also used : ClusterMember(com.netflix.titus.api.clustermembership.model.ClusterMember) EquivalentAddressGroup(io.grpc.EquivalentAddressGroup) InetSocketAddress(java.net.InetSocketAddress) ClusterMemberAddress(com.netflix.titus.api.clustermembership.model.ClusterMemberAddress) ClusterMembershipRevision(com.netflix.titus.api.clustermembership.model.ClusterMembershipRevision)

Example 4 with ClusterMembershipRevision

use of com.netflix.titus.api.clustermembership.model.ClusterMembershipRevision in project titus-control-plane by Netflix.

Class MultiNodeClusterMemberResolver, method report.

/**
 * Logs how the given snapshot differs from the last reported one, updates metrics, and remembers the
 * snapshot as the last reported.
 *
 * <p>On the first report every member and the leader (or its absence) is logged. On later reports only
 * newly discovered members, removed members, and leader changes are logged.
 *
 * @param newSnapshot snapshot to report
 */
private void report(ClusterMembershipSnapshot newSnapshot) {
    Map<String, ClusterMembershipRevision<ClusterMember>> currentRevisions = newSnapshot.getMemberRevisions();
    Optional<ClusterMembershipRevision<ClusterMemberLeadership>> currentLeader = newSnapshot.getLeaderRevision();

    if (lastReportedSnapshot != null) {
        Map<String, ClusterMembershipRevision<ClusterMember>> previousRevisions = lastReportedSnapshot.getMemberRevisions();

        // Members present now but absent from the previous report.
        for (Map.Entry<String, ClusterMembershipRevision<ClusterMember>> entry : currentRevisions.entrySet()) {
            if (!previousRevisions.containsKey(entry.getKey())) {
                logger.info("Discovered new cluster member: id={}, addresses={}", entry.getKey(), entry.getValue().getCurrent().getClusterMemberAddresses());
            }
        }

        // Members present in the previous report but absent now.
        for (String memberId : previousRevisions.keySet()) {
            if (!currentRevisions.containsKey(memberId)) {
                logger.info("Removed cluster member: {}", memberId);
            }
        }

        // Leader transitions: changed, lost, or newly elected.
        Optional<ClusterMembershipRevision<ClusterMemberLeadership>> previousLeader = lastReportedSnapshot.getLeaderRevision();
        if (!previousLeader.isPresent()) {
            if (currentLeader.isPresent()) {
                logger.info("Cluster leader is {}", currentLeader.get().getCurrent().getMemberId());
            }
        } else {
            ClusterMemberLeadership oldLeadership = previousLeader.get().getCurrent();
            if (!currentLeader.isPresent()) {
                logger.info("{} is no longer leader, and no new leader is re-elected yet", oldLeadership.getMemberId());
            } else {
                ClusterMemberLeadership newLeadership = currentLeader.get().getCurrent();
                if (!newLeadership.getMemberId().equals(oldLeadership.getMemberId())) {
                    logger.info("Leader changed from {} to {}", oldLeadership.getMemberId(), newLeadership.getMemberId());
                }
            }
        }
    } else {
        // First report: everything in the snapshot is new.
        for (Map.Entry<String, ClusterMembershipRevision<ClusterMember>> entry : currentRevisions.entrySet()) {
            logger.info("Discovered new cluster member: id={}, addresses={}", entry.getKey(), entry.getValue().getCurrent().getClusterMemberAddresses());
        }
        if (!currentLeader.isPresent()) {
            logger.info("No leader yet");
        } else {
            logger.info("Cluster leader is {}", currentLeader.get().getCurrent().getMemberId());
        }
    }

    metrics.updateConnectedMembers(memberResolversByIpAddress);
    metrics.updateSnapshot(newSnapshot);
    this.lastReportedSnapshot = newSnapshot;
}
Also used : ClusterMembershipRevision(com.netflix.titus.api.clustermembership.model.ClusterMembershipRevision) ClusterMemberLeadership(com.netflix.titus.api.clustermembership.model.ClusterMemberLeadership)

Example 5 with ClusterMembershipRevision

use of com.netflix.titus.api.clustermembership.model.ClusterMembershipRevision in project titus-control-plane by Netflix.

Class MultiNodeClusterMemberResolver, method buildSnapshot.

/**
 * Merges the snapshots returned by {@code findHealthySnapshots()} into a single snapshot.
 *
 * <p>For every member id one revision is picked by {@code findBestMemberRevision}; the leader revision
 * with the highest revision number is used (the first one encountered wins on ties); staleness is the
 * minimum staleness among the merged snapshots, or 0 when there are none.
 *
 * @return the merged snapshot, or an empty snapshot when no member resolvers are registered
 */
private ClusterMembershipSnapshot buildSnapshot() {
    if (memberResolversByIpAddress.isEmpty()) {
        return ClusterMembershipSnapshot.empty();
    }

    List<ClusterMembershipSnapshot> healthySnapshots = findHealthySnapshots();
    ClusterMembershipSnapshot.Builder builder = ClusterMembershipSnapshot.newBuilder();

    // Collect all revisions reported for the same member id, then choose one revision per member.
    Map<String, List<ClusterMembershipRevision<ClusterMember>>> revisionsByMemberId = healthySnapshots.stream()
            .flatMap(healthy -> healthy.getMemberRevisions().values().stream())
            .collect(Collectors.groupingBy(memberRevision -> memberRevision.getCurrent().getMemberId()));
    List<ClusterMembershipRevision<ClusterMember>> chosenRevisions = revisionsByMemberId.values().stream()
            .map(this::findBestMemberRevision)
            .collect(Collectors.toList());
    builder.withMemberRevisions(chosenRevisions);

    // Find leader: keep the leader revision with the strictly greatest revision number.
    Optional<ClusterMembershipRevision<ClusterMemberLeadership>> newestLeader = healthySnapshots.stream()
            .map(ClusterMembershipSnapshot::getLeaderRevision)
            .filter(Optional::isPresent)
            .map(Optional::get)
            .reduce((best, candidate) -> best.getRevision() < candidate.getRevision() ? candidate : best);
    newestLeader.ifPresent(builder::withLeaderRevision);

    // The merged snapshot is as fresh as the freshest contributing snapshot.
    builder.withStalenessMs(healthySnapshots.stream().mapToLong(ClusterMembershipSnapshot::getStalenessMs).min().orElse(0));

    return builder.build();
}
Also used : Stopwatch(com.google.common.base.Stopwatch) CollectionsExt(com.netflix.titus.common.util.CollectionsExt) LoggerFactory(org.slf4j.LoggerFactory) ClusterMembershipSnapshot(com.netflix.titus.api.clustermembership.model.ClusterMembershipSnapshot) HashMap(java.util.HashMap) ReactorExt(com.netflix.titus.common.util.rx.ReactorExt) Function(java.util.function.Function) Supplier(java.util.function.Supplier) ConcurrentMap(java.util.concurrent.ConcurrentMap) ScheduleReference(com.netflix.titus.common.framework.scheduler.ScheduleReference) ClusterMemberAddress(com.netflix.titus.api.clustermembership.model.ClusterMemberAddress) Duration(java.time.Duration) Map(java.util.Map) ClusterMember(com.netflix.titus.api.clustermembership.model.ClusterMember) ClusterMembershipRevision(com.netflix.titus.api.clustermembership.model.ClusterMembershipRevision) Logger(org.slf4j.Logger) Iterator(java.util.Iterator) Retryers(com.netflix.titus.common.util.retry.Retryers) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) ClusterMemberLeadership(com.netflix.titus.api.clustermembership.model.ClusterMemberLeadership) Set(java.util.Set) Collectors(java.util.stream.Collectors) ReplayProcessor(reactor.core.publisher.ReplayProcessor) TimeUnit(java.util.concurrent.TimeUnit) Flux(reactor.core.publisher.Flux) List(java.util.List) ScheduleDescriptor(com.netflix.titus.common.framework.scheduler.model.ScheduleDescriptor) Optional(java.util.Optional) Preconditions(com.google.common.base.Preconditions) TitusRuntime(com.netflix.titus.common.runtime.TitusRuntime) ClusterMembershipRevision(com.netflix.titus.api.clustermembership.model.ClusterMembershipRevision) ClusterMembershipSnapshot(com.netflix.titus.api.clustermembership.model.ClusterMembershipSnapshot) ClusterMember(com.netflix.titus.api.clustermembership.model.ClusterMember) List(java.util.List)

Aggregations

ClusterMembershipRevision (com.netflix.titus.api.clustermembership.model.ClusterMembershipRevision): 5; ClusterMember (com.netflix.titus.api.clustermembership.model.ClusterMember): 4; ClusterMemberAddress (com.netflix.titus.api.clustermembership.model.ClusterMemberAddress): 3; ClusterMemberLeadership (com.netflix.titus.api.clustermembership.model.ClusterMemberLeadership): 3; Function (java.util.function.Function): 3; Preconditions (com.google.common.base.Preconditions): 2; Stopwatch (com.google.common.base.Stopwatch): 2; ClusterMembershipSnapshot (com.netflix.titus.api.clustermembership.model.ClusterMembershipSnapshot): 2; ScheduleReference (com.netflix.titus.common.framework.scheduler.ScheduleReference): 2; ScheduleDescriptor (com.netflix.titus.common.framework.scheduler.model.ScheduleDescriptor): 2; TitusRuntime (com.netflix.titus.common.runtime.TitusRuntime): 2; CollectionsExt (com.netflix.titus.common.util.CollectionsExt): 2; Retryers (com.netflix.titus.common.util.retry.Retryers): 2; ReactorExt (com.netflix.titus.common.util.rx.ReactorExt): 2; Duration (java.time.Duration): 2; HashMap (java.util.HashMap): 2; Iterator (java.util.Iterator): 2; List (java.util.List): 2; Map (java.util.Map): 2; Optional (java.util.Optional): 2