Search in sources :

Example 1 with EndpointsForRange

use of org.apache.cassandra.locator.EndpointsForRange in project cassandra by apache.

In the class ActiveRepairService, method getNeighbors.

/**
 * Finds the replicas, excluding this node, that share the requested range with us.
 *
 * @param keyspaceName keyspace to repair
 * @param keyspaceLocalRanges the ranges of {@code keyspaceName} that are local to this node
 * @param toRepair the range to repair
 * @param dataCenters data centers to restrict the result to; skipped when {@code null} or empty
 * @param hosts host names to restrict the result to; only consulted when no data centers are given,
 *              and skipped when {@code null} or empty
 *
 * @return the neighbors sharing {@code toRepair}; empty when no local range contains it or when no
 *         replica set is known for the containing range
 * @throws IllegalArgumentException if {@code toRepair} intersects a local range without being contained
 *         in one, if a host cannot be resolved, if the current host is not among the accepted hosts,
 *         or if no accepted host other than the current one remains
 */
public static EndpointsForRange getNeighbors(String keyspaceName, Iterable<Range<Token>> keyspaceLocalRanges, Range<Token> toRepair, Collection<String> dataCenters, Collection<String> hosts) {
    StorageService storageService = StorageService.instance;
    EndpointsByRange replicasByRange = storageService.getRangeToAddressMap(keyspaceName);

    // Locate the local range that fully encloses the requested one; a partial overlap is rejected.
    Range<Token> enclosingRange = null;
    for (Range<Token> localRange : keyspaceLocalRanges) {
        if (localRange.contains(toRepair)) {
            enclosingRange = localRange;
            break;
        }
        if (localRange.intersects(toRepair)) {
            String template = "Requested range %s intersects a local range (%s) " + "but is not fully contained in one; this would lead to " + "imprecise repair. keyspace: %s";
            throw new IllegalArgumentException(String.format(template, toRepair.toString(), localRange.toString(), keyspaceName));
        }
    }

    boolean hasReplicaSet = enclosingRange != null && replicasByRange.containsKey(enclosingRange);
    if (!hasReplicaSet)
        return EndpointsForRange.empty(toRepair);

    EndpointsForRange neighbors = replicasByRange.get(enclosingRange).withoutSelf();

    // A data center restriction takes precedence over a host restriction.
    if (dataCenters != null && !dataCenters.isEmpty()) {
        Multimap<String, InetAddressAndPort> endpointsByDc = storageService.getTokenMetadata().cloneOnlyTokenMap().getTopology().getDatacenterEndpoints();
        Iterable<InetAddressAndPort> allowedEndpoints = concat(transform(dataCenters, endpointsByDc::get));
        return neighbors.select(allowedEndpoints, true);
    }

    if (hosts == null || hosts.isEmpty())
        return neighbors;

    // Keep only the requested hosts that are either this node or one of its neighbors for the range.
    Set<InetAddressAndPort> acceptedHosts = new HashSet<>();
    for (final String host : hosts) {
        InetAddressAndPort endpoint;
        try {
            endpoint = InetAddressAndPort.getByName(host.trim());
        } catch (UnknownHostException e) {
            throw new IllegalArgumentException("Unknown host specified " + host, e);
        }
        if (endpoint.equals(FBUtilities.getBroadcastAddressAndPort()) || neighbors.endpoints().contains(endpoint))
            acceptedHosts.add(endpoint);
    }

    if (!acceptedHosts.contains(FBUtilities.getBroadcastAddressAndPort()))
        throw new IllegalArgumentException("The current host must be part of the repair");

    // Besides this node, at least one other accepted host is required.
    if (acceptedHosts.size() < 2) {
        String template = "Specified hosts %s do not share range %s needed for repair. Either restrict repair ranges " + "with -st/-et options, or specify one of the neighbors that share this range with " + "this node: %s.";
        throw new IllegalArgumentException(String.format(template, hosts, toRepair, neighbors));
    }

    acceptedHosts.remove(FBUtilities.getBroadcastAddressAndPort());
    return neighbors.keep(acceptedHosts);
}
Also used : InetAddressAndPort(org.apache.cassandra.locator.InetAddressAndPort) Collections.synchronizedSet(java.util.Collections.synchronizedSet) ImmutableSet(com.google.common.collect.ImmutableSet) UnknownHostException(java.net.UnknownHostException) EndpointsByRange(org.apache.cassandra.locator.EndpointsByRange) Token(org.apache.cassandra.dht.Token) TokenMetadata(org.apache.cassandra.locator.TokenMetadata) EndpointsForRange(org.apache.cassandra.locator.EndpointsForRange)

Example 2 with EndpointsForRange

use of org.apache.cassandra.locator.EndpointsForRange in project cassandra by apache.

In the class RangeStreamer, method convertPreferredEndpointsToWorkMap.

/**
 * Re-keys the preferred endpoints by the endpoint of each source replica.
 *
 * The input is keyed by the Replica to be fetched (asserted to be this node), whereas the result is
 * keyed by the source endpoint and holds one {@code FetchReplica} per (local, source) pair.
 *
 * @param preferredEndpoints source replicas grouped by the local replica they will be fetched for
 * @return a multimap from source endpoint to the fetches to perform against it
 */
public static Multimap<InetAddressAndPort, FetchReplica> convertPreferredEndpointsToWorkMap(EndpointsByReplica preferredEndpoints) {
    Multimap<InetAddressAndPort, FetchReplica> fetchesBySource = HashMultimap.create();
    for (Map.Entry<Replica, EndpointsForRange> entry : preferredEndpoints.entrySet()) {
        Replica local = entry.getKey();
        for (Replica remote : entry.getValue()) {
            // The key must be this node and a source must never be this node.
            assert local.isSelf();
            assert !remote.isSelf();
            fetchesBySource.put(remote.endpoint(), new FetchReplica(local, remote));
        }
    }
    logger.debug("Work map {}", fetchesBySource);
    return fetchesBySource;
}
Also used : InetAddressAndPort(org.apache.cassandra.locator.InetAddressAndPort) EndpointsForRange(org.apache.cassandra.locator.EndpointsForRange) Map(java.util.Map) HashMap(java.util.HashMap) Replica.fullReplica(org.apache.cassandra.locator.Replica.fullReplica) Replica(org.apache.cassandra.locator.Replica) EndpointsByReplica(org.apache.cassandra.locator.EndpointsByReplica)

Example 3 with EndpointsForRange

use of org.apache.cassandra.locator.EndpointsForRange in project cassandra by apache.

In the class RangeStreamer, method calculateRangesToFetchWithPreferredEndpoints.

/**
 * Get a map of all ranges and the source that will be cleaned up once this bootstrapped node is added for the given ranges.
 * For each range, the list should only contain a single source. This allows us to consistently migrate data without violating
 * consistency.
 *
 * For every replica in {@code fetchRanges}, each range known to {@code strat} under {@code tmdBefore} that contains
 * the replica's range is examined, and the chosen source replicas are accumulated under that replica.
 *
 * @param snitchGetSortedListByProximity applied to the local broadcast address and a set of endpoints; its result is
 *                                       used as the ordered candidate list
 * @param strat replication strategy queried for the range addresses (with {@code tmdBefore}), the replication
 *              factor, and the natural replicas (with {@code tmdAfter})
 * @param fetchRanges the replicas (ranges) that need to be fetched
 * @param useStrictConsistency whether to select sources strictly; it is only applied to a range when the number of
 *                             old endpoints equals the replication factor
 * @param tmdBefore token metadata used to compute the current range addresses
 * @param tmdAfter token metadata used to compute the natural replicas of the fetched range's right token
 * @param keyspace keyspace name, used here only in log and error messages
 * @param sourceFilters predicates that candidate sources must all satisfy
 *
 * @return the selected source replicas, keyed by the replica to fetch
 * @throws IllegalStateException if source filters removed a replica needed for strict consistency, if no sufficient
 *         source can be found, or if multiple strict sources of the same kind were selected
 * @throws AssertionError if strict selection yields more than one endpoint, or none for a transient replica
 */
public static EndpointsByReplica calculateRangesToFetchWithPreferredEndpoints(BiFunction<InetAddressAndPort, EndpointsForRange, EndpointsForRange> snitchGetSortedListByProximity, AbstractReplicationStrategy strat, ReplicaCollection<?> fetchRanges, boolean useStrictConsistency, TokenMetadata tmdBefore, TokenMetadata tmdAfter, String keyspace, Collection<SourceFilter> sourceFilters) {
    EndpointsByRange rangeAddresses = strat.getRangeAddresses(tmdBefore);
    InetAddressAndPort localAddress = FBUtilities.getBroadcastAddressAndPort();
    logger.debug("Keyspace: {}", keyspace);
    logger.debug("To fetch RN: {}", fetchRanges);
    logger.debug("Fetch ranges: {}", rangeAddresses);
    // A candidate source must pass every supplied source filter
    Predicate<Replica> testSourceFilters = and(sourceFilters);
    // Orders a set of endpoints using the supplied function, relative to the local address
    Function<EndpointsForRange, EndpointsForRange> sorted = endpoints -> snitchGetSortedListByProximity.apply(localAddress, endpoints);
    // This list of replicas is just candidates. With strict consistency it's going to be a narrow list.
    EndpointsByReplica.Builder rangesToFetchWithPreferredEndpoints = new EndpointsByReplica.Builder();
    for (Replica toFetch : fetchRanges) {
        // Replica that is sufficient to provide the data we need
        // With strict consistency and transient replication we may end up with multiple types
        // so this isn't used with strict consistency
        Predicate<Replica> isSufficient = r -> toFetch.isTransient() || r.isFull();
        logger.debug("To fetch {}", toFetch);
        for (Range<Token> range : rangeAddresses.keySet()) {
            // Only ranges that fully contain the range to fetch are relevant
            if (!range.contains(toFetch.range()))
                continue;
            final EndpointsForRange oldEndpoints = sorted.apply(rangeAddresses.get(range));
            // Ultimately we populate this with whatever is going to be fetched from to satisfy toFetch
            // It could be multiple endpoints and we must fetch from all of them if they are there
            // With transient replication and strict consistency this is to get the full data from a full replica and
            // transient data from the transient replica losing data
            EndpointsForRange sources;
            // Due to CASSANDRA-5953 we can have a higher RF than we have endpoints.
            // So we need to be careful to only be strict when endpoints == RF
            boolean isStrictConsistencyApplicable = useStrictConsistency && (oldEndpoints.size() == strat.getReplicationFactor().allReplicas);
            if (isStrictConsistencyApplicable) {
                EndpointsForRange strictEndpoints;
                // Start with two sets of who replicates the range before and who replicates it after
                EndpointsForRange newEndpoints = strat.calculateNaturalReplicas(toFetch.range().right, tmdAfter);
                logger.debug("Old endpoints {}", oldEndpoints);
                logger.debug("New endpoints {}", newEndpoints);
                // Remove new endpoints from old endpoints based on address
                strictEndpoints = oldEndpoints.without(newEndpoints.endpoints());
                if (strictEndpoints.size() > 1)
                    throw new AssertionError("Expected <= 1 endpoint but found " + strictEndpoints);
                // The source filters must not exclude any of the replicas that are
                // required for strict consistency
                if (!all(strictEndpoints, testSourceFilters))
                    throw new IllegalStateException("Necessary replicas for strict consistency were removed by source filters: " + buildErrorMessage(sourceFilters, strictEndpoints));
                // An empty strict set is only tolerated when the replica to fetch is a full one; for a transient
                // replica it's an error if we don't find what we need.
                if (strictEndpoints.isEmpty() && toFetch.isTransient())
                    throw new AssertionError("If there are no endpoints to fetch from then we must be transitioning from transient to full for range " + toFetch);
                if (!any(strictEndpoints, isSufficient)) {
                    // need an additional replica; include all our filters, to ensure we include a matching node
                    Optional<Replica> fullReplica = Iterables.<Replica>tryFind(oldEndpoints, and(isSufficient, testSourceFilters)).toJavaUtil();
                    if (fullReplica.isPresent())
                        strictEndpoints = Endpoints.concat(strictEndpoints, EndpointsForRange.of(fullReplica.get()));
                    else
                        throw new IllegalStateException("Couldn't find any matching sufficient replica out of " + buildErrorMessage(sourceFilters, oldEndpoints));
                }
                sources = strictEndpoints;
            } else {
                // Without strict consistency we have given up on correctness so no point in fetching from
                // a random full + transient replica since it's also likely to lose data
                // Also apply testSourceFilters that were given to us so we can safely select a single source
                sources = sorted.apply(oldEndpoints.filter(and(isSufficient, testSourceFilters)));
                // Limit it to just the first possible source, we don't need more than one and downstream
                // will fetch from every source we supply
                sources = sources.size() > 0 ? sources.subList(0, 1) : sources;
            }
            // storing range and preferred endpoint set
            rangesToFetchWithPreferredEndpoints.putAll(toFetch, sources, Conflict.NONE);
            logger.debug("Endpoints to fetch for {} are {}", toFetch, sources);
        }
        // Nothing was recorded for this replica: no known range contained the range to fetch
        EndpointsForRange addressList = rangesToFetchWithPreferredEndpoints.getIfPresent(toFetch);
        if (addressList == null)
            throw new IllegalStateException("Failed to find endpoints to fetch " + toFetch);
        /*
         * When we move forwards (shrink our bucket) we are the one losing a range and no one else loses
         * from that action (we also don't gain). When we move backwards there are two people losing a range. One is a
         * full replica and the other is a transient replica. So we need to fetch from two places in that case for the
         * full range we gain. For a transient range we only need to fetch from one.
         * Hence more than one full source, or more than one transient source, is rejected below.
         */
        if (useStrictConsistency && addressList.size() > 1 && (addressList.filter(Replica::isFull).size() > 1 || addressList.filter(Replica::isTransient).size() > 1))
            throw new IllegalStateException(String.format("Multiple strict sources found for %s, sources: %s", toFetch, addressList));
        // We must have enough stuff to fetch from
        if (!any(addressList, isSufficient)) {
            // With RF=1 the missing source is fatal only under strict consistency; otherwise it is just logged
            if (strat.getReplicationFactor().allReplicas == 1) {
                if (useStrictConsistency) {
                    logger.warn("A node required to move the data consistently is down");
                    throw new IllegalStateException("Unable to find sufficient sources for streaming range " + toFetch + " in keyspace " + keyspace + " with RF=1. " + "Ensure this keyspace contains replicas in the source datacenter.");
                } else
                    logger.warn("Unable to find sufficient sources for streaming range {} in keyspace {} with RF=1. " + "Keyspace might be missing data.", toFetch, keyspace);
            } else {
                // With any other RF the missing source is always fatal
                if (useStrictConsistency)
                    logger.warn("A node required to move the data consistently is down");
                throw new IllegalStateException("Unable to find sufficient sources for streaming range " + toFetch + " in keyspace " + keyspace);
            }
        }
    }
    return rangesToFetchWithPreferredEndpoints.build();
}
Also used : BiFunction(java.util.function.BiFunction) LoggerFactory(org.slf4j.LoggerFactory) Iterables.all(com.google.common.collect.Iterables.all) StringUtils(org.apache.commons.lang3.StringUtils) Gossiper(org.apache.cassandra.gms.Gossiper) NetworkTopologyStrategy(org.apache.cassandra.locator.NetworkTopologyStrategy) Predicates.and(com.google.common.base.Predicates.and) StreamResultFuture(org.apache.cassandra.streaming.StreamResultFuture) Replica.fullReplica(org.apache.cassandra.locator.Replica.fullReplica) HashMultimap(com.google.common.collect.HashMultimap) Replicas(org.apache.cassandra.locator.Replicas) Endpoints(org.apache.cassandra.locator.Endpoints) Predicates.not(com.google.common.base.Predicates.not) ReplicaCollection(org.apache.cassandra.locator.ReplicaCollection) Map(java.util.Map) EndpointsByRange(org.apache.cassandra.locator.EndpointsByRange) Keyspace(org.apache.cassandra.db.Keyspace) EndpointsForRange(org.apache.cassandra.locator.EndpointsForRange) FBUtilities(org.apache.cassandra.utils.FBUtilities) Collection(java.util.Collection) Set(java.util.Set) Collectors(java.util.stream.Collectors) RangesAtEndpoint(org.apache.cassandra.locator.RangesAtEndpoint) List(java.util.List) Predicate(com.google.common.base.Predicate) Conflict(org.apache.cassandra.locator.ReplicaCollection.Builder.Conflict) Optional(java.util.Optional) FailureDetector(org.apache.cassandra.gms.FailureDetector) Iterables.any(com.google.common.collect.Iterables.any) InetAddressAndPort(org.apache.cassandra.locator.InetAddressAndPort) Iterables(com.google.common.collect.Iterables) HashMap(java.util.HashMap) Multimap(com.google.common.collect.Multimap) Function(java.util.function.Function) SystemKeyspace(org.apache.cassandra.db.SystemKeyspace) ArrayList(java.util.ArrayList) IEndpointSnitch(org.apache.cassandra.locator.IEndpointSnitch) TokenMetadata(org.apache.cassandra.locator.TokenMetadata) ImmutableMultimap(com.google.common.collect.ImmutableMultimap) 
StreamOperation(org.apache.cassandra.streaming.StreamOperation) Logger(org.slf4j.Logger) Replica(org.apache.cassandra.locator.Replica) IFailureDetector(org.apache.cassandra.gms.IFailureDetector) PreviewKind(org.apache.cassandra.streaming.PreviewKind) AbstractReplicationStrategy(org.apache.cassandra.locator.AbstractReplicationStrategy) StreamPlan(org.apache.cassandra.streaming.StreamPlan) EndpointsByReplica(org.apache.cassandra.locator.EndpointsByReplica) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) LocalStrategy(org.apache.cassandra.locator.LocalStrategy) InetAddressAndPort(org.apache.cassandra.locator.InetAddressAndPort) EndpointsByRange(org.apache.cassandra.locator.EndpointsByRange) Replica.fullReplica(org.apache.cassandra.locator.Replica.fullReplica) Replica(org.apache.cassandra.locator.Replica) EndpointsByReplica(org.apache.cassandra.locator.EndpointsByReplica) EndpointsByReplica(org.apache.cassandra.locator.EndpointsByReplica) EndpointsForRange(org.apache.cassandra.locator.EndpointsForRange)

Example 4 with EndpointsForRange

use of org.apache.cassandra.locator.EndpointsForRange in project cassandra by apache.

In the class RangeFetchMapCalculator, method getRangeFetchMapForTrivialRanges.

/**
 * Picks a streaming source for each trivial range, preferring endpoints that currently have the
 * fewest ranges assigned in {@code optimisedMap}.
 *
 * Each range is first tried with the local-DC check enabled; if no replica passes the filters, the
 * search is repeated once with the check disabled.
 *
 * @param optimisedMap the ranges already assigned per endpoint, used only to order the candidates
 * @return the chosen source endpoint for each trivial range (none is recorded when only this node qualifies)
 * @throws IllegalStateException if no replica passes the filters even with the local-DC check disabled
 */
@VisibleForTesting
Multimap<InetAddressAndPort, Range<Token>> getRangeFetchMapForTrivialRanges(Multimap<InetAddressAndPort, Range<Token>> optimisedMap) {
    Multimap<InetAddressAndPort, Range<Token>> result = HashMultimap.create();
    // orders candidates so that the endpoint having the least number of streams comes first
    Comparator<Replica> byAssignedStreams = Comparator.comparingInt(candidate -> optimisedMap.get(candidate.endpoint()).size());
    for (Range<Token> trivialRange : trivialRanges) {
        boolean sourceFound = false;
        boolean restrictToLocalDc = true;
        do {
            EndpointsForRange candidates = rangesWithSources.get(trivialRange).sorted(byAssignedStreams);
            Replicas.temporaryAssertFull(candidates);
            for (Replica candidate : candidates) {
                if (!passFilters(candidate, restrictToLocalDc))
                    continue;
                // a replica that passes the filters counts as a source, even if it is this node,
                // see RangeFetchMapCalculator#addEndpoints  and RangeStreamer#getRangeFetchMap
                sourceFound = true;
                // but don't add localhost to avoid streaming locally
                if (candidate.isSelf())
                    continue;
                result.put(candidate.endpoint(), trivialRange);
                break;
            }
            if (!sourceFound) {
                // the second pass (without the local-DC check) also failed: give up
                if (!restrictToLocalDc)
                    throw new IllegalStateException("Unable to find sufficient sources for streaming range " + trivialRange + " in keyspace " + keyspace);
                logger.info("Using other DC endpoints for streaming for range: {} and keyspace {}", trivialRange, keyspace);
            }
            restrictToLocalDc = false;
        } while (!sourceFound);
    }
    return result;
}
Also used : InetAddressAndPort(org.apache.cassandra.locator.InetAddressAndPort) MaximumFlowAlgorithmResult(org.psjava.algo.graph.flownetwork.MaximumFlowAlgorithmResult) LoggerFactory(org.slf4j.LoggerFactory) CapacityEdge(org.psjava.ds.graph.CapacityEdge) Multimap(com.google.common.collect.Multimap) Function(org.psjava.ds.math.Function) HashMultimap(com.google.common.collect.HashMultimap) Replicas(org.apache.cassandra.locator.Replicas) Predicates(com.google.common.base.Predicates) BigInteger(java.math.BigInteger) MaximumFlowAlgorithm(org.psjava.algo.graph.flownetwork.MaximumFlowAlgorithm) EndpointsByRange(org.apache.cassandra.locator.EndpointsByRange) DatabaseDescriptor(org.apache.cassandra.config.DatabaseDescriptor) EndpointsForRange(org.apache.cassandra.locator.EndpointsForRange) Logger(org.slf4j.Logger) DFSPathFinder(org.psjava.algo.graph.pathfinder.DFSPathFinder) Collection(java.util.Collection) Set(java.util.Set) IntegerNumberSystem(org.psjava.ds.numbersystrem.IntegerNumberSystem) Collectors(java.util.stream.Collectors) Replica(org.apache.cassandra.locator.Replica) Predicate(com.google.common.base.Predicate) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) MutableCapacityGraph(org.psjava.ds.graph.MutableCapacityGraph) FordFulkersonAlgorithm(org.psjava.algo.graph.flownetwork.FordFulkersonAlgorithm) InetAddressAndPort(org.apache.cassandra.locator.InetAddressAndPort) EndpointsForRange(org.apache.cassandra.locator.EndpointsForRange) EndpointsByRange(org.apache.cassandra.locator.EndpointsByRange) EndpointsForRange(org.apache.cassandra.locator.EndpointsForRange) Replica(org.apache.cassandra.locator.Replica) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 5 with EndpointsForRange

use of org.apache.cassandra.locator.EndpointsForRange in project cassandra by apache.

In the class RepairRunnable, method getNeighborsAndRanges.

/**
 * Collects, for every range requested by the repair options, the neighbors to repair with.
 *
 * @return the union of all neighbors, the ranges grouped via {@code addRangeToNeighbors}, and a flag
 *         telling whether dead participants were dropped because of a forced repair
 * @throws RepairException if a range has no neighbors and unreplicated keyspaces are not being ignored
 */
private NeighborsAndRanges getNeighborsAndRanges() throws RepairException {
    Set<InetAddressAndPort> participants = new HashSet<>();
    List<CommonRange> rangesByNeighbors = new ArrayList<>();
    // computed once up front so that getNeighbors does not have to recalculate the local replicas
    // for every requested range
    Iterable<Range<Token>> localRanges = storageService.getLocalReplicas(keyspace).ranges();
    for (Range<Token> range : options.getRanges()) {
        EndpointsForRange neighbors = ActiveRepairService.getNeighbors(keyspace, localRanges, range, options.getDataCenters(), options.getHosts());
        if (!neighbors.isEmpty()) {
            addRangeToNeighbors(rangesByNeighbors, range, neighbors);
            participants.addAll(neighbors.endpoints());
            continue;
        }
        // no neighbors for this range: either skip it or abort, depending on the options
        if (!options.ignoreUnreplicatedKeyspaces()) {
            throw RepairException.warn(String.format("Nothing to repair for %s in %s - aborting", range, keyspace));
        }
        logger.info("{} Found no neighbors for range {} for {} - ignoring since repairing with --ignore-unreplicated-keyspaces", parentSession, range, keyspace);
    }
    if (options.ignoreUnreplicatedKeyspaces() && participants.isEmpty()) {
        throw new SkipRepairException(String.format("Nothing to repair for %s in %s - unreplicated keyspace is ignored since repair was called with --ignore-unreplicated-keyspaces", options.getRanges(), keyspace));
    }
    progressCounter.incrementAndGet();
    if (!options.isForcedRepair()) {
        return new NeighborsAndRanges(false, participants, rangesByNeighbors);
    }
    // forced repair: keep only live participants and report whether anything was actually dropped
    Set<InetAddressAndPort> liveParticipants = Sets.newHashSet(Iterables.filter(participants, FailureDetector.instance::isAlive));
    boolean droppedDeadParticipants = !participants.equals(liveParticipants);
    return new NeighborsAndRanges(droppedDeadParticipants, liveParticipants, rangesByNeighbors);
}
Also used : InetAddressAndPort(org.apache.cassandra.locator.InetAddressAndPort) ArrayList(java.util.ArrayList) Token(org.apache.cassandra.dht.Token) EndpointsForRange(org.apache.cassandra.locator.EndpointsForRange) Range(org.apache.cassandra.dht.Range) FailureDetector(org.apache.cassandra.gms.FailureDetector) EndpointsForRange(org.apache.cassandra.locator.EndpointsForRange) HashSet(java.util.HashSet)

Aggregations

EndpointsForRange (org.apache.cassandra.locator.EndpointsForRange)44 InetAddressAndPort (org.apache.cassandra.locator.InetAddressAndPort)36 Test (org.junit.Test)32 Mutation (org.apache.cassandra.db.Mutation)18 PartitionIterator (org.apache.cassandra.db.partitions.PartitionIterator)16 UnfilteredPartitionIterator (org.apache.cassandra.db.partitions.UnfilteredPartitionIterator)16 ByteBuffer (java.nio.ByteBuffer)14 RowUpdateBuilder (org.apache.cassandra.db.RowUpdateBuilder)13 BTreeRow (org.apache.cassandra.db.rows.BTreeRow)10 Row (org.apache.cassandra.db.rows.Row)10 Replica (org.apache.cassandra.locator.Replica)10 RowIterator (org.apache.cassandra.db.rows.RowIterator)9 DeletionTime (org.apache.cassandra.db.DeletionTime)8 HashMap (java.util.HashMap)6 Token (org.apache.cassandra.dht.Token)6 TestableReadRepair (org.apache.cassandra.service.reads.repair.TestableReadRepair)6 RangeTombstone (org.apache.cassandra.db.RangeTombstone)5 ReadCommand (org.apache.cassandra.db.ReadCommand)4 EndpointsByReplica (org.apache.cassandra.locator.EndpointsByReplica)4 VisibleForTesting (com.google.common.annotations.VisibleForTesting)3