Search in sources :

Example 6 with Replica

use of org.apache.cassandra.locator.Replica in project cassandra by apache.

the class CancelCompactionsTest method testAnticompaction.

/**
 * Checks how starting a {@link PendingAntiCompaction} interacts with compactions that are already running.
 * <p>
 * The test creates ten sstables plus ten more that are marked repaired, starts four compaction tasks over
 * subsets of the first group and one task over the repaired group, and then runs an anti-compaction for the
 * range {@code (token(-1), token(49)]}. After a short pause it expects:
 * <ul>
 *   <li>every active compaction holding an sstable that intersects the range and is neither repaired nor
 *       pending repair to have a stop requested, and every other compaction not to;</li>
 *   <li>exactly two of the started tasks to match such compactions;</li>
 *   <li>once those tasks are aborted and the anti-compaction future completes, every sstable of the first
 *       group to either not intersect the range or be pending repair.</li>
 * </ul>
 *
 * @throws InterruptedException if the test thread is interrupted while sleeping or waiting on the future
 * @throws ExecutionException if the anti-compaction future completes exceptionally
 */
@Test
public void testAnticompaction() throws InterruptedException, ExecutionException {
    ColumnFamilyStore cfs = MockSchema.newCFS();
    List<SSTableReader> sstables = createSSTables(cfs, 10, 0);
    List<SSTableReader> alreadyRepairedSSTables = createSSTables(cfs, 10, 10);
    for (SSTableReader sstable : alreadyRepairedSSTables) {
        AbstractPendingRepairTest.mutateRepaired(sstable, System.currentTimeMillis());
    }
    assertEquals(20, cfs.getLiveSSTables().size());
    List<TestCompactionTask> tcts = new ArrayList<>();
    tcts.add(new TestCompactionTask(cfs, new HashSet<>(sstables.subList(0, 2))));
    tcts.add(new TestCompactionTask(cfs, new HashSet<>(sstables.subList(3, 4))));
    tcts.add(new TestCompactionTask(cfs, new HashSet<>(sstables.subList(5, 7))));
    tcts.add(new TestCompactionTask(cfs, new HashSet<>(sstables.subList(8, 9))));
    List<TestCompactionTask> nonAffectedTcts = new ArrayList<>();
    nonAffectedTcts.add(new TestCompactionTask(cfs, new HashSet<>(alreadyRepairedSSTables)));
    // Held in a local (instead of being created inline in the constructor call) so that the worker thread can be
    // released in the finally block; otherwise it would outlive the test.
    java.util.concurrent.ExecutorService antiCompactionExecutor = Executors.newSingleThreadExecutor();
    try {
        tcts.forEach(TestCompactionTask::start);
        nonAffectedTcts.forEach(TestCompactionTask::start);
        List<CompactionInfo.Holder> activeCompactions = getActiveCompactionsForTable(cfs);
        assertEquals(5, activeCompactions.size());
        // make sure that sstables are fully contained so that the metadata gets mutated
        Range<Token> range = new Range<>(token(-1), token(49));
        UUID prsid = UUID.randomUUID();
        ActiveRepairService.instance.registerParentRepairSession(prsid, InetAddressAndPort.getLocalHost(), Collections.singletonList(cfs), Collections.singleton(range), true, 1, true, PreviewKind.NONE);
        InetAddressAndPort local = FBUtilities.getBroadcastAddressAndPort();
        RangesAtEndpoint rae = RangesAtEndpoint.builder(local).add(new Replica(local, range, true)).build();
        PendingAntiCompaction pac = new PendingAntiCompaction(prsid, Collections.singleton(cfs), rae, antiCompactionExecutor, () -> false);
        Future<?> fut = pac.run();
        // NOTE(review): fixed pause, presumably to let the anti-compaction request the stops before we inspect them
        Thread.sleep(600);
        List<TestCompactionTask> toAbort = new ArrayList<>();
        for (CompactionInfo.Holder holder : getActiveCompactionsForTable(cfs)) {
            if (holder.getCompactionInfo().getSSTables().stream().anyMatch(sstable -> sstable.intersects(Collections.singleton(range)) && !sstable.isRepaired() && !sstable.isPendingRepair())) {
                assertTrue(holder.isStopRequested());
                // remember which of our own tasks backs this compaction so it can be aborted below
                for (TestCompactionTask tct : tcts) {
                    if (tct.sstables.equals(holder.getCompactionInfo().getSSTables())) {
                        toAbort.add(tct);
                    }
                }
            } else {
                assertFalse(holder.isStopRequested());
            }
        }
        assertEquals(2, toAbort.size());
        toAbort.forEach(TestCompactionTask::abort);
        fut.get();
        for (SSTableReader sstable : sstables) {
            assertTrue(!sstable.intersects(Collections.singleton(range)) || sstable.isPendingRepair());
        }
    } finally {
        try {
            tcts.forEach(TestCompactionTask::abort);
            nonAffectedTcts.forEach(TestCompactionTask::abort);
        } finally {
            // orderly shutdown: already-submitted work is allowed to finish, then the worker thread exits
            antiCompactionExecutor.shutdown();
        }
    }
}
Also used : InetAddressAndPort(org.apache.cassandra.locator.InetAddressAndPort) RangesAtEndpoint(org.apache.cassandra.locator.RangesAtEndpoint) ArrayList(java.util.ArrayList) Token(org.apache.cassandra.dht.Token) Range(org.apache.cassandra.dht.Range) Replica(org.apache.cassandra.locator.Replica) PendingAntiCompaction(org.apache.cassandra.db.repair.PendingAntiCompaction) SSTableReader(org.apache.cassandra.io.sstable.format.SSTableReader) ColumnFamilyStore(org.apache.cassandra.db.ColumnFamilyStore) UUID(java.util.UUID) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 7 with Replica

use of org.apache.cassandra.locator.Replica in project cassandra by apache.

the class StorageProxy method proposePaxos.

/**
 * Proposes {@code proposal} according to {@code replicaPlan}.
 * <p>
 * For every contact in the plan the proposal is either executed on the {@code PAXOS_PROPOSE_REQ} stage (when
 * {@code replica.isSelf()} is true) or sent through {@link MessagingService}; the method then waits on the
 * shared {@code ProposeCallback}.
 * <p>
 * When {@code backoffIfPartial} is true, the proposer backs off when seeing the proposal being accepted by some
 * but not a quorum. The result of the corresponding CAS is uncertain, as the accepted proposal may or may not be
 * spread to other nodes in later rounds.
 *
 * @param proposal the commit to propose
 * @param replicaPlan supplies the contacts, the required participant count and the consistency level
 * @param backoffIfPartial whether a partially accepted proposal is reported as an unknown result
 * @param queryStartNanoTime passed through to the callback
 * @return {@code true} if the callback reports success, {@code false} otherwise
 * @throws WriteTimeoutException declared by this method; presumably raised while awaiting the callback
 * @throws CasWriteUnknownResultException if {@code backoffIfPartial} is set and the callback reports neither
 *         success nor a full refusal
 */
private static boolean proposePaxos(Commit proposal, ReplicaPlan.ForPaxosWrite replicaPlan, boolean backoffIfPartial, long queryStartNanoTime) throws WriteTimeoutException, CasWriteUnknownResultException {
    final ProposeCallback responses = new ProposeCallback(replicaPlan.contacts().size(), replicaPlan.requiredParticipants(), !backoffIfPartial, replicaPlan.consistencyLevel(), queryStartNanoTime);
    final Message<Commit> request = Message.out(PAXOS_PROPOSE_REQ, proposal);

    for (Replica target : replicaPlan.contacts()) {
        if (!target.isSelf()) {
            MessagingService.instance().sendWithCallback(request, target.endpoint(), responses);
            continue;
        }

        // local contact: run the proposal on the request's stage and feed the answer to the callback directly
        PAXOS_PROPOSE_REQ.stage.execute(() -> {
            try {
                responses.onResponse(request.responseWith(doPropose(proposal)));
            } catch (Exception ex) {
                // a local failure is only logged; the callback is not notified
                logger.error("Failed paxos propose locally", ex);
            }
        });
    }

    responses.await();

    if (!responses.isSuccessful()) {
        boolean partiallyAccepted = backoffIfPartial && !responses.isFullyRefused();
        if (partiallyAccepted) {
            throw new CasWriteUnknownResultException(replicaPlan.consistencyLevel(), responses.getAcceptCount(), replicaPlan.requiredParticipants());
        }
        return false;
    }
    return true;
}
Also used : Replica(org.apache.cassandra.locator.Replica) CasWriteUnknownResultException(org.apache.cassandra.exceptions.CasWriteUnknownResultException) OverloadedException(org.apache.cassandra.exceptions.OverloadedException) ReadAbortException(org.apache.cassandra.exceptions.ReadAbortException) RejectException(org.apache.cassandra.db.RejectException) CasWriteTimeoutException(org.apache.cassandra.exceptions.CasWriteTimeoutException) WriteFailureException(org.apache.cassandra.exceptions.WriteFailureException) InvalidRequestException(org.apache.cassandra.exceptions.InvalidRequestException) RequestTimeoutException(org.apache.cassandra.exceptions.RequestTimeoutException) ReadTimeoutException(org.apache.cassandra.exceptions.ReadTimeoutException) CasWriteUnknownResultException(org.apache.cassandra.exceptions.CasWriteUnknownResultException) TimeoutException(java.util.concurrent.TimeoutException) UnavailableException(org.apache.cassandra.exceptions.UnavailableException) WriteTimeoutException(org.apache.cassandra.exceptions.WriteTimeoutException) UncheckedInterruptedException(org.apache.cassandra.utils.concurrent.UncheckedInterruptedException) TombstoneOverwhelmingException(org.apache.cassandra.db.filter.TombstoneOverwhelmingException) RequestFailureException(org.apache.cassandra.exceptions.RequestFailureException) IsBootstrappingException(org.apache.cassandra.exceptions.IsBootstrappingException) ReadFailureException(org.apache.cassandra.exceptions.ReadFailureException)

Example 8 with Replica

use of org.apache.cassandra.locator.Replica in project cassandra by apache.

the class StorageProxy method sendToHintedReplicas.

/**
 * Send the mutations to the right targets, write it locally if it corresponds or writes a hint when the node
 * is not available.
 *
 * Note about hints:
 * <pre>
 * {@code
 * | Hinted Handoff | Consist. Level |
 * | on             |       >=1      | --> wait for hints. We DO NOT notify the handler with handler.response() for hints;
 * | on             |       ANY      | --> wait for hints. Responses count towards consistency.
 * | off            |       >=1      | --> DO NOT fire hints. And DO NOT wait for them to complete.
 * | off            |       ANY      | --> DO NOT fire hints. And DO NOT wait for them to complete.
 * }
 * </pre>
 *
 * @param mutation the mutation to apply locally, send to remote replicas, or hint
 * @param plan supplies the replicas to contact and whether each one is alive
 * @param responseHandler handler passed to every local/remote write and hint submission; it is marked
 *        expired once for each contact that is not alive
 * @param localDataCenter compared against the snitch-reported datacenter of each remote replica to decide
 *        between a direct send and a per-datacenter grouped send
 * @param stage the stage handed to {@code performLocally} for the local write
 * @throws OverloadedException if the hints cannot be written/enqueued
 */
public static void sendToHintedReplicas(final Mutation mutation, ReplicaPlan.ForTokenWrite plan, AbstractWriteResponseHandler<IMutation> responseHandler, String localDataCenter, Stage stage) throws OverloadedException {
    // this dc replicas:
    Collection<Replica> localDc = null;
    // extra-datacenter replicas, grouped by dc
    Map<String, Collection<Replica>> dcGroups = null;
    // only need to create a Message for non-local writes
    Message<Mutation> message = null;
    boolean insertLocal = false;
    Replica localReplica = null;
    Collection<Replica> endpointsToHint = null;
    for (Replica destination : plan.contacts()) {
        checkHintOverload(destination);
        if (plan.isAlive(destination)) {
            if (destination.isSelf()) {
                insertLocal = true;
                localReplica = destination;
            } else {
                // belongs on a different server
                if (message == null)
                    message = Message.outWithFlag(MUTATION_REQ, mutation, MessageFlag.CALL_BACK_ON_FAILURE);
                String dc = DatabaseDescriptor.getEndpointSnitch().getDatacenter(destination);
                if (localDataCenter.equals(dc)) {
                    if (localDc == null)
                        localDc = new ArrayList<>(plan.contacts().size());
                    localDc.add(destination);
                } else {
                    if (dcGroups == null)
                        dcGroups = new HashMap<>();
                    // single lookup; most DCs will have <= 3 replicas
                    dcGroups.computeIfAbsent(dc, k -> new ArrayList<>(3)).add(destination);
                }
            }
        } else {
            // Immediately mark the response as expired since the request will not be sent
            responseHandler.expired();
            if (shouldHint(destination)) {
                if (endpointsToHint == null)
                    endpointsToHint = new ArrayList<>();
                endpointsToHint.add(destination);
            }
        }
    }
    if (endpointsToHint != null)
        submitHint(mutation, EndpointsForToken.copyOf(mutation.key().getToken(), endpointsToHint), responseHandler);
    if (insertLocal) {
        Preconditions.checkNotNull(localReplica);
        performLocally(stage, localReplica, mutation::apply, responseHandler);
    }
    if (localDc != null) {
        for (Replica destination : localDc) {
            MessagingService.instance().sendWriteWithCallback(message, destination, responseHandler, true);
        }
    }
    if (dcGroups != null) {
        // for each datacenter, send the message to one node to relay the write to other replicas
        for (Collection<Replica> dcTargets : dcGroups.values()) {
            sendMessagesToNonlocalDC(message, EndpointsForToken.copyOf(mutation.key().getToken(), dcTargets), responseHandler);
        }
    }
}
Also used : InetAddressAndPort(org.apache.cassandra.locator.InetAddressAndPort) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Replica(org.apache.cassandra.locator.Replica) Collection(java.util.Collection) Mutation(org.apache.cassandra.db.Mutation) CounterMutation(org.apache.cassandra.db.CounterMutation) IMutation(org.apache.cassandra.db.IMutation)

Example 9 with Replica

use of org.apache.cassandra.locator.Replica in project cassandra by apache.

the class RangeRelocator method computeRanges.

/**
 * Adds to {@code ranges} the part of each replica in {@code srcRanges} that remains after subtracting the
 * ranges of the replicas in {@code dstRanges} it is compared against.
 * <p>
 * A destination replica is ignored for a given source replica when the two do not intersect on range, or when
 * the source is full and the destination is transient. If no destination replica is taken into account, the
 * source replica is added unchanged; otherwise what is left after subtracting every considered destination
 * range (ignoring transient status) is added.
 *
 * @param srcRanges the replicas whose ranges are being reduced
 * @param dstRanges the replicas whose ranges are subtracted
 * @param ranges the builder that collects the result
 */
private static void computeRanges(RangesAtEndpoint srcRanges, RangesAtEndpoint dstRanges, RangesAtEndpoint.Builder ranges) {
    for (Replica source : srcRanges) {
        boolean overlapsAny = false;
        RangesAtEndpoint leftover = null;

        for (Replica target : dstRanges) {
            logger.debug("Comparing {} and {}", source, target);

            // Skip targets that don't intersect, and transitions from full to transient
            boolean skip = !source.intersectsOnRange(target) || (source.isFull() && target.isTransient());
            if (skip) {
                continue;
            }

            overlapsAny = true;
            if (leftover != null) {
                // Re-subtract ranges to avoid overstreaming in cases when the single range is split or merged
                RangesAtEndpoint.Builder rebuilt = new RangesAtEndpoint.Builder(leftover.endpoint());
                for (Replica piece : leftover) {
                    rebuilt.addAll(piece.subtractIgnoreTransientStatus(target.range()));
                }
                leftover = rebuilt.build();
            } else {
                leftover = source.subtractIgnoreTransientStatus(target.range());
            }
        }

        if (overlapsAny) {
            ranges.addAll(leftover);
            logger.debug("    Intersects adding {}", leftover);
        } else {
            assert leftover == null;
            logger.debug("    Doesn't intersect adding {}", source);
            // nothing was subtracted, so the whole old range should be streamed
            ranges.add(source);
        }
    }
}
Also used : RangesAtEndpoint(org.apache.cassandra.locator.RangesAtEndpoint) Replica(org.apache.cassandra.locator.Replica) EndpointsByReplica(org.apache.cassandra.locator.EndpointsByReplica)

Example 10 with Replica

use of org.apache.cassandra.locator.Replica in project cassandra by apache.

the class RangeStreamer method getOptimizedWorkMap.

/**
 * Optimized version that also outputs the final work map.
 * <p>
 * The replica-keyed input is flattened into a range-keyed view, handed to {@code RangeFetchMapCalculator},
 * validated, and the resulting endpoint-to-range map is converted back into {@code FetchReplica} entries.
 *
 * @param rangesWithSources candidate source replicas keyed by the replica to fetch
 * @param sourceFilters filters passed to the calculator
 * @param keyspace keyspace name, passed to the calculator and the validation, and used in logging
 * @return a multimap from source endpoint to the replicas to fetch from it
 */
private static Multimap<InetAddressAndPort, FetchReplica> getOptimizedWorkMap(EndpointsByReplica rangesWithSources, Collection<SourceFilter> sourceFilters, String keyspace) {
    // The optimized range fetch map is deliberately not used with transient replication for now, to shrink
    // the surface area to test and introduce bugs.
    // In the future it could be run twice, once for full ranges with only full replicas and once with
    // transient ranges and all replicas, and the results merged.
    EndpointsByRange.Builder byRange = new EndpointsByRange.Builder();
    for (Map.Entry<Replica, Replica> pair : rangesWithSources.flattenEntries()) {
        Replica source = pair.getValue();
        Replicas.temporaryAssertFull(source);
        byRange.put(pair.getKey().range(), source);
    }
    EndpointsByRange unwrappedView = byRange.build();

    Multimap<InetAddressAndPort, Range<Token>> rangeFetchMapMap = new RangeFetchMapCalculator(unwrappedView, sourceFilters, keyspace).getRangeFetchMap();
    logger.info("Output from RangeFetchMapCalculator for keyspace {}", keyspace);
    validateRangeFetchMap(unwrappedView, rangeFetchMapMap, keyspace);

    // Need to rewrap as Replicas
    Multimap<InetAddressAndPort, FetchReplica> wrapped = HashMultimap.create();
    for (Map.Entry<InetAddressAndPort, Range<Token>> fetch : rangeFetchMapMap.entries()) {
        InetAddressAndPort sourceEndpoint = fetch.getKey();
        Range<Token> fetchedRange = fetch.getValue();

        // find the single replica whose range is exactly the fetched range
        Replica toFetch = null;
        for (Replica candidate : rangesWithSources.keySet()) {
            if (!candidate.range().equals(fetchedRange)) {
                continue;
            }
            if (toFetch != null) {
                throw new AssertionError(String.format("There shouldn't be multiple replicas for range %s, replica %s and %s here", candidate.range(), candidate, toFetch));
            }
            toFetch = candidate;
        }
        if (toFetch == null) {
            throw new AssertionError("Shouldn't be possible for the Replica we fetch to be null here");
        }

        // Committing the cardinal sin of synthesizing a Replica, but it's ok because we assert earlier all of them
        // are full and optimized range fetch map doesn't support transient replication yet.
        wrapped.put(sourceEndpoint, new FetchReplica(toFetch, fullReplica(sourceEndpoint, fetchedRange)));
    }
    return wrapped;
}
Also used : InetAddressAndPort(org.apache.cassandra.locator.InetAddressAndPort) EndpointsByRange(org.apache.cassandra.locator.EndpointsByRange) EndpointsByRange(org.apache.cassandra.locator.EndpointsByRange) EndpointsForRange(org.apache.cassandra.locator.EndpointsForRange) Replica.fullReplica(org.apache.cassandra.locator.Replica.fullReplica) Replica(org.apache.cassandra.locator.Replica) EndpointsByReplica(org.apache.cassandra.locator.EndpointsByReplica) Map(java.util.Map) HashMap(java.util.HashMap)

Aggregations

Replica (org.apache.cassandra.locator.Replica)69 InetAddressAndPort (org.apache.cassandra.locator.InetAddressAndPort)24 Token (org.apache.cassandra.dht.Token)22 Test (org.junit.Test)20 HashMap (java.util.HashMap)18 Mutation (org.apache.cassandra.db.Mutation)15 EndpointsByReplica (org.apache.cassandra.locator.EndpointsByReplica)15 Range (org.apache.cassandra.dht.Range)14 RangesAtEndpoint (org.apache.cassandra.locator.RangesAtEndpoint)13 TokenMetadata (org.apache.cassandra.locator.TokenMetadata)12 Keyspace (org.apache.cassandra.db.Keyspace)11 AbstractReplicationStrategy (org.apache.cassandra.locator.AbstractReplicationStrategy)11 EndpointsForRange (org.apache.cassandra.locator.EndpointsForRange)11 Replica.fullReplica (org.apache.cassandra.locator.Replica.fullReplica)10 Collection (java.util.Collection)8 Map (java.util.Map)8 EndpointsForToken (org.apache.cassandra.locator.EndpointsForToken)8 ReplicaPlan (org.apache.cassandra.locator.ReplicaPlan)8 ArrayList (java.util.ArrayList)7 HashSet (java.util.HashSet)6