Search in sources :

Example 1 with Hint

Use of org.apache.cassandra.hints.Hint in the Apache Cassandra project.

Class StorageProxy, method shouldHint.

/**
 * Decides whether a hint may be stored for the given replica.
 * <p>
 * The hint is rejected as soon as one of the following holds:
 * <ul>
 *   <li>hinted handoff is disabled entirely, or for the datacenter the replica belongs to</li>
 *   <li>the replica is transient, or is this node itself</li>
 *   <li>no host id is known for the replica's endpoint (it is no longer part of the ring)</li>
 *   <li>the hint window has expired</li>
 *   <li>a size limit is configured and the hints stored for the node exceed it</li>
 * </ul>
 * In every other case the hint is permitted.
 *
 * @param replica the replica the hint would be written for
 * @param tryEnablePersistentWindow true to take hint_window_persistent_enabled into account; false to ignore it
 * @return true if the hint is permitted, false if it is rejected
 */
public static boolean shouldHint(Replica replica, boolean tryEnablePersistentWindow) {
    if (!DatabaseDescriptor.hintedHandoffEnabled()) {
        return false;
    }
    if (replica.isTransient() || replica.isSelf()) {
        return false;
    }

    // Per-datacenter opt-out: only resolve the replica's datacenter when at least one DC is disabled.
    Set<String> dcsWithoutHints = DatabaseDescriptor.hintedHandoffDisabledDCs();
    if (!dcsWithoutHints.isEmpty()) {
        final String replicaDc = DatabaseDescriptor.getEndpointSnitch().getDatacenter(replica);
        if (dcsWithoutHints.contains(replicaDc)) {
            Tracing.trace("Not hinting {} since its data center {} has been disabled {}", replica, replicaDc, dcsWithoutHints);
            return false;
        }
    }

    InetAddressAndPort endpoint = replica.endpoint();
    int hintWindow = DatabaseDescriptor.getMaxHintWindow();
    long downtime = Gossiper.instance.getEndpointDowntime(endpoint);

    UUID hostId = StorageService.instance.getHostIdForEndpoint(endpoint);
    if (hostId == null) {
        Tracing.trace("Discarding hint for endpoint not part of ring: {}", endpoint);
        return false;
    }

    boolean windowExpired = downtime > hintWindow;

    // With a persistent hint window, the window may also be considered expired based on the
    // timestamp of the earliest hint already stored for the host.
    if (tryEnablePersistentWindow && !windowExpired && DatabaseDescriptor.hintWindowPersistentEnabled()) {
        long earliestStoredHint = HintsService.instance.getEarliestHintForHost(hostId);
        if (Clock.Global.currentTimeMillis() - hintWindow > earliestStoredHint) {
            windowExpired = true;
            Tracing.trace("Not hinting {} for which there is the earliest hint stored at {}", replica, earliestStoredHint);
        }
    }

    if (windowExpired) {
        HintsService.instance.metrics.incrPastWindow(endpoint);
        Tracing.trace("Not hinting {} which has been down {} ms", endpoint, downtime);
        return false;
    }

    // A non-positive limit means the size check never rejects.
    long sizeLimit = DatabaseDescriptor.getMaxHintsSizePerHost();
    long sizeOnDisk = HintsService.instance.getTotalHintsSize(hostId);
    if (sizeLimit > 0 && sizeOnDisk > sizeLimit) {
        Tracing.trace("Not hinting {} which has reached to the max hints size {} bytes on disk. The actual hints size on disk: {}", endpoint, sizeLimit, sizeOnDisk);
        return false;
    }

    return true;
}
Also used : InetAddressAndPort(org.apache.cassandra.locator.InetAddressAndPort) UUID(java.util.UUID) Hint(org.apache.cassandra.hints.Hint)

Example 2 with Hint

Use of org.apache.cassandra.hints.Hint in the Apache Cassandra project.

Class StorageProxy, method sendToHintedEndpoints.

/**
 * Dispatches a mutation to each of its targets: live targets receive the write (applied locally
 * when the target can be served by a local request, sent as a message otherwise), while targets
 * that are not alive get a hint when {@code shouldHint} allows it.
 *
 * Note about hints:
 * <pre>
 * {@code
 * | Hinted Handoff | Consist. Level |
 * | on             |       >=1      | --> wait for hints. We DO NOT notify the handler with handler.response() for hints;
 * | on             |       ANY      | --> wait for hints. Responses count towards consistency.
 * | off            |       >=1      | --> DO NOT fire hints. And DO NOT wait for them to complete.
 * | off            |       ANY      | --> DO NOT fire hints. And DO NOT wait for them to complete.
 * }
 * </pre>
 *
 * @throws OverloadedException if the hints cannot be written/enqueued
 */
public static void sendToHintedEndpoints(final Mutation mutation, Iterable<InetAddress> targets, AbstractWriteResponseHandler<IMutation> responseHandler, String localDataCenter, Stage stage) throws OverloadedException {
    int expectedTargets = Iterables.size(targets);

    // Every collection below is created lazily, only once a target actually needs it.
    Collection<InetAddress> sameDcReplicas = null;
    Map<String, Collection<InetAddress>> remoteReplicasByDc = null;
    MessageOut<Mutation> message = null;
    boolean applyLocally = false;
    ArrayList<InetAddress> hintTargets = null;
    List<InetAddress> backPressureHosts = null;

    for (InetAddress destination : targets) {
        checkHintOverload(destination);

        // Target is not alive: it can only be hinted, and only if hinting is allowed for it.
        if (!FailureDetector.instance.isAlive(destination)) {
            if (shouldHint(destination)) {
                if (hintTargets == null) {
                    hintTargets = new ArrayList<>(expectedTargets);
                }
                hintTargets.add(destination);
            }
            continue;
        }

        if (canDoLocalRequest(destination)) {
            applyLocally = true;
            continue;
        }

        // Live target on another server: the message is only built once it is needed.
        if (message == null) {
            message = mutation.createMessage();
        }

        String dc = DatabaseDescriptor.getEndpointSnitch().getDatacenter(destination);
        if (localDataCenter.equals(dc)) {
            if (sameDcReplicas == null) {
                sameDcReplicas = new ArrayList<>(expectedTargets);
            }
            sameDcReplicas.add(destination);
        } else {
            if (remoteReplicasByDc == null) {
                remoteReplicasByDc = new HashMap<>();
            }
            // most DCs will have <= 3 replicas
            remoteReplicasByDc.computeIfAbsent(dc, ignored -> new ArrayList<>(3)).add(destination);
        }

        if (backPressureHosts == null) {
            backPressureHosts = new ArrayList<>(expectedTargets);
        }
        backPressureHosts.add(destination);
    }

    if (backPressureHosts != null) {
        MessagingService.instance().applyBackPressure(backPressureHosts, responseHandler.currentTimeout());
    }

    if (hintTargets != null) {
        submitHint(mutation, hintTargets, responseHandler);
    }

    if (applyLocally) {
        performLocally(stage, Optional.of(mutation), mutation::apply, responseHandler);
    }

    if (sameDcReplicas != null) {
        for (InetAddress destination : sameDcReplicas) {
            MessagingService.instance().sendRR(message, destination, responseHandler, true);
        }
    }

    if (remoteReplicasByDc != null) {
        // for each datacenter, send the message to one node to relay the write to other replicas
        for (Collection<InetAddress> dcTargets : remoteReplicasByDc.values()) {
            sendMessagesToNonlocalDC(message, dcTargets, responseHandler);
        }
    }
}
Also used : Hint(org.apache.cassandra.hints.Hint) InetAddress(java.net.InetAddress)

Example 3 with Hint

Use of org.apache.cassandra.hints.Hint in the Apache Cassandra project.

Class HintsServiceMetricsTest, method testHintsServiceMetrics.

@Test
public void testHintsServiceMetrics() throws Exception {
    // Start a 3-node cluster with a bytebuddy injection (FailHints) that makes the writing of some hints fail.
    try (Cluster cluster = builder().withNodes(3)
                                    .withConfig(config -> config.with(NETWORK, GOSSIP, NATIVE_PROTOCOL))
                                    .withInstanceInitializer(FailHints::install)
                                    .start()) {
        // Drop the first NUM_TIMEOUTS_PER_NODE hint requests that node1 sends to each of node2 and node3.
        AtomicInteger hintRequestsToNode2 = new AtomicInteger();
        AtomicInteger hintRequestsToNode3 = new AtomicInteger();
        cluster.filters().verbs(Verb.HINT_REQ.id).from(1).messagesMatching((from, to, message) -> {
            if (to == 2) {
                return hintRequestsToNode2.incrementAndGet() <= NUM_TIMEOUTS_PER_NODE;
            }
            if (to == 3) {
                return hintRequestsToNode3.incrementAndGet() <= NUM_TIMEOUTS_PER_NODE;
            }
            return false;
        }).drop();

        // Switchable filters that drop mutation requests from node1, so it creates hints for those mutations.
        AtomicBoolean blockWritesToNode2 = new AtomicBoolean(false);
        AtomicBoolean blockWritesToNode3 = new AtomicBoolean(false);
        cluster.filters().verbs(Verb.MUTATION_REQ.id).from(1).messagesMatching((from, to, message) -> {
            if (to == 2 && blockWritesToNode2.get()) {
                return true;
            }
            return to == 3 && blockWritesToNode3.get();
        }).drop();

        // Fix under-replicated keyspaces so they don't produce hint requests while we are dropping mutations.
        fixDistributedSchemas(cluster);
        cluster.schemaChange(withKeyspace("CREATE KEYSPACE %s WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 3}"));
        cluster.schemaChange(withKeyspace("CREATE TABLE %s.t (k int PRIMARY KEY, v int)"));

        ICoordinator coordinator = cluster.coordinator(1);
        IInvokableInstance hintingNode = cluster.get(1);
        IInvokableInstance node2 = cluster.get(2);
        IInvokableInstance node3 = cluster.get(3);

        final String insert = "INSERT INTO %s.t (k, v) VALUES (?, ?)";
        final int halfway = NUM_ROWS / 2;

        // First half of the rows: mutation requests to node2 are dropped, so hints are created for it.
        blockWritesToNode2.set(true);
        for (int key = 0; key < halfway; key++) {
            coordinator.execute(withKeyspace(insert), QUORUM, key, key);
        }
        blockWritesToNode2.set(false);

        // Second half of the rows: mutation requests to node3 are dropped, so hints are created for it.
        blockWritesToNode3.set(true);
        for (int key = halfway; key < NUM_ROWS; key++) {
            coordinator.execute(withKeyspace(insert), QUORUM, key, key);
        }
        blockWritesToNode3.set(false);

        // Wait until all the hints have been applied to the nodes that were missing mutations.
        waitUntilAsserted(() -> assertThat(countRows(node2)).isEqualTo(countRows(node3)).isEqualTo(NUM_ROWS));

        // Check the metrics of the coordinator node, the only one actually sending hints.
        // The injected delivery errors should make the service retry. Retries are periodic and paged,
        // so some hints that were already delivered may be sent again; hence there may be more
        // succeeded hints than actual hints/rows.
        waitUntilAsserted(() -> assertThat(countHintsSucceeded(hintingNode)).isGreaterThanOrEqualTo(NUM_ROWS));
        waitUntilAsserted(() -> assertThat(countHintsFailed(hintingNode)).isEqualTo(NUM_FAILURES_PER_NODE * 2));
        waitUntilAsserted(() -> assertThat(countHintsTimedOut(hintingNode)).isEqualTo(NUM_TIMEOUTS_PER_NODE * 2));

        // Delay metrics: global count first, then the per-endpoint counts relative to it.
        long globalDelays = countGlobalDelays(hintingNode);
        assertThat(globalDelays).isGreaterThanOrEqualTo(NUM_ROWS);
        assertThat(countEndpointDelays(hintingNode, hintingNode)).isEqualTo(0);
        assertThat(countEndpointDelays(hintingNode, node2)).isGreaterThan(0).isLessThanOrEqualTo(globalDelays);
        assertThat(countEndpointDelays(hintingNode, node3)).isGreaterThan(0).isLessThanOrEqualTo(globalDelays);
        assertThat(countEndpointDelays(hintingNode, node2) + countEndpointDelays(hintingNode, node3)).isGreaterThanOrEqualTo(globalDelays);

        // The non-coordinator nodes must report zero for every hint metric.
        for (IInvokableInstance node : Arrays.asList(node2, node3)) {
            assertThat(countHintsSucceeded(node)).isEqualTo(0);
            assertThat(countHintsFailed(node)).isEqualTo(0);
            assertThat(countHintsTimedOut(node)).isEqualTo(0);
            assertThat(countGlobalDelays(node)).isEqualTo(0);
            cluster.forEach(target -> assertThat(countEndpointDelays(node, target)).isEqualTo(0));
        }
    }
}
Also used : Arrays(java.util.Arrays) MethodDelegation(net.bytebuddy.implementation.MethodDelegation) ByteBuddy(net.bytebuddy.ByteBuddy) ElementMatchers.takesArguments(net.bytebuddy.matcher.ElementMatchers.takesArguments) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Callable(java.util.concurrent.Callable) MINUTES(java.util.concurrent.TimeUnit.MINUTES) NATIVE_PROTOCOL(org.apache.cassandra.distributed.api.Feature.NATIVE_PROTOCOL) ThrowingRunnable(org.awaitility.core.ThrowingRunnable) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ICoordinator(org.apache.cassandra.distributed.api.ICoordinator) Metrics(org.apache.cassandra.distributed.shared.Metrics) TestBaseImpl(org.apache.cassandra.distributed.test.TestBaseImpl) AssertionsForClassTypes.assertThat(org.assertj.core.api.AssertionsForClassTypes.assertThat) QUORUM(org.apache.cassandra.distributed.api.ConsistencyLevel.QUORUM) NETWORK(org.apache.cassandra.distributed.api.Feature.NETWORK) Awaitility.await(org.awaitility.Awaitility.await) ElementMatchers.named(net.bytebuddy.matcher.ElementMatchers.named) HintsServiceMetrics(org.apache.cassandra.metrics.HintsServiceMetrics) Test(org.junit.Test) Hint(org.apache.cassandra.hints.Hint) Verb(org.apache.cassandra.net.Verb) ClassLoadingStrategy(net.bytebuddy.dynamic.loading.ClassLoadingStrategy) SuperCall(net.bytebuddy.implementation.bind.annotation.SuperCall) IInvokableInstance(org.apache.cassandra.distributed.api.IInvokableInstance) Future(org.apache.cassandra.utils.concurrent.Future) Cluster(org.apache.cassandra.distributed.Cluster) SECONDS(java.util.concurrent.TimeUnit.SECONDS) GOSSIP(org.apache.cassandra.distributed.api.Feature.GOSSIP)

Example 4 with Hint

Use of org.apache.cassandra.hints.Hint in the Apache Cassandra project.

Class StorageProxy, method mutate.

/**
 * Applies the given mutations and blocks until every write response handler has completed.
 * <p>
 * Counter mutations go through {@code mutateCounter}; all other mutations go through
 * {@code performWrite}. If a write times out or fails and the consistency level is
 * {@code ANY}, the mutations are hinted instead of the error being propagated; for any other
 * consistency level the matching metrics are marked and the exception is rethrown.
 * Coordinator write latency is recorded whether or not the write succeeds.
 *
 * @param mutations the mutations to be applied across the replicas
 * @param consistencyLevel the consistency level for the operation
 * @param queryStartNanoTime the value of nanoTime() when the query started to be processed
 * @throws UnavailableException rethrown after marking the unavailable metrics
 * @throws OverloadedException rethrown after marking the unavailable metrics
 * @throws WriteTimeoutException rethrown after marking the timeout metrics, unless the consistency level is ANY
 * @throws WriteFailureException rethrown after marking the failure metrics, unless the consistency level is ANY
 */
public static void mutate(List<? extends IMutation> mutations, ConsistencyLevel consistencyLevel, long queryStartNanoTime) throws UnavailableException, OverloadedException, WriteTimeoutException, WriteFailureException {
    Tracing.trace("Determining replicas for mutation");
    final String localDataCenter = DatabaseDescriptor.getEndpointSnitch().getLocalDatacenter();

    long startTime = nanoTime();

    final int mutationCount = mutations.size();
    List<AbstractWriteResponseHandler<IMutation>> responseHandlers = new ArrayList<>(mutationCount);
    WriteType plainWriteType = mutationCount <= 1 ? WriteType.SIMPLE : WriteType.UNLOGGED_BATCH;

    try {
        for (IMutation mutation : mutations) {
            if (hasLocalMutation(mutation)) {
                writeMetrics.localRequests.mark();
            } else {
                writeMetrics.remoteRequests.mark();
            }

            AbstractWriteResponseHandler<IMutation> handler;
            if (mutation instanceof CounterMutation) {
                handler = mutateCounter((CounterMutation) mutation, localDataCenter, queryStartNanoTime);
            } else {
                handler = performWrite(mutation, consistencyLevel, localDataCenter, standardWritePerformer, null, plainWriteType, queryStartNanoTime);
            }
            responseHandlers.add(handler);
        }

        // upgrade to full quorum any failed cheap quorums
        for (int i = 0; i < mutationCount; i++) {
            IMutation candidate = mutations.get(i);
            // at the moment, only non-counter writes support cheap quorums
            if (candidate instanceof CounterMutation) {
                continue;
            }
            responseHandlers.get(i).maybeTryAdditionalReplicas(candidate, standardWritePerformer, localDataCenter);
        }

        // wait for writes.  throws TimeoutException if necessary
        for (AbstractWriteResponseHandler<IMutation> pending : responseHandlers) {
            pending.get();
        }
    } catch (WriteTimeoutException | WriteFailureException ex) {
        if (consistencyLevel != ConsistencyLevel.ANY) {
            if (ex instanceof WriteFailureException) {
                writeMetrics.failures.mark();
                writeMetricsForLevel(consistencyLevel).failures.mark();
                WriteFailureException failure = (WriteFailureException) ex;
                Tracing.trace("Write failure; received {} of {} required replies, failed {} requests", failure.received, failure.blockFor, failure.failureReasonByEndpoint.size());
            } else {
                writeMetrics.timeouts.mark();
                writeMetricsForLevel(consistencyLevel).timeouts.mark();
                WriteTimeoutException timeout = (WriteTimeoutException) ex;
                Tracing.trace("Write timeout; received {} of {} required replies", timeout.received, timeout.blockFor);
            }
            throw ex;
        }
        // At consistency level ANY the error is not propagated; the mutations are hinted instead.
        hintMutations(mutations);
    } catch (UnavailableException e) {
        writeMetrics.unavailables.mark();
        writeMetricsForLevel(consistencyLevel).unavailables.mark();
        Tracing.trace("Unavailable");
        throw e;
    } catch (OverloadedException e) {
        // Overload is counted under the "unavailables" metrics as well.
        writeMetrics.unavailables.mark();
        writeMetricsForLevel(consistencyLevel).unavailables.mark();
        Tracing.trace("Overloaded");
        throw e;
    } finally {
        // Latency is recorded on every exit path, successful or not.
        long latency = nanoTime() - startTime;
        writeMetrics.addNano(latency);
        writeMetricsForLevel(consistencyLevel).addNano(latency);
        updateCoordinatorWriteLatencyTableMetric(mutations, latency);
    }
}
Also used : IMutation(org.apache.cassandra.db.IMutation) WriteType(org.apache.cassandra.db.WriteType) ArrayList(java.util.ArrayList) UnavailableException(org.apache.cassandra.exceptions.UnavailableException) OverloadedException(org.apache.cassandra.exceptions.OverloadedException) Hint(org.apache.cassandra.hints.Hint) CounterMutation(org.apache.cassandra.db.CounterMutation) CasWriteTimeoutException(org.apache.cassandra.exceptions.CasWriteTimeoutException) WriteTimeoutException(org.apache.cassandra.exceptions.WriteTimeoutException) WriteFailureException(org.apache.cassandra.exceptions.WriteFailureException)

Aggregations

Hint (org.apache.cassandra.hints.Hint)4 InetAddress (java.net.InetAddress)1 ArrayList (java.util.ArrayList)1 Arrays (java.util.Arrays)1 UUID (java.util.UUID)1 Callable (java.util.concurrent.Callable)1 MINUTES (java.util.concurrent.TimeUnit.MINUTES)1 SECONDS (java.util.concurrent.TimeUnit.SECONDS)1 AtomicBoolean (java.util.concurrent.atomic.AtomicBoolean)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 ByteBuddy (net.bytebuddy.ByteBuddy)1 ClassLoadingStrategy (net.bytebuddy.dynamic.loading.ClassLoadingStrategy)1 MethodDelegation (net.bytebuddy.implementation.MethodDelegation)1 SuperCall (net.bytebuddy.implementation.bind.annotation.SuperCall)1 ElementMatchers.named (net.bytebuddy.matcher.ElementMatchers.named)1 ElementMatchers.takesArguments (net.bytebuddy.matcher.ElementMatchers.takesArguments)1 CounterMutation (org.apache.cassandra.db.CounterMutation)1 IMutation (org.apache.cassandra.db.IMutation)1 WriteType (org.apache.cassandra.db.WriteType)1 Cluster (org.apache.cassandra.distributed.Cluster)1