Example 16 with WriteTimeoutException

Use of org.apache.cassandra.exceptions.WriteTimeoutException in project cassandra by apache.

From the class StorageProxy, method mutateMV.

/**
 * Use this method to have these Mutations applied
 * across all replicas.
 *
 * @param mutations the mutations to be applied across the replicas
 * @param writeCommitLog if commitlog should be written
 * @param baseComplete time from epoch in ms that the local base mutation was (or will be) completed
 * @param queryStartNanoTime the value of nanoTime() when the query started to be processed
 */
public static void mutateMV(ByteBuffer dataKey, Collection<Mutation> mutations, boolean writeCommitLog, AtomicLong baseComplete, long queryStartNanoTime) throws UnavailableException, OverloadedException, WriteTimeoutException {
    Tracing.trace("Determining replicas for mutation");
    final String localDataCenter = DatabaseDescriptor.getEndpointSnitch().getLocalDatacenter();
    long startTime = nanoTime();
    try {
        // if we haven't joined the ring, write everything to batchlog because paired replicas may be stale
        final UUID batchUUID = UUIDGen.getTimeUUID();
        if (StorageService.instance.isStarting() || StorageService.instance.isJoining() || StorageService.instance.isMoving()) {
            BatchlogManager.store(Batch.createLocal(batchUUID, FBUtilities.timestampMicros(), mutations), writeCommitLog);
        } else {
            List<WriteResponseHandlerWrapper> wrappers = new ArrayList<>(mutations.size());
            // non-local mutations rely on the base mutation commit-log entry for eventual consistency
            Set<Mutation> nonLocalMutations = new HashSet<>(mutations);
            Token baseToken = StorageService.instance.getTokenMetadata().partitioner.getToken(dataKey);
            ConsistencyLevel consistencyLevel = ConsistencyLevel.ONE;
            // Since the base -> view replication is 1:1 we only need to store the batchlog locally
            ReplicaPlan.ForTokenWrite replicaPlan = ReplicaPlans.forLocalBatchlogWrite();
            BatchlogCleanup cleanup = new BatchlogCleanup(mutations.size(), () -> asyncRemoveFromBatchlog(replicaPlan, batchUUID));
            // add a handler for each mutation - includes checking availability, but doesn't initiate any writes, yet
            for (Mutation mutation : mutations) {
                if (hasLocalMutation(mutation))
                    writeMetrics.localRequests.mark();
                else
                    writeMetrics.remoteRequests.mark();
                String keyspaceName = mutation.getKeyspaceName();
                Token tk = mutation.key().getToken();
                AbstractReplicationStrategy replicationStrategy = Keyspace.open(keyspaceName).getReplicationStrategy();
                Optional<Replica> pairedEndpoint = ViewUtils.getViewNaturalEndpoint(replicationStrategy, baseToken, tk);
                EndpointsForToken pendingReplicas = StorageService.instance.getTokenMetadata().pendingEndpointsForToken(tk, keyspaceName);
                // if there are no paired endpoints there are probably range movements going on, so we write to the local batchlog to replay later
                if (!pairedEndpoint.isPresent()) {
                    if (pendingReplicas.isEmpty())
                        logger.warn("Received base materialized view mutation for key {} that does not belong " + "to this node. There is probably a range movement happening (move or decommission)," + "but this node hasn't updated its ring metadata yet. Adding mutation to " + "local batchlog to be replayed later.", mutation.key());
                    continue;
                }
                // if the paired endpoint is the local node, the node has joined the ring, and there are no pending replicas, apply the view update locally; otherwise perform a normal write so the view mutation is also sent to any pending endpoint
                if (pairedEndpoint.get().isSelf() && StorageService.instance.isJoined() && pendingReplicas.isEmpty()) {
                    try {
                        mutation.apply(writeCommitLog);
                        nonLocalMutations.remove(mutation);
                        // won't trigger cleanup
                        cleanup.decrement();
                    } catch (Exception exc) {
                        logger.error("Error applying local view update: Mutation (keyspace {}, tables {}, partition key {})", mutation.getKeyspaceName(), mutation.getTableIds(), mutation.key());
                        throw exc;
                    }
                } else {
                    ReplicaLayout.ForTokenWrite liveAndDown = ReplicaLayout.forTokenWrite(replicationStrategy, EndpointsForToken.of(tk, pairedEndpoint.get()), pendingReplicas);
                    wrappers.add(wrapViewBatchResponseHandler(mutation, consistencyLevel, consistencyLevel, liveAndDown, baseComplete, WriteType.BATCH, cleanup, queryStartNanoTime));
                }
            }
            // Apply to local batchlog memtable in this thread
            if (!nonLocalMutations.isEmpty())
                BatchlogManager.store(Batch.createLocal(batchUUID, FBUtilities.timestampMicros(), nonLocalMutations), writeCommitLog);
            // Perform remote writes
            if (!wrappers.isEmpty())
                asyncWriteBatchedMutations(wrappers, localDataCenter, Stage.VIEW_MUTATION);
        }
    } finally {
        viewWriteMetrics.addNano(nanoTime() - startTime);
    }
}
Also used: EndpointsForToken(org.apache.cassandra.locator.EndpointsForToken) ReplicaPlan(org.apache.cassandra.locator.ReplicaPlan) ArrayList(java.util.ArrayList) Token(org.apache.cassandra.dht.Token) Replica(org.apache.cassandra.locator.Replica) OverloadedException(org.apache.cassandra.exceptions.OverloadedException) ReadAbortException(org.apache.cassandra.exceptions.ReadAbortException) RejectException(org.apache.cassandra.db.RejectException) CasWriteTimeoutException(org.apache.cassandra.exceptions.CasWriteTimeoutException) WriteFailureException(org.apache.cassandra.exceptions.WriteFailureException) InvalidRequestException(org.apache.cassandra.exceptions.InvalidRequestException) RequestTimeoutException(org.apache.cassandra.exceptions.RequestTimeoutException) ReadTimeoutException(org.apache.cassandra.exceptions.ReadTimeoutException) CasWriteUnknownResultException(org.apache.cassandra.exceptions.CasWriteUnknownResultException) TimeoutException(java.util.concurrent.TimeoutException) UnavailableException(org.apache.cassandra.exceptions.UnavailableException) WriteTimeoutException(org.apache.cassandra.exceptions.WriteTimeoutException) UncheckedInterruptedException(org.apache.cassandra.utils.concurrent.UncheckedInterruptedException) TombstoneOverwhelmingException(org.apache.cassandra.db.filter.TombstoneOverwhelmingException) RequestFailureException(org.apache.cassandra.exceptions.RequestFailureException) IsBootstrappingException(org.apache.cassandra.exceptions.IsBootstrappingException) ReadFailureException(org.apache.cassandra.exceptions.ReadFailureException) ConsistencyLevel(org.apache.cassandra.db.ConsistencyLevel) ReplicaLayout(org.apache.cassandra.locator.ReplicaLayout) BatchlogCleanup(org.apache.cassandra.service.BatchlogResponseHandler.BatchlogCleanup) AbstractReplicationStrategy(org.apache.cassandra.locator.AbstractReplicationStrategy) Mutation(org.apache.cassandra.db.Mutation) CounterMutation(org.apache.cassandra.db.CounterMutation) IMutation(org.apache.cassandra.db.IMutation) UUID(java.util.UUID) HashSet(java.util.HashSet)
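
For orientation, here is a minimal sketch of how a coordinator-side caller might invoke mutateMV and react to the exceptions it declares. The helper name applyViewMutations is hypothetical, and System.nanoTime() stands in for Cassandra's internal clock; only StorageProxy.mutateMV and the exception types come from the example above.

import java.nio.ByteBuffer;
import java.util.Collection;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.cassandra.db.Mutation;
import org.apache.cassandra.exceptions.OverloadedException;
import org.apache.cassandra.exceptions.UnavailableException;
import org.apache.cassandra.exceptions.WriteTimeoutException;
import org.apache.cassandra.service.StorageProxy;

public final class ViewWriteExample {
    // Hypothetical helper: pushes base-table mutations through the MV write path
    // and reports which failure mode occurred. All three exceptions are unchecked.
    static void applyViewMutations(ByteBuffer dataKey, Collection<Mutation> mutations) {
        AtomicLong baseComplete = new AtomicLong(Long.MAX_VALUE);
        try {
            StorageProxy.mutateMV(dataKey, mutations, true, baseComplete, System.nanoTime());
        } catch (WriteTimeoutException e) {
            // Not enough acks within write_request_timeout: e.received of e.blockFor
            // responses arrived for write type e.writeType (BATCH here).
            System.err.printf("view write timed out: %d/%d acks (%s)%n", e.received, e.blockFor, e.writeType);
        } catch (UnavailableException | OverloadedException e) {
            // Rejected before any write was dispatched.
            System.err.println("view write rejected: " + e);
        }
    }
}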

Example 17 with WriteTimeoutException

Use of org.apache.cassandra.exceptions.WriteTimeoutException in project cassandra by apache.

From the class StorageProxy, method readWithPaxos.

private static PartitionIterator readWithPaxos(SinglePartitionReadCommand.Group group, ConsistencyLevel consistencyLevel, ClientState state, long queryStartNanoTime) throws InvalidRequestException, UnavailableException, ReadFailureException, ReadTimeoutException {
    assert state != null;
    if (group.queries.size() > 1)
        throw new InvalidRequestException("SERIAL/LOCAL_SERIAL consistency may only be requested for one partition at a time");
    long start = nanoTime();
    SinglePartitionReadCommand command = group.queries.get(0);
    TableMetadata metadata = command.metadata();
    DecoratedKey key = command.partitionKey();
    // calculate blockFor before repairing any paxos round, to avoid the replication strategy (RS) being altered in between.
    int blockForRead = consistencyLevel.blockFor(Keyspace.open(metadata.keyspace).getReplicationStrategy());
    PartitionIterator result = null;
    try {
        final ConsistencyLevel consistencyForReplayCommitsOrFetch = consistencyLevel == ConsistencyLevel.LOCAL_SERIAL ? ConsistencyLevel.LOCAL_QUORUM : ConsistencyLevel.QUORUM;
        try {
            // Commit an empty update to make sure all in-progress updates that should be finished first are, _and_
            // that no other in-progress update can get resurrected.
            Supplier<Pair<PartitionUpdate, RowIterator>> updateProposer = Paxos.getPaxosVariant() == Config.PaxosVariant.v1_without_linearizable_reads ? () -> null : () -> Pair.create(PartitionUpdate.emptyUpdate(metadata, key), null);
            // When replaying, we commit at quorum/local quorum, as we want to be sure the following read (done at
            // quorum/local_quorum) sees any replayed updates. Our own update is however empty, and empty updates don't
            // even get committed, due to an optimization described in doPaxos/beginAndRepairPaxos, so the commit
            // consistency is irrelevant (we use ANY just to emphasize that we don't wait on our commit).
            doPaxos(metadata, key, consistencyLevel, consistencyForReplayCommitsOrFetch, ConsistencyLevel.ANY, start, casReadMetrics, updateProposer);
        } catch (WriteTimeoutException e) {
            throw new ReadTimeoutException(consistencyLevel, 0, blockForRead, false);
        } catch (WriteFailureException e) {
            throw new ReadFailureException(consistencyLevel, e.received, e.blockFor, false, e.failureReasonByEndpoint);
        }
        result = fetchRows(group.queries, consistencyForReplayCommitsOrFetch, queryStartNanoTime);
    } catch (UnavailableException e) {
        readMetrics.unavailables.mark();
        casReadMetrics.unavailables.mark();
        readMetricsForLevel(consistencyLevel).unavailables.mark();
        logRequestException(e, group.queries);
        throw e;
    } catch (ReadTimeoutException e) {
        readMetrics.timeouts.mark();
        casReadMetrics.timeouts.mark();
        readMetricsForLevel(consistencyLevel).timeouts.mark();
        logRequestException(e, group.queries);
        throw e;
    } catch (ReadAbortException e) {
        readMetrics.markAbort(e);
        casReadMetrics.markAbort(e);
        readMetricsForLevel(consistencyLevel).markAbort(e);
        throw e;
    } catch (ReadFailureException e) {
        readMetrics.failures.mark();
        casReadMetrics.failures.mark();
        readMetricsForLevel(consistencyLevel).failures.mark();
        throw e;
    } finally {
        long latency = nanoTime() - start;
        readMetrics.addNano(latency);
        casReadMetrics.addNano(latency);
        readMetricsForLevel(consistencyLevel).addNano(latency);
        Keyspace.open(metadata.keyspace).getColumnFamilyStore(metadata.name).metric.coordinatorReadLatency.update(latency, TimeUnit.NANOSECONDS);
    }
    return result;
}
Also used: TableMetadata(org.apache.cassandra.schema.TableMetadata) ReadFailureException(org.apache.cassandra.exceptions.ReadFailureException) ReadTimeoutException(org.apache.cassandra.exceptions.ReadTimeoutException) SinglePartitionReadCommand(org.apache.cassandra.db.SinglePartitionReadCommand) DecoratedKey(org.apache.cassandra.db.DecoratedKey) UnavailableException(org.apache.cassandra.exceptions.UnavailableException) ReadAbortException(org.apache.cassandra.exceptions.ReadAbortException) Hint(org.apache.cassandra.hints.Hint) ConsistencyLevel(org.apache.cassandra.db.ConsistencyLevel) CasWriteTimeoutException(org.apache.cassandra.exceptions.CasWriteTimeoutException) WriteTimeoutException(org.apache.cassandra.exceptions.WriteTimeoutException) WriteFailureException(org.apache.cassandra.exceptions.WriteFailureException) UnfilteredPartitionIterator(org.apache.cassandra.db.partitions.UnfilteredPartitionIterator) PartitionIterator(org.apache.cassandra.db.partitions.PartitionIterator) InvalidRequestException(org.apache.cassandra.exceptions.InvalidRequestException) Pair(org.apache.cassandra.utils.Pair)
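
The pattern worth noting in this example is the exception translation at the paxos step: a timeout or failure on the write-shaped paxos round is re-thrown in read-shaped form, because the client issued a read. A minimal sketch of that translation follows, assuming a hypothetical runPaxosPhase wrapper; the exception constructors match the ones used above.

import org.apache.cassandra.db.ConsistencyLevel;
import org.apache.cassandra.exceptions.ReadFailureException;
import org.apache.cassandra.exceptions.ReadTimeoutException;
import org.apache.cassandra.exceptions.WriteFailureException;
import org.apache.cassandra.exceptions.WriteTimeoutException;

final class PaxosReadTranslation {
    // Hypothetical wrapper around the paxos round performed on the read path.
    static void runPaxosPhase(Runnable paxosRound, ConsistencyLevel cl, int blockForRead) {
        try {
            paxosRound.run();
        } catch (WriteTimeoutException e) {
            // Surface a read timeout; received is 0 because the read itself never started.
            throw new ReadTimeoutException(cl, 0, blockForRead, false);
        } catch (WriteFailureException e) {
            // Map the write failure onto the read-failure shape, preserving
            // the per-endpoint failure reasons for the client.
            throw new ReadFailureException(cl, e.received, e.blockFor, false, e.failureReasonByEndpoint);
        }
    }
}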

Example 18 with WriteTimeoutException

Use of org.apache.cassandra.exceptions.WriteTimeoutException in project cassandra by apache.

From the class Keyspace, method applyInternal.

/**
 * This method appends a row to the global CommitLog, then updates memtables and indexes.
 *
 * @param mutation       the row to write.  Must not be modified after calling apply, since commitlog append
 *                       may happen concurrently, depending on the CL Executor type.
 * @param makeDurable    if true, don't return unless write has been made durable
 * @param updateIndexes  false to disable index updates (used by CollationController "defragmenting")
 * @param isDroppable    true if this should throw WriteTimeoutException when it cannot acquire the lock within write_request_timeout
 * @param isDeferrable   true if caller is not waiting for future to complete, so that future may be deferred
 */
private Future<?> applyInternal(final Mutation mutation, final boolean makeDurable, boolean updateIndexes, boolean isDroppable, boolean isDeferrable, Promise<?> future) {
    if (TEST_FAIL_WRITES && metadata.name.equals(TEST_FAIL_WRITES_KS))
        throw new RuntimeException("Testing write failures");
    Lock[] locks = null;
    boolean requiresViewUpdate = updateIndexes && viewManager.updatesAffectView(Collections.singleton(mutation), false);
    if (requiresViewUpdate) {
        mutation.viewLockAcquireStart.compareAndSet(0L, currentTimeMillis());
        // the order of lock acquisition doesn't matter (from a deadlock perspective) because we only use tryLock()
        Collection<TableId> tableIds = mutation.getTableIds();
        Iterator<TableId> idIterator = tableIds.iterator();
        locks = new Lock[tableIds.size()];
        for (int i = 0; i < tableIds.size(); i++) {
            TableId tableId = idIterator.next();
            int lockKey = Objects.hash(mutation.key().getKey(), tableId);
            while (true) {
                Lock lock = null;
                if (TEST_FAIL_MV_LOCKS_COUNT == 0)
                    lock = ViewManager.acquireLockFor(lockKey);
                else
                    TEST_FAIL_MV_LOCKS_COUNT--;
                if (lock == null) {
                    // throw WTE only if request is droppable
                    if (isDroppable && (approxTime.isAfter(mutation.approxCreatedAtNanos + DatabaseDescriptor.getWriteRpcTimeout(NANOSECONDS)))) {
                        for (int j = 0; j < i; j++) locks[j].unlock();
                        if (logger.isTraceEnabled())
                            logger.trace("Could not acquire lock for {} and table {}", ByteBufferUtil.bytesToHex(mutation.key().getKey()), columnFamilyStores.get(tableId).name);
                        Tracing.trace("Could not acquire MV lock");
                        if (future != null) {
                            future.tryFailure(new WriteTimeoutException(WriteType.VIEW, ConsistencyLevel.LOCAL_ONE, 0, 1));
                            return future;
                        } else
                            throw new WriteTimeoutException(WriteType.VIEW, ConsistencyLevel.LOCAL_ONE, 0, 1);
                    } else if (isDeferrable) {
                        for (int j = 0; j < i; j++) locks[j].unlock();
                        // This view update can't happen right now, so rather than keeping this thread busy
                        // we re-apply ourselves to the queue and try again later
                        Stage.MUTATION.execute(() -> applyInternal(mutation, makeDurable, true, isDroppable, true, future));
                        return future;
                    } else {
                        // retry the lock on this thread: deferring here could leave all MutationStage workers
                        // being blocked by waiting for futures which will never be processed as all workers are blocked
                        try {
                            // Wait a little bit before retrying to lock
                            Thread.sleep(10);
                        } catch (InterruptedException e) {
                            throw new UncheckedInterruptedException(e);
                        }
                        continue;
                    }
                } else {
                    locks[i] = lock;
                }
                break;
            }
        }
        long acquireTime = currentTimeMillis() - mutation.viewLockAcquireStart.get();
        // Bulk non-droppable operations (e.g. commitlog replay, hint delivery) are not measured
        if (isDroppable) {
            for (TableId tableId : tableIds) columnFamilyStores.get(tableId).metric.viewLockAcquireTime.update(acquireTime, MILLISECONDS);
        }
    }
    int nowInSec = FBUtilities.nowInSeconds();
    try (WriteContext ctx = getWriteHandler().beginWrite(mutation, makeDurable)) {
        for (PartitionUpdate upd : mutation.getPartitionUpdates()) {
            ColumnFamilyStore cfs = columnFamilyStores.get(upd.metadata().id);
            if (cfs == null) {
                logger.error("Attempting to mutate non-existant table {} ({}.{})", upd.metadata().id, upd.metadata().keyspace, upd.metadata().name);
                continue;
            }
            AtomicLong baseComplete = new AtomicLong(Long.MAX_VALUE);
            if (requiresViewUpdate) {
                try {
                    Tracing.trace("Creating materialized view mutations from base table replica");
                    viewManager.forTable(upd.metadata().id).pushViewReplicaUpdates(upd, makeDurable, baseComplete);
                } catch (Throwable t) {
                    JVMStabilityInspector.inspectThrowable(t);
                    logger.error(String.format("Unknown exception caught while attempting to update MaterializedView! %s", upd.metadata().toString()), t);
                    throw t;
                }
            }
            UpdateTransaction indexTransaction = updateIndexes ? cfs.indexManager.newUpdateTransaction(upd, ctx, nowInSec) : UpdateTransaction.NO_OP;
            cfs.getWriteHandler().write(upd, ctx, indexTransaction);
            if (requiresViewUpdate)
                baseComplete.set(currentTimeMillis());
        }
        if (future != null) {
            future.trySuccess(null);
        }
        return future;
    } finally {
        if (locks != null) {
            for (Lock lock : locks) if (lock != null)
                lock.unlock();
        }
    }
}
Also used: TableId(org.apache.cassandra.schema.TableId) UpdateTransaction(org.apache.cassandra.index.transactions.UpdateTransaction) UncheckedInterruptedException(org.apache.cassandra.utils.concurrent.UncheckedInterruptedException) Lock(java.util.concurrent.locks.Lock) WriteTimeoutException(org.apache.cassandra.exceptions.WriteTimeoutException) AtomicLong(java.util.concurrent.atomic.AtomicLong) PartitionUpdate(org.apache.cassandra.db.partitions.PartitionUpdate)
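
The core of this example is the try-lock-or-timeout loop around the materialized-view lock. Here is a minimal sketch of that pattern, using a single illustrative ReentrantLock in place of ViewManager's per-key lock striping; withViewLock and the deadline parameter are hypothetical, while the WriteTimeoutException constructor matches the one used above.

import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.cassandra.db.ConsistencyLevel;
import org.apache.cassandra.db.WriteType;
import org.apache.cassandra.exceptions.WriteTimeoutException;

final class ViewLockExample {
    // Single illustrative lock standing in for ViewManager's per-key lock striping.
    private static final Lock LOCK = new ReentrantLock();

    // Try to take the MV lock until the supplied deadline; on expiry, fail the way
    // applyInternal does: a VIEW write timeout reporting 0 of 1 acks received.
    static void withViewLock(long deadlineNanos, Runnable criticalSection) throws InterruptedException {
        while (!LOCK.tryLock()) {
            if (System.nanoTime() - deadlineNanos >= 0)
                throw new WriteTimeoutException(WriteType.VIEW, ConsistencyLevel.LOCAL_ONE, 0, 1);
            Thread.sleep(10); // brief back-off before retrying, as in applyInternal
        }
        try {
            criticalSection.run();
        } finally {
            LOCK.unlock();
        }
    }
}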

Aggregations

WriteTimeoutException (org.apache.cassandra.exceptions.WriteTimeoutException): 18 usages
CasWriteTimeoutException (org.apache.cassandra.exceptions.CasWriteTimeoutException): 10 usages
WriteFailureException (org.apache.cassandra.exceptions.WriteFailureException): 7 usages
UnavailableException (org.apache.cassandra.exceptions.UnavailableException): 6 usages
ConsistencyLevel (org.apache.cassandra.db.ConsistencyLevel): 5 usages
UncheckedInterruptedException (org.apache.cassandra.utils.concurrent.UncheckedInterruptedException): 5 usages
UUID (java.util.UUID): 4 usages
CasWriteUnknownResultException (org.apache.cassandra.exceptions.CasWriteUnknownResultException): 4 usages
InvalidRequestException (org.apache.cassandra.exceptions.InvalidRequestException): 4 usages
OverloadedException (org.apache.cassandra.exceptions.OverloadedException): 4 usages
ReadAbortException (org.apache.cassandra.exceptions.ReadAbortException): 4 usages
ReadFailureException (org.apache.cassandra.exceptions.ReadFailureException): 4 usages
ReadTimeoutException (org.apache.cassandra.exceptions.ReadTimeoutException): 4 usages
Hint (org.apache.cassandra.hints.Hint): 4 usages
Test (org.junit.Test): 4 usages
ArrayList (java.util.ArrayList): 3 usages
TimeoutException (java.util.concurrent.TimeoutException): 3 usages
Lock (java.util.concurrent.locks.Lock): 3 usages
CounterMutation (org.apache.cassandra.db.CounterMutation): 3 usages
IMutation (org.apache.cassandra.db.IMutation): 3 usages