Search in sources :

Example 1 with PartitionUpdate

use of org.apache.cassandra.db.partitions.PartitionUpdate in project cassandra by apache.

the class BatchStatement method verifyBatchSize.

/**
 * Compares the total data size of a batch against the configured warn and fail thresholds.
 * <p>
 * Batches of at most one mutation are never checked. When the size exceeds the fail
 * threshold, the violation is traced, logged at error level and the request is rejected.
 * When it only exceeds the warn threshold, it is logged at warn level (if enabled) and a
 * warning is registered through {@code ClientWarn}.
 *
 * @param mutations - the batch mutations.
 * @throws InvalidRequestException if the batch size exceeds the fail threshold.
 */
private static void verifyBatchSize(Collection<? extends IMutation> mutations) throws InvalidRequestException {
    // We only warn for batch spanning multiple mutations (#10876)
    if (mutations.size() <= 1) {
        return;
    }

    long warnLimit = DatabaseDescriptor.getBatchSizeWarnThreshold();
    long batchBytes = IMutation.dataSize(mutations);
    if (batchBytes <= warnLimit) {
        // Within the warn threshold: nothing to report.
        return;
    }

    // Collect the distinct tables touched by the batch for the diagnostic message.
    Set<String> affectedTables = new HashSet<>();
    for (IMutation batchEntry : mutations) {
        for (PartitionUpdate partitionUpdate : batchEntry.getPartitionUpdates()) {
            affectedTables.add(partitionUpdate.metadata().toString());
        }
    }

    long failLimit = DatabaseDescriptor.getBatchSizeFailThreshold();
    String template = "Batch for {} is of size {}, exceeding specified threshold of {} by {}.{}";

    if (batchBytes > failLimit) {
        String failSuffix = " (see batch_size_fail_threshold_in_kb)";
        Tracing.trace(template, affectedTables, FBUtilities.prettyPrintMemory(batchBytes), FBUtilities.prettyPrintMemory(failLimit), FBUtilities.prettyPrintMemory(batchBytes - failLimit), failSuffix);
        logger.error(template, affectedTables, FBUtilities.prettyPrintMemory(batchBytes), FBUtilities.prettyPrintMemory(failLimit), FBUtilities.prettyPrintMemory(batchBytes - failLimit), failSuffix);
        throw new InvalidRequestException("Batch too large");
    }

    if (logger.isWarnEnabled()) {
        logger.warn(template, affectedTables, FBUtilities.prettyPrintMemory(batchBytes), FBUtilities.prettyPrintMemory(warnLimit), FBUtilities.prettyPrintMemory(batchBytes - warnLimit), "");
    }

    // The client-facing warning carries the raw numeric values rather than the pretty-printed ones.
    Object[] clientArgs = new Object[] { affectedTables, batchBytes, warnLimit, batchBytes - warnLimit, "" };
    ClientWarn.instance.warn(MessageFormatter.arrayFormat(template, clientArgs).getMessage());
}
Also used : PartitionUpdate(org.apache.cassandra.db.partitions.PartitionUpdate)

Example 2 with PartitionUpdate

use of org.apache.cassandra.db.partitions.PartitionUpdate in project cassandra by apache.

the class CQL3CasRequest method makeUpdates.

/**
 * Builds a single {@link PartitionUpdate} by applying every queued row update
 * against the given current partition state.
 *
 * @param current the partition contents the row updates are applied against
 * @return the assembled update, after it has been passed to the index manager's validation
 * @throws InvalidRequestException declared by the signature; raised by the calls below, if at all
 */
public PartitionUpdate makeUpdates(FilteredPartition current) throws InvalidRequestException {
    PartitionUpdate assembled = new PartitionUpdate(metadata, key, updatedColumns(), conditions.size());
    for (RowUpdate rowUpdate : updates) {
        rowUpdate.applyUpdates(current, assembled);
    }
    // Run the index manager's validation on the assembled update before handing it back.
    Keyspace.openAndGetStore(metadata).indexManager.validate(assembled);
    return assembled;
}
Also used : PartitionUpdate(org.apache.cassandra.db.partitions.PartitionUpdate)

Example 3 with PartitionUpdate

use of org.apache.cassandra.db.partitions.PartitionUpdate in project cassandra by apache.

the class CommitLogReader method readMutation.

/**
 * Deserializes a Mutation from the raw buffer, validates it, and hands it to the
 * supplied handler when it lies after the requested minimum position.
 * <p>
 * Mutations that reference an unknown table are counted in {@code invalidMutations}
 * and skipped. Any other failure dumps the raw bytes to a temporary file and is
 * reported to the handler as an unrecoverable error.
 *
 * @param handler Handler that will take action based on deserialized Mutations
 * @param inputBuffer raw byte array w/Mutation data
 * @param size deserialized size of mutation
 * @param minPosition We need to suppress replay of mutations that are before the required minPosition
 * @param entryLocation filePointer offset of mutation within CommitLogSegment
 * @param desc CommitLogDescriptor being worked on
 * @throws IOException if writing the diagnostic dump file fails, or if thrown by the handler
 */
@VisibleForTesting
protected void readMutation(CommitLogReadHandler handler, byte[] inputBuffer, int size, CommitLogPosition minPosition, final int entryLocation, final CommitLogDescriptor desc) throws IOException {
    // For now the mutation is always deserialized, even if we're behind the requested
    // minPosition within this SyncSegment: that is how its size is determined and the
    // file pointer moved forward accordingly.
    boolean shouldReplay = entryLocation > minPosition.position;
    final Mutation mutation;
    try (RebufferingInputStream bufIn = new DataInputBuffer(inputBuffer, 0, size)) {
        mutation = Mutation.serializer.deserialize(bufIn, desc.getMessagingVersion(), SerializationHelper.Flag.LOCAL);
        // double-check that what we read is still valid for the current schema
        for (PartitionUpdate partitionUpdate : mutation.getPartitionUpdates()) {
            partitionUpdate.validate();
        }
    } catch (UnknownTableException ex) {
        if (ex.id == null) {
            return;
        }
        // Count the skipped mutation against the unknown table's id.
        AtomicInteger skipped = invalidMutations.get(ex.id);
        if (skipped != null) {
            skipped.incrementAndGet();
        } else {
            invalidMutations.put(ex.id, new AtomicInteger(1));
        }
        return;
    } catch (Throwable t) {
        JVMStabilityInspector.inspectThrowable(t);
        // Preserve the offending bytes on disk so the failure can be investigated.
        File dumpFile = File.createTempFile("mutation", "dat");
        try (DataOutputStream out = new DataOutputStream(new FileOutputStream(dumpFile))) {
            out.write(inputBuffer, 0, size);
        }
        // Checksum passed so this error can't be permissible.
        String description = String.format("Unexpected error deserializing mutation; saved to %s.  " + "This may be caused by replaying a mutation against a table with the same name but incompatible schema.  " + "Exception follows: %s", dumpFile.getAbsolutePath(), t);
        handler.handleUnrecoverableError(new CommitLogReadException(description, CommitLogReadErrorReason.MUTATION_ERROR, false));
        return;
    }

    if (logger.isTraceEnabled()) {
        logger.trace("Read mutation for {}.{}: {}", mutation.getKeyspaceName(), mutation.key(), "{" + StringUtils.join(mutation.getPartitionUpdates().iterator(), ", ") + "}");
    }
    if (shouldReplay) {
        handler.handleMutation(mutation, size, entryLocation, desc);
    }
}
Also used : UnknownTableException(org.apache.cassandra.exceptions.UnknownTableException) DataInputBuffer(org.apache.cassandra.io.util.DataInputBuffer) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) CommitLogReadException(org.apache.cassandra.db.commitlog.CommitLogReadHandler.CommitLogReadException) RebufferingInputStream(org.apache.cassandra.io.util.RebufferingInputStream) Mutation(org.apache.cassandra.db.Mutation) PartitionUpdate(org.apache.cassandra.db.partitions.PartitionUpdate) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 4 with PartitionUpdate

use of org.apache.cassandra.db.partitions.PartitionUpdate in project cassandra by apache.

the class Keyspace method applyInternal.

/**
     * This method appends a row to the global CommitLog, then updates memtables and indexes.
     * <p>
     * When the mutation affects a materialized view, one lock per table id in the mutation is
     * acquired first (keyed on the partition key and table id); all acquired locks are released
     * in the {@code finally} block once the write has been applied or has failed.
     *
     * @param mutation       the row to write.  Must not be modified after calling apply, since commitlog append
     *                       may happen concurrently, depending on the CL Executor type.
     * @param writeCommitLog false to disable commitlog append entirely
     * @param updateIndexes  false to disable index updates (used by CollationController "defragmenting")
     * @param isDroppable    true if this should throw WriteTimeoutException if it does not acquire lock within write_request_timeout_in_ms
     * @param isDeferrable   true if caller is not waiting for future to complete, so that future may be deferred
     * @param future         may be null; when non-null it is completed with {@code null} after the mutation has been
     *                       applied, or completed exceptionally with a WriteTimeoutException if a droppable
     *                       mutation could not acquire its view locks in time
     * @return the {@code future} argument that was passed in (possibly null)
     * @throws WriteTimeoutException if the mutation is droppable, the view lock could not be acquired within the
     *                               write RPC timeout, and {@code future} is null
     * @throws RuntimeException      if the TEST_FAIL_WRITES flag is set and this keyspace is TEST_FAIL_WRITES_KS
     */
private CompletableFuture<?> applyInternal(final Mutation mutation, final boolean writeCommitLog, boolean updateIndexes, boolean isDroppable, boolean isDeferrable, CompletableFuture<?> future) {
    if (TEST_FAIL_WRITES && metadata.name.equals(TEST_FAIL_WRITES_KS))
        throw new RuntimeException("Testing write failures");
    // Stays null unless view locks are needed; the finally block below relies on that.
    Lock[] locks = null;
    // View locking is only considered when index updates are enabled and the view manager
    // reports that this mutation affects a view.
    boolean requiresViewUpdate = updateIndexes && viewManager.updatesAffectView(Collections.singleton(mutation), false);
    if (requiresViewUpdate) {
        // Only set if still 0 — presumably so a re-queued attempt keeps its first start time (TODO confirm).
        mutation.viewLockAcquireStart.compareAndSet(0L, System.currentTimeMillis());
        // the order of lock acquisition doesn't matter (from a deadlock perspective) because we only use tryLock()
        Collection<TableId> tableIds = mutation.getTableIds();
        Iterator<TableId> idIterator = tableIds.iterator();
        locks = new Lock[tableIds.size()];
        for (int i = 0; i < tableIds.size(); i++) {
            TableId tableId = idIterator.next();
            // One lock per (partition key, table id) pair.
            int lockKey = Objects.hash(mutation.key().getKey(), tableId);
            // Loop until the lock for this table is held, or one of the branches below returns/throws.
            while (true) {
                Lock lock = null;
                // While TEST_FAIL_MV_LOCKS_COUNT is non-zero the acquisition is skipped and the counter
                // decremented, leaving lock == null (looks like a test hook that simulates lock failures).
                if (TEST_FAIL_MV_LOCKS_COUNT == 0)
                    lock = ViewManager.acquireLockFor(lockKey);
                else
                    TEST_FAIL_MV_LOCKS_COUNT--;
                if (lock == null) {
                    //throw WTE only if request is droppable
                    if (isDroppable && (System.currentTimeMillis() - mutation.createdAt) > DatabaseDescriptor.getWriteRpcTimeout()) {
                        // Timed out: release the locks already held for earlier tables before failing.
                        for (int j = 0; j < i; j++) locks[j].unlock();
                        logger.trace("Could not acquire lock for {} and table {}", ByteBufferUtil.bytesToHex(mutation.key().getKey()), columnFamilyStores.get(tableId).name);
                        Tracing.trace("Could not acquire MV lock");
                        // Report the timeout through the future when there is one, otherwise by throwing.
                        if (future != null) {
                            future.completeExceptionally(new WriteTimeoutException(WriteType.VIEW, ConsistencyLevel.LOCAL_ONE, 0, 1));
                            return future;
                        } else
                            throw new WriteTimeoutException(WriteType.VIEW, ConsistencyLevel.LOCAL_ONE, 0, 1);
                    } else if (isDeferrable) {
                        // Release the locks already held for earlier tables before re-queueing.
                        for (int j = 0; j < i; j++) locks[j].unlock();
                        // This view update can't happen right now. so rather than keep this thread busy
                        // we will re-apply ourself to the queue and try again later
                        final CompletableFuture<?> mark = future;
                        // The retry is submitted with updateIndexes = true and isDeferrable = true.
                        StageManager.getStage(Stage.MUTATION).execute(() -> applyInternal(mutation, writeCommitLog, true, isDroppable, true, mark));
                        return future;
                    } else {
                        // Neither timed out nor deferrable: retry the lock on this same thread.
                        // NOTE(review): the original comment is truncated here; the surviving fragment reads:
                        // being blocked by waiting for futures which will never be processed as all workers are blocked
                        try {
                            // Wait a little bit before retrying to lock
                            Thread.sleep(10);
                        } catch (InterruptedException e) {
                        // Just continue
                        // NOTE(review): the interrupt is swallowed and the thread's interrupt status is not restored.
                        }
                        continue;
                    }
                } else {
                    locks[i] = lock;
                }
                break;
            }
        }
        // Elapsed time since viewLockAcquireStart was first recorded for this mutation.
        long acquireTime = System.currentTimeMillis() - mutation.viewLockAcquireStart.get();
        // Bulk non-droppable operations (e.g. commitlog replay, hint delivery) are not measured
        if (isDroppable) {
            for (TableId tableId : tableIds) columnFamilyStores.get(tableId).metric.viewLockAcquireTime.update(acquireTime, TimeUnit.MILLISECONDS);
        }
    }
    int nowInSec = FBUtilities.nowInSeconds();
    // The op-order group is closed automatically when the try block exits.
    try (OpOrder.Group opGroup = writeOrder.start()) {
        // write the mutation to the commitlog and memtables
        // commitLogPosition stays null when the commit log append is disabled.
        CommitLogPosition commitLogPosition = null;
        if (writeCommitLog) {
            Tracing.trace("Appending to commitlog");
            commitLogPosition = CommitLog.instance.add(mutation);
        }
        for (PartitionUpdate upd : mutation.getPartitionUpdates()) {
            ColumnFamilyStore cfs = columnFamilyStores.get(upd.metadata().id);
            if (cfs == null) {
                // No store is registered for this table id: log it and skip this update, applying the rest.
                logger.error("Attempting to mutate non-existant table {} ({}.{})", upd.metadata().id, upd.metadata().keyspace, upd.metadata().name);
                continue;
            }
            // Starts at Long.MAX_VALUE, is handed to the view update below, and is set to the
            // current time once the base table write has been applied.
            AtomicLong baseComplete = new AtomicLong(Long.MAX_VALUE);
            if (requiresViewUpdate) {
                try {
                    Tracing.trace("Creating materialized view mutations from base table replica");
                    viewManager.forTable(upd.metadata().id).pushViewReplicaUpdates(upd, writeCommitLog, baseComplete);
                } catch (Throwable t) {
                    // Log for diagnosis, then rethrow so the failure is not hidden from the caller.
                    JVMStabilityInspector.inspectThrowable(t);
                    logger.error(String.format("Unknown exception caught while attempting to update MaterializedView! %s", upd.metadata().toString()), t);
                    throw t;
                }
            }
            Tracing.trace("Adding to {} memtable", upd.metadata().name);
            // Use a no-op index transaction when index updates are disabled.
            UpdateTransaction indexTransaction = updateIndexes ? cfs.indexManager.newUpdateTransaction(upd, opGroup, nowInSec) : UpdateTransaction.NO_OP;
            cfs.apply(upd, indexTransaction, opGroup, commitLogPosition);
            if (requiresViewUpdate)
                baseComplete.set(System.currentTimeMillis());
        }
        // Signal success to any caller holding the future.
        if (future != null) {
            future.complete(null);
        }
        return future;
    } finally {
        // Release every view lock that was acquired, whether the write succeeded or threw.
        if (locks != null) {
            for (Lock lock : locks) if (lock != null)
                lock.unlock();
        }
    }
}
Also used : TableId(org.apache.cassandra.schema.TableId) UpdateTransaction(org.apache.cassandra.index.transactions.UpdateTransaction) CommitLogPosition(org.apache.cassandra.db.commitlog.CommitLogPosition) Lock(java.util.concurrent.locks.Lock) WriteTimeoutException(org.apache.cassandra.exceptions.WriteTimeoutException) AtomicLong(java.util.concurrent.atomic.AtomicLong) OpOrder(org.apache.cassandra.utils.concurrent.OpOrder) PartitionUpdate(org.apache.cassandra.db.partitions.PartitionUpdate)

Example 5 with PartitionUpdate

use of org.apache.cassandra.db.partitions.PartitionUpdate in project cassandra by apache.

the class HintVerbHandler method doVerb.

/**
 * Handles an incoming hint message.
 * <p>
 * The hint is validated and then, depending on its destination and the local node's
 * replica status, stored for later delivery, re-addressed to all replicas, or applied
 * locally. A reply is sent to the sender on every path except a failed local apply,
 * which is only logged at debug level.
 *
 * @param message the received message carrying the target host id and the hint
 * @param id      the message id passed back when replying to the sender
 */
public void doVerb(MessageIn<HintMessage> message, int id) {
    UUID targetHostId = message.payload.hostId;
    Hint receivedHint = message.payload.hint;
    InetAddress targetAddress = StorageService.instance.getEndpointForHostId(targetHostId);

    // A null hint means it could not be decoded because its table id is unknown; it is
    // acknowledged and dropped.
    // NOTE(review): the original comment here was cut off; its surviving fragment says there
    // "is schema agreement between the sender and the receiver" — confirm the full intent upstream.
    if (receivedHint == null) {
        logger.trace("Failed to decode and apply a hint for {}: {} - table with id {} is unknown", targetAddress, targetHostId, message.payload.unknownTableID);
        reply(id, message.from);
        return;
    }

    // We must perform validation before applying the hint, and there is no other place to do it other than here.
    try {
        for (PartitionUpdate partitionUpdate : receivedHint.mutation.getPartitionUpdates()) {
            partitionUpdate.validate();
        }
    } catch (MarshalException e) {
        logger.warn("Failed to validate a hint for {}: {} - skipped", targetAddress, targetHostId);
        reply(id, message.from);
        return;
    }

    if (!targetHostId.equals(StorageService.instance.getLocalHostUUID())) {
        // the node is not the final destination of the hint (must have gotten it from a decommissioning node),
        // so just store it locally, to be delivered later.
        HintsService.instance.write(targetHostId, receivedHint);
        reply(id, message.from);
        return;
    }

    if (!StorageProxy.instance.appliesLocally(receivedHint.mutation)) {
        // the topology has changed, and we are no longer a replica of the mutation - since we don't know which node(s)
        // it has been handed over to, re-address the hint to all replicas; see CASSANDRA-5902.
        HintsService.instance.writeForAllReplicas(receivedHint);
        reply(id, message.from);
        return;
    }

    // the common path - the node is both the destination and a valid replica for the hint.
    receivedHint.applyFuture().thenAccept(o -> reply(id, message.from)).exceptionally(e -> {
        logger.debug("Failed to apply hint", e);
        return null;
    });
}
Also used : InetAddress(java.net.InetAddress) MessagingService(org.apache.cassandra.net.MessagingService) PartitionUpdate(org.apache.cassandra.db.partitions.PartitionUpdate) Logger(org.slf4j.Logger) StorageProxy(org.apache.cassandra.service.StorageProxy) LoggerFactory(org.slf4j.LoggerFactory) StorageService(org.apache.cassandra.service.StorageService) MessageIn(org.apache.cassandra.net.MessageIn) UUID(java.util.UUID) IVerbHandler(org.apache.cassandra.net.IVerbHandler) MarshalException(org.apache.cassandra.serializers.MarshalException)

Aggregations

PartitionUpdate (org.apache.cassandra.db.partitions.PartitionUpdate)40 Test (org.junit.Test)14 TableMetadata (org.apache.cassandra.schema.TableMetadata)7 ColumnIdentifier (org.apache.cassandra.cql3.ColumnIdentifier)3 Mutation (org.apache.cassandra.db.Mutation)3 ColumnMetadata (org.apache.cassandra.schema.ColumnMetadata)3 Commit (org.apache.cassandra.service.paxos.Commit)3 UntypedResultSet (org.apache.cassandra.cql3.UntypedResultSet)2 InvalidRequestException (org.apache.cassandra.exceptions.InvalidRequestException)2 TableId (org.apache.cassandra.schema.TableId)2 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 InetAddress (java.net.InetAddress)1 ByteBuffer (java.nio.ByteBuffer)1 Collections.emptyMap (java.util.Collections.emptyMap)1 Collections.singletonMap (java.util.Collections.singletonMap)1 HashSet (java.util.HashSet)1 UUID (java.util.UUID)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 AtomicLong (java.util.concurrent.atomic.AtomicLong)1