Search in sources:

Example 1 with CommitLogPosition

Use of org.apache.cassandra.db.commitlog.CommitLogPosition in the Apache Cassandra project.

Class Keyspace, method applyInternal.

/**
 * This method appends a row to the global CommitLog, then updates memtables and indexes.
 * <p>
 * When the mutation affects a materialized view, one lock per table touched by the mutation is
 * acquired first. If a lock cannot be obtained the method either times out, re-queues itself on
 * the mutation stage, or sleeps briefly and retries, depending on {@code isDroppable} and
 * {@code isDeferrable}. All locks that were acquired are released in the {@code finally} block
 * around the write.
 *
 * @param mutation       the row to write.  Must not be modified after calling apply, since commitlog append
 *                       may happen concurrently, depending on the CL Executor type.
 * @param writeCommitLog false to disable commitlog append entirely
 * @param updateIndexes  false to disable index updates (used by CollationController "defragmenting");
 *                       when false, view updates are skipped as well
 * @param isDroppable    true if this should throw WriteTimeoutException if it does not acquire lock within write_request_timeout_in_ms
 * @param isDeferrable   true if caller is not waiting for future to complete, so that future may be deferred
 * @param future         optional future to complete when the write finishes (or fails on a view lock
 *                       timeout); may be {@code null}
 * @return the {@code future} argument itself, which is {@code null} when the caller passed {@code null}
 * @throws WriteTimeoutException if {@code isDroppable} is true, a view lock could not be acquired before the
 *                               write RPC timeout elapsed, and {@code future} is {@code null}
 * @throws RuntimeException      if the test-only write-failure switch is enabled for this keyspace
 */
private CompletableFuture<?> applyInternal(final Mutation mutation, final boolean writeCommitLog, boolean updateIndexes, boolean isDroppable, boolean isDeferrable, CompletableFuture<?> future) {
    // Test hook: fail every write to the configured keyspace.
    if (TEST_FAIL_WRITES && metadata.name.equals(TEST_FAIL_WRITES_KS))
        throw new RuntimeException("Testing write failures");
    // Stays null unless view locks are needed; the finally block below relies on that.
    Lock[] locks = null;
    boolean requiresViewUpdate = updateIndexes && viewManager.updatesAffectView(Collections.singleton(mutation), false);
    if (requiresViewUpdate) {
        // Only the first attempt records the start time; re-queued attempts see a non-zero value and leave it alone.
        mutation.viewLockAcquireStart.compareAndSet(0L, System.currentTimeMillis());
        // the order of lock acquisition doesn't matter (from a deadlock perspective) because we only use tryLock()
        Collection<TableId> tableIds = mutation.getTableIds();
        Iterator<TableId> idIterator = tableIds.iterator();
        locks = new Lock[tableIds.size()];
        for (int i = 0; i < tableIds.size(); i++) {
            TableId tableId = idIterator.next();
            // The lock is keyed on the (partition key, table id) pair.
            int lockKey = Objects.hash(mutation.key().getKey(), tableId);
            // Loop until the lock for this table is held, or until we bail out through one of the return/throw paths.
            while (true) {
                Lock lock = null;
                // Test hook: while the counter is non-zero, pretend the acquisition failed and count down.
                if (TEST_FAIL_MV_LOCKS_COUNT == 0)
                    lock = ViewManager.acquireLockFor(lockKey);
                else
                    TEST_FAIL_MV_LOCKS_COUNT--;
                // A null lock is treated as "not acquired" (presumably acquireLockFor returns null when tryLock fails — confirm).
                if (lock == null) {
                    //throw WTE only if request is droppable
                    if (isDroppable && (System.currentTimeMillis() - mutation.createdAt) > DatabaseDescriptor.getWriteRpcTimeout()) {
                        // Give back the locks taken for earlier tables before reporting the timeout.
                        for (int j = 0; j < i; j++) locks[j].unlock();
                        logger.trace("Could not acquire lock for {} and table {}", ByteBufferUtil.bytesToHex(mutation.key().getKey()), columnFamilyStores.get(tableId).name);
                        Tracing.trace("Could not acquire MV lock");
                        // Report through the future when there is one, otherwise throw to the caller.
                        if (future != null) {
                            future.completeExceptionally(new WriteTimeoutException(WriteType.VIEW, ConsistencyLevel.LOCAL_ONE, 0, 1));
                            return future;
                        } else
                            throw new WriteTimeoutException(WriteType.VIEW, ConsistencyLevel.LOCAL_ONE, 0, 1);
                    } else if (isDeferrable) {
                        // Give back the locks taken for earlier tables; the re-queued attempt starts from scratch.
                        for (int j = 0; j < i; j++) locks[j].unlock();
                        // This view update can't happen right now. so rather than keep this thread busy
                        // we will re-apply ourself to the queue and try again later
                        final CompletableFuture<?> mark = future;
                        // updateIndexes is passed as true: this branch is only reachable when requiresViewUpdate
                        // (and therefore updateIndexes) was true.
                        StageManager.getStage(Stage.MUTATION).execute(() -> applyInternal(mutation, writeCommitLog, true, isDroppable, true, mark));
                        return future;
                    } else {
                        // Not timed out and not deferrable: retry on this same thread. Per the original note,
                        // deferring here could leave callers
                        // being blocked by waiting for futures which will never be processed as all workers are blocked
                        try {
                            // Wait a little bit before retrying to lock
                            Thread.sleep(10);
                        } catch (InterruptedException e) {
                            // Just continue
                            // NOTE(review): the interrupt is swallowed and the thread's interrupt status is not restored.
                        }
                        continue;
                    }
                } else {
                    locks[i] = lock;
                }
                break;
            }
        }
        // Time elapsed since the first acquisition attempt for this mutation, including any re-queued attempts.
        long acquireTime = System.currentTimeMillis() - mutation.viewLockAcquireStart.get();
        // Bulk non-droppable operations (e.g. commitlog replay, hint delivery) are not measured
        // NOTE(review): this runs while the locks are held but before the try/finally that releases them,
        // so an exception thrown here would not pass through that finally block.
        if (isDroppable) {
            for (TableId tableId : tableIds) columnFamilyStores.get(tableId).metric.viewLockAcquireTime.update(acquireTime, TimeUnit.MILLISECONDS);
        }
    }
    int nowInSec = FBUtilities.nowInSeconds();
    // The write-order group is closed automatically when the try block exits.
    try (OpOrder.Group opGroup = writeOrder.start()) {
        // write the mutation to the commitlog and memtables
        // Remains null when the commit log is bypassed; it is passed to cfs.apply() either way.
        CommitLogPosition commitLogPosition = null;
        if (writeCommitLog) {
            Tracing.trace("Appending to commitlog");
            commitLogPosition = CommitLog.instance.add(mutation);
        }
        for (PartitionUpdate upd : mutation.getPartitionUpdates()) {
            ColumnFamilyStore cfs = columnFamilyStores.get(upd.metadata().id);
            // Updates for tables this keyspace does not know are logged and skipped; the rest are still applied.
            if (cfs == null) {
                logger.error("Attempting to mutate non-existant table {} ({}.{})", upd.metadata().id, upd.metadata().keyspace, upd.metadata().name);
                continue;
            }
            // Starts at Long.MAX_VALUE and is set to the wall-clock time once the base table write below is done.
            AtomicLong baseComplete = new AtomicLong(Long.MAX_VALUE);
            if (requiresViewUpdate) {
                try {
                    Tracing.trace("Creating materialized view mutations from base table replica");
                    viewManager.forTable(upd.metadata().id).pushViewReplicaUpdates(upd, writeCommitLog, baseComplete);
                } catch (Throwable t) {
                    // Inspect and log the failure, then rethrow it so the base write is not applied.
                    JVMStabilityInspector.inspectThrowable(t);
                    logger.error(String.format("Unknown exception caught while attempting to update MaterializedView! %s", upd.metadata().toString()), t);
                    throw t;
                }
            }
            Tracing.trace("Adding to {} memtable", upd.metadata().name);
            // With index updates disabled a no-op transaction is used instead.
            UpdateTransaction indexTransaction = updateIndexes ? cfs.indexManager.newUpdateTransaction(upd, opGroup, nowInSec) : UpdateTransaction.NO_OP;
            cfs.apply(upd, indexTransaction, opGroup, commitLogPosition);
            if (requiresViewUpdate)
                baseComplete.set(System.currentTimeMillis());
        }
        // Signal success to whoever is waiting on the future, if one was supplied.
        if (future != null) {
            future.complete(null);
        }
        return future;
    } finally {
        // Release every view lock that was actually acquired; locks is null when no view update was required.
        if (locks != null) {
            for (Lock lock : locks) if (lock != null)
                lock.unlock();
        }
    }
}
Also used : TableId(org.apache.cassandra.schema.TableId) UpdateTransaction(org.apache.cassandra.index.transactions.UpdateTransaction) CommitLogPosition(org.apache.cassandra.db.commitlog.CommitLogPosition) Lock(java.util.concurrent.locks.Lock) WriteTimeoutException(org.apache.cassandra.exceptions.WriteTimeoutException) AtomicLong(java.util.concurrent.atomic.AtomicLong) OpOrder(org.apache.cassandra.utils.concurrent.OpOrder) PartitionUpdate(org.apache.cassandra.db.partitions.PartitionUpdate)

Example 2 with CommitLogPosition

Use of org.apache.cassandra.db.commitlog.CommitLogPosition in the Apache Cassandra project.

Class ColumnFamilyStore, method truncateBlocking.

/**
 * Drops all data held by this column family without generating tombstones.
 * <p>
 * Memtables are either flushed or discarded first, a truncation timestamp is chosen, and the
 * actual cleanup (snapshot, sstable discard, index/view truncation, truncation record, cache
 * invalidation) runs with compactions disabled.
 *
 * @param noSnapshot if {@code true} no snapshot will be taken
 */
private void truncateBlocking(boolean noSnapshot) {
    // Two requirements drive the sequence below:
    //   1. everything written before truncate was invoked must be deleted;
    //   2. nothing that is missing from the snapshot we create may be deleted.
    // Hence: flush manually, record a timestamp, then snapshot. SSTables created at or
    // after that timestamp are not marked for deletion.
    //
    // Extra wrinkle: sstable metadata stores the commit log segment position, so once those
    // sstables are gone a restart would replay CL segments from the beginning until the
    // segments are discarded for normal reasons. Persisting the truncation position in the
    // System keyspace prevents that replay.
    logger.info("Truncating {}.{}", keyspace.getName(), name);
    viewManager.stopBuild();

    final boolean flushBeforeTruncate = !noSnapshot && (keyspace.getMetadata().params.durableWrites || DatabaseDescriptor.isAutoSnapshot());
    final CommitLogPosition replayAfter;
    if (flushBeforeTruncate) {
        replayAfter = forceBlockingFlush();
        viewManager.forceBlockingFlush();
    } else {
        // no need to persist anything: throw the memtable contents away
        viewManager.dumpMemtables();
        try {
            replayAfter = dumpMemtable().get();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    // Guard against sstables whose data age lies in the future (e.g. clock drift):
    // the truncation timestamp must not be older than any live sstable.
    long latest = currentTimeMillis();
    for (ColumnFamilyStore store : concatWithIndexes()) {
        for (SSTableReader reader : store.getLiveSSTables()) {
            latest = Math.max(latest, reader.maxDataAge);
        }
    }
    final long truncatedAt = latest;

    Runnable truncation = () -> {
        logger.info("Truncating {}.{} with truncatedAt={}", keyspace.getName(), getTableName(), truncatedAt);
        // Nodes may truncate at different moments, so running repairs on this table are
        // aborted; otherwise data cleared here could be streamed back in from a peer.
        ActiveRepairService.instance.abort((session) -> session.getTableIds().contains(metadata.id), "Stopping parent sessions {} due to truncation of tableId=" + metadata.id);
        data.notifyTruncated(truncatedAt);
        if (!noSnapshot && DatabaseDescriptor.isAutoSnapshot()) {
            snapshot(Keyspace.getTimestampedSnapshotNameWithPrefix(name, SNAPSHOT_TRUNCATE_PREFIX));
        }
        discardSSTables(truncatedAt);
        indexManager.truncateAllIndexesBlocking(truncatedAt);
        viewManager.truncateBlocking(replayAfter, truncatedAt);
        SystemKeyspace.saveTruncationRecord(ColumnFamilyStore.this, truncatedAt, replayAfter);
        logger.trace("cleaning out row cache");
        invalidateCaches();
    };
    runWithCompactionsDisabled(FutureTask.callable(truncation), true, true);

    viewManager.build();
    logger.info("Truncate of {}.{} is complete", keyspace.getName(), name);
}
Also used : CommitLogPosition(org.apache.cassandra.db.commitlog.CommitLogPosition) TimeoutException(java.util.concurrent.TimeoutException) InvocationTargetException(java.lang.reflect.InvocationTargetException) UncheckedInterruptedException(org.apache.cassandra.utils.concurrent.UncheckedInterruptedException) ConfigurationException(org.apache.cassandra.exceptions.ConfigurationException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) StartupException(org.apache.cassandra.exceptions.StartupException)

Example 3 with CommitLogPosition

Use of org.apache.cassandra.db.commitlog.CommitLogPosition in the Apache Cassandra project.

Class ColumnFamilyStore, method setCommitLogUpperBound.

// Atomically publishes the current commit log position as the upper bound held by the given reference.
private static void setCommitLogUpperBound(AtomicReference<CommitLogPosition> commitLogUpperBound) {
    // Writers to the memtables maintain this reference concurrently. Each pass reads the
    // current commit log position and tries to install it; if the stored value is already
    // ahead of what we read, or another thread changes it before our compare-and-set
    // lands (expected to be rare), we simply take a fresh reading and try again.
    for (;;) {
        CommitLogPosition candidate = new Memtable.LastCommitLogPosition(CommitLog.instance.getCurrentPosition());
        CommitLogPosition observed = commitLogUpperBound.get();
        boolean observedNotAhead = observed == null || observed.compareTo(candidate) <= 0;
        if (observedNotAhead && commitLogUpperBound.compareAndSet(observed, candidate)) {
            return;
        }
    }
}
Also used : CommitLogPosition(org.apache.cassandra.db.commitlog.CommitLogPosition)

Example 4 with CommitLogPosition

Use of org.apache.cassandra.db.commitlog.CommitLogPosition in the Apache Cassandra project.

Class MetadataSerializerTest, method constructMetadata.

/**
 * Builds a metadata map for the standard test table {@code ks1.cf1}, using a single commit log
 * interval from position (9, 12) to position (11, 12).
 *
 * @return the finalized metadata components produced by the collector, keyed by type
 */
public Map<MetadataType, MetadataComponent> constructMetadata() {
    final CommitLogPosition upperBound = new CommitLogPosition(11L, 12);
    final CommitLogPosition lowerBound = new CommitLogPosition(9L, 12);
    final TableMetadata table = SchemaLoader.standardCFMD("ks1", "cf1").build();

    final MetadataCollector collector = new MetadataCollector(table.comparator)
            .commitLogIntervals(new IntervalSet<>(lowerBound, upperBound));

    return collector.finalizeMetadata(
            RandomPartitioner.class.getCanonicalName(), // partitioner class name
            0.1,                                        // bloom filter false-positive chance
            0,
            null,
            false,
            SerializationHeader.make(table, Collections.emptyList()));
}
Also used : TableMetadata(org.apache.cassandra.schema.TableMetadata) CommitLogPosition(org.apache.cassandra.db.commitlog.CommitLogPosition)

Example 5 with CommitLogPosition

Use of org.apache.cassandra.db.commitlog.CommitLogPosition in the Apache Cassandra project.

Class ColumnFamilyStore, method truncateBlocking.

/**
 * Drops all data held by this column family without generating tombstones.
 * <p>
 * Memtables are either flushed or discarded first, a truncation timestamp is chosen, and the
 * actual cleanup (snapshot, sstable discard, index/view truncation, truncation record, cache
 * invalidation) runs with compactions disabled.
 */
public void truncateBlocking() {
    // Two requirements drive the sequence below:
    //   1. everything written before truncate was invoked must be deleted;
    //   2. nothing that is missing from the snapshot we create may be deleted.
    // Hence: flush manually, record a timestamp, then snapshot. SSTables created at or
    // after that timestamp are not marked for deletion.
    //
    // Extra wrinkle: sstable metadata stores the commit log segment position, so once those
    // sstables are gone a restart would replay CL segments from the beginning until the
    // segments are discarded for normal reasons. Persisting the truncation position in the
    // System keyspace prevents that replay.
    logger.info("Truncating {}.{}", keyspace.getName(), name);

    final boolean flushBeforeTruncate = keyspace.getMetadata().params.durableWrites || DatabaseDescriptor.isAutoSnapshot();
    final CommitLogPosition replayAfter;
    if (flushBeforeTruncate) {
        replayAfter = forceBlockingFlush();
        viewManager.forceBlockingFlush();
    } else {
        // no need to persist anything: throw the memtable contents away
        viewManager.dumpMemtables();
        try {
            replayAfter = dumpMemtable().get();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    // Guard against sstables whose data age lies in the future (e.g. clock drift):
    // the truncation timestamp must not be older than any live sstable.
    long latest = System.currentTimeMillis();
    for (ColumnFamilyStore store : concatWithIndexes()) {
        for (SSTableReader reader : store.getLiveSSTables()) {
            latest = Math.max(latest, reader.maxDataAge);
        }
    }
    final long truncatedAt = latest;

    Runnable truncation = () -> {
        logger.debug("Discarding sstable data for truncated CF + indexes");
        data.notifyTruncated(truncatedAt);
        if (DatabaseDescriptor.isAutoSnapshot()) {
            snapshot(Keyspace.getTimestampedSnapshotNameWithPrefix(name, SNAPSHOT_TRUNCATE_PREFIX));
        }
        discardSSTables(truncatedAt);
        indexManager.truncateAllIndexesBlocking(truncatedAt);
        viewManager.truncateBlocking(replayAfter, truncatedAt);
        SystemKeyspace.saveTruncationRecord(ColumnFamilyStore.this, truncatedAt, replayAfter);
        logger.trace("cleaning out row cache");
        invalidateCaches();
    };
    runWithCompactionsDisabled(Executors.callable(truncation), true, true);

    logger.info("Truncate of {}.{} is complete", keyspace.getName(), name);
}
Also used : CommitLogPosition(org.apache.cassandra.db.commitlog.CommitLogPosition) InvocationTargetException(java.lang.reflect.InvocationTargetException) ConfigurationException(org.apache.cassandra.exceptions.ConfigurationException) IOException(java.io.IOException) StartupException(org.apache.cassandra.exceptions.StartupException)

Aggregations

CommitLogPosition (org.apache.cassandra.db.commitlog.CommitLogPosition)7 IOException (java.io.IOException)2 InvocationTargetException (java.lang.reflect.InvocationTargetException)2 ConfigurationException (org.apache.cassandra.exceptions.ConfigurationException)2 StartupException (org.apache.cassandra.exceptions.StartupException)2 OpOrder (org.apache.cassandra.utils.concurrent.OpOrder)2 ExecutionException (java.util.concurrent.ExecutionException)1 TimeoutException (java.util.concurrent.TimeoutException)1 AtomicLong (java.util.concurrent.atomic.AtomicLong)1 Lock (java.util.concurrent.locks.Lock)1 PartitionUpdate (org.apache.cassandra.db.partitions.PartitionUpdate)1 WriteTimeoutException (org.apache.cassandra.exceptions.WriteTimeoutException)1 UpdateTransaction (org.apache.cassandra.index.transactions.UpdateTransaction)1 TableId (org.apache.cassandra.schema.TableId)1 TableMetadata (org.apache.cassandra.schema.TableMetadata)1 AsyncPromise (org.apache.cassandra.utils.concurrent.AsyncPromise)1 UncheckedInterruptedException (org.apache.cassandra.utils.concurrent.UncheckedInterruptedException)1 MemtableAllocator (org.apache.cassandra.utils.memory.MemtableAllocator)1