use of org.apache.cassandra.db.commitlog.CommitLogPosition in project cassandra by apache.
the class Keyspace method applyInternal.
/**
* This method appends a row to the global CommitLog, then updates memtables and indexes.
*
* @param mutation the row to write. Must not be modified after calling apply, since commitlog append
* may happen concurrently, depending on the CL Executor type.
* @param writeCommitLog false to disable commitlog append entirely
* @param updateIndexes false to disable index updates (used by CollationController "defragmenting")
* @param isDroppable true if this should throw WriteTimeoutException if it does not acquire lock within write_request_timeout_in_ms
* @param isDeferrable true if caller is not waiting for future to complete, so that future may be deferred
*/
private CompletableFuture<?> applyInternal(final Mutation mutation, final boolean writeCommitLog, boolean updateIndexes, boolean isDroppable, boolean isDeferrable, CompletableFuture<?> future) {
if (TEST_FAIL_WRITES && metadata.name.equals(TEST_FAIL_WRITES_KS))
throw new RuntimeException("Testing write failures");
Lock[] locks = null;
boolean requiresViewUpdate = updateIndexes && viewManager.updatesAffectView(Collections.singleton(mutation), false);
if (requiresViewUpdate) {
mutation.viewLockAcquireStart.compareAndSet(0L, System.currentTimeMillis());
// the order of lock acquisition doesn't matter (from a deadlock perspective) because we only use tryLock()
Collection<TableId> tableIds = mutation.getTableIds();
Iterator<TableId> idIterator = tableIds.iterator();
locks = new Lock[tableIds.size()];
for (int i = 0; i < tableIds.size(); i++) {
TableId tableId = idIterator.next();
int lockKey = Objects.hash(mutation.key().getKey(), tableId);
while (true) {
Lock lock = null;
if (TEST_FAIL_MV_LOCKS_COUNT == 0)
lock = ViewManager.acquireLockFor(lockKey);
else
TEST_FAIL_MV_LOCKS_COUNT--;
if (lock == null) {
// throw a WriteTimeoutException only if the request is droppable
if (isDroppable && (System.currentTimeMillis() - mutation.createdAt) > DatabaseDescriptor.getWriteRpcTimeout()) {
for (int j = 0; j < i; j++) locks[j].unlock();
logger.trace("Could not acquire lock for {} and table {}", ByteBufferUtil.bytesToHex(mutation.key().getKey()), columnFamilyStores.get(tableId).name);
Tracing.trace("Could not acquire MV lock");
if (future != null) {
future.completeExceptionally(new WriteTimeoutException(WriteType.VIEW, ConsistencyLevel.LOCAL_ONE, 0, 1));
return future;
} else
throw new WriteTimeoutException(WriteType.VIEW, ConsistencyLevel.LOCAL_ONE, 0, 1);
} else if (isDeferrable) {
for (int j = 0; j < i; j++) locks[j].unlock();
// This view update can't happen right now, so rather than keep this thread busy
// we will re-apply ourselves to the queue and try again later
final CompletableFuture<?> mark = future;
StageManager.getStage(Stage.MUTATION).execute(() -> applyInternal(mutation, writeCommitLog, true, isDroppable, true, mark));
return future;
} else {
// mutation is not deferrable, so retry the lock on this thread; deferring it could leave all
// MutationStage workers blocked waiting on futures which will never be processed
try {
// Wait a little bit before retrying to lock
Thread.sleep(10);
} catch (InterruptedException e) {
// Just continue
}
continue;
}
} else {
locks[i] = lock;
}
break;
}
}
long acquireTime = System.currentTimeMillis() - mutation.viewLockAcquireStart.get();
// Bulk non-droppable operations (e.g. commitlog replay, hint delivery) are not measured
if (isDroppable) {
for (TableId tableId : tableIds) columnFamilyStores.get(tableId).metric.viewLockAcquireTime.update(acquireTime, TimeUnit.MILLISECONDS);
}
}
int nowInSec = FBUtilities.nowInSeconds();
try (OpOrder.Group opGroup = writeOrder.start()) {
// write the mutation to the commitlog and memtables
CommitLogPosition commitLogPosition = null;
if (writeCommitLog) {
Tracing.trace("Appending to commitlog");
commitLogPosition = CommitLog.instance.add(mutation);
}
for (PartitionUpdate upd : mutation.getPartitionUpdates()) {
ColumnFamilyStore cfs = columnFamilyStores.get(upd.metadata().id);
if (cfs == null) {
logger.error("Attempting to mutate non-existant table {} ({}.{})", upd.metadata().id, upd.metadata().keyspace, upd.metadata().name);
continue;
}
AtomicLong baseComplete = new AtomicLong(Long.MAX_VALUE);
if (requiresViewUpdate) {
try {
Tracing.trace("Creating materialized view mutations from base table replica");
viewManager.forTable(upd.metadata().id).pushViewReplicaUpdates(upd, writeCommitLog, baseComplete);
} catch (Throwable t) {
JVMStabilityInspector.inspectThrowable(t);
logger.error(String.format("Unknown exception caught while attempting to update MaterializedView! %s", upd.metadata().toString()), t);
throw t;
}
}
Tracing.trace("Adding to {} memtable", upd.metadata().name);
UpdateTransaction indexTransaction = updateIndexes ? cfs.indexManager.newUpdateTransaction(upd, opGroup, nowInSec) : UpdateTransaction.NO_OP;
cfs.apply(upd, indexTransaction, opGroup, commitLogPosition);
if (requiresViewUpdate)
baseComplete.set(System.currentTimeMillis());
}
if (future != null) {
future.complete(null);
}
return future;
} finally {
if (locks != null) {
for (Lock lock : locks)
    if (lock != null)
        lock.unlock();
}
}
}
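For orientation, the public write entry points funnel into this private method, and the CommitLogPosition returned by CommitLog.instance.add(mutation) is threaded into cfs.apply so the memtable knows its replay bound. A hedged sketch of those wrappers; the exact overload set varies across Cassandra versions, so treat the signatures below as illustrative rather than the project's verbatim API:

// Illustrative wrappers around applyInternal (signatures are assumptions based
// on the parameters documented above, not the verbatim Cassandra overloads).
public void apply(Mutation mutation, boolean writeCommitLog) {
    // synchronous path: droppable, not deferrable, no future to complete
    applyInternal(mutation, writeCommitLog, true, true, false, null);
}

public CompletableFuture<?> applyFuture(Mutation mutation, boolean writeCommitLog) {
    // deferrable path: on view-lock contention the mutation is re-queued on the
    // MUTATION stage and this future completes later
    return applyInternal(mutation, writeCommitLog, true, true, true, new CompletableFuture<>());
}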
use of org.apache.cassandra.db.commitlog.CommitLogPosition in project cassandra by apache.
the class ColumnFamilyStore method truncateBlocking.
/**
* Truncate deletes the entire column family's data with no expensive tombstone creation
* @param noSnapshot if {@code true} no snapshot will be taken
*/
private void truncateBlocking(boolean noSnapshot) {
// We have two goals here:
// - truncate should delete everything written before truncate was invoked
// - but not delete anything that isn't part of the snapshot we create.
// We accomplish this by first flushing manually, then snapshotting, and
// recording the timestamp IN BETWEEN those actions. Any sstables created
// with this timestamp or later will not be marked for deletion.
//
// Bonus complication: since we store commit log segment position in sstable metadata,
// truncating those sstables means we will replay any CL segments from the
// beginning if we restart before they [the CL segments] are discarded for
// normal reasons post-truncate. To prevent this, we store truncation
// position in the System keyspace.
logger.info("Truncating {}.{}", keyspace.getName(), name);
viewManager.stopBuild();
final long truncatedAt;
final CommitLogPosition replayAfter;
if (!noSnapshot && (keyspace.getMetadata().params.durableWrites || DatabaseDescriptor.isAutoSnapshot())) {
replayAfter = forceBlockingFlush();
viewManager.forceBlockingFlush();
} else {
// just nuke the memtable data w/o writing to disk first
viewManager.dumpMemtables();
try {
replayAfter = dumpMemtable().get();
} catch (Exception e) {
throw new RuntimeException(e);
}
}
long now = currentTimeMillis();
// make sure none of our sstables are somehow in the future (clock drift, perhaps)
for (ColumnFamilyStore cfs : concatWithIndexes())
    for (SSTableReader sstable : cfs.getLiveSSTables())
        now = Math.max(now, sstable.maxDataAge);
truncatedAt = now;
Runnable truncateRunnable = new Runnable() {
public void run() {
logger.info("Truncating {}.{} with truncatedAt={}", keyspace.getName(), getTableName(), truncatedAt);
// since truncation can happen at different times on different nodes, we need to make sure
// that any repairs are aborted, otherwise we might clear the data on one node and then
// stream in data that is actually supposed to have been deleted
ActiveRepairService.instance.abort((prs) -> prs.getTableIds().contains(metadata.id), "Stopping parent sessions {} due to truncation of tableId=" + metadata.id);
data.notifyTruncated(truncatedAt);
if (!noSnapshot && DatabaseDescriptor.isAutoSnapshot())
snapshot(Keyspace.getTimestampedSnapshotNameWithPrefix(name, SNAPSHOT_TRUNCATE_PREFIX));
discardSSTables(truncatedAt);
indexManager.truncateAllIndexesBlocking(truncatedAt);
viewManager.truncateBlocking(replayAfter, truncatedAt);
SystemKeyspace.saveTruncationRecord(ColumnFamilyStore.this, truncatedAt, replayAfter);
logger.trace("cleaning out row cache");
invalidateCaches();
}
};
runWithCompactionsDisabled(FutureTask.callable(truncateRunnable), true, true);
viewManager.build();
logger.info("Truncate of {}.{} is complete", keyspace.getName(), name);
}
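The truncation record saved by SystemKeyspace.saveTruncationRecord is what commitlog replay consults on restart to avoid resurrecting truncated data. A hedged sketch of that replay-side check; the lookup name getTruncatedPosition mirrors the save call above by symmetry, but treat its exact signature as an assumption and verify it for your Cassandra version:

// Replay-side filter implied by the comment block above: drop any mutation
// whose commitlog position falls at or before the recorded truncation point.
// SystemKeyspace.getTruncatedPosition is assumed here, not quoted from source.
static boolean shouldReplay(TableId tableId, CommitLogPosition mutationPosition) {
    CommitLogPosition truncatedAt = SystemKeyspace.getTruncatedPosition(tableId);
    return truncatedAt == null || mutationPosition.compareTo(truncatedAt) > 0;
}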
use of org.apache.cassandra.db.commitlog.CommitLogPosition in project cassandra by apache.
the class ColumnFamilyStore method setCommitLogUpperBound.
// atomically set the upper bound for the commit log
private static void setCommitLogUpperBound(AtomicReference<CommitLogPosition> commitLogUpperBound) {
// we attempt to set the holder to the current commit log context. at the same time all writes to the memtables are
// also maintaining this value, so if somebody sneaks ahead of us somehow (should be rare) we simply retry,
// so that we know all operations prior to the position have not reached it yet
CommitLogPosition lastReplayPosition;
while (true) {
lastReplayPosition = new Memtable.LastCommitLogPosition((CommitLog.instance.getCurrentPosition()));
CommitLogPosition currentLast = commitLogUpperBound.get();
if ((currentLast == null || currentLast.compareTo(lastReplayPosition) <= 0) && commitLogUpperBound.compareAndSet(currentLast, lastReplayPosition))
break;
}
}
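The loop above is the classic compare-and-set idiom for maintaining a monotonic maximum. As a design note, on Java 8+ the same effect can be sketched with AtomicReference.accumulateAndGet, which runs the retry loop internally; this is an illustrative alternative (it keeps whichever position is larger rather than re-sampling the commit log on contention), not the project's code:

// Illustrative alternative using accumulateAndGet, which performs the
// CAS-retry loop internally. Unlike the original, it does not re-read
// CommitLog.instance.getCurrentPosition() when a concurrent writer wins;
// it simply retains the larger of the two positions.
private static void setCommitLogUpperBound(AtomicReference<CommitLogPosition> commitLogUpperBound) {
    CommitLogPosition candidate = new Memtable.LastCommitLogPosition(CommitLog.instance.getCurrentPosition());
    commitLogUpperBound.accumulateAndGet(candidate, (current, proposed) ->
        current == null || current.compareTo(proposed) <= 0 ? proposed : current);
}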
use of org.apache.cassandra.db.commitlog.CommitLogPosition in project cassandra by apache.
the class MetadataSerializerTest method constructMetadata.
public Map<MetadataType, MetadataComponent> constructMetadata() {
CommitLogPosition club = new CommitLogPosition(11L, 12);
CommitLogPosition cllb = new CommitLogPosition(9L, 12);
TableMetadata cfm = SchemaLoader.standardCFMD("ks1", "cf1").build();
MetadataCollector collector = new MetadataCollector(cfm.comparator).commitLogIntervals(new IntervalSet<>(cllb, club));
String partitioner = RandomPartitioner.class.getCanonicalName();
double bfFpChance = 0.1;
return collector.finalizeMetadata(partitioner, bfFpChance, 0, null, false, SerializationHeader.make(cfm, Collections.emptyList()));
}
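The two positions above bound the sstable's covered commitlog interval: cllb (segment 9) is the lower bound and club (segment 11) the upper bound of the IntervalSet. CommitLogPosition orders by segment id first, then by offset within the segment; a small self-contained check of that Comparable contract (the class name here is made up for illustration):

import org.apache.cassandra.db.commitlog.CommitLogPosition;

// Illustrative check of CommitLogPosition ordering: segment id first, then
// offset within the segment. Run with -ea to enable the assertions.
public class CommitLogPositionOrderingSketch {
    public static void main(String[] args) {
        CommitLogPosition lower = new CommitLogPosition(9L, 12);
        CommitLogPosition upper = new CommitLogPosition(11L, 12);
        assert lower.compareTo(upper) < 0 : "earlier segment sorts first";
        assert new CommitLogPosition(9L, 5).compareTo(lower) < 0 : "same segment: offset decides";
    }
}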
use of org.apache.cassandra.db.commitlog.CommitLogPosition in project cassandra by apache.
the class ColumnFamilyStore method truncateBlocking.
/**
* Truncate deletes the entire column family's data with no expensive tombstone creation
*/
public void truncateBlocking() {
// We have two goals here:
// - truncate should delete everything written before truncate was invoked
// - but not delete anything that isn't part of the snapshot we create.
// We accomplish this by first flushing manually, then snapshotting, and
// recording the timestamp IN BETWEEN those actions. Any sstables created
// with this timestamp or later will not be marked for deletion.
//
// Bonus complication: since we store commit log segment position in sstable metadata,
// truncating those sstables means we will replay any CL segments from the
// beginning if we restart before they [the CL segments] are discarded for
// normal reasons post-truncate. To prevent this, we store truncation
// position in the System keyspace.
logger.info("Truncating {}.{}", keyspace.getName(), name);
final long truncatedAt;
final CommitLogPosition replayAfter;
if (keyspace.getMetadata().params.durableWrites || DatabaseDescriptor.isAutoSnapshot()) {
replayAfter = forceBlockingFlush();
viewManager.forceBlockingFlush();
} else {
// just nuke the memtable data w/o writing to disk first
viewManager.dumpMemtables();
try {
replayAfter = dumpMemtable().get();
} catch (Exception e) {
throw new RuntimeException(e);
}
}
long now = System.currentTimeMillis();
// make sure none of our sstables are somehow in the future (clock drift, perhaps)
for (ColumnFamilyStore cfs : concatWithIndexes())
    for (SSTableReader sstable : cfs.getLiveSSTables())
        now = Math.max(now, sstable.maxDataAge);
truncatedAt = now;
Runnable truncateRunnable = new Runnable() {
public void run() {
logger.debug("Discarding sstable data for truncated CF + indexes");
data.notifyTruncated(truncatedAt);
if (DatabaseDescriptor.isAutoSnapshot())
snapshot(Keyspace.getTimestampedSnapshotNameWithPrefix(name, SNAPSHOT_TRUNCATE_PREFIX));
discardSSTables(truncatedAt);
indexManager.truncateAllIndexesBlocking(truncatedAt);
viewManager.truncateBlocking(replayAfter, truncatedAt);
SystemKeyspace.saveTruncationRecord(ColumnFamilyStore.this, truncatedAt, replayAfter);
logger.trace("cleaning out row cache");
invalidateCaches();
}
};
runWithCompactionsDisabled(Executors.callable(truncateRunnable), true, true);
logger.info("Truncate of {}.{} is complete", keyspace.getName(), name);
}
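Operationally this method is reached via a CQL TRUNCATE statement or nodetool; a minimal sketch of a direct invocation, as an embedded test might do (Keyspace.open and getColumnFamilyStore are standard lookups, but verify their availability for your version):

// Direct invocation sketch, e.g. from an embedded/test context.
ColumnFamilyStore cfs = Keyspace.open("ks1").getColumnFamilyStore("cf1");
// flushes (or dumps) memtables, snapshots if auto-snapshot is enabled,
// discards sstables, and records the truncation position in the system keyspace
cfs.truncateBlocking();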