use of org.apache.cassandra.db.commitlog.ReplayPosition in project eiger by wlloyd.
In the class ColumnFamilyStore, method createCompactionWriter:
public SSTableWriter createCompactionWriter(long estimatedRows, File location, Collection<SSTableReader> sstables) throws IOException {
    ReplayPosition rp = ReplayPosition.getReplayPosition(sstables);
    SSTableMetadata.Collector sstableMetadataCollector = SSTableMetadata.createCollector().replayPosition(rp);

    // get the max timestamp of the precompacted sstables
    for (SSTableReader sstable : sstables)
        sstableMetadataCollector.updateMaxTimestamp(sstable.getMaxTimestamp());

    return new SSTableWriter(getTempSSTablePath(location), estimatedRows, metadata, partitioner, sstableMetadataCollector);
}
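For context: the compaction writer stamps its output sstable with a replay position taken from its inputs so that, after a restart, commit log replay can skip data the compacted sstable already covers. The standalone sketch below illustrates the ordering of such positions and the "take the maximum over the inputs" behaviour that a helper like ReplayPosition.getReplayPosition presumably implements; Pos and maxOf are invented names for illustration, not the project's API.

import java.util.Arrays;
import java.util.Collection;

// Hypothetical stand-in for ReplayPosition: a (segment id, byte offset) pair
// ordered first by segment, then by offset within the segment.
final class Pos implements Comparable<Pos> {
    static final Pos NONE = new Pos(-1L, 0);
    final long segment;
    final int position;

    Pos(long segment, int position) {
        this.segment = segment;
        this.position = position;
    }

    @Override
    public int compareTo(Pos other) {
        int bySegment = Long.compare(segment, other.segment);
        return bySegment != 0 ? bySegment : Integer.compare(position, other.position);
    }

    @Override
    public String toString() {
        return "Pos(" + segment + ", " + position + ")";
    }

    // Assumed behaviour of a getReplayPosition-style helper: the maximum
    // position recorded in the inputs, or NONE when there are none.
    static Pos maxOf(Collection<Pos> inputs) {
        return inputs.stream().max(Pos::compareTo).orElse(NONE);
    }

    public static void main(String[] args) {
        Collection<Pos> fromSSTables = Arrays.asList(new Pos(11L, 12), new Pos(11L, 400), new Pos(12L, 5));
        System.out.println(Pos.maxOf(fromSSTables)); // prints Pos(12, 5)
    }
}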
use of org.apache.cassandra.db.commitlog.ReplayPosition in project eiger by wlloyd.
In the class ColumnFamilyStore, method maybeSwitchMemtable:
/**
 * Flush the given memtable and swap in a new one for its CFS, if it hasn't been frozen already. Threadsafe.
 */
public Future<?> maybeSwitchMemtable(Memtable oldMemtable, final boolean writeCommitLog) {
    if (oldMemtable.isFrozen()) {
        logger.debug("memtable is already frozen; another thread must be flushing it");
        return null;
    }

    /*
     * If we can get the writelock, that means no new updates can come in and
     * all ongoing updates to memtables have completed. We can get the tail
     * of the log and use it as the starting position for log replay on recovery.
     *
     * This is why Table.switchLock needs to be global instead of per-Table:
     * we need to schedule discardCompletedSegments calls in the same order as their
     * contexts (commitlog positions) were read, even though the flush executor
     * is multithreaded.
     */
    Table.switchLock.writeLock().lock();
    try {
        if (oldMemtable.isFrozen()) {
            logger.debug("memtable is already frozen; another thread must be flushing it");
            return null;
        }
        assert getMemtableThreadSafe() == oldMemtable;

        final ReplayPosition ctx = writeCommitLog ? CommitLog.instance.getContext() : ReplayPosition.NONE;
        logger.debug("flush position is {}", ctx);

        // submit the memtable for any indexed sub-cfses, and our own.
        final List<ColumnFamilyStore> icc = new ArrayList<ColumnFamilyStore>();
        // don't assume that this.memtable is dirty; forceFlush can bring us here during index build even if it is not
        for (ColumnFamilyStore cfs : concatWithIndexes()) {
            Memtable mt = cfs.getMemtableThreadSafe();
            if (!mt.isClean() && !mt.isFrozen()) {
                // we need to freeze indexes as well, because they can be flushed concurrently (#3547)
                mt.freeze();
                icc.add(cfs);
            }
        }

        final CountDownLatch latch = new CountDownLatch(icc.size());
        for (ColumnFamilyStore cfs : icc) {
            Memtable memtable = cfs.data.switchMemtable();
            logger.info("Enqueuing flush of {}", memtable);
            memtable.flushAndSignal(latch, flushWriter, ctx);
        }
        if (memtableSwitchCount == Integer.MAX_VALUE)
            memtableSwitchCount = 0;
        memtableSwitchCount++;

        // a second, single-threaded executor runs the post-flush work, so the discardCompletedSegments
        // calls happen in the same order as their contexts were read,
        // while keeping the wait-for-flush (future.get) out of anything latency-sensitive.
        return postFlushExecutor.submit(new WrappedRunnable() {
            public void runMayThrow() throws InterruptedException, IOException {
                latch.await();

                if (!icc.isEmpty()) {
                    for (SecondaryIndex index : indexManager.getIndexesNotBackedByCfs()) {
                        // flush any non-cfs backed indexes
                        logger.info("Flushing SecondaryIndex {}", index);
                        index.forceBlockingFlush();
                    }
                }

                if (writeCommitLog) {
                    // if we're not writing to the commit log, we are replaying the log, so marking
                    // the log header with "you can discard anything written before the context" is not valid
                    CommitLog.instance.discardCompletedSegments(metadata.cfId, ctx);
                }
            }
        });
    } finally {
        Table.switchLock.writeLock().unlock();
    }
}
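The ordering requirement described in the comments above can be reproduced in isolation: flushes may complete on a multithreaded executor in any order, but the post-flush work (discarding commit log segments up to a recorded position) must be applied in the order the positions were taken, which a single-threaded executor guarantees. The sketch below is a simplified illustration of that pattern, not the project's code; flushPool and postFlush are invented names.

import java.util.concurrent.*;

public class OrderedPostFlushSketch {
    public static void main(String[] args) throws Exception {
        ExecutorService flushPool = Executors.newFixedThreadPool(4);      // flushes may finish out of order
        ExecutorService postFlush = Executors.newSingleThreadExecutor();  // post-flush work keeps submission order

        for (int i = 0; i < 5; i++) {
            final int context = i; // stands in for the ReplayPosition read under the write lock
            final CountDownLatch flushed = new CountDownLatch(1);

            // simulate a flush that takes an arbitrary amount of time
            flushPool.submit(() -> {
                try { Thread.sleep((long) (Math.random() * 100)); } catch (InterruptedException ignored) {}
                flushed.countDown();
            });

            // the single-threaded executor runs these in submission order,
            // so "discard up to context" is always applied monotonically
            postFlush.submit(() -> {
                try { flushed.await(); } catch (InterruptedException ignored) {}
                System.out.println("discard completed segments up to context " + context);
                return null;
            });
        }

        postFlush.shutdown();
        postFlush.awaitTermination(10, TimeUnit.SECONDS);
        flushPool.shutdown();
    }
}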
use of org.apache.cassandra.db.commitlog.ReplayPosition in project eiger by wlloyd.
In the class ColumnFamilyStore, method truncate:
/**
 * Truncate practically deletes the entire column family's data.
 * @return a Future to the delete operation. Call the future's get() to make
 *         sure the column family has been deleted.
 */
public Future<?> truncate() throws IOException, ExecutionException, InterruptedException {
    // We have two goals here:
    // - truncate should delete everything written before truncate was invoked
    // - but it should not delete anything that isn't part of the snapshot we create.
    // We accomplish this by first flushing manually, then snapshotting, and
    // recording the timestamp IN BETWEEN those actions. Any sstable created
    // with this timestamp or a later one will not be marked for delete.
    //
    // Bonus complication: since we store the replay position in sstable metadata,
    // truncating those sstables means we will replay any CL segments from the
    // beginning if we restart before they are discarded for normal reasons
    // post-truncate. So we need to (a) force a new segment so the currently
    // active one can be discarded, and (b) flush *all* CFs so that unflushed
    // data in the others doesn't keep any pre-truncate CL segments alive.
    //
    // Bonus bonus: simply calling forceFlush on all the CFs is not enough, because if
    // the memtable of a given column family is clean, forceFlush will return
    // immediately, even though there could be a memtable being flushed at the same
    // time. So to guarantee that all segments can be cleaned out, we need to
    // "waitForActiveFlushes" after the new segment has been created.
logger.debug("truncating {}", columnFamily);
// flush the CF being truncated before forcing the new segment
forceBlockingFlush();
CommitLog.instance.forceNewSegment();
ReplayPosition position = CommitLog.instance.getContext();
// now flush everyone else. re-flushing ourselves is not necessary, but harmless
for (ColumnFamilyStore cfs : ColumnFamilyStore.all()) cfs.forceFlush();
waitForActiveFlushes();
// if everything was clean, flush won't have called discard
CommitLog.instance.discardCompletedSegments(metadata.cfId, position);
// that was part of the flushed we forced; otherwise on a tie, it won't get deleted.
try {
Thread.sleep(100);
} catch (InterruptedException e) {
throw new AssertionError(e);
}
long truncatedAt = System.currentTimeMillis();
snapshot(Table.getTimestampedSnapshotName(columnFamily));
return CompactionManager.instance.submitTruncate(this, truncatedAt);
}
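The timestamp rule the comments describe (keep anything created at or after truncatedAt, mark everything strictly older for deletion) can be shown with a small standalone sketch. This is an illustration of the stated rule only, using invented names (SSTableInfo, discardBefore); it does not claim to reproduce CompactionManager.submitTruncate.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class TruncateFilterSketch {
    // Hypothetical record of an sstable and the time it was created/flushed.
    static final class SSTableInfo {
        final String name;
        final long createdAt; // millis, same clock as truncatedAt

        SSTableInfo(String name, long createdAt) {
            this.name = name;
            this.createdAt = createdAt;
        }
    }

    // Keep sstables created at or after truncatedAt (post-snapshot data),
    // mark everything strictly older for deletion.
    static List<SSTableInfo> discardBefore(List<SSTableInfo> sstables, long truncatedAt) {
        List<SSTableInfo> toDelete = new ArrayList<>();
        for (SSTableInfo s : sstables)
            if (s.createdAt < truncatedAt)
                toDelete.add(s);
        return toDelete;
    }

    public static void main(String[] args) {
        long truncatedAt = 1_000L;
        List<SSTableInfo> sstables = Arrays.asList(
                new SSTableInfo("old-1", 900L),
                new SSTableInfo("old-2", 999L),
                new SSTableInfo("boundary", 1_000L), // "this timestamp or later" is kept
                new SSTableInfo("new-1", 1_200L));
        discardBefore(sstables, truncatedAt).forEach(s -> System.out.println("delete " + s.name));
    }
}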
use of org.apache.cassandra.db.commitlog.ReplayPosition in project eiger by wlloyd.
In the class SSTableMetadataSerializerTest, method testSerialization:
@Test
public void testSerialization() throws IOException {
    EstimatedHistogram rowSizes = new EstimatedHistogram(new long[] { 1L, 2L }, new long[] { 3L, 4L, 5L });
    EstimatedHistogram columnCounts = new EstimatedHistogram(new long[] { 6L, 7L }, new long[] { 8L, 9L, 10L });
    ReplayPosition rp = new ReplayPosition(11L, 12);
    long maxTimestamp = 4162517136L;

    SSTableMetadata.Collector collector = SSTableMetadata.createCollector()
                                                         .estimatedRowSize(rowSizes)
                                                         .estimatedColumnCount(columnCounts)
                                                         .replayPosition(rp);
    collector.updateMaxTimestamp(maxTimestamp);
    SSTableMetadata originalMetadata = collector.finalizeMetadata(RandomPartitioner.class.getCanonicalName());

    ByteArrayOutputStream byteOutput = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(byteOutput);
    SSTableMetadata.serializer.serialize(originalMetadata, dos);

    ByteArrayInputStream byteInput = new ByteArrayInputStream(byteOutput.toByteArray());
    DataInputStream dis = new DataInputStream(byteInput);
    Descriptor desc = new Descriptor(Descriptor.CURRENT_VERSION, new File("."), "", "", 0, false);
    SSTableMetadata stats = SSTableMetadata.serializer.deserialize(dis, desc);

    assert stats.estimatedRowSize.equals(originalMetadata.estimatedRowSize);
    assert stats.estimatedRowSize.equals(rowSizes);
    assert stats.estimatedColumnCount.equals(originalMetadata.estimatedColumnCount);
    assert stats.estimatedColumnCount.equals(columnCounts);
    assert stats.replayPosition.equals(originalMetadata.replayPosition);
    assert stats.replayPosition.equals(rp);
    assert stats.maxTimestamp == maxTimestamp;
    assert stats.maxTimestamp == originalMetadata.maxTimestamp;
    assert RandomPartitioner.class.getCanonicalName().equals(stats.partitioner);
}
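The test above is a standard round-trip check: serialize into an in-memory buffer, deserialize, and compare field by field against both the original object and the raw inputs. The same DataOutputStream/DataInputStream round trip can be exercised on a (segment, position) pair like ReplayPosition directly; the sketch below uses a hypothetical writeTo/readFrom layout (a long followed by an int) for illustration and does not claim to match ReplayPosition's actual serializer format.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class ReplayPositionRoundTripSketch {
    // Hypothetical wire layout: segment as a long, then position as an int.
    static void writeTo(long segment, int position, DataOutputStream out) throws IOException {
        out.writeLong(segment);
        out.writeInt(position);
    }

    static long[] readFrom(DataInputStream in) throws IOException {
        long segment = in.readLong();
        int position = in.readInt();
        return new long[] { segment, position };
    }

    public static void main(String[] args) throws IOException {
        // serialize to an in-memory buffer, mirroring the test's structure
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        writeTo(11L, 12, new DataOutputStream(bytes));

        // deserialize and verify the round trip
        long[] back = readFrom(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        assert back[0] == 11L && back[1] == 12 : "round trip mismatch";
        System.out.println("segment=" + back[0] + ", position=" + back[1]);
    }
}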