Use of org.apache.cassandra.io.sstable.format.SSTableReader in project cassandra by apache.
Class StreamTransferTaskTest, method testScheduleTimeout:
@Test
public void testScheduleTimeout() throws Exception {
    InetAddress peer = FBUtilities.getBroadcastAddress();
    StreamSession session = new StreamSession(peer, peer, null, 0, true, false, null);
    ColumnFamilyStore cfs = Keyspace.open(KEYSPACE1).getColumnFamilyStore(CF_STANDARD);
    // create two sstables
    for (int i = 0; i < 2; i++) {
        SchemaLoader.insertData(KEYSPACE1, CF_STANDARD, i, 1);
        cfs.forceBlockingFlush();
    }
    // create a streaming task that streams those two sstables
    StreamTransferTask task = new StreamTransferTask(session, cfs.metadata.id);
    for (SSTableReader sstable : cfs.getLiveSSTables()) {
        List<Range<Token>> ranges = new ArrayList<>();
        ranges.add(new Range<>(sstable.first.getToken(), sstable.last.getToken()));
        task.addTransferFile(sstable.selfRef(), 1, sstable.getPositionsForRanges(ranges), 0);
    }
    assertEquals(2, task.getTotalNumberOfFiles());
    // schedule a zero-delay timeout on the first file and wait for it to fire
    Future f = task.scheduleTimeout(0, 0, TimeUnit.NANOSECONDS);
    f.get();
    // completing the second file before its timeout fires should cancel the scheduled timeout
    f = task.scheduleTimeout(1, 10, TimeUnit.MILLISECONDS);
    task.complete(1);
    try {
        f.get();
        Assert.fail("timeout future should have been cancelled when the file completed");
    } catch (CancellationException ex) {
        // expected
    }
    assertEquals(StreamSession.State.WAIT_COMPLETE, session.state());
    // once all streaming is done, no further timeout task should be scheduled
    assertNull(task.scheduleTimeout(1, 1, TimeUnit.SECONDS));
}
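The pattern the test exercises, scheduling a cancellable timeout per file and cancelling it when that file completes, can be sketched with the plain JDK scheduler. The class below (PerFileTimeoutSketch) is a hypothetical illustration, not Cassandra's StreamTransferTask:

import java.util.concurrent.*;

public class PerFileTimeoutSketch {
    private final ScheduledExecutorService timer = Executors.newSingleThreadScheduledExecutor();
    private final ConcurrentMap<Integer, ScheduledFuture<?>> timeouts = new ConcurrentHashMap<>();

    ScheduledFuture<?> scheduleTimeout(int fileSeq, long delay, TimeUnit unit) {
        // schedule an abort for this file unless it completes first
        ScheduledFuture<?> f = timer.schedule(() -> abort(fileSeq), delay, unit);
        timeouts.put(fileSeq, f);
        return f;
    }

    void complete(int fileSeq) {
        ScheduledFuture<?> f = timeouts.remove(fileSeq);
        if (f != null)
            f.cancel(false); // anyone blocked on f.get() now sees a CancellationException
    }

    private void abort(int fileSeq) {
        // the real task would fail the stream for the file that timed out
    }
}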
Use of org.apache.cassandra.io.sstable.format.SSTableReader in project cassandra by apache.
Class CompactionStress, method initCf:
ColumnFamilyStore initCf(StressProfile stressProfile, boolean loadSSTables) {
    generateTokens(stressProfile.seedStr, StorageService.instance.getTokenMetadata(), numTokens);
    CreateTableStatement.RawStatement createStatement = stressProfile.getCreateStatement();
    List<File> dataDirectories = getDataDirectories();
    ColumnFamilyStore cfs = StressCQLSSTableWriter.Builder.createOfflineTable(createStatement, Collections.EMPTY_LIST, dataDirectories);
    if (loadSSTables) {
        Directories.SSTableLister lister = cfs.getDirectories().sstableLister(Directories.OnTxnErr.IGNORE).skipTemporary(true);
        List<SSTableReader> sstables = new ArrayList<>();
        // open the existing sstables offline, skipping any without a data component
        for (Map.Entry<Descriptor, Set<Component>> entry : lister.list().entrySet()) {
            Set<Component> components = entry.getValue();
            if (!components.contains(Component.DATA))
                continue;
            try {
                SSTableReader sstable = SSTableReader.openNoValidation(entry.getKey(), components, cfs);
                sstables.add(sstable);
            } catch (Exception e) {
                JVMStabilityInspector.inspectThrowable(e);
                System.err.println(String.format("Error loading %s: %s", entry.getKey(), e.getMessage()));
            }
        }
        cfs.disableAutoCompaction();
        // register the opened sstables with the ColumnFamilyStore
        cfs.addSSTables(sstables);
    }
    return cfs;
}
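The loading loop above isolates failures per file: one unreadable sstable is logged rather than aborting the whole offline load. A generic version of that defensive pattern might look like the following (hypothetical helper, not part of CompactionStress):

import java.util.ArrayList;
import java.util.List;
import java.util.function.Function;

static <T> List<T> openAll(Iterable<String> paths, Function<String, T> opener) {
    List<T> opened = new ArrayList<>();
    for (String path : paths) {
        try {
            opened.add(opener.apply(path));
        } catch (Exception e) {
            // one unreadable file should not prevent loading the rest
            System.err.println(String.format("Error loading %s: %s", path, e.getMessage()));
        }
    }
    return opened;
}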
Use of org.apache.cassandra.io.sstable.format.SSTableReader in project cassandra by apache.
Class AbstractCompactionStrategy, method groupSSTablesForAntiCompaction:
/**
 * Method for grouping similar SSTables together. This is used by
 * anti-compaction to determine which SSTables should be anticompacted
 * as a group. If a given compaction strategy creates sstables which
 * cannot be merged due to some constraint, it must override this method.
 */
public Collection<Collection<SSTableReader>> groupSSTablesForAntiCompaction(Collection<SSTableReader> sstablesToGroup) {
    int groupSize = 2;
    List<SSTableReader> sortedSSTablesToGroup = new ArrayList<>(sstablesToGroup);
    Collections.sort(sortedSSTablesToGroup, SSTableReader.sstableComparator);
    Collection<Collection<SSTableReader>> groupedSSTables = new ArrayList<>();
    Collection<SSTableReader> currGroup = new ArrayList<>();
    for (SSTableReader sstable : sortedSSTablesToGroup) {
        currGroup.add(sstable);
        if (currGroup.size() == groupSize) {
            groupedSSTables.add(currGroup);
            currGroup = new ArrayList<>();
        }
    }
    if (currGroup.size() != 0)
        groupedSSTables.add(currGroup);
    return groupedSSTables;
}
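The body is a plain "sort, then chunk into pairs" operation. As an illustration only (not the project's code), the same grouping can be written with Guava's Lists.partition, assuming Guava is on the classpath; note that partition returns views over the backing list, so each chunk is copied:

import com.google.common.collect.Lists;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;

static <T> Collection<Collection<T>> groupInPairs(Collection<T> input, Comparator<T> order) {
    List<T> sorted = new ArrayList<>(input);
    sorted.sort(order);
    Collection<Collection<T>> groups = new ArrayList<>();
    for (List<T> chunk : Lists.partition(sorted, 2))
        groups.add(new ArrayList<>(chunk)); // last chunk may hold a single element, as in the original
    return groups;
}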
Use of org.apache.cassandra.io.sstable.format.SSTableReader in project cassandra by apache.
Class CompactionController, method getPurgeEvaluator:
/**
 * @param key the partition key to evaluate tombstone purgeability for
 * @return a predicate for whether tombstones marked for deletion at the given time for the given partition are
 * purgeable; we calculate this by checking whether the deletion time is less than the min timestamp of all SSTables
 * containing this partition and not participating in the compaction. This means there isn't any data in those
 * sstables that might still need to be suppressed by a tombstone at this timestamp.
 */
public Predicate<Long> getPurgeEvaluator(DecoratedKey key) {
    if (NEVER_PURGE_TOMBSTONES || !compactingRepaired())
        return time -> false;
    overlapIterator.update(key);
    Set<SSTableReader> filteredSSTables = overlapIterator.overlaps();
    Iterable<Memtable> memtables = cfs.getTracker().getView().getAllMemtables();
    long minTimestampSeen = Long.MAX_VALUE;
    boolean hasTimestamp = false;
    for (SSTableReader sstable : filteredSSTables) {
        // if the sstable has no real bloom filter (AlwaysPresentFilter), we check the index file instead
        if (sstable.getBloomFilter() instanceof AlwaysPresentFilter && sstable.getPosition(key, SSTableReader.Operator.EQ, false) != null || sstable.getBloomFilter().isPresent(key)) {
            minTimestampSeen = Math.min(minTimestampSeen, sstable.getMinTimestamp());
            hasTimestamp = true;
        }
    }
    for (Memtable memtable : memtables) {
        Partition partition = memtable.getPartition(key);
        if (partition != null) {
            minTimestampSeen = Math.min(minTimestampSeen, partition.stats().minTimestamp);
            hasTimestamp = true;
        }
    }
    if (!hasTimestamp)
        return time -> true;
    else {
        final long finalTimestamp = minTimestampSeen;
        return time -> time < finalTimestamp;
    }
}
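Stripped of the Cassandra plumbing, the rule is: a tombstone written at time t is purgeable only if t is older than every piece of data for this partition that lives outside the compaction. A hypothetical standalone version, given only the min timestamps of the overlapping sstables/memtables that contain the partition:

import java.util.OptionalLong;
import java.util.function.LongPredicate;
import java.util.stream.LongStream;

static LongPredicate purgeEvaluator(long[] minTimestampsOfOverlappingData) {
    OptionalLong min = LongStream.of(minTimestampsOfOverlappingData).min();
    if (!min.isPresent())
        return time -> true;             // no overlapping data: any tombstone can be purged
    long minTimestamp = min.getAsLong();
    return time -> time < minTimestamp;  // otherwise purge only strictly older tombstones
}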
Use of org.apache.cassandra.io.sstable.format.SSTableReader in project cassandra by apache.
Class SinglePartitionReadCommand, method queryMemtableAndDiskInternal:
private UnfilteredRowIterator queryMemtableAndDiskInternal(ColumnFamilyStore cfs) {
    /*
     * We have 2 main strategies:
     * 1) We query memtables and sstables simultaneously. This is our most generic strategy and the one we use
     *    unless we have a names filter that we know we can optimize further.
     * 2) If we have a names filter (so we query specific rows), we can make a bet: that all columns for all queried rows
     *    will have data in the most recent sstable(s), thus saving us from reading older ones. This does imply we
     *    have a way to guarantee we have all the data for what is queried, which is only possible for name queries
     *    and if we have neither non-frozen collections/UDTs nor counters (indeed, for a non-frozen collection or UDT,
     *    we can't guarantee an older sstable won't have some elements that weren't in the most recent sstables,
     *    and counters are intrinsically a collection of shards and so have the same problem).
     */
    if (clusteringIndexFilter() instanceof ClusteringIndexNamesFilter && !queriesMulticellType())
        return queryMemtableAndSSTablesInTimestampOrder(cfs, (ClusteringIndexNamesFilter) clusteringIndexFilter());
    Tracing.trace("Acquiring sstable references");
    ColumnFamilyStore.ViewFragment view = cfs.select(View.select(SSTableSet.LIVE, partitionKey()));
    List<UnfilteredRowIterator> iterators = new ArrayList<>(Iterables.size(view.memtables) + view.sstables.size());
    ClusteringIndexFilter filter = clusteringIndexFilter();
    long minTimestamp = Long.MAX_VALUE;
    try {
        for (Memtable memtable : view.memtables) {
            Partition partition = memtable.getPartition(partitionKey());
            if (partition == null)
                continue;
            minTimestamp = Math.min(minTimestamp, memtable.getMinTimestamp());
            // 'iter' is added to iterators, which is closed on exception or through the closing of the final merged iterator
            @SuppressWarnings("resource") UnfilteredRowIterator iter = filter.getUnfilteredRowIterator(columnFilter(), partition);
            oldestUnrepairedTombstone = Math.min(oldestUnrepairedTombstone, partition.stats().minLocalDeletionTime);
            iterators.add(iter);
        }
        /*
         * We can't eliminate full sstables based on the timestamp of what we've already read like
         * in collectTimeOrderedData, but we still want to eliminate sstables whose maxTimestamp < mostRecentTombstone
         * we've read. We still rely on the sstable ordering by maxTimestamp since if
         *   maxTimestamp_s1 > maxTimestamp_s0,
         * we're guaranteed that s1 cannot have a row tombstone such that
         *   timestamp(tombstone) > maxTimestamp_s0
         * since we necessarily have
         *   timestamp(tombstone) <= maxTimestamp_s1
         * In other words, iterating in maxTimestamp order allows us to do our mostRecentPartitionTombstone elimination
         * in one pass, and minimizes the number of sstables for which we read a partition tombstone.
         */
        Collections.sort(view.sstables, SSTableReader.maxTimestampComparator);
        long mostRecentPartitionTombstone = Long.MIN_VALUE;
        int nonIntersectingSSTables = 0;
        List<SSTableReader> skippedSSTablesWithTombstones = null;
        for (SSTableReader sstable : view.sstables) {
            // if we've already seen a partition tombstone with a timestamp greater
            // than the most recent update to this sstable, we can skip it
            if (sstable.getMaxTimestamp() < mostRecentPartitionTombstone)
                break;
            if (!shouldInclude(sstable)) {
                nonIntersectingSSTables++;
                if (sstable.hasTombstones()) {
                    // if the sstable has tombstones, we need to check after one pass whether it can be safely skipped
                    if (skippedSSTablesWithTombstones == null)
                        skippedSSTablesWithTombstones = new ArrayList<>();
                    skippedSSTablesWithTombstones.add(sstable);
                }
                continue;
            }
            minTimestamp = Math.min(minTimestamp, sstable.getMinTimestamp());
            // 'iter' is added to iterators, which is closed on exception or through the closing of the final merged iterator
            @SuppressWarnings("resource") UnfilteredRowIteratorWithLowerBound iter = makeIterator(cfs, sstable);
            if (!sstable.isRepaired())
                oldestUnrepairedTombstone = Math.min(oldestUnrepairedTombstone, sstable.getMinLocalDeletionTime());
            iterators.add(iter);
            mostRecentPartitionTombstone = Math.max(mostRecentPartitionTombstone, iter.partitionLevelDeletion().markedForDeleteAt());
        }
        int includedDueToTombstones = 0;
        // check for skipped sstables with tombstones that are not expired
        if (skippedSSTablesWithTombstones != null) {
            for (SSTableReader sstable : skippedSSTablesWithTombstones) {
                if (sstable.getMaxTimestamp() <= minTimestamp)
                    continue;
                // 'iter' is added to iterators, which is closed on exception or through the closing of the final merged iterator
                @SuppressWarnings("resource") UnfilteredRowIteratorWithLowerBound iter = makeIterator(cfs, sstable);
                if (!sstable.isRepaired())
                    oldestUnrepairedTombstone = Math.min(oldestUnrepairedTombstone, sstable.getMinLocalDeletionTime());
                iterators.add(iter);
                includedDueToTombstones++;
            }
        }
        if (Tracing.isTracing())
            Tracing.trace("Skipped {}/{} non-slice-intersecting sstables, included {} due to tombstones", nonIntersectingSSTables, view.sstables.size(), includedDueToTombstones);
        if (iterators.isEmpty())
            return EmptyIterators.unfilteredRow(cfs.metadata(), partitionKey(), filter.isReversed());
        StorageHook.instance.reportRead(cfs.metadata().id, partitionKey());
        return withSSTablesIterated(iterators, cfs.metric);
    } catch (RuntimeException | Error e) {
        try {
            FBUtilities.closeAll(iterators);
        } catch (Exception suppressed) {
            e.addSuppressed(suppressed);
        }
        throw e;
    }
}
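The elimination argument in the block comment above can be shown in isolation: visiting sources in descending maxTimestamp order lets the loop stop as soon as a source's maxTimestamp falls below the newest partition tombstone seen, because every remaining source can only hold data that tombstone already shadows. The sketch below uses a hypothetical Source type, not Cassandra's API:

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

interface Source {
    long maxTimestamp();
    long partitionTombstoneTimestamp(); // Long.MIN_VALUE if the source has no partition tombstone

    static List<Source> selectRelevant(List<Source> sources) {
        List<Source> sorted = new ArrayList<>(sources);
        sorted.sort(Comparator.comparingLong(Source::maxTimestamp).reversed());
        List<Source> selected = new ArrayList<>();
        long mostRecentTombstone = Long.MIN_VALUE;
        for (Source s : sorted) {
            if (s.maxTimestamp() < mostRecentTombstone)
                break; // everything from here on is fully shadowed by the tombstone
            selected.add(s);
            mostRecentTombstone = Math.max(mostRecentTombstone, s.partitionTombstoneTimestamp());
        }
        return selected;
    }
}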